Skip to content

Commit

Permalink
Use PackedFingerprint throughout serialization code.
Browse files Browse the repository at this point in the history
This should have less overhead than ByteString and being a custom type, it
is easier to make guarantees about its properties.

PiperOrigin-RevId: 676012196
Change-Id: I65afb00fea5960cf228815d7f1c1c9a31c197171
  • Loading branch information
aoeui authored and copybara-github committed Sep 18, 2024
1 parent a7210b8 commit 63c887b
Show file tree
Hide file tree
Showing 21 changed files with 201 additions and 200 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
import com.google.common.util.concurrent.ListenableFuture;
import com.google.devtools.build.lib.skyframe.serialization.DeserializationContext;
import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
import com.google.devtools.build.lib.skyframe.serialization.PackedFingerprint;
import com.google.devtools.build.lib.skyframe.serialization.PutOperation;
import com.google.devtools.build.lib.skyframe.serialization.SerializationContext;
import com.google.devtools.build.lib.skyframe.serialization.SerializationException;
import com.google.protobuf.ByteString;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.CodedOutputStream;
import java.io.IOException;
Expand Down Expand Up @@ -86,7 +86,7 @@ public void serialize(SerializationContext context, NestedSet<?> obj, CodedOutpu
PutOperation fingerprintComputationResult =
nestedSetStore.computeFingerprintAndStore((Object[]) obj.getChildren(), context);
context.addFutureToBlockWritingOn(fingerprintComputationResult.writeStatus());
codedOut.writeByteArrayNoTag(fingerprintComputationResult.fingerprint().toByteArray());
fingerprintComputationResult.fingerprint().writeTo(codedOut);
}
interner.put(new EqualsWrapper(obj), obj);
}
Expand All @@ -106,7 +106,7 @@ public NestedSet<?> deserialize(DeserializationContext context, CodedInputStream
}
case NONLEAF -> {
int depth = codedIn.readInt32();
ByteString fingerprint = ByteString.copyFrom(codedIn.readByteArray());
var fingerprint = PackedFingerprint.readFrom(codedIn);
return intern(order, depth, nestedSetStore.getContentsAndDeserialize(fingerprint, context));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,14 @@

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.google.auto.value.AutoValue;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;
import com.google.devtools.build.lib.bugreport.BugReporter;
import com.google.devtools.build.lib.skyframe.serialization.PackedFingerprint;
import com.google.devtools.build.lib.skyframe.serialization.PutOperation;
import com.google.devtools.build.lib.skyframe.serialization.SerializationConstants;
import com.google.protobuf.ByteString;
import javax.annotation.Nullable;

/**
Expand Down Expand Up @@ -93,10 +92,10 @@ class NestedSetSerializationCache {
*/
@Nullable
Object putFutureIfAbsent(
ByteString fingerprint, SettableFuture<Object[]> future, Object context) {
PackedFingerprint fingerprint, SettableFuture<Object[]> future, Object context) {
checkArgument(!future.isDone(), "Must pass a fresh future: %s", future);
Object existing =
fingerprintToContents.asMap().putIfAbsent(FingerprintKey.of(fingerprint, context), future);
fingerprintToContents.asMap().putIfAbsent(new FingerprintKey(fingerprint, context), future);
if (existing != null) {
return existing;
}
Expand All @@ -110,7 +109,7 @@ Object putFutureIfAbsent(
* the future, when it completes.
*/
private void unwrapWhenDone(
ByteString fingerprint, ListenableFuture<Object[]> futureContents, Object context) {
PackedFingerprint fingerprint, ListenableFuture<Object[]> futureContents, Object context) {
Futures.addCallback(
futureContents,
new FutureCallback<Object[]>() {
Expand All @@ -126,7 +125,7 @@ public void onSuccess(Object[] contents) {
// has no effect).
var unused =
putIfAbsent(
contents, PutOperation.create(fingerprint, immediateVoidFuture()), context);
contents, new PutOperation(fingerprint, immediateVoidFuture()), context);
}

@Override
Expand Down Expand Up @@ -165,18 +164,9 @@ PutOperation putIfAbsent(Object[] contents, PutOperation result, Object context)
if (existingResult != null) {
return existingResult;
}
fingerprintToContents.put(FingerprintKey.of(result.fingerprint(), context), contents);
fingerprintToContents.put(new FingerprintKey(result.fingerprint(), context), contents);
return null;
}

@AutoValue
abstract static class FingerprintKey {
abstract ByteString fingerprint();

abstract Object context();

static FingerprintKey of(ByteString fingerprint, Object context) {
return new AutoValue_NestedSetSerializationCache_FingerprintKey(fingerprint, context);
}
}
record FingerprintKey(PackedFingerprint fingerprint, Object context) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
import com.google.devtools.build.lib.skyframe.serialization.DeserializationContext;
import com.google.devtools.build.lib.skyframe.serialization.FingerprintValueStore;
import com.google.devtools.build.lib.skyframe.serialization.FingerprintValueStore.MissingFingerprintValueException;
import com.google.devtools.build.lib.skyframe.serialization.PackedFingerprint;
import com.google.devtools.build.lib.skyframe.serialization.PutOperation;
import com.google.devtools.build.lib.skyframe.serialization.SerializationContext;
import com.google.devtools.build.lib.skyframe.serialization.SerializationDependencyProvider;
import com.google.devtools.build.lib.skyframe.serialization.SerializationException;
import com.google.protobuf.ByteString;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.CodedOutputStream;
import java.io.ByteArrayOutputStream;
Expand Down Expand Up @@ -182,8 +182,9 @@ private PutOperation computeFingerprintAndStore(
}

byte[] serializedBytes = byteArrayOutputStream.toByteArray();
ByteString fingerprint =
ByteString.copyFrom(Hashing.md5().hashBytes(serializedBytes).asBytes());
// TODO: b/368012715 - reconsider use of md5.
PackedFingerprint fingerprint =
PackedFingerprint.fromBytes(Hashing.md5().hashBytes(serializedBytes).asBytes());
SettableFuture<Void> localWriteFuture = SettableFuture.create();
futureBuilder.add(localWriteFuture);

Expand All @@ -196,7 +197,7 @@ private PutOperation computeFingerprintAndStore(

ListenableFuture<Void> writeFuture =
Futures.whenAllSucceed(futureBuilder.build()).call(() -> null, directExecutor());
PutOperation result = PutOperation.create(fingerprint, writeFuture);
var result = new PutOperation(fingerprint, writeFuture);

PutOperation existingResult = nestedSetCache.putIfAbsent(contents, result, cacheContext);
if (existingResult != null) {
Expand Down Expand Up @@ -228,15 +229,18 @@ private static ListenableFuture<Object[]> maybeWrapInFuture(Object contents) {
* which may be completed with a {@link MissingFingerprintValueException}.
*/
Object getContentsAndDeserialize(
ByteString fingerprint, DeserializationContext deserializationContext) throws IOException {
PackedFingerprint fingerprint, DeserializationContext deserializationContext)
throws IOException {
return getContentsAndDeserialize(
fingerprint, deserializationContext, cacheContextFn.apply(deserializationContext));
}

// All callers will test on type and check return value if it's a future.
@SuppressWarnings("FutureReturnValueIgnored")
private Object getContentsAndDeserialize(
ByteString fingerprint, DeserializationContext deserializationContext, Object cacheContext)
PackedFingerprint fingerprint,
DeserializationContext deserializationContext,
Object cacheContext)
throws IOException {
SettableFuture<Object[]> future = SettableFuture.create();
Object contents = nestedSetCache.putFutureIfAbsent(fingerprint, future, cacheContext);
Expand Down Expand Up @@ -273,10 +277,10 @@ private Object getContentsAndDeserialize(
ImmutableList.builderWithExpectedSize(numberOfElements);
for (int i = 0; i < numberOfElements; i++) {
Object deserializedElement = newDeserializationContext.deserialize(codedIn);
if (deserializedElement instanceof ByteString) {
if (deserializedElement instanceof PackedFingerprint transitiveFingerprint) {
Object innerContents =
getContentsAndDeserialize(
(ByteString) deserializedElement, deserializationContext, cacheContext);
transitiveFingerprint, deserializationContext, cacheContext);
deserializationFutures.add(maybeWrapInFuture(innerContents));
} else {
deserializationFutures.add(Futures.immediateFuture(deserializedElement));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.devtools.build.lib.skyframe.serialization.FingerprintValueStore.MissingFingerprintValueException;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import javax.annotation.Nullable;
Expand All @@ -44,7 +43,8 @@ public final class FingerprintValueCache {
* <p>Used to deduplicate fetches, or in some cases, where the object to be fetched was already
* serialized, retrieves the already existing object.
*
* <p>The keys can either be a {@link ByteString} or a {@link FingerprintWithDistinguisher}.
* <p>The keys can either be a {@link PackedFingerprint} or a {@link
* FingerprintWithDistinguisher}.
*
* <p>The values in this cache are always {@code Object} or {@code ListenableFuture<Object>}. We
* avoid a common wrapper object both for memory efficiency and because our cache eviction policy
Expand All @@ -69,7 +69,7 @@ public final class FingerprintValueCache {
* <ul>
* <li>key: the content value object, using reference equality
* <li>value: either a {@code ListenableFuture<PutOperation>} when the operation is in flight or
* a {@link ByteString} fingerprint when it is complete
* a {@link PackedFingerprint} fingerprint when it is complete
* </ul>
*
* <p>{@code ListenableFuture<PutOperation>} contains two distinct asynchronous operations.
Expand Down Expand Up @@ -132,7 +132,7 @@ public FingerprintValueCache(SyncMode mode) {
*
* <ul>
* <li>a {@code ListenableFuture<PutOperation>} if it is still in flight; or
* <li>a {@link ByteString} fingerprint if writing to remote storage is successful.
* <li>a {@link PackedFingerprint} fingerprint if writing to remote storage is successful.
* </ul>
*
* <p>If a {@code ListenableFuture<PutOperation>} is returned, its expected {@link
Expand Down Expand Up @@ -185,7 +185,7 @@ Object getOrClaimPutOperation(
*/
@Nullable
Object getOrClaimGetOperation(
ByteString fingerprint,
PackedFingerprint fingerprint,
@Nullable Object distinguisher,
ListenableFuture<Object> getOperation) {
Object key = createKey(fingerprint, distinguisher);
Expand Down Expand Up @@ -244,7 +244,7 @@ public void onFailure(Throwable t) {

/** Unwraps the future and populates the reverse mapping when done. */
private void unwrapValueWhenDone(
ByteString fingerprint, Object key, ListenableFuture<Object> getOperation) {
PackedFingerprint fingerprint, Object key, ListenableFuture<Object> getOperation) {
Futures.addCallback(
getOperation,
new FutureCallback<Object>() {
Expand All @@ -265,7 +265,7 @@ public void onFailure(Throwable t) {
directExecutor());
}

private static Object createKey(ByteString fingerprint, @Nullable Object distinguisher) {
private static Object createKey(PackedFingerprint fingerprint, @Nullable Object distinguisher) {
if (distinguisher == null) {
return fingerprint;
}
Expand All @@ -289,12 +289,12 @@ private static Object createKey(ByteString fingerprint, @Nullable Object disting
@AutoValue
abstract static class FingerprintWithDistinguisher {
/** The primary key for a {@link #deserializationCache} entry. */
abstract ByteString fingerprint();
abstract PackedFingerprint fingerprint();

/** A secondary key, sometimes needed to resolve ambiguity. */
abstract Object distinguisher();

static FingerprintWithDistinguisher of(ByteString fingerprint, Object distinguisher) {
static FingerprintWithDistinguisher of(PackedFingerprint fingerprint, Object distinguisher) {
return new AutoValue_FingerprintValueCache_FingerprintWithDistinguisher(
fingerprint, distinguisher);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,10 @@
// limitations under the License.
package com.google.devtools.build.lib.skyframe.serialization;

import static com.google.common.hash.Hashing.murmur3_128;
import static java.util.concurrent.Executors.newSingleThreadExecutor;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.devtools.common.options.OptionsParsingResult;
import com.google.protobuf.ByteString;
Expand All @@ -38,6 +37,15 @@ public interface Factory {
FingerprintValueService create(OptionsParsingResult options);
}

/** Injectable implementation of the fingerprint function. */
public interface Fingerprinter {
PackedFingerprint fingerprint(byte[] input);
}

/** A {@link Fingerprinter} implementation for non-production use. */
public static final Fingerprinter NONPROD_FINGERPRINTER =
input -> PackedFingerprint.fromBytes(murmur3_128().hashBytes(input).asBytes());

private final Executor executor;
private final FingerprintValueStore store;
private final FingerprintValueCache cache;
Expand All @@ -47,9 +55,9 @@ public interface Factory {
*
* <p>Used to derive {@link #fingerprintPlaceholder} and {@link #fingerprintLength}.
*/
private final HashFunction hashFunction;
private final Fingerprinter fingerprinter;

private final ByteString fingerprintPlaceholder;
private final PackedFingerprint fingerprintPlaceholder;
private final int fingerprintLength;

@VisibleForTesting
Expand All @@ -71,30 +79,30 @@ public static FingerprintValueService createForTesting(FingerprintValueCache.Syn
private static FingerprintValueService createForTesting(
FingerprintValueStore store, FingerprintValueCache.SyncMode mode) {
return new FingerprintValueService(
newSingleThreadExecutor(), store, new FingerprintValueCache(mode), Hashing.murmur3_128());
newSingleThreadExecutor(), store, new FingerprintValueCache(mode), NONPROD_FINGERPRINTER);
}

public FingerprintValueService(
Executor executor,
FingerprintValueStore store,
FingerprintValueCache cache,
HashFunction hashFunction) {
Fingerprinter fingerprinter) {
this.executor = executor;
this.store = store;
this.cache = cache;
this.hashFunction = hashFunction;
this.fingerprinter = fingerprinter;

this.fingerprintPlaceholder = fingerprint(new byte[] {});
this.fingerprintLength = fingerprintPlaceholder.size();
this.fingerprintLength = fingerprintPlaceholder.toBytes().length;
}

/** Delegates to {@link FingerprintValueStore#put}. */
public ListenableFuture<Void> put(ByteString fingerprint, byte[] serializedBytes) {
public ListenableFuture<Void> put(PackedFingerprint fingerprint, byte[] serializedBytes) {
return store.put(fingerprint, serializedBytes);
}

/** Delegates to {@link FingerprintValueStore#get}. */
ListenableFuture<byte[]> get(ByteString fingerprint) throws IOException {
ListenableFuture<byte[]> get(PackedFingerprint fingerprint) throws IOException {
return store.get(fingerprint);
}

Expand All @@ -108,15 +116,20 @@ Object getOrClaimPutOperation(
/** Delegates to {@link FingerprintValueCache#getOrClaimGetOperation}. */
@Nullable
Object getOrClaimGetOperation(
ByteString fingerprint,
PackedFingerprint fingerprint,
@Nullable Object distinguisher,
ListenableFuture<Object> getOperation) {
return cache.getOrClaimGetOperation(fingerprint, distinguisher, getOperation);
}

/** Computes the fingerprint of {@code bytes}. */
ByteString fingerprint(byte[] bytes) {
return ByteString.copyFrom(hashFunction.hashBytes(bytes).asBytes());
public PackedFingerprint fingerprint(byte[] bytes) {
return fingerprinter.fingerprint(bytes);
}

/** Convenience overload of {@link #fingerprint(byte[])}. */
public PackedFingerprint fingerprint(ByteString bytes) {
return fingerprint(bytes.toByteArray());
}

/**
Expand All @@ -125,7 +138,7 @@ ByteString fingerprint(byte[] bytes) {
* <p>The placeholder has the same length as the real fingerprint so the real fingerprint can
* overwrite the placeholder when it becomes available.
*/
ByteString fingerprintPlaceholder() {
PackedFingerprint fingerprintPlaceholder() {
return fingerprintPlaceholder;
}

Expand Down
Loading

0 comments on commit 63c887b

Please sign in to comment.