diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueService.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueService.java index 7114b20251e9ff..2057f68dcae8bd 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueService.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueService.java @@ -152,7 +152,7 @@ int fingerprintLength() { * *

Those callbacks may be executing on RPC threads that should not be blocked. */ - Executor getExecutor() { + public Executor getExecutor() { return executor; } } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/BUILD index 2493722b46facb..de25f48dca331d 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/BUILD +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/BUILD @@ -78,6 +78,26 @@ java_library( ], ) +java_library( + name = "file_dependency_deserializer", + srcs = [ + "FileDependencies.java", + "FileDependencyDeserializer.java", + ], + deps = [ + ":file_dependency_key_support", + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization", + "//src/main/java/com/google/devtools/build/lib/vfs:ospathpolicy", + "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", + "//src/main/protobuf:file_invalidation_data_java_proto", + "//third_party:caffeine", + "//third_party:error_prone_annotations", + "//third_party:guava", + "//third_party:jsr305", + "@protobuf//:protobuf_java", + ], +) + java_library( name = "file_dependency_key_support", srcs = ["FileDependencyKeySupport.java"], diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencies.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencies.java new file mode 100644 index 00000000000000..ccd9c3eee666c9 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencies.java @@ -0,0 +1,184 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.skyframe.serialization.analysis; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import java.util.ArrayList; + +/** + * Representation of a set of file names that could invalidate a given value. + * + *

Most values can be associated with some set of input files, represented in this nested way to + * facilitate sharing between values. So given a set of changed files, invalidation is performed by + * calling {@link #containsMatch} on an instance and all transitively reachable instances via {@link + * #getDependencyCount} and {@link #getDependency}. If any matches are encountered, the associated + * value is invalidated. + */ +sealed interface FileDependencies extends FileDependencyDeserializer.GetDependenciesResult + permits FileDependencies.SingleResolvedPath, + FileDependencies.SingleResolvedPathAndDependency, + FileDependencies.MultiplePaths { + + boolean containsMatch(ImmutableSet paths); + + int getDependencyCount(); + + FileDependencies getDependency(int index); + + /** + * The real path associated with this node after resolution. + * + *

This is used by {@link FileDependencyDeserializer} to retrieve resolved parent paths but + * isn't directly used by invalidation. + */ + String resolvedPath(); + + /** Returns the resolved paths associated with the current node for testing. */ + // An interface must be used instead of an abstract class to allow records, which are helpful + // here. That means that this method must be public, triggering the warning. + @SuppressWarnings("VisibleForTestingMisused") + @VisibleForTesting + ImmutableList getAllResolvedPathsForTesting(); + + static Builder builder(String firstResolvedPath) { + return new Builder(firstResolvedPath); + } + + static class Builder { + private final ArrayList paths = new ArrayList<>(); + private final ArrayList dependencies = new ArrayList<>(); + + /** + * At least one resolved path is required. + * + *

The last path added is treated as the overall {@link #resolvedPath} of the instance. The + * {@code firstResolvedPath} argument is the {@link #resolvedPath} if it's the only path. + */ + Builder(String firstResolvedPath) { + paths.add(firstResolvedPath); + } + + @CanIgnoreReturnValue + Builder addPath(String path) { + paths.add(path); + return this; + } + + @CanIgnoreReturnValue + Builder addDependency(FileDependencies dependency) { + dependencies.add(dependency); + return this; + } + + FileDependencies build() { + if (paths.size() == 1) { + int dependenciesSize = dependencies.size(); + if (dependenciesSize == 0) { + return new SingleResolvedPath(paths.get(0)); + } + if (dependenciesSize == 1) { + return new SingleResolvedPathAndDependency(paths.get(0), dependencies.get(0)); + } + } + return new MultiplePaths(ImmutableList.copyOf(paths), ImmutableList.copyOf(dependencies)); + } + } + + // The implementations here exist to reduce indirection and memory use. + + record SingleResolvedPath(String resolvedPath) implements FileDependencies { + @Override + public boolean containsMatch(ImmutableSet paths) { + return paths.contains(resolvedPath); + } + + @Override + public int getDependencyCount() { + return 0; + } + + @Override + public FileDependencies getDependency(int index) { + throw new IndexOutOfBoundsException(this + " " + index); + } + + @Override + public ImmutableList getAllResolvedPathsForTesting() { + return ImmutableList.of(resolvedPath); + } + } + + record SingleResolvedPathAndDependency(String resolvedPath, FileDependencies dependency) + implements FileDependencies { + @Override + public boolean containsMatch(ImmutableSet paths) { + return paths.contains(resolvedPath); + } + + @Override + public int getDependencyCount() { + return 1; + } + + @Override + public FileDependencies getDependency(int index) { + if (index != 0) { + throw new IndexOutOfBoundsException(this + " " + index); + } + return dependency; + } + + @Override + public ImmutableList 
getAllResolvedPathsForTesting() { + return ImmutableList.of(resolvedPath); + } + } + + record MultiplePaths( + ImmutableList resolvedPaths, ImmutableList dependencies) + implements FileDependencies { + @Override + public boolean containsMatch(ImmutableSet paths) { + for (int i = 0; i < resolvedPaths.size(); i++) { + if (paths.contains(resolvedPaths.get(i))) { + return true; + } + } + return false; + } + + @Override + public int getDependencyCount() { + return dependencies.size(); + } + + @Override + public FileDependencies getDependency(int index) { + return dependencies.get(index); + } + + @Override + public String resolvedPath() { + return resolvedPaths.get(resolvedPaths().size() - 1); + } + + @Override + public ImmutableList getAllResolvedPathsForTesting() { + return resolvedPaths; + } + } +} diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencyDeserializer.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencyDeserializer.java new file mode 100644 index 00000000000000..d4c954a526e96f --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencyDeserializer.java @@ -0,0 +1,415 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+package com.google.devtools.build.lib.skyframe.serialization.analysis; + +import static com.google.common.base.Preconditions.checkState; +import static com.google.common.util.concurrent.Futures.immediateFailedFuture; +import static com.google.common.util.concurrent.Futures.immediateFuture; +import static com.google.common.util.concurrent.MoreExecutors.directExecutor; +import static com.google.devtools.build.lib.skyframe.serialization.analysis.FileDependencyKeySupport.FILE_KEY_DELIMITER; +import static com.google.devtools.build.lib.skyframe.serialization.analysis.FileDependencyKeySupport.MAX_KEY_LENGTH; +import static com.google.devtools.build.lib.skyframe.serialization.analysis.FileDependencyKeySupport.MTSV_SENTINEL; +import static com.google.devtools.build.lib.skyframe.serialization.analysis.FileDependencyKeySupport.computeCacheKey; +import static com.google.devtools.build.lib.vfs.PathFragment.SEPARATOR_CHAR; +import static com.google.protobuf.ExtensionRegistry.getEmptyRegistry; +import static java.nio.charset.StandardCharsets.UTF_8; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.google.common.util.concurrent.AbstractFuture; +import com.google.common.util.concurrent.AsyncFunction; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.devtools.build.lib.skyframe.serialization.FingerprintValueService; +import com.google.devtools.build.lib.skyframe.serialization.KeyBytesProvider; +import com.google.devtools.build.lib.skyframe.serialization.SerializationException; +import com.google.devtools.build.lib.skyframe.serialization.StringKey; +import com.google.devtools.build.lib.skyframe.serialization.proto.FileInvalidationData; +import com.google.devtools.build.lib.skyframe.serialization.proto.Symlink; +import com.google.devtools.build.lib.vfs.OsPathPolicy; +import com.google.protobuf.InvalidProtocolBufferException; +import 
java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import javax.annotation.Nullable; + +/** + * Deserializes dependency information persisted by {@link FileDependencySerializer}. + * + *

Fetching a dependency is a mostly linear asynchronous state machine that performs actions then + * waits in an alternating manner. + * + *

<ol> + *
  <li>Request the data for a given key. + *
  <li>{@link WaitForData}. + *
  <li>Request the data for the parent directory (a recursive call). + *
  <li>{@link WaitForParent}. + *
  <li>Process any symlinks, resolving symlink parents as needed. + *
  <li>{@link WaitForSymlinkParent}. + *
  <li>Processing symlinks repeats for all the symlinks associated with an entry. + *
</ol>
+ */ +final class FileDependencyDeserializer { + private static final OsPathPolicy OS = OsPathPolicy.getFilePathOs(); + + private final FingerprintValueService fingerprintValueService; + + /** + * A cache for {@link FileDependencies}, primarily for deduplication. + * + *

The cache keys are as described at {@link FileInvalidationData}. We can potentially strip + * the version information here, but keeping the version enables a single {@link + * FileDependencyDeserializer} instance to be shared across disparate builds. + * + *

While in-flight, the value has type {@link FutureFileDependencies}, which is replaced by + * {@link FileDependencies} once the computation completes. + * + *

References to {@link FileDependencies} form DAGs where certain top-level entries are + * retained by the {@code SkyValue}s that depend on them. When all such associated {@code + * SkyValue}s are invalidated, the dependency information becomes eligible for GC. + */ + private final Cache dependenciesCache = + Caffeine.newBuilder().weakValues().build(); + + FileDependencyDeserializer(FingerprintValueService fingerprintValueService) { + this.fingerprintValueService = fingerprintValueService; + } + + sealed interface GetDependenciesResult permits FileDependencies, FutureFileDependencies {} + + /** + * Reconstitutes the set of file dependencies associated with {@code key}. + * + *

Performs lookups and parent resolution (recursively) and symlink resolution to obtain all + * files associated with {@code key} represented as {@link FileDependencies}. + * + * @param key as described in {@link FileInvalidationData}. + * @return either an immediate {@link FileDependencies} instance or effectively a {@link + * ListenableFuture} instance. + */ + GetDependenciesResult getFileDependencies(String key) { + FutureFileDependencies ownedFuture; + switch (dependenciesCache.get(key, unused -> new FutureFileDependencies())) { + case FileDependencies dependencies: + return dependencies; + case FutureFileDependencies future: + if (!future.tryTakeOwnership()) { + return future; // Owned by another thread. + } + ownedFuture = future; + break; + } + // `ownedFuture` is owned by this thread, which must complete its value. + try { + ListenableFuture futureBytes; + try { + futureBytes = fingerprintValueService.get(getKeyBytes(key)); + } catch (IOException e) { + ownedFuture.setIoException(e); + return ownedFuture; + } + + ownedFuture.setFutureFiles( + Futures.transformAsync( + futureBytes, new WaitForData(key), fingerprintValueService.getExecutor())); + return ownedFuture; + } finally { + // Fails the future if no branch above completed it (e.g. an unchecked exception escaped), + // so the future shared through the cache never stays unset. + ownedFuture.verifySet(); + } + } + + /** + * The main purpose of this class is to act as a {@link ListenableFuture}. + * + *

Its specific type is explicitly visible to clients to allow them to cleanly distinguish it + * as a permitted subtype of {@link GetDependenciesResult}. + */ + static final class FutureFileDependencies extends AbstractFuture + implements GetDependenciesResult { + /** Used to establish exactly-once ownership of this future with {@link #tryTakeOwnership}. */ + @SuppressWarnings({"UnusedVariable", "FieldCanBeFinal"}) // set with OWNED_HANDLE + private boolean owned = false; + + private boolean isSet = false; + + private boolean tryTakeOwnership() { + return OWNED_HANDLE.compareAndSet(this, false, true); + } + + private void setFutureFiles(ListenableFuture files) { + checkState(setFuture(files), "already set %s", this); + isSet = true; + } + + private void setIoException(IOException e) { + checkState(setException(e)); + isSet = true; + } + + private void verifySet() { + if (!isSet) { + checkState( + setException( + new IllegalStateException( + "future was unexpectedly unset, look for unchecked exceptions in" + + " FileDependencyDeserializer"))); + } + } + + private static final VarHandle OWNED_HANDLE; + + static { + try { + OWNED_HANDLE = + MethodHandles.lookup() + .findVarHandle(FutureFileDependencies.class, "owned", boolean.class); + } catch (ReflectiveOperationException e) { + throw new ExceptionInInitializerError(e); + } + } + } + + private class WaitForData implements AsyncFunction { + private final String key; + + private WaitForData(String key) { + this.key = key; + } + + @Override + public ListenableFuture apply(byte[] bytes) + throws InvalidProtocolBufferException { + var data = FileInvalidationData.parseFrom(bytes, getEmptyRegistry()); + if (data.hasOverflowKey() && !data.getOverflowKey().equals(key)) { + return immediateFailedFuture( + new SerializationException( + String.format( + "Non-matching overflow key. This is possible if there is a key fingerprint" + + " collision. 
Expected %s got %s", + key, data))); + } + + int pathBegin = key.indexOf(FILE_KEY_DELIMITER) + 1; + int parentDirectoryEnd = key.lastIndexOf(SEPARATOR_CHAR); + + if (parentDirectoryEnd == -1) { + checkState( + !data.hasParentMtsv(), "no parent directory, but had parent MTSV %s, %s", key, data); + return resolveParent(key, data, key.substring(pathBegin), /* parentKey= */ null); + } + + String parentDirectory = key.substring(pathBegin, parentDirectoryEnd); + String parentKey = + computeCacheKey( + parentDirectory, + data.hasParentMtsv() ? data.getParentMtsv() : MTSV_SENTINEL, + FILE_KEY_DELIMITER); + String basename = key.substring(parentDirectoryEnd + 1); + return resolveParent(key, data, basename, parentKey); + } + } + + private ListenableFuture resolveParent( + String key, FileInvalidationData data, String basename, @Nullable String parentKey) { + var waitForParent = new WaitForParent(key, data, basename); + + if (parentKey == null) { + return waitForParent.apply(/* parent= */ null); + } + + switch (getFileDependencies(parentKey)) { + case FileDependencies parent: + return waitForParent.apply(parent); + case FutureFileDependencies future: + return Futures.transformAsync(future, waitForParent, directExecutor()); + } + } + + private class WaitForParent implements AsyncFunction { + private final String key; + private final FileInvalidationData data; + private final String basename; + + private WaitForParent(String key, FileInvalidationData data, String basename) { + this.key = key; + this.data = data; + this.basename = basename; + } + + @Override + public ListenableFuture apply(@Nullable FileDependencies parent) { + FileDependencies.Builder builder; + String parentDirectory; + if (parent == null) { + parentDirectory = null; + builder = FileDependencies.builder(basename); + } else { + parentDirectory = parent.resolvedPath(); + builder = + FileDependencies.builder(getRelative(parentDirectory, basename)).addDependency(parent); + } + return processSymlinks(key, data, /* 
symlinkIndex= */ 0, parentDirectory, builder); + } + } + + /** + * Processes any symlinks that may be present in {@code data}. + * + * @param key the main key that this symlink belongs to + * @param parentDirectory the real directory containing the symlink + */ + private ListenableFuture processSymlinks( + String key, + FileInvalidationData data, + int symlinkIndex, + @Nullable String parentDirectory, // null if root-level + FileDependencies.Builder builder) { + if (symlinkIndex >= data.getSymlinksCount()) { + FileDependencies dependencies = builder.build(); + // Replaces the cache value with the completed value. The future is likely to become eligible + // for GC shortly after the return below. Clients are expected to retain the meaningful + // top-level values. + dependenciesCache.put(key, dependencies); + return immediateFuture(dependencies); + } + + Symlink link = data.getSymlinks(symlinkIndex); + String linkContents = link.getContents(); + checkState( + OS.getDriveStrLength(linkContents) == 0, + "expected symlink contents to be a relative path: %s", + data); + // Combines the parent directory of the link with its contents and normalizes. + String normalizedLinkTarget = getRelativeAndNormalize(parentDirectory, linkContents); + String normalizedLinkParent = getParentDirectory(normalizedLinkTarget); + + if (!doesSymlinkParentNeedResolution(parentDirectory, normalizedLinkParent)) { + checkState( + !link.hasParentMtsv(), + "no resolution needed for data=%s, symlinkIndex=%s, parentDirectory=%s," + + " normalizedLinkParent=%s but symlink had parent MTSV", + data, + symlinkIndex, + parentDirectory, + normalizedLinkParent); + // Since `normalizedLinkParent` is already a real directory, `normalizedLinkTarget` is the + // resolved symlink path. + if (!normalizedLinkTarget.isEmpty()) { // Avoids adding root as a resolved path.
+ builder.addPath(normalizedLinkTarget); + } + return processSymlinks(key, data, symlinkIndex + 1, normalizedLinkParent, builder); + } + + String linkBasename = normalizedLinkTarget.substring(normalizedLinkParent.length() + 1); + + String newParentKey = + computeCacheKey( + normalizedLinkParent, + link.hasParentMtsv() ? link.getParentMtsv() : MTSV_SENTINEL, + FILE_KEY_DELIMITER); + + var waitForSymlinkParent = + new WaitForSymlinkParent(key, data, symlinkIndex, linkBasename, builder); + + switch (getFileDependencies(newParentKey)) { + case FileDependencies resolvedParent: + return waitForSymlinkParent.apply(resolvedParent); + case FutureFileDependencies future: + return Futures.transformAsync(future, waitForSymlinkParent, directExecutor()); + } + } + + private class WaitForSymlinkParent implements AsyncFunction { + private final String key; + private final FileInvalidationData data; + private final int symlinkIndex; + private final String linkBasename; + private final FileDependencies.Builder builder; + + private WaitForSymlinkParent( + String key, + FileInvalidationData data, + int symlinkIndex, + String linkBasename, + FileDependencies.Builder builder) { + this.key = key; + this.data = data; + this.symlinkIndex = symlinkIndex; + this.linkBasename = linkBasename; + this.builder = builder; + } + + @Override + public ListenableFuture apply(FileDependencies parent) { + String parentPath = parent.resolvedPath(); + builder.addPath(getRelative(parentPath, linkBasename)).addDependency(parent); + return processSymlinks(key, data, symlinkIndex + 1, parentPath, builder); + } + } + + private static String getRelative(@Nullable String parentDirectory, String basename) { + if (parentDirectory == null) { + return basename; + } + return parentDirectory + SEPARATOR_CHAR + basename; + } + + private static String getRelativeAndNormalize( + @Nullable String parentDirectory, String linkContents) { + int normalizationLevel = OS.needsToNormalize(linkContents); + return 
OS.normalize(getRelative(parentDirectory, linkContents), normalizationLevel); + } + + @Nullable // null if `path` is at the root level + private static String getParentDirectory(String path) { + int lastSeparator = path.lastIndexOf(SEPARATOR_CHAR); + if (lastSeparator == -1) { // no separator + return null; + } + return path.substring(0, lastSeparator); + } + + /** + * Predicate specifying when a symlink parent directory needs further resolution. + * + *

A relative path specifier in symlink contents can modify the parent directory but it does + * not always do so. For example, the symlink could point to a file in the same directory or the + * symlink could point to a file in an ancestor directory. In both of these cases, the parent + * directory is already fully resolved. + * + * @param previousParent the parent of the actual symlink itself. Null if the parent is actually + * the root directory. + * @param newParent the parent directory after combining the symlink with {@code previousParent}. + * Null if the result is the root directory. + */ + private static boolean doesSymlinkParentNeedResolution( + @Nullable String previousParent, @Nullable String newParent) { + if (newParent == null) { + return false; // Already root level. No parent resolution needed. + } + if (previousParent == null) { + return true; // No previousParent so resolution is needed. + } + // `newParent` is already a resolved path if it is the same as or an ancestor of the already + // resolved `previousParent`. + // NOTE(review): `startsWith` compares raw characters, not path segments, so a sibling such as + // `a/b` also passes as an "ancestor" of `a/bc`. Presumably the serializer applies the same + // predicate and the two must stay in sync - confirm before making this separator-aware. + return !previousParent.startsWith(newParent); + } + + /** + * Returns the lookup key for {@code cacheKey}: a fingerprint of its UTF-8 bytes when it is longer + * than {@code MAX_KEY_LENGTH}, otherwise the string itself wrapped in a {@link StringKey}. + */ + private KeyBytesProvider getKeyBytes(String cacheKey) { + if (cacheKey.length() > MAX_KEY_LENGTH) { + return fingerprintValueService.fingerprint(cacheKey.getBytes(UTF_8)); + } + return new StringKey(cacheKey); + } +}