diff --git a/src/main/java/com/google/devtools/build/lib/remote/disk/BUILD b/src/main/java/com/google/devtools/build/lib/remote/disk/BUILD index 0f4eb5083fcc81..2d7b5a71aac69a 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/disk/BUILD +++ b/src/main/java/com/google/devtools/build/lib/remote/disk/BUILD @@ -15,6 +15,7 @@ java_library( name = "disk", srcs = glob(["*.java"]), deps = [ + "//src/main/java/com/google/devtools/build/lib/concurrent", "//src/main/java/com/google/devtools/build/lib/jni", "//src/main/java/com/google/devtools/build/lib/remote:store", "//src/main/java/com/google/devtools/build/lib/remote/common", diff --git a/src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheGarbageCollector.java b/src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheGarbageCollector.java new file mode 100644 index 00000000000000..6ccaa6fca2cb4a --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheGarbageCollector.java @@ -0,0 +1,232 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Copyright 2024 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.remote.disk;

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static java.util.Comparator.comparing;

import com.google.common.collect.ImmutableSet;
import com.google.devtools.build.lib.concurrent.AbstractQueueVisitor;
import com.google.devtools.build.lib.concurrent.ErrorClassifier;
import com.google.devtools.build.lib.vfs.Dirent;
import com.google.devtools.build.lib.vfs.FileStatus;
import com.google.devtools.build.lib.vfs.IORuntimeException;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.Symlinks;
import java.io.IOException;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ExecutorService;

/**
 * A garbage collector for the disk cache.
 *
 * <p>Garbage collection works by enumerating the entire contents of the disk cache, identifying
 * candidates for deletion according to a {@link CollectionPolicy}, and deleting them. This process
 * may take a significant amount of time on large disk caches and slow filesystems, and may be
 * interrupted at any time.
 */
public final class DiskCacheGarbageCollector {
  // Top-level subdirectories that do not hold cache entries and must never be scanned or deleted.
  private static final ImmutableSet<String> EXCLUDED_DIRS = ImmutableSet.of("tmp", "gc");

  /**
   * Describes a disk cache entry.
   *
   * @param path path relative to the root directory of the disk cache
   * @param size file size in bytes
   * @param mtime file modification time in milliseconds since the epoch
   */
  private record Entry(String path, long size, long mtime) {}

  /** Determines which entries should be collected. */
  public static final class CollectionPolicy {
    private final Optional<Long> maxSizeBytes;
    private final Optional<Duration> maxAge;

    /**
     * Creates a new policy.
     *
     * @param maxSizeBytes the maximum total size in bytes, or empty for no size limit
     * @param maxAge the maximum age of cache entries, or empty for no age limit
     */
    CollectionPolicy(Optional<Long> maxSizeBytes, Optional<Duration> maxAge) {
      this.maxSizeBytes = maxSizeBytes;
      this.maxAge = maxAge;
    }

    /**
     * Returns the entries to be deleted.
     *
     * <p>Entries are considered in oldest-first order until both the size limit and the age limit
     * are satisfied. Note that this method sorts {@code entries} in place and returns a sublist
     * view backed by it.
     *
     * @param entries the full list of entries
     */
    List<Entry> getEntriesToDelete(List<Entry> entries) {
      entries.sort(comparing(Entry::mtime));

      long excessSizeBytes = getExcessSizeBytes(entries);
      long timeCutoff = getTimeCutoff();

      int i = 0;
      for (; i < entries.size(); i++) {
        // Stop once we're under the size limit and every remaining entry is newer than the cutoff.
        if (excessSizeBytes <= 0 && entries.get(i).mtime() >= timeCutoff) {
          break;
        }
        excessSizeBytes -= entries.get(i).size();
      }

      return entries.subList(0, i);
    }

    private long getExcessSizeBytes(List<Entry> entries) {
      if (maxSizeBytes.isEmpty()) {
        return 0;
      }
      long currentSizeBytes = entries.stream().mapToLong(Entry::size).sum();
      return currentSizeBytes - maxSizeBytes.get();
    }

    private long getTimeCutoff() {
      if (maxAge.isEmpty()) {
        // With no age limit, a cutoff of 0 means no entry is ever collected by age.
        return 0;
      }
      return Instant.now().minus(maxAge.get()).toEpochMilli();
    }
  }

  private final Path root;
  private final CollectionPolicy policy;
  private final ExecutorService executorService;
  private final ImmutableSet<Path> excludedDirs;

  /**
   * Creates a new garbage collector.
   *
   * @param root the root directory of the disk cache
   * @param executorService the executor service to schedule I/O operations onto
   * @param policy the garbage collection policy to use
   */
  public DiskCacheGarbageCollector(
      Path root, ExecutorService executorService, CollectionPolicy policy) {
    this.root = root;
    this.policy = policy;
    this.executorService = executorService;
    this.excludedDirs = EXCLUDED_DIRS.stream().map(root::getChild).collect(toImmutableSet());
  }

  /**
   * Runs garbage collection.
   *
   * @throws IOException if an I/O error occurred
   * @throws InterruptedException if the thread was interrupted
   */
  public void run() throws IOException, InterruptedException {
    EntryScanner scanner = new EntryScanner();
    EntryDeleter deleter = new EntryDeleter();

    List<Entry> allEntries = scanner.scan();
    List<Entry> entriesToDelete = policy.getEntriesToDelete(allEntries);

    for (Entry entry : entriesToDelete) {
      deleter.delete(root.getRelative(entry.path()));
    }

    deleter.await();
  }

  /** Lists all disk cache entries, performing I/O in parallel. */
  private final class EntryScanner extends AbstractQueueVisitor {
    private final ArrayList<Entry> entries = new ArrayList<>();

    EntryScanner() {
      super(
          executorService,
          ExecutorOwnership.SHARED,
          ExceptionHandlingMode.FAIL_FAST,
          ErrorClassifier.DEFAULT);
    }

    /** Lists all disk cache entries. */
    List<Entry> scan() throws IOException, InterruptedException {
      execute(() -> visitDirectory(root));
      try {
        awaitQuiescence(true);
      } catch (IORuntimeException e) {
        throw e.getCauseIOException();
      }
      return entries;
    }

    private void visitDirectory(Path path) {
      try {
        for (Dirent dirent : path.readdir(Symlinks.NOFOLLOW)) {
          Path childPath = path.getChild(dirent.getName());
          if (dirent.getType().equals(Dirent.Type.FILE)) {
            // The file may be gone by the time we open it.
            FileStatus status = childPath.statIfFound();
            if (status != null) {
              Entry entry =
                  new Entry(
                      childPath.relativeTo(root).getPathString(),
                      status.getSize(),
                      status.getLastModifiedTime());
              // Guarded because multiple directories may be visited concurrently.
              synchronized (entries) {
                entries.add(entry);
              }
            }
          } else if (dirent.getType().equals(Dirent.Type.DIRECTORY)
              && !excludedDirs.contains(childPath)) {
            execute(() -> visitDirectory(childPath));
          }
          // Deliberately ignore other file types, which should never occur in a well-formed cache.
        }
      } catch (IOException e) {
        throw new IORuntimeException(e);
      }
    }
  }

  /** Deletes disk cache entries, performing I/O in parallel. */
  private final class EntryDeleter extends AbstractQueueVisitor {
    EntryDeleter() {
      super(
          executorService,
          ExecutorOwnership.SHARED,
          ExceptionHandlingMode.FAIL_FAST,
          ErrorClassifier.DEFAULT);
    }

    /** Enqueues an entry to be deleted. */
    void delete(Path path) {
      execute(
          () -> {
            try {
              path.delete();
            } catch (IOException e) {
              throw new IORuntimeException(e);
            }
          });
    }

    /** Waits for all enqueued deletions to complete. */
    void await() throws IOException, InterruptedException {
      try {
        awaitQuiescence(true);
      } catch (IORuntimeException e) {
        throw e.getCauseIOException();
      }
    }
  }
}
package com.google.devtools.build.lib.remote.disk;

import static com.google.common.truth.Truth.assertWithMessage;

import com.google.common.util.concurrent.MoreExecutors;
import com.google.devtools.build.lib.testutil.TestUtils;
import com.google.devtools.build.lib.vfs.Path;
import java.io.IOException;
import java.io.OutputStream;
import java.time.Duration;
import java.time.Instant;
import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** Tests for {@link DiskCacheGarbageCollector}. */
@RunWith(JUnit4.class)
public final class DiskCacheGarbageCollectorTest {

  private final ExecutorService executorService =
      MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(1));

  private Path rootDir;

  /**
   * Test fixture describing a file to create in the cache.
   *
   * @param path path relative to the cache root
   * @param size file size in bytes
   * @param mtime file modification time
   */
  record Entry(String path, long size, Instant mtime) {
    static Entry of(String path, long size, Instant mtime) {
      return new Entry(path, size, mtime);
    }
  }

  @Before
  public void setUp() throws Exception {
    rootDir = TestUtils.createUniqueTmpDir(null);
  }

  @Test
  public void sizePolicy_noCollection() throws Exception {
    writeFiles(
        Entry.of("ac/123", kbytes(1), Instant.now()),
        Entry.of("cas/456", kbytes(1), Instant.now()));

    runGarbageCollector(Optional.of(kbytes(2)), Optional.empty());

    assertFilesExist("ac/123", "cas/456");
  }

  @Test
  public void sizePolicy_collectsOldest() throws Exception {
    writeFiles(
        Entry.of("ac/123", kbytes(1), daysAgo(1)),
        Entry.of("cas/456", kbytes(1), daysAgo(2)),
        Entry.of("ac/abc", kbytes(1), daysAgo(3)),
        Entry.of("cas/def", kbytes(1), daysAgo(4)));

    runGarbageCollector(Optional.of(kbytes(2)), Optional.empty());

    assertFilesExist("ac/123", "cas/456");
    assertFilesDoNotExist("ac/abc", "cas/def");
  }

  @Test
  public void agePolicy_noCollection() throws Exception {
    writeFiles(
        Entry.of("ac/123", kbytes(1), Instant.now()),
        Entry.of("cas/456", kbytes(1), Instant.now()));

    runGarbageCollector(Optional.empty(), Optional.of(days(3)));

    assertFilesExist("ac/123", "cas/456");
  }

  @Test
  public void agePolicy_collectsOldest() throws Exception {
    writeFiles(
        Entry.of("ac/123", kbytes(1), daysAgo(1)),
        Entry.of("cas/456", kbytes(1), daysAgo(2)),
        Entry.of("ac/abc", kbytes(1), daysAgo(3)),
        Entry.of("cas/def", kbytes(1), daysAgo(4)));

    runGarbageCollector(Optional.empty(), Optional.of(Duration.ofDays(3)));

    assertFilesExist("ac/123", "cas/456");
    assertFilesDoNotExist("ac/abc", "cas/def");
  }

  @Test
  public void sizeAndAgePolicy_noCollection() throws Exception {
    writeFiles(
        Entry.of("ac/123", kbytes(1), Instant.now()),
        Entry.of("cas/456", kbytes(1), Instant.now()));

    runGarbageCollector(Optional.of(kbytes(2)), Optional.of(days(1)));

    assertFilesExist("ac/123", "cas/456");
  }

  @Test
  public void sizeAndAgePolicy_sizeMoreRestrictiveThanAge_collectsOldest() throws Exception {
    writeFiles(
        Entry.of("ac/123", kbytes(1), daysAgo(1)),
        Entry.of("cas/456", kbytes(1), daysAgo(2)),
        Entry.of("ac/abc", kbytes(1), daysAgo(3)),
        Entry.of("cas/def", kbytes(1), daysAgo(4)));

    runGarbageCollector(Optional.of(kbytes(2)), Optional.of(days(4)));

    assertFilesExist("ac/123", "cas/456");
    assertFilesDoNotExist("ac/abc", "cas/def");
  }

  @Test
  public void sizeAndAgePolicy_ageMoreRestrictiveThanSize_collectsOldest() throws Exception {
    writeFiles(
        Entry.of("ac/123", kbytes(1), daysAgo(1)),
        Entry.of("cas/456", kbytes(1), daysAgo(2)),
        Entry.of("ac/abc", kbytes(1), daysAgo(3)),
        Entry.of("cas/def", kbytes(1), daysAgo(4)));

    runGarbageCollector(Optional.of(kbytes(3)), Optional.of(days(3)));

    assertFilesExist("ac/123", "cas/456");
    assertFilesDoNotExist("ac/abc", "cas/def");
  }

  @Test
  public void ignoresTmpAndGcSubdirectories() throws Exception {
    writeFiles(
        Entry.of("gc/foo", kbytes(1), daysAgo(1)), Entry.of("tmp/foo", kbytes(1), daysAgo(1)));

    // Both limits would collect everything, so survival proves the directories were skipped.
    runGarbageCollector(Optional.of(1L), Optional.of(days(1)));

    assertFilesExist("gc/foo", "tmp/foo");
  }

  private void assertFilesExist(String... relativePaths) throws IOException {
    for (String relativePath : relativePaths) {
      Path path = rootDir.getRelative(relativePath);
      assertWithMessage("expected %s to exist".formatted(relativePath))
          .that(path.exists())
          .isTrue();
    }
  }

  private void assertFilesDoNotExist(String... relativePaths) throws IOException {
    for (String relativePath : relativePaths) {
      Path path = rootDir.getRelative(relativePath);
      assertWithMessage("expected %s to not exist".formatted(relativePath))
          .that(path.exists())
          .isFalse();
    }
  }

  private void runGarbageCollector(Optional<Long> maxSizeBytes, Optional<Duration> maxAge)
      throws Exception {
    var gc =
        new DiskCacheGarbageCollector(
            rootDir,
            executorService,
            new DiskCacheGarbageCollector.CollectionPolicy(maxSizeBytes, maxAge));
    gc.run();
  }

  private void writeFiles(Entry... entries) throws IOException {
    for (Entry entry : entries) {
      writeFile(entry.path(), entry.size(), entry.mtime());
    }
  }

  private void writeFile(String relativePath, long size, Instant mtime) throws IOException {
    Path path = rootDir.getRelative(relativePath);
    path.getParentDirectory().createDirectoryAndParents();
    try (OutputStream out = path.getOutputStream()) {
      out.write(new byte[(int) size]);
    }
    path.setLastModifiedTime(mtime.toEpochMilli());
  }

  private static Instant daysAgo(int days) {
    return Instant.now().minus(Duration.ofDays(days));
  }

  private static Duration days(int days) {
    return Duration.ofDays(days);
  }

  private static long kbytes(int kbytes) {
    return kbytes * 1024L;
  }
}