From b4dcdc27b904243a90e16dc9c2a1d4c0a80eec78 Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 12 Feb 2024 11:05:58 -0800 Subject: [PATCH] [7.1.0] Don't use worker threads for repo fetching during Skyframe error bubbling For some reason, using worker threads for repo fetching during Skyframe error bubbling frequently causes deadlocks on Linux. I wasn't able to find out why the deadlock happens, but this CL is the immediate solution to the problem, and shouldn't be a performance concern since no Skyframe restarts should happen during error bubbling anyway. Tested on Linux; with this CL, `bazel test //src/test/shell/bazel:starlark_repository_test --test_filter=test_download_failure_message --runs_per_test=20` finishes just fine. (On an M1 macbook, I can't trigger the deadlock even without this CL.) Fixes https://github.com/bazelbuild/bazel/issues/21238 PiperOrigin-RevId: 606305306 Change-Id: I6f47a144b29030011f6c10c2b37f6874190fed0e --- .../repository/starlark/StarlarkRepositoryFunction.java | 9 ++++++++- .../com/google/devtools/build/skyframe/SkyFunction.java | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java index a46d92d7ded829..1137d9d9bc8c5f 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryFunction.java @@ -147,7 +147,14 @@ public RepositoryDirectoryValue.Builder fetch( Map markerData, SkyKey key) throws RepositoryFunctionException, InterruptedException { - if (workerExecutorService == null) { + if (workerExecutorService == null + || env.inErrorBubblingForSkyFunctionsThatCanFullyRecoverFromErrors()) { + // Don't use the worker thread if we're in Skyframe error bubbling. For some reason, using a + // worker thread during error bubbling very frequently causes deadlocks on Linux platforms. + // The deadlock is rather elusive and this is just the immediate thing that seems to help. + // Fortunately, no Skyframe restarts should happen during error bubbling anyway, so this + // shouldn't be a performance concern. See https://github.com/bazelbuild/bazel/issues/21238 + // for more context. return fetchInternal(rule, outputDirectory, directories, env, markerData, key); } var state = env.getState(RepoFetchingSkyKeyComputeState::new); diff --git a/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java b/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java index dd02762ff3c70b..97596273e2b184 100644 --- a/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java +++ b/src/main/java/com/google/devtools/build/skyframe/SkyFunction.java @@ -380,6 +380,7 @@ default void injectVersionForNonHermeticFunction(Version version) {} * may be called upon to transform a lower-level exception. This method can tell it whether to * transform a dependency's exception or ignore it and return a value as usual. */ + // TODO: Rename this as it can be called for other purposes. boolean inErrorBubblingForSkyFunctionsThatCanFullyRecoverFromErrors(); /**