From a4dca9c07203ab550bd19a4dbbc3cbc545e63c42 Mon Sep 17 00:00:00 2001 From: Daniel Wagner-Hall Date: Thu, 1 Jun 2023 17:19:22 +0100 Subject: [PATCH] Use consistent paths to bazel binaries (#465) Before this change, changing which mirror you were downloading from would change the $PATH environment variable bazel is run with, even though the bazel binaries being pointed to are identical. This can cause repository rules to invalidate and re-run. Instead, store downloaded bazels in directories keyed off of the sha256 of the bazel binary itself, and track the metadata of a mirror+version -> sha256. This avoid spurious rebuilds when only changing the URL bazelisk would use to download bazel. --- bazelisk_test.sh | 78 +++++++++++++++++++++++++++-- core/core.go | 125 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 161 insertions(+), 42 deletions(-) diff --git a/bazelisk_test.sh b/bazelisk_test.sh index 2ce2c905..95b51d69 100755 --- a/bazelisk_test.sh +++ b/bazelisk_test.sh @@ -299,6 +299,52 @@ EOF (echo "FAIL: Expected to find 'BAZELISK_SKIP_WRAPPER=true' in the output of 'bazelisk version'"; exit 1) } +function test_path_is_consistent_regardless_of_base_url() { + setup + + echo 6.2.0 > .bazelversion + + cat >WORKSPACE <print_path.bzl <&1 | tee log1 + + BAZELISK_HOME="$BAZELISK_HOME" bazelisk clean --expunge 2>&1 + + # We need a separate mirror of bazel binaries, which has identical files. + # Ideally we wouldn't depend on sourceforge for test runtime, but hey, it exists and it works. + BAZELISK_HOME="$BAZELISK_HOME" BAZELISK_BASE_URL=https://downloads.sourceforge.net/project/bazel.mirror bazelisk sync --only=print_path 2>&1 | tee log2 + + path1="$(grep "PATH is:" log1)" + path2="$(grep "PATH is:" log2)" + + [[ -n "${path1}" && -n "${path2}" ]] || \ + (echo "FAIL: Expected PATH to be non-empty, got path1=${path1}, path2=${path2}"; exit 1) + + [[ "${path1}" == "${path2}" ]] || \ + (echo "FAIL: Expected PATH to be the same regardless of which mirror was used, got path1=${path1}, path2=${path2}"; exit 1) +} + function test_skip_wrapper() { setup @@ -327,10 +373,10 @@ function test_bazel_download_path_go() { BAZELISK_HOME="$BAZELISK_HOME" \ bazelisk version 2>&1 | tee log - find "$BAZELISK_HOME/downloads/bazelbuild" 2>&1 | tee log + find "$BAZELISK_HOME/downloads/metadata/bazelbuild" 2>&1 | tee log - grep "^$BAZELISK_HOME/downloads/bazelbuild/bazel-[0-9][0-9]*.[0-9][0-9]*.[0-9][0-9]*-[a-z0-9_-]*/bin/bazel\(.exe\)\?$" log || \ - (echo "FAIL: Expected to download bazel binary into specific path."; exit 1) + grep "^$BAZELISK_HOME/downloads/metadata/bazelbuild/bazel-[0-9][0-9]*.[0-9][0-9]*.[0-9][0-9]*-[a-z0-9_-]*$" log || \ + (echo "FAIL: Expected to download bazel metadata in specific path."; exit 1) } function test_bazel_verify_sha256() { @@ -395,8 +441,26 @@ function test_bazel_prepend_binary_directory_to_path_go() { BAZELISK_HOME="$BAZELISK_HOME" \ bazelisk --print_env 2>&1 | tee log - PATTERN=$(echo "^PATH=$BAZELISK_HOME/downloads/bazelbuild/bazel-[0-9][0-9]*.[0-9][0-9]*.[0-9][0-9]*-[a-z0-9_-]*/bin[:;]" | sed -e 's/\//\[\/\\\\\]/g') - grep "$PATTERN" log || \ + local os="$(uname -s | tr A-Z a-z)" + case "${os}" in + darwin|linux) + path_entry_delimiter=":" + path_delimiter="/" + extension="" + ;; + msys*|mingw*|cygwin*) + path_entry_delimiter=";" + path_delimiter="\\" + extension=".exe" + ;; + *) + echo "FAIL: Unknown OS ${os} in test" + exit 1 + ;; + esac + path_entry="$(grep "^PATH=" log | cut -d= -f2- | cut -d"${path_entry_delimiter}" -f1)" + + [[ -x "${path_entry}${path_delimiter}bazel${extension}" ]] || \ (echo "FAIL: Expected PATH to contains bazel binary directory."; exit 1) } @@ -480,6 +544,10 @@ if [[ $BAZELISK_VERSION == "GO" ]]; then test_bazel_prepend_binary_directory_to_path_go echo + echo "# test_path_is_consistent_regardless_of_base_url" + test_path_is_consistent_regardless_of_base_url + echo + case "$(uname -s)" in MSYS*) # The tests are currently not compatible with Windows. diff --git a/core/core.go b/core/core.go index ef4a66f4..f08a3e66 100644 --- a/core/core.go +++ b/core/core.go @@ -5,6 +5,7 @@ package core import ( "bufio" + "crypto/rand" "crypto/sha256" "encoding/json" "fmt" @@ -416,73 +417,123 @@ func downloadBazel(bazelVersionString string, bazeliskHome string, repos *Reposi bazelForkOrURL = bazelFork } - baseDirectory := filepath.Join(bazeliskHome, "downloads", bazelForkOrURL) - bazelPath, err := downloadBazelIfNecessary(resolvedBazelVersion, baseDirectory, repos, downloader) + bazelPath, err := downloadBazelIfNecessary(resolvedBazelVersion, bazeliskHome, bazelForkOrURL, repos, downloader) return bazelPath, err } -func downloadBazelIfNecessary(version string, baseDirectory string, repos *Repositories, downloader DownloadFunc) (string, error) { +// downloadBazelIfNecessary returns a path to a bazel which can be run, which may have been cached. +// The directory it returns may depend on version and bazeliskHome, but does not depend on bazelForkOrURLDirName. +// This is important, as the directory may be added to $PATH, and varying the path for equivalent files may cause unnecessary repository rule cache invalidations. +// Where a file was downloaded from shouldn't affect cache behaviour of Bazel invocations. +// +// The structure of the downloads directory is as follows ([]s indicate variables): +// +// downloads/metadata/[fork-or-url]/bazel-[version-os-etc] is a text file containing a hex sha256 of the contents of the downloaded bazel file. +// downloads/sha256/[sha256]/bin/bazel[extension] contains the bazel with a particular sha256. +func downloadBazelIfNecessary(version string, bazeliskHome string, bazelForkOrURLDirName string, repos *Repositories, downloader DownloadFunc) (string, error) { pathSegment, err := platforms.DetermineBazelFilename(version, false) if err != nil { return "", fmt.Errorf("could not determine path segment to use for Bazel binary: %v", err) } + destFile := "bazel" + platforms.DetermineExecutableFilenameSuffix() + + mappingPath := filepath.Join(bazeliskHome, "downloads", "metadata", bazelForkOrURLDirName, pathSegment) + digestFromMappingFile, err := os.ReadFile(mappingPath) + if err == nil { + pathToBazelInCAS := filepath.Join(bazeliskHome, "downloads", "sha256", string(digestFromMappingFile), "bin", destFile) + if _, err := os.Stat(pathToBazelInCAS); err == nil { + return pathToBazelInCAS, nil + } + } + + pathToBazelInCAS, downloadedDigest, err := downloadBazelToCAS(version, bazeliskHome, repos, downloader) + if err != nil { + return "", fmt.Errorf("failed to download bazel: %w", err) + } - destDir := filepath.Join(baseDirectory, pathSegment, "bin") expectedSha256 := strings.ToLower(GetEnvOrConfig("BAZELISK_VERIFY_SHA256")) + if len(expectedSha256) > 0 { + if expectedSha256 != downloadedDigest { + return "", fmt.Errorf("%s has sha256=%s but need sha256=%s", pathToBazelInCAS, downloadedDigest, expectedSha256) + } + } - tmpDestFile := "bazel-tmp" + platforms.DetermineExecutableFilenameSuffix() - destFile := "bazel" + platforms.DetermineExecutableFilenameSuffix() + if err := atomicWriteFile(mappingPath, []byte(downloadedDigest), 0644); err != nil { + return "", fmt.Errorf("failed to write mapping file after downloading bazel: %w", err) + } - destPath := filepath.Join(destDir, destFile) - if _, err := os.Stat(destPath); err == nil { - return destPath, nil + return pathToBazelInCAS, nil +} + +func atomicWriteFile(path string, contents []byte, perm os.FileMode) error { + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return fmt.Errorf("failed to MkdirAll parent of %s: %w", path, err) + } + tmpPath := path + ".tmp" + if err := os.WriteFile(tmpPath, contents, perm); err != nil { + return fmt.Errorf("failed to write file %s: %w", tmpPath, err) + } + if err := os.Rename(tmpPath, path); err != nil { + return fmt.Errorf("failed to rename %s to %s: %w", tmpPath, path, err) } + return nil +} + +func downloadBazelToCAS(version string, bazeliskHome string, repos *Repositories, downloader DownloadFunc) (string, string, error) { + downloadsDir := filepath.Join(bazeliskHome, "downloads") + temporaryDownloadDir := filepath.Join(downloadsDir, "_tmp") + casDir := filepath.Join(bazeliskHome, "downloads", "sha256") + + tmpDestFileBytes := make([]byte, 32) + if _, err := rand.Read(tmpDestFileBytes); err != nil { + return "", "", fmt.Errorf("failed to generate temporary file name: %w", err) + } + tmpDestFile := fmt.Sprintf("%x", tmpDestFileBytes) var tmpDestPath string + var err error baseURL := GetEnvOrConfig(BaseURLEnv) formatURL := GetEnvOrConfig(FormatURLEnv) if baseURL != "" && formatURL != "" { - return "", fmt.Errorf("cannot set %s and %s at once", BaseURLEnv, FormatURLEnv) + return "", "", fmt.Errorf("cannot set %s and %s at once", BaseURLEnv, FormatURLEnv) } else if formatURL != "" { - tmpDestPath, err = repos.DownloadFromFormatURL(formatURL, version, destDir, tmpDestFile) + tmpDestPath, err = repos.DownloadFromFormatURL(formatURL, version, temporaryDownloadDir, tmpDestFile) } else if baseURL != "" { - tmpDestPath, err = repos.DownloadFromBaseURL(baseURL, version, destDir, tmpDestFile) + tmpDestPath, err = repos.DownloadFromBaseURL(baseURL, version, temporaryDownloadDir, tmpDestFile) } else { - tmpDestPath, err = downloader(destDir, tmpDestFile) + tmpDestPath, err = downloader(temporaryDownloadDir, tmpDestFile) } if err != nil { - return "", err + return "", "", fmt.Errorf("failed to download bazel: %w", err) } - if len(expectedSha256) > 0 { - f, err := os.Open(tmpDestPath) - if err != nil { - os.Remove(tmpDestPath) - return "", fmt.Errorf("cannot open %s after download: %v", tmpDestPath, err) - } - defer os.Remove(tmpDestPath) - // We cannot defer f.Close() because keeping the handle open when we try to do the - // rename later on fails on Windows. + f, err := os.Open(tmpDestPath) + if err != nil { + return "", "", fmt.Errorf("failed to open downloaded bazel to digest it: %w", err) + } - h := sha256.New() - if _, err := io.Copy(h, f); err != nil { - f.Close() - return "", fmt.Errorf("cannot compute sha256 of %s after download: %v", tmpDestPath, err) - } + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { f.Close() + return "", "", fmt.Errorf("cannot compute sha256 of %s after download: %v", tmpDestPath, err) + } + f.Close() + actualSha256 := strings.ToLower(fmt.Sprintf("%x", h.Sum(nil))) - actualSha256 := strings.ToLower(fmt.Sprintf("%x", h.Sum(nil))) - if expectedSha256 != actualSha256 { - return "", fmt.Errorf("%s has sha256=%s but need sha256=%s", tmpDestPath, actualSha256, expectedSha256) - } + pathToBazelInCAS := filepath.Join(casDir, actualSha256, "bin", "bazel"+platforms.DetermineExecutableFilenameSuffix()) + if err := os.MkdirAll(filepath.Dir(pathToBazelInCAS), 0755); err != nil { + return "", "", fmt.Errorf("failed to MkdirAll parent of %s: %w", pathToBazelInCAS, err) } - // Only place the downloaded binary in its final location once we know it is fully downloaded - // and valid, to prevent invalid files from ever being executed. - if err = os.Rename(tmpDestPath, destPath); err != nil { - return "", fmt.Errorf("cannot rename %s to %s: %v", tmpDestPath, destPath, err) + tmpPathInCorrectDirectory := pathToBazelInCAS + ".tmp" + if err := os.Rename(tmpDestPath, tmpPathInCorrectDirectory); err != nil { + return "", "", fmt.Errorf("failed to move %s to %s: %w", tmpDestPath, tmpPathInCorrectDirectory, err) + } + if err := os.Rename(tmpPathInCorrectDirectory, pathToBazelInCAS); err != nil { + return "", "", fmt.Errorf("failed to move %s to %s: %w", tmpPathInCorrectDirectory, pathToBazelInCAS, err) } - return destPath, nil + + return pathToBazelInCAS, actualSha256, nil } func copyFile(src, dst string, perm os.FileMode) error {