Skip to content

Commit

Permalink
Use consistent paths to bazel binaries (#465)
Browse files Browse the repository at this point in the history
Before this change, changing which mirror you were downloading from
would change the $PATH environment variable bazel is run with, even
though the bazel binaries being pointed to are identical. This can cause
repository rules to invalidate and re-run.

Instead, store downloaded bazels in directories keyed off of the sha256
of the bazel binary itself, and track the metadata of a mirror+version
-> sha256.

This avoid spurious rebuilds when only changing the URL bazelisk would
use to download bazel.
  • Loading branch information
illicitonion authored Jun 1, 2023
1 parent ad2f27d commit a4dca9c
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 42 deletions.
78 changes: 73 additions & 5 deletions bazelisk_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,52 @@ EOF
(echo "FAIL: Expected to find 'BAZELISK_SKIP_WRAPPER=true' in the output of 'bazelisk version'"; exit 1)
}

function test_path_is_consistent_regardless_of_base_url() {
setup

echo 6.2.0 > .bazelversion

cat >WORKSPACE <<EOF
load("//:print_path.bzl", "print_path")
print_path(name = "print_path")
load("@print_path//:defs.bzl", "noop")
noop()
EOF

cat >print_path.bzl <<EOF
def _print_path_impl(rctx):
print("PATH is: {}".format(rctx.os.environ["PATH"]))
rctx.file("WORKSPACE", "")
rctx.file("BUILD", "")
rctx.file("defs.bzl", "def noop(): pass")
print_path = repository_rule(
implementation = _print_path_impl,
)
EOF

BAZELISK_HOME="$BAZELISK_HOME" bazelisk sync --only=print_path 2>&1 | tee log1

BAZELISK_HOME="$BAZELISK_HOME" bazelisk clean --expunge 2>&1

# We need a separate mirror of bazel binaries, which has identical files.
# Ideally we wouldn't depend on sourceforge for test runtime, but hey, it exists and it works.
BAZELISK_HOME="$BAZELISK_HOME" BAZELISK_BASE_URL=https://downloads.sourceforge.net/project/bazel.mirror bazelisk sync --only=print_path 2>&1 | tee log2

path1="$(grep "PATH is:" log1)"
path2="$(grep "PATH is:" log2)"

[[ -n "${path1}" && -n "${path2}" ]] || \
(echo "FAIL: Expected PATH to be non-empty, got path1=${path1}, path2=${path2}"; exit 1)

[[ "${path1}" == "${path2}" ]] || \
(echo "FAIL: Expected PATH to be the same regardless of which mirror was used, got path1=${path1}, path2=${path2}"; exit 1)
}

function test_skip_wrapper() {
setup

Expand Down Expand Up @@ -327,10 +373,10 @@ function test_bazel_download_path_go() {
BAZELISK_HOME="$BAZELISK_HOME" \
bazelisk version 2>&1 | tee log

find "$BAZELISK_HOME/downloads/bazelbuild" 2>&1 | tee log
find "$BAZELISK_HOME/downloads/metadata/bazelbuild" 2>&1 | tee log

grep "^$BAZELISK_HOME/downloads/bazelbuild/bazel-[0-9][0-9]*.[0-9][0-9]*.[0-9][0-9]*-[a-z0-9_-]*/bin/bazel\(.exe\)\?$" log || \
(echo "FAIL: Expected to download bazel binary into specific path."; exit 1)
grep "^$BAZELISK_HOME/downloads/metadata/bazelbuild/bazel-[0-9][0-9]*.[0-9][0-9]*.[0-9][0-9]*-[a-z0-9_-]*$" log || \
(echo "FAIL: Expected to download bazel metadata in specific path."; exit 1)
}

function test_bazel_verify_sha256() {
Expand Down Expand Up @@ -395,8 +441,26 @@ function test_bazel_prepend_binary_directory_to_path_go() {
BAZELISK_HOME="$BAZELISK_HOME" \
bazelisk --print_env 2>&1 | tee log

PATTERN=$(echo "^PATH=$BAZELISK_HOME/downloads/bazelbuild/bazel-[0-9][0-9]*.[0-9][0-9]*.[0-9][0-9]*-[a-z0-9_-]*/bin[:;]" | sed -e 's/\//\[\/\\\\\]/g')
grep "$PATTERN" log || \
local os="$(uname -s | tr A-Z a-z)"
case "${os}" in
darwin|linux)
path_entry_delimiter=":"
path_delimiter="/"
extension=""
;;
msys*|mingw*|cygwin*)
path_entry_delimiter=";"
path_delimiter="\\"
extension=".exe"
;;
*)
echo "FAIL: Unknown OS ${os} in test"
exit 1
;;
esac
path_entry="$(grep "^PATH=" log | cut -d= -f2- | cut -d"${path_entry_delimiter}" -f1)"

[[ -x "${path_entry}${path_delimiter}bazel${extension}" ]] || \
(echo "FAIL: Expected PATH to contains bazel binary directory."; exit 1)
}

Expand Down Expand Up @@ -480,6 +544,10 @@ if [[ $BAZELISK_VERSION == "GO" ]]; then
test_bazel_prepend_binary_directory_to_path_go
echo

echo "# test_path_is_consistent_regardless_of_base_url"
test_path_is_consistent_regardless_of_base_url
echo

case "$(uname -s)" in
MSYS*)
# The tests are currently not compatible with Windows.
Expand Down
125 changes: 88 additions & 37 deletions core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package core

import (
"bufio"
"crypto/rand"
"crypto/sha256"
"encoding/json"
"fmt"
Expand Down Expand Up @@ -416,73 +417,123 @@ func downloadBazel(bazelVersionString string, bazeliskHome string, repos *Reposi
bazelForkOrURL = bazelFork
}

baseDirectory := filepath.Join(bazeliskHome, "downloads", bazelForkOrURL)
bazelPath, err := downloadBazelIfNecessary(resolvedBazelVersion, baseDirectory, repos, downloader)
bazelPath, err := downloadBazelIfNecessary(resolvedBazelVersion, bazeliskHome, bazelForkOrURL, repos, downloader)
return bazelPath, err
}

func downloadBazelIfNecessary(version string, baseDirectory string, repos *Repositories, downloader DownloadFunc) (string, error) {
// downloadBazelIfNecessary returns a path to a bazel which can be run, which may have been cached.
// The directory it returns may depend on version and bazeliskHome, but does not depend on bazelForkOrURLDirName.
// This is important, as the directory may be added to $PATH, and varying the path for equivalent files may cause unnecessary repository rule cache invalidations.
// Where a file was downloaded from shouldn't affect cache behaviour of Bazel invocations.
//
// The structure of the downloads directory is as follows ([]s indicate variables):
//
// downloads/metadata/[fork-or-url]/bazel-[version-os-etc] is a text file containing a hex sha256 of the contents of the downloaded bazel file.
// downloads/sha256/[sha256]/bin/bazel[extension] contains the bazel with a particular sha256.
func downloadBazelIfNecessary(version string, bazeliskHome string, bazelForkOrURLDirName string, repos *Repositories, downloader DownloadFunc) (string, error) {
pathSegment, err := platforms.DetermineBazelFilename(version, false)
if err != nil {
return "", fmt.Errorf("could not determine path segment to use for Bazel binary: %v", err)
}
destFile := "bazel" + platforms.DetermineExecutableFilenameSuffix()

mappingPath := filepath.Join(bazeliskHome, "downloads", "metadata", bazelForkOrURLDirName, pathSegment)
digestFromMappingFile, err := os.ReadFile(mappingPath)
if err == nil {
pathToBazelInCAS := filepath.Join(bazeliskHome, "downloads", "sha256", string(digestFromMappingFile), "bin", destFile)
if _, err := os.Stat(pathToBazelInCAS); err == nil {
return pathToBazelInCAS, nil
}
}

pathToBazelInCAS, downloadedDigest, err := downloadBazelToCAS(version, bazeliskHome, repos, downloader)
if err != nil {
return "", fmt.Errorf("failed to download bazel: %w", err)
}

destDir := filepath.Join(baseDirectory, pathSegment, "bin")
expectedSha256 := strings.ToLower(GetEnvOrConfig("BAZELISK_VERIFY_SHA256"))
if len(expectedSha256) > 0 {
if expectedSha256 != downloadedDigest {
return "", fmt.Errorf("%s has sha256=%s but need sha256=%s", pathToBazelInCAS, downloadedDigest, expectedSha256)
}
}

tmpDestFile := "bazel-tmp" + platforms.DetermineExecutableFilenameSuffix()
destFile := "bazel" + platforms.DetermineExecutableFilenameSuffix()
if err := atomicWriteFile(mappingPath, []byte(downloadedDigest), 0644); err != nil {
return "", fmt.Errorf("failed to write mapping file after downloading bazel: %w", err)
}

destPath := filepath.Join(destDir, destFile)
if _, err := os.Stat(destPath); err == nil {
return destPath, nil
return pathToBazelInCAS, nil
}

func atomicWriteFile(path string, contents []byte, perm os.FileMode) error {
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return fmt.Errorf("failed to MkdirAll parent of %s: %w", path, err)
}
tmpPath := path + ".tmp"
if err := os.WriteFile(tmpPath, contents, perm); err != nil {
return fmt.Errorf("failed to write file %s: %w", tmpPath, err)
}
if err := os.Rename(tmpPath, path); err != nil {
return fmt.Errorf("failed to rename %s to %s: %w", tmpPath, path, err)
}
return nil
}

func downloadBazelToCAS(version string, bazeliskHome string, repos *Repositories, downloader DownloadFunc) (string, string, error) {
downloadsDir := filepath.Join(bazeliskHome, "downloads")
temporaryDownloadDir := filepath.Join(downloadsDir, "_tmp")
casDir := filepath.Join(bazeliskHome, "downloads", "sha256")

tmpDestFileBytes := make([]byte, 32)
if _, err := rand.Read(tmpDestFileBytes); err != nil {
return "", "", fmt.Errorf("failed to generate temporary file name: %w", err)
}
tmpDestFile := fmt.Sprintf("%x", tmpDestFileBytes)

var tmpDestPath string
var err error
baseURL := GetEnvOrConfig(BaseURLEnv)
formatURL := GetEnvOrConfig(FormatURLEnv)
if baseURL != "" && formatURL != "" {
return "", fmt.Errorf("cannot set %s and %s at once", BaseURLEnv, FormatURLEnv)
return "", "", fmt.Errorf("cannot set %s and %s at once", BaseURLEnv, FormatURLEnv)
} else if formatURL != "" {
tmpDestPath, err = repos.DownloadFromFormatURL(formatURL, version, destDir, tmpDestFile)
tmpDestPath, err = repos.DownloadFromFormatURL(formatURL, version, temporaryDownloadDir, tmpDestFile)
} else if baseURL != "" {
tmpDestPath, err = repos.DownloadFromBaseURL(baseURL, version, destDir, tmpDestFile)
tmpDestPath, err = repos.DownloadFromBaseURL(baseURL, version, temporaryDownloadDir, tmpDestFile)
} else {
tmpDestPath, err = downloader(destDir, tmpDestFile)
tmpDestPath, err = downloader(temporaryDownloadDir, tmpDestFile)
}
if err != nil {
return "", err
return "", "", fmt.Errorf("failed to download bazel: %w", err)
}

if len(expectedSha256) > 0 {
f, err := os.Open(tmpDestPath)
if err != nil {
os.Remove(tmpDestPath)
return "", fmt.Errorf("cannot open %s after download: %v", tmpDestPath, err)
}
defer os.Remove(tmpDestPath)
// We cannot defer f.Close() because keeping the handle open when we try to do the
// rename later on fails on Windows.
f, err := os.Open(tmpDestPath)
if err != nil {
return "", "", fmt.Errorf("failed to open downloaded bazel to digest it: %w", err)
}

h := sha256.New()
if _, err := io.Copy(h, f); err != nil {
f.Close()
return "", fmt.Errorf("cannot compute sha256 of %s after download: %v", tmpDestPath, err)
}
h := sha256.New()
if _, err := io.Copy(h, f); err != nil {
f.Close()
return "", "", fmt.Errorf("cannot compute sha256 of %s after download: %v", tmpDestPath, err)
}
f.Close()
actualSha256 := strings.ToLower(fmt.Sprintf("%x", h.Sum(nil)))

actualSha256 := strings.ToLower(fmt.Sprintf("%x", h.Sum(nil)))
if expectedSha256 != actualSha256 {
return "", fmt.Errorf("%s has sha256=%s but need sha256=%s", tmpDestPath, actualSha256, expectedSha256)
}
pathToBazelInCAS := filepath.Join(casDir, actualSha256, "bin", "bazel"+platforms.DetermineExecutableFilenameSuffix())
if err := os.MkdirAll(filepath.Dir(pathToBazelInCAS), 0755); err != nil {
return "", "", fmt.Errorf("failed to MkdirAll parent of %s: %w", pathToBazelInCAS, err)
}

// Only place the downloaded binary in its final location once we know it is fully downloaded
// and valid, to prevent invalid files from ever being executed.
if err = os.Rename(tmpDestPath, destPath); err != nil {
return "", fmt.Errorf("cannot rename %s to %s: %v", tmpDestPath, destPath, err)
tmpPathInCorrectDirectory := pathToBazelInCAS + ".tmp"
if err := os.Rename(tmpDestPath, tmpPathInCorrectDirectory); err != nil {
return "", "", fmt.Errorf("failed to move %s to %s: %w", tmpDestPath, tmpPathInCorrectDirectory, err)
}
if err := os.Rename(tmpPathInCorrectDirectory, pathToBazelInCAS); err != nil {
return "", "", fmt.Errorf("failed to move %s to %s: %w", tmpPathInCorrectDirectory, pathToBazelInCAS, err)
}
return destPath, nil

return pathToBazelInCAS, actualSha256, nil
}

func copyFile(src, dst string, perm os.FileMode) error {
Expand Down

0 comments on commit a4dca9c

Please sign in to comment.