From 16de805e07e19a212b75152c6c1b295c75f20a53 Mon Sep 17 00:00:00 2001 From: Justin Yao Du Date: Tue, 12 Dec 2023 21:51:41 -0800 Subject: [PATCH] Update Git hooks and add secret scanning --- .github/workflows/lint-check.yml | 36 +++ .husky/lint-config.sh | 3 +- .husky/pre-commit | 13 +- .husky/pre-push | 3 + .secret-scan/.gitignore | 2 + .secret-scan/secret-scan-config.json | 27 ++ .secret-scan/secret-scan.js | 420 +++++++++++++++++++++++++++ 7 files changed, 500 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/lint-check.yml create mode 100755 .husky/pre-push create mode 100644 .secret-scan/.gitignore create mode 100644 .secret-scan/secret-scan-config.json create mode 100644 .secret-scan/secret-scan.js diff --git a/.github/workflows/lint-check.yml b/.github/workflows/lint-check.yml new file mode 100644 index 0000000..0f26552 --- /dev/null +++ b/.github/workflows/lint-check.yml @@ -0,0 +1,36 @@ +name: Lint and style checks + +on: + pull_request: + branches: + main + +jobs: + backend: + name: Backend lint and style check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3 + - working-directory: backend # Change this to the name of your backend directory + run: | + npm ci + npm run lint-check + frontend: + name: Frontend lint and style check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3 + - working-directory: frontend # Change this to the name of your frontend directory + run: | + npm ci + npm run lint-check + secret-scan: + name: Secret scan + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3 + - run: | + node .secret-scan/secret-scan.js diff --git a/.husky/lint-config.sh b/.husky/lint-config.sh index a4e07d8..2665786 100644 --- a/.husky/lint-config.sh +++ b/.husky/lint-config.sh @@ -1,9 +1,10 @@ # This config file is sourced by the pre-commit script. +# shellcheck disable=SC2034,SC2148 # Change 1 to 0 to disable linting. enabled=1 -# Directories containing Node.js projects to be linted, separated by spaces. +# Directories containing JavaScript projects to be linted, separated by spaces. node_dirs='backend frontend' # Command used to run a lint check. diff --git a/.husky/pre-commit b/.husky/pre-commit index 70911e4..a0fd94d 100755 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -3,6 +3,8 @@ # produces a "cannot spawn" error with a bash shebang, since it uses dash. # However, dash is not available on many Unix-like systems. +# shellcheck disable=SC2317 + log() { echo "${0}: ${*}" >&2 } @@ -61,7 +63,7 @@ ask_yes_no() { fi while :; do - printf "${0}: ${prompt}" + printf "%s: %s" "${0}" "${prompt}" read -r selection selection="$(parse_yes_no "${selection}")" @@ -86,7 +88,7 @@ ask_yes_no() { explain_no_verify() { log "If you wish to bypass the lint check entirely," log "use the following command:" - log " git commit --no-verify" + log " NO_LINT=1 git commit" } dir_check() { @@ -176,12 +178,17 @@ cancel() { main() { config_file="$(dirname "${0}")/lint-config.sh" + + # shellcheck source=./lint-config.sh if ! . "${config_file}"; then error "Error while sourcing config file '${config_file}'." exit 1 fi - if [ "${enabled}" -eq 0 ]; then + secret_scan_script="$(dirname "${0}")/../.secret-scan/secret-scan.js" + node "${secret_scan_script}" || exit + + if [ "${enabled}" = 0 ] || [ "${NO_LINT}" = 1 ]; then warn "Lint check has been disabled." exit 0 fi diff --git a/.husky/pre-push b/.husky/pre-push new file mode 100755 index 0000000..1ca98d4 --- /dev/null +++ b/.husky/pre-push @@ -0,0 +1,3 @@ +#!/bin/sh +secret_scan_script="$(dirname "${0}")/../.secret-scan/secret-scan.js" +node "${secret_scan_script}" diff --git a/.secret-scan/.gitignore b/.secret-scan/.gitignore new file mode 100644 index 0000000..a8df5f5 --- /dev/null +++ b/.secret-scan/.gitignore @@ -0,0 +1,2 @@ +/secret-scan-cache.json +/secret-scan-report.json diff --git a/.secret-scan/secret-scan-config.json b/.secret-scan/secret-scan-config.json new file mode 100644 index 0000000..b7ab4d0 --- /dev/null +++ b/.secret-scan/secret-scan-config.json @@ -0,0 +1,27 @@ +{ + "//": [ + "To prevent a particular string from being flagged, add it (or a substring", + "of it) to this array. This can be useful if your repository contains an", + "example of what a credential should look like, a development credential", + "(e.g. a database on localhost), or a previously leaked credential that", + "has already been revoked. Obviously, do not put active credentials here." + ], + "allowedStrings": ["mongodb://127.0.0.1", "mongodb://localhost"], + "//": [ + "Regexes used to scan the repository contents for secrets.", + "If possible, try to make the regex match the entire secret, or", + "allowedStrings might not work as expected. For example, if a regex", + "matches only 'mongodb', this string by itself does not contain any of the", + "strings in the allowlist, so it will still be flagged." + ], + "secretRegexes": { + "mongodbUrl": "mongodb([+]srv)?://[^\\s]+", + "firebaseJsonPrivateKeyFile": "-----BEGIN PRIVATE KEY-----[^\\s]+" + }, + "//": [ + "Do not check for secrets in these files. You should almost always use", + "allowedStrings instead of this. We only add these files because they", + "naturally contain things that look like secrets, but aren't." + ], + "skippedFiles": [".secret-scan/secret-scan-cache.json", ".secret-scan/secret-scan-config.json"] +} diff --git a/.secret-scan/secret-scan.js b/.secret-scan/secret-scan.js new file mode 100644 index 0000000..c301aa8 --- /dev/null +++ b/.secret-scan/secret-scan.js @@ -0,0 +1,420 @@ +const child_process = require("node:child_process"); +const fs = require("node:fs"); +const os = require("node:os"); +const path = require("node:path"); +const process = require("node:process"); + +const CACHE_PATH = path.join(__dirname, "secret-scan-cache.json"); +const CONFIG_PATH = path.join(__dirname, "secret-scan-config.json"); +const REPORT_PATH = path.join(__dirname, "secret-scan-report.json"); +const JSON_ENCODING = "utf8"; + +const secretRemovalAdvice = ` +1. If you are absolutely confident that the reported + secrets are not actually secrets, see + ${CONFIG_PATH} + for next steps and try again. Ask your engineering + manager or VP Technology if you have any uncertainty + whatsoever. + +2. If the secrets are in a file in the working tree, add + the file to a .gitignore and try again. + +3. If the secrets are in the index, unstage them with + git restore --staged and try again. + +4. If the secrets are in an existing commit, you are + REQUIRED to report this to your engineering manager AND + VP Technology, even if you are sure that the commit was + never pushed. This is because a secret being committed + anywhere (even locally) indicates a potential issue with + the implementation or configuration of this secret + scanning tool. + + If the commit was pushed, assume that the secret is now + publicly known, and revoke it as soon as possible. + + Remember, there is no shame in making mistakes, as long + as you let us know. We all have to work together to + ensure that we build secure software for our clients. +`.trim(); + +/** + * @param {string} filePath + * @returns {unknown} + */ +function parseJSONFromFile(filePath) { + const text = fs.readFileSync(filePath, { encoding: JSON_ENCODING }); + return JSON.parse(text); +} + +/** + * @template T + * @param {() => T} callback + * @returns {T | null} + */ +function nullIfFileNotFound(callback) { + try { + return callback(); + } catch (e) { + if (typeof e === "object" && e !== null && "code" in e && e.code === "ENOENT") { + return null; + } + throw e; + } +} + +/** + * @param {unknown} array + * @returns {string[]} + */ +function asStringArray(array) { + if (!Array.isArray(array)) { + throw new Error(`Not a string array: ${JSON.stringify(array)}`); + } + + return array.map((s) => { + if (typeof s === "string") { + return s; + } + throw new Error(`Not a string: ${JSON.stringify(s)}`); + }); +} + +/** + * @typedef {{ + * allowedStrings: string[]; + * secretRegexes: Record; + * skippedFiles: string[]; + * }} SecretScanConfig + */ + +/** + * @returns {SecretScanConfig} + */ +function loadConfig() { + const parsed = parseJSONFromFile(CONFIG_PATH); + if ( + typeof parsed === "object" && + parsed !== null && + "allowedStrings" in parsed && + "secretRegexes" in parsed && + "skippedFiles" in parsed && + typeof parsed.secretRegexes === "object" && + parsed.secretRegexes !== null + ) { + const secretRegexes = Object.fromEntries( + Object.entries(parsed.secretRegexes).map(([k, v]) => { + if (typeof v !== "string") { + throw new Error(`Not a string: ${JSON.stringify(v)}`); + } + return [k, v]; + }) + ); + + return { + allowedStrings: asStringArray(parsed.allowedStrings), + secretRegexes, + skippedFiles: asStringArray(parsed.skippedFiles), + }; + } + throw new Error("Config format is invalid."); +} + +/** + * @typedef {{ + * config: unknown; + * script: string; + * safeCommitHashes: string[]; + * }} SecretScanCache + */ + +/** @returns {SecretScanCache | null} */ +function loadCache() { + return nullIfFileNotFound(() => { + const parsed = parseJSONFromFile(CACHE_PATH); + if ( + typeof parsed === "object" && + parsed !== null && + "config" in parsed && + "script" in parsed && + typeof parsed.script === "string" && + "safeCommitHashes" in parsed + ) { + return { + config: parsed.config, + script: parsed.script, + safeCommitHashes: asStringArray(parsed.safeCommitHashes), + }; + } else { + console.error("Cache format is invalid, so it will not be used."); + return null; + } + }); +} + +/** + * @param {SecretScanCache} cache + * @returns {void} + */ +function saveCache(cache) { + fs.writeFileSync(CACHE_PATH, JSON.stringify(cache), { encoding: JSON_ENCODING }); +} + +function deleteReport() { + if (fs.statSync(REPORT_PATH, { throwIfNoEntry: false })?.isFile()) { + fs.unlinkSync(REPORT_PATH); + } +} + +/** + * @typedef {{ + * where: string; + * path: string; + * line: number; + * regexName: string; + * matchedText: string; + * }[]} SecretScanReport + */ + +/** + * @param {SecretScanReport} report + */ +function saveReport(report) { + fs.writeFileSync(REPORT_PATH, JSON.stringify(report, null, 2) + "\n", { encoding: JSON_ENCODING }); +} + +/** + * @param {[string, ...string[]]} command + * @returns {string} + */ +function runCommand(command) { + const process = child_process.spawnSync(command[0], command.slice(1), { + cwd: __dirname, + encoding: "utf8", + maxBuffer: Infinity, + }); + + if (process.status === 0) { + return process.stdout; + } + + console.error(process); + throw new Error(`Command did not execute successfully: ${JSON.stringify(command)}`); +} + +/** + * @param {string} text + * @returns {string[]} + */ +function nonEmptyLines(text) { + return text.split(os.EOL).filter((line) => line.length > 0); +} + +/** @returns {void} */ +function checkGitVersion() { + const command = ["git", "--version"]; + const output = runCommand(["git", "--version"]); + const expectedPrefix = "git version "; + + if (!output.startsWith(expectedPrefix)) { + const msg = `Output of command ${JSON.stringify( + command + )} did not start with expected prefix ${JSON.stringify( + expectedPrefix + )}. Maybe the text encoding for child process output is not utf8 in this environment?`; + throw new Error(msg); + } +} + +/** @returns {string} */ +function getRepoRoot() { + const repoRoot = runCommand(["git", "rev-parse", "--show-toplevel"]).replace(os.EOL, ""); + + // Make sure we don't get "file not found" later and assume the file was + // deleted from the working tree, when the actual cause is having an incorrect + // path for the repo root. Don't ask me how I know... + if (!fs.statSync(path.join(repoRoot, ".git"), { throwIfNoEntry: false })?.isDirectory()) { + throw new Error( + `Could not determine repo root: got ${JSON.stringify(repoRoot)}, but this is incorrect?` + ); + } + + return repoRoot; +} + +/** @returns {number} */ +function main() { + const shouldSaveReport = process.env["SECRET_SCAN_WRITE_REPORT"] === "1"; + + deleteReport(); + + /** + * @type {SecretScanReport} + */ + const report = []; + + console.log(`${__filename}: Scanning commit history and working tree for secrets.`); + + checkGitVersion(); + const repoRoot = getRepoRoot(); + + const config = loadConfig(); + const script = fs.readFileSync(__filename, { encoding: "utf8" }); + + let loadedCache = loadCache(); + if (loadedCache !== null) { + if (JSON.stringify(config) !== JSON.stringify(loadedCache.config)) { + console.log("Invalidating cache because config has changed."); + loadedCache = null; + } else if (script !== loadedCache.script) { + console.log("Invalidating cache because script has changed."); + loadedCache = null; + } + } + + /** @type {SecretScanCache} */ + const cache = loadedCache ?? { + config: JSON.parse(JSON.stringify(config)), + script, + safeCommitHashes: [], + }; + + const previouslyScannedCommitHashes = new Set(cache.safeCommitHashes); + const filesToSkip = new Set(config.skippedFiles); + const secretRegexes = Object.fromEntries( + Object.entries(config.secretRegexes).map(([k, v]) => [k, new RegExp(v, "g")]) + ); + + /** @param {string} matchedText */ + function isFalsePositive(matchedText) { + return config.allowedStrings.some((allowed) => matchedText.includes(allowed)); + } + + /** + * Scan the commit with the given hash, or null to scan the index and working + * tree. + * + * @param {string | null} maybeCommitHash + * @returns {void} + */ + function scan(maybeCommitHash) { + /** @type {{ path: string; where: string; contents: string; }[]} */ + const changedFiles = []; + + // Don't try to read deleted files. If you ever get an error message like + // "unknown revision or path not in the working tree", double check this. + const gitListFileOptions = ["--no-renames", "--diff-filter=d", "--name-only"]; + + if (maybeCommitHash === null) { + const workingTreePaths = nonEmptyLines(runCommand(["git", "status", "--porcelain"])).map( + (line) => line.slice(3) + ); + for (const workingTreePath of workingTreePaths) { + // If the file was deleted, we can ignore it. I was a bit too lazy to + // parse the status letters of `git status --porcelain`. + let contents = nullIfFileNotFound(() => + fs.readFileSync(path.join(repoRoot, workingTreePath), { encoding: "utf8" }) + ); + + if (contents !== null) { + changedFiles.push({ + path: workingTreePath, + where: "working tree", + contents, + }); + } + } + + const stagedPaths = nonEmptyLines( + runCommand(["git", "diff", "--staged", ...gitListFileOptions]) + ); + for (const stagedPath of stagedPaths) { + changedFiles.push({ + path: stagedPath, + where: "index", + contents: runCommand(["git", "show", ":" + stagedPath]), + }); + } + } else { + const [commitDescription, ...changedPaths] = nonEmptyLines( + runCommand(["git", "show", "--oneline", ...gitListFileOptions, maybeCommitHash]) + ); + const where = `commit ${JSON.stringify(commitDescription)}`; + for (const changedPath of changedPaths) { + changedFiles.push({ + path: changedPath, + where, + contents: runCommand(["git", "show", `${maybeCommitHash}:${changedPath}`]), + }); + } + } + + let secretDetected = false; + for (const { path, where, contents } of changedFiles) { + if (filesToSkip.has(path)) { + continue; + } + + for (const [regexName, regex] of Object.entries(secretRegexes)) { + for (const match of contents.matchAll(regex)) { + const matchedText = match[0]; + if (isFalsePositive(matchedText)) { + continue; + } + + const line = contents.substring(0, match.index).split("\n").length; + + secretDetected = true; + report.push({ + where, + path, + line, + regexName, + matchedText, + }); + + console.log( + `SECRET DETECTED in ${where}, file ${JSON.stringify( + path + )}, line ${line}: regex ${regexName} (${regex}) matched text ${JSON.stringify( + matchedText + )}` + ); + } + } + } + + if (!secretDetected && maybeCommitHash !== null) { + cache.safeCommitHashes.push(maybeCommitHash); + } + } + + // Scan every commit. + const allCommitHashes = nonEmptyLines(runCommand(["git", "log", "--pretty=format:%H"])); + for (const hash of allCommitHashes) { + if (!previouslyScannedCommitHashes.has(hash)) { + scan(hash); + } + } + + // Scan the index and working tree. + scan(null); + + if (shouldSaveReport) { + saveReport(report); + console.log(`Report written to ${JSON.stringify(REPORT_PATH)}`); + } + + saveCache(cache); + + if (report.length > 0) { + console.log(`Secret scan completed with errors.\n\n${secretRemovalAdvice}\n`); + return 1; + } else { + console.log("Secret scan completed successfully."); + return 0; + } +} + +process.exit(main());