From ae9ea13ad193d575a9a3196fd68189b34aaa4fdc Mon Sep 17 00:00:00 2001 From: Aurelien Francillon Date: Sat, 28 May 2022 15:18:07 +0200 Subject: [PATCH 01/64] This is a temporary fix due to std::iterator deprecation. This commit needs to be reverted once a proper fix is in place. --- CMakeLists.txt | 2 +- runtime/simple_backend/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 74aa7a25..8d090acc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,7 +88,7 @@ include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 \ -Wredundant-decls -Wcast-align -Wmissing-include-dirs -Wswitch-default \ -Wextra -Wall -Winvalid-pch -Wredundant-decls -Wformat=2 \ --Wmissing-format-attribute -Wformat-nonliteral -Werror") +-Wmissing-format-attribute -Wformat-nonliteral -Werror -Wno-error=deprecated-declarations") # Mark nodelete to work around unload bug in upstream LLVM 5.0+ set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-z,nodelete") diff --git a/runtime/simple_backend/CMakeLists.txt b/runtime/simple_backend/CMakeLists.txt index a8fef903..baaff20b 100644 --- a/runtime/simple_backend/CMakeLists.txt +++ b/runtime/simple_backend/CMakeLists.txt @@ -39,4 +39,4 @@ target_include_directories(SymRuntime PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/.. 
${Z3_C_INCLUDE_DIRS}) -set_target_properties(SymRuntime PROPERTIES COMPILE_FLAGS "-Werror") +set_target_properties(SymRuntime PROPERTIES COMPILE_FLAGS "-Werror -Wno-error=deprecated-declarations") From 88b464c290786b58e5883f5aa1d08e2a3d5de0fb Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Mon, 30 May 2022 00:55:07 +0200 Subject: [PATCH 02/64] symcc_fuzzing_helper: Move to clap3 (#94) --- util/symcc_fuzzing_helper/Cargo.toml | 2 +- util/symcc_fuzzing_helper/src/main.rs | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/util/symcc_fuzzing_helper/Cargo.toml b/util/symcc_fuzzing_helper/Cargo.toml index d09d8ae3..f8858d88 100644 --- a/util/symcc_fuzzing_helper/Cargo.toml +++ b/util/symcc_fuzzing_helper/Cargo.toml @@ -20,7 +20,7 @@ edition = "2018" license = "GPL-3.0-or-later" [dependencies] -structopt = "0.3" +clap = { version = "3.0", features = ["derive"]} tempfile = "3.1" anyhow = "1.0" log = "0.4.0" diff --git a/util/symcc_fuzzing_helper/src/main.rs b/util/symcc_fuzzing_helper/src/main.rs index 6d284e1e..a75f9db8 100644 --- a/util/symcc_fuzzing_helper/src/main.rs +++ b/util/symcc_fuzzing_helper/src/main.rs @@ -22,7 +22,7 @@ use std::io::Write; use std::path::{Path, PathBuf}; use std::thread; use std::time::{Duration, Instant}; -use structopt::StructOpt; +use clap::{self, StructOpt}; use symcc::{AflConfig, AflMap, AflShowmapResult, SymCC, TestcaseDir}; use tempfile::tempdir; @@ -32,22 +32,22 @@ const STATS_INTERVAL_SEC: u64 = 60; // inputs. 
#[derive(Debug, StructOpt)] -#[structopt(about = "Make SymCC collaborate with AFL.", no_version)] -struct CLI { +#[clap(about = "Make SymCC collaborate with AFL.", no_version)] +struct Opt { /// The name of the fuzzer to work with - #[structopt(short = "a")] + #[clap(short = 'a')] fuzzer_name: String, /// The AFL output directory - #[structopt(short = "o")] + #[clap(short = 'o')] output_dir: PathBuf, /// Name to use for SymCC - #[structopt(short = "n")] + #[clap(short = 'n')] name: String, /// Enable verbose logging - #[structopt(short = "v")] + #[clap(short = 'v')] verbose: bool, /// Program under test @@ -264,7 +264,7 @@ impl State { } fn main() -> Result<()> { - let options = CLI::from_args(); + let options = Opt::parse(); env_logger::builder() .filter_level(if options.verbose { log::LevelFilter::Debug From ad149a24d90c4ff3e95eb7e8b72c14dca0d3e036 Mon Sep 17 00:00:00 2001 From: aurelf Date: Mon, 30 May 2022 01:51:02 +0200 Subject: [PATCH 03/64] Revert "symcc_fuzzing_helper: Move to clap3 (#94)" (#101) This reverts commit 88b464c290786b58e5883f5aa1d08e2a3d5de0fb. 
--- util/symcc_fuzzing_helper/Cargo.toml | 2 +- util/symcc_fuzzing_helper/src/main.rs | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/util/symcc_fuzzing_helper/Cargo.toml b/util/symcc_fuzzing_helper/Cargo.toml index f8858d88..d09d8ae3 100644 --- a/util/symcc_fuzzing_helper/Cargo.toml +++ b/util/symcc_fuzzing_helper/Cargo.toml @@ -20,7 +20,7 @@ edition = "2018" license = "GPL-3.0-or-later" [dependencies] -clap = { version = "3.0", features = ["derive"]} +structopt = "0.3" tempfile = "3.1" anyhow = "1.0" log = "0.4.0" diff --git a/util/symcc_fuzzing_helper/src/main.rs b/util/symcc_fuzzing_helper/src/main.rs index a75f9db8..6d284e1e 100644 --- a/util/symcc_fuzzing_helper/src/main.rs +++ b/util/symcc_fuzzing_helper/src/main.rs @@ -22,7 +22,7 @@ use std::io::Write; use std::path::{Path, PathBuf}; use std::thread; use std::time::{Duration, Instant}; -use clap::{self, StructOpt}; +use structopt::StructOpt; use symcc::{AflConfig, AflMap, AflShowmapResult, SymCC, TestcaseDir}; use tempfile::tempdir; @@ -32,22 +32,22 @@ const STATS_INTERVAL_SEC: u64 = 60; // inputs. 
#[derive(Debug, StructOpt)] -#[clap(about = "Make SymCC collaborate with AFL.", no_version)] -struct Opt { +#[structopt(about = "Make SymCC collaborate with AFL.", no_version)] +struct CLI { /// The name of the fuzzer to work with - #[clap(short = 'a')] + #[structopt(short = "a")] fuzzer_name: String, /// The AFL output directory - #[clap(short = 'o')] + #[structopt(short = "o")] output_dir: PathBuf, /// Name to use for SymCC - #[clap(short = 'n')] + #[structopt(short = "n")] name: String, /// Enable verbose logging - #[clap(short = 'v')] + #[structopt(short = "v")] verbose: bool, /// Program under test @@ -264,7 +264,7 @@ impl State { } fn main() -> Result<()> { - let options = Opt::parse(); + let options = CLI::from_args(); env_logger::builder() .filter_level(if options.verbose { log::LevelFilter::Debug From ff3ea9a70e24f33efff159ba97b3d06b51a37eab Mon Sep 17 00:00:00 2001 From: Aurelien Francillon Date: Mon, 30 May 2022 01:34:24 +0200 Subject: [PATCH 04/64] Add some FAQs to the Readme --- README.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/README.md b/README.md index 4d59677f..2d02f09d 100644 --- a/README.md +++ b/README.md @@ -180,6 +180,57 @@ every change to SymCC (which is, in principle the right thing to do), whereas in many cases it is sufficient to let the build system figure out what to rebuild (and recompile, e.g., libc++ only when necessary). +## FAQ / BUGS / TODOs + +### Why is SymCC only exploring one path and not all paths? + +SymCC is currently a concolic executor: it follows the concrete +path. In theory, it would be possible to make it a forking executor; +see [issue #14](https://github.com/eurecom-s3/symcc/issues/14). + +### Why does SymCC not generate some test cases? 
+ +There are multiple possible reasons: + +#### QSym backend performs pruning + +When built with the QSym backend, SymCC's exploration (e.g., of loops) is +subject to path pruning. This is part of the optimizations that make +SymCC/QSym fast, but it isn't sound. This is not a problem for use in +hybrid fuzzing, but it may be a problem for other uses. See for +example [issue #88](https://github.com/eurecom-s3/symcc/issues/88). + +When building with the simple backend the paths should be found. If +the paths are not found with the simple backend this may be a bug (or +possibly a limitation of the simple backend). + +#### Incomplete symbolic handling of functions and system interactions + +The current symbolic understanding of libc is incomplete. So when an +unsupported libc function is called, SymCC can't trace the computations +that happen in the function. Possible ways to address this: + +1. Add the function to the [collection of wrapped libc + functions](https://github.com/eurecom-s3/symcc/blob/master/runtime/LibcWrappers.cpp) + and [register the + wrapper](https://github.com/eurecom-s3/symcc/blob/b29dc4db2803830ebf50798e72b336473a567655/compiler/Runtime.cpp#L159) + in the compiler. +2. Build a fully instrumented libc. +3. Cherry-pick individual libc functions from a libc implementation (e.g., musl). + +See [issue #23](https://github.com/eurecom-s3/symcc/issues/23) for more details. + + +### Rust support? + +It would be possible to support Rust; see [issue +#1](https://github.com/eurecom-s3/symcc/issues/1) for tracking this. + +### Bug reporting + +We appreciate bug reports with test cases and steps to reproduce, and PRs with +corresponding test cases. SymCC is currently understaffed; we hope to +catch up and get back to active development at some point. 
## Contact From abd6c3b43f80a7566ea6ce752f5c228d036ccbb3 Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Mon, 26 Sep 2022 13:44:30 +0200 Subject: [PATCH 05/64] changed from structopt to clap 3 (#103) --- util/symcc_fuzzing_helper/Cargo.lock | 379 ++++++++++++-------------- util/symcc_fuzzing_helper/Cargo.toml | 2 +- util/symcc_fuzzing_helper/src/main.rs | 14 +- 3 files changed, 186 insertions(+), 209 deletions(-) diff --git a/util/symcc_fuzzing_helper/Cargo.lock b/util/symcc_fuzzing_helper/Cargo.lock index 5fa913db..9f8b5069 100644 --- a/util/symcc_fuzzing_helper/Cargo.lock +++ b/util/symcc_fuzzing_helper/Cargo.lock @@ -1,459 +1,436 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "aho-corasick" version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743ad5a418686aad3b87fd14c43badd828cf26e214a00f92a384291cf22e1811" dependencies = [ - "memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "ansi_term" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr", ] [[package]] name = "anyhow" version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c" [[package]] name = "atty" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "hermit-abi", + "libc", + "winapi", ] +[[package]] +name = "autocfg" +version = "1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "bitflags" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" [[package]] name = "c2-chacha" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" dependencies = [ - "ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "ppv-lite86", ] [[package]] name = "cfg-if" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" [[package]] name = "clap" -version = "2.33.0" +version = "3.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2dbdf4bdacb33466e854ce889eee8dfd5729abf7ccd7664d0a2d60cd384440b" +dependencies = [ + "atty", + "bitflags", + "clap_derive", + "clap_lex", + "indexmap", + "lazy_static", + "strsim", + "termcolor", + "textwrap", +] + +[[package]] +name = "clap_derive" +version = "3.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25320346e922cffe59c0bbc5410c8d8784509efb321488971081313cb1e1a33c" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213" dependencies = [ - "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", - "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", - 
"textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", - "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", + "os_str_bytes", ] [[package]] name = "env_logger" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" dependencies = [ - "atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", - "humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "termcolor 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "atty", + "humantime", + "log", + "regex", + "termcolor", ] [[package]] name = "getrandom" version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" dependencies = [ - "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", - "wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "wasi", ] +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + [[package]] name = "heck" -version = "0.3.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" [[package]] name = "hermit-abi" version = "0.1.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772" dependencies = [ - "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", + "libc", ] [[package]] name = "humantime" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" +dependencies = [ + "quick-error", +] + +[[package]] +name = "indexmap" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6012d540c5baa3589337a98ce73408de9b5a25ec9fc2c6fd6be8f0d39e0ca5a" dependencies = [ - "quick-error 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg", + "hashbrown", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.66" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" [[package]] name = "log" version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" dependencies = [ - "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", ] [[package]] name = "memchr" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223" + +[[package]] +name = "os_str_bytes" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa" [[package]] name = "ppv-lite86" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" [[package]] name = "proc-macro-error" -version = "0.4.5" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ - "proc-macro-error-attr 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rustversion 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", ] [[package]] name = "proc-macro-error-attr" -version = "0.4.5" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rustversion 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", - "syn-mid 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "version_check", ] [[package]] name = "proc-macro2" -version = "1.0.8" +version = "1.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" dependencies = [ - "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-ident", ] [[package]] name = "quick-error" version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = 
"quote" -version = "1.0.2" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" dependencies = [ - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", ] [[package]] name = "rand" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" dependencies = [ - "getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", + "libc", + "rand_chacha", + "rand_core", + "rand_hc", ] [[package]] name = "rand_chacha" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" dependencies = [ - "c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "c2-chacha", + "rand_core", ] [[package]] name = "rand_core" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" dependencies = [ - "getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", + "getrandom", ] [[package]] name = "rand_hc" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" dependencies = [ - "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core", ] [[package]] name = 
"redox_syscall" version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" [[package]] name = "regex" version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8" dependencies = [ - "aho-corasick 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", ] [[package]] name = "regex-syntax" version = "0.6.14" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b28dfe3fe9badec5dbf0a79a9cccad2cfc2ab5484bdb3e44cbd1ae8b3ba2be06" [[package]] name = "remove_dir_all" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" dependencies = [ - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rustversion" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "strsim" -version = "0.8.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "structopt" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.4.0 
(registry+https://github.com/rust-lang/crates.io-index)", - "structopt-derive 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "structopt-derive" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "proc-macro-error 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)", - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "symcc_fuzzing_helper" version = "0.1.0" dependencies = [ - "anyhow 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)", - "env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "structopt 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", - "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "anyhow", + "clap", + "env_logger", + "log", + "regex", + "tempfile", ] [[package]] name = "syn" -version = "1.0.14" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" dependencies = [ - "proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "syn-mid" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "proc-macro2 1.0.8 
(registry+https://github.com/rust-lang/crates.io-index)", - "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] name = "tempfile" version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" dependencies = [ - "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)", - "remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if", + "libc", + "rand", + "redox_syscall", + "remove_dir_all", + "winapi", ] [[package]] name = "termcolor" -version = "1.1.0" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" dependencies = [ - "winapi-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util", ] [[package]] name = "textwrap" -version = "0.11.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", -] +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" [[package]] name = "thread_local" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" dependencies = [ - "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static", ] [[package]] -name = 
"unicode-segmentation" -version = "1.6.0" +name = "unicode-ident" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" [[package]] -name = "unicode-width" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "unicode-xid" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "vec_map" -version = "0.8.1" +name = "version_check" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasi" version = "0.9.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" [[package]] name = "winapi" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" dependencies = [ - "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ccfbf554c6ad11084fb7517daca16cfdcaccbdadba4fc336f032a8b12c2ad80" dependencies = [ - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi", ] [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" - -[metadata] -"checksum aho-corasick 0.7.8 (registry+https://github.com/rust-lang/crates.io-index)" = "743ad5a418686aad3b87fd14c43badd828cf26e214a00f92a384291cf22e1811" -"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -"checksum anyhow 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c" -"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" -"checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" -"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" -"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" -"checksum env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" -"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" -"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" -"checksum hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772" -"checksum humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" -"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" -"checksum libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)" = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" -"checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" -"checksum memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223" -"checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" -"checksum proc-macro-error 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "1b79a464461615532fcc8a6ed8296fa66cc12350c18460ab3f4594a6cee0fcb6" -"checksum proc-macro-error-attr 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "23832e5eae6bac56bbac190500eef1aaede63776b5cd131eaa4ee7fe120cd892" -"checksum proc-macro2 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "3acb317c6ff86a4e579dfa00fc5e6cca91ecbb4e7eb2df0468805b674eb88548" -"checksum quick-error 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" -"checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" -"checksum rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -"checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" -"checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -"checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -"checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" -"checksum regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8" -"checksum regex-syntax 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)" = "b28dfe3fe9badec5dbf0a79a9cccad2cfc2ab5484bdb3e44cbd1ae8b3ba2be06" -"checksum remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" -"checksum rustversion 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b3bba175698996010c4f6dce5e7f173b6eb781fce25d2cfc45e27091ce0b79f6" -"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" -"checksum structopt 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "df136b42d76b1fbea72e2ab3057343977b04b4a2e00836c3c7c0673829572713" -"checksum structopt-derive 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd50a87d2f7b8958055f3e73a963d78feaccca3836767a9069844e34b5b03c0a" -"checksum syn 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)" = "af6f3550d8dff9ef7dc34d384ac6f107e5d31c8f57d9f28e0081503f547ac8f5" -"checksum syn-mid 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9fd3937748a7eccff61ba5b90af1a20dbf610858923a9192ea0ecb0cb77db1d0" -"checksum tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" -"checksum termcolor 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f" -"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -"checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" -"checksum unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" -"checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" -"checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" -"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" -"checksum wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)" = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" -"checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" -"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -"checksum winapi-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "4ccfbf554c6ad11084fb7517daca16cfdcaccbdadba4fc336f032a8b12c2ad80" -"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/util/symcc_fuzzing_helper/Cargo.toml b/util/symcc_fuzzing_helper/Cargo.toml index 
d09d8ae3..d884dce1 100644 --- a/util/symcc_fuzzing_helper/Cargo.toml +++ b/util/symcc_fuzzing_helper/Cargo.toml @@ -20,7 +20,7 @@ edition = "2018" license = "GPL-3.0-or-later" [dependencies] -structopt = "0.3" +clap = { version = "3", features = ["derive"] } tempfile = "3.1" anyhow = "1.0" log = "0.4.0" diff --git a/util/symcc_fuzzing_helper/src/main.rs b/util/symcc_fuzzing_helper/src/main.rs index 6d284e1e..378e79d1 100644 --- a/util/symcc_fuzzing_helper/src/main.rs +++ b/util/symcc_fuzzing_helper/src/main.rs @@ -15,6 +15,7 @@ mod symcc; use anyhow::{Context, Result}; +use clap::{self, StructOpt}; use std::collections::HashSet; use std::fs; use std::fs::File; @@ -22,7 +23,6 @@ use std::io::Write; use std::path::{Path, PathBuf}; use std::thread; use std::time::{Duration, Instant}; -use structopt::StructOpt; use symcc::{AflConfig, AflMap, AflShowmapResult, SymCC, TestcaseDir}; use tempfile::tempdir; @@ -32,22 +32,22 @@ const STATS_INTERVAL_SEC: u64 = 60; // inputs. #[derive(Debug, StructOpt)] -#[structopt(about = "Make SymCC collaborate with AFL.", no_version)] +#[clap(about = "Make SymCC collaborate with AFL.")] struct CLI { /// The name of the fuzzer to work with - #[structopt(short = "a")] + #[clap(short = 'a')] fuzzer_name: String, /// The AFL output directory - #[structopt(short = "o")] + #[clap(short = 'o')] output_dir: PathBuf, /// Name to use for SymCC - #[structopt(short = "n")] + #[clap(short = 'n')] name: String, /// Enable verbose logging - #[structopt(short = "v")] + #[clap(short = 'v')] verbose: bool, /// Program under test @@ -264,7 +264,7 @@ impl State { } fn main() -> Result<()> { - let options = CLI::from_args(); + let options = CLI::parse(); env_logger::builder() .filter_level(if options.verbose { log::LevelFilter::Debug From 99dbc3c36c2e8559e775244932a55c1720213300 Mon Sep 17 00:00:00 2001 From: Emilio Coppa Date: Thu, 3 Nov 2022 18:33:55 +0100 Subject: [PATCH 06/64] fix for issue #108 --- test/bool_cast.c | 41 
+++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 test/bool_cast.c diff --git a/test/bool_cast.c b/test/bool_cast.c new file mode 100644 index 00000000..41c76ca1 --- /dev/null +++ b/test/bool_cast.c @@ -0,0 +1,41 @@ +// This file is part of SymCC. +// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see . + +// RUN: %symcc -O1 %s -o %t +// RUN: echo b | %t 2>&1 | %filecheck %s +// +// Check that bool cast is handled correctly (Issue #108) + +#include +#include +#include + +int bar(unsigned char a) { + if (a == 0xCA) return -1; + else return 0; +} + +int main() { + unsigned char input = 0; + read(0, &input, sizeof(input)); + int r = bar(input); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // SIMPLE: stdin0 -> #xca + // QSYM-COUNT-2: SMT + // QSYM: New testcase + if (r == -1) printf("Bingo!\n"); + else printf("Ok\n"); + return r; +} \ No newline at end of file From 7d70aa89181805cb1301e2667f7ed97b21035378 Mon Sep 17 00:00:00 2001 From: Emilio Coppa Date: Thu, 3 Nov 2022 18:37:15 +0100 Subject: [PATCH 07/64] fix for issue #108 --- compiler/Runtime.cpp | 2 +- compiler/Runtime.h | 2 +- compiler/Symbolizer.cpp | 47 ++++++++++++++++++------------ runtime/RuntimeCommon.h | 2 +- runtime/qsym_backend/Runtime.cpp | 4 +-- runtime/simple_backend/Runtime.cpp | 6 ++-- 6 files changed, 37 insertions(+), 26 deletions(-) diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index 
81768841..ba0f3d83 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -69,7 +69,7 @@ Runtime::Runtime(Module &M) { buildBoolAnd = import(M, "_sym_build_bool_and", ptrT, ptrT, ptrT); buildBoolOr = import(M, "_sym_build_bool_or", ptrT, ptrT, ptrT); buildBoolXor = import(M, "_sym_build_bool_xor", ptrT, ptrT, ptrT); - buildBoolToBits = import(M, "_sym_build_bool_to_bits", ptrT, ptrT, int8T); + buildBoolToBit = import(M, "_sym_build_bool_to_bit", ptrT, ptrT); pushPathConstraint = import(M, "_sym_push_path_constraint", voidT, ptrT, IRB.getInt1Ty(), intPtrType); diff --git a/compiler/Runtime.h b/compiler/Runtime.h index 7bf4a769..519f9f00 100644 --- a/compiler/Runtime.h +++ b/compiler/Runtime.h @@ -49,7 +49,7 @@ struct Runtime { SymFnT buildBoolAnd{}; SymFnT buildBoolOr{}; SymFnT buildBoolXor{}; - SymFnT buildBoolToBits{}; + SymFnT buildBoolToBit{}; SymFnT pushPathConstraint{}; SymFnT getParameterExpression{}; SymFnT setParameterExpression{}; diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index a6206823..42b17c9e 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -696,30 +696,41 @@ void Symbolizer::visitCastInst(CastInst &I) { IRBuilder<> IRB(&I); + SymFnT target; + + switch (I.getOpcode()) { + case Instruction::SExt: + target = runtime.buildSExt; + break; + case Instruction::ZExt: + target = runtime.buildZExt; + break; + default: + llvm_unreachable("Unknown cast opcode"); + } + // LLVM bitcode represents Boolean values as i1. In Z3, those are a not a // bit-vector sort, so trying to cast one into a bit vector of any length // raises an error. The run-time library provides a dedicated conversion // function for this case. 
if (I.getSrcTy()->getIntegerBitWidth() == 1) { - auto boolToBitConversion = buildRuntimeCall( - IRB, runtime.buildBoolToBits, - {{I.getOperand(0), true}, - {IRB.getInt8(I.getDestTy()->getIntegerBitWidth()), false}}); - registerSymbolicComputation(boolToBitConversion, &I); - } else { - SymFnT target; - - switch (I.getOpcode()) { - case Instruction::SExt: - target = runtime.buildSExt; - break; - case Instruction::ZExt: - target = runtime.buildZExt; - break; - default: - llvm_unreachable("Unknown cast opcode"); - } + SymbolicComputation symbolicComputation; + symbolicComputation.merge( + forceBuildRuntimeCall( + IRB, runtime.buildBoolToBit, + {{I.getOperand(0), true}}) + ); + symbolicComputation.merge( + forceBuildRuntimeCall( + IRB, target, + {{symbolicComputation.lastInstruction, false}, + {IRB.getInt8(I.getDestTy()->getIntegerBitWidth() - 1), false}}) + ); + + registerSymbolicComputation(symbolicComputation, &I); + + } else { auto symbolicCast = buildRuntimeCall(IRB, target, {{I.getOperand(0), true}, diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index f00176ea..d4a20a4b 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -119,7 +119,7 @@ SymExpr _sym_build_bits_to_float(SymExpr expr, int to_double); SymExpr _sym_build_float_to_bits(SymExpr expr); SymExpr _sym_build_float_to_signed_integer(SymExpr expr, uint8_t bits); SymExpr _sym_build_float_to_unsigned_integer(SymExpr expr, uint8_t bits); -SymExpr _sym_build_bool_to_bits(SymExpr expr, uint8_t bits); +SymExpr _sym_build_bool_to_bit(SymExpr expr); /* * Bit-array helpers diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index 68b093bd..4b42bdfc 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -305,9 +305,9 @@ SymExpr _sym_extract_helper(SymExpr expr, size_t first_bit, size_t last_bit) { size_t _sym_bits_helper(SymExpr expr) { return expr->bits(); } -SymExpr _sym_build_bool_to_bits(SymExpr expr, uint8_t bits) { 
+SymExpr _sym_build_bool_to_bit(SymExpr expr) { return registerExpression( - g_expr_builder->boolToBit(allocatedExpressions.at(expr), bits)); + g_expr_builder->boolToBit(allocatedExpressions.at(expr), 1)); } // diff --git a/runtime/simple_backend/Runtime.cpp b/runtime/simple_backend/Runtime.cpp index d7ef5f20..c6542d74 100644 --- a/runtime/simple_backend/Runtime.cpp +++ b/runtime/simple_backend/Runtime.cpp @@ -407,10 +407,10 @@ Z3_ast _sym_build_float_to_unsigned_integer(Z3_ast expr, uint8_t bits) { g_context, Z3_mk_fpa_round_toward_zero(g_context), expr, bits)); } -Z3_ast _sym_build_bool_to_bits(Z3_ast expr, uint8_t bits) { +Z3_ast _sym_build_bool_to_bit(Z3_ast expr) { return registerExpression(Z3_mk_ite(g_context, expr, - _sym_build_integer(1, bits), - _sym_build_integer(0, bits))); + _sym_build_integer(1, 1), + _sym_build_integer(0, 1))); } void _sym_push_path_constraint(Z3_ast constraint, int taken, From 214429cf76f804bd0fb20d8e8064b0fb665d3d44 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Fri, 4 Nov 2022 10:27:59 +0100 Subject: [PATCH 08/64] LLVM 12 works without changes --- CMakeLists.txt | 4 ++-- README.md | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d090acc..3aa9bb3f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,8 +77,8 @@ find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}") -if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 11) - message(WARNING "The software has been developed for LLVM 8 through 11; \ +if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 12) + message(WARNING "The software has been developed for LLVM 8 through 12; \ it is unlikely to work with other versions!") endif() diff --git a/README.md b/README.md index 2d02f09d..2a3396cf 100644 --- a/README.md +++ b/README.md @@ -15,16 +15,18 @@ compiler inserts code that computes symbolic 
expressions for each value in the program. The actual computation happens through calls to the support library at run time. -To build the pass and the support library, make sure that LLVM 8, 9, 10 or 11 -and Z3 version 4.5 or later, as well as a C++ compiler with support for C++17 -are installed. "lit" is also needed which is not always packaged with LLVM. +To build the pass and the support library, install LLVM (any version between 8 +and 12) and Z3 (version 4.5 or later), as well as a C++ compiler with support +for C++17. LLVM lit is only needed to run the tests; if it's not packaged with +your LLVM, you can get it with `pip install lit`. -Under Ubuntu groovy the following one liner should install all required +Under Ubuntu Groovy the following one liner should install all required packages: ``` sudo apt install -y git cargo clang-10 cmake g++ git libz3-dev llvm-10-dev llvm-10-tools ninja-build python2 python3-pip zlib1g-dev && sudo pip3 install lit ``` + Alternatively, see below for using the provided Dockerfile, or the file `util/quicktest.sh` for exact steps to perform under Ubuntu (or use with the provided Vagrant file). From 474b38b6a04c496fe0fdb176e965c34c6aaf60cb Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Fri, 4 Nov 2022 15:58:23 +0100 Subject: [PATCH 09/64] Add a clang-format configuration This is just the output of "clang-format -style=llvm -dump-config". 
--- .clang-format | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..48b2c678 --- /dev/null +++ b/.clang-format @@ -0,0 +1,192 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveMacros: None +AlignConsecutiveAssignments: None +AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: None +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: true +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon 
+BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +QualifierAlignment: Leave +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +PackConstructorInitializers: BinPack +BasedOnStyle: '' +ConstructorInitializerAllOnOneLineOrOnePerLine: false +AllowAllConstructorInitializersOnNextLine: true +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseLabels: false +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentRequires: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertTrailingCommas: None +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +LambdaBodyIndentation: Signature +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 
+PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PenaltyIndentedWhitespace: 0 +PointerAlignment: Right +PPIndentWidth: -1 +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + BeforeNonEmptyParentheses: false +SpaceAroundPointerQualifiers: Default +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +BitFieldColonSpacing: Both +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE + - NS_SWIFT_NAME + - CF_SWIFT_NAME +... + From 230047ea8e0e863721a7512bb01a69c674041e37 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Fri, 4 Nov 2022 16:00:47 +0100 Subject: [PATCH 10/64] Add support for LLVM 13 Clang now uses the new pass manager for the optimization pipeline, so we have to do the same to make Clang use our pass. 
Moreover, FileCheck now complains if a configured prefix doesn't appear in the checked file; added "ANY" in three tests where it was missing. Finally, printing arbitrary-precision integers in QSYM needed some changes. --- CMakeLists.txt | 10 +++++-- README.md | 2 +- compiler/Main.cpp | 58 +++++++++++++++++++++++++++++++++------ compiler/Pass.cpp | 36 +++++++++++++++++++++--- compiler/Pass.h | 24 +++++++++++----- compiler/sym++.in | 2 +- compiler/symcc.in | 2 +- runtime/qsym_backend/qsym | 2 +- test/bool_cast.c | 3 +- test/if.c | 1 + test/loop.c | 1 + 11 files changed, 115 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3aa9bb3f..a4392a59 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,8 +77,8 @@ find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}") -if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 12) - message(WARNING "The software has been developed for LLVM 8 through 12; \ +if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 13) + message(WARNING "The software has been developed for LLVM 8 through 13; \ it is unlikely to work with other versions!") endif() @@ -115,6 +115,12 @@ if (NOT CLANG_BINARY) message(FATAL_ERROR "Clang not found; please make sure that the version corresponding to your LLVM installation is available.") endif() +if (${LLVM_VERSION_MAJOR} LESS 13) + set(CLANG_LOAD_PASS "-Xclang -load -Xclang ") +else() + set(CLANG_LOAD_PASS "-fpass-plugin=") +endif() + configure_file("compiler/symcc.in" "symcc" @ONLY) configure_file("compiler/sym++.in" "sym++" @ONLY) diff --git a/README.md b/README.md index 2a3396cf..f5cd27fc 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ program. The actual computation happens through calls to the support library at run time. 
To build the pass and the support library, install LLVM (any version between 8 -and 12) and Z3 (version 4.5 or later), as well as a C++ compiler with support +and 13) and Z3 (version 4.5 or later), as well as a C++ compiler with support for C++17. LLVM lit is only needed to run the tests; if it's not packaged with your LLVM, you can get it with `pip install lit`. diff --git a/compiler/Main.cpp b/compiler/Main.cpp index 9be71ff6..02079475 100644 --- a/compiler/Main.cpp +++ b/compiler/Main.cpp @@ -15,17 +15,59 @@ #include #include +#if LLVM_VERSION_MAJOR >= 13 +#include +#include +#endif + #include "Pass.h" -void addSymbolizePass(const llvm::PassManagerBuilder & /* unused */, - llvm::legacy::PassManagerBase &PM) { - PM.add(new SymbolizePass()); +using namespace llvm; + +// +// Legacy pass registration (up to LLVM 13) +// + +void addSymbolizeLegacyPass(const PassManagerBuilder & /* unused */, + legacy::PassManagerBase &PM) { + PM.add(new SymbolizeLegacyPass()); } // Make the pass known to opt. -static llvm::RegisterPass X("symbolize", "Symbolization Pass"); +static RegisterPass X("symbolize", "Symbolization Pass"); // Tell frontends to run the pass automatically. -static struct llvm::RegisterStandardPasses - Y(llvm::PassManagerBuilder::EP_VectorizerStart, addSymbolizePass); -static struct llvm::RegisterStandardPasses - Z(llvm::PassManagerBuilder::EP_EnabledOnOptLevel0, addSymbolizePass); +static struct RegisterStandardPasses Y(PassManagerBuilder::EP_VectorizerStart, + addSymbolizeLegacyPass); +static struct RegisterStandardPasses + Z(PassManagerBuilder::EP_EnabledOnOptLevel0, addSymbolizeLegacyPass); + +// +// New pass registration (LLVM 13 and above) +// + +#if LLVM_VERSION_MAJOR >= 13 + +PassPluginLibraryInfo getSymbolizePluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "Symbolization Pass", LLVM_VERSION_STRING, + [](PassBuilder &PB) { + // We need to act on the entire module as well as on each function. 
+ // Those actions are independent from each other, so we register a + // module pass at the start of the pipeline and a function pass just + // before the vectorizer. (There doesn't seem to be a way to run + // module passes at the start of the vectorizer, hence the split.) + PB.registerPipelineStartEPCallback( + [](ModulePassManager &PM, PassBuilder::OptimizationLevel) { + PM.addPass(SymbolizePass()); + }); + PB.registerVectorizerStartEPCallback( + [](FunctionPassManager &PM, PassBuilder::OptimizationLevel) { + PM.addPass(SymbolizePass()); + }); + }}; +} + +extern "C" LLVM_ATTRIBUTE_WEAK PassPluginLibraryInfo llvmGetPassPluginInfo() { + return getSymbolizePluginInfo(); +} + +#endif diff --git a/compiler/Pass.cpp b/compiler/Pass.cpp index 122fd571..f17fd7e5 100644 --- a/compiler/Pass.cpp +++ b/compiler/Pass.cpp @@ -34,10 +34,14 @@ using namespace llvm; #define DEBUG(X) ((void)0) #endif -char SymbolizePass::ID = 0; +char SymbolizeLegacyPass::ID = 0; -bool SymbolizePass::doInitialization(Module &M) { - DEBUG(errs() << "Symbolizer module init\n"); +namespace { + +static constexpr char kSymCtorName[] = "__sym_ctor"; + +bool instrumentModule(Module &M) { + DEBUG(errs() << "Symbolizer module instrumentation\n"); // Redirect calls to external functions to the corresponding wrappers and // rename internal functions. 
@@ -56,7 +60,7 @@ bool SymbolizePass::doInitialization(Module &M) { return true; } -bool SymbolizePass::runOnFunction(Function &F) { +bool instrumentFunction(Function &F) { auto functionName = F.getName(); if (functionName == kSymCtorName) return false; @@ -87,3 +91,27 @@ bool SymbolizePass::runOnFunction(Function &F) { return true; } + +} // namespace + +bool SymbolizeLegacyPass::doInitialization(Module &M) { + return instrumentModule(M); +} + +bool SymbolizeLegacyPass::runOnFunction(Function &F) { + return instrumentFunction(F); +} + +#if LLVM_VERSION_MAJOR >= 13 + +PreservedAnalyses SymbolizePass::run(Function &F, FunctionAnalysisManager &) { + return instrumentFunction(F) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} + +PreservedAnalyses SymbolizePass::run(Module &M, ModuleAnalysisManager &) { + return instrumentModule(M) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} + +#endif diff --git a/compiler/Pass.h b/compiler/Pass.h index 53764931..cf0676aa 100644 --- a/compiler/Pass.h +++ b/compiler/Pass.h @@ -19,21 +19,31 @@ #include #include -class SymbolizePass : public llvm::FunctionPass { +#if LLVM_VERSION_MAJOR >= 13 +#include +#endif + +class SymbolizeLegacyPass : public llvm::FunctionPass { public: static char ID; - SymbolizePass() : FunctionPass(ID) {} + SymbolizeLegacyPass() : FunctionPass(ID) {} bool doInitialization(llvm::Module &M) override; bool runOnFunction(llvm::Function &F) override; +}; -private: - static constexpr char kSymCtorName[] = "__sym_ctor"; +#if LLVM_VERSION_MAJOR >= 13 - /// Mapping from global variables to their corresponding symbolic expressions. 
- llvm::ValueMap - globalExpressions; +class SymbolizePass : public llvm::PassInfoMixin { +public: + llvm::PreservedAnalyses run(llvm::Function &F, + llvm::FunctionAnalysisManager &); + llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &); + + static bool isRequired() { return true; } }; #endif + +#endif diff --git a/compiler/sym++.in b/compiler/sym++.in index 82221945..2b775739 100755 --- a/compiler/sym++.in +++ b/compiler/sym++.in @@ -55,7 +55,7 @@ if [ $# -eq 0 ]; then fi exec $compiler \ - -Xclang -load -Xclang "$pass" \ + @CLANG_LOAD_PASS@"$pass" \ $stdlib_cflags \ "$@" \ $stdlib_ldflags \ diff --git a/compiler/symcc.in b/compiler/symcc.in index a0694c06..4e0ad37e 100755 --- a/compiler/symcc.in +++ b/compiler/symcc.in @@ -39,7 +39,7 @@ if [ $# -eq 0 ]; then fi exec $compiler \ - -Xclang -load -Xclang "$pass" \ + @CLANG_LOAD_PASS@"$pass" \ "$@" \ -L"$runtime_dir" \ -lSymRuntime \ diff --git a/runtime/qsym_backend/qsym b/runtime/qsym_backend/qsym index d17a39d4..6cba7f99 160000 --- a/runtime/qsym_backend/qsym +++ b/runtime/qsym_backend/qsym @@ -1 +1 @@ -Subproject commit d17a39d40dc3ea1d17262dd52607f8a6527dde10 +Subproject commit 6cba7f996fa2568dcc02e632a72d9931fdf30f70 diff --git a/test/bool_cast.c b/test/bool_cast.c index 41c76ca1..0d3764a0 100644 --- a/test/bool_cast.c +++ b/test/bool_cast.c @@ -37,5 +37,6 @@ int main() { // QSYM: New testcase if (r == -1) printf("Bingo!\n"); else printf("Ok\n"); + // ANY: Ok return r; -} \ No newline at end of file +} diff --git a/test/if.c b/test/if.c index 036c4402..dda1d13a 100644 --- a/test/if.c +++ b/test/if.c @@ -51,5 +51,6 @@ int main(int argc, char* argv[]) { } fprintf(stderr, "%d\n", x); fprintf(stderr, "%d\n", foo(x, 7)); + // ANY: 7 return 0; } diff --git a/test/loop.c b/test/loop.c index d411e180..5d90eb9d 100644 --- a/test/loop.c +++ b/test/loop.c @@ -50,5 +50,6 @@ int main(int argc, char* argv[]) { } x = ntohl(x); fprintf(stderr, "%d\n", fac(x)); + // ANY: 120 return 0; } From 
4e9cbeec904ac1ebce787b967c60474ab8551a86 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 10 Nov 2022 11:45:31 +0100 Subject: [PATCH 11/64] Add support for LLVM 14 --- CMakeLists.txt | 4 ++-- README.md | 2 +- compiler/Main.cpp | 10 ++++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a4392a59..6a069509 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,8 +77,8 @@ find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}") -if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 13) - message(WARNING "The software has been developed for LLVM 8 through 13; \ +if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 14) + message(WARNING "The software has been developed for LLVM 8 through 14; \ it is unlikely to work with other versions!") endif() diff --git a/README.md b/README.md index f5cd27fc..a4c514ee 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ program. The actual computation happens through calls to the support library at run time. To build the pass and the support library, install LLVM (any version between 8 -and 13) and Z3 (version 4.5 or later), as well as a C++ compiler with support +and 14) and Z3 (version 4.5 or later), as well as a C++ compiler with support for C++17. LLVM lit is only needed to run the tests; if it's not packaged with your LLVM, you can get it with `pip install lit`. diff --git a/compiler/Main.cpp b/compiler/Main.cpp index 02079475..f915d5d4 100644 --- a/compiler/Main.cpp +++ b/compiler/Main.cpp @@ -18,6 +18,12 @@ #if LLVM_VERSION_MAJOR >= 13 #include #include + +#if LLVM_VERSION_MAJOR >= 14 +#include +#else +using OptimizationLevel = llvm::PassBuilder::OptimizationLevel; +#endif #endif #include "Pass.h" @@ -56,11 +62,11 @@ PassPluginLibraryInfo getSymbolizePluginInfo() { // before the vectorizer. 
(There doesn't seem to be a way to run // module passes at the start of the vectorizer, hence the split.) PB.registerPipelineStartEPCallback( - [](ModulePassManager &PM, PassBuilder::OptimizationLevel) { + [](ModulePassManager &PM, OptimizationLevel) { PM.addPass(SymbolizePass()); }); PB.registerVectorizerStartEPCallback( - [](FunctionPassManager &PM, PassBuilder::OptimizationLevel) { + [](FunctionPassManager &PM, OptimizationLevel) { PM.addPass(SymbolizePass()); }); }}; From 7a3400cc99a2f936311f6a8d7068de90bfdddb5a Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 10 Nov 2022 12:53:36 +0100 Subject: [PATCH 12/64] LLVM 15 works without changes --- CMakeLists.txt | 4 ++-- README.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a069509..9cd05dfe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,8 +77,8 @@ find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}") -if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 14) - message(WARNING "The software has been developed for LLVM 8 through 14; \ +if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 15) + message(WARNING "The software has been developed for LLVM 8 through 15; \ it is unlikely to work with other versions!") endif() diff --git a/README.md b/README.md index a4c514ee..0bb20644 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ program. The actual computation happens through calls to the support library at run time. To build the pass and the support library, install LLVM (any version between 8 -and 14) and Z3 (version 4.5 or later), as well as a C++ compiler with support +and 15) and Z3 (version 4.5 or later), as well as a C++ compiler with support for C++17. LLVM lit is only needed to run the tests; if it's not packaged with your LLVM, you can get it with `pip install lit`. 
From 0ce37fab4831c1c2f2317f538ba49f270cf9bfc6 Mon Sep 17 00:00:00 2001 From: Emilio Coppa Date: Fri, 4 Nov 2022 18:17:01 +0100 Subject: [PATCH 13/64] fix issue #109 --- compiler/Symbolizer.cpp | 8 +++++++ test/propagation_select.c | 47 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 test/propagation_select.c diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index 42b17c9e..ec4c14c8 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -380,6 +380,14 @@ void Symbolizer::visitSelectInst(SelectInst &I) { {I.getCondition(), false}, {getTargetPreferredInt(&I), false}}); registerSymbolicComputation(runtimeCall); + if (getSymbolicExpression(I.getTrueValue()) + || getSymbolicExpression(I.getFalseValue())) { + auto *data = IRB.CreateSelect( + I.getCondition(), + getSymbolicExpressionOrNull(I.getTrueValue()), + getSymbolicExpressionOrNull(I.getFalseValue())); + symbolicExpressions[&I] = data; + } } void Symbolizer::visitCmpInst(CmpInst &I) { diff --git a/test/propagation_select.c b/test/propagation_select.c new file mode 100644 index 00000000..0f676ea4 --- /dev/null +++ b/test/propagation_select.c @@ -0,0 +1,47 @@ +// This file is part of SymCC. +// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see . 
+ +// RUN: %symcc -O1 %s -o %t +// RUN: echo xxx | %t 2>&1 | %filecheck %s +// +// Check that select instruction is propagating the symbolic value (issue #109) + +#include +#include +#include + +char bar(char a, char b, char c) { + return (a == 0xA) ? b : c; +} + +int main() { + char input[3] = { 0 }; + read(0, &input, sizeof(input)); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // SIMPLE: stdin0 -> #x0a + // QSYM-COUNT-2: SMT + // QSYM: New testcase + char r = bar(input[0], input[1], input[2]); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // SIMPLE: stdin2 -> #x0b + // SIMPLE: stdin0 -> #x00 + // QSYM-COUNT-2: SMT + // QSYM: New testcase + // ANY: KO + if (r == 0xB) printf("OK!\n"); + else printf("KO\n"); + return 0; +} \ No newline at end of file From 2ab0975317d61e925eb637ff9171eac890499435 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 10 Nov 2022 13:06:48 +0100 Subject: [PATCH 14/64] Run clang-format We should really automate this... 
--- compiler/Symbolizer.cpp | 28 ++++++++++++---------------- test/propagation_select.c | 16 ++++++++-------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index ec4c14c8..05f5a6b5 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -380,12 +380,11 @@ void Symbolizer::visitSelectInst(SelectInst &I) { {I.getCondition(), false}, {getTargetPreferredInt(&I), false}}); registerSymbolicComputation(runtimeCall); - if (getSymbolicExpression(I.getTrueValue()) - || getSymbolicExpression(I.getFalseValue())) { + if (getSymbolicExpression(I.getTrueValue()) || + getSymbolicExpression(I.getFalseValue())) { auto *data = IRB.CreateSelect( - I.getCondition(), - getSymbolicExpressionOrNull(I.getTrueValue()), - getSymbolicExpressionOrNull(I.getFalseValue())); + I.getCondition(), getSymbolicExpressionOrNull(I.getTrueValue()), + getSymbolicExpressionOrNull(I.getFalseValue())); symbolicExpressions[&I] = data; } } @@ -724,20 +723,15 @@ void Symbolizer::visitCastInst(CastInst &I) { if (I.getSrcTy()->getIntegerBitWidth() == 1) { SymbolicComputation symbolicComputation; - symbolicComputation.merge( - forceBuildRuntimeCall( - IRB, runtime.buildBoolToBit, - {{I.getOperand(0), true}}) - ); - symbolicComputation.merge( - forceBuildRuntimeCall( + symbolicComputation.merge(forceBuildRuntimeCall(IRB, runtime.buildBoolToBit, + {{I.getOperand(0), true}})); + symbolicComputation.merge(forceBuildRuntimeCall( IRB, target, {{symbolicComputation.lastInstruction, false}, - {IRB.getInt8(I.getDestTy()->getIntegerBitWidth() - 1), false}}) - ); + {IRB.getInt8(I.getDestTy()->getIntegerBitWidth() - 1), false}})); registerSymbolicComputation(symbolicComputation, &I); - + } else { auto symbolicCast = buildRuntimeCall(IRB, target, @@ -777,7 +771,9 @@ void Symbolizer::visitInsertValueInst(InsertValueInst &I) { {IRB.getInt64(aggregateMemberOffset(I.getAggregateOperand()->getType(), I.getIndices())), false}, - 
{IRB.getInt8(isLittleEndian(I.getInsertedValueOperand()->getType()) ? 1 : 0), false}}); + {IRB.getInt8(isLittleEndian(I.getInsertedValueOperand()->getType()) ? 1 + : 0), + false}}); registerSymbolicComputation(insert, &I); } diff --git a/test/propagation_select.c b/test/propagation_select.c index 0f676ea4..ee900de5 100644 --- a/test/propagation_select.c +++ b/test/propagation_select.c @@ -17,16 +17,14 @@ // // Check that select instruction is propagating the symbolic value (issue #109) -#include #include +#include #include -char bar(char a, char b, char c) { - return (a == 0xA) ? b : c; -} +char bar(char a, char b, char c) { return (a == 0xA) ? b : c; } int main() { - char input[3] = { 0 }; + char input[3] = {0}; read(0, &input, sizeof(input)); // SIMPLE: Trying to solve // SIMPLE: Found diverging input @@ -41,7 +39,9 @@ int main() { // QSYM-COUNT-2: SMT // QSYM: New testcase // ANY: KO - if (r == 0xB) printf("OK!\n"); - else printf("KO\n"); + if (r == 0xB) + printf("OK!\n"); + else + printf("KO\n"); return 0; -} \ No newline at end of file +} From 1cb77436b0634268469a184c5d05164053b7e6dc Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 10 Nov 2022 14:07:06 +0100 Subject: [PATCH 15/64] Add a GitHub action that checks LLVM compatibility --- .github/workflows/run_tests.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index fe2c0f82..9d00bbd0 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -15,3 +15,29 @@ jobs: run: docker build --target builder_qsym -t symcc . - name: Creation of the final SymCC docker image with Qsym backend and libcxx run: docker build -t symcc . 
+ llvm_compatibility: + runs-on: ubuntu-22.04 + strategy: + matrix: + llvm_version: [11, 12, 13, 14] + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - name: Install dependencies + run: | + sudo apt-get install -y \ + llvm-${{ matrix.llvm_version }}-dev \ + libz3-dev \ + python2 + - name: Build SymCC with the QSYM backend + run: | + mkdir build + cd build + cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DZ3_TRUST_SYSTEM_VERSION=ON \ + -DQSYM_BACKEND=ON \ + -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm_version }}/cmake \ + .. + make From c4d1aae7078348c1ba55baf8fe79e704dd4fa354 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Fri, 18 Nov 2022 13:17:07 +0100 Subject: [PATCH 16/64] Prevent test failures in case of reordered solver output Z3 doesn't always output model constants in the same order; make sure that our tests don't depend on it. --- test/propagation_select.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/propagation_select.c b/test/propagation_select.c index ee900de5..a04af9e0 100644 --- a/test/propagation_select.c +++ b/test/propagation_select.c @@ -34,8 +34,8 @@ int main() { char r = bar(input[0], input[1], input[2]); // SIMPLE: Trying to solve // SIMPLE: Found diverging input - // SIMPLE: stdin2 -> #x0b - // SIMPLE: stdin0 -> #x00 + // SIMPLE-DAG: stdin2 -> #x0b + // SIMPLE-DAG: stdin0 -> #x00 // QSYM-COUNT-2: SMT // QSYM: New testcase // ANY: KO From d5891b02e1f9a662e19aa2e516c30a1d43828a7e Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Fri, 18 Nov 2022 14:37:20 +0100 Subject: [PATCH 17/64] Accept symbolic input from memory This commit adds the option to mark symbolic input by calling symcc_make_symbolic from the program under test. The refactoring that was required to add the new feature has had the pleasant side effect that the QSYM backend now doesn't require the entire input upfront anymore, making it much more convenient to feed symbolic data through stdin. 
--- docs/Configuration.txt | 7 ++- runtime/Config.cpp | 23 ++++++--- runtime/Config.h | 25 +++++++-- runtime/LibcWrappers.cpp | 83 +++++++++++++----------------- runtime/RuntimeCommon.cpp | 23 +++++++++ runtime/RuntimeCommon.h | 13 ++++- runtime/qsym_backend/Runtime.cpp | 77 +++++++++++++-------------- runtime/simple_backend/Runtime.cpp | 2 +- test/memory_input.c | 51 ++++++++++++++++++ 9 files changed, 201 insertions(+), 103 deletions(-) create mode 100644 test/memory_input.c diff --git a/docs/Configuration.txt b/docs/Configuration.txt index 123aec34..4e743aa8 100644 --- a/docs/Configuration.txt +++ b/docs/Configuration.txt @@ -60,7 +60,12 @@ environment variables. - SYMCC_INPUT_FILE (default empty): When empty, SymCC treats data read from standard input as symbolic; when set to a file name, any data read from that - file is considered symbolic. + file is considered symbolic. Ignored if SYMCC_NO_SYMBOLIC_INPUT is set to 1. + +- SYMCC_MEMORY_INPUT=0/1 (default 0): When set to 1, expect the program under + test to communicate symbolic inputs with one or more calls to + symcc_make_symbolic. Can't be combined with SYMCC_INPUT_FILE. Ignored if + SYMCC_NO_SYMBOLIC_INPUT is set to 1. - SYMCC_LOG_FILE (default empty): When set to a file name, SymCC creates the file (or overwrites any existing file!) 
and uses it to log backend activity diff --git a/runtime/Config.cpp b/runtime/Config.cpp index c7d45ee0..0088a4d1 100644 --- a/runtime/Config.cpp +++ b/runtime/Config.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace { @@ -41,17 +42,26 @@ bool checkFlagString(std::string value) { Config g_config; void loadConfig() { - auto *fullyConcrete = getenv("SYMCC_NO_SYMBOLIC_INPUT"); - if (fullyConcrete != nullptr) - g_config.fullyConcrete = checkFlagString(fullyConcrete); - auto *outputDir = getenv("SYMCC_OUTPUT_DIR"); if (outputDir != nullptr) g_config.outputDir = outputDir; auto *inputFile = getenv("SYMCC_INPUT_FILE"); if (inputFile != nullptr) - g_config.inputFile = inputFile; + g_config.input = FileInput{inputFile}; + + auto *memoryInput = getenv("SYMCC_MEMORY_INPUT"); + if (memoryInput != nullptr && checkFlagString(memoryInput)) { + if (std::holds_alternative(g_config.input)) + throw std::runtime_error{ + "Can't enable file and memory input at the same time"}; + + g_config.input = MemoryInput{}; + } + + auto *fullyConcrete = getenv("SYMCC_NO_SYMBOLIC_INPUT"); + if (fullyConcrete != nullptr && checkFlagString(fullyConcrete)) + g_config.input = NoInput{}; auto *logFile = getenv("SYMCC_LOG_FILE"); if (logFile != nullptr) @@ -76,7 +86,8 @@ void loadConfig() { throw std::runtime_error(msg.str()); } catch (std::out_of_range &) { std::stringstream msg; - msg << "The GC threshold must be between 0 and " << std::numeric_limits::max(); + msg << "The GC threshold must be between 0 and " + << std::numeric_limits::max(); throw std::runtime_error(msg.str()); } } diff --git a/runtime/Config.h b/runtime/Config.h index 450344eb..4ed8f806 100644 --- a/runtime/Config.h +++ b/runtime/Config.h @@ -16,17 +16,32 @@ #define CONFIG_H #include +#include + +/// Marker struct for fully concrete execution. +struct NoInput {}; + +/// Marker struct for symbolic input from stdin. +struct StdinInput {}; + +/// Marker struct for symbolic input via _sym_make_symbolic. 
+struct MemoryInput {}; + +/// Configuration for symbolic input from a file. +struct FileInput { + /// The name of input file. + std::string fileName; +}; struct Config { - /// Should we allow symbolic data in the program? - bool fullyConcrete = false; + using InputConfig = std::variant; + + /// The configuration for our symbolic input. + InputConfig input = StdinInput{}; /// The directory for storing new outputs. std::string outputDir = "/tmp/output"; - /// The input file, if any. - std::string inputFile; - /// The file to log constraint solving information to. std::string logFile = ""; diff --git a/runtime/LibcWrappers.cpp b/runtime/LibcWrappers.cpp index 319ae9f5..6e9c6c90 100644 --- a/runtime/LibcWrappers.cpp +++ b/runtime/LibcWrappers.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -56,13 +57,28 @@ template void tryAlternative(E *value, SymExpr valueExpr, F caller) { tryAlternative(reinterpret_cast(value), valueExpr, caller); } -} // namespace -void initLibcWrappers() { - if (g_config.fullyConcrete) +void maybeSetInputFile(const char *path, int fd) { + auto *fileInput = std::get_if(&g_config.input); + if (fileInput == nullptr) return; - if (g_config.inputFile.empty()) { + if (strstr(path, fileInput->fileName.c_str()) == nullptr) + return; + + if (inputFileDescriptor != -1) + std::cerr << "Warning: input file opened multiple times; this is not yet " + "supported" + << std::endl; + + inputFileDescriptor = fd; + inputOffset = 0; +} + +} // namespace + +void initLibcWrappers() { + if (std::holds_alternative(g_config.input)) { // Symbolic data comes from standard input. 
inputFileDescriptor = 0; } @@ -111,15 +127,8 @@ int SYM(open)(const char *path, int oflag, mode_t mode) { auto result = open(path, oflag, mode); _sym_set_return_expression(nullptr); - if (result >= 0 && !g_config.fullyConcrete && !g_config.inputFile.empty() && - strstr(path, g_config.inputFile.c_str()) != nullptr) { - if (inputFileDescriptor != -1) - std::cerr << "Warning: input file opened multiple times; this is not yet " - "supported" - << std::endl; - inputFileDescriptor = result; - inputOffset = 0; - } + if (result >= 0) + maybeSetInputFile(path, result); return result; } @@ -136,9 +145,8 @@ ssize_t SYM(read)(int fildes, void *buf, size_t nbyte) { if (fildes == inputFileDescriptor) { // Reading symbolic input. - ReadWriteShadow shadow(buf, result); - std::generate(shadow.begin(), shadow.end(), - []() { return _sym_get_input_byte(inputOffset++); }); + _sym_make_symbolic(buf, result, inputOffset); + inputOffset += result; } else if (!isConcrete(buf, result)) { ReadWriteShadow shadow(buf, result); std::fill(shadow.begin(), shadow.end(), nullptr); @@ -193,16 +201,8 @@ FILE *SYM(fopen)(const char *pathname, const char *mode) { auto *result = fopen(pathname, mode); _sym_set_return_expression(nullptr); - if (result != nullptr && !g_config.fullyConcrete && - !g_config.inputFile.empty() && - strstr(pathname, g_config.inputFile.c_str()) != nullptr) { - if (inputFileDescriptor != -1) - std::cerr << "Warning: input file opened multiple times; this is not yet " - "supported" - << std::endl; - inputFileDescriptor = fileno(result); - inputOffset = 0; - } + if (result != nullptr) + maybeSetInputFile(pathname, fileno(result)); return result; } @@ -211,16 +211,8 @@ FILE *SYM(fopen64)(const char *pathname, const char *mode) { auto *result = fopen64(pathname, mode); _sym_set_return_expression(nullptr); - if (result != nullptr && !g_config.fullyConcrete && - !g_config.inputFile.empty() && - strstr(pathname, g_config.inputFile.c_str()) != nullptr) { - if (inputFileDescriptor != -1) 
- std::cerr << "Warning: input file opened multiple times; this is not yet " - "supported" - << std::endl; - inputFileDescriptor = fileno(result); - inputOffset = 0; - } + if (result != nullptr) + maybeSetInputFile(pathname, fileno(result)); return result; } @@ -235,9 +227,8 @@ size_t SYM(fread)(void *ptr, size_t size, size_t nmemb, FILE *stream) { if (fileno(stream) == inputFileDescriptor) { // Reading symbolic input. - ReadWriteShadow shadow(ptr, result * size); - std::generate(shadow.begin(), shadow.end(), - []() { return _sym_get_input_byte(inputOffset++); }); + _sym_make_symbolic(ptr, result * size, inputOffset); + inputOffset += result * size; } else if (!isConcrete(ptr, result * size)) { ReadWriteShadow shadow(ptr, result * size); std::fill(shadow.begin(), shadow.end(), nullptr); @@ -255,9 +246,9 @@ char *SYM(fgets)(char *str, int n, FILE *stream) { if (fileno(stream) == inputFileDescriptor) { // Reading symbolic input. - ReadWriteShadow shadow(str, sizeof(char) * strlen(str)); - std::generate(shadow.begin(), shadow.end(), - []() { return _sym_get_input_byte(inputOffset++); }); + const auto length = sizeof(char) * strlen(str); + _sym_make_symbolic(str, length, inputOffset); + inputOffset += length; } else if (!isConcrete(str, sizeof(char) * strlen(str))) { ReadWriteShadow shadow(str, sizeof(char) * strlen(str)); std::fill(shadow.begin(), shadow.end(), nullptr); @@ -338,7 +329,7 @@ int SYM(getc)(FILE *stream) { if (fileno(stream) == inputFileDescriptor) _sym_set_return_expression(_sym_build_zext( - _sym_get_input_byte(inputOffset++), sizeof(int) * 8 - 8)); + _sym_get_input_byte(inputOffset++, result), sizeof(int) * 8 - 8)); else _sym_set_return_expression(nullptr); @@ -354,16 +345,14 @@ int SYM(fgetc)(FILE *stream) { if (fileno(stream) == inputFileDescriptor) _sym_set_return_expression(_sym_build_zext( - _sym_get_input_byte(inputOffset++), sizeof(int) * 8 - 8)); + _sym_get_input_byte(inputOffset++, result), sizeof(int) * 8 - 8)); else 
_sym_set_return_expression(nullptr); return result; } -int SYM(getchar)(void) { - return SYM(getc)(stdin); -} +int SYM(getchar)(void) { return SYM(getc)(stdin); } int SYM(ungetc)(int c, FILE *stream) { auto result = ungetc(c, stream); diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index 32081e3e..eddd93d0 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -14,10 +14,15 @@ #include +#include #include #include +#include #include +#include +#include +#include "Config.h" #include "GarbageCollection.h" #include "RuntimeCommon.h" #include "Shadow.h" @@ -196,3 +201,21 @@ SymExpr _sym_build_insert(SymExpr target, SymExpr to_insert, uint64_t offset, void _sym_register_expression_region(SymExpr *start, size_t length) { registerExpressionRegion({start, length}); } + +void _sym_make_symbolic(void *data, size_t byte_length, size_t input_offset) { + ReadWriteShadow shadow(data, byte_length); + uint8_t *data_bytes = reinterpret_cast(data); + std::generate(shadow.begin(), shadow.end(), [&, i = 0]() mutable { + return _sym_get_input_byte(input_offset++, data_bytes[i++]); + }); +} + +void symcc_make_symbolic(void *start, size_t byte_length) { + if (!std::holds_alternative(g_config.input)) + throw std::runtime_error{"Calls to symcc_make_symbolic aren't allowed when " + "SYMCC_MEMORY_INPUT isn't set"}; + + static size_t inputOffset = 0; // track the offset across calls + _sym_make_symbolic(start, byte_length, inputOffset); + inputOffset += byte_length; +} diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index d4a20a4b..8677513f 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -26,9 +26,11 @@ #define RUNTIMECOMMON_H #ifdef __cplusplus +#include #include extern "C" { #else +#include #include #endif @@ -141,7 +143,8 @@ SymExpr _sym_get_return_expression(void); */ void _sym_push_path_constraint(SymExpr constraint, int taken, uintptr_t site_id); -SymExpr _sym_get_input_byte(size_t offset); +SymExpr 
_sym_get_input_byte(size_t offset, uint8_t concrete_value); +void _sym_make_symbolic(void *data, size_t byte_length, size_t input_offset); /* * Memory management @@ -176,6 +179,14 @@ bool _sym_feasible(SymExpr expr); void _sym_register_expression_region(SymExpr *start, size_t length); void _sym_collect_garbage(void); +/* + * Symbolic input from memory + * + * This is the only function in the interface that we expect to be called by + * users (i.e., calls to it aren't auto-generated by our compiler pass). + */ +void symcc_make_symbolic(void *start, size_t byte_length); + #ifdef __cplusplus } #endif diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index 4b42bdfc..b4558a43 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #if HAVE_FILESYSTEM #include @@ -46,6 +47,7 @@ #endif // C +#include #include // Qsym @@ -77,11 +79,6 @@ namespace { /// Indicate whether the runtime has been initialized. std::atomic_flag g_initialized = ATOMIC_FLAG_INIT; -/// The file that contains out input. -std::string inputFileName; - -void deleteInputFile() { std::remove(inputFileName.c_str()); } - /// A mapping of all expressions that we have ever received from Qsym to the /// corresponding shared pointers on the heap. /// @@ -105,6 +102,34 @@ SymExpr registerExpression(const qsym::ExprRef &expr) { return rawExpr; } +/// A Qsym solver that doesn't require the entire input on initialization. +class EnhancedQsymSolver : public qsym::Solver { + // Warning! + // + // We can't override methods of qsym::Solver. None of them are declared + // virtual, and the Qsym code refers to the solver with a pointer of type + // qsym::Solver*, so it will always choose the implementation of the base + // class. What we can do, though, is add new functions that access the data + // members of the base class. 
+ // + // Subclassing the Qsym solver is ugly but helps us to avoid making too many + // changes in the Qsym codebase. + +public: + EnhancedQsymSolver() + : qsym::Solver("/dev/null", g_config.outputDir, g_config.aflCoverageMap) { + } + + void pushInputByte(size_t offset, uint8_t value) { + if (inputs_.size() <= offset) + inputs_.resize(offset + 1); + + inputs_[offset] = value; + } +}; + +EnhancedQsymSolver *g_enhanced_solver; + } // namespace using namespace qsym; @@ -122,7 +147,7 @@ void _sym_initialize(void) { loadConfig(); initLibcWrappers(); std::cerr << "This is SymCC running with the QSYM backend" << std::endl; - if (g_config.fullyConcrete) { + if (std::holds_alternative(g_config.input)) { std::cerr << "Performing fully concrete execution (i.e., without symbolic input)" << std::endl; @@ -138,42 +163,9 @@ void _sym_initialize(void) { exit(-1); } - // Qsym requires the full input in a file - if (g_config.inputFile.empty()) { - std::cerr << "Reading program input until EOF (use Ctrl+D in a terminal)..." 
- << std::endl; - std::istreambuf_iterator in_begin(std::cin), in_end; - std::vector inputData(in_begin, in_end); - inputFileName = std::tmpnam(nullptr); - std::ofstream inputFile(inputFileName, std::ios::trunc); - std::copy(inputData.begin(), inputData.end(), - std::ostreambuf_iterator(inputFile)); - inputFile.close(); - -#ifdef DEBUG_RUNTIME - std::cerr << "Loaded input:" << std::endl; - std::copy(inputData.begin(), inputData.end(), - std::ostreambuf_iterator(std::cerr)); - std::cerr << std::endl; -#endif - - atexit(deleteInputFile); - - // Restore some semblance of standard input - auto *newStdin = freopen(inputFileName.c_str(), "r", stdin); - if (newStdin == nullptr) { - perror("Failed to reopen stdin"); - exit(-1); - } - } else { - inputFileName = g_config.inputFile; - std::cerr << "Making data read from " << inputFileName << " as symbolic" - << std::endl; - } - g_z3_context = new z3::context{}; - g_solver = - new Solver(inputFileName, g_config.outputDir, g_config.aflCoverageMap); + g_enhanced_solver = new EnhancedQsymSolver{}; + g_solver = g_enhanced_solver; // for Qsym-internal use g_expr_builder = g_config.pruning ? 
PruneExprBuilder::create() : SymbolicExprBuilder::create(); } @@ -289,7 +281,8 @@ void _sym_push_path_constraint(SymExpr constraint, int taken, g_solver->addJcc(allocatedExpressions.at(constraint), taken != 0, site_id); } -SymExpr _sym_get_input_byte(size_t offset) { +SymExpr _sym_get_input_byte(size_t offset, uint8_t value) { + g_enhanced_solver->pushInputByte(offset, value); return registerExpression(g_expr_builder->createRead(offset)); } diff --git a/runtime/simple_backend/Runtime.cpp b/runtime/simple_backend/Runtime.cpp index c6542d74..50dbc8df 100644 --- a/runtime/simple_backend/Runtime.cpp +++ b/runtime/simple_backend/Runtime.cpp @@ -178,7 +178,7 @@ Z3_ast _sym_build_float(double value, int is_double) { return result; } -Z3_ast _sym_get_input_byte(size_t offset) { +Z3_ast _sym_get_input_byte(size_t offset, uint8_t) { static std::vector stdinBytes; if (offset < stdinBytes.size()) diff --git a/test/memory_input.c b/test/memory_input.c new file mode 100644 index 00000000..58f55702 --- /dev/null +++ b/test/memory_input.c @@ -0,0 +1,51 @@ +// This file is part of SymCC. +// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see . 
+ +// RUN: %symcc -O2 %s -o %t +// RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s +#include +#include +#include + +void symcc_make_symbolic(void *start, size_t byte_length); + +uint64_t g_value = 0xaaaabbbbccccdddd; + +int main(int argc, char *argv[]) { + uint64_t x = 10; + uint8_t y = 0; + + symcc_make_symbolic(&x, sizeof(x)); + symcc_make_symbolic(&y, sizeof(y)); + + fprintf(stderr, "%s\n", (x == g_value) ? "yes" : "no"); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // SIMPLE-DAG: #xaa + // SIMPLE-DAG: #xbb + // SIMPLE-DAG: #xcc + // SIMPLE-DAG: #xdd + // QSYM-COUNT-2: SMT + // ANY: no + + fprintf(stderr, "%s\n", (y == 10) ? "yes" : "no"); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // y should be part of the input, just after x + // SIMPLE: stdin8 -> #x0a + // QSYM-COUNT-2: SMT + // ANY: no + + return 0; +} From 7a5ef5885d92b034376f99e7f0f4d9f8a2ef6cfb Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Fri, 18 Nov 2022 14:51:20 +0100 Subject: [PATCH 18/64] Run GitHub actions for pull requests only No need for "push": the "pull_request" event already triggers when new commits are pushed to the PR branch, and we expect all changes to go through a PR. 
--- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 9d00bbd0..eb5ae3ca 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -1,5 +1,5 @@ name: Compile and test SymCC -on: [push, pull_request] +on: [pull_request] jobs: build_and_test_symcc: runs-on: ubuntu-20.04 From 505ce4d814574b8b040d0b58a9f8c0865947c52f Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Mon, 21 Nov 2022 15:40:48 +0100 Subject: [PATCH 19/64] Rebase QSYM on the upstream master branch --- runtime/qsym_backend/qsym | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/qsym_backend/qsym b/runtime/qsym_backend/qsym index 6cba7f99..bbc87e42 160000 --- a/runtime/qsym_backend/qsym +++ b/runtime/qsym_backend/qsym @@ -1 +1 @@ -Subproject commit 6cba7f996fa2568dcc02e632a72d9931fdf30f70 +Subproject commit bbc87e423d10af86b4028589a1be70dd962c6f49 From 54ebd5c5af88e9db3f174286122e02ff105950e7 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Mon, 21 Nov 2022 15:43:48 +0100 Subject: [PATCH 20/64] Make it possible to trigger the CI pipeline manually --- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index eb5ae3ca..e3bcb936 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -1,5 +1,5 @@ name: Compile and test SymCC -on: [pull_request] +on: [pull_request, workflow_dispatch] jobs: build_and_test_symcc: runs-on: ubuntu-20.04 From fbbca653f618e8a6074675f6e6a95b846a87dc08 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Tue, 22 Nov 2022 09:46:59 +0100 Subject: [PATCH 21/64] Add support for custom test-case handlers The program under test (or a wrapper around it) can now call symcc_set_test_case_handler to define a function that will be invoked for each newly generated test case. 
--- docs/Configuration.txt | 5 ++- runtime/RuntimeCommon.h | 6 ++- runtime/qsym_backend/Runtime.cpp | 51 +++++++++++++++++++------- runtime/qsym_backend/qsym | 2 +- runtime/simple_backend/Runtime.cpp | 14 ++++++- test/test_case_handler.c | 59 ++++++++++++++++++++++++++++++ 6 files changed, 117 insertions(+), 20 deletions(-) create mode 100644 test/test_case_handler.c diff --git a/docs/Configuration.txt b/docs/Configuration.txt index 4e743aa8..f36d7605 100644 --- a/docs/Configuration.txt +++ b/docs/Configuration.txt @@ -56,7 +56,10 @@ environment variables. uninstrumented counterparts. - SYMCC_OUTPUT_DIR (default "/tmp/output"): This is the directory where SymCC - will store new inputs (QSYM backend only). + will store new inputs (QSYM backend only). If you prefer to handle them + programatically, make your program call symcc_set_test_case_handler; the + handler will be called instead of the default handler each time the backend + generates a new input. - SYMCC_INPUT_FILE (default empty): When empty, SymCC treats data read from standard input as symbolic; when set to a file name, any data read from that diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index 8677513f..6a971e4f 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -180,12 +180,14 @@ void _sym_register_expression_region(SymExpr *start, size_t length); void _sym_collect_garbage(void); /* - * Symbolic input from memory + * User-facing functionality * - * This is the only function in the interface that we expect to be called by + * These are the only functions in the interface that we expect to be called by * users (i.e., calls to it aren't auto-generated by our compiler pass). 
*/ void symcc_make_symbolic(void *start, size_t byte_length); +typedef void (*TestCaseHandler)(const uint8_t *, size_t); +void symcc_set_test_case_handler(TestCaseHandler handler); #ifdef __cplusplus } diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index b4558a43..f4df9906 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -13,7 +13,7 @@ // SymCC. If not, see . // -// Definitions that we need for the Qsym backend +// Definitions that we need for the QSYM backend // #include "Runtime.h" @@ -50,7 +50,7 @@ #include #include -// Qsym +// QSYM #include #include #include @@ -79,7 +79,7 @@ namespace { /// Indicate whether the runtime has been initialized. std::atomic_flag g_initialized = ATOMIC_FLAG_INIT; -/// A mapping of all expressions that we have ever received from Qsym to the +/// A mapping of all expressions that we have ever received from QSYM to the /// corresponding shared pointers on the heap. /// /// We can't expect C clients to handle std::shared_ptr, so we maintain a single @@ -102,18 +102,24 @@ SymExpr registerExpression(const qsym::ExprRef &expr) { return rawExpr; } -/// A Qsym solver that doesn't require the entire input on initialization. +/// The user-provided test case handler, if any. +/// +/// If the user doesn't register a handler, we use QSYM's default behavior of +/// writing the test case to a file in the output directory. +TestCaseHandler g_test_case_handler = nullptr; + +/// A QSYM solver that doesn't require the entire input on initialization. class EnhancedQsymSolver : public qsym::Solver { // Warning! // - // We can't override methods of qsym::Solver. None of them are declared - // virtual, and the Qsym code refers to the solver with a pointer of type - // qsym::Solver*, so it will always choose the implementation of the base - // class. What we can do, though, is add new functions that access the data - // members of the base class. 
+ // Before we can override methods of qsym::Solver, we need to declare them + // virtual because the QSYM code refers to the solver with a pointer of type + // qsym::Solver*; for non-virtual methods, it will always choose the + // implementation of the base class. Beware that making a method virtual adds + // a small performance overhead and requires us to change QSYM code. // - // Subclassing the Qsym solver is ugly but helps us to avoid making too many - // changes in the Qsym codebase. + // Subclassing the QSYM solver is ugly but helps us to avoid making too many + // changes in the QSYM codebase. public: EnhancedQsymSolver() @@ -126,6 +132,15 @@ class EnhancedQsymSolver : public qsym::Solver { inputs_[offset] = value; } + + void saveValues(const std::string &suffix) override { + if (auto handler = g_test_case_handler) { + auto values = getConcreteValues(); + handler(values.data(), values.size()); + } else { + Solver::saveValues(suffix); + } + } }; EnhancedQsymSolver *g_enhanced_solver; @@ -165,13 +180,13 @@ void _sym_initialize(void) { g_z3_context = new z3::context{}; g_enhanced_solver = new EnhancedQsymSolver{}; - g_solver = g_enhanced_solver; // for Qsym-internal use + g_solver = g_enhanced_solver; // for QSYM-internal use g_expr_builder = g_config.pruning ? PruneExprBuilder::create() : SymbolicExprBuilder::create(); } SymExpr _sym_build_integer(uint64_t value, uint8_t bits) { - // Qsym's API takes uintptr_t, so we need to be careful when compiling for + // QSYM's API takes uintptr_t, so we need to be careful when compiling for // 32-bit systems: the compiler would helpfully truncate our uint64_t to fit // into 32 bits. 
if constexpr (sizeof(uint64_t) == sizeof(uintptr_t)) { @@ -304,7 +319,7 @@ SymExpr _sym_build_bool_to_bit(SymExpr expr) { } // -// Floating-point operations (unsupported in Qsym) +// Floating-point operations (unsupported in QSYM) // #define UNSUPPORTED(prototype) \ @@ -417,3 +432,11 @@ void _sym_collect_garbage() { << " milliseconds)" << std::endl; #endif } + +// +// Test-case handling +// + +void symcc_set_test_case_handler(TestCaseHandler handler) { + g_test_case_handler = handler; +} diff --git a/runtime/qsym_backend/qsym b/runtime/qsym_backend/qsym index bbc87e42..300300e8 160000 --- a/runtime/qsym_backend/qsym +++ b/runtime/qsym_backend/qsym @@ -1 +1 @@ -Subproject commit bbc87e423d10af86b4028589a1be70dd962c6f49 +Subproject commit 300300e8f417e057f918eb8fd696290aec3e1a69 diff --git a/runtime/simple_backend/Runtime.cpp b/runtime/simple_backend/Runtime.cpp index 50dbc8df..16d308fb 100644 --- a/runtime/simple_backend/Runtime.cpp +++ b/runtime/simple_backend/Runtime.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -408,8 +409,7 @@ Z3_ast _sym_build_float_to_unsigned_integer(Z3_ast expr, uint8_t bits) { } Z3_ast _sym_build_bool_to_bit(Z3_ast expr) { - return registerExpression(Z3_mk_ite(g_context, expr, - _sym_build_integer(1, 1), + return registerExpression(Z3_mk_ite(g_context, expr, _sym_build_integer(1, 1), _sym_build_integer(0, 1))); } @@ -544,3 +544,13 @@ void _sym_collect_garbage() { << " milliseconds)" << std::endl; #endif } + +/* Test-case handling */ +void symcc_set_test_case_handler(TestCaseHandler) { + // The simple backend doesn't support test-case handlers. However, let's not + // make this a fatal error; otherwise, users would have to change their + // programs to make them work with the simple backend. 
+ fprintf( + g_log, + "Warning: test-case handlers aren't supported in the simple backend\n"); +} diff --git a/test/test_case_handler.c b/test/test_case_handler.c new file mode 100644 index 00000000..e1dc4a23 --- /dev/null +++ b/test/test_case_handler.c @@ -0,0 +1,59 @@ +// This file is part of SymCC. +// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see . + +// RUN: %symcc -O2 %s -o %t +// RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s +#include +#include +#include + +#define MAGIC 0xab + +void symcc_make_symbolic(void *start, size_t byte_length); +typedef void (*TestCaseHandler)(const uint8_t *, size_t); +void symcc_set_test_case_handler(TestCaseHandler handler); + +int solved = 0; +int num_test_cases = 0; + +void handle_test_case(const uint8_t *data, size_t data_length) { + num_test_cases++; + if (data_length == 1 && data[0] == MAGIC) + solved = 1; +} + +int main(int argc, char *argv[]) { + symcc_set_test_case_handler(handle_test_case); + // SIMPLE: Warning: test-case handlers + + uint8_t input = 0; + symcc_make_symbolic(&input, sizeof(input)); + + fprintf(stderr, "%s\n", (input == MAGIC) ? 
"yes" : "no"); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // SIMPLE: stdin0 -> #xab + // QSYM: SMT + // ANY: no + + fprintf(stderr, "%d\n", solved); + // QSYM: 1 + // SIMPLE: 0 + + fprintf(stderr, "%d\n", num_test_cases); + // QSYM: 1 + // SIMPLE: 0 + + return 0; +} From 21930cb805284254cf3a9837ca510129230d95d5 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Tue, 22 Nov 2022 11:13:06 +0100 Subject: [PATCH 22/64] Make the test-case handler type consistent with symcc_make_symbolic Memory comes in as void*, so we should also return it with that type; users can cast the pointer to anything they want. --- runtime/RuntimeCommon.h | 2 +- test/test_case_handler.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index 6a971e4f..cbdcf49d 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -186,7 +186,7 @@ void _sym_collect_garbage(void); * users (i.e., calls to it aren't auto-generated by our compiler pass). 
*/ void symcc_make_symbolic(void *start, size_t byte_length); -typedef void (*TestCaseHandler)(const uint8_t *, size_t); +typedef void (*TestCaseHandler)(const void *, size_t); void symcc_set_test_case_handler(TestCaseHandler handler); #ifdef __cplusplus diff --git a/test/test_case_handler.c b/test/test_case_handler.c index e1dc4a23..11819754 100644 --- a/test/test_case_handler.c +++ b/test/test_case_handler.c @@ -21,15 +21,15 @@ #define MAGIC 0xab void symcc_make_symbolic(void *start, size_t byte_length); -typedef void (*TestCaseHandler)(const uint8_t *, size_t); +typedef void (*TestCaseHandler)(const void *, size_t); void symcc_set_test_case_handler(TestCaseHandler handler); int solved = 0; int num_test_cases = 0; -void handle_test_case(const uint8_t *data, size_t data_length) { +void handle_test_case(const void *data, size_t data_length) { num_test_cases++; - if (data_length == 1 && data[0] == MAGIC) + if (data_length == 1 && ((const uint8_t *)data)[0] == MAGIC) solved = 1; } From e4c4866f49e6b4edad423fb03ae57ec4413460c7 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Mon, 21 Nov 2022 11:52:46 +0100 Subject: [PATCH 23/64] Change the license of the SymCC runtime to LGPL This commit closes #114. 
--- LICENSE.lgpl | 165 ++++++++++++++++++++++++++ README.md | 10 +- runtime/CMakeLists.txt | 21 ++-- runtime/Config.cpp | 21 ++-- runtime/Config.h | 21 ++-- runtime/GarbageCollection.cpp | 21 ++-- runtime/GarbageCollection.h | 21 ++-- runtime/LibcWrappers.cpp | 21 ++-- runtime/LibcWrappers.h | 21 ++-- runtime/RuntimeCommon.cpp | 21 ++-- runtime/RuntimeCommon.h | 21 ++-- runtime/Shadow.cpp | 21 ++-- runtime/Shadow.h | 21 ++-- runtime/qsym_backend/CMakeLists.txt | 21 ++-- runtime/qsym_backend/Runtime.cpp | 21 ++-- runtime/qsym_backend/Runtime.h | 21 ++-- runtime/qsym_backend/pin.H | 21 ++-- runtime/simple_backend/CMakeLists.txt | 21 ++-- runtime/simple_backend/Runtime.cpp | 21 ++-- runtime/simple_backend/Runtime.h | 21 ++-- 20 files changed, 371 insertions(+), 182 deletions(-) create mode 100644 LICENSE.lgpl diff --git a/LICENSE.lgpl b/LICENSE.lgpl new file mode 100644 index 00000000..0a041280 --- /dev/null +++ b/LICENSE.lgpl @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. 
+ + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. 
+ + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/README.md b/README.md index 0bb20644..2d639036 100644 --- a/README.md +++ b/README.md @@ -270,12 +270,18 @@ SymCC is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. +As an exception from the above, you can redistribute and/or modify the SymCC +runtime under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at your +option) any later version. See #114 for the rationale. + SymCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along with -SymCC. If not, see . +You should have received a copy of the GNU General Public License and the GNU +Lesser General Public License along with SymCC. 
If not, see +. The following pieces of software have additional or alternate copyrights, licenses, and/or restrictions: diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 637fc1ce..07277b8f 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -1,16 +1,17 @@ -# This file is part of SymCC. +# This file is part of the SymCC runtime. # -# SymCC is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. +# The SymCC runtime is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your option) +# any later version. # -# SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# The SymCC runtime is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +# for more details. # -# You should have received a copy of the GNU General Public License along with -# SymCC. If not, see . +# You should have received a copy of the GNU Lesser General Public License along +# with the SymCC runtime. If not, see . cmake_minimum_required(VERSION 3.5) project(SymRuntime) diff --git a/runtime/Config.cpp b/runtime/Config.cpp index 0088a4d1..23a28df6 100644 --- a/runtime/Config.cpp +++ b/runtime/Config.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. 
// -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #include "Config.h" diff --git a/runtime/Config.h b/runtime/Config.h index 4ed8f806..a866821c 100644 --- a/runtime/Config.h +++ b/runtime/Config.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. 
+// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef CONFIG_H #define CONFIG_H diff --git a/runtime/GarbageCollection.cpp b/runtime/GarbageCollection.cpp index 8c1edc37..8afdd327 100644 --- a/runtime/GarbageCollection.cpp +++ b/runtime/GarbageCollection.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. 
// -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #include "GarbageCollection.h" diff --git a/runtime/GarbageCollection.h b/runtime/GarbageCollection.h index da77ff83..81b0b8c2 100644 --- a/runtime/GarbageCollection.h +++ b/runtime/GarbageCollection.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef GARBAGECOLLECTION_H #define GARBAGECOLLECTION_H diff --git a/runtime/LibcWrappers.cpp b/runtime/LibcWrappers.cpp index 6e9c6c90..41ce2a34 100644 --- a/runtime/LibcWrappers.cpp +++ b/runtime/LibcWrappers.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see .
#include #include diff --git a/runtime/LibcWrappers.h b/runtime/LibcWrappers.h index d84c1f47..2304a3a0 100644 --- a/runtime/LibcWrappers.h +++ b/runtime/LibcWrappers.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef LIBCWRAPPERS_H #define LIBCWRAPPERS_H diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index eddd93d0..9e0bb141 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. 
// -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . #include diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index cbdcf49d..893853cb 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -8,19 +8,20 @@ // Whoever uses this file has to define the type "SymExpr" first; we use it to // keep this header independent of the back-end implementation. -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. 
+// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef RUNTIMECOMMON_H #define RUNTIMECOMMON_H diff --git a/runtime/Shadow.cpp b/runtime/Shadow.cpp index 2b69a08a..2852fad1 100644 --- a/runtime/Shadow.cpp +++ b/runtime/Shadow.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . #include "Shadow.h" diff --git a/runtime/Shadow.h b/runtime/Shadow.h index 967e11ca..36979bdd 100644 --- a/runtime/Shadow.h +++ b/runtime/Shadow.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . 
+// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef SHADOW_H #define SHADOW_H diff --git a/runtime/qsym_backend/CMakeLists.txt b/runtime/qsym_backend/CMakeLists.txt index feaea43c..5977451f 100644 --- a/runtime/qsym_backend/CMakeLists.txt +++ b/runtime/qsym_backend/CMakeLists.txt @@ -1,16 +1,17 @@ -# This file is part of SymCC. +# This file is part of the SymCC runtime. # -# SymCC is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. +# The SymCC runtime is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your option) +# any later version. # -# SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# The SymCC runtime is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +# for more details. # -# You should have received a copy of the GNU General Public License along with -# SymCC. If not, see . +# You should have received a copy of the GNU Lesser General Public License along +# with SymCC. If not, see . # Build the parts of the Qsym backend that are relevant for us diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index f4df9906..7a0c388b 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. 
+// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . // // Definitions that we need for the QSYM backend diff --git a/runtime/qsym_backend/Runtime.h b/runtime/qsym_backend/Runtime.h index e0a23526..8f19d2af 100644 --- a/runtime/qsym_backend/Runtime.h +++ b/runtime/qsym_backend/Runtime.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. 
+// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef RUNTIME_H #define RUNTIME_H diff --git a/runtime/qsym_backend/pin.H b/runtime/qsym_backend/pin.H index 84e8f7c6..083d79a8 100644 --- a/runtime/qsym_backend/pin.H +++ b/runtime/qsym_backend/pin.H @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. 
// -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef PIN_H #define PIN_H diff --git a/runtime/simple_backend/CMakeLists.txt b/runtime/simple_backend/CMakeLists.txt index baaff20b..64822050 100644 --- a/runtime/simple_backend/CMakeLists.txt +++ b/runtime/simple_backend/CMakeLists.txt @@ -1,16 +1,17 @@ -# This file is part of SymCC. +# This file is part of the SymCC runtime. # -# SymCC is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. +# The SymCC runtime is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your option) +# any later version. # -# SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+# The SymCC runtime is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +# for more details. # -# You should have received a copy of the GNU General Public License along with -# SymCC. If not, see . +# You should have received a copy of the GNU Lesser General Public License along +# with SymCC. If not, see . find_package(Z3 4 CONFIG) if (NOT Z3_FOUND) diff --git a/runtime/simple_backend/Runtime.cpp b/runtime/simple_backend/Runtime.cpp index 16d308fb..bb74abd4 100644 --- a/runtime/simple_backend/Runtime.cpp +++ b/runtime/simple_backend/Runtime.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . 
+// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #include diff --git a/runtime/simple_backend/Runtime.h b/runtime/simple_backend/Runtime.h index 953d7db0..66fe8b91 100644 --- a/runtime/simple_backend/Runtime.h +++ b/runtime/simple_backend/Runtime.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . 
#ifndef RUNTIME_H #define RUNTIME_H From 08192b8dc48aedc3a870832a95ab9160d7e6f8a1 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Wed, 23 Nov 2022 10:52:11 +0100 Subject: [PATCH 24/64] Add a guideline for contributors --- CONTRIBUTING.md | 13 +++++++++++++ README.md | 7 ++++--- test/README | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 CONTRIBUTING.md create mode 120000 test/README diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..15f6c8da --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,13 @@ +# Contributing to SymCC + +We encourage everyone to contribute improvements and bug fixes to SymCC. Our +preferred way of accepting contributions is via GitHub pull requests. Please be +sure to run clang-format on any C/C++ code you change, and ideally add a test to +your patch. Unfortunately, since the project is a bit short on developers at the +moment, we have to ask for your patience while we review your PR. + +Please note that any contributions you make are licensed under the same terms as +the code you're contributing to, as per the GitHub Terms of Service, [section +D.6](https://docs.github.com/en/site-policy/github-terms/github-terms-of-service#6-contributions-under-repository-license). +At the time of writing, this means LGPL (version 3 or later) for the SymCC +runtime, and GPL (version 3 or later) for the rest of SymCC. diff --git a/README.md b/README.md index 2d639036..d6f81afb 100644 --- a/README.md +++ b/README.md @@ -286,7 +286,8 @@ Lesser General Public License along with SymCC. 
If not, see The following pieces of software have additional or alternate copyrights, licenses, and/or restrictions: -| Program | Directory | -| --- | --- | -| QSYM | `runtime/qsym_backend/qsym` | +| Program | Directory | +|---------------|-----------------------------| +| QSYM | `runtime/qsym_backend/qsym` | +| SymCC runtime | `runtime` | diff --git a/test/README b/test/README new file mode 120000 index 00000000..410eae0b --- /dev/null +++ b/test/README @@ -0,0 +1 @@ +../docs/Testing.txt \ No newline at end of file From 131b4b79ab08c1764545ac0b670f08e6b03cf19e Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Tue, 29 Nov 2022 11:45:46 +0100 Subject: [PATCH 25/64] Handle insertvalue instructions with undef target We weren't able to generate symbolic expressions for insertvalue instructions whose target operand was "undef". --- compiler/Runtime.cpp | 3 ++- compiler/Runtime.h | 1 + compiler/Symbolizer.cpp | 27 +++++++++++++++++++-------- runtime/RuntimeCommon.cpp | 11 +++++++++++ runtime/RuntimeCommon.h | 1 + 5 files changed, 34 insertions(+), 9 deletions(-) diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index ba0f3d83..43a089bc 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -147,6 +147,7 @@ Runtime::Runtime(Module &M) { import(M, "_sym_read_memory", ptrT, intPtrType, intPtrType, int8T); writeMemory = import(M, "_sym_write_memory", voidT, intPtrType, intPtrType, ptrT, int8T); + buildZeroBytes = import(M, "_sym_build_zero_bytes", ptrT, intPtrType); buildInsert = import(M, "_sym_build_insert", ptrT, ptrT, ptrT, IRB.getInt64Ty(), int8T); buildExtract = import(M, "_sym_build_extract", ptrT, ptrT, IRB.getInt64Ty(), @@ -163,7 +164,7 @@ bool isInterceptedFunction(const Function &f) { "malloc", "calloc", "mmap", "mmap64", "open", "read", "lseek", "lseek64", "fopen", "fopen64", "fread", "fseek", "fseeko", "rewind", "fseeko64", "getc", "ungetc", "memcpy", "memset", "strncpy", "strchr", - "memcmp", "memmove", "ntohl", "fgets", "fgetc", 
"getchar"}; + "memcmp", "memmove", "ntohl", "fgets", "fgetc", "getchar"}; return (kInterceptedFunctions.count(f.getName()) > 0); } diff --git a/compiler/Runtime.h b/compiler/Runtime.h index 519f9f00..5f72573c 100644 --- a/compiler/Runtime.h +++ b/compiler/Runtime.h @@ -60,6 +60,7 @@ struct Runtime { SymFnT memmove{}; SymFnT readMemory{}; SymFnT writeMemory{}; + SymFnT buildZeroBytes{}; SymFnT buildInsert{}; SymFnT buildExtract{}; SymFnT notifyCall{}; diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index 05f5a6b5..b3eb7b9b 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -887,15 +887,26 @@ CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { // member. However, this would put an additional burden on the handling of // cast instructions, because expressions would have to be converted // between different representations according to the type. + // + // Unfortunately, the hack doesn't work when the entire structure is + // "undef"; writing it to memory is a well-defined bitcode operation, but + // the symbolic expression for the memory region will just be null because + // it's entirely concrete. We create an all-zeros expression for it instead. 
- auto *memory = IRB.CreateAlloca(V->getType()); - IRB.CreateStore(V, memory); - return IRB.CreateCall( - runtime.readMemory, - {IRB.CreatePtrToInt(memory, intPtrType), - ConstantInt::get(intPtrType, - dataLayout.getTypeStoreSize(V->getType())), - IRB.getInt8(0)}); + if (isa(V)) { + return IRB.CreateCall( + runtime.buildZeroBytes, + {ConstantInt::get(intPtrType, + dataLayout.getTypeStoreSize(valueType))}); + } else { + auto *memory = IRB.CreateAlloca(valueType); + IRB.CreateStore(V, memory); + return IRB.CreateCall( + runtime.readMemory, + {IRB.CreatePtrToInt(memory, intPtrType), + ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(valueType)), + IRB.getInt8(0)}); + } } llvm_unreachable("Unhandled type for constant expression"); diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index 9e0bb141..ff508291 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -199,6 +199,17 @@ SymExpr _sym_build_insert(SymExpr target, SymExpr to_insert, uint64_t offset, return result; } +SymExpr _sym_build_zero_bytes(size_t length) { + auto zero_byte = _sym_build_integer(0, 8); + + auto result = zero_byte; + for (size_t i = 1; i < length; i++) { + result = _sym_concat_helper(result, zero_byte); + } + + return result; +} + void _sym_register_expression_region(SymExpr *start, size_t length) { registerExpressionRegion({start, length}); } diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index 893853cb..01047676 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -156,6 +156,7 @@ void _sym_write_memory(uint8_t *addr, size_t length, SymExpr expr, void _sym_memcpy(uint8_t *dest, const uint8_t *src, size_t length); void _sym_memset(uint8_t *memory, SymExpr value, size_t length); void _sym_memmove(uint8_t *dest, const uint8_t *src, size_t length); +SymExpr _sym_build_zero_bytes(size_t length); SymExpr _sym_build_insert(SymExpr target, SymExpr to_insert, uint64_t offset, bool little_endian); SymExpr 
_sym_build_extract(SymExpr expr, uint64_t offset, uint64_t length, From 339d3cb0fa5ea2b966d5d0b7ae2b9f7d3eb51697 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Tue, 29 Nov 2022 14:12:33 +0100 Subject: [PATCH 26/64] Handle floating-point inserts and extracts We need to convert the corresponding expressions to/from bit-vectors before/after the operation. --- compiler/Symbolizer.cpp | 78 ++++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index b3eb7b9b..9e8cb608 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -764,30 +764,68 @@ void Symbolizer::visitPHINode(PHINode &I) { void Symbolizer::visitInsertValueInst(InsertValueInst &I) { IRBuilder<> IRB(&I); - auto insert = buildRuntimeCall( - IRB, runtime.buildInsert, - {{I.getAggregateOperand(), true}, - {I.getInsertedValueOperand(), true}, - {IRB.getInt64(aggregateMemberOffset(I.getAggregateOperand()->getType(), - I.getIndices())), - false}, - {IRB.getInt8(isLittleEndian(I.getInsertedValueOperand()->getType()) ? 1 - : 0), - false}}); - registerSymbolicComputation(insert, &I); + auto target = I.getAggregateOperand(); + auto insertedValue = I.getInsertedValueOperand(); + auto insertedValueType = insertedValue->getType(); + + if (getSymbolicExpression(target) == nullptr && + getSymbolicExpression(insertedValue) == nullptr) + return; + + auto insertedValueExpr = getSymbolicExpressionOrNull(insertedValue); + + // Floating-point values are a distinct kind in the solver, so we need to + // convert them to bit vectors before we can insert them into the expression + // for the aggregate. 
+ Input symbolicInput; + if (insertedValueType->isFloatingPointTy()) { + auto floatConversion = IRB.CreateCall( + runtime.buildFloatToBits, + {insertedValueExpr, IRB.getInt1(insertedValueType->isDoubleTy())}); + symbolicInput = {insertedValue, 0, floatConversion}; + insertedValueExpr = floatConversion; + } + + auto result = IRB.CreateCall( + runtime.buildInsert, + {getSymbolicExpressionOrNull(target), insertedValueExpr, + IRB.getInt64(aggregateMemberOffset(target->getType(), I.getIndices())), + IRB.getInt8(isLittleEndian(insertedValueType) ? 1 : 0)}); + + if (!insertedValueType->isFloatingPointTy()) + symbolicInput = {insertedValue, 1, result}; + + registerSymbolicComputation( + {symbolicInput.user, result, {{target, 0, result}, symbolicInput}}, &I); } void Symbolizer::visitExtractValueInst(ExtractValueInst &I) { IRBuilder<> IRB(&I); - auto extract = buildRuntimeCall( - IRB, runtime.buildExtract, - {{I.getAggregateOperand(), true}, - {IRB.getInt64(aggregateMemberOffset(I.getAggregateOperand()->getType(), - I.getIndices())), - false}, - {IRB.getInt64(dataLayout.getTypeStoreSize(I.getType())), false}, - {IRB.getInt8(isLittleEndian(I.getType()) ? 1 : 0), false}}); - registerSymbolicComputation(extract, &I); + auto target = I.getAggregateOperand(); + auto targetExpr = getSymbolicExpression(target); + auto resultType = I.getType(); + + if (targetExpr == nullptr) + return; + + auto extractedBits = IRB.CreateCall( + runtime.buildExtract, + {targetExpr, + IRB.getInt64(aggregateMemberOffset(target->getType(), I.getIndices())), + IRB.getInt64(dataLayout.getTypeStoreSize(resultType)), + IRB.getInt8(isLittleEndian(resultType) ? 1 : 0)}); + + // Floating-point values are a distinct kind in the solver. Extracting from an + // aggregate gives us a bit vector, so we need to convert the expression to a + // float if it represents one. + auto result = resultType->isFloatingPointTy() + ? 
IRB.CreateCall(runtime.buildBitsToFloat, + {extractedBits, + IRB.getInt1(resultType->isDoubleTy())}) + : extractedBits; + + registerSymbolicComputation( + {extractedBits, result, {{target, 0, extractedBits}}}, &I); } void Symbolizer::visitSwitchInst(SwitchInst &I) { From 940335550da08ea87cce070fd97f8135b98ea54d Mon Sep 17 00:00:00 2001 From: Emilio Coppa Date: Tue, 13 Dec 2022 16:31:57 +0100 Subject: [PATCH 27/64] Fix issue #112 --- compiler/Runtime.cpp | 1 + compiler/Runtime.h | 11 +++++------ compiler/Symbolizer.cpp | 31 ++++++++++++++++++++++++++++--- runtime/RuntimeCommon.cpp | 4 ++++ runtime/RuntimeCommon.h | 1 + 5 files changed, 39 insertions(+), 9 deletions(-) diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index 43a089bc..67a4aa10 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -70,6 +70,7 @@ Runtime::Runtime(Module &M) { buildBoolOr = import(M, "_sym_build_bool_or", ptrT, ptrT, ptrT); buildBoolXor = import(M, "_sym_build_bool_xor", ptrT, ptrT, ptrT); buildBoolToBit = import(M, "_sym_build_bool_to_bit", ptrT, ptrT); + buildBitToBool = import(M, "_sym_build_bit_to_bool", ptrT, ptrT); pushPathConstraint = import(M, "_sym_push_path_constraint", voidT, ptrT, IRB.getInt1Ty(), intPtrType); diff --git a/compiler/Runtime.h b/compiler/Runtime.h index 5f72573c..5dd72d3b 100644 --- a/compiler/Runtime.h +++ b/compiler/Runtime.h @@ -19,9 +19,9 @@ #include #if LLVM_VERSION_MAJOR >= 9 && LLVM_VERSION_MAJOR < 11 - using SymFnT = llvm::Value *; +using SymFnT = llvm::Value *; #else - using SymFnT = llvm::FunctionCallee; +using SymFnT = llvm::FunctionCallee; #endif /// Runtime functions @@ -50,6 +50,7 @@ struct Runtime { SymFnT buildBoolOr{}; SymFnT buildBoolXor{}; SymFnT buildBoolToBit{}; + SymFnT buildBitToBool{}; SymFnT pushPathConstraint{}; SymFnT getParameterExpression{}; SymFnT setParameterExpression{}; @@ -69,13 +70,11 @@ struct Runtime { /// Mapping from icmp predicates to the functions that build the corresponding /// symbolic expressions. 
- std::array - comparisonHandlers{}; + std::array comparisonHandlers{}; /// Mapping from binary operators to the functions that build the /// corresponding symbolic expressions. - std::array - binaryOperatorHandlers{}; + std::array binaryOperatorHandlers{}; }; bool isInterceptedFunction(const llvm::Function &f); diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index 9e8cb608..54287e60 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -477,6 +477,11 @@ void Symbolizer::visitLoadInst(LoadInst &I) { if (dataType->isFloatingPointTy()) { data = IRB.CreateCall(runtime.buildBitsToFloat, {data, IRB.getInt1(dataType->isDoubleTy())}); + } else if (dataType->isIntegerTy() && dataType->getIntegerBitWidth() == 1) { + /* convert from byte back to a bool (i1) */ + data = IRB.CreateCall(runtime.buildTrunc, + {data, ConstantInt::get(IRB.getInt8Ty(), 1)}); + data = IRB.CreateCall(runtime.buildBitToBool, {data}); } symbolicExpressions[&I] = data; @@ -491,6 +496,12 @@ void Symbolizer::visitStoreInst(StoreInst &I) { auto *dataType = I.getValueOperand()->getType(); if (dataType->isFloatingPointTy()) { data = IRB.CreateCall(runtime.buildFloatToBits, data); + } else if (dataType->isIntegerTy() && dataType->getIntegerBitWidth() == 1) { + /* convert from i1 (bool) to a byte */ + data = IRB.CreateCall(runtime.buildBoolToBit, {data}); + data = IRB.CreateCall( + runtime.buildZExt, + {data, ConstantInt::get(IRB.getInt8Ty(), 7 /* 1 byte */)}); } IRB.CreateCall( @@ -619,11 +630,25 @@ void Symbolizer::visitBitCastInst(BitCastInst &I) { void Symbolizer::visitTruncInst(TruncInst &I) { IRBuilder<> IRB(&I); - auto trunc = buildRuntimeCall( + + if (getSymbolicExpression(I.getOperand(0)) == nullptr) + return; + + SymbolicComputation symbolicComputation; + symbolicComputation.merge(forceBuildRuntimeCall( IRB, runtime.buildTrunc, {{I.getOperand(0), true}, - {IRB.getInt8(I.getDestTy()->getIntegerBitWidth()), false}}); - registerSymbolicComputation(trunc, &I); + 
{IRB.getInt8(I.getDestTy()->getIntegerBitWidth()), false}})); + + if (I.getDestTy()->isIntegerTy() && + I.getDestTy()->getIntegerBitWidth() == 1) { + // convert from byte back to a bool (i1) + symbolicComputation.merge( + forceBuildRuntimeCall(IRB, runtime.buildBitToBool, + {{symbolicComputation.lastInstruction, false}})); + } + + registerSymbolicComputation(symbolicComputation, &I); } void Symbolizer::visitIntToPtrInst(IntToPtrInst &I) { diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index ff508291..2cbebae4 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -231,3 +231,7 @@ void symcc_make_symbolic(void *start, size_t byte_length) { _sym_make_symbolic(start, byte_length, inputOffset); inputOffset += byte_length; } + +SymExpr _sym_build_bit_to_bool(SymExpr expr) { + return _sym_build_equal(expr, _sym_build_integer(0, _sym_bits_helper(expr))); +} \ No newline at end of file diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index 01047676..9078792d 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -123,6 +123,7 @@ SymExpr _sym_build_float_to_bits(SymExpr expr); SymExpr _sym_build_float_to_signed_integer(SymExpr expr, uint8_t bits); SymExpr _sym_build_float_to_unsigned_integer(SymExpr expr, uint8_t bits); SymExpr _sym_build_bool_to_bit(SymExpr expr); +SymExpr _sym_build_bit_to_bool(SymExpr expr); /* * Bit-array helpers From 4d97b13f04097fe0070d4b1fd9dcc1eb305c9e52 Mon Sep 17 00:00:00 2001 From: Emilio Coppa Date: Wed, 14 Dec 2022 12:33:20 +0100 Subject: [PATCH 28/64] fix mistake --- runtime/RuntimeCommon.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index 2cbebae4..baa86107 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -233,5 +233,6 @@ void symcc_make_symbolic(void *start, size_t byte_length) { } SymExpr _sym_build_bit_to_bool(SymExpr expr) { - return _sym_build_equal(expr, 
_sym_build_integer(0, _sym_bits_helper(expr))); + return _sym_build_not_equal(expr, + _sym_build_integer(0, _sym_bits_helper(expr))); } \ No newline at end of file From 97de5f1ed9da8404d85be715e834ca45175abe76 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Tue, 20 Dec 2022 12:52:46 +0100 Subject: [PATCH 29/64] Add a GitLab CI configuration for AdaCore eng/fuzz/symcc#1 --- .adacore-gitlab-ci.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .adacore-gitlab-ci.yml diff --git a/.adacore-gitlab-ci.yml b/.adacore-gitlab-ci.yml new file mode 100644 index 00000000..a11b0459 --- /dev/null +++ b/.adacore-gitlab-ci.yml @@ -0,0 +1,22 @@ +anod_build: + services: + - image:sandbox + - cpu:8 + - mem:16 + stage: build + script: + - . ~/.aws_container_credentials + - export PATH=/it/e3/bin:$PATH + - export ANOD_DEFAULT_SANDBOX_DIR=/it/wave + + # Check out QSYM + - cd runtime/qsym_backend + - git clone -b symcc https://gitlab-ci-token:${CI_JOB_TOKEN}@${CI_SERVER_HOST}:${CI_SERVER_PORT}/eng/fuzz/qsym + + # Use our repositories + - anod vcs --add-repo symcc $CI_PROJECT_DIR + - anod vcs --add-repo qsym $CI_PROJECT_DIR/runtime/qsym_backend/qsym + + # Build SymCC + - anod source symcc + - anod build symcc From f31bf75a9d12769f3e16f1ad35bf081b22a484a8 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Tue, 10 Jan 2023 17:40:00 +0100 Subject: [PATCH 30/64] Add Ada bindings for the runtime Closes eng/fuzz/symcc#4. 
--- runtime/bindings/README | 9 +++++++ runtime/bindings/ada/README | 36 ++++++++++++++++++++++++++ runtime/bindings/ada/symcc.ads | 46 ++++++++++++++++++++++++++++++++++ runtime/bindings/ada/symcc.gpr | 21 ++++++++++++++++ 4 files changed, 112 insertions(+) create mode 100644 runtime/bindings/README create mode 100644 runtime/bindings/ada/README create mode 100644 runtime/bindings/ada/symcc.ads create mode 100644 runtime/bindings/ada/symcc.gpr diff --git a/runtime/bindings/README b/runtime/bindings/README new file mode 100644 index 00000000..d36686f4 --- /dev/null +++ b/runtime/bindings/README @@ -0,0 +1,9 @@ + + + Runtime bindings + + +This directory contains bindings to the user-facing functionality of the runtime +(see runtime/RuntimeCommon.h). The bindings give target programs written in +different languages access to runtime features like in-memory input or custom +test-case handlers. diff --git a/runtime/bindings/ada/README b/runtime/bindings/ada/README new file mode 100644 index 00000000..312576ac --- /dev/null +++ b/runtime/bindings/ada/README @@ -0,0 +1,36 @@ + + + Ada bindings + + +This directory contains Ada bindings for the SymCC runtime. To use them in your +Ada code, you can either point gprbuild here directly (e.g., by setting +GPR_PROJECT_PATH appropriately), or you can install them in the system: + +$ gprbuild -Psymcc +$ gprinstall -Psymcc + +Either way, you'll then be able to include SymCC in your project definition +(i.e., the .gpr file for your project): + + with "symcc"; + +This will let you use the bindings in your Ada code, for example: + + with SymCC; use SymCC; + + -- ... + + -- Register a procedure that receives new program inputs. + SymCC_Set_Test_Case_Handler (My_Handler); + + -- Tell SymCC where to find the input in memory. Note that the variable needs + -- to be declared with the "aliased" keyword. 
+ SymCC_Make_Symbolic (Input'Address, Input'Size / System.Storage_Unit); + + -- Run your code on the input; SymCC will follow the computations + -- symbolically and call My_Handler whenever it produces a new test input. + My_Code_Under_Test (Input); + +See the doc comments in symcc.ads for details, or generate HTML documentation +with "gnatdoc -Psymcc". diff --git a/runtime/bindings/ada/symcc.ads b/runtime/bindings/ada/symcc.ads new file mode 100644 index 00000000..7d7587d6 --- /dev/null +++ b/runtime/bindings/ada/symcc.ads @@ -0,0 +1,46 @@ +-- This file is part of the SymCC runtime. + +-- The SymCC runtime is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Lesser General Public License as published by +-- the Free Software Foundation, either version 3 of the License, or (at your +-- option) any later version. + +-- The SymCC runtime is distributed in the hope that it will be useful, but +-- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +-- License for more details. + +with Interfaces.C; +with System; + +-- @summary +-- Ada bindings to the SymCC runtime API. +-- +-- @description +-- This package provides thin bindings to the user-facing functionality of the +-- SymCC runtime (see RuntimeCommon.h). +package SymCC is + + procedure SymCC_Make_Symbolic + (Address : System.Address; Size : Interfaces.C.size_t) with + Import => True, Convention => C, External_Name => "symcc_make_symbolic"; + -- Mark a memory region as symbolic program input. + -- @param Address The start of the region containing the input data. + -- @param Size The length in bytes of the region. + + type Test_Case_Handler_Callback_Type is + access procedure + (Data_Block : System.Address; Size : Interfaces.C.size_t) with + Convention => C; + -- Type of functions that the runtime can call when it generates new + -- program inputs (see SymCC_Set_Test_Case_Handler). 
+ + procedure SymCC_Set_Test_Case_Handler + (Callback : Test_Case_Handler_Callback_Type) with + Import => True, + Convention => C, + External_Name => "symcc_set_test_case_handler"; + -- Define a custom handler for new program inputs. + -- @param Callback The procedure to be called for each new input. + +end SymCC; diff --git a/runtime/bindings/ada/symcc.gpr b/runtime/bindings/ada/symcc.gpr new file mode 100644 index 00000000..31305fc9 --- /dev/null +++ b/runtime/bindings/ada/symcc.gpr @@ -0,0 +1,21 @@ +-- This file is part of the SymCC runtime. + +-- The SymCC runtime is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Lesser General Public License as published by +-- the Free Software Foundation, either version 3 of the License, or (at your +-- option) any later version. + +-- The SymCC runtime is distributed in the hope that it will be useful, but +-- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +-- License for more details. + +library project SymCC is + + for Library_Name use "symcc"; + for Library_Dir use "lib"; + + for Languages use ("Ada"); + for Object_Dir use "obj"; + +end SymCC; From da9dbde9ac522a0f6fb3e515d00144df1ce5d10b Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Wed, 18 Jan 2023 12:48:44 +0100 Subject: [PATCH 31/64] Document how to use clang-format and add a test --- CONTRIBUTING.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 15f6c8da..be09db8d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,8 +2,12 @@ We encourage everyone to contribute improvements and bug fixes to SymCC. Our preferred way of accepting contributions is via GitHub pull requests. Please be -sure to run clang-format on any C/C++ code you change, and ideally add a test to -your patch. 
Unfortunately, since the project is a bit short on developers at the +sure to run clang-format on any C/C++ code you change; an easy way to do so is +with `git clang-format --style LLVM` just before committing. (On Ubuntu, you can +get `git-clang-format` via `apt install clang-format`.) Ideally, also add a test +to your patch (see the +[docs](https://github.com/eurecom-s3/symcc/blob/master/docs/Testing.txt) for +details). Unfortunately, since the project is a bit short on developers at the moment, we have to ask for your patience while we review your PR. Please note that any contributions you make are licensed under the same terms as From ee37b4ef5870987ff73fde137f34242b51a51907 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Wed, 18 Jan 2023 12:54:04 +0100 Subject: [PATCH 32/64] Run clang-format in the CI pipeline --- .github/workflows/check_style.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/check_style.yml diff --git a/.github/workflows/check_style.yml b/.github/workflows/check_style.yml new file mode 100644 index 00000000..f3580388 --- /dev/null +++ b/.github/workflows/check_style.yml @@ -0,0 +1,19 @@ +name: Check coding style +on: [pull_request] +jobs: + coding_style: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Run clang-format + shell: bash + run: | + format_changes=$(git clang-format-14 --quiet --diff \ + ${{ github.event.pull_request.base.sha }} \ + ${{ github.event.pull_request.head.sha }} | wc -c) + if [[ $format_changes -ne 0 ]]; then + echo "Please format your changes with clang-format using the LLVM style, e.g., git clang-format --style LLVM before committing" + exit 1 + fi From 6676d6486bcbf17ba6e2a5f67cc03059d0738c83 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 26 Jan 2023 13:22:40 +0100 Subject: [PATCH 33/64] Ignore the clangd cache and the user's build directories --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) 
diff --git a/.gitignore b/.gitignore index 9aba1266..a5522f87 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,7 @@ TAGS # Clang tooling compile_commands.json .clangd +.cache + +# Build directories +build* From 324cd074bd318626e04fb0c00e37fc4c043d81d4 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Tue, 17 Jan 2023 14:07:07 +0100 Subject: [PATCH 34/64] Handle concrete Booleans in load/store The code introduced to fix eurecom-s3/symcc#112 can't handle concrete values (i.e., nullptr expressions). Detecting nullptr and skipping the symbolic computations in bitcode would have complicated the code of the pass and the generated IR a lot for an unclear benefit (basically just preventing the call to and immediate return from the runtime functions). Fixes eng/fuzz/symcc#6. --- compiler/Symbolizer.cpp | 12 ++++++-- runtime/RuntimeCommon.cpp | 5 +++- runtime/RuntimeCommon.h | 28 ++++++++++++------- runtime/qsym_backend/Runtime.cpp | 12 ++++++++ runtime/simple_backend/Runtime.cpp | 9 ++++++ test/load_store.ll | 45 ++++++++++++++++++++++++++++++ 6 files changed, 98 insertions(+), 13 deletions(-) create mode 100644 test/load_store.ll diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index 54287e60..5cd475f4 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -474,11 +474,15 @@ void Symbolizer::visitLoadInst(LoadInst &I) { ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(dataType)), ConstantInt::get(IRB.getInt8Ty(), isLittleEndian(dataType) ? 1 : 0)}); + // Make sure that the expression corresponding to the loaded value is of + // bit-vector kind. Shortcutting the runtime calls that we emit here (e.g., + // for floating-point values) is tricky, so instead we make sure that any + // runtime function we call can handle null expressions. 
+ if (dataType->isFloatingPointTy()) { data = IRB.CreateCall(runtime.buildBitsToFloat, {data, IRB.getInt1(dataType->isDoubleTy())}); } else if (dataType->isIntegerTy() && dataType->getIntegerBitWidth() == 1) { - /* convert from byte back to a bool (i1) */ data = IRB.CreateCall(runtime.buildTrunc, {data, ConstantInt::get(IRB.getInt8Ty(), 1)}); data = IRB.CreateCall(runtime.buildBitToBool, {data}); @@ -492,12 +496,16 @@ void Symbolizer::visitStoreInst(StoreInst &I) { tryAlternative(IRB, I.getPointerOperand()); + // Make sure that the expression corresponding to the stored value is of + // bit-vector kind. Shortcutting the runtime calls that we emit here (e.g., + // for floating-point values) is tricky, so instead we make sure that any + // runtime function we call can handle null expressions. + auto *data = getSymbolicExpressionOrNull(I.getValueOperand()); auto *dataType = I.getValueOperand()->getType(); if (dataType->isFloatingPointTy()) { data = IRB.CreateCall(runtime.buildFloatToBits, data); } else if (dataType->isIntegerTy() && dataType->getIntegerBitWidth() == 1) { - /* convert from i1 (bool) to a byte */ data = IRB.CreateCall(runtime.buildBoolToBit, {data}); data = IRB.CreateCall( runtime.buildZExt, diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index baa86107..1b43b411 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -233,6 +233,9 @@ void symcc_make_symbolic(void *start, size_t byte_length) { } SymExpr _sym_build_bit_to_bool(SymExpr expr) { + if (expr == nullptr) + return nullptr; + return _sym_build_not_equal(expr, _sym_build_integer(0, _sym_bits_helper(expr))); -} \ No newline at end of file +} diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index 9078792d..0f89eda7 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -3,7 +3,10 @@ // This header defines the interface of the run-time library. 
It is not actually // used anywhere because the compiler pass inserts calls to the library // functions at the level of LLVM bitcode, but it serves as documentation of the -// intended interface. +// intended interface. Unless documented otherwise, functions taking symbolic +// expressions can't handle null values (i.e., they shouldn't be called for +// concrete values); exceptions are made if it's too difficult to check for +// concreteness in bitcode. // // Whoever uses this file has to define the type "SymExpr" first; we use it to // keep this header independent of the back-end implementation. @@ -26,6 +29,9 @@ #ifndef RUNTIMECOMMON_H #define RUNTIMECOMMON_H +/* Marker for expression parameters which may be null. */ +#define nullable + #ifdef __cplusplus #include #include @@ -112,9 +118,9 @@ SymExpr _sym_build_float_unordered_not_equal(SymExpr a, SymExpr b); /* * Casts */ -SymExpr _sym_build_sext(SymExpr expr, uint8_t bits); -SymExpr _sym_build_zext(SymExpr expr, uint8_t bits); -SymExpr _sym_build_trunc(SymExpr expr, uint8_t bits); +SymExpr _sym_build_sext(nullable SymExpr expr, uint8_t bits); +SymExpr _sym_build_zext(nullable SymExpr expr, uint8_t bits); +SymExpr _sym_build_trunc(nullable SymExpr expr, uint8_t bits); SymExpr _sym_build_bswap(SymExpr expr); SymExpr _sym_build_int_to_float(SymExpr value, int is_double, int is_signed); SymExpr _sym_build_float_to_float(SymExpr expr, int to_double); @@ -122,8 +128,8 @@ SymExpr _sym_build_bits_to_float(SymExpr expr, int to_double); SymExpr _sym_build_float_to_bits(SymExpr expr); SymExpr _sym_build_float_to_signed_integer(SymExpr expr, uint8_t bits); SymExpr _sym_build_float_to_unsigned_integer(SymExpr expr, uint8_t bits); -SymExpr _sym_build_bool_to_bit(SymExpr expr); -SymExpr _sym_build_bit_to_bool(SymExpr expr); +SymExpr _sym_build_bool_to_bit(nullable SymExpr expr); +SymExpr _sym_build_bit_to_bool(nullable SymExpr expr); /* * Bit-array helpers @@ -135,15 +141,15 @@ size_t _sym_bits_helper(SymExpr expr); /* * 
Function-call helpers */ -void _sym_set_parameter_expression(uint8_t index, SymExpr expr); +void _sym_set_parameter_expression(uint8_t index, nullable SymExpr expr); SymExpr _sym_get_parameter_expression(uint8_t index); -void _sym_set_return_expression(SymExpr expr); +void _sym_set_return_expression(nullable SymExpr expr); SymExpr _sym_get_return_expression(void); /* * Constraint handling */ -void _sym_push_path_constraint(SymExpr constraint, int taken, +void _sym_push_path_constraint(nullable SymExpr constraint, int taken, uintptr_t site_id); SymExpr _sym_get_input_byte(size_t offset, uint8_t concrete_value); void _sym_make_symbolic(void *data, size_t byte_length, size_t input_offset); @@ -152,7 +158,7 @@ void _sym_make_symbolic(void *data, size_t byte_length, size_t input_offset); * Memory management */ SymExpr _sym_read_memory(uint8_t *addr, size_t length, bool little_endian); -void _sym_write_memory(uint8_t *addr, size_t length, SymExpr expr, +void _sym_write_memory(uint8_t *addr, size_t length, nullable SymExpr expr, bool little_endian); void _sym_memcpy(uint8_t *dest, const uint8_t *src, size_t length); void _sym_memset(uint8_t *memory, SymExpr value, size_t length); @@ -196,4 +202,6 @@ void symcc_set_test_case_handler(TestCaseHandler handler); } #endif +#undef nullable + #endif diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index 7a0c388b..a7c4ef52 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -275,16 +275,25 @@ SymExpr _sym_build_not(SymExpr expr) { } SymExpr _sym_build_sext(SymExpr expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression(g_expr_builder->createSExt( allocatedExpressions.at(expr), bits + expr->bits())); } SymExpr _sym_build_zext(SymExpr expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression(g_expr_builder->createZExt( allocatedExpressions.at(expr), bits + expr->bits())); } SymExpr 
_sym_build_trunc(SymExpr expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression( g_expr_builder->createTrunc(allocatedExpressions.at(expr), bits)); } @@ -315,6 +324,9 @@ SymExpr _sym_extract_helper(SymExpr expr, size_t first_bit, size_t last_bit) { size_t _sym_bits_helper(SymExpr expr) { return expr->bits(); } SymExpr _sym_build_bool_to_bit(SymExpr expr) { + if (expr == nullptr) + return nullptr; + return registerExpression( g_expr_builder->boolToBit(allocatedExpressions.at(expr), 1)); } diff --git a/runtime/simple_backend/Runtime.cpp b/runtime/simple_backend/Runtime.cpp index bb74abd4..98ed31b2 100644 --- a/runtime/simple_backend/Runtime.cpp +++ b/runtime/simple_backend/Runtime.cpp @@ -351,14 +351,21 @@ Z3_ast _sym_build_float_unordered_not_equal(Z3_ast a, Z3_ast b) { } Z3_ast _sym_build_sext(Z3_ast expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; return registerExpression(Z3_mk_sign_ext(g_context, bits, expr)); } Z3_ast _sym_build_zext(Z3_ast expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; return registerExpression(Z3_mk_zero_ext(g_context, bits, expr)); } Z3_ast _sym_build_trunc(Z3_ast expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression(Z3_mk_extract(g_context, bits - 1, 0, expr)); } @@ -410,6 +417,8 @@ Z3_ast _sym_build_float_to_unsigned_integer(Z3_ast expr, uint8_t bits) { } Z3_ast _sym_build_bool_to_bit(Z3_ast expr) { + if (expr == nullptr) + return nullptr; return registerExpression(Z3_mk_ite(g_context, expr, _sym_build_integer(1, 1), _sym_build_integer(0, 1))); } diff --git a/test/load_store.ll b/test/load_store.ll new file mode 100644 index 00000000..6dc25689 --- /dev/null +++ b/test/load_store.ll @@ -0,0 +1,45 @@ +; This file is part of SymCC. 
+; +; SymCC is free software: you can redistribute it and/or modify it under the +; terms of the GNU General Public License as published by the Free Software +; Foundation, either version 3 of the License, or (at your option) any later +; version. +; +; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +; A PARTICULAR PURPOSE. See the GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License along with +; SymCC. If not, see . + +; Verify that loading and storing concrete values of various types works. For +; each type, we allocate space on the stack, then store a constant value into +; it, and finally load it back. Compiling this code with SymCC and verifying +; that the resulting binary exits cleanly shows that SymCC's instrumentation +; doesn't break the load/store operations. +; +; This test reproduces a bug where loading a concrete Boolean would lead to a +; program crash. +; +; Since the bitcode is written by hand, we first run llc on it because it +; performs a validity check, whereas Clang doesn't. +; +; RUN: llc %s -o /dev/null +; RUN: %symcc %s -o %t +; RUN: %t 2>&1 + +target triple = "x86_64-pc-linux-gnu" + +define i32 @main(i32 %argc, i8** %argv) { + ; Load and store a Boolean. + %stack_bool = alloca i1 + store i1 0, i1* %stack_bool + %copy_of_stack_bool = load i1, i1* %stack_bool + + ; Load and store a float. + %stack_float = alloca float + store float 0.0, float* %stack_float + %copy_of_stack_float = load float, float* %stack_float + + ret i32 0 +} From d20d3e7bae2a66eb78a4ce0827fd0546b7fdd6fa Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 26 Jan 2023 13:04:28 +0100 Subject: [PATCH 35/64] Make the data parameter for symcc_make_symbolic const It never modifies its input, and const void* is easier to use with strings (in addition to being more explicit). 
--- runtime/RuntimeCommon.cpp | 7 ++++--- runtime/RuntimeCommon.h | 5 +++-- test/memory_input.c | 2 +- test/test_case_handler.c | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index 1b43b411..2d314b06 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -214,15 +214,16 @@ void _sym_register_expression_region(SymExpr *start, size_t length) { registerExpressionRegion({start, length}); } -void _sym_make_symbolic(void *data, size_t byte_length, size_t input_offset) { +void _sym_make_symbolic(const void *data, size_t byte_length, + size_t input_offset) { ReadWriteShadow shadow(data, byte_length); - uint8_t *data_bytes = reinterpret_cast(data); + const uint8_t *data_bytes = reinterpret_cast(data); std::generate(shadow.begin(), shadow.end(), [&, i = 0]() mutable { return _sym_get_input_byte(input_offset++, data_bytes[i++]); }); } -void symcc_make_symbolic(void *start, size_t byte_length) { +void symcc_make_symbolic(const void *start, size_t byte_length) { if (!std::holds_alternative(g_config.input)) throw std::runtime_error{"Calls to symcc_make_symbolic aren't allowed when " "SYMCC_MEMORY_INPUT isn't set"}; diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index 0f89eda7..19b4860f 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -152,7 +152,8 @@ SymExpr _sym_get_return_expression(void); void _sym_push_path_constraint(nullable SymExpr constraint, int taken, uintptr_t site_id); SymExpr _sym_get_input_byte(size_t offset, uint8_t concrete_value); -void _sym_make_symbolic(void *data, size_t byte_length, size_t input_offset); +void _sym_make_symbolic(const void *data, size_t byte_length, + size_t input_offset); /* * Memory management @@ -194,7 +195,7 @@ void _sym_collect_garbage(void); * These are the only functions in the interface that we expect to be called by * users (i.e., calls to it aren't auto-generated by our compiler pass). 
*/ -void symcc_make_symbolic(void *start, size_t byte_length); +void symcc_make_symbolic(const void *start, size_t byte_length); typedef void (*TestCaseHandler)(const void *, size_t); void symcc_set_test_case_handler(TestCaseHandler handler); diff --git a/test/memory_input.c b/test/memory_input.c index 58f55702..9e2282ab 100644 --- a/test/memory_input.c +++ b/test/memory_input.c @@ -18,7 +18,7 @@ #include #include -void symcc_make_symbolic(void *start, size_t byte_length); +void symcc_make_symbolic(const void *start, size_t byte_length); uint64_t g_value = 0xaaaabbbbccccdddd; diff --git a/test/test_case_handler.c b/test/test_case_handler.c index 11819754..414c8843 100644 --- a/test/test_case_handler.c +++ b/test/test_case_handler.c @@ -20,7 +20,7 @@ #define MAGIC 0xab -void symcc_make_symbolic(void *start, size_t byte_length); +void symcc_make_symbolic(const void *start, size_t byte_length); typedef void (*TestCaseHandler)(const void *, size_t); void symcc_set_test_case_handler(TestCaseHandler handler); From 031a2c3caa679558cb927ea75fe402f18a2df46a Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 26 Jan 2023 13:07:37 +0100 Subject: [PATCH 36/64] Intercept libc functions bcmp, bcopy, and bzero Fixes eng/fuzz/symcc#7. --- compiler/Runtime.cpp | 9 +++-- runtime/LibcWrappers.cpp | 83 +++++++++++++++++++++++++++++++++++++++ runtime/RuntimeCommon.cpp | 4 ++ test/CMakeLists.txt | 7 ++++ test/bcopy_bcmp_bzero.c | 65 ++++++++++++++++++++++++++++++ 5 files changed, 164 insertions(+), 4 deletions(-) create mode 100644 test/bcopy_bcmp_bzero.c diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index 67a4aa10..330dcc7a 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -162,10 +162,11 @@ Runtime::Runtime(Module &M) { /// Decide whether a function is called symbolically. 
bool isInterceptedFunction(const Function &f) { static const StringSet<> kInterceptedFunctions = { - "malloc", "calloc", "mmap", "mmap64", "open", "read", "lseek", - "lseek64", "fopen", "fopen64", "fread", "fseek", "fseeko", "rewind", - "fseeko64", "getc", "ungetc", "memcpy", "memset", "strncpy", "strchr", - "memcmp", "memmove", "ntohl", "fgets", "fgetc", "getchar"}; + "malloc", "calloc", "mmap", "mmap64", "open", "read", + "lseek", "lseek64", "fopen", "fopen64", "fread", "fseek", + "fseeko", "rewind", "fseeko64", "getc", "ungetc", "memcpy", + "memset", "strncpy", "strchr", "memcmp", "memmove", "ntohl", + "fgets", "fgetc", "getchar", "bcopy", "bcmp", "bzero"}; return (kInterceptedFunctions.count(f.getName()) > 0); } diff --git a/runtime/LibcWrappers.cpp b/runtime/LibcWrappers.cpp index 41ce2a34..aafc1b08 100644 --- a/runtime/LibcWrappers.cpp +++ b/runtime/LibcWrappers.cpp @@ -13,6 +13,22 @@ // You should have received a copy of the GNU Lesser General Public License // along with SymCC. If not, see . +// +// Libc wrappers +// +// This file contains the wrappers around libc functions which add symbolic +// computations; using the wrappers frees instrumented code from having to link +// against an instrumented libc. +// +// We define a wrapper for function X with SYM(X), which just changes the name +// "X" to something predictable and hopefully unique. It is then up to the +// compiler pass to replace calls of X with calls of SYM(X). +// +// In general, the wrappers ask the solver to generate alternative parameter +// values, then call the wrapped function, create and store symbolic expressions +// matching the libc function's semantics, and finally return the wrapped +// function's result. + #include #include #include @@ -389,6 +405,20 @@ void *SYM(memset)(void *s, int c, size_t n) { return result; } +void SYM(bzero)(void *s, size_t n) { + bzero(s, n); + + // No return value, hence no corresponding expression. 
+ _sym_set_return_expression(nullptr); + + tryAlternative(s, _sym_get_parameter_expression(0), SYM(bzero)); + tryAlternative(n, _sym_get_parameter_expression(1), SYM(bzero)); + + // Concretize the memory region, which now is all zeros. + ReadWriteShadow shadow(s, n); + std::fill(shadow.begin(), shadow.end(), nullptr); +} + void *SYM(memmove)(void *dest, const void *src, size_t n) { tryAlternative(dest, _sym_get_parameter_expression(0), SYM(memmove)); tryAlternative(src, _sym_get_parameter_expression(1), SYM(memmove)); @@ -402,6 +432,22 @@ void *SYM(memmove)(void *dest, const void *src, size_t n) { return result; } +void SYM(bcopy)(const void *src, void *dest, size_t n) { + tryAlternative(src, _sym_get_parameter_expression(0), SYM(bcopy)); + tryAlternative(dest, _sym_get_parameter_expression(1), SYM(bcopy)); + tryAlternative(n, _sym_get_parameter_expression(2), SYM(bcopy)); + + bcopy(src, dest, n); + + // bcopy is mostly equivalent to memmove, so we can use our symbolic version + // of memmove to copy any symbolic expressions over to the destination. + _sym_memmove(static_cast(dest), static_cast(src), + n); + + // void function, so there is no return value and hence no expression for it. + _sym_set_return_expression(nullptr); +} + char *SYM(strncpy)(char *dest, const char *src, size_t n) { tryAlternative(dest, _sym_get_parameter_expression(0), SYM(strncpy)); tryAlternative(src, _sym_get_parameter_expression(1), SYM(strncpy)); @@ -485,6 +531,43 @@ int SYM(memcmp)(const void *a, const void *b, size_t n) { return result; } +int SYM(bcmp)(const void *a, const void *b, size_t n) { + tryAlternative(a, _sym_get_parameter_expression(0), SYM(bcmp)); + tryAlternative(b, _sym_get_parameter_expression(1), SYM(bcmp)); + tryAlternative(n, _sym_get_parameter_expression(2), SYM(bcmp)); + + auto result = bcmp(a, b, n); + + // bcmp returns zero if the input regions are equal and an unspecified + // non-zero value otherwise. 
Instead of expressing this symbolically, we + // directly ask the solver for an alternative solution (assuming that the + // result is used for a conditional branch later), and return a concrete + // value. + _sym_set_return_expression(nullptr); + + // The result of the comparison depends on whether the input regions are equal + // byte by byte. Construct the corresponding expression, but only if there is + // at least one symbolic byte in either of the regions; otherwise, the result + // is concrete. + + if (isConcrete(a, n) && isConcrete(b, n)) + return result; + + auto aShadowIt = ReadOnlyShadow(a, n).begin_non_null(); + auto bShadowIt = ReadOnlyShadow(b, n).begin_non_null(); + auto *allEqual = _sym_build_equal(*aShadowIt, *bShadowIt); + for (size_t i = 1; i < n; i++) { + ++aShadowIt; + ++bShadowIt; + allEqual = + _sym_build_bool_and(allEqual, _sym_build_equal(*aShadowIt, *bShadowIt)); + } + + _sym_push_path_constraint(allEqual, result == 0, + reinterpret_cast(SYM(bcmp))); + return result; +} + uint32_t SYM(ntohl)(uint32_t netlong) { auto netlongExpr = _sym_get_parameter_expression(0); auto result = ntohl(netlong); diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index 2d314b06..b8c90dfe 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -74,6 +74,10 @@ void _sym_memset(uint8_t *memory, SymExpr value, size_t length) { } void _sym_memmove(uint8_t *dest, const uint8_t *src, size_t length) { + // Unless both the source and the destination are fully concrete memory + // regions, we need to copy the symbolic expressions over. (In the case where + // only the destination is symbolic, this means making it concrete.) 
+ if (isConcrete(src, length) && isConcrete(dest, length)) return; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c9613e57..c6b0d1b5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -18,6 +18,13 @@ else() set(SYM_TEST_FILECHECK_ARGS "--check-prefix=SIMPLE --check-prefix=ANY") endif() +if (${LLVM_VERSION_MAJOR} VERSION_GREATER_EQUAL 14) + # FileCheck used to be fine with unused prefixes when more than one prefix was + # defined. This changed in LLVM version 14, requiring the new option + # "--allow-unused-prefixes" (added in LLVM 13) to restore the old behavior. + set(SYM_TEST_FILECHECK_ARGS "${SYM_TEST_FILECHECK_ARGS} --allow-unused-prefixes") +endif() + configure_file("lit.site.cfg.in" "lit.site.cfg") add_custom_target(check diff --git a/test/bcopy_bcmp_bzero.c b/test/bcopy_bcmp_bzero.c new file mode 100644 index 00000000..fff833f2 --- /dev/null +++ b/test/bcopy_bcmp_bzero.c @@ -0,0 +1,65 @@ +// This file is part of SymCC. +// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see . + +// RUN: %symcc -O2 %s -o %t +// RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s +// +// Test symbolic handling of bcopy, bcmp, and bzero. 
We copy symbolic data with +// bcopy, then compare it with bcmp, expecting the solver to be triggered +// (indicating that the two functions are represented correctly); then we bzero +// the region and perform another comparison, which should not result in a +// solver query (indicating that bzero concretized as expected). + +#include +#include +#include +#include + +void symcc_make_symbolic(const void *start, size_t byte_length); +typedef void (*TestCaseHandler)(const void *, size_t); +void symcc_set_test_case_handler(TestCaseHandler handler); + +int solved = 0; + +void handle_test_case(const void *data, size_t data_length) { + assert(data_length == 4); + assert(bcmp(data, "bar", 4) == 0); + solved = 1; +} + +int main(int argc, char *argv[]) { + symcc_set_test_case_handler(handle_test_case); + + const char input[] = "foo"; + symcc_make_symbolic(input, 4); + + // Make a copy and compare it in order to trigger the solver. + char copy[4]; + bcopy(input, copy, 4); + int bcmp_result = bcmp(copy, "bar", 4); + assert(bcmp_result != 0); + + // Zero out the symbolic data and compare again (which should not trigger the + // solver this time). + bzero(copy, 4); + bcmp_result = bcmp(copy, "abc", 4); + assert(bcmp_result != 0); + + // The simple backend doesn't support test-case handlers, so we only expect a + // solution with the QSYM backend. + printf("Solved: %d\n", solved); + // SIMPLE: Solved: 0 + // QSYM: Solved: 1 + return 0; +} From 37677d5999485e311d729c9b5380ff68157d58be Mon Sep 17 00:00:00 2001 From: Adrian Herrera Date: Sun, 12 Sep 2021 07:14:41 +0000 Subject: [PATCH 37/64] compiler: run scalarizer and lower atomics passes The scalarizer pass removes most (currently unsupported) vector instructions. The loweratomic pass replaces atomic instructions with non-atomic operations.
--- compiler/Main.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/compiler/Main.cpp b/compiler/Main.cpp index f915d5d4..c91af67a 100644 --- a/compiler/Main.cpp +++ b/compiler/Main.cpp @@ -14,6 +14,9 @@ #include #include +#include +#include +#include #if LLVM_VERSION_MAJOR >= 13 #include @@ -36,6 +39,8 @@ using namespace llvm; void addSymbolizeLegacyPass(const PassManagerBuilder & /* unused */, legacy::PassManagerBase &PM) { + PM.add(createScalarizerPass()); + PM.add(createLowerAtomicPass()); PM.add(new SymbolizeLegacyPass()); } @@ -67,6 +72,8 @@ PassPluginLibraryInfo getSymbolizePluginInfo() { }); PB.registerVectorizerStartEPCallback( [](FunctionPassManager &PM, OptimizationLevel) { + PM.addPass(ScalarizerPass()); + PM.addPass(LowerAtomicPass()); PM.addPass(SymbolizePass()); }); }}; From 58177ba842f4819c5249e9e237a48d9f37a23fd5 Mon Sep 17 00:00:00 2001 From: Adrian Herrera Date: Fri, 27 Aug 2021 05:17:05 +0000 Subject: [PATCH 38/64] runtime: add ITE expression builder --- runtime/RuntimeCommon.h | 1 + runtime/qsym_backend/Runtime.cpp | 7 +++++++ runtime/simple_backend/Runtime.cpp | 8 ++++++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index 19b4860f..d7a9f88d 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -99,6 +99,7 @@ SymExpr _sym_build_bool_or(SymExpr a, SymExpr b); SymExpr _sym_build_or(SymExpr a, SymExpr b); SymExpr _sym_build_bool_xor(SymExpr a, SymExpr b); SymExpr _sym_build_xor(SymExpr a, SymExpr b); +SymExpr _sym_build_ite(SymExpr cond, SymExpr a, SymExpr b); SymExpr _sym_build_float_ordered_greater_than(SymExpr a, SymExpr b); SymExpr _sym_build_float_ordered_greater_equal(SymExpr a, SymExpr b); diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index a7c4ef52..fb3cc878 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -274,6 +274,13 @@ SymExpr _sym_build_not(SymExpr expr) { 
g_expr_builder->createNot(allocatedExpressions.at(expr))); } +SymExpr _sym_build_ite(SymExpr cond, SymExpr a, SymExpr b) { + return registerExpression( + g_expr_builder->createIte(allocatedExpressions.at(cond), + allocatedExpressions.at(a), + allocatedExpressions.at(b))); +} + SymExpr _sym_build_sext(SymExpr expr, uint8_t bits) { if (expr == nullptr) return nullptr; diff --git a/runtime/simple_backend/Runtime.cpp b/runtime/simple_backend/Runtime.cpp index 98ed31b2..7db8d160 100644 --- a/runtime/simple_backend/Runtime.cpp +++ b/runtime/simple_backend/Runtime.cpp @@ -243,6 +243,10 @@ DEF_BINARY_EXPR_BUILDER(float_ordered_equal, fpa_eq) #undef DEF_BINARY_EXPR_BUILDER +Z3_ast _sym_build_ite(Z3_ast cond, Z3_ast a, Z3_ast b) { + return registerExpression(Z3_mk_ite(g_context, cond, a, b)); +} + Z3_ast _sym_build_fp_add(Z3_ast a, Z3_ast b) { return registerExpression(Z3_mk_fpa_add(g_context, g_rounding_mode, a, b)); } @@ -419,8 +423,8 @@ Z3_ast _sym_build_float_to_unsigned_integer(Z3_ast expr, uint8_t bits) { Z3_ast _sym_build_bool_to_bit(Z3_ast expr) { if (expr == nullptr) return nullptr; - return registerExpression(Z3_mk_ite(g_context, expr, _sym_build_integer(1, 1), - _sym_build_integer(0, 1))); + return _sym_build_ite(expr, _sym_build_integer(1, 1), + _sym_build_integer(0, 1)); } void _sym_push_path_constraint(Z3_ast constraint, int taken, From 0e362c73a6d3953f113e90cfc521a20af5a88bce Mon Sep 17 00:00:00 2001 From: Adrian Herrera Date: Sun, 29 Aug 2021 06:14:54 +0000 Subject: [PATCH 39/64] intrinsics: add support for saturating arithmetic --- compiler/Runtime.cpp | 7 ++ compiler/Runtime.h | 6 ++ compiler/Symbolizer.cpp | 21 ++++++ runtime/RuntimeCommon.cpp | 149 ++++++++++++++++++++++++++++++++++++++ runtime/RuntimeCommon.h | 15 +++- 5 files changed, 197 insertions(+), 1 deletion(-) diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index 330dcc7a..6a8420e7 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -74,6 +74,13 @@ Runtime::Runtime(Module &M) 
{ pushPathConstraint = import(M, "_sym_push_path_constraint", voidT, ptrT, IRB.getInt1Ty(), intPtrType); + buildSAddSat = import(M, "_sym_build_sadd_sat", ptrT, ptrT, ptrT); + buildUAddSat = import(M, "_sym_build_uadd_sat", ptrT, ptrT, ptrT); + buildSSubSat = import(M, "_sym_build_ssub_sat", ptrT, ptrT, ptrT); + buildUSubSat = import(M, "_sym_build_usub_sat", ptrT, ptrT, ptrT); + buildSShlSat = import(M, "_sym_build_sshl_sat", ptrT, ptrT, ptrT); + buildUShlSat = import(M, "_sym_build_ushl_sat", ptrT, ptrT, ptrT); + setParameterExpression = import(M, "_sym_set_parameter_expression", voidT, int8T, ptrT); getParameterExpression = diff --git a/compiler/Runtime.h b/compiler/Runtime.h index 5dd72d3b..7324423f 100644 --- a/compiler/Runtime.h +++ b/compiler/Runtime.h @@ -51,6 +51,12 @@ struct Runtime { SymFnT buildBoolXor{}; SymFnT buildBoolToBit{}; SymFnT buildBitToBool{}; + SymFnT buildSAddSat{}; + SymFnT buildUAddSat{}; + SymFnT buildSSubSat{}; + SymFnT buildUSubSat{}; + SymFnT buildSShlSat{}; + SymFnT buildUShlSat{}; SymFnT pushPathConstraint{}; SymFnT getParameterExpression{}; SymFnT setParameterExpression{}; diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index 5cd475f4..f0167e22 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -284,6 +284,27 @@ void Symbolizer::handleIntrinsicCall(CallBase &I) { registerSymbolicComputation(swapped, &I); break; } + +#define DEF_SAT_ARITH_BUILDER(intrinsic_op, runtime_name) \ + case Intrinsic::intrinsic_op##_sat: { \ + IRBuilder<> IRB(&I); \ + auto result = buildRuntimeCall(IRB, runtime.build##runtime_name, \ + {I.getOperand(0), I.getOperand(1)}); \ + registerSymbolicComputation(result, &I); \ + break; \ + } + + DEF_SAT_ARITH_BUILDER(sadd, SAddSat) + DEF_SAT_ARITH_BUILDER(uadd, UAddSat) + DEF_SAT_ARITH_BUILDER(ssub, SSubSat) + DEF_SAT_ARITH_BUILDER(usub, USubSat) +#if LLVM_VERSION_MAJOR > 11 + DEF_SAT_ARITH_BUILDER(sshl, SShlSat) + DEF_SAT_ARITH_BUILDER(ushl, UShlSat) +#endif + +#undef 
DEF_SAT_ARITH_BUILDER + default: errs() << "Warning: unhandled LLVM intrinsic " << callee->getName() << "; the result will be concretized\n"; diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index b8c90dfe..cef8cf7a 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -37,6 +37,51 @@ SymExpr g_return_value; std::array g_function_arguments; // TODO make thread-local +SymExpr buildMinSignedInt(uint8_t bits) { + return _sym_build_integer((uint64_t)(1) << (bits - 1), bits); +} + +SymExpr buildMaxSignedInt(uint8_t bits) { + uint64_t mask = ((uint64_t)(1) << bits) - 1; + return _sym_build_integer(((uint64_t)(~0) & mask) >> 1, bits); +} + +SymExpr buildMaxUnsignedInt(uint8_t bits) { + uint64_t mask = ((uint64_t)(1) << bits) - 1; + return _sym_build_integer((uint64_t)(~0) & mask, bits); +} + +/// Construct an expression describing the in-memory representation of the +/// bitcode structure {iN, i1} returned by the intrinsics for arithmetic with +/// overflow (see +/// https://llvm.org/docs/LangRef.html#arithmetic-with-overflow-intrinsics). The +/// overflow parameter is expected to be a symbolic Boolean. +SymExpr buildOverflowResult(SymExpr result_expr, SymExpr overflow, + bool little_endian) { + auto result_bits = _sym_bits_helper(result_expr); + assert(result_bits % 8 == 0 && + "Arithmetic with overflow on integers of invalid length"); + + // When storing {iN, i1} in memory, the compiler would insert padding between + // the two elements, extending the Boolean to the same size as the integer. We + // simulate the same here, taking endianness into account. + + auto result_expr_mem = + little_endian ? _sym_build_bswap(result_expr) : result_expr; + auto overflow_byte = _sym_build_zext(_sym_build_bool_to_bit(overflow), 7); + + // There's no padding if the result is a single byte. 
+ if (result_bits == 8) { + return _sym_concat_helper(result_expr_mem, overflow_byte); + } + + auto padding = _sym_build_zero_bytes(result_bits / 8 - 1); + return _sym_concat_helper(result_expr_mem, + little_endian + ? _sym_concat_helper(overflow_byte, padding) + : _sym_concat_helper(padding, overflow_byte)); +} + } // namespace void _sym_set_return_expression(SymExpr expr) { g_return_value = expr; } @@ -214,6 +259,110 @@ SymExpr _sym_build_zero_bytes(size_t length) { return result; } +SymExpr _sym_build_sadd_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + SymExpr min = buildMinSignedInt(bits); + SymExpr max = buildMaxSignedInt(bits); + SymExpr add_sext = + _sym_build_add(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + + return _sym_build_ite( + // If the result is less than the min signed integer... + _sym_build_signed_less_equal(add_sext, _sym_build_sext(min, 1)), + // ... Return the min signed integer + min, + _sym_build_ite( + // Otherwise, if the result is greater than the max signed integer... + _sym_build_signed_greater_equal(add_sext, _sym_build_sext(max, 1)), + // ... Return the max signed integer + max, + // Otherwise, return the addition + _sym_build_add(a, b))); +} + +SymExpr _sym_build_uadd_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + SymExpr max = buildMaxUnsignedInt(bits); + SymExpr add_zext = + _sym_build_add(_sym_build_zext(a, 1), _sym_build_zext(b, 1)); + + return _sym_build_ite( + // If the top bit is set, an overflow has occurred and... + _sym_build_bit_to_bool(_sym_extract_helper(add_zext, bits, bits)), + // ... 
Return the max unsigned integer + max, + // Otherwise, return the addition + _sym_build_add(a, b)); +} + +SymExpr _sym_build_ssub_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + SymExpr min = buildMinSignedInt(bits); + SymExpr max = buildMaxSignedInt(bits); + + SymExpr sub_sext = + _sym_build_sub(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + + return _sym_build_ite( + // If the result is less than the min signed integer... + _sym_build_signed_less_equal(sub_sext, _sym_build_sext(min, 1)), + // ... Return the min signed integer + min, + _sym_build_ite( + // Otherwise, if the result is greater than the max signed integer... + _sym_build_signed_greater_equal(sub_sext, _sym_build_sext(max, 1)), + // ... Return the max signed integer + max, + // Otherwise, return the subtraction + _sym_build_sub(a, b))); +} + +SymExpr _sym_build_usub_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + + return _sym_build_ite( + // If `a >= b`, then no overflow occurs and... + _sym_build_unsigned_greater_equal(a, b), + // ... Return the subtraction + _sym_build_sub(a, b), + // Otherwise, saturate at zero + _sym_build_integer(0, bits)); +} + +static SymExpr _sym_build_shift_left_overflow(SymExpr a, SymExpr b) { + return _sym_build_not_equal( + _sym_build_arithmetic_shift_right(_sym_build_shift_left(a, b), b), a); +} + +SymExpr _sym_build_sshl_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + + return _sym_build_ite( + // If an overflow occurred... + _sym_build_shift_left_overflow(a, b), + _sym_build_ite( + // ... And the LHS is negative... + _sym_build_bit_to_bool(_sym_extract_helper(a, bits - 1, bits - 1)), + // ... Return the min signed integer... + buildMinSignedInt(bits), + // ... 
Otherwise, return the max signed integer + buildMaxSignedInt(bits)), + // Otherwise, return the left shift + _sym_build_shift_left(a, b)); +} + +SymExpr _sym_build_ushl_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + + return _sym_build_ite( + // If an overflow occurred... + _sym_build_shift_left_overflow(a, b), + // ... Return the max unsigned integer + buildMaxUnsignedInt(bits), + // Otherwise, return the left shift + _sym_build_shift_left(a, b)); +} + void _sym_register_expression_region(SymExpr *start, size_t length) { registerExpressionRegion({start, length}); } diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index d7a9f88d..81f7ef13 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -58,7 +58,7 @@ SymExpr _sym_build_false(void); SymExpr _sym_build_bool(bool value); /* - * Arithmetic and shifts + * Integer arithmetic and shifts */ SymExpr _sym_build_neg(SymExpr expr); SymExpr _sym_build_add(SymExpr a, SymExpr b); @@ -72,6 +72,19 @@ SymExpr _sym_build_shift_left(SymExpr a, SymExpr b); SymExpr _sym_build_logical_shift_right(SymExpr a, SymExpr b); SymExpr _sym_build_arithmetic_shift_right(SymExpr a, SymExpr b); +/* + * Saturating integer arithmetic and shifts + */ +SymExpr _sym_build_sadd_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_uadd_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_ssub_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_usub_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_sshl_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_ushl_sat(SymExpr a, SymExpr b); + +/* + * Floating-point arithmetic and shifts + */ SymExpr _sym_build_fp_add(SymExpr a, SymExpr b); SymExpr _sym_build_fp_sub(SymExpr a, SymExpr b); SymExpr _sym_build_fp_mul(SymExpr a, SymExpr b); From cdf7ad0c5c2b7b63718a01a6733210260c20e331 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 16 Feb 2023 10:55:59 +0100 Subject: [PATCH 40/64] intrinsics: add support for overflow arithmetic --- compiler/Runtime.cpp | 9 ++++ 
compiler/Runtime.h | 3 ++ compiler/Symbolizer.cpp | 30 ++++++++++++- compiler/Symbolizer.h | 10 ++++- runtime/RuntimeCommon.cpp | 67 ++++++++++++++++++++++++++++++ runtime/RuntimeCommon.h | 10 +++++ runtime/qsym_backend/Runtime.cpp | 7 ++-- runtime/simple_backend/Runtime.cpp | 8 ++-- 8 files changed, 133 insertions(+), 11 deletions(-) diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index 6a8420e7..3e4d1ef9 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -74,6 +74,15 @@ Runtime::Runtime(Module &M) { pushPathConstraint = import(M, "_sym_push_path_constraint", voidT, ptrT, IRB.getInt1Ty(), intPtrType); + // Overflow arithmetic + buildAddOverflow = import(M, "_sym_build_add_overflow", ptrT, ptrT, ptrT, + IRB.getInt1Ty(), IRB.getInt1Ty()); + buildSubOverflow = import(M, "_sym_build_sub_overflow", ptrT, ptrT, ptrT, + IRB.getInt1Ty(), IRB.getInt1Ty()); + buildMulOverflow = import(M, "_sym_build_mul_overflow", ptrT, ptrT, ptrT, + IRB.getInt1Ty(), IRB.getInt1Ty()); + + // Saturating arithmetic buildSAddSat = import(M, "_sym_build_sadd_sat", ptrT, ptrT, ptrT); buildUAddSat = import(M, "_sym_build_uadd_sat", ptrT, ptrT, ptrT); buildSSubSat = import(M, "_sym_build_ssub_sat", ptrT, ptrT, ptrT); diff --git a/compiler/Runtime.h b/compiler/Runtime.h index 7324423f..1987fdd0 100644 --- a/compiler/Runtime.h +++ b/compiler/Runtime.h @@ -51,6 +51,9 @@ struct Runtime { SymFnT buildBoolXor{}; SymFnT buildBoolToBit{}; SymFnT buildBitToBool{}; + SymFnT buildAddOverflow{}; + SymFnT buildSubOverflow{}; + SymFnT buildMulOverflow{}; SymFnT buildSAddSat{}; SymFnT buildUAddSat{}; SymFnT buildSSubSat{}; diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index f0167e22..a486128c 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -285,6 +285,32 @@ void Symbolizer::handleIntrinsicCall(CallBase &I) { break; } +// Overflow arithmetic +#define DEF_OVF_ARITH_BUILDER(intrinsic_op, runtime_name) \ + case Intrinsic::s##intrinsic_op##_with_overflow: \ 
+ case Intrinsic::u##intrinsic_op##_with_overflow: { \ + IRBuilder<> IRB(&I); \ + \ + bool isSigned = \ + I.getIntrinsicID() == Intrinsic::s##intrinsic_op##_with_overflow; \ + auto overflow = buildRuntimeCall( \ + IRB, runtime.build##runtime_name, \ + {{I.getOperand(0), true}, \ + {I.getOperand(1), true}, \ + {IRB.getInt1(isSigned), false}, \ + {IRB.getInt1(dataLayout.isLittleEndian() ? 1 : 0), false}}); \ + registerSymbolicComputation(overflow, &I); \ + \ + break; \ + } + + DEF_OVF_ARITH_BUILDER(add, AddOverflow) + DEF_OVF_ARITH_BUILDER(sub, SubOverflow) + DEF_OVF_ARITH_BUILDER(mul, MulOverflow) + +#undef DEF_OVF_ARITH_BUILDER + +// Saturating arithmetic #define DEF_SAT_ARITH_BUILDER(intrinsic_op, runtime_name) \ case Intrinsic::intrinsic_op##_sat: { \ IRBuilder<> IRB(&I); \ @@ -1017,7 +1043,7 @@ Symbolizer::forceBuildRuntimeCall(IRBuilder<> &IRB, SymFnT function, for (unsigned i = 0; i < args.size(); i++) { const auto &[arg, symbolic] = args[i]; if (symbolic) - inputs.push_back({arg, i, call}); + inputs.push_back(Input(arg, i, call)); } return SymbolicComputation(call, call, inputs); @@ -1034,7 +1060,7 @@ void Symbolizer::tryAlternative(IRBuilder<> &IRB, Value *V) { runtime.pushPathConstraint, {destAssertion, IRB.getInt1(true), getTargetPreferredInt(V)}); registerSymbolicComputation(SymbolicComputation( - concreteDestExpr, pushAssertion, {{V, 0, destAssertion}})); + concreteDestExpr, pushAssertion, {Input(V, 0, destAssertion)})); } } diff --git a/compiler/Symbolizer.h b/compiler/Symbolizer.h index 8ab440af..cf0cfcf6 100644 --- a/compiler/Symbolizer.h +++ b/compiler/Symbolizer.h @@ -142,6 +142,14 @@ class Symbolizer : public llvm::InstVisitor { unsigned operandIndex; llvm::Instruction *user; + Input() = default; + + Input(llvm::Value *concrete, unsigned idx, llvm::Instruction *user) + : concreteValue(concrete), operandIndex(idx), user(user) { + assert(getSymbolicOperand()->getType() == + llvm::Type::getInt8PtrTy(user->getContext())); + } + llvm::Value 
*getSymbolicOperand() const { return user->getOperand(operandIndex); } @@ -186,7 +194,7 @@ class Symbolizer : public llvm::InstVisitor { << "\n...ending at " << *computation.lastInstruction << "\n...with inputs:\n"; for (const auto &input : computation.inputs) { - out << '\t' << *input.concreteValue << '\n'; + out << '\t' << *input.concreteValue << " => " << *input.user << '\n'; } return out; } diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index cef8cf7a..8d0b1d8f 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -363,6 +363,73 @@ SymExpr _sym_build_ushl_sat(SymExpr a, SymExpr b) { _sym_build_shift_left(a, b)); } +SymExpr _sym_build_add_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian) { + size_t bits = _sym_bits_helper(a); + SymExpr overflow = [&]() { + if (is_signed) { + // Check if the additions are different + SymExpr add_sext = + _sym_build_add(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + return _sym_build_not_equal(add_sext, + _sym_build_sext(_sym_build_add(a, b), 1)); + } else { + // Check if the addition overflowed into the extra bit + SymExpr add_zext = + _sym_build_add(_sym_build_zext(a, 1), _sym_build_zext(b, 1)); + return _sym_build_equal(_sym_extract_helper(add_zext, bits, bits), + _sym_build_true()); + } + }(); + + return buildOverflowResult(_sym_build_add(a, b), overflow, little_endian); +} + +SymExpr _sym_build_sub_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian) { + size_t bits = _sym_bits_helper(a); + SymExpr overflow = [&]() { + if (is_signed) { + // Check if the subtractions are different + SymExpr sub_sext = + _sym_build_sub(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + return _sym_build_not_equal(sub_sext, + _sym_build_sext(_sym_build_sub(a, b), 1)); + } else { + // Check if the subtraction overflowed into the extra bit + SymExpr sub_zext = + _sym_build_sub(_sym_build_zext(a, 1), _sym_build_zext(b, 1)); + return 
_sym_build_equal(_sym_extract_helper(sub_zext, bits, bits), + _sym_build_true()); + } + }(); + + return buildOverflowResult(_sym_build_sub(a, b), overflow, little_endian); +} + +SymExpr _sym_build_mul_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian) { + size_t bits = _sym_bits_helper(a); + SymExpr overflow = [&]() { + if (is_signed) { + // Check if the multiplications are different + SymExpr mul_sext = + _sym_build_mul(_sym_build_sext(a, bits), _sym_build_sext(b, bits)); + return _sym_build_not_equal(mul_sext, + _sym_build_sext(_sym_build_mul(a, b), bits)); + } else { + // Check if the multiplication overflowed into the extra bit + SymExpr mul_zext = + _sym_build_mul(_sym_build_zext(a, bits), _sym_build_zext(b, bits)); + return _sym_build_equal( + _sym_extract_helper(mul_zext, 2 * bits - 1, 2 * bits - 1), + _sym_build_true()); + } + }(); + + return buildOverflowResult(_sym_build_mul(a, b), overflow, little_endian); +} + void _sym_register_expression_region(SymExpr *start, size_t length) { registerExpressionRegion({start, length}); } diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index 81f7ef13..756c6a2c 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -72,6 +72,16 @@ SymExpr _sym_build_shift_left(SymExpr a, SymExpr b); SymExpr _sym_build_logical_shift_right(SymExpr a, SymExpr b); SymExpr _sym_build_arithmetic_shift_right(SymExpr a, SymExpr b); +/* + * Arithmetic with overflow + */ +SymExpr _sym_build_add_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian); +SymExpr _sym_build_sub_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian); +SymExpr _sym_build_mul_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian); + /* * Saturating integer arithmetic and shifts */ diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index fb3cc878..6f0b82a9 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -275,10 
+275,9 @@ SymExpr _sym_build_not(SymExpr expr) { } SymExpr _sym_build_ite(SymExpr cond, SymExpr a, SymExpr b) { - return registerExpression( - g_expr_builder->createIte(allocatedExpressions.at(cond), - allocatedExpressions.at(a), - allocatedExpressions.at(b))); + return registerExpression(g_expr_builder->createIte( + allocatedExpressions.at(cond), allocatedExpressions.at(a), + allocatedExpressions.at(b))); } SymExpr _sym_build_sext(SymExpr expr, uint8_t bits) { diff --git a/runtime/simple_backend/Runtime.cpp b/runtime/simple_backend/Runtime.cpp index 7db8d160..43372883 100644 --- a/runtime/simple_backend/Runtime.cpp +++ b/runtime/simple_backend/Runtime.cpp @@ -81,7 +81,7 @@ void handle_z3_error(Z3_context c [[maybe_unused]], Z3_error_code e) { } #endif -Z3_ast build_variable(const char *name, uint8_t bits) { +SymExpr build_variable(const char *name, uint8_t bits) { Z3_symbol sym = Z3_mk_string_symbol(g_context, name); auto *sort = Z3_mk_bv_sort(g_context, bits); Z3_inc_ref(g_context, (Z3_ast)sort); @@ -94,7 +94,7 @@ Z3_ast build_variable(const char *name, uint8_t bits) { /// The set of all expressions we have ever passed to client code. std::set allocatedExpressions; -SymExpr registerExpression(Z3_ast expr) { +SymExpr registerExpression(SymExpr expr) { if (allocatedExpressions.count(expr) == 0) { // We don't know this expression yet. Record it and increase the reference // counter. @@ -439,13 +439,13 @@ void _sym_push_path_constraint(Z3_ast constraint, int taken, "true" or "false", there is no point in trying to solve the negation or * pushing the constraint to the solver... 
*/ - if (Z3_is_eq_ast(g_context, constraint, Z3_mk_true(g_context))) { + if (Z3_is_eq_ast(g_context, constraint, g_true)) { assert(taken && "We have taken an impossible branch"); Z3_dec_ref(g_context, constraint); return; } - if (Z3_is_eq_ast(g_context, constraint, Z3_mk_false(g_context))) { + if (Z3_is_eq_ast(g_context, constraint, g_false)) { assert(!taken && "We have taken an impossible branch"); Z3_dec_ref(g_context, constraint); return; From 0586a5dfd00ed8bf57cd6dfdc018403b79cd91ce Mon Sep 17 00:00:00 2001 From: Adrian Herrera Date: Sun, 29 Aug 2021 06:26:55 +0000 Subject: [PATCH 41/64] intrinsics: add support for funnel shift and integer abs --- compiler/Runtime.cpp | 4 ++++ compiler/Runtime.h | 3 +++ compiler/Symbolizer.cpp | 21 +++++++++++++++++++++ runtime/RuntimeCommon.cpp | 24 ++++++++++++++++++++++++ runtime/RuntimeCommon.h | 3 +++ 5 files changed, 55 insertions(+) diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index 3e4d1ef9..6c99c077 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -90,6 +90,10 @@ Runtime::Runtime(Module &M) { buildSShlSat = import(M, "_sym_build_sshl_sat", ptrT, ptrT, ptrT); buildUShlSat = import(M, "_sym_build_ushl_sat", ptrT, ptrT, ptrT); + buildFshl = import(M, "_sym_build_funnel_shift_left", ptrT, ptrT, ptrT, ptrT); + buildFshr = import(M, "_sym_build_funnel_shift_right", ptrT, ptrT, ptrT, ptrT); + buildAbs = import(M, "_sym_build_abs", ptrT, ptrT); + setParameterExpression = import(M, "_sym_set_parameter_expression", voidT, int8T, ptrT); getParameterExpression = diff --git a/compiler/Runtime.h b/compiler/Runtime.h index 1987fdd0..3ee50000 100644 --- a/compiler/Runtime.h +++ b/compiler/Runtime.h @@ -60,6 +60,9 @@ struct Runtime { SymFnT buildUSubSat{}; SymFnT buildSShlSat{}; SymFnT buildUShlSat{}; + SymFnT buildFshl{}; + SymFnT buildFshr{}; + SymFnT buildAbs{}; SymFnT pushPathConstraint{}; SymFnT getParameterExpression{}; SymFnT setParameterExpression{}; diff --git a/compiler/Symbolizer.cpp 
b/compiler/Symbolizer.cpp index a486128c..4061d45a 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -331,6 +331,27 @@ void Symbolizer::handleIntrinsicCall(CallBase &I) { #undef DEF_SAT_ARITH_BUILDER + case Intrinsic::fshl: + case Intrinsic::fshr: { + IRBuilder<> IRB(&I); + auto funnelShift = buildRuntimeCall( + IRB, + I.getIntrinsicID() == Intrinsic::fshl ? runtime.buildFshl + : runtime.buildFshr, + {I.getOperand(0), I.getOperand(1), I.getOperand(2)}); + registerSymbolicComputation(funnelShift, &I); + break; + } +#if LLVM_VERSION_MAJOR > 11 + case Intrinsic::abs: { + // Integer absolute value + + IRBuilder<> IRB(&I); + auto abs = buildRuntimeCall(IRB, runtime.buildAbs, I.getOperand(0)); + registerSymbolicComputation(abs, &I); + break; + } +#endif default: errs() << "Warning: unhandled LLVM intrinsic " << callee->getName() << "; the result will be concretized\n"; diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index 8d0b1d8f..3e1b8ac6 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -430,6 +430,30 @@ SymExpr _sym_build_mul_overflow(SymExpr a, SymExpr b, bool is_signed, return buildOverflowResult(_sym_build_mul(a, b), overflow, little_endian); } +SymExpr _sym_build_funnel_shift_left(SymExpr a, SymExpr b, SymExpr c) { + size_t bits = _sym_bits_helper(c); + SymExpr concat = _sym_concat_helper(a, b); + SymExpr shift = _sym_build_unsigned_rem(c, _sym_build_integer(bits, bits)); + + return _sym_extract_helper(_sym_build_shift_left(concat, shift), 0, bits); +} + +SymExpr _sym_build_funnel_shift_right(SymExpr a, SymExpr b, SymExpr c) { + size_t bits = _sym_bits_helper(c); + SymExpr concat = _sym_concat_helper(a, b); + SymExpr shift = _sym_build_unsigned_rem(c, _sym_build_integer(bits, bits)); + + return _sym_extract_helper(_sym_build_logical_shift_right(concat, shift), 0, + bits); +} + +SymExpr _sym_build_abs(SymExpr expr) { + size_t bits = _sym_bits_helper(expr); + return _sym_build_ite( + 
_sym_build_signed_greater_equal(expr, _sym_build_integer(0, bits)), expr, + _sym_build_sub(_sym_build_integer(0, bits), expr)); +} + void _sym_register_expression_region(SymExpr *start, size_t length) { registerExpressionRegion({start, length}); } diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index 756c6a2c..63e4f2c7 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -71,6 +71,9 @@ SymExpr _sym_build_signed_rem(SymExpr a, SymExpr b); SymExpr _sym_build_shift_left(SymExpr a, SymExpr b); SymExpr _sym_build_logical_shift_right(SymExpr a, SymExpr b); SymExpr _sym_build_arithmetic_shift_right(SymExpr a, SymExpr b); +SymExpr _sym_build_funnel_shift_left(SymExpr a, SymExpr b, SymExpr c); +SymExpr _sym_build_funnel_shift_right(SymExpr a, SymExpr b, SymExpr c); +SymExpr _sym_build_abs(SymExpr expr); /* * Arithmetic with overflow From b78b6e4bd8593337441aa409d257e556dc3c4a11 Mon Sep 17 00:00:00 2001 From: Adrian Herrera Date: Thu, 2 Sep 2021 23:27:35 +0000 Subject: [PATCH 42/64] compiler: refactor passing bool to runtime --- compiler/Runtime.cpp | 43 ++++++++++++++++++++--------------------- compiler/Symbolizer.cpp | 11 +++++------ 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index 6c99c077..d77e2890 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -39,27 +39,25 @@ Runtime::Runtime(Module &M) { auto *intPtrType = M.getDataLayout().getIntPtrType(M.getContext()); auto *ptrT = IRB.getInt8PtrTy(); auto *int8T = IRB.getInt8Ty(); + auto *int1T = IRB.getInt1Ty(); auto *voidT = IRB.getVoidTy(); buildInteger = import(M, "_sym_build_integer", ptrT, IRB.getInt64Ty(), int8T); buildInteger128 = import(M, "_sym_build_integer128", ptrT, IRB.getInt64Ty(), IRB.getInt64Ty()); - buildFloat = - import(M, "_sym_build_float", ptrT, IRB.getDoubleTy(), IRB.getInt1Ty()); + buildFloat = import(M, "_sym_build_float", ptrT, IRB.getDoubleTy(), int1T); buildNullPointer = import(M, 
"_sym_build_null_pointer", ptrT); buildTrue = import(M, "_sym_build_true", ptrT); buildFalse = import(M, "_sym_build_false", ptrT); - buildBool = import(M, "_sym_build_bool", ptrT, IRB.getInt1Ty()); + buildBool = import(M, "_sym_build_bool", ptrT, int1T); buildSExt = import(M, "_sym_build_sext", ptrT, ptrT, int8T); buildZExt = import(M, "_sym_build_zext", ptrT, ptrT, int8T); buildTrunc = import(M, "_sym_build_trunc", ptrT, ptrT, int8T); buildBswap = import(M, "_sym_build_bswap", ptrT, ptrT); - buildIntToFloat = import(M, "_sym_build_int_to_float", ptrT, ptrT, - IRB.getInt1Ty(), IRB.getInt1Ty()); - buildFloatToFloat = - import(M, "_sym_build_float_to_float", ptrT, ptrT, IRB.getInt1Ty()); - buildBitsToFloat = - import(M, "_sym_build_bits_to_float", ptrT, ptrT, IRB.getInt1Ty()); + buildIntToFloat = + import(M, "_sym_build_int_to_float", ptrT, ptrT, int1T, int1T); + buildFloatToFloat = import(M, "_sym_build_float_to_float", ptrT, ptrT, int1T); + buildBitsToFloat = import(M, "_sym_build_bits_to_float", ptrT, ptrT, int1T); buildFloatToBits = import(M, "_sym_build_float_to_bits", ptrT, ptrT); buildFloatToSignedInt = import(M, "_sym_build_float_to_signed_integer", ptrT, ptrT, int8T); @@ -71,16 +69,16 @@ Runtime::Runtime(Module &M) { buildBoolXor = import(M, "_sym_build_bool_xor", ptrT, ptrT, ptrT); buildBoolToBit = import(M, "_sym_build_bool_to_bit", ptrT, ptrT); buildBitToBool = import(M, "_sym_build_bit_to_bool", ptrT, ptrT); - pushPathConstraint = import(M, "_sym_push_path_constraint", voidT, ptrT, - IRB.getInt1Ty(), intPtrType); + pushPathConstraint = + import(M, "_sym_push_path_constraint", voidT, ptrT, int1T, intPtrType); // Overflow arithmetic - buildAddOverflow = import(M, "_sym_build_add_overflow", ptrT, ptrT, ptrT, - IRB.getInt1Ty(), IRB.getInt1Ty()); - buildSubOverflow = import(M, "_sym_build_sub_overflow", ptrT, ptrT, ptrT, - IRB.getInt1Ty(), IRB.getInt1Ty()); - buildMulOverflow = import(M, "_sym_build_mul_overflow", ptrT, ptrT, ptrT, - IRB.getInt1Ty(), 
IRB.getInt1Ty()); + buildAddOverflow = + import(M, "_sym_build_add_overflow", ptrT, ptrT, ptrT, int1T, int1T); + buildSubOverflow = + import(M, "_sym_build_sub_overflow", ptrT, ptrT, ptrT, int1T, int1T); + buildMulOverflow = + import(M, "_sym_build_mul_overflow", ptrT, ptrT, ptrT, int1T, int1T); // Saturating arithmetic buildSAddSat = import(M, "_sym_build_sadd_sat", ptrT, ptrT, ptrT); @@ -91,7 +89,8 @@ Runtime::Runtime(Module &M) { buildUShlSat = import(M, "_sym_build_ushl_sat", ptrT, ptrT, ptrT); buildFshl = import(M, "_sym_build_funnel_shift_left", ptrT, ptrT, ptrT, ptrT); - buildFshr = import(M, "_sym_build_funnel_shift_right", ptrT, ptrT, ptrT, ptrT); + buildFshr = + import(M, "_sym_build_funnel_shift_right", ptrT, ptrT, ptrT, ptrT); buildAbs = import(M, "_sym_build_abs", ptrT, ptrT); setParameterExpression = @@ -165,14 +164,14 @@ Runtime::Runtime(Module &M) { memset = import(M, "_sym_memset", voidT, ptrT, ptrT, intPtrType); memmove = import(M, "_sym_memmove", voidT, ptrT, ptrT, intPtrType); readMemory = - import(M, "_sym_read_memory", ptrT, intPtrType, intPtrType, int8T); + import(M, "_sym_read_memory", ptrT, intPtrType, intPtrType, int1T); writeMemory = import(M, "_sym_write_memory", voidT, intPtrType, intPtrType, - ptrT, int8T); + ptrT, int1T); buildZeroBytes = import(M, "_sym_build_zero_bytes", ptrT, intPtrType); buildInsert = - import(M, "_sym_build_insert", ptrT, ptrT, ptrT, IRB.getInt64Ty(), int8T); + import(M, "_sym_build_insert", ptrT, ptrT, ptrT, IRB.getInt64Ty(), int1T); buildExtract = import(M, "_sym_build_extract", ptrT, ptrT, IRB.getInt64Ty(), - IRB.getInt64Ty(), int8T); + IRB.getInt64Ty(), int1T); notifyCall = import(M, "_sym_notify_call", voidT, intPtrType); notifyRet = import(M, "_sym_notify_ret", voidT, intPtrType); diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index 4061d45a..098492ad 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -540,7 +540,7 @@ void Symbolizer::visitLoadInst(LoadInst &I) { 
runtime.readMemory, {IRB.CreatePtrToInt(addr, intPtrType), ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(dataType)), - ConstantInt::get(IRB.getInt8Ty(), isLittleEndian(dataType) ? 1 : 0)}); + IRB.getInt1(isLittleEndian(dataType) ? 1 : 0)}); // Make sure that the expression corresponding to the loaded value is of // bit-vector kind. Shortcutting the runtime calls that we emit here (e.g., @@ -584,8 +584,7 @@ void Symbolizer::visitStoreInst(StoreInst &I) { runtime.writeMemory, {IRB.CreatePtrToInt(I.getPointerOperand(), intPtrType), ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(dataType)), - data, - ConstantInt::get(IRB.getInt8Ty(), dataLayout.isLittleEndian() ? 1 : 0)}); + data, IRB.getInt1(dataLayout.isLittleEndian() ? 1 : 0)}); } void Symbolizer::visitGetElementPtrInst(GetElementPtrInst &I) { @@ -891,7 +890,7 @@ void Symbolizer::visitInsertValueInst(InsertValueInst &I) { runtime.buildInsert, {getSymbolicExpressionOrNull(target), insertedValueExpr, IRB.getInt64(aggregateMemberOffset(target->getType(), I.getIndices())), - IRB.getInt8(isLittleEndian(insertedValueType) ? 1 : 0)}); + IRB.getInt1(isLittleEndian(insertedValueType) ? 1 : 0)}); if (!insertedValueType->isFloatingPointTy()) symbolicInput = {insertedValue, 1, result}; @@ -914,7 +913,7 @@ void Symbolizer::visitExtractValueInst(ExtractValueInst &I) { {targetExpr, IRB.getInt64(aggregateMemberOffset(target->getType(), I.getIndices())), IRB.getInt64(dataLayout.getTypeStoreSize(resultType)), - IRB.getInt8(isLittleEndian(resultType) ? 1 : 0)}); + IRB.getInt1(isLittleEndian(resultType) ? 1 : 0)}); // Floating-point values are a distinct kind in the solver. 
Extracting from an // aggregate gives us a bit vector, so we need to convert the expression to a @@ -1044,7 +1043,7 @@ CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { runtime.readMemory, {IRB.CreatePtrToInt(memory, intPtrType), ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(valueType)), - IRB.getInt8(0)}); + IRB.getInt1(0)}); } } From e7e09264b9ffc936c6e5afb3d4cccac7f20b5556 Mon Sep 17 00:00:00 2001 From: Adrian Herrera Date: Sun, 29 Aug 2021 06:19:07 +0000 Subject: [PATCH 43/64] intrinsics: add intrinsics lowering pass This pass uses LLVM's `IntrinsicLowering` interface to lower supported intrinsics so that they can be symbolized. --- compiler/Pass.cpp | 64 +++++++++++++++++++++++++++++++++++++++++ compiler/Pass.h | 4 +-- compiler/Symbolizer.cpp | 23 +++------------ 3 files changed, 70 insertions(+), 21 deletions(-) diff --git a/compiler/Pass.cpp b/compiler/Pass.cpp index f17fd7e5..b4672794 100644 --- a/compiler/Pass.cpp +++ b/compiler/Pass.cpp @@ -15,6 +15,7 @@ #include "Pass.h" #include +#include #include #include #include @@ -60,6 +61,58 @@ bool instrumentModule(Module &M) { return true; } +bool canLower(const CallInst *CI) { + const Function *Callee = CI->getCalledFunction(); + if (!Callee) + return false; + + switch (Callee->getIntrinsicID()) { + case Intrinsic::expect: + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: + case Intrinsic::prefetch: + case Intrinsic::pcmarker: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: + case Intrinsic::eh_typeid_for: + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + case Intrinsic::assume: +#if LLVM_VERSION_MAJOR > 11 + case Intrinsic::experimental_noalias_scope_decl: +#endif + case Intrinsic::var_annotation: + case Intrinsic::sqrt: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::pow: + case Intrinsic::sin: + case Intrinsic::cos: + case 
Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::round: +#if LLVM_VERSION_MAJOR > 10 + case Intrinsic::roundeven: +#endif + case Intrinsic::copysign: + case Intrinsic::flt_rounds: + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + return true; + default: + return false; + } + + llvm_unreachable("Control cannot reach here"); +} + bool instrumentFunction(Function &F) { auto functionName = F.getName(); if (functionName == kSymCtorName) @@ -73,6 +126,17 @@ bool instrumentFunction(Function &F) { for (auto &I : instructions(F)) allInstructions.push_back(&I); + IntrinsicLowering IL(F.getParent()->getDataLayout()); + for (auto *I : allInstructions) { + if (auto *CI = dyn_cast(I); CI && canLower(CI)) { + IL.LowerIntrinsicCall(CI); + } + } + + allInstructions.clear(); + for (auto &I : instructions(F)) + allInstructions.push_back(&I); + Symbolizer symbolizer(*F.getParent()); symbolizer.symbolizeFunctionArguments(F); diff --git a/compiler/Pass.h b/compiler/Pass.h index cf0676aa..b06377dc 100644 --- a/compiler/Pass.h +++ b/compiler/Pass.h @@ -29,8 +29,8 @@ class SymbolizeLegacyPass : public llvm::FunctionPass { SymbolizeLegacyPass() : FunctionPass(ID) {} - bool doInitialization(llvm::Module &M) override; - bool runOnFunction(llvm::Function &F) override; + virtual bool doInitialization(llvm::Module &M) override; + virtual bool runOnFunction(llvm::Function &F) override; }; #if LLVM_VERSION_MAJOR >= 13 diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index 098492ad..185af825 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -180,15 +180,9 @@ void Symbolizer::handleIntrinsicCall(CallBase &I) { auto *callee = I.getCalledFunction(); switch (callee->getIntrinsicID()) { - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::is_constant: case 
Intrinsic::trap: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::assume: // These are safe to ignore. break; case Intrinsic::memcpy: { @@ -258,22 +252,13 @@ void Symbolizer::handleIntrinsicCall(CallBase &I) { registerSymbolicComputation(abs, &I); break; } - case Intrinsic::cttz: - case Intrinsic::ctpop: - case Intrinsic::ctlz: { - // Various bit-count operations. Expressing these symbolically is - // difficult, so for now we just concretize. - - errs() << "Warning: losing track of symbolic expressions at bit-count " - "operation " - << I << "\n"; - break; - } - case Intrinsic::returnaddress: { + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + case Intrinsic::addressofreturnaddress: { // Obtain the return address of the current function or one of its parents // on the stack. We just concretize. - errs() << "Warning: using concrete value for return address\n"; + errs() << "Warning: using concrete value for return/frame address\n"; break; } case Intrinsic::bswap: { From a337a0c68f8c7699b59b24c0a5f9aacb24744693 Mon Sep 17 00:00:00 2001 From: Adrian Herrera Date: Mon, 30 Aug 2021 09:36:41 +0000 Subject: [PATCH 44/64] instructions: add support for fneg --- compiler/Runtime.cpp | 8 ++++++++ compiler/Runtime.h | 4 ++++ compiler/Symbolizer.cpp | 9 +++++++++ compiler/Symbolizer.h | 1 + runtime/RuntimeCommon.h | 1 + runtime/qsym_backend/Runtime.cpp | 1 + runtime/simple_backend/Runtime.cpp | 4 ++++ 7 files changed, 28 insertions(+) diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index d77e2890..34c22824 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -127,6 +127,14 @@ Runtime::Runtime(Module &M) { #undef LOAD_BINARY_OPERATOR_HANDLER +#define LOAD_UNARY_OPERATOR_HANDLER(constant, name) \ + unaryOperatorHandlers[Instruction::constant] = \ + import(M, "_sym_build_" #name, ptrT, ptrT); + + LOAD_UNARY_OPERATOR_HANDLER(FNeg, fp_neg) + +#undef LOAD_UNARY_OPERATOR_HANDLER + #define 
LOAD_COMPARISON_HANDLER(constant, name) \ comparisonHandlers[CmpInst::constant] = \ import(M, "_sym_build_" #name, ptrT, ptrT, ptrT); diff --git a/compiler/Runtime.h b/compiler/Runtime.h index 3ee50000..61966555 100644 --- a/compiler/Runtime.h +++ b/compiler/Runtime.h @@ -87,6 +87,10 @@ struct Runtime { /// Mapping from binary operators to the functions that build the /// corresponding symbolic expressions. std::array binaryOperatorHandlers{}; + + /// Mapping from unary operators to the functions that build the + /// corresponding symbolic expressions. + std::array unaryOperatorHandlers{}; }; bool isInterceptedFunction(const llvm::Function &f); diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index 185af825..a973bb40 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -422,6 +422,15 @@ void Symbolizer::visitBinaryOperator(BinaryOperator &I) { registerSymbolicComputation(runtimeCall, &I); } +void Symbolizer::visitUnaryOperator(UnaryOperator &I) { + IRBuilder<> IRB(&I); + SymFnT handler = runtime.unaryOperatorHandlers.at(I.getOpcode()); + + assert(handler && "Unable to handle unary operator"); + auto runtimeCall = buildRuntimeCall(IRB, handler, I.getOperand(0)); + registerSymbolicComputation(runtimeCall, &I); +} + void Symbolizer::visitSelectInst(SelectInst &I) { // Select is like the ternary operator ("?:") in C. 
We push the (potentially // negated) condition to the path constraints and copy the symbolic diff --git a/compiler/Symbolizer.h b/compiler/Symbolizer.h index cf0cfcf6..9b42c943 100644 --- a/compiler/Symbolizer.h +++ b/compiler/Symbolizer.h @@ -103,6 +103,7 @@ class Symbolizer : public llvm::InstVisitor { // Implementation of InstVisitor // void visitBinaryOperator(llvm::BinaryOperator &I); + void visitUnaryOperator(llvm::UnaryOperator &I); void visitSelectInst(llvm::SelectInst &I); void visitCmpInst(llvm::CmpInst &I); void visitReturnInst(llvm::ReturnInst &I); diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index 63e4f2c7..cae492fc 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -104,6 +104,7 @@ SymExpr _sym_build_fp_mul(SymExpr a, SymExpr b); SymExpr _sym_build_fp_div(SymExpr a, SymExpr b); SymExpr _sym_build_fp_rem(SymExpr a, SymExpr b); SymExpr _sym_build_fp_abs(SymExpr a); +SymExpr _sym_build_fp_neg(SymExpr a); /* * Boolean operations diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index 6f0b82a9..e7047076 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -351,6 +351,7 @@ UNSUPPORTED(SymExpr _sym_build_fp_mul(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_div(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_rem(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_abs(SymExpr)) +UNSUPPORTED(SymExpr _sym_build_fp_neg(SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_ordered_greater_than(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_ordered_greater_equal(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_ordered_less_than(SymExpr, SymExpr)) diff --git a/runtime/simple_backend/Runtime.cpp b/runtime/simple_backend/Runtime.cpp index 43372883..53ca28cd 100644 --- a/runtime/simple_backend/Runtime.cpp +++ b/runtime/simple_backend/Runtime.cpp @@ -271,6 +271,10 @@ Z3_ast _sym_build_fp_abs(Z3_ast a) { return 
registerExpression(Z3_mk_fpa_abs(g_context, a)); } +Z3_ast _sym_build_fp_neg(Z3_ast a) { + return registerExpression(Z3_mk_fpa_neg(g_context, a)); +} + Z3_ast _sym_build_not(Z3_ast expr) { return registerExpression(Z3_mk_bvnot(g_context, expr)); } From ee44ecaf755701a18fd8dfdd6468dc0385725175 Mon Sep 17 00:00:00 2001 From: Adrian Herrera Date: Wed, 8 Sep 2021 02:15:29 +0000 Subject: [PATCH 45/64] test: add tests for new features Intrinsics, etc. --- test/regression/cxa_vector.ll | 5 ---- test/uadd_sat.ll | 50 +++++++++++++++++++++++++++++++++++ test/uadd_sat.test32 | 2 ++ test/usub_sat.ll | 50 +++++++++++++++++++++++++++++++++++ test/usub_sat.test32 | 2 ++ 5 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 test/uadd_sat.ll create mode 100644 test/uadd_sat.test32 create mode 100644 test/usub_sat.ll create mode 100644 test/usub_sat.test32 diff --git a/test/regression/cxa_vector.ll b/test/regression/cxa_vector.ll index 65a0c3be..40e7a546 100644 --- a/test/regression/cxa_vector.ll +++ b/test/regression/cxa_vector.ll @@ -3,11 +3,6 @@ ; This file exposed a bug in our handling of "invoke" instructions that would ; lead to invalid byte code. 
-; ModuleID = '/home/seba/work/compiler/llvm-project/libcxxabi/src/cxa_vector.cpp' -source_filename = "/home/seba/work/compiler/llvm-project/libcxxabi/src/cxa_vector.cpp" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-linux-gnu" - $__clang_call_terminate = comdat any ; Function Attrs: sspstrong uwtable diff --git a/test/uadd_sat.ll b/test/uadd_sat.ll new file mode 100644 index 00000000..4248fa33 --- /dev/null +++ b/test/uadd_sat.ll @@ -0,0 +1,50 @@ +; RUN: %symcc -O2 %s -o %t +; RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@g_value = dso_local local_unnamed_addr global i16 40, align 2 +@stderr = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8 +@.str = private unnamed_addr constant [18 x i8] c"Failed to read x\0A\00", align 1 +@.str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"yes\00", align 1 +@.str.3 = private unnamed_addr constant [3 x i8] c"no\00", align 1 + +; Function Attrs: nofree nounwind uwtable +define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { +entry: + %x = alloca i16, align 2 + %0 = bitcast i16* %x to i8* + %call = call i64 @read(i32 0, i8* nonnull %0, i64 2) #5 + %cmp.not = icmp eq i64 %call, 2 + %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 + br i1 %cmp.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %2 = call i64 @fwrite(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str, i64 0, i64 0), i64 17, i64 1, %struct._IO_FILE* %1) #6 + br label %cleanup + +if.end: ; preds = %entry + %3 = load i16, i16* %x, align 2 + %4 = load i16, i16* 
@g_value, align 2 + %add = call i16 @llvm.uadd.sat.i16(i16 %3, i16 %4) + %cmp = icmp eq i16 %add, 43981 + %cond = select i1 %cmp, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0) + ; SIMPLE: Trying to solve + ; SIMPLE: Found diverging input + ; SIMPLE-DAG: stdin0 -> #xa5 + ; SIMPLE-DAG: stdin1 -> #xab + ; ANY: no + %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i8* %cond) #6 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ -1, %if.then ], [ 0, %if.end ] + ret i32 %retval.0 +} + +declare i64 @read(i32, i8* nocapture, i64) +declare i32 @fprintf(%struct._IO_FILE* nocapture , i8* nocapture readonly, ...) +declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) +declare i16 @llvm.uadd.sat.i16(i16, i16) diff --git a/test/uadd_sat.test32 b/test/uadd_sat.test32 new file mode 100644 index 00000000..647cc3c7 --- /dev/null +++ b/test/uadd_sat.test32 @@ -0,0 +1,2 @@ +RUN: %symcc -m32 -O2 %S/uadd_sat.ll -o %t_32 +RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %s diff --git a/test/usub_sat.ll b/test/usub_sat.ll new file mode 100644 index 00000000..62fe200a --- /dev/null +++ b/test/usub_sat.ll @@ -0,0 +1,50 @@ +; RUN: %symcc -O2 %s -o %t +; RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@g_value = dso_local local_unnamed_addr global i16 40, align 2 +@stderr = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8 +@.str = private unnamed_addr constant [18 x i8] c"Failed to read x\0A\00", 
align 1 +@.str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"yes\00", align 1 +@.str.3 = private unnamed_addr constant [3 x i8] c"no\00", align 1 + +; Function Attrs: nofree nounwind uwtable +define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { +entry: + %x = alloca i16, align 2 + %0 = bitcast i16* %x to i8* + %call = call i64 @read(i32 0, i8* nonnull %0, i64 2) #5 + %cmp.not = icmp eq i64 %call, 2 + %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 + br i1 %cmp.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %2 = call i64 @fwrite(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str, i64 0, i64 0), i64 17, i64 1, %struct._IO_FILE* %1) #6 + br label %cleanup + +if.end: ; preds = %entry + %3 = load i16, i16* %x, align 2 + %4 = load i16, i16* @g_value, align 2 + %add = call i16 @llvm.usub.sat.i16(i16 %3, i16 %4) + %cmp = icmp eq i16 %add, 43981 + %cond = select i1 %cmp, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0) + ; SIMPLE: Trying to solve + ; SIMPLE: Found diverging input + ; SIMPLE-DAG: stdin0 -> #xf5 + ; SIMPLE-DAG: stdin1 -> #xab + ; ANY: no + %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i8* %cond) #6 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ -1, %if.then ], [ 0, %if.end ] + ret i32 %retval.0 +} + +declare i64 @read(i32, i8* nocapture, i64) +declare i32 @fprintf(%struct._IO_FILE* nocapture , i8* nocapture readonly, ...) 
+declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) +declare i16 @llvm.usub.sat.i16(i16, i16) diff --git a/test/usub_sat.test32 b/test/usub_sat.test32 new file mode 100644 index 00000000..16de7355 --- /dev/null +++ b/test/usub_sat.test32 @@ -0,0 +1,2 @@ +RUN: %symcc -m32 -O2 %S/usub_sat.ll -o %t_32 +RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %s From 5404d2155b19e72f457b16a8a220f77acbd17d7d Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 16 Feb 2023 11:23:03 +0100 Subject: [PATCH 46/64] Handle Booleans in structs Perform conversion to/from the Boolean solver kind for insertvalue/extractvalue instructions the same way as for load/store, and extract the corresponding functionality into shared helpers. --- compiler/Symbolizer.cpp | 131 +++++++++++++++++++++------------------- compiler/Symbolizer.h | 35 ++++++++--- 2 files changed, 98 insertions(+), 68 deletions(-) diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index a973bb40..1422a06a 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -536,21 +536,7 @@ void Symbolizer::visitLoadInst(LoadInst &I) { ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(dataType)), IRB.getInt1(isLittleEndian(dataType) ? 1 : 0)}); - // Make sure that the expression corresponding to the loaded value is of - // bit-vector kind. Shortcutting the runtime calls that we emit here (e.g., - // for floating-point values) is tricky, so instead we make sure that any - // runtime function we call can handle null expressions. 
- - if (dataType->isFloatingPointTy()) { - data = IRB.CreateCall(runtime.buildBitsToFloat, - {data, IRB.getInt1(dataType->isDoubleTy())}); - } else if (dataType->isIntegerTy() && dataType->getIntegerBitWidth() == 1) { - data = IRB.CreateCall(runtime.buildTrunc, - {data, ConstantInt::get(IRB.getInt8Ty(), 1)}); - data = IRB.CreateCall(runtime.buildBitToBool, {data}); - } - - symbolicExpressions[&I] = data; + symbolicExpressions[&I] = convertBitVectorExprForType(IRB, data, dataType); } void Symbolizer::visitStoreInst(StoreInst &I) { @@ -563,22 +549,16 @@ void Symbolizer::visitStoreInst(StoreInst &I) { // for floating-point values) is tricky, so instead we make sure that any // runtime function we call can handle null expressions. - auto *data = getSymbolicExpressionOrNull(I.getValueOperand()); - auto *dataType = I.getValueOperand()->getType(); - if (dataType->isFloatingPointTy()) { - data = IRB.CreateCall(runtime.buildFloatToBits, data); - } else if (dataType->isIntegerTy() && dataType->getIntegerBitWidth() == 1) { - data = IRB.CreateCall(runtime.buildBoolToBit, {data}); - data = IRB.CreateCall( - runtime.buildZExt, - {data, ConstantInt::get(IRB.getInt8Ty(), 7 /* 1 byte */)}); - } + auto V = I.getValueOperand(); + auto maybeConversion = convertExprForTypeToBitVectorExpr(IRB, V); IRB.CreateCall( runtime.writeMemory, {IRB.CreatePtrToInt(I.getPointerOperand(), intPtrType), - ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(dataType)), - data, IRB.getInt1(dataLayout.isLittleEndian() ? 1 : 0)}); + ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(V->getType())), + maybeConversion ? maybeConversion->lastInstruction + : getSymbolicExpressionOrNull(V), + IRB.getInt1(dataLayout.isLittleEndian() ? 
1 : 0)}); } void Symbolizer::visitGetElementPtrInst(GetElementPtrInst &I) { @@ -860,37 +840,36 @@ void Symbolizer::visitInsertValueInst(InsertValueInst &I) { IRBuilder<> IRB(&I); auto target = I.getAggregateOperand(); auto insertedValue = I.getInsertedValueOperand(); - auto insertedValueType = insertedValue->getType(); if (getSymbolicExpression(target) == nullptr && getSymbolicExpression(insertedValue) == nullptr) return; - auto insertedValueExpr = getSymbolicExpressionOrNull(insertedValue); - - // Floating-point values are a distinct kind in the solver, so we need to - // convert them to bit vectors before we can insert them into the expression - // for the aggregate. - Input symbolicInput; - if (insertedValueType->isFloatingPointTy()) { - auto floatConversion = IRB.CreateCall( - runtime.buildFloatToBits, - {insertedValueExpr, IRB.getInt1(insertedValueType->isDoubleTy())}); - symbolicInput = {insertedValue, 0, floatConversion}; - insertedValueExpr = floatConversion; - } + // We may have to convert the expression to bit-vector kind... + auto maybeConversion = convertExprForTypeToBitVectorExpr(IRB, insertedValue); - auto result = IRB.CreateCall( + auto insert = IRB.CreateCall( runtime.buildInsert, - {getSymbolicExpressionOrNull(target), insertedValueExpr, + {getSymbolicExpressionOrNull(target), + // If we had to convert the expression, use the result of the conversion. + maybeConversion ? maybeConversion->lastInstruction + : getSymbolicExpressionOrNull(insertedValue), IRB.getInt64(aggregateMemberOffset(target->getType(), I.getIndices())), - IRB.getInt1(isLittleEndian(insertedValueType) ? 1 : 0)}); - - if (!insertedValueType->isFloatingPointTy()) - symbolicInput = {insertedValue, 1, result}; + IRB.getInt1(isLittleEndian(insertedValue->getType()) ? 
1 : 0)}); + auto insertComputation = + SymbolicComputation(insert, insert, {Input(target, 0, insert)}); + + if (!maybeConversion) { + // If we didn't have to convert, then the inserted value is first used in + // the insertion. + insertComputation.inputs.push_back(Input(insertedValue, 1, insert)); + } else { + // Otherwise, the full computation consists of the conversion followed by + // the insertion. + maybeConversion->merge(insertComputation); + } - registerSymbolicComputation( - {symbolicInput.user, result, {{target, 0, result}, symbolicInput}}, &I); + registerSymbolicComputation(maybeConversion.value_or(insertComputation)); } void Symbolizer::visitExtractValueInst(ExtractValueInst &I) { @@ -909,15 +888,8 @@ void Symbolizer::visitExtractValueInst(ExtractValueInst &I) { IRB.getInt64(dataLayout.getTypeStoreSize(resultType)), IRB.getInt1(isLittleEndian(resultType) ? 1 : 0)}); - // Floating-point values are a distinct kind in the solver. Extracting from an - // aggregate gives us a bit vector, so we need to convert the expression to a - // float if it represents one. - auto result = resultType->isFloatingPointTy() - ? IRB.CreateCall(runtime.buildBitsToFloat, - {extractedBits, - IRB.getInt1(resultType->isDoubleTy())}) - : extractedBits; - + Instruction *result = + convertBitVectorExprForType(IRB, extractedBits, resultType); registerSymbolicComputation( {extractedBits, result, {{target, 0, extractedBits}}}, &I); } @@ -1044,9 +1016,9 @@ CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { llvm_unreachable("Unhandled type for constant expression"); } -Symbolizer::SymbolicComputation -Symbolizer::forceBuildRuntimeCall(IRBuilder<> &IRB, SymFnT function, - ArrayRef> args) { +Symbolizer::SymbolicComputation Symbolizer::forceBuildRuntimeCall( + IRBuilder<> &IRB, SymFnT function, + ArrayRef> args) const { std::vector functionArgs; for (const auto &[arg, symbolic] : args) { functionArgs.push_back(symbolic ? 
getSymbolicExpressionOrNull(arg) : arg); @@ -1100,3 +1072,40 @@ uint64_t Symbolizer::aggregateMemberOffset(Type *aggregateType, return offset; } + +Instruction *Symbolizer::convertBitVectorExprForType(llvm::IRBuilder<> &IRB, + Instruction *I, + Type *T) const { + Instruction *result = I; + + if (T->isFloatingPointTy()) { + result = IRB.CreateCall(runtime.buildBitsToFloat, + {I, IRB.getInt1(T->isDoubleTy())}); + } else if (T->isIntegerTy() && T->getIntegerBitWidth() == 1) { + result = IRB.CreateCall(runtime.buildTrunc, + {I, ConstantInt::get(IRB.getInt8Ty(), 1)}); + result = IRB.CreateCall(runtime.buildBitToBool, {result}); + } + + return result; +} + +std::optional +Symbolizer::convertExprForTypeToBitVectorExpr(llvm::IRBuilder<> &IRB, + llvm::Value *V) const { + auto T = V->getType(); + + if (T->isFloatingPointTy()) { + return buildRuntimeCall(IRB, runtime.buildFloatToBits, {V}); + } else if (T->isIntegerTy() && T->getIntegerBitWidth() == 1) { + if (auto computation = buildRuntimeCall(IRB, runtime.buildBoolToBit, {V})) { + computation->merge( + forceBuildRuntimeCall(IRB, runtime.buildZExt, + {{computation->lastInstruction, false}, + {IRB.getInt8(7 /* 1 byte */), false}})); + return computation; + } + } + + return {}; +} diff --git a/compiler/Symbolizer.h b/compiler/Symbolizer.h index 9b42c943..e2c67827 100644 --- a/compiler/Symbolizer.h +++ b/compiler/Symbolizer.h @@ -205,12 +205,12 @@ class Symbolizer : public llvm::InstVisitor { llvm::CallInst *createValueExpression(llvm::Value *V, llvm::IRBuilder<> &IRB); /// Get the (already created) symbolic expression for a value. - llvm::Value *getSymbolicExpression(llvm::Value *V) { + llvm::Value *getSymbolicExpression(llvm::Value *V) const { auto exprIt = symbolicExpressions.find(V); return (exprIt != symbolicExpressions.end()) ? 
exprIt->second : nullptr; } - llvm::Value *getSymbolicExpressionOrNull(llvm::Value *V) { + llvm::Value *getSymbolicExpressionOrNull(llvm::Value *V) const { auto *expr = getSymbolicExpression(V); if (expr == nullptr) return llvm::ConstantPointerNull::get( @@ -223,9 +223,9 @@ class Symbolizer : public llvm::InstVisitor { } /// Like buildRuntimeCall, but the call is always generated. - SymbolicComputation - forceBuildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function, - llvm::ArrayRef> args); + SymbolicComputation forceBuildRuntimeCall( + llvm::IRBuilder<> &IRB, SymFnT function, + llvm::ArrayRef> args) const; /// Create a call to the specified function in the run-time library. /// @@ -238,7 +238,7 @@ class Symbolizer : public llvm::InstVisitor { /// instruction is emitted and the function returns null. std::optional buildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function, - llvm::ArrayRef> args) { + llvm::ArrayRef> args) const { if (std::all_of(args.begin(), args.end(), [this](std::pair arg) { return (getSymbolicExpression(arg.first) == nullptr); @@ -252,7 +252,7 @@ class Symbolizer : public llvm::InstVisitor { /// Convenience overload that treats all arguments as symbolic. std::optional buildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function, - llvm::ArrayRef symbolicArgs) { + llvm::ArrayRef symbolicArgs) const { std::vector> args; for (const auto &arg : symbolicArgs) { args.emplace_back(arg, true); @@ -307,6 +307,27 @@ class Symbolizer : public llvm::InstVisitor { uint64_t aggregateMemberOffset(llvm::Type *aggregateType, llvm::ArrayRef indices) const; + /// Emit code that converts the bit-vector expression represented by I to an + /// expression that is appropriate for T; return the instruction that computes + /// the result (which may be I if no conversion is needed). + /// + /// The solver doesn't represent all values as bit vectors. 
For example, + /// floating-point values and Booleans are of separate kinds, so we emit code + /// that changes the solver kind of the expression to whatever is needed. + llvm::Instruction *convertBitVectorExprForType(llvm::IRBuilder<> &IRB, + llvm::Instruction *I, + llvm::Type *T) const; + + /// Emit code that converts the expression in I to a bit-vector expression. + /// Return the SymbolicComputation representing the conversion (if a + /// conversion is necessary); the last instruction computes the result. + /// + /// This is the inverse operation of convertBitVectorExprForType (see details + /// there). + std::optional + convertExprForTypeToBitVectorExpr(llvm::IRBuilder<> &IRB, + llvm::Value *V) const; + const Runtime runtime; /// The data layout of the currently processed module. From d1d62fc3b885d854e6d0e2d2ebf8cbda733c475b Mon Sep 17 00:00:00 2001 From: Adrian Herrera Date: Fri, 10 Sep 2021 23:20:22 +0000 Subject: [PATCH 47/64] compiler: use TargetLowering::ExpandInlineAsm This change exploits LLVM's target lowering and its ability to expand inline assembly into explicit LLVM code. Importantly, this expansion includes lifting `bswap` instructions to the `bswap` intrinsic, which can be symbolized with symcc. 
This fixes issue #29 and does away with the hacks made in PR #75 --- compiler/Pass.cpp | 50 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/compiler/Pass.cpp b/compiler/Pass.cpp index b4672794..8e2379fb 100644 --- a/compiler/Pass.cpp +++ b/compiler/Pass.cpp @@ -16,11 +16,21 @@ #include #include +#include +#include #include #include #include +#include +#include #include +#if LLVM_VERSION_MAJOR < 14 +#include +#else +#include +#endif + #include "Runtime.h" #include "Symbolizer.h" @@ -113,6 +123,38 @@ bool canLower(const CallInst *CI) { llvm_unreachable("Control cannot reach here"); } +void liftInlineAssembly(CallInst *CI) { + // TODO When we don't have to worry about the old pass manager anymore, move + // the initialization to the pass constructor. (Currently there are two + // passes, but only if we're on a recent enough LLVM...) + + Function *F = CI->getFunction(); + Module *M = F->getParent(); + auto triple = M->getTargetTriple(); + + std::string error; + auto target = TargetRegistry::lookupTarget(triple, error); + if (!target) { + errs() << "Warning: can't get target info to lift inline assembly\n"; + return; + } + + auto cpu = F->getFnAttribute("target-cpu").getValueAsString(); + auto features = F->getFnAttribute("target-features").getValueAsString(); + + std::unique_ptr TM( + target->createTargetMachine(triple, cpu, features, TargetOptions(), {})); + auto subTarget = TM->getSubtargetImpl(*F); + if (subTarget == nullptr) + return; + + auto targetLowering = subTarget->getTargetLowering(); + if (targetLowering == nullptr) + return; + + targetLowering->ExpandInlineAsm(CI); +} + bool instrumentFunction(Function &F) { auto functionName = F.getName(); if (functionName == kSymCtorName) @@ -128,8 +170,12 @@ bool instrumentFunction(Function &F) { IntrinsicLowering IL(F.getParent()->getDataLayout()); for (auto *I : allInstructions) { - if (auto *CI = dyn_cast(I); CI && canLower(CI)) { - 
IL.LowerIntrinsicCall(CI); + if (auto *CI = dyn_cast(I)) { + if (canLower(CI)) { + IL.LowerIntrinsicCall(CI); + } else if (isa(CI->getCalledOperand())) { + liftInlineAssembly(CI); + } } } From 4f76930e3881a36a50f0514719227db65d8b7161 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Fri, 17 Feb 2023 14:32:54 +0100 Subject: [PATCH 48/64] Stop using std::iterator Fixes eng/fuzz/symcc#10. --- runtime/Shadow.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/runtime/Shadow.h b/runtime/Shadow.h index 36979bdd..fe630bb4 100644 --- a/runtime/Shadow.h +++ b/runtime/Shadow.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -59,12 +58,21 @@ extern std::map g_shadow_pages; /// An iterator that walks over the shadow bytes corresponding to a memory /// region. If there is no shadow for any given memory address, it just returns /// null. -class ReadShadowIterator - : public std::iterator { +class ReadShadowIterator { public: explicit ReadShadowIterator(uintptr_t address) - : std::iterator(), - address_(address), shadow_(getShadow(address)) {} + : address_(address), shadow_(getShadow(address)) {} + + // The STL requires iterator types to expose the following type definitions + // (see std::iterator_traits). Before C++17, it was possible to get them by + // deriving from std::iterator, which is just an empty template struct with + // five typedefs. However, std::iterator was deprecated in C++17 and hence its + // use causes a warning in recent compilers. 
+ using iterator_category = std::bidirectional_iterator_tag; + using value_type = SymExpr; + using difference_type = ptrdiff_t; + using pointer = SymExpr *; + using reference = SymExpr &; ReadShadowIterator &operator++() { auto previousAddress = address_++; From 98ba4484e094aeac9f3e1c5306fddd6f73a420a0 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Tue, 21 Feb 2023 09:47:16 +0000 Subject: [PATCH 49/64] CI: compile with LLVM 15 It's now available in Ubuntu 22.04. --- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index e3bcb936..a2a5fac6 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -19,7 +19,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - llvm_version: [11, 12, 13, 14] + llvm_version: [11, 12, 13, 14, 15] steps: - uses: actions/checkout@v3 with: From ef610536fbff2331b835e718311fdbe9fe09dab8 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Tue, 21 Feb 2023 09:54:09 +0000 Subject: [PATCH 50/64] Fix includes for LLVM 15 --- compiler/Main.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/compiler/Main.cpp b/compiler/Main.cpp index c91af67a..a5997fcc 100644 --- a/compiler/Main.cpp +++ b/compiler/Main.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #if LLVM_VERSION_MAJOR >= 13 @@ -29,6 +28,12 @@ using OptimizationLevel = llvm::PassBuilder::OptimizationLevel; #endif #endif +#if LLVM_VERSION_MAJOR >= 15 +#include +#else +#include +#endif + #include "Pass.h" using namespace llvm; From de3e888097a039789ea553340efb2b15d1f28526 Mon Sep 17 00:00:00 2001 From: tiedaoxiaotubie <617021914@qq.com> Date: Mon, 13 Sep 2021 15:44:45 +0800 Subject: [PATCH 51/64] Add input symbolization in mmap() The original wrapper didn't do symbolization work, which will lose constraints. 
--- runtime/LibcWrappers.cpp | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/runtime/LibcWrappers.cpp b/runtime/LibcWrappers.cpp index aafc1b08..cfe55525 100644 --- a/runtime/LibcWrappers.cpp +++ b/runtime/LibcWrappers.cpp @@ -128,10 +128,31 @@ void *SYM(calloc)(size_t nmemb, size_t size) { void *SYM(mmap64)(void *addr, size_t len, int prot, int flags, int fildes, uint64_t off) { auto *result = mmap64(addr, len, prot, flags, fildes, off); + _sym_set_return_expression(nullptr); + + if (result == MAP_FAILED) // mmap failed + return result; + + if (fildes == inputFileDescriptor) { + /* we update the inputOffset only when mmap() is reading from input file + * HACK! update inputOffset with off parameter sometimes will be dangerous + * We don't know whether there is read() before/after mmap, + * if there is, we have to fix this tricky method :P + */ + inputOffset = off + len; + // Reading symbolic input. + ReadWriteShadow shadow(result, len); + uint8_t *resultBytes = (uint8_t *)result; + std::generate(shadow.begin(), shadow.end(), [resultBytes, i = 0]() mutable { + return _sym_get_input_byte(inputOffset, resultBytes[i++]); + }); + } else if (!isConcrete(result, len)) { + ReadWriteShadow shadow(result, len); + std::fill(shadow.begin(), shadow.end(), nullptr); + } tryAlternative(len, _sym_get_parameter_expression(1), SYM(mmap64)); - _sym_set_return_expression(nullptr); return result; } From 1d4759a25c407f8743deeb18accc2e0c71ed6e18 Mon Sep 17 00:00:00 2001 From: Adrian Herrera Date: Fri, 24 Feb 2023 15:36:25 +1100 Subject: [PATCH 52/64] runtime: more permissive std::filesystem check --- CMakeLists.txt | 3 + cmake/FindFilesystem.cmake | 247 ++++++++++++++++++++++++++++ runtime/qsym_backend/CMakeLists.txt | 5 +- 3 files changed, 252 insertions(+), 3 deletions(-) create mode 100644 cmake/FindFilesystem.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 9cd05dfe..9a582db9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 
+15,8 @@ cmake_minimum_required(VERSION 3.5) project(SymbolicCompiler) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") + option(QSYM_BACKEND "Use the Qsym backend instead of our own" OFF) option(TARGET_32BIT "Make the compiler work correctly with -m32" OFF) @@ -38,6 +40,7 @@ set(SYM_RUNTIME_BUILD_ARGS -DCMAKE_MODULE_LINKER_FLAGS_INIT=${CMAKE_MODULE_LINKER_FLAGS_INIT} -DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS} -DCMAKE_SHARED_LINKER_FLAGS_INIT=${CMAKE_SHARED_LINKER_FLAGS_INIT} + -DCMAKE_MODULE_PATH=${CMAKE_MODULE_PATH} -DCMAKE_SYSROOT=${CMAKE_SYSROOT} -DQSYM_BACKEND=${QSYM_BACKEND} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} diff --git a/cmake/FindFilesystem.cmake b/cmake/FindFilesystem.cmake new file mode 100644 index 00000000..a152e522 --- /dev/null +++ b/cmake/FindFilesystem.cmake @@ -0,0 +1,247 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: + +FindFilesystem +############## + +This module supports the C++17 standard library's filesystem utilities. Use the +:imp-target:`std::filesystem` imported target to + +Options +******* + +The ``COMPONENTS`` argument to this module supports the following values: + +.. find-component:: Experimental + :name: fs.Experimental + + Allows the module to find the "experimental" Filesystem TS version of the + Filesystem library. This is the library that should be used with the + ``std::experimental::filesystem`` namespace. + +.. find-component:: Final + :name: fs.Final + + Finds the final C++17 standard version of the filesystem library. + +If no components are provided, behaves as if the +:find-component:`fs.Final` component was specified. + +If both :find-component:`fs.Experimental` and :find-component:`fs.Final` are +provided, first looks for ``Final``, and falls back to ``Experimental`` in case +of failure. 
If ``Final`` is found, :imp-target:`std::filesystem` and all +:ref:`variables <fs.variables>` will refer to the ``Final`` version. + + +Imported Targets +**************** + +.. imp-target:: std::filesystem + + The ``std::filesystem`` imported target is defined when any requested + version of the C++ filesystem library has been found, whether it is + *Experimental* or *Final*. + + If no version of the filesystem library is available, this target will not + be defined. + + .. note:: + This target has ``cxx_std_17`` as an ``INTERFACE`` + :ref:`compile language standard feature <req-lang-standards>`. Linking + to this target will automatically enable C++17 if no later standard + version is already required on the linking target. + + +.. _fs.variables: + +Variables +********* + +.. variable:: CXX_FILESYSTEM_IS_EXPERIMENTAL + + Set to ``TRUE`` when the :find-component:`fs.Experimental` version of C++ + filesystem library was found, otherwise ``FALSE``. + +.. variable:: CXX_FILESYSTEM_HAVE_FS + + Set to ``TRUE`` when a filesystem header was found. + +.. variable:: CXX_FILESYSTEM_HEADER + + Set to either ``filesystem`` or ``experimental/filesystem`` depending on + whether :find-component:`fs.Final` or :find-component:`fs.Experimental` was + found. + +.. variable:: CXX_FILESYSTEM_NAMESPACE + + Set to either ``std::filesystem`` or ``std::experimental::filesystem`` + depending on whether :find-component:`fs.Final` or + :find-component:`fs.Experimental` was found. + + +Examples +******** + +Using `find_package(Filesystem)` with no component arguments: + +.. code-block:: cmake + + find_package(Filesystem REQUIRED) + + add_executable(my-program main.cpp) + target_link_libraries(my-program PRIVATE std::filesystem) + + +#]=======================================================================] + + +if(TARGET std::filesystem) + # This module has already been processed. Don't do it again. 
+ return() +endif() + +cmake_minimum_required(VERSION 3.10) + +include(CMakePushCheckState) +include(CheckIncludeFileCXX) + +# If we're not cross-compiling, try to run test executables. +# Otherwise, assume that compile + link is a sufficient check. +if(CMAKE_CROSSCOMPILING) + include(CheckCXXSourceCompiles) + macro(_cmcm_check_cxx_source code var) + check_cxx_source_compiles("${code}" ${var}) + endmacro() +else() + include(CheckCXXSourceRuns) + macro(_cmcm_check_cxx_source code var) + check_cxx_source_runs("${code}" ${var}) + endmacro() +endif() + +cmake_push_check_state() + +set(CMAKE_REQUIRED_QUIET ${Filesystem_FIND_QUIETLY}) + +# All of our tests required C++17 or later +set(CMAKE_CXX_STANDARD 17) + +# Normalize and check the component list we were given +set(want_components ${Filesystem_FIND_COMPONENTS}) +if(Filesystem_FIND_COMPONENTS STREQUAL "") + set(want_components Final) +endif() + +# Warn on any unrecognized components +set(extra_components ${want_components}) +list(REMOVE_ITEM extra_components Final Experimental) +foreach(component IN LISTS extra_components) + message(WARNING "Extraneous find_package component for Filesystem: ${component}") +endforeach() + +# Detect which of Experimental and Final we should look for +set(find_experimental TRUE) +set(find_final TRUE) +if(NOT "Final" IN_LIST want_components) + set(find_final FALSE) +endif() +if(NOT "Experimental" IN_LIST want_components) + set(find_experimental FALSE) +endif() + +if(find_final) + check_include_file_cxx("filesystem" _CXX_FILESYSTEM_HAVE_HEADER) + mark_as_advanced(_CXX_FILESYSTEM_HAVE_HEADER) + if(_CXX_FILESYSTEM_HAVE_HEADER) + # We found the non-experimental header. Don't bother looking for the + # experimental one. 
+ set(find_experimental FALSE) + endif() +else() + set(_CXX_FILESYSTEM_HAVE_HEADER FALSE) +endif() + +if(find_experimental) + check_include_file_cxx("experimental/filesystem" _CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) + mark_as_advanced(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) +else() + set(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER FALSE) +endif() + +if(_CXX_FILESYSTEM_HAVE_HEADER) + set(_have_fs TRUE) + set(_fs_header filesystem) + set(_fs_namespace std::filesystem) + set(_is_experimental FALSE) +elseif(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) + set(_have_fs TRUE) + set(_fs_header experimental/filesystem) + set(_fs_namespace std::experimental::filesystem) + set(_is_experimental TRUE) +else() + set(_have_fs FALSE) +endif() + +set(CXX_FILESYSTEM_HAVE_FS ${_have_fs} CACHE BOOL "TRUE if we have the C++ filesystem headers") +set(CXX_FILESYSTEM_HEADER ${_fs_header} CACHE STRING "The header that should be included to obtain the filesystem APIs") +set(CXX_FILESYSTEM_NAMESPACE ${_fs_namespace} CACHE STRING "The C++ namespace that contains the filesystem APIs") +set(CXX_FILESYSTEM_IS_EXPERIMENTAL ${_is_experimental} CACHE BOOL "TRUE if the C++ filesystem library is the experimental version") + +set(_found FALSE) + +if(CXX_FILESYSTEM_HAVE_FS) + # We have some filesystem library available. 
Do link checks + string(CONFIGURE [[ + #include + #include <@CXX_FILESYSTEM_HEADER@> + + int main() { + auto cwd = @CXX_FILESYSTEM_NAMESPACE@::current_path(); + printf("%s", cwd.c_str()); + return EXIT_SUCCESS; + } + ]] code @ONLY) + + # Check a simple filesystem program without any linker flags + _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_NO_LINK_NEEDED) + + set(can_link ${CXX_FILESYSTEM_NO_LINK_NEEDED}) + + if(NOT CXX_FILESYSTEM_NO_LINK_NEEDED) + set(prev_libraries ${CMAKE_REQUIRED_LIBRARIES}) + # Add the libstdc++ flag + set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lstdc++fs) + _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_STDCPPFS_NEEDED) + set(can_link ${CXX_FILESYSTEM_STDCPPFS_NEEDED}) + if(NOT CXX_FILESYSTEM_STDCPPFS_NEEDED) + # Try the libc++ flag + set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lc++fs) + _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_CPPFS_NEEDED) + set(can_link ${CXX_FILESYSTEM_CPPFS_NEEDED}) + endif() + endif() + + if(can_link) + add_library(std::filesystem INTERFACE IMPORTED) + set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_17) + set(_found TRUE) + + if(CXX_FILESYSTEM_NO_LINK_NEEDED) + # Nothing to add... 
+ elseif(CXX_FILESYSTEM_STDCPPFS_NEEDED) + set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_LINK_LIBRARIES -lstdc++fs) + elseif(CXX_FILESYSTEM_CPPFS_NEEDED) + set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_LINK_LIBRARIES -lc++fs) + endif() + endif() +endif() + +cmake_pop_check_state() + +set(Filesystem_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::filesystem" FORCE) + +if(Filesystem_FIND_REQUIRED AND NOT Filesystem_FOUND) + message(FATAL_ERROR "Cannot run simple program using std::filesystem") +endif() diff --git a/runtime/qsym_backend/CMakeLists.txt b/runtime/qsym_backend/CMakeLists.txt index 5977451f..34c01320 100644 --- a/runtime/qsym_backend/CMakeLists.txt +++ b/runtime/qsym_backend/CMakeLists.txt @@ -92,6 +92,5 @@ target_link_libraries(SymRuntime ${Z3_LIBRARIES} ${QSYM_LLVM_DEPS}) # some current LTS distributions ship a GCC that requires libstdc++fs for # std::filesystem - we catch this case in order to enable users of such systems # to build with the default compiler. -if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - target_link_libraries(SymRuntime stdc++fs) -endif() +find_package(Filesystem COMPONENTS Final Experimental) +target_link_libraries(SymRuntime std::filesystem) From f2d5060846dc19620260a24bb3c9447aed4592a2 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Sat, 11 Mar 2023 12:30:18 +0100 Subject: [PATCH 53/64] Improve procedure names in the Ada bindings Fixes eng/fuzz/symcc#12. --- runtime/bindings/ada/symcc.ads | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/runtime/bindings/ada/symcc.ads b/runtime/bindings/ada/symcc.ads index 7d7587d6..bdda6c41 100644 --- a/runtime/bindings/ada/symcc.ads +++ b/runtime/bindings/ada/symcc.ads @@ -21,7 +21,7 @@ with System; -- SymCC runtime (see RuntimeCommon.h). 
package SymCC is - procedure SymCC_Make_Symbolic + procedure Make_Symbolic (Address : System.Address; Size : Interfaces.C.size_t) with Import => True, Convention => C, External_Name => "symcc_make_symbolic"; -- Mark a memory region as symbolic program input. @@ -33,12 +33,11 @@ package SymCC is (Data_Block : System.Address; Size : Interfaces.C.size_t) with Convention => C; -- Type of functions that the runtime can call when it generates new - -- program inputs (see SymCC_Set_Test_Case_Handler). + -- program inputs (see Set_Test_Case_Handler). - procedure SymCC_Set_Test_Case_Handler + procedure Set_Test_Case_Handler (Callback : Test_Case_Handler_Callback_Type) with - Import => True, - Convention => C, + Import => True, Convention => C, External_Name => "symcc_set_test_case_handler"; -- Define a custom handler for new program inputs. -- @param Callback The procedure to be called for each new input. From eccfbda9c3f4bb4dea3754329951de033c04289a Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Fri, 17 Mar 2023 11:07:13 +0100 Subject: [PATCH 54/64] Simplify the AdaCore CI script The removed commands are now executed automatically. --- .adacore-gitlab-ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.adacore-gitlab-ci.yml b/.adacore-gitlab-ci.yml index a11b0459..c0ff5f75 100644 --- a/.adacore-gitlab-ci.yml +++ b/.adacore-gitlab-ci.yml @@ -5,8 +5,6 @@ anod_build: - mem:16 stage: build script: - - . ~/.aws_container_credentials - - export PATH=/it/e3/bin:$PATH - export ANOD_DEFAULT_SANDBOX_DIR=/it/wave # Check out QSYM From 9ff0194c12185b04387809d1e635023239c96b17 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Fri, 17 Mar 2023 17:14:51 +0100 Subject: [PATCH 55/64] Add a failing test to reproduce struct expression creation failure This is a reproducer for eurecom-s3/symcc#134. 
--- test/concrete_structs.ll | 106 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 test/concrete_structs.ll diff --git a/test/concrete_structs.ll b/test/concrete_structs.ll new file mode 100644 index 00000000..5eb6ecba --- /dev/null +++ b/test/concrete_structs.ll @@ -0,0 +1,106 @@ +; This file is part of SymCC. +; +; SymCC is free software: you can redistribute it and/or modify it under the +; terms of the GNU General Public License as published by the Free Software +; Foundation, either version 3 of the License, or (at your option) any later +; version. +; +; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +; A PARTICULAR PURPOSE. See the GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License along with +; SymCC. If not, see <https://www.gnu.org/licenses/>. + +; Verify that we create correct expressions from struct values. For each kind of +; value, we trigger expression creation by inserting a symbolic value into the +; struct. Compiling this code with SymCC and verifying that the resulting binary +; exits cleanly shows that SymCC's instrumentation doesn't break the execution +; of the program. Moreover, we store a struct value to memory, load one of its +; elements back into a register, and branch based on it in order to trigger the +; solver; by checking the generated test case we can verify that the expression +; was correct. +; +; This test reproduces a bug where creating expressions for some structs would +; lead to a program crash. +; +; Since the bitcode is written by hand, we first run llc on it because it +; performs a validity check, whereas Clang doesn't. 
+ +; RUN: llc %s -o /dev/null +; RUN: %symcc %s -o %t +; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 + +target triple = "x86_64-pc-linux-gnu" + +%struct_type = type { i8, i32, i8 } + +; Global variable to record whether we've found a solution. Since the simple +; backend doesn't support test-case handlers, we start with "true". +@solved = global i1 1 + +; Our test-case handler verifies that the new test case is a 32-bit integer +; with the value 42. +define void @test_case_handler(i8* %data, i64 %data_length) { + %correct_length = icmp eq i64 %data_length, 4 + br i1 %correct_length, label %check_data, label %failed + +check_data: + %value_pointer = bitcast i8* %data to i32* + %value = load i32, i32* %value_pointer + %correct_value = icmp eq i32 %value, 42 + br i1 %correct_value, label %all_good, label %failed + +all_good: + store i1 1, i1* @solved + ret void + +failed: + store i1 0, i1* @solved + ret void +} + +define i32 @main(i32 %argc, i8** %argv) { + ; Register our test-case handler. + call void @symcc_set_test_case_handler(void (i8*, i64)* @test_case_handler) + + ; Create a symbolic value that we can use to trigger the creation of struct + ; expressions. + %symbolic_value_mem = alloca i32 + store i32 1, i32* %symbolic_value_mem + call void @symcc_make_symbolic(i32* %symbolic_value_mem, i64 4) + %symbolic_value = load i32, i32* %symbolic_value_mem + %symbolic_byte = trunc i32 %symbolic_value to i8 + + ; Undef struct + insertvalue %struct_type undef, i32 %symbolic_value, 1 + + ; Struct with concrete value + insertvalue %struct_type { i8 1, i32 undef, i8 2 }, i32 %symbolic_value, 1 + + ; Struct with symbolic value + %symbolic_struct = insertvalue %struct_type undef, i8 %symbolic_byte, 0 + insertvalue %struct_type %symbolic_struct, i32 %symbolic_value, 1 + + ; Write a struct to memory and load one of its elements back into a register. + ; It's important to also insert a symbolic value into the struct, so that we + ; generate an expression in the first place. 
+ %struct_mem = alloca %struct_type + %struct_value = insertvalue %struct_type { i8 0, i32 42, i8 undef }, i8 %symbolic_byte, 2 + store %struct_type %struct_value, %struct_type* %struct_mem + %value_address = getelementptr %struct_type, %struct_type* %struct_mem, i32 0, i32 1 + %value_loaded = load i32, i32* %value_address + %is_forty_two = icmp eq i32 %value_loaded, %symbolic_value + br i1 %is_forty_two, label %never_executed, label %done + +never_executed: + br label %done + +done: + %solved = load i1, i1* @solved + %result = select i1 %solved, i32 0, i32 1 + ret i32 %result +} + +declare void @symcc_make_symbolic(i32*, i64) +declare void @symcc_set_test_case_handler(void (i8*, i64)*) From e72245aad038c4ae6508381ad084d4bd9b3f9dd5 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Mon, 20 Mar 2023 13:30:53 +0100 Subject: [PATCH 56/64] Fix expression creation for concrete structs Fixes eurecom-s3/symcc#134. --- compiler/Runtime.cpp | 3 ++ compiler/Runtime.h | 1 + compiler/Symbolizer.cpp | 76 +++++++++++++++++++++++++++++---------- runtime/RuntimeCommon.cpp | 4 ++- test/concrete_structs.ll | 4 ++- 5 files changed, 68 insertions(+), 20 deletions(-) diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index 34c22824..a97b30f6 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -69,6 +69,9 @@ Runtime::Runtime(Module &M) { buildBoolXor = import(M, "_sym_build_bool_xor", ptrT, ptrT, ptrT); buildBoolToBit = import(M, "_sym_build_bool_to_bit", ptrT, ptrT); buildBitToBool = import(M, "_sym_build_bit_to_bool", ptrT, ptrT); + buildConcat = + import(M, "_sym_concat_helper", ptrT, ptrT, + ptrT); // doesn't follow naming convention for historic reasons pushPathConstraint = import(M, "_sym_push_path_constraint", voidT, ptrT, int1T, intPtrType); diff --git a/compiler/Runtime.h b/compiler/Runtime.h index 61966555..3f26c76d 100644 --- a/compiler/Runtime.h +++ b/compiler/Runtime.h @@ -63,6 +63,7 @@ struct Runtime { SymFnT buildFshl{}; SymFnT buildFshr{}; SymFnT 
buildAbs{}; + SymFnT buildConcat{}; SymFnT pushPathConstraint{}; SymFnT getParameterExpression{}; SymFnT setParameterExpression{}; diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index 1422a06a..ff78b47c 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -558,7 +558,7 @@ void Symbolizer::visitStoreInst(StoreInst &I) { ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(V->getType())), maybeConversion ? maybeConversion->lastInstruction : getSymbolicExpressionOrNull(V), - IRB.getInt1(dataLayout.isLittleEndian() ? 1 : 0)}); + IRB.getInt1(isLittleEndian(V->getType()) ? 1 : 0)}); } void Symbolizer::visitGetElementPtrInst(GetElementPtrInst &I) { @@ -869,7 +869,7 @@ void Symbolizer::visitInsertValueInst(InsertValueInst &I) { maybeConversion->merge(insertComputation); } - registerSymbolicComputation(maybeConversion.value_or(insertComputation)); + registerSymbolicComputation(maybeConversion.value_or(insertComputation), &I); } void Symbolizer::visitExtractValueInst(ExtractValueInst &I) { @@ -979,37 +979,77 @@ CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { {IRB.CreatePtrToInt(V, IRB.getInt64Ty()), IRB.getInt8(ptrBits)}); } - if (valueType->isStructTy()) { + if (auto structType = dyn_cast(valueType)) { // In unoptimized code we may see structures in SSA registers. What we // want is a single bit-vector expression describing their contents, but - // unfortunately we can't take the address of a register. We fix the - // problem with a hack: we write the register to memory and initialize the - // expression from there. + // unfortunately we can't take the address of a register. What we do instead + // is to build the expression recursively by iterating over the elements of + // the structure. // // An alternative would be to change the representation of structures in // SSA registers to "shadow structures" that contain one expression per // member. 
However, this would put an additional burden on the handling of // cast instructions, because expressions would have to be converted // between different representations according to the type. - // - // Unfortunately, the hack doesn't work when the entire structure is - // "undef"; writing it to memory is a well-defined bitcode operation, but - // the symbolic expression for the memory region will just be null because - // it's entirely concrete. We create an all-zeros expression for it instead. if (isa(V)) { + // This is just an optimization for completely undefined structs; we + // create an all-zeros expression without iterating over the elements. return IRB.CreateCall( runtime.buildZeroBytes, {ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(valueType))}); } else { - auto *memory = IRB.CreateAlloca(valueType); - IRB.CreateStore(V, memory); - return IRB.CreateCall( - runtime.readMemory, - {IRB.CreatePtrToInt(memory, intPtrType), - ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(valueType)), - IRB.getInt1(0)}); + // Iterate over the elements of the struct and concatenate the + // corresponding expressions (along with any padding that might be + // needed). + + auto structLayout = dataLayout.getStructLayout(structType); + auto constantStructValue = dyn_cast(V); + size_t offset = 0; // The end of the expressed portion in bytes. + CallInst *expr = nullptr; + auto append = [&](CallInst *newExpr) { + expr = expr ? IRB.CreateCall(runtime.buildConcat, {expr, newExpr}) + : newExpr; + }; + + for (size_t i = 0; i < structType->getNumElements(); i++) { + // Build an expression for any padding preceding the current element. + if (auto padding = structLayout->getElementOffset(i) - offset; + padding > 0) { + append(IRB.CreateCall(runtime.buildZeroBytes, + {ConstantInt::get(intPtrType, padding)})); + } + + // Build the expression for the current element. If the struct is not a + // constant, we need to read the element with extractvalue. 
+ auto elementExpr = createValueExpression( + constantStructValue ? constantStructValue->getAggregateElement(i) + : IRB.CreateExtractValue(V, i), + IRB); + + // If the element is represented in little-endian byte order in memory, + // swap the bytes. + auto elementType = structType->getElementType(i); + if (isLittleEndian(elementType) && + dataLayout.getTypeStoreSize(elementType) > 1) { + elementExpr = IRB.CreateCall(runtime.buildBswap, {elementExpr}); + } + + append(elementExpr); + + offset = structLayout->getElementOffset(i) + + dataLayout.getTypeStoreSize(structType->getElementType(i)); + } + + // Insert padding at the end, if any. + if (auto finalPadding = dataLayout.getTypeStoreSize(structType) - offset; + finalPadding > 0) { + append(IRB.CreateCall(runtime.buildZeroBytes, + {ConstantInt::get(intPtrType, finalPadding)})); + } + + return expr; } } diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index 3e1b8ac6..127c81de 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -226,7 +226,9 @@ SymExpr _sym_build_insert(SymExpr target, SymExpr to_insert, uint64_t offset, SymExpr beforeInsert = (offset == 0) ? nullptr : _sym_build_extract(target, 0, offset, false); - SymExpr newPiece = little_endian ? _sym_build_bswap(to_insert) : to_insert; + SymExpr newPiece = (little_endian && bitsToInsert > 8) + ? _sym_build_bswap(to_insert) + : to_insert; uint64_t afterLen = (_sym_bits_helper(target) / 8) - offset - (bitsToInsert / 8); SymExpr afterInsert = diff --git a/test/concrete_structs.ll b/test/concrete_structs.ll index 5eb6ecba..6cde05da 100644 --- a/test/concrete_structs.ll +++ b/test/concrete_structs.ll @@ -29,7 +29,7 @@ ; RUN: llc %s -o /dev/null ; RUN: %symcc %s -o %t -; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 +; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s target triple = "x86_64-pc-linux-gnu" @@ -63,6 +63,7 @@ failed: define i32 @main(i32 %argc, i8** %argv) { ; Register our test-case handler. 
call void @symcc_set_test_case_handler(void (i8*, i64)* @test_case_handler) + ; SIMPLE: Warning: test-case handlers ; Create a symbolic value that we can use to trigger the creation of struct ; expressions. @@ -92,6 +93,7 @@ define i32 @main(i32 %argc, i8** %argv) { %value_loaded = load i32, i32* %value_address %is_forty_two = icmp eq i32 %value_loaded, %symbolic_value br i1 %is_forty_two, label %never_executed, label %done + ; QSYM: SMT never_executed: br label %done From 0f1159a11cc17b60034085fc81df756f9b5e672c Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Mon, 17 Apr 2023 16:16:02 +0200 Subject: [PATCH 57/64] Support test-case handlers in instrumented code This commit fixes eurecom-s3/symcc#140 by calling the test-case handler with the proper calling convention for instrumented code, i.e., setting parameter expressions before the call. --- runtime/qsym_backend/Runtime.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index e7047076..8cc31db4 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -137,6 +137,12 @@ class EnhancedQsymSolver : public qsym::Solver { void saveValues(const std::string &suffix) override { if (auto handler = g_test_case_handler) { auto values = getConcreteValues(); + // The test-case handler may be instrumented, so let's call it with + // argument expressions to meet instrumented code's expectations. + // Otherwise, we might end up erroneously using whatever expression was + // last registered for a function parameter. 
+ _sym_set_parameter_expression(0, nullptr); + _sym_set_parameter_expression(1, nullptr); handler(values.data(), values.size()); } else { Solver::saveValues(suffix); From 09b583ed0bb8ffe7a72ee507a2189eaf38fa9127 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Tue, 18 Apr 2023 10:03:28 +0200 Subject: [PATCH 58/64] GitHub CI: update apt sources before installing dependencies CI is failing because packages aren't available in the repositories. --- .github/workflows/run_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index a2a5fac6..f4003558 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -26,6 +26,7 @@ jobs: submodules: true - name: Install dependencies run: | + sudo apt-get update sudo apt-get install -y \ llvm-${{ matrix.llvm_version }}-dev \ libz3-dev \ From 26277eadb03715d2a37302807dbbf0cbea3e296a Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 6 Apr 2023 09:51:22 +0200 Subject: [PATCH 59/64] Add a failing test for floats and Booleans in structs These tests reproduce eurecom-s3/symcc#138. They show that we fail to generate expressions for struct literals containing floats/Booleans, as well as for the result of inserting symbolic floats/Booleans into structs. --- test/concrete_structs.ll | 13 +++++----- test/symbolic_structs.ll | 52 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 test/symbolic_structs.ll diff --git a/test/concrete_structs.ll b/test/concrete_structs.ll index 6cde05da..e3d8748c 100644 --- a/test/concrete_structs.ll +++ b/test/concrete_structs.ll @@ -33,7 +33,10 @@ target triple = "x86_64-pc-linux-gnu" -%struct_type = type { i8, i32, i8 } +; The struct type which we'll create expressions for. Include a floating-point +; value and a Boolean because they're represented with non-bitvector solver +; variables (reproducing eurecom-s3/symcc#138). 
+%struct_type = type { i8, i32, i8, float, i1 } ; Global variable to record whether we've found a solution. Since the simple ; backend doesn't support test-case handlers, we start with "true". @@ -77,17 +80,13 @@ define i32 @main(i32 %argc, i8** %argv) { insertvalue %struct_type undef, i32 %symbolic_value, 1 ; Struct with concrete value - insertvalue %struct_type { i8 1, i32 undef, i8 2 }, i32 %symbolic_value, 1 - - ; Struct with symbolic value - %symbolic_struct = insertvalue %struct_type undef, i8 %symbolic_byte, 0 - insertvalue %struct_type %symbolic_struct, i32 %symbolic_value, 1 + insertvalue %struct_type { i8 1, i32 undef, i8 2, float undef, i1 undef }, i32 %symbolic_value, 1 ; Write a struct to memory and load one of its elements back into a register. ; It's important to also insert a symbolic value into the struct, so that we ; generate an expression in the first place. %struct_mem = alloca %struct_type - %struct_value = insertvalue %struct_type { i8 0, i32 42, i8 undef }, i8 %symbolic_byte, 2 + %struct_value = insertvalue %struct_type { i8 0, i32 42, i8 undef, float undef, i1 undef }, i8 %symbolic_byte, 2 store %struct_type %struct_value, %struct_type* %struct_mem %value_address = getelementptr %struct_type, %struct_type* %struct_mem, i32 0, i32 1 %value_loaded = load i32, i32* %value_address diff --git a/test/symbolic_structs.ll b/test/symbolic_structs.ll new file mode 100644 index 00000000..93a112b4 --- /dev/null +++ b/test/symbolic_structs.ll @@ -0,0 +1,52 @@ +; This file is part of SymCC. +; +; SymCC is free software: you can redistribute it and/or modify it under the +; terms of the GNU General Public License as published by the Free Software +; Foundation, either version 3 of the License, or (at your option) any later +; version. +; +; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +; A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License along with +; SymCC. If not, see <https://www.gnu.org/licenses/>. + +; Verify that we correctly insert into symbolic struct values. We insert values +; of various types into a symbolic struct, thus triggering expression updates. +; Compiling this code with SymCC and verifying that the resulting binary exits +; cleanly shows that SymCC's instrumentation doesn't break the execution of the +; program. +; +; This test reproduces a bug where inserting a concrete floating-point value +; into a symbolic struct would lead to a program crash (eurecom-s3/symcc#138). +; +; Since the bitcode is written by hand, we first run llc on it because it +; performs a validity check, whereas Clang doesn't. + +; RUN: llc %s -o /dev/null +; RUN: %symcc %s -o %t +; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 + +target triple = "x86_64-pc-linux-gnu" + +; The struct type of our symbolic value. Include a floating-point value and a +; Boolean because they're represented with non-bitvector solver variables +; (reproducing eurecom-s3/symcc#138). +%struct_type = type { i8, i32, i8, float, i1 } + +define i32 @main(i32 %argc, i8** %argv) { + ; Create a symbolic struct value that we can subsequently insert values into. + %struct_value_mem = alloca %struct_type + call void @symcc_make_symbolic(%struct_type* %struct_value_mem, i64 20) + %symbolic_struct = load %struct_type, %struct_type* %struct_value_mem + + ; Insert values of various types, triggering the creation of new expressions.
+ insertvalue %struct_type %symbolic_struct, i32 5, 1 + insertvalue %struct_type %symbolic_struct, float 42.0, 3 + insertvalue %struct_type %symbolic_struct, i1 1, 4 + + ret i32 0 +} + +declare void @symcc_make_symbolic(%struct_type*, i64) From acff1d584dadaea0a82c6be9b40633154d40d6ae Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 6 Apr 2023 13:30:14 +0200 Subject: [PATCH 60/64] Fix handling of expressions for floats and Booleans in structs This commit fixes two problems, both part of eurecom-s3/symcc#138: 1. When creating expressions for struct literals, we now convert expressions for floats and Booleans to bit-vector kind before attempting to concatenate them with the rest of the struct expression. 2. In "insertvalue" instructions with a symbolic target and a concrete value to insert we now make sure that the runtime call to create the expression for the inserted value isn't optimized out. --- compiler/Symbolizer.cpp | 52 ++++++++++++++++++++++++----------------- compiler/Symbolizer.h | 10 ++++---- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index ff78b47c..d111c52d 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -550,7 +550,8 @@ void Symbolizer::visitStoreInst(StoreInst &I) { // runtime function we call can handle null expressions. auto V = I.getValueOperand(); - auto maybeConversion = convertExprForTypeToBitVectorExpr(IRB, V); + auto maybeConversion = + convertExprForTypeToBitVectorExpr(IRB, V, getSymbolicExpression(V)); IRB.CreateCall( runtime.writeMemory, @@ -846,7 +847,8 @@ void Symbolizer::visitInsertValueInst(InsertValueInst &I) { return; // We may have to convert the expression to bit-vector kind... 
- auto maybeConversion = convertExprForTypeToBitVectorExpr(IRB, insertedValue); + auto maybeConversion = convertExprForTypeToBitVectorExpr( + IRB, insertedValue, getSymbolicExpressionOrNull(insertedValue)); auto insert = IRB.CreateCall( runtime.buildInsert, @@ -937,7 +939,7 @@ void Symbolizer::visitInstruction(Instruction &I) { << "; the result will be concretized\n"; } -CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { +Instruction *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { auto *valueType = V->getType(); if (isa(V)) { @@ -1007,8 +1009,8 @@ CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { auto structLayout = dataLayout.getStructLayout(structType); auto constantStructValue = dyn_cast(V); size_t offset = 0; // The end of the expressed portion in bytes. - CallInst *expr = nullptr; - auto append = [&](CallInst *newExpr) { + Instruction *expr = nullptr; + auto append = [&](Instruction *newExpr) { expr = expr ? IRB.CreateCall(runtime.buildConcat, {expr, newExpr}) : newExpr; }; @@ -1023,10 +1025,17 @@ CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { // Build the expression for the current element. If the struct is not a // constant, we need to read the element with extractvalue. - auto elementExpr = createValueExpression( - constantStructValue ? constantStructValue->getAggregateElement(i) - : IRB.CreateExtractValue(V, i), - IRB); + auto element = constantStructValue + ? constantStructValue->getAggregateElement(i) + : IRB.CreateExtractValue(V, i); + auto elementExpr = createValueExpression(element, IRB); + + // The expression may be of a different kind than bit vector; in this + // case, we need to convert it. + if (auto conversion = + convertExprForTypeToBitVectorExpr(IRB, element, elementExpr)) { + elementExpr = conversion->lastInstruction; + } // If the element is represented in little-endian byte order in memory, // swap the bytes. 
@@ -1131,21 +1140,22 @@ Instruction *Symbolizer::convertBitVectorExprForType(llvm::IRBuilder<> &IRB, } std::optional -Symbolizer::convertExprForTypeToBitVectorExpr(llvm::IRBuilder<> &IRB, - llvm::Value *V) const { +Symbolizer::convertExprForTypeToBitVectorExpr(IRBuilder<> &IRB, Value *V, + Value *Expr) const { + if (Expr == nullptr) + return {}; + auto T = V->getType(); if (T->isFloatingPointTy()) { - return buildRuntimeCall(IRB, runtime.buildFloatToBits, {V}); + auto floatBits = IRB.CreateCall(runtime.buildFloatToBits, {Expr}); + return SymbolicComputation(floatBits, floatBits, {Input(V, 0, floatBits)}); } else if (T->isIntegerTy() && T->getIntegerBitWidth() == 1) { - if (auto computation = buildRuntimeCall(IRB, runtime.buildBoolToBit, {V})) { - computation->merge( - forceBuildRuntimeCall(IRB, runtime.buildZExt, - {{computation->lastInstruction, false}, - {IRB.getInt8(7 /* 1 byte */), false}})); - return computation; - } + auto bitExpr = IRB.CreateCall(runtime.buildBoolToBit, {Expr}); + auto bitVectorExpr = IRB.CreateCall(runtime.buildZExt, + {bitExpr, IRB.getInt8(7 /* 1 byte */)}); + return SymbolicComputation(bitExpr, bitVectorExpr, {Input(V, 0, bitExpr)}); + } else { + return {}; } - - return {}; } diff --git a/compiler/Symbolizer.h b/compiler/Symbolizer.h index e2c67827..1b6ec3e3 100644 --- a/compiler/Symbolizer.h +++ b/compiler/Symbolizer.h @@ -202,7 +202,7 @@ class Symbolizer : public llvm::InstVisitor { }; /// Create an expression that represents the concrete value. - llvm::CallInst *createValueExpression(llvm::Value *V, llvm::IRBuilder<> &IRB); + llvm::Instruction *createValueExpression(llvm::Value *V, llvm::IRBuilder<> &IRB); /// Get the (already created) symbolic expression for a value. llvm::Value *getSymbolicExpression(llvm::Value *V) const { @@ -318,15 +318,15 @@ class Symbolizer : public llvm::InstVisitor { llvm::Instruction *I, llvm::Type *T) const; - /// Emit code that converts the expression in I to a bit-vector expression. 
- /// Return the SymbolicComputation representing the conversion (if a - /// conversion is necessary); the last instruction computes the result. + /// Emit code that converts the expression Expr for V to a bit-vector + /// expression. Return the SymbolicComputation representing the conversion + /// (if a conversion is necessary); the last instruction computes the result. /// /// This is the inverse operation of convertBitVectorExprForType (see details /// there). std::optional convertExprForTypeToBitVectorExpr(llvm::IRBuilder<> &IRB, - llvm::Value *V) const; + llvm::Value *V, llvm::Value *Expr) const; const Runtime runtime; From 630a39a0fbd96ed702b4ed28fdf50c777e266174 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Thu, 6 Apr 2023 13:40:39 +0200 Subject: [PATCH 61/64] Fix handling of floats in structs with the QSYM backend Since the QSYM backend doesn't support floating-point arithmetic, we don't ordinarily generate expressions for floats. This is a problem when we need to create expressions for structs containing floats. Add dummy runtime functions to prevent crashes in this case. Closes eurecom-s3/symcc#138. --- runtime/qsym_backend/Runtime.cpp | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index 8cc31db4..ea1cd848 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -347,10 +347,27 @@ SymExpr _sym_build_bool_to_bit(SymExpr expr) { // Floating-point operations (unsupported in QSYM) // +// Even if we don't generally support operations on floats in this backend, we +// need dummy implementations of a few functions to help the parts of the +// instrumentation that deal with structures; if structs contain floats, the +// instrumentation expects to be able to create bit-vector expressions for +// them. 
+ +SymExpr _sym_build_float(double, int is_double) { + // We create an all-zeros bit vector, mainly to capture the length of the + // value. This is compatible with our dummy implementation of + // _sym_build_float_to_bits. + return registerExpression( + g_expr_builder->createConstant(0, is_double ? 64 : 32)); +} + +SymExpr _sym_build_float_to_bits(SymExpr expr) { + return expr; +} + #define UNSUPPORTED(prototype) \ prototype { return nullptr; } -UNSUPPORTED(SymExpr _sym_build_float(double, int)) UNSUPPORTED(SymExpr _sym_build_fp_add(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_sub(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_mul(SymExpr, SymExpr)) @@ -375,7 +392,6 @@ UNSUPPORTED(SymExpr _sym_build_float_unordered_not_equal(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_int_to_float(SymExpr, int, int)) UNSUPPORTED(SymExpr _sym_build_float_to_float(SymExpr, int)) UNSUPPORTED(SymExpr _sym_build_bits_to_float(SymExpr, int)) -UNSUPPORTED(SymExpr _sym_build_float_to_bits(SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_to_signed_integer(SymExpr, uint8_t)) UNSUPPORTED(SymExpr _sym_build_float_to_unsigned_integer(SymExpr, uint8_t)) From bb0fd1581bf962acdd04d021f9b10bb143e31813 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Tue, 18 Apr 2023 15:27:06 +0200 Subject: [PATCH 62/64] Formatting fixes --- compiler/Symbolizer.h | 7 ++++--- runtime/qsym_backend/Runtime.cpp | 4 +--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/compiler/Symbolizer.h b/compiler/Symbolizer.h index 1b6ec3e3..808712ee 100644 --- a/compiler/Symbolizer.h +++ b/compiler/Symbolizer.h @@ -202,7 +202,8 @@ class Symbolizer : public llvm::InstVisitor { }; /// Create an expression that represents the concrete value. - llvm::Instruction *createValueExpression(llvm::Value *V, llvm::IRBuilder<> &IRB); + llvm::Instruction *createValueExpression(llvm::Value *V, + llvm::IRBuilder<> &IRB); /// Get the (already created) symbolic expression for a value. 
llvm::Value *getSymbolicExpression(llvm::Value *V) const { @@ -325,8 +326,8 @@ class Symbolizer : public llvm::InstVisitor { /// This is the inverse operation of convertBitVectorExprForType (see details /// there). std::optional - convertExprForTypeToBitVectorExpr(llvm::IRBuilder<> &IRB, - llvm::Value *V, llvm::Value *Expr) const; + convertExprForTypeToBitVectorExpr(llvm::IRBuilder<> &IRB, llvm::Value *V, + llvm::Value *Expr) const; const Runtime runtime; diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index ea1cd848..21b022b9 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -361,9 +361,7 @@ SymExpr _sym_build_float(double, int is_double) { g_expr_builder->createConstant(0, is_double ? 64 : 32)); } -SymExpr _sym_build_float_to_bits(SymExpr expr) { - return expr; -} +SymExpr _sym_build_float_to_bits(SymExpr expr) { return expr; } #define UNSUPPORTED(prototype) \ prototype { return nullptr; } From 1e67ab633eed9305deb4af698dae128a97ada4e7 Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Wed, 26 Apr 2023 11:45:16 +0200 Subject: [PATCH 63/64] Run the CI pipeline for merge requests only This commit implements eng/fuzz/symcc#13. --- .adacore-gitlab-ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.adacore-gitlab-ci.yml b/.adacore-gitlab-ci.yml index c0ff5f75..9f70bcaf 100644 --- a/.adacore-gitlab-ci.yml +++ b/.adacore-gitlab-ci.yml @@ -1,3 +1,7 @@ +workflow: + rules: + - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' + anod_build: services: - image:sandbox From 77bb9716098bb3f2bf587856d37563269e3dc26e Mon Sep 17 00:00:00 2001 From: Sebastian Poeplau Date: Fri, 5 May 2023 17:23:03 +0200 Subject: [PATCH 64/64] Make SymCC compatible with LLVM 16 Part of eng/toolchain/llvm-project#1. 
--- CMakeLists.txt | 4 ++-- README.md | 2 +- compiler/Main.cpp | 4 ++++ compiler/Pass.cpp | 4 ++++ 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a582db9..61df6e0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,8 +80,8 @@ find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}") -if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 15) - message(WARNING "The software has been developed for LLVM 8 through 15; \ +if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 16) + message(WARNING "The software has been developed for LLVM 8 through 16; \ it is unlikely to work with other versions!") endif() diff --git a/README.md b/README.md index d6f81afb..a15eea18 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ program. The actual computation happens through calls to the support library at run time. To build the pass and the support library, install LLVM (any version between 8 -and 15) and Z3 (version 4.5 or later), as well as a C++ compiler with support +and 16) and Z3 (version 4.5 or later), as well as a C++ compiler with support for C++17. LLVM lit is only needed to run the tests; if it's not packaged with your LLVM, you can get it with `pip install lit`. 
diff --git a/compiler/Main.cpp b/compiler/Main.cpp index a5997fcc..6e0f8512 100644 --- a/compiler/Main.cpp +++ b/compiler/Main.cpp @@ -42,6 +42,8 @@ using namespace llvm; // Legacy pass registration (up to LLVM 13) // +#if LLVM_VERSION_MAJOR <= 15 + void addSymbolizeLegacyPass(const PassManagerBuilder & /* unused */, legacy::PassManagerBase &PM) { PM.add(createScalarizerPass()); @@ -57,6 +59,8 @@ static struct RegisterStandardPasses Y(PassManagerBuilder::EP_VectorizerStart, static struct RegisterStandardPasses Z(PassManagerBuilder::EP_EnabledOnOptLevel0, addSymbolizeLegacyPass); +#endif + // // New pass registration (LLVM 13 and above) // diff --git a/compiler/Pass.cpp b/compiler/Pass.cpp index 8e2379fb..af0d88a8 100644 --- a/compiler/Pass.cpp +++ b/compiler/Pass.cpp @@ -110,7 +110,11 @@ bool canLower(const CallInst *CI) { case Intrinsic::roundeven: #endif case Intrinsic::copysign: +#if LLVM_VERSION_MAJOR < 16 case Intrinsic::flt_rounds: +#else + case Intrinsic::get_rounding: +#endif case Intrinsic::invariant_start: case Intrinsic::lifetime_start: case Intrinsic::invariant_end: