diff --git a/.adacore-gitlab-ci.yml b/.adacore-gitlab-ci.yml new file mode 100644 index 00000000..9f70bcaf --- /dev/null +++ b/.adacore-gitlab-ci.yml @@ -0,0 +1,24 @@ +workflow: + rules: + - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' + +anod_build: + services: + - image:sandbox + - cpu:8 + - mem:16 + stage: build + script: + - export ANOD_DEFAULT_SANDBOX_DIR=/it/wave + + # Check out QSYM + - cd runtime/qsym_backend + - git clone -b symcc https://gitlab-ci-token:${CI_JOB_TOKEN}@${CI_SERVER_HOST}:${CI_SERVER_PORT}/eng/fuzz/qsym + + # Use our repositories + - anod vcs --add-repo symcc $CI_PROJECT_DIR + - anod vcs --add-repo qsym $CI_PROJECT_DIR/runtime/qsym_backend/qsym + + # Build SymCC + - anod source symcc + - anod build symcc diff --git a/.github/workflows/check_style.yml b/.github/workflows/check_style.yml new file mode 100644 index 00000000..f3580388 --- /dev/null +++ b/.github/workflows/check_style.yml @@ -0,0 +1,19 @@ +name: Check coding style +on: [pull_request] +jobs: + coding_style: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Run clang-format + shell: bash + run: | + format_changes=$(git clang-format-14 --quiet --diff \ + ${{ github.event.pull_request.base.sha }} \ + ${{ github.event.pull_request.head.sha }} | wc -c) + if [[ $format_changes -ne 0 ]]; then + echo "Please format your changes with clang-format using the LLVM style, e.g., git clang-format --style LLVM before committing" + exit 1 + fi diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index eb5ae3ca..f4003558 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -1,5 +1,5 @@ name: Compile and test SymCC -on: [pull_request] +on: [pull_request, workflow_dispatch] jobs: build_and_test_symcc: runs-on: ubuntu-20.04 @@ -19,13 +19,14 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - llvm_version: [11, 12, 13, 14] + llvm_version: [11, 12, 13, 14, 15] steps: - uses: actions/checkout@v3 with: submodules: true - name: Install dependencies run: | + sudo apt-get update sudo apt-get install -y \ llvm-${{ matrix.llvm_version }}-dev \ libz3-dev \ diff --git a/.gitignore b/.gitignore index 9aba1266..a5522f87 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,7 @@ TAGS # Clang tooling compile_commands.json .clangd +.cache + +# Build directories +build* diff --git a/CMakeLists.txt b/CMakeLists.txt index d0601bf1..7e5f9309 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,8 @@ cmake_minimum_required(VERSION 3.5) project(SymbolicCompiler) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") + option(QSYM_BACKEND "Use the Qsym backend instead of our own" OFF) option(RUST_BACKEND "Build the support code required for a Rust backend as a static archive." OFF) option(TARGET_32BIT "Make the compiler work correctly with -m32" OFF) @@ -40,6 +42,7 @@ set(SYM_RUNTIME_BUILD_ARGS -DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS} -DRUST_BACKEND=${RUST_BACKEND} -DCMAKE_SHARED_LINKER_FLAGS_INIT=${CMAKE_SHARED_LINKER_FLAGS_INIT} + -DCMAKE_MODULE_PATH=${CMAKE_MODULE_PATH} -DCMAKE_SYSROOT=${CMAKE_SYSROOT} -DQSYM_BACKEND=${QSYM_BACKEND} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} @@ -79,8 +82,8 @@ find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}") -if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 15) - message(WARNING "The software has been developed for LLVM 8 through 15; \ +if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 16) + message(WARNING "The software has been developed for LLVM 8 through 16; \ it is unlikely to work with other versions!") endif() diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..be09db8d --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,17 @@ +# Contributing to SymCC + +We encourage everyone to contribute improvements and bug fixes to SymCC. Our +preferred way of accepting contributions is via GitHub pull requests. Please be +sure to run clang-format on any C/C++ code you change; an easy way to do so is +with `git clang-format --style LLVM` just before committing. (On Ubuntu, you can +get `git-clang-format` via `apt install clang-format`.) Ideally, also add a test +to your patch (see the +[docs](https://github.com/eurecom-s3/symcc/blob/master/docs/Testing.txt) for +details). Unfortunately, since the project is a bit short on developers at the +moment, we have to ask for your patience while we review your PR. + +Please note that any contributions you make are licensed under the same terms as +the code you're contributing to, as per the GitHub Terms of Service, [section +D.6](https://docs.github.com/en/site-policy/github-terms/github-terms-of-service#6-contributions-under-repository-license). +At the time of writing, this means LGPL (version 3 or later) for the SymCC +runtime, and GPL (version 3 or later) for the rest of SymCC. diff --git a/LICENSE.lgpl b/LICENSE.lgpl new file mode 100644 index 00000000..0a041280 --- /dev/null +++ b/LICENSE.lgpl @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/README.md b/README.md index 0bb20644..a15eea18 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ program. The actual computation happens through calls to the support library at run time. To build the pass and the support library, install LLVM (any version between 8 -and 15) and Z3 (version 4.5 or later), as well as a C++ compiler with support +and 16) and Z3 (version 4.5 or later), as well as a C++ compiler with support for C++17. LLVM lit is only needed to run the tests; if it's not packaged with your LLVM, you can get it with `pip install lit`. @@ -270,17 +270,24 @@ SymCC is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. +As an exception from the above, you can redistribute and/or modify the SymCC +runtime under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at your +option) any later version. See #114 for the rationale. + SymCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along with -SymCC. If not, see . +You should have received a copy of the GNU General Public License and the GNU +Lesser General Public License along with SymCC. If not, see +. The following pieces of software have additional or alternate copyrights, licenses, and/or restrictions: -| Program | Directory | -| --- | --- | -| QSYM | `runtime/qsym_backend/qsym` | +| Program | Directory | +|---------------|-----------------------------| +| QSYM | `runtime/qsym_backend/qsym` | +| SymCC runtime | `runtime` | diff --git a/cmake/FindFilesystem.cmake b/cmake/FindFilesystem.cmake new file mode 100644 index 00000000..a152e522 --- /dev/null +++ b/cmake/FindFilesystem.cmake @@ -0,0 +1,247 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: + +FindFilesystem +############## + +This module supports the C++17 standard library's filesystem utilities. Use the +:imp-target:`std::filesystem` imported target to + +Options +******* + +The ``COMPONENTS`` argument to this module supports the following values: + +.. find-component:: Experimental + :name: fs.Experimental + + Allows the module to find the "experimental" Filesystem TS version of the + Filesystem library. This is the library that should be used with the + ``std::experimental::filesystem`` namespace. + +.. find-component:: Final + :name: fs.Final + + Finds the final C++17 standard version of the filesystem library. + +If no components are provided, behaves as if the +:find-component:`fs.Final` component was specified. + +If both :find-component:`fs.Experimental` and :find-component:`fs.Final` are +provided, first looks for ``Final``, and falls back to ``Experimental`` in case +of failure. If ``Final`` is found, :imp-target:`std::filesystem` and all +:ref:`variables ` will refer to the ``Final`` version. + + +Imported Targets +**************** + +.. imp-target:: std::filesystem + + The ``std::filesystem`` imported target is defined when any requested + version of the C++ filesystem library has been found, whether it is + *Experimental* or *Final*. + + If no version of the filesystem library is available, this target will not + be defined. + + .. note:: + This target has ``cxx_std_17`` as an ``INTERFACE`` + :ref:`compile language standard feature `. Linking + to this target will automatically enable C++17 if no later standard + version is already required on the linking target. + + +.. _fs.variables: + +Variables +********* + +.. variable:: CXX_FILESYSTEM_IS_EXPERIMENTAL + + Set to ``TRUE`` when the :find-component:`fs.Experimental` version of C++ + filesystem library was found, otherwise ``FALSE``. + +.. variable:: CXX_FILESYSTEM_HAVE_FS + + Set to ``TRUE`` when a filesystem header was found. + +.. variable:: CXX_FILESYSTEM_HEADER + + Set to either ``filesystem`` or ``experimental/filesystem`` depending on + whether :find-component:`fs.Final` or :find-component:`fs.Experimental` was + found. + +.. variable:: CXX_FILESYSTEM_NAMESPACE + + Set to either ``std::filesystem`` or ``std::experimental::filesystem`` + depending on whether :find-component:`fs.Final` or + :find-component:`fs.Experimental` was found. + + +Examples +******** + +Using `find_package(Filesystem)` with no component arguments: + +.. code-block:: cmake + + find_package(Filesystem REQUIRED) + + add_executable(my-program main.cpp) + target_link_libraries(my-program PRIVATE std::filesystem) + + +#]=======================================================================] + + +if(TARGET std::filesystem) + # This module has already been processed. Don't do it again. + return() +endif() + +cmake_minimum_required(VERSION 3.10) + +include(CMakePushCheckState) +include(CheckIncludeFileCXX) + +# If we're not cross-compiling, try to run test executables. +# Otherwise, assume that compile + link is a sufficient check. +if(CMAKE_CROSSCOMPILING) + include(CheckCXXSourceCompiles) + macro(_cmcm_check_cxx_source code var) + check_cxx_source_compiles("${code}" ${var}) + endmacro() +else() + include(CheckCXXSourceRuns) + macro(_cmcm_check_cxx_source code var) + check_cxx_source_runs("${code}" ${var}) + endmacro() +endif() + +cmake_push_check_state() + +set(CMAKE_REQUIRED_QUIET ${Filesystem_FIND_QUIETLY}) + +# All of our tests required C++17 or later +set(CMAKE_CXX_STANDARD 17) + +# Normalize and check the component list we were given +set(want_components ${Filesystem_FIND_COMPONENTS}) +if(Filesystem_FIND_COMPONENTS STREQUAL "") + set(want_components Final) +endif() + +# Warn on any unrecognized components +set(extra_components ${want_components}) +list(REMOVE_ITEM extra_components Final Experimental) +foreach(component IN LISTS extra_components) + message(WARNING "Extraneous find_package component for Filesystem: ${component}") +endforeach() + +# Detect which of Experimental and Final we should look for +set(find_experimental TRUE) +set(find_final TRUE) +if(NOT "Final" IN_LIST want_components) + set(find_final FALSE) +endif() +if(NOT "Experimental" IN_LIST want_components) + set(find_experimental FALSE) +endif() + +if(find_final) + check_include_file_cxx("filesystem" _CXX_FILESYSTEM_HAVE_HEADER) + mark_as_advanced(_CXX_FILESYSTEM_HAVE_HEADER) + if(_CXX_FILESYSTEM_HAVE_HEADER) + # We found the non-experimental header. Don't bother looking for the + # experimental one. + set(find_experimental FALSE) + endif() +else() + set(_CXX_FILESYSTEM_HAVE_HEADER FALSE) +endif() + +if(find_experimental) + check_include_file_cxx("experimental/filesystem" _CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) + mark_as_advanced(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) +else() + set(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER FALSE) +endif() + +if(_CXX_FILESYSTEM_HAVE_HEADER) + set(_have_fs TRUE) + set(_fs_header filesystem) + set(_fs_namespace std::filesystem) + set(_is_experimental FALSE) +elseif(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER) + set(_have_fs TRUE) + set(_fs_header experimental/filesystem) + set(_fs_namespace std::experimental::filesystem) + set(_is_experimental TRUE) +else() + set(_have_fs FALSE) +endif() + +set(CXX_FILESYSTEM_HAVE_FS ${_have_fs} CACHE BOOL "TRUE if we have the C++ filesystem headers") +set(CXX_FILESYSTEM_HEADER ${_fs_header} CACHE STRING "The header that should be included to obtain the filesystem APIs") +set(CXX_FILESYSTEM_NAMESPACE ${_fs_namespace} CACHE STRING "The C++ namespace that contains the filesystem APIs") +set(CXX_FILESYSTEM_IS_EXPERIMENTAL ${_is_experimental} CACHE BOOL "TRUE if the C++ filesystem library is the experimental version") + +set(_found FALSE) + +if(CXX_FILESYSTEM_HAVE_FS) + # We have some filesystem library available. Do link checks + string(CONFIGURE [[ + #include + #include <@CXX_FILESYSTEM_HEADER@> + + int main() { + auto cwd = @CXX_FILESYSTEM_NAMESPACE@::current_path(); + printf("%s", cwd.c_str()); + return EXIT_SUCCESS; + } + ]] code @ONLY) + + # Check a simple filesystem program without any linker flags + _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_NO_LINK_NEEDED) + + set(can_link ${CXX_FILESYSTEM_NO_LINK_NEEDED}) + + if(NOT CXX_FILESYSTEM_NO_LINK_NEEDED) + set(prev_libraries ${CMAKE_REQUIRED_LIBRARIES}) + # Add the libstdc++ flag + set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lstdc++fs) + _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_STDCPPFS_NEEDED) + set(can_link ${CXX_FILESYSTEM_STDCPPFS_NEEDED}) + if(NOT CXX_FILESYSTEM_STDCPPFS_NEEDED) + # Try the libc++ flag + set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lc++fs) + _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_CPPFS_NEEDED) + set(can_link ${CXX_FILESYSTEM_CPPFS_NEEDED}) + endif() + endif() + + if(can_link) + add_library(std::filesystem INTERFACE IMPORTED) + set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_17) + set(_found TRUE) + + if(CXX_FILESYSTEM_NO_LINK_NEEDED) + # Nothing to add... + elseif(CXX_FILESYSTEM_STDCPPFS_NEEDED) + set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_LINK_LIBRARIES -lstdc++fs) + elseif(CXX_FILESYSTEM_CPPFS_NEEDED) + set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_LINK_LIBRARIES -lc++fs) + endif() + endif() +endif() + +cmake_pop_check_state() + +set(Filesystem_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::filesystem" FORCE) + +if(Filesystem_FIND_REQUIRED AND NOT Filesystem_FOUND) + message(FATAL_ERROR "Cannot run simple program using std::filesystem") +endif() diff --git a/compiler/Main.cpp b/compiler/Main.cpp index f915d5d4..0cf97501 100644 --- a/compiler/Main.cpp +++ b/compiler/Main.cpp @@ -14,6 +14,25 @@ #include #include +#include +#include + +#if LLVM_VERSION_MAJOR >= 13 +#include +#include + +#if LLVM_VERSION_MAJOR >= 14 +#include +#else +using OptimizationLevel = llvm::PassBuilder::OptimizationLevel; +#endif +#endif + +#if LLVM_VERSION_MAJOR >= 15 +#include +#else +#include +#endif #if LLVM_VERSION_MAJOR >= 13 #include @@ -34,8 +53,12 @@ using namespace llvm; // Legacy pass registration (up to LLVM 13) // +#if LLVM_VERSION_MAJOR <= 15 + void addSymbolizeLegacyPass(const PassManagerBuilder & /* unused */, legacy::PassManagerBase &PM) { + PM.add(createScalarizerPass()); + PM.add(createLowerAtomicPass()); PM.add(new SymbolizeLegacyPass()); } @@ -47,6 +70,8 @@ static struct RegisterStandardPasses Y(PassManagerBuilder::EP_VectorizerStart, static struct RegisterStandardPasses Z(PassManagerBuilder::EP_EnabledOnOptLevel0, addSymbolizeLegacyPass); +#endif + // // New pass registration (LLVM 13 and above) // @@ -67,6 +92,8 @@ PassPluginLibraryInfo getSymbolizePluginInfo() { }); PB.registerVectorizerStartEPCallback( [](FunctionPassManager &PM, OptimizationLevel) { + PM.addPass(ScalarizerPass()); + PM.addPass(LowerAtomicPass()); PM.addPass(SymbolizePass()); }); }}; diff --git a/compiler/Pass.cpp b/compiler/Pass.cpp index f17fd7e5..af0d88a8 100644 --- a/compiler/Pass.cpp +++ b/compiler/Pass.cpp @@ -15,11 +15,22 @@ #include "Pass.h" #include +#include +#include +#include #include #include #include +#include +#include #include +#if LLVM_VERSION_MAJOR < 14 +#include +#else +#include +#endif + #include "Runtime.h" #include "Symbolizer.h" @@ -60,6 +71,94 @@ bool instrumentModule(Module &M) { return true; } +bool canLower(const CallInst *CI) { + const Function *Callee = CI->getCalledFunction(); + if (!Callee) + return false; + + switch (Callee->getIntrinsicID()) { + case Intrinsic::expect: + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: + case Intrinsic::prefetch: + case Intrinsic::pcmarker: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: + case Intrinsic::eh_typeid_for: + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + case Intrinsic::assume: +#if LLVM_VERSION_MAJOR > 11 + case Intrinsic::experimental_noalias_scope_decl: +#endif + case Intrinsic::var_annotation: + case Intrinsic::sqrt: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::pow: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::round: +#if LLVM_VERSION_MAJOR > 10 + case Intrinsic::roundeven: +#endif + case Intrinsic::copysign: +#if LLVM_VERSION_MAJOR < 16 + case Intrinsic::flt_rounds: +#else + case Intrinsic::get_rounding: +#endif + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + return true; + default: + return false; + } + + llvm_unreachable("Control cannot reach here"); +} + +void liftInlineAssembly(CallInst *CI) { + // TODO When we don't have to worry about the old pass manager anymore, move + // the initialization to the pass constructor. (Currently there are two + // passes, but only if we're on a recent enough LLVM...) + + Function *F = CI->getFunction(); + Module *M = F->getParent(); + auto triple = M->getTargetTriple(); + + std::string error; + auto target = TargetRegistry::lookupTarget(triple, error); + if (!target) { + errs() << "Warning: can't get target info to lift inline assembly\n"; + return; + } + + auto cpu = F->getFnAttribute("target-cpu").getValueAsString(); + auto features = F->getFnAttribute("target-features").getValueAsString(); + + std::unique_ptr TM( + target->createTargetMachine(triple, cpu, features, TargetOptions(), {})); + auto subTarget = TM->getSubtargetImpl(*F); + if (subTarget == nullptr) + return; + + auto targetLowering = subTarget->getTargetLowering(); + if (targetLowering == nullptr) + return; + + targetLowering->ExpandInlineAsm(CI); +} + bool instrumentFunction(Function &F) { auto functionName = F.getName(); if (functionName == kSymCtorName) @@ -73,6 +172,21 @@ bool instrumentFunction(Function &F) { for (auto &I : instructions(F)) allInstructions.push_back(&I); + IntrinsicLowering IL(F.getParent()->getDataLayout()); + for (auto *I : allInstructions) { + if (auto *CI = dyn_cast(I)) { + if (canLower(CI)) { + IL.LowerIntrinsicCall(CI); + } else if (isa(CI->getCalledOperand())) { + liftInlineAssembly(CI); + } + } + } + + allInstructions.clear(); + for (auto &I : instructions(F)) + allInstructions.push_back(&I); + Symbolizer symbolizer(*F.getParent()); symbolizer.symbolizeFunctionArguments(F); diff --git a/compiler/Pass.h b/compiler/Pass.h index cf0676aa..b06377dc 100644 --- a/compiler/Pass.h +++ b/compiler/Pass.h @@ -29,8 +29,8 @@ class SymbolizeLegacyPass : public llvm::FunctionPass { SymbolizeLegacyPass() : FunctionPass(ID) {} - bool doInitialization(llvm::Module &M) override; - bool runOnFunction(llvm::Function &F) override; + virtual bool doInitialization(llvm::Module &M) override; + virtual bool runOnFunction(llvm::Function &F) override; }; #if LLVM_VERSION_MAJOR >= 13 diff --git a/compiler/Runtime.cpp b/compiler/Runtime.cpp index ba0f3d83..a97b30f6 100644 --- a/compiler/Runtime.cpp +++ b/compiler/Runtime.cpp @@ -39,27 +39,25 @@ Runtime::Runtime(Module &M) { auto *intPtrType = M.getDataLayout().getIntPtrType(M.getContext()); auto *ptrT = IRB.getInt8PtrTy(); auto *int8T = IRB.getInt8Ty(); + auto *int1T = IRB.getInt1Ty(); auto *voidT = IRB.getVoidTy(); buildInteger = import(M, "_sym_build_integer", ptrT, IRB.getInt64Ty(), int8T); buildInteger128 = import(M, "_sym_build_integer128", ptrT, IRB.getInt64Ty(), IRB.getInt64Ty()); - buildFloat = - import(M, "_sym_build_float", ptrT, IRB.getDoubleTy(), IRB.getInt1Ty()); + buildFloat = import(M, "_sym_build_float", ptrT, IRB.getDoubleTy(), int1T); buildNullPointer = import(M, "_sym_build_null_pointer", ptrT); buildTrue = import(M, "_sym_build_true", ptrT); buildFalse = import(M, "_sym_build_false", ptrT); - buildBool = import(M, "_sym_build_bool", ptrT, IRB.getInt1Ty()); + buildBool = import(M, "_sym_build_bool", ptrT, int1T); buildSExt = import(M, "_sym_build_sext", ptrT, ptrT, int8T); buildZExt = import(M, "_sym_build_zext", ptrT, ptrT, int8T); buildTrunc = import(M, "_sym_build_trunc", ptrT, ptrT, int8T); buildBswap = import(M, "_sym_build_bswap", ptrT, ptrT); - buildIntToFloat = import(M, "_sym_build_int_to_float", ptrT, ptrT, - IRB.getInt1Ty(), IRB.getInt1Ty()); - buildFloatToFloat = - import(M, "_sym_build_float_to_float", ptrT, ptrT, IRB.getInt1Ty()); - buildBitsToFloat = - import(M, "_sym_build_bits_to_float", ptrT, ptrT, IRB.getInt1Ty()); + buildIntToFloat = + import(M, "_sym_build_int_to_float", ptrT, ptrT, int1T, int1T); + buildFloatToFloat = import(M, "_sym_build_float_to_float", ptrT, ptrT, int1T); + buildBitsToFloat = import(M, "_sym_build_bits_to_float", ptrT, ptrT, int1T); buildFloatToBits = import(M, "_sym_build_float_to_bits", ptrT, ptrT); buildFloatToSignedInt = import(M, "_sym_build_float_to_signed_integer", ptrT, ptrT, int8T); @@ -70,8 +68,33 @@ Runtime::Runtime(Module &M) { buildBoolOr = import(M, "_sym_build_bool_or", ptrT, ptrT, ptrT); buildBoolXor = import(M, "_sym_build_bool_xor", ptrT, ptrT, ptrT); buildBoolToBit = import(M, "_sym_build_bool_to_bit", ptrT, ptrT); - pushPathConstraint = import(M, "_sym_push_path_constraint", voidT, ptrT, - IRB.getInt1Ty(), intPtrType); + buildBitToBool = import(M, "_sym_build_bit_to_bool", ptrT, ptrT); + buildConcat = + import(M, "_sym_concat_helper", ptrT, ptrT, + ptrT); // doesn't follow naming convention for historic reasons + pushPathConstraint = + import(M, "_sym_push_path_constraint", voidT, ptrT, int1T, intPtrType); + + // Overflow arithmetic + buildAddOverflow = + import(M, "_sym_build_add_overflow", ptrT, ptrT, ptrT, int1T, int1T); + buildSubOverflow = + import(M, "_sym_build_sub_overflow", ptrT, ptrT, ptrT, int1T, int1T); + buildMulOverflow = + import(M, "_sym_build_mul_overflow", ptrT, ptrT, ptrT, int1T, int1T); + + // Saturating arithmetic + buildSAddSat = import(M, "_sym_build_sadd_sat", ptrT, ptrT, ptrT); + buildUAddSat = import(M, "_sym_build_uadd_sat", ptrT, ptrT, ptrT); + buildSSubSat = import(M, "_sym_build_ssub_sat", ptrT, ptrT, ptrT); + buildUSubSat = import(M, "_sym_build_usub_sat", ptrT, ptrT, ptrT); + buildSShlSat = import(M, "_sym_build_sshl_sat", ptrT, ptrT, ptrT); + buildUShlSat = import(M, "_sym_build_ushl_sat", ptrT, ptrT, ptrT); + + buildFshl = import(M, "_sym_build_funnel_shift_left", ptrT, ptrT, ptrT, ptrT); + buildFshr = + import(M, "_sym_build_funnel_shift_right", ptrT, ptrT, ptrT, ptrT); + buildAbs = import(M, "_sym_build_abs", ptrT, ptrT); setParameterExpression = import(M, "_sym_set_parameter_expression", voidT, int8T, ptrT); @@ -107,6 +130,14 @@ Runtime::Runtime(Module &M) { #undef LOAD_BINARY_OPERATOR_HANDLER +#define LOAD_UNARY_OPERATOR_HANDLER(constant, name) \ + unaryOperatorHandlers[Instruction::constant] = \ + import(M, "_sym_build_" #name, ptrT, ptrT); + + LOAD_UNARY_OPERATOR_HANDLER(FNeg, fp_neg) + +#undef LOAD_UNARY_OPERATOR_HANDLER + #define LOAD_COMPARISON_HANDLER(constant, name) \ comparisonHandlers[CmpInst::constant] = \ import(M, "_sym_build_" #name, ptrT, ptrT, ptrT); @@ -144,13 +175,14 @@ Runtime::Runtime(Module &M) { memset = import(M, "_sym_memset", voidT, ptrT, ptrT, intPtrType); memmove = import(M, "_sym_memmove", voidT, ptrT, ptrT, intPtrType); readMemory = - import(M, "_sym_read_memory", ptrT, intPtrType, intPtrType, int8T); + import(M, "_sym_read_memory", ptrT, intPtrType, intPtrType, int1T); writeMemory = import(M, "_sym_write_memory", voidT, intPtrType, intPtrType, - ptrT, int8T); + ptrT, int1T); + buildZeroBytes = import(M, "_sym_build_zero_bytes", ptrT, intPtrType); buildInsert = - import(M, "_sym_build_insert", ptrT, ptrT, ptrT, IRB.getInt64Ty(), int8T); + import(M, "_sym_build_insert", ptrT, ptrT, ptrT, IRB.getInt64Ty(), int1T); buildExtract = import(M, "_sym_build_extract", ptrT, ptrT, IRB.getInt64Ty(), - IRB.getInt64Ty(), int8T); + IRB.getInt64Ty(), int1T); notifyCall = import(M, "_sym_notify_call", voidT, intPtrType); notifyRet = import(M, "_sym_notify_ret", voidT, intPtrType); @@ -160,10 +192,11 @@ Runtime::Runtime(Module &M) { /// Decide whether a function is called symbolically. bool isInterceptedFunction(const Function &f) { static const StringSet<> kInterceptedFunctions = { - "malloc", "calloc", "mmap", "mmap64", "open", "read", "lseek", - "lseek64", "fopen", "fopen64", "fread", "fseek", "fseeko", "rewind", - "fseeko64", "getc", "ungetc", "memcpy", "memset", "strncpy", "strchr", - "memcmp", "memmove", "ntohl", "fgets", "fgetc", "getchar"}; + "malloc", "calloc", "mmap", "mmap64", "open", "read", + "lseek", "lseek64", "fopen", "fopen64", "fread", "fseek", + "fseeko", "rewind", "fseeko64", "getc", "ungetc", "memcpy", + "memset", "strncpy", "strchr", "memcmp", "memmove", "ntohl", + "fgets", "fgetc", "getchar", "bcopy", "bcmp", "bzero"}; return (kInterceptedFunctions.count(f.getName()) > 0); } diff --git a/compiler/Runtime.h b/compiler/Runtime.h index 519f9f00..3f26c76d 100644 --- a/compiler/Runtime.h +++ b/compiler/Runtime.h @@ -19,9 +19,9 @@ #include #if LLVM_VERSION_MAJOR >= 9 && LLVM_VERSION_MAJOR < 11 - using SymFnT = llvm::Value *; +using SymFnT = llvm::Value *; #else - using SymFnT = llvm::FunctionCallee; +using SymFnT = llvm::FunctionCallee; #endif /// Runtime functions @@ -50,6 +50,20 @@ struct Runtime { SymFnT buildBoolOr{}; SymFnT buildBoolXor{}; SymFnT buildBoolToBit{}; + SymFnT buildBitToBool{}; + SymFnT buildAddOverflow{}; + SymFnT buildSubOverflow{}; + SymFnT buildMulOverflow{}; + SymFnT buildSAddSat{}; + SymFnT buildUAddSat{}; + SymFnT buildSSubSat{}; + SymFnT buildUSubSat{}; + SymFnT buildSShlSat{}; + SymFnT buildUShlSat{}; + SymFnT buildFshl{}; + SymFnT buildFshr{}; + SymFnT buildAbs{}; + SymFnT buildConcat{}; SymFnT pushPathConstraint{}; SymFnT getParameterExpression{}; SymFnT setParameterExpression{}; @@ -60,6 +74,7 @@ struct Runtime { SymFnT memmove{}; SymFnT readMemory{}; SymFnT writeMemory{}; + SymFnT buildZeroBytes{}; SymFnT buildInsert{}; SymFnT buildExtract{}; SymFnT notifyCall{}; @@ -68,13 +83,15 @@ struct Runtime { /// Mapping from icmp predicates to the functions that build the corresponding /// symbolic expressions. - std::array - comparisonHandlers{}; + std::array comparisonHandlers{}; /// Mapping from binary operators to the functions that build the /// corresponding symbolic expressions. - std::array - binaryOperatorHandlers{}; + std::array binaryOperatorHandlers{}; + + /// Mapping from unary operators to the functions that build the + /// corresponding symbolic expressions. + std::array unaryOperatorHandlers{}; }; bool isInterceptedFunction(const llvm::Function &f); diff --git a/compiler/Symbolizer.cpp b/compiler/Symbolizer.cpp index 05f5a6b5..d111c52d 100644 --- a/compiler/Symbolizer.cpp +++ b/compiler/Symbolizer.cpp @@ -180,15 +180,9 @@ void Symbolizer::handleIntrinsicCall(CallBase &I) { auto *callee = I.getCalledFunction(); switch (callee->getIntrinsicID()) { - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::is_constant: case Intrinsic::trap: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::assume: // These are safe to ignore. break; case Intrinsic::memcpy: { @@ -258,22 +252,13 @@ void Symbolizer::handleIntrinsicCall(CallBase &I) { registerSymbolicComputation(abs, &I); break; } - case Intrinsic::cttz: - case Intrinsic::ctpop: - case Intrinsic::ctlz: { - // Various bit-count operations. Expressing these symbolically is - // difficult, so for now we just concretize. - - errs() << "Warning: losing track of symbolic expressions at bit-count " - "operation " - << I << "\n"; - break; - } - case Intrinsic::returnaddress: { + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + case Intrinsic::addressofreturnaddress: { // Obtain the return address of the current function or one of its parents // on the stack. We just concretize. - errs() << "Warning: using concrete value for return address\n"; + errs() << "Warning: using concrete value for return/frame address\n"; break; } case Intrinsic::bswap: { @@ -284,6 +269,74 @@ void Symbolizer::handleIntrinsicCall(CallBase &I) { registerSymbolicComputation(swapped, &I); break; } + +// Overflow arithmetic +#define DEF_OVF_ARITH_BUILDER(intrinsic_op, runtime_name) \ + case Intrinsic::s##intrinsic_op##_with_overflow: \ + case Intrinsic::u##intrinsic_op##_with_overflow: { \ + IRBuilder<> IRB(&I); \ + \ + bool isSigned = \ + I.getIntrinsicID() == Intrinsic::s##intrinsic_op##_with_overflow; \ + auto overflow = buildRuntimeCall( \ + IRB, runtime.build##runtime_name, \ + {{I.getOperand(0), true}, \ + {I.getOperand(1), true}, \ + {IRB.getInt1(isSigned), false}, \ + {IRB.getInt1(dataLayout.isLittleEndian() ? 1 : 0), false}}); \ + registerSymbolicComputation(overflow, &I); \ + \ + break; \ + } + + DEF_OVF_ARITH_BUILDER(add, AddOverflow) + DEF_OVF_ARITH_BUILDER(sub, SubOverflow) + DEF_OVF_ARITH_BUILDER(mul, MulOverflow) + +#undef DEF_OVF_ARITH_BUILDER + +// Saturating arithmetic +#define DEF_SAT_ARITH_BUILDER(intrinsic_op, runtime_name) \ + case Intrinsic::intrinsic_op##_sat: { \ + IRBuilder<> IRB(&I); \ + auto result = buildRuntimeCall(IRB, runtime.build##runtime_name, \ + {I.getOperand(0), I.getOperand(1)}); \ + registerSymbolicComputation(result, &I); \ + break; \ + } + + DEF_SAT_ARITH_BUILDER(sadd, SAddSat) + DEF_SAT_ARITH_BUILDER(uadd, UAddSat) + DEF_SAT_ARITH_BUILDER(ssub, SSubSat) + DEF_SAT_ARITH_BUILDER(usub, USubSat) +#if LLVM_VERSION_MAJOR > 11 + DEF_SAT_ARITH_BUILDER(sshl, SShlSat) + DEF_SAT_ARITH_BUILDER(ushl, UShlSat) +#endif + +#undef DEF_SAT_ARITH_BUILDER + + case Intrinsic::fshl: + case Intrinsic::fshr: { + IRBuilder<> IRB(&I); + auto funnelShift = buildRuntimeCall( + IRB, + I.getIntrinsicID() == Intrinsic::fshl ? runtime.buildFshl + : runtime.buildFshr, + {I.getOperand(0), I.getOperand(1), I.getOperand(2)}); + registerSymbolicComputation(funnelShift, &I); + break; + } +#if LLVM_VERSION_MAJOR > 11 + case Intrinsic::abs: { + // Integer absolute value + + IRBuilder<> IRB(&I); + auto abs = buildRuntimeCall(IRB, runtime.buildAbs, I.getOperand(0)); + registerSymbolicComputation(abs, &I); + break; + } +#endif default: errs() << "Warning: unhandled LLVM intrinsic " << callee->getName() << "; the result will be concretized\n"; @@ -369,6 +422,15 @@ void Symbolizer::visitBinaryOperator(BinaryOperator &I) { registerSymbolicComputation(runtimeCall, &I); } +void Symbolizer::visitUnaryOperator(UnaryOperator &I) { + IRBuilder<> IRB(&I); + SymFnT handler = runtime.unaryOperatorHandlers.at(I.getOpcode()); + + assert(handler && "Unable to handle unary operator"); + auto runtimeCall = buildRuntimeCall(IRB, handler, I.getOperand(0)); + registerSymbolicComputation(runtimeCall, &I); +} + void Symbolizer::visitSelectInst(SelectInst &I) { // Select is like the ternary operator ("?:") in C. We push the (potentially // negated) condition to the path constraints and copy the symbolic @@ -472,14 +534,9 @@ void Symbolizer::visitLoadInst(LoadInst &I) { runtime.readMemory, {IRB.CreatePtrToInt(addr, intPtrType), ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(dataType)), - ConstantInt::get(IRB.getInt8Ty(), isLittleEndian(dataType) ? 1 : 0)}); - - if (dataType->isFloatingPointTy()) { - data = IRB.CreateCall(runtime.buildBitsToFloat, - {data, IRB.getInt1(dataType->isDoubleTy())}); - } + IRB.getInt1(isLittleEndian(dataType) ? 1 : 0)}); - symbolicExpressions[&I] = data; + symbolicExpressions[&I] = convertBitVectorExprForType(IRB, data, dataType); } void Symbolizer::visitStoreInst(StoreInst &I) { @@ -487,18 +544,22 @@ void Symbolizer::visitStoreInst(StoreInst &I) { tryAlternative(IRB, I.getPointerOperand()); - auto *data = getSymbolicExpressionOrNull(I.getValueOperand()); - auto *dataType = I.getValueOperand()->getType(); - if (dataType->isFloatingPointTy()) { - data = IRB.CreateCall(runtime.buildFloatToBits, data); - } + // Make sure that the expression corresponding to the stored value is of + // bit-vector kind. Shortcutting the runtime calls that we emit here (e.g., + // for floating-point values) is tricky, so instead we make sure that any + // runtime function we call can handle null expressions. + + auto V = I.getValueOperand(); + auto maybeConversion = + convertExprForTypeToBitVectorExpr(IRB, V, getSymbolicExpression(V)); IRB.CreateCall( runtime.writeMemory, {IRB.CreatePtrToInt(I.getPointerOperand(), intPtrType), - ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(dataType)), - data, - ConstantInt::get(IRB.getInt8Ty(), dataLayout.isLittleEndian() ? 1 : 0)}); + ConstantInt::get(intPtrType, dataLayout.getTypeStoreSize(V->getType())), + maybeConversion ? maybeConversion->lastInstruction + : getSymbolicExpressionOrNull(V), + IRB.getInt1(isLittleEndian(V->getType()) ? 1 : 0)}); } void Symbolizer::visitGetElementPtrInst(GetElementPtrInst &I) { @@ -619,11 +680,25 @@ void Symbolizer::visitBitCastInst(BitCastInst &I) { void Symbolizer::visitTruncInst(TruncInst &I) { IRBuilder<> IRB(&I); - auto trunc = buildRuntimeCall( + + if (getSymbolicExpression(I.getOperand(0)) == nullptr) + return; + + SymbolicComputation symbolicComputation; + symbolicComputation.merge(forceBuildRuntimeCall( IRB, runtime.buildTrunc, {{I.getOperand(0), true}, - {IRB.getInt8(I.getDestTy()->getIntegerBitWidth()), false}}); - registerSymbolicComputation(trunc, &I); + {IRB.getInt8(I.getDestTy()->getIntegerBitWidth()), false}})); + + if (I.getDestTy()->isIntegerTy() && + I.getDestTy()->getIntegerBitWidth() == 1) { + // convert from byte back to a bool (i1) + symbolicComputation.merge( + forceBuildRuntimeCall(IRB, runtime.buildBitToBool, + {{symbolicComputation.lastInstruction, false}})); + } + + registerSymbolicComputation(symbolicComputation, &I); } void Symbolizer::visitIntToPtrInst(IntToPtrInst &I) { @@ -764,30 +839,61 @@ void Symbolizer::visitPHINode(PHINode &I) { void Symbolizer::visitInsertValueInst(InsertValueInst &I) { IRBuilder<> IRB(&I); - auto insert = buildRuntimeCall( - IRB, runtime.buildInsert, - {{I.getAggregateOperand(), true}, - {I.getInsertedValueOperand(), true}, - {IRB.getInt64(aggregateMemberOffset(I.getAggregateOperand()->getType(), - I.getIndices())), - false}, - {IRB.getInt8(isLittleEndian(I.getInsertedValueOperand()->getType()) ? 1 - : 0), - false}}); - registerSymbolicComputation(insert, &I); + auto target = I.getAggregateOperand(); + auto insertedValue = I.getInsertedValueOperand(); + + if (getSymbolicExpression(target) == nullptr && + getSymbolicExpression(insertedValue) == nullptr) + return; + + // We may have to convert the expression to bit-vector kind... + auto maybeConversion = convertExprForTypeToBitVectorExpr( + IRB, insertedValue, getSymbolicExpressionOrNull(insertedValue)); + + auto insert = IRB.CreateCall( + runtime.buildInsert, + {getSymbolicExpressionOrNull(target), + // If we had to convert the expression, use the result of the conversion. + maybeConversion ? maybeConversion->lastInstruction + : getSymbolicExpressionOrNull(insertedValue), + IRB.getInt64(aggregateMemberOffset(target->getType(), I.getIndices())), + IRB.getInt1(isLittleEndian(insertedValue->getType()) ? 1 : 0)}); + auto insertComputation = + SymbolicComputation(insert, insert, {Input(target, 0, insert)}); + + if (!maybeConversion) { + // If we didn't have to convert, then the inserted value is first used in + // the insertion. + insertComputation.inputs.push_back(Input(insertedValue, 1, insert)); + } else { + // Otherwise, the full computation consists of the conversion followed by + // the insertion. + maybeConversion->merge(insertComputation); + } + + registerSymbolicComputation(maybeConversion.value_or(insertComputation), &I); } void Symbolizer::visitExtractValueInst(ExtractValueInst &I) { IRBuilder<> IRB(&I); - auto extract = buildRuntimeCall( - IRB, runtime.buildExtract, - {{I.getAggregateOperand(), true}, - {IRB.getInt64(aggregateMemberOffset(I.getAggregateOperand()->getType(), - I.getIndices())), - false}, - {IRB.getInt64(dataLayout.getTypeStoreSize(I.getType())), false}, - {IRB.getInt8(isLittleEndian(I.getType()) ? 1 : 0), false}}); - registerSymbolicComputation(extract, &I); + auto target = I.getAggregateOperand(); + auto targetExpr = getSymbolicExpression(target); + auto resultType = I.getType(); + + if (targetExpr == nullptr) + return; + + auto extractedBits = IRB.CreateCall( + runtime.buildExtract, + {targetExpr, + IRB.getInt64(aggregateMemberOffset(target->getType(), I.getIndices())), + IRB.getInt64(dataLayout.getTypeStoreSize(resultType)), + IRB.getInt1(isLittleEndian(resultType) ? 1 : 0)}); + + Instruction *result = + convertBitVectorExprForType(IRB, extractedBits, resultType); + registerSymbolicComputation( + {extractedBits, result, {{target, 0, extractedBits}}}, &I); } void Symbolizer::visitSwitchInst(SwitchInst &I) { @@ -833,7 +939,7 @@ void Symbolizer::visitInstruction(Instruction &I) { << "; the result will be concretized\n"; } -CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { +Instruction *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { auto *valueType = V->getType(); if (isa(V)) { @@ -875,12 +981,12 @@ CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { {IRB.CreatePtrToInt(V, IRB.getInt64Ty()), IRB.getInt8(ptrBits)}); } - if (valueType->isStructTy()) { + if (auto structType = dyn_cast(valueType)) { // In unoptimized code we may see structures in SSA registers. What we // want is a single bit-vector expression describing their contents, but - // unfortunately we can't take the address of a register. We fix the - // problem with a hack: we write the register to memory and initialize the - // expression from there. + // unfortunately we can't take the address of a register. What we do instead + // is to build the expression recursively by iterating over the elements of + // the structure. // // An alternative would be to change the representation of structures in // SSA registers to "shadow structures" that contain one expression per @@ -888,22 +994,80 @@ CallInst *Symbolizer::createValueExpression(Value *V, IRBuilder<> &IRB) { // cast instructions, because expressions would have to be converted // between different representations according to the type. - auto *memory = IRB.CreateAlloca(V->getType()); - IRB.CreateStore(V, memory); - return IRB.CreateCall( - runtime.readMemory, - {IRB.CreatePtrToInt(memory, intPtrType), - ConstantInt::get(intPtrType, - dataLayout.getTypeStoreSize(V->getType())), - IRB.getInt8(0)}); + if (isa(V)) { + // This is just an optimization for completely undefined structs; we + // create an all-zeros expression without iterating over the elements. + return IRB.CreateCall( + runtime.buildZeroBytes, + {ConstantInt::get(intPtrType, + dataLayout.getTypeStoreSize(valueType))}); + } else { + // Iterate over the elements of the struct and concatenate the + // corresponding expressions (along with any padding that might be + // needed). + + auto structLayout = dataLayout.getStructLayout(structType); + auto constantStructValue = dyn_cast(V); + size_t offset = 0; // The end of the expressed portion in bytes. + Instruction *expr = nullptr; + auto append = [&](Instruction *newExpr) { + expr = expr ? IRB.CreateCall(runtime.buildConcat, {expr, newExpr}) + : newExpr; + }; + + for (size_t i = 0; i < structType->getNumElements(); i++) { + // Build an expression for any padding preceding the current element. + if (auto padding = structLayout->getElementOffset(i) - offset; + padding > 0) { + append(IRB.CreateCall(runtime.buildZeroBytes, + {ConstantInt::get(intPtrType, padding)})); + } + + // Build the expression for the current element. If the struct is not a + // constant, we need to read the element with extractvalue. + auto element = constantStructValue + ? constantStructValue->getAggregateElement(i) + : IRB.CreateExtractValue(V, i); + auto elementExpr = createValueExpression(element, IRB); + + // The expression may be of a different kind than bit vector; in this + // case, we need to convert it. + if (auto conversion = + convertExprForTypeToBitVectorExpr(IRB, element, elementExpr)) { + elementExpr = conversion->lastInstruction; + } + + // If the element is represented in little-endian byte order in memory, + // swap the bytes. + auto elementType = structType->getElementType(i); + if (isLittleEndian(elementType) && + dataLayout.getTypeStoreSize(elementType) > 1) { + elementExpr = IRB.CreateCall(runtime.buildBswap, {elementExpr}); + } + + append(elementExpr); + + offset = structLayout->getElementOffset(i) + + dataLayout.getTypeStoreSize(structType->getElementType(i)); + } + + // Insert padding at the end, if any. + if (auto finalPadding = dataLayout.getTypeStoreSize(structType) - offset; + finalPadding > 0) { + append(IRB.CreateCall(runtime.buildZeroBytes, + {ConstantInt::get(intPtrType, finalPadding)})); + } + + return expr; + } } llvm_unreachable("Unhandled type for constant expression"); } -Symbolizer::SymbolicComputation -Symbolizer::forceBuildRuntimeCall(IRBuilder<> &IRB, SymFnT function, - ArrayRef> args) { +Symbolizer::SymbolicComputation Symbolizer::forceBuildRuntimeCall( + IRBuilder<> &IRB, SymFnT function, + ArrayRef> args) const { std::vector functionArgs; for (const auto &[arg, symbolic] : args) { functionArgs.push_back(symbolic ? getSymbolicExpressionOrNull(arg) : arg); @@ -914,7 +1078,7 @@ Symbolizer::forceBuildRuntimeCall(IRBuilder<> &IRB, SymFnT function, for (unsigned i = 0; i < args.size(); i++) { const auto &[arg, symbolic] = args[i]; if (symbolic) - inputs.push_back({arg, i, call}); + inputs.push_back(Input(arg, i, call)); } return SymbolicComputation(call, call, inputs); @@ -931,7 +1095,7 @@ void Symbolizer::tryAlternative(IRBuilder<> &IRB, Value *V) { runtime.pushPathConstraint, {destAssertion, IRB.getInt1(true), getTargetPreferredInt(V)}); registerSymbolicComputation(SymbolicComputation( - concreteDestExpr, pushAssertion, {{V, 0, destAssertion}})); + concreteDestExpr, pushAssertion, {Input(V, 0, destAssertion)})); } } @@ -957,3 +1121,41 @@ uint64_t Symbolizer::aggregateMemberOffset(Type *aggregateType, return offset; } + +Instruction *Symbolizer::convertBitVectorExprForType(llvm::IRBuilder<> &IRB, + Instruction *I, + Type *T) const { + Instruction *result = I; + + if (T->isFloatingPointTy()) { + result = IRB.CreateCall(runtime.buildBitsToFloat, + {I, IRB.getInt1(T->isDoubleTy())}); + } else if (T->isIntegerTy() && T->getIntegerBitWidth() == 1) { + result = IRB.CreateCall(runtime.buildTrunc, + {I, ConstantInt::get(IRB.getInt8Ty(), 1)}); + result = IRB.CreateCall(runtime.buildBitToBool, {result}); + } + + return result; +} + +std::optional +Symbolizer::convertExprForTypeToBitVectorExpr(IRBuilder<> &IRB, Value *V, + Value *Expr) const { + if (Expr == nullptr) + return {}; + + auto T = V->getType(); + + if (T->isFloatingPointTy()) { + auto floatBits = IRB.CreateCall(runtime.buildFloatToBits, {Expr}); + return SymbolicComputation(floatBits, floatBits, {Input(V, 0, floatBits)}); + } else if (T->isIntegerTy() && T->getIntegerBitWidth() == 1) { + auto bitExpr = IRB.CreateCall(runtime.buildBoolToBit, {Expr}); + auto bitVectorExpr = IRB.CreateCall(runtime.buildZExt, + {bitExpr, IRB.getInt8(7 /* 1 byte */)}); + return SymbolicComputation(bitExpr, bitVectorExpr, {Input(V, 0, bitExpr)}); + } else { + return {}; + } +} diff --git a/compiler/Symbolizer.h b/compiler/Symbolizer.h index 8ab440af..808712ee 100644 --- a/compiler/Symbolizer.h +++ b/compiler/Symbolizer.h @@ -103,6 +103,7 @@ class Symbolizer : public llvm::InstVisitor { // Implementation of InstVisitor // void visitBinaryOperator(llvm::BinaryOperator &I); + void visitUnaryOperator(llvm::UnaryOperator &I); void visitSelectInst(llvm::SelectInst &I); void visitCmpInst(llvm::CmpInst &I); void visitReturnInst(llvm::ReturnInst &I); @@ -142,6 +143,14 @@ class Symbolizer : public llvm::InstVisitor { unsigned operandIndex; llvm::Instruction *user; + Input() = default; + + Input(llvm::Value *concrete, unsigned idx, llvm::Instruction *user) + : concreteValue(concrete), operandIndex(idx), user(user) { + assert(getSymbolicOperand()->getType() == + llvm::Type::getInt8PtrTy(user->getContext())); + } + llvm::Value *getSymbolicOperand() const { return user->getOperand(operandIndex); } @@ -186,22 +195,23 @@ class Symbolizer : public llvm::InstVisitor { << "\n...ending at " << *computation.lastInstruction << "\n...with inputs:\n"; for (const auto &input : computation.inputs) { - out << '\t' << *input.concreteValue << '\n'; + out << '\t' << *input.concreteValue << " => " << *input.user << '\n'; } return out; } }; /// Create an expression that represents the concrete value. - llvm::CallInst *createValueExpression(llvm::Value *V, llvm::IRBuilder<> &IRB); + llvm::Instruction *createValueExpression(llvm::Value *V, + llvm::IRBuilder<> &IRB); /// Get the (already created) symbolic expression for a value. - llvm::Value *getSymbolicExpression(llvm::Value *V) { + llvm::Value *getSymbolicExpression(llvm::Value *V) const { auto exprIt = symbolicExpressions.find(V); return (exprIt != symbolicExpressions.end()) ? exprIt->second : nullptr; } - llvm::Value *getSymbolicExpressionOrNull(llvm::Value *V) { + llvm::Value *getSymbolicExpressionOrNull(llvm::Value *V) const { auto *expr = getSymbolicExpression(V); if (expr == nullptr) return llvm::ConstantPointerNull::get( @@ -214,9 +224,9 @@ class Symbolizer : public llvm::InstVisitor { } /// Like buildRuntimeCall, but the call is always generated. - SymbolicComputation - forceBuildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function, - llvm::ArrayRef> args); + SymbolicComputation forceBuildRuntimeCall( + llvm::IRBuilder<> &IRB, SymFnT function, + llvm::ArrayRef> args) const; /// Create a call to the specified function in the run-time library. /// @@ -229,7 +239,7 @@ class Symbolizer : public llvm::InstVisitor { /// instruction is emitted and the function returns null. std::optional buildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function, - llvm::ArrayRef> args) { + llvm::ArrayRef> args) const { if (std::all_of(args.begin(), args.end(), [this](std::pair arg) { return (getSymbolicExpression(arg.first) == nullptr); @@ -243,7 +253,7 @@ class Symbolizer : public llvm::InstVisitor { /// Convenience overload that treats all arguments as symbolic. std::optional buildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function, - llvm::ArrayRef symbolicArgs) { + llvm::ArrayRef symbolicArgs) const { std::vector> args; for (const auto &arg : symbolicArgs) { args.emplace_back(arg, true); @@ -298,6 +308,27 @@ class Symbolizer : public llvm::InstVisitor { uint64_t aggregateMemberOffset(llvm::Type *aggregateType, llvm::ArrayRef indices) const; + /// Emit code that converts the bit-vector expression represented by I to an + /// expression that is appropriate for T; return the instruction that computes + /// the result (which may be I if no conversion is needed). + /// + /// The solver doesn't represent all values as bit vectors. For example, + /// floating-point values and Booleans are of separate kinds, so we emit code + /// that changes the solver kind of the expression to whatever is needed. + llvm::Instruction *convertBitVectorExprForType(llvm::IRBuilder<> &IRB, + llvm::Instruction *I, + llvm::Type *T) const; + + /// Emit code that converts the expression Expr for V to a bit-vector + /// expression. Return the SymbolicComputation representing the conversion + /// (if a conversion is necessary); the last instruction computes the result. + /// + /// This is the inverse operation of convertBitVectorExprForType (see details + /// there). + std::optional + convertExprForTypeToBitVectorExpr(llvm::IRBuilder<> &IRB, llvm::Value *V, + llvm::Value *Expr) const; + const Runtime runtime; /// The data layout of the currently processed module. diff --git a/docs/Configuration.txt b/docs/Configuration.txt index 4e743aa8..f36d7605 100644 --- a/docs/Configuration.txt +++ b/docs/Configuration.txt @@ -56,7 +56,10 @@ environment variables. uninstrumented counterparts. - SYMCC_OUTPUT_DIR (default "/tmp/output"): This is the directory where SymCC - will store new inputs (QSYM backend only). + will store new inputs (QSYM backend only). If you prefer to handle them + programatically, make your program call symcc_set_test_case_handler; the + handler will be called instead of the default handler each time the backend + generates a new input. - SYMCC_INPUT_FILE (default empty): When empty, SymCC treats data read from standard input as symbolic; when set to a file name, any data read from that diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 28ddb303..af28bb1d 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -1,16 +1,17 @@ -# This file is part of SymCC. +# This file is part of the SymCC runtime. # -# SymCC is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. +# The SymCC runtime is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your option) +# any later version. # -# SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# The SymCC runtime is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +# for more details. # -# You should have received a copy of the GNU General Public License along with -# SymCC. If not, see . +# You should have received a copy of the GNU Lesser General Public License along +# with the SymCC runtime. If not, see . cmake_minimum_required(VERSION 3.5) project(SymRuntime) diff --git a/runtime/Config.cpp b/runtime/Config.cpp index 0088a4d1..23a28df6 100644 --- a/runtime/Config.cpp +++ b/runtime/Config.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #include "Config.h" diff --git a/runtime/Config.h b/runtime/Config.h index 4ed8f806..a866821c 100644 --- a/runtime/Config.h +++ b/runtime/Config.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef CONFIG_H #define CONFIG_H diff --git a/runtime/GarbageCollection.cpp b/runtime/GarbageCollection.cpp index 8c1edc37..8afdd327 100644 --- a/runtime/GarbageCollection.cpp +++ b/runtime/GarbageCollection.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . #include "GarbageCollection.h" diff --git a/runtime/GarbageCollection.h b/runtime/GarbageCollection.h index da77ff83..81b0b8c2 100644 --- a/runtime/GarbageCollection.h +++ b/runtime/GarbageCollection.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef GARBAGECOLLECTION_H #define GARBAGECOLLECTION_H diff --git a/runtime/LibcWrappers.cpp b/runtime/LibcWrappers.cpp index 6e9c6c90..cfe55525 100644 --- a/runtime/LibcWrappers.cpp +++ b/runtime/LibcWrappers.cpp @@ -1,16 +1,33 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . + +// +// Libc wrappers +// +// This file contains the wrappers around libc functions which add symbolic +// computations; using the wrappers frees instrumented code from having to link +// against an instrumented libc. +// +// We define a wrapper for function X with SYM(X), which just changes the name +// "X" to something predictable and hopefully unique. It is then up to the +// compiler pass to replace calls of X with calls of SYM(X). +// +// In general, the wrappers ask the solver to generate alternative parameter +// values, then call the wrapped function, create and store symbolic expressions +// matching the libc function's semantics, and finally return the wrapped +// function's result. #include #include @@ -111,10 +128,31 @@ void *SYM(calloc)(size_t nmemb, size_t size) { void *SYM(mmap64)(void *addr, size_t len, int prot, int flags, int fildes, uint64_t off) { auto *result = mmap64(addr, len, prot, flags, fildes, off); + _sym_set_return_expression(nullptr); + + if (result == MAP_FAILED) // mmap failed + return result; + + if (fildes == inputFileDescriptor) { + /* we update the inputOffset only when mmap() is reading from input file + * HACK! update inputOffset with off parameter sometimes will be dangerous + * We don't know whether there is read() before/after mmap, + * if there is, we have to fix this tricky method :P + */ + inputOffset = off + len; + // Reading symbolic input. + ReadWriteShadow shadow(result, len); + uint8_t *resultBytes = (uint8_t *)result; + std::generate(shadow.begin(), shadow.end(), [resultBytes, i = 0]() mutable { + return _sym_get_input_byte(inputOffset, resultBytes[i++]); + }); + } else if (!isConcrete(result, len)) { + ReadWriteShadow shadow(result, len); + std::fill(shadow.begin(), shadow.end(), nullptr); + } tryAlternative(len, _sym_get_parameter_expression(1), SYM(mmap64)); - _sym_set_return_expression(nullptr); return result; } @@ -388,6 +426,20 @@ void *SYM(memset)(void *s, int c, size_t n) { return result; } +void SYM(bzero)(void *s, size_t n) { + bzero(s, n); + + // No return value, hence no corresponding expression. + _sym_set_return_expression(nullptr); + + tryAlternative(s, _sym_get_parameter_expression(0), SYM(bzero)); + tryAlternative(n, _sym_get_parameter_expression(1), SYM(bzero)); + + // Concretize the memory region, which now is all zeros. + ReadWriteShadow shadow(s, n); + std::fill(shadow.begin(), shadow.end(), nullptr); +} + void *SYM(memmove)(void *dest, const void *src, size_t n) { tryAlternative(dest, _sym_get_parameter_expression(0), SYM(memmove)); tryAlternative(src, _sym_get_parameter_expression(1), SYM(memmove)); @@ -401,6 +453,22 @@ void *SYM(memmove)(void *dest, const void *src, size_t n) { return result; } +void SYM(bcopy)(const void *src, void *dest, size_t n) { + tryAlternative(src, _sym_get_parameter_expression(0), SYM(bcopy)); + tryAlternative(dest, _sym_get_parameter_expression(1), SYM(bcopy)); + tryAlternative(n, _sym_get_parameter_expression(2), SYM(bcopy)); + + bcopy(src, dest, n); + + // bcopy is mostly equivalent to memmove, so we can use our symbolic version + // of memmove to copy any symbolic expressions over to the destination. + _sym_memmove(static_cast(dest), static_cast(src), + n); + + // void function, so there is no return value and hence no expression for it. + _sym_set_return_expression(nullptr); +} + char *SYM(strncpy)(char *dest, const char *src, size_t n) { tryAlternative(dest, _sym_get_parameter_expression(0), SYM(strncpy)); tryAlternative(src, _sym_get_parameter_expression(1), SYM(strncpy)); @@ -484,6 +552,43 @@ int SYM(memcmp)(const void *a, const void *b, size_t n) { return result; } +int SYM(bcmp)(const void *a, const void *b, size_t n) { + tryAlternative(a, _sym_get_parameter_expression(0), SYM(bcmp)); + tryAlternative(b, _sym_get_parameter_expression(1), SYM(bcmp)); + tryAlternative(n, _sym_get_parameter_expression(2), SYM(bcmp)); + + auto result = bcmp(a, b, n); + + // bcmp returns zero if the input regions are equal and an unspecified + // non-zero value otherwise. Instead of expressing this symbolically, we + // directly ask the solver for an alternative solution (assuming that the + // result is used for a conditional branch later), and return a concrete + // value. + _sym_set_return_expression(nullptr); + + // The result of the comparison depends on whether the input regions are equal + // byte by byte. Construct the corresponding expression, but only if there is + // at least one symbolic byte in either of the regions; otherwise, the result + // is concrete. + + if (isConcrete(a, n) && isConcrete(b, n)) + return result; + + auto aShadowIt = ReadOnlyShadow(a, n).begin_non_null(); + auto bShadowIt = ReadOnlyShadow(b, n).begin_non_null(); + auto *allEqual = _sym_build_equal(*aShadowIt, *bShadowIt); + for (size_t i = 1; i < n; i++) { + ++aShadowIt; + ++bShadowIt; + allEqual = + _sym_build_bool_and(allEqual, _sym_build_equal(*aShadowIt, *bShadowIt)); + } + + _sym_push_path_constraint(allEqual, result == 0, + reinterpret_cast(SYM(bcmp))); + return result; +} + uint32_t SYM(ntohl)(uint32_t netlong) { auto netlongExpr = _sym_get_parameter_expression(0); auto result = ntohl(netlong); diff --git a/runtime/LibcWrappers.h b/runtime/LibcWrappers.h index d84c1f47..2304a3a0 100644 --- a/runtime/LibcWrappers.h +++ b/runtime/LibcWrappers.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef LIBCWRAPPERS_H #define LIBCWRAPPERS_H diff --git a/runtime/RuntimeCommon.cpp b/runtime/RuntimeCommon.cpp index eddd93d0..127c81de 100644 --- a/runtime/RuntimeCommon.cpp +++ b/runtime/RuntimeCommon.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . #include @@ -36,6 +37,51 @@ SymExpr g_return_value; std::array g_function_arguments; // TODO make thread-local +SymExpr buildMinSignedInt(uint8_t bits) { + return _sym_build_integer((uint64_t)(1) << (bits - 1), bits); +} + +SymExpr buildMaxSignedInt(uint8_t bits) { + uint64_t mask = ((uint64_t)(1) << bits) - 1; + return _sym_build_integer(((uint64_t)(~0) & mask) >> 1, bits); +} + +SymExpr buildMaxUnsignedInt(uint8_t bits) { + uint64_t mask = ((uint64_t)(1) << bits) - 1; + return _sym_build_integer((uint64_t)(~0) & mask, bits); +} + +/// Construct an expression describing the in-memory representation of the +/// bitcode structure {iN, i1} returned by the intrinsics for arithmetic with +/// overflow (see +/// https://llvm.org/docs/LangRef.html#arithmetic-with-overflow-intrinsics). The +/// overflow parameter is expected to be a symbolic Boolean. +SymExpr buildOverflowResult(SymExpr result_expr, SymExpr overflow, + bool little_endian) { + auto result_bits = _sym_bits_helper(result_expr); + assert(result_bits % 8 == 0 && + "Arithmetic with overflow on integers of invalid length"); + + // When storing {iN, i1} in memory, the compiler would insert padding between + // the two elements, extending the Boolean to the same size as the integer. We + // simulate the same here, taking endianness into account. + + auto result_expr_mem = + little_endian ? _sym_build_bswap(result_expr) : result_expr; + auto overflow_byte = _sym_build_zext(_sym_build_bool_to_bit(overflow), 7); + + // There's no padding if the result is a single byte. + if (result_bits == 8) { + return _sym_concat_helper(result_expr_mem, overflow_byte); + } + + auto padding = _sym_build_zero_bytes(result_bits / 8 - 1); + return _sym_concat_helper(result_expr_mem, + little_endian + ? _sym_concat_helper(overflow_byte, padding) + : _sym_concat_helper(padding, overflow_byte)); +} + } // namespace void _sym_set_return_expression(SymExpr expr) { g_return_value = expr; } @@ -73,6 +119,10 @@ void _sym_memset(uint8_t *memory, SymExpr value, size_t length) { } void _sym_memmove(uint8_t *dest, const uint8_t *src, size_t length) { + // Unless both the source and the destination are fully concrete memory + // regions, we need to copy the symbolic expressions over. (In the case where + // only the destination is symbolic, this means making it concrete.) + if (isConcrete(src, length) && isConcrete(dest, length)) return; @@ -176,7 +226,9 @@ SymExpr _sym_build_insert(SymExpr target, SymExpr to_insert, uint64_t offset, SymExpr beforeInsert = (offset == 0) ? nullptr : _sym_build_extract(target, 0, offset, false); - SymExpr newPiece = little_endian ? _sym_build_bswap(to_insert) : to_insert; + SymExpr newPiece = (little_endian && bitsToInsert > 8) + ? _sym_build_bswap(to_insert) + : to_insert; uint64_t afterLen = (_sym_bits_helper(target) / 8) - offset - (bitsToInsert / 8); SymExpr afterInsert = @@ -198,19 +250,226 @@ SymExpr _sym_build_insert(SymExpr target, SymExpr to_insert, uint64_t offset, return result; } +SymExpr _sym_build_zero_bytes(size_t length) { + auto zero_byte = _sym_build_integer(0, 8); + + auto result = zero_byte; + for (size_t i = 1; i < length; i++) { + result = _sym_concat_helper(result, zero_byte); + } + + return result; +} + +SymExpr _sym_build_sadd_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + SymExpr min = buildMinSignedInt(bits); + SymExpr max = buildMaxSignedInt(bits); + SymExpr add_sext = + _sym_build_add(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + + return _sym_build_ite( + // If the result is less than the min signed integer... + _sym_build_signed_less_equal(add_sext, _sym_build_sext(min, 1)), + // ... Return the min signed integer + min, + _sym_build_ite( + // Otherwise, if the result is greater than the max signed integer... + _sym_build_signed_greater_equal(add_sext, _sym_build_sext(max, 1)), + // ... Return the max signed integer + max, + // Otherwise, return the addition + _sym_build_add(a, b))); +} + +SymExpr _sym_build_uadd_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + SymExpr max = buildMaxUnsignedInt(bits); + SymExpr add_zext = + _sym_build_add(_sym_build_zext(a, 1), _sym_build_zext(b, 1)); + + return _sym_build_ite( + // If the top bit is set, an overflow has occurred and... + _sym_build_bit_to_bool(_sym_extract_helper(add_zext, bits, bits)), + // ... Return the max unsigned integer + max, + // Otherwise, return the addition + _sym_build_add(a, b)); +} + +SymExpr _sym_build_ssub_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + SymExpr min = buildMinSignedInt(bits); + SymExpr max = buildMaxSignedInt(bits); + + SymExpr sub_sext = + _sym_build_sub(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + + return _sym_build_ite( + // If the result is less than the min signed integer... + _sym_build_signed_less_equal(sub_sext, _sym_build_sext(min, 1)), + // ... Return the min signed integer + min, + _sym_build_ite( + // Otherwise, if the result is greater than the max signed integer... + _sym_build_signed_greater_equal(sub_sext, _sym_build_sext(max, 1)), + // ... Return the max signed integer + max, + // Otherwise, return the subtraction + _sym_build_sub(a, b))); +} + +SymExpr _sym_build_usub_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + + return _sym_build_ite( + // If `a >= b`, then no overflow occurs and... + _sym_build_unsigned_greater_equal(a, b), + // ... Return the subtraction + _sym_build_sub(a, b), + // Otherwise, saturate at zero + _sym_build_integer(0, bits)); +} + +static SymExpr _sym_build_shift_left_overflow(SymExpr a, SymExpr b) { + return _sym_build_not_equal( + _sym_build_arithmetic_shift_right(_sym_build_shift_left(a, b), b), a); +} + +SymExpr _sym_build_sshl_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + + return _sym_build_ite( + // If an overflow occurred... + _sym_build_shift_left_overflow(a, b), + _sym_build_ite( + // ... And the LHS is negative... + _sym_build_bit_to_bool(_sym_extract_helper(a, bits - 1, bits - 1)), + // ... Return the min signed integer... + buildMinSignedInt(bits), + // ... Otherwise, return the max signed integer + buildMaxSignedInt(bits)), + // Otherwise, return the left shift + _sym_build_shift_left(a, b)); +} + +SymExpr _sym_build_ushl_sat(SymExpr a, SymExpr b) { + size_t bits = _sym_bits_helper(a); + + return _sym_build_ite( + // If an overflow occurred... + _sym_build_shift_left_overflow(a, b), + // ... Return the max unsigned integer + buildMaxUnsignedInt(bits), + // Otherwise, return the left shift + _sym_build_shift_left(a, b)); +} + +SymExpr _sym_build_add_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian) { + size_t bits = _sym_bits_helper(a); + SymExpr overflow = [&]() { + if (is_signed) { + // Check if the additions are different + SymExpr add_sext = + _sym_build_add(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + return _sym_build_not_equal(add_sext, + _sym_build_sext(_sym_build_add(a, b), 1)); + } else { + // Check if the addition overflowed into the extra bit + SymExpr add_zext = + _sym_build_add(_sym_build_zext(a, 1), _sym_build_zext(b, 1)); + return _sym_build_equal(_sym_extract_helper(add_zext, bits, bits), + _sym_build_true()); + } + }(); + + return buildOverflowResult(_sym_build_add(a, b), overflow, little_endian); +} + +SymExpr _sym_build_sub_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian) { + size_t bits = _sym_bits_helper(a); + SymExpr overflow = [&]() { + if (is_signed) { + // Check if the subtractions are different + SymExpr sub_sext = + _sym_build_sub(_sym_build_sext(a, 1), _sym_build_sext(b, 1)); + return _sym_build_not_equal(sub_sext, + _sym_build_sext(_sym_build_sub(a, b), 1)); + } else { + // Check if the subtraction overflowed into the extra bit + SymExpr sub_zext = + _sym_build_sub(_sym_build_zext(a, 1), _sym_build_zext(b, 1)); + return _sym_build_equal(_sym_extract_helper(sub_zext, bits, bits), + _sym_build_true()); + } + }(); + + return buildOverflowResult(_sym_build_sub(a, b), overflow, little_endian); +} + +SymExpr _sym_build_mul_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian) { + size_t bits = _sym_bits_helper(a); + SymExpr overflow = [&]() { + if (is_signed) { + // Check if the multiplications are different + SymExpr mul_sext = + _sym_build_mul(_sym_build_sext(a, bits), _sym_build_sext(b, bits)); + return _sym_build_not_equal(mul_sext, + _sym_build_sext(_sym_build_mul(a, b), bits)); + } else { + // Check if the multiplication overflowed into the extra bit + SymExpr mul_zext = + _sym_build_mul(_sym_build_zext(a, bits), _sym_build_zext(b, bits)); + return _sym_build_equal( + _sym_extract_helper(mul_zext, 2 * bits - 1, 2 * bits - 1), + _sym_build_true()); + } + }(); + + return buildOverflowResult(_sym_build_mul(a, b), overflow, little_endian); +} + +SymExpr _sym_build_funnel_shift_left(SymExpr a, SymExpr b, SymExpr c) { + size_t bits = _sym_bits_helper(c); + SymExpr concat = _sym_concat_helper(a, b); + SymExpr shift = _sym_build_unsigned_rem(c, _sym_build_integer(bits, bits)); + + return _sym_extract_helper(_sym_build_shift_left(concat, shift), 0, bits); +} + +SymExpr _sym_build_funnel_shift_right(SymExpr a, SymExpr b, SymExpr c) { + size_t bits = _sym_bits_helper(c); + SymExpr concat = _sym_concat_helper(a, b); + SymExpr shift = _sym_build_unsigned_rem(c, _sym_build_integer(bits, bits)); + + return _sym_extract_helper(_sym_build_logical_shift_right(concat, shift), 0, + bits); +} + +SymExpr _sym_build_abs(SymExpr expr) { + size_t bits = _sym_bits_helper(expr); + return _sym_build_ite( + _sym_build_signed_greater_equal(expr, _sym_build_integer(0, bits)), expr, + _sym_build_sub(_sym_build_integer(0, bits), expr)); +} + void _sym_register_expression_region(SymExpr *start, size_t length) { registerExpressionRegion({start, length}); } -void _sym_make_symbolic(void *data, size_t byte_length, size_t input_offset) { +void _sym_make_symbolic(const void *data, size_t byte_length, + size_t input_offset) { ReadWriteShadow shadow(data, byte_length); - uint8_t *data_bytes = reinterpret_cast(data); + const uint8_t *data_bytes = reinterpret_cast(data); std::generate(shadow.begin(), shadow.end(), [&, i = 0]() mutable { return _sym_get_input_byte(input_offset++, data_bytes[i++]); }); } -void symcc_make_symbolic(void *start, size_t byte_length) { +void symcc_make_symbolic(const void *start, size_t byte_length) { if (!std::holds_alternative(g_config.input)) throw std::runtime_error{"Calls to symcc_make_symbolic aren't allowed when " "SYMCC_MEMORY_INPUT isn't set"}; @@ -219,3 +478,11 @@ void symcc_make_symbolic(void *start, size_t byte_length) { _sym_make_symbolic(start, byte_length, inputOffset); inputOffset += byte_length; } + +SymExpr _sym_build_bit_to_bool(SymExpr expr) { + if (expr == nullptr) + return nullptr; + + return _sym_build_not_equal(expr, + _sym_build_integer(0, _sym_bits_helper(expr))); +} diff --git a/runtime/RuntimeCommon.h b/runtime/RuntimeCommon.h index bd86a6ed..cd016c4f 100644 --- a/runtime/RuntimeCommon.h +++ b/runtime/RuntimeCommon.h @@ -3,28 +3,35 @@ // This header defines the interface of the run-time library. It is not actually // used anywhere because the compiler pass inserts calls to the library // functions at the level of LLVM bitcode, but it serves as documentation of the -// intended interface. +// intended interface. Unless documented otherwise, functions taking symbolic +// expressions can't handle null values (i.e., they shouldn't be called for +// concrete values); exceptions are made if it's too difficult to check for +// concreteness in bitcode. // // Whoever uses this file has to define the type "SymExpr" first; we use it to // keep this header independent of the back-end implementation. -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef RUNTIMECOMMON_H #define RUNTIMECOMMON_H +/* Marker for expression parameters which may be null. */ +#define nullable + #ifdef __cplusplus #include // for size_t #include @@ -53,7 +60,7 @@ SymExpr _sym_build_false(void); SymExpr _sym_build_bool(bool value); /* - * Arithmetic and shifts + * Integer arithmetic and shifts */ SymExpr _sym_build_neg(SymExpr expr); SymExpr _sym_build_add(SymExpr a, SymExpr b); @@ -66,13 +73,40 @@ SymExpr _sym_build_signed_rem(SymExpr a, SymExpr b); SymExpr _sym_build_shift_left(SymExpr a, SymExpr b); SymExpr _sym_build_logical_shift_right(SymExpr a, SymExpr b); SymExpr _sym_build_arithmetic_shift_right(SymExpr a, SymExpr b); +SymExpr _sym_build_funnel_shift_left(SymExpr a, SymExpr b, SymExpr c); +SymExpr _sym_build_funnel_shift_right(SymExpr a, SymExpr b, SymExpr c); +SymExpr _sym_build_abs(SymExpr expr); + +/* + * Arithmetic with overflow + */ +SymExpr _sym_build_add_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian); +SymExpr _sym_build_sub_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian); +SymExpr _sym_build_mul_overflow(SymExpr a, SymExpr b, bool is_signed, + bool little_endian); + +/* + * Saturating integer arithmetic and shifts + */ +SymExpr _sym_build_sadd_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_uadd_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_ssub_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_usub_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_sshl_sat(SymExpr a, SymExpr b); +SymExpr _sym_build_ushl_sat(SymExpr a, SymExpr b); +/* + * Floating-point arithmetic and shifts + */ SymExpr _sym_build_fp_add(SymExpr a, SymExpr b); SymExpr _sym_build_fp_sub(SymExpr a, SymExpr b); SymExpr _sym_build_fp_mul(SymExpr a, SymExpr b); SymExpr _sym_build_fp_div(SymExpr a, SymExpr b); SymExpr _sym_build_fp_rem(SymExpr a, SymExpr b); SymExpr _sym_build_fp_abs(SymExpr a); +SymExpr _sym_build_fp_neg(SymExpr a); /* * Boolean operations @@ -94,6 +128,7 @@ SymExpr _sym_build_bool_or(SymExpr a, SymExpr b); SymExpr _sym_build_or(SymExpr a, SymExpr b); SymExpr _sym_build_bool_xor(SymExpr a, SymExpr b); SymExpr _sym_build_xor(SymExpr a, SymExpr b); +SymExpr _sym_build_ite(SymExpr cond, SymExpr a, SymExpr b); SymExpr _sym_build_float_ordered_greater_than(SymExpr a, SymExpr b); SymExpr _sym_build_float_ordered_greater_equal(SymExpr a, SymExpr b); @@ -113,9 +148,9 @@ SymExpr _sym_build_float_unordered_not_equal(SymExpr a, SymExpr b); /* * Casts */ -SymExpr _sym_build_sext(SymExpr expr, uint8_t bits); -SymExpr _sym_build_zext(SymExpr expr, uint8_t bits); -SymExpr _sym_build_trunc(SymExpr expr, uint8_t bits); +SymExpr _sym_build_sext(nullable SymExpr expr, uint8_t bits); +SymExpr _sym_build_zext(nullable SymExpr expr, uint8_t bits); +SymExpr _sym_build_trunc(nullable SymExpr expr, uint8_t bits); SymExpr _sym_build_bswap(SymExpr expr); SymExpr _sym_build_int_to_float(SymExpr value, int is_double, int is_signed); SymExpr _sym_build_float_to_float(SymExpr expr, int to_double); @@ -123,7 +158,8 @@ SymExpr _sym_build_bits_to_float(SymExpr expr, int to_double); SymExpr _sym_build_float_to_bits(SymExpr expr); SymExpr _sym_build_float_to_signed_integer(SymExpr expr, uint8_t bits); SymExpr _sym_build_float_to_unsigned_integer(SymExpr expr, uint8_t bits); -SymExpr _sym_build_bool_to_bit(SymExpr expr); +SymExpr _sym_build_bool_to_bit(nullable SymExpr expr); +SymExpr _sym_build_bit_to_bool(nullable SymExpr expr); /* * Bit-array helpers @@ -135,28 +171,30 @@ size_t _sym_bits_helper(SymExpr expr); /* * Function-call helpers */ -void _sym_set_parameter_expression(uint8_t index, SymExpr expr); +void _sym_set_parameter_expression(uint8_t index, nullable SymExpr expr); SymExpr _sym_get_parameter_expression(uint8_t index); -void _sym_set_return_expression(SymExpr expr); +void _sym_set_return_expression(nullable SymExpr expr); SymExpr _sym_get_return_expression(void); /* * Constraint handling */ -void _sym_push_path_constraint(SymExpr constraint, int taken, +void _sym_push_path_constraint(nullable SymExpr constraint, int taken, uintptr_t site_id); SymExpr _sym_get_input_byte(size_t offset, uint8_t concrete_value); -void _sym_make_symbolic(void *data, size_t byte_length, size_t input_offset); +void _sym_make_symbolic(const void *data, size_t byte_length, + size_t input_offset); /* * Memory management */ SymExpr _sym_read_memory(uint8_t *addr, size_t length, bool little_endian); -void _sym_write_memory(uint8_t *addr, size_t length, SymExpr expr, +void _sym_write_memory(uint8_t *addr, size_t length, nullable SymExpr expr, bool little_endian); void _sym_memcpy(uint8_t *dest, const uint8_t *src, size_t length); void _sym_memset(uint8_t *memory, SymExpr value, size_t length); void _sym_memmove(uint8_t *dest, const uint8_t *src, size_t length); +SymExpr _sym_build_zero_bytes(size_t length); SymExpr _sym_build_insert(SymExpr target, SymExpr to_insert, uint64_t offset, bool little_endian); SymExpr _sym_build_extract(SymExpr expr, uint64_t offset, uint64_t length, @@ -182,15 +220,19 @@ void _sym_register_expression_region(SymExpr *start, size_t length); void _sym_collect_garbage(void); /* - * Symbolic input from memory + * User-facing functionality * - * This is the only function in the interface that we expect to be called by + * These are the only functions in the interface that we expect to be called by * users (i.e., calls to it aren't auto-generated by our compiler pass). */ -void symcc_make_symbolic(void *start, size_t byte_length); +void symcc_make_symbolic(const void *start, size_t byte_length); +typedef void (*TestCaseHandler)(const void *, size_t); +void symcc_set_test_case_handler(TestCaseHandler handler); #ifdef __cplusplus } #endif +#undef nullable + #endif diff --git a/runtime/Shadow.cpp b/runtime/Shadow.cpp index 2b69a08a..2852fad1 100644 --- a/runtime/Shadow.cpp +++ b/runtime/Shadow.cpp @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . #include "Shadow.h" diff --git a/runtime/Shadow.h b/runtime/Shadow.h index ae920600..d32444c6 100644 --- a/runtime/Shadow.h +++ b/runtime/Shadow.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef SHADOW_H #define SHADOW_H @@ -18,7 +19,6 @@ #include #include #include -#include #include #include @@ -56,12 +56,21 @@ extern std::map g_shadow_pages; /// An iterator that walks over the shadow bytes corresponding to a memory /// region. If there is no shadow for any given memory address, it just returns /// null. -class ReadShadowIterator - : public std::iterator { +class ReadShadowIterator { public: explicit ReadShadowIterator(uintptr_t address) - : std::iterator(), - address_(address), shadow_(getShadow(address)) {} + : address_(address), shadow_(getShadow(address)) {} + + // The STL requires iterator types to expose the following type definitions + // (see std::iterator_traits). Before C++17, it was possible to get them by + // deriving from std::iterator, which is just an empty template struct with + // five typedefs. However, std::iterator was deprecated in C++17 and hence its + // use causes a warning in recent compilers. + using iterator_category = std::bidirectional_iterator_tag; + using value_type = SymExpr; + using difference_type = ptrdiff_t; + using pointer = SymExpr *; + using reference = SymExpr &; ReadShadowIterator &operator++() { auto previousAddress = address_++; diff --git a/runtime/bindings/README b/runtime/bindings/README new file mode 100644 index 00000000..d36686f4 --- /dev/null +++ b/runtime/bindings/README @@ -0,0 +1,9 @@ + + + Runtime bindings + + +This directory contains bindings to the user-facing functionality of the runtime +(see runtime/RuntimeCommon.h). The bindings give target programs written in +different languages access to runtime features like in-memory input or custom +test-case handlers. diff --git a/runtime/bindings/ada/README b/runtime/bindings/ada/README new file mode 100644 index 00000000..312576ac --- /dev/null +++ b/runtime/bindings/ada/README @@ -0,0 +1,36 @@ + + + Ada bindings + + +This directory contains Ada bindings for the SymCC runtime. To use them in your +Ada code, you can either point gprbuild here directly (e.g., by setting +GPR_PROJECT_PATH appropriately), or you can install them in the system: + +$ gprbuild -Psymcc +$ gprinstall -Psymcc + +Either way, you'll then be able to include SymCC in your project definition +(i.e., the .gpr file for your project): + + with "symcc"; + +This will let you use the bindings in your Ada code, for example: + + with SymCC; use SymCC; + + -- ... + + -- Register a procedure that receives new program inputs. + SymCC_Set_Test_Case_Handler (My_Handler); + + -- Tell SymCC where to find the input in memory. Note that the variable needs + -- to be declared with the "aliased" keyword. + SymCC_Make_Symbolic (Input'Address, Input'Size / System.Storage_Unit); + + -- Run your code on the input; SymCC will follow the computations + -- symbolically and call My_Handler whenever it produces a new test input. + My_Code_Under_Test (Input); + +See the doc comments in symcc.ads for details, or generate HTML documentation +with "gnatdoc -Psymcc". diff --git a/runtime/bindings/ada/symcc.ads b/runtime/bindings/ada/symcc.ads new file mode 100644 index 00000000..bdda6c41 --- /dev/null +++ b/runtime/bindings/ada/symcc.ads @@ -0,0 +1,45 @@ +-- This file is part of the SymCC runtime. + +-- The SymCC runtime is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Lesser General Public License as published by +-- the Free Software Foundation, either version 3 of the License, or (at your +-- option) any later version. + +-- The SymCC runtime is distributed in the hope that it will be useful, but +-- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +-- License for more details. + +with Interfaces.C; +with System; + +-- @summary +-- Ada bindings to the SymCC runtime API. +-- +-- @description +-- This package provides thin bindings to the user-facing functionality of the +-- SymCC runtime (see RuntimeCommon.h). +package SymCC is + + procedure Make_Symbolic + (Address : System.Address; Size : Interfaces.C.size_t) with + Import => True, Convention => C, External_Name => "symcc_make_symbolic"; + -- Mark a memory region as symbolic program input. + -- @param Address The start of the region containing the input data. + -- @param Size The length in bytes of the region. + + type Test_Case_Handler_Callback_Type is + access procedure + (Data_Block : System.Address; Size : Interfaces.C.size_t) with + Convention => C; + -- Type of functions that the runtime can call when it generates new + -- program inputs (see Set_Test_Case_Handler). + + procedure Set_Test_Case_Handler + (Callback : Test_Case_Handler_Callback_Type) with + Import => True, Convention => C, + External_Name => "symcc_set_test_case_handler"; + -- Define a custom handler for new program inputs. + -- @param Callback The procedure to be called for each new input. + +end SymCC; diff --git a/runtime/bindings/ada/symcc.gpr b/runtime/bindings/ada/symcc.gpr new file mode 100644 index 00000000..31305fc9 --- /dev/null +++ b/runtime/bindings/ada/symcc.gpr @@ -0,0 +1,21 @@ +-- This file is part of the SymCC runtime. + +-- The SymCC runtime is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Lesser General Public License as published by +-- the Free Software Foundation, either version 3 of the License, or (at your +-- option) any later version. + +-- The SymCC runtime is distributed in the hope that it will be useful, but +-- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +-- License for more details. + +library project SymCC is + + for Library_Name use "symcc"; + for Library_Dir use "lib"; + + for Languages use ("Ada"); + for Object_Dir use "obj"; + +end SymCC; diff --git a/runtime/qsym_backend/CMakeLists.txt b/runtime/qsym_backend/CMakeLists.txt index feaea43c..34c01320 100644 --- a/runtime/qsym_backend/CMakeLists.txt +++ b/runtime/qsym_backend/CMakeLists.txt @@ -1,16 +1,17 @@ -# This file is part of SymCC. +# This file is part of the SymCC runtime. # -# SymCC is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. +# The SymCC runtime is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your option) +# any later version. # -# SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# The SymCC runtime is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +# for more details. # -# You should have received a copy of the GNU General Public License along with -# SymCC. If not, see . +# You should have received a copy of the GNU Lesser General Public License along +# with SymCC. If not, see . # Build the parts of the Qsym backend that are relevant for us @@ -91,6 +92,5 @@ target_link_libraries(SymRuntime ${Z3_LIBRARIES} ${QSYM_LLVM_DEPS}) # some current LTS distributions ship a GCC that requires libstdc++fs for # std::filesystem - we catch this case in order to enable users of such systems # to build with the default compiler. -if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - target_link_libraries(SymRuntime stdc++fs) -endif() +find_package(Filesystem COMPONENTS Final Experimental) +target_link_libraries(SymRuntime std::filesystem) diff --git a/runtime/qsym_backend/Runtime.cpp b/runtime/qsym_backend/Runtime.cpp index b4558a43..21b022b9 100644 --- a/runtime/qsym_backend/Runtime.cpp +++ b/runtime/qsym_backend/Runtime.cpp @@ -1,19 +1,20 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . // -// Definitions that we need for the Qsym backend +// Definitions that we need for the QSYM backend // #include "Runtime.h" @@ -50,7 +51,7 @@ #include #include -// Qsym +// QSYM #include #include #include @@ -79,7 +80,7 @@ namespace { /// Indicate whether the runtime has been initialized. std::atomic_flag g_initialized = ATOMIC_FLAG_INIT; -/// A mapping of all expressions that we have ever received from Qsym to the +/// A mapping of all expressions that we have ever received from QSYM to the /// corresponding shared pointers on the heap. /// /// We can't expect C clients to handle std::shared_ptr, so we maintain a single @@ -102,18 +103,24 @@ SymExpr registerExpression(const qsym::ExprRef &expr) { return rawExpr; } -/// A Qsym solver that doesn't require the entire input on initialization. +/// The user-provided test case handler, if any. +/// +/// If the user doesn't register a handler, we use QSYM's default behavior of +/// writing the test case to a file in the output directory. +TestCaseHandler g_test_case_handler = nullptr; + +/// A QSYM solver that doesn't require the entire input on initialization. class EnhancedQsymSolver : public qsym::Solver { // Warning! // - // We can't override methods of qsym::Solver. None of them are declared - // virtual, and the Qsym code refers to the solver with a pointer of type - // qsym::Solver*, so it will always choose the implementation of the base - // class. What we can do, though, is add new functions that access the data - // members of the base class. + // Before we can override methods of qsym::Solver, we need to declare them + // virtual because the QSYM code refers to the solver with a pointer of type + // qsym::Solver*; for non-virtual methods, it will always choose the + // implementation of the base class. Beware that making a method virtual adds + // a small performance overhead and requires us to change QSYM code. // - // Subclassing the Qsym solver is ugly but helps us to avoid making too many - // changes in the Qsym codebase. + // Subclassing the QSYM solver is ugly but helps us to avoid making too many + // changes in the QSYM codebase. public: EnhancedQsymSolver() @@ -126,6 +133,21 @@ class EnhancedQsymSolver : public qsym::Solver { inputs_[offset] = value; } + + void saveValues(const std::string &suffix) override { + if (auto handler = g_test_case_handler) { + auto values = getConcreteValues(); + // The test-case handler may be instrumented, so let's call it with + // argument expressions to meet instrumented code's expectations. + // Otherwise, we might end up erroneously using whatever expression was + // last registered for a function parameter. + _sym_set_parameter_expression(0, nullptr); + _sym_set_parameter_expression(1, nullptr); + handler(values.data(), values.size()); + } else { + Solver::saveValues(suffix); + } + } }; EnhancedQsymSolver *g_enhanced_solver; @@ -165,13 +187,13 @@ void _sym_initialize(void) { g_z3_context = new z3::context{}; g_enhanced_solver = new EnhancedQsymSolver{}; - g_solver = g_enhanced_solver; // for Qsym-internal use + g_solver = g_enhanced_solver; // for QSYM-internal use g_expr_builder = g_config.pruning ? PruneExprBuilder::create() : SymbolicExprBuilder::create(); } SymExpr _sym_build_integer(uint64_t value, uint8_t bits) { - // Qsym's API takes uintptr_t, so we need to be careful when compiling for + // QSYM's API takes uintptr_t, so we need to be careful when compiling for // 32-bit systems: the compiler would helpfully truncate our uint64_t to fit // into 32 bits. if constexpr (sizeof(uint64_t) == sizeof(uintptr_t)) { @@ -258,17 +280,32 @@ SymExpr _sym_build_not(SymExpr expr) { g_expr_builder->createNot(allocatedExpressions.at(expr))); } +SymExpr _sym_build_ite(SymExpr cond, SymExpr a, SymExpr b) { + return registerExpression(g_expr_builder->createIte( + allocatedExpressions.at(cond), allocatedExpressions.at(a), + allocatedExpressions.at(b))); +} + SymExpr _sym_build_sext(SymExpr expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression(g_expr_builder->createSExt( allocatedExpressions.at(expr), bits + expr->bits())); } SymExpr _sym_build_zext(SymExpr expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression(g_expr_builder->createZExt( allocatedExpressions.at(expr), bits + expr->bits())); } SymExpr _sym_build_trunc(SymExpr expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression( g_expr_builder->createTrunc(allocatedExpressions.at(expr), bits)); } @@ -299,24 +336,43 @@ SymExpr _sym_extract_helper(SymExpr expr, size_t first_bit, size_t last_bit) { size_t _sym_bits_helper(SymExpr expr) { return expr->bits(); } SymExpr _sym_build_bool_to_bit(SymExpr expr) { + if (expr == nullptr) + return nullptr; + return registerExpression( g_expr_builder->boolToBit(allocatedExpressions.at(expr), 1)); } // -// Floating-point operations (unsupported in Qsym) +// Floating-point operations (unsupported in QSYM) // +// Even if we don't generally support operations on floats in this backend, we +// need dummy implementations of a few functions to help the parts of the +// instrumentation that deal with structures; if structs contain floats, the +// instrumentation expects to be able to create bit-vector expressions for +// them. + +SymExpr _sym_build_float(double, int is_double) { + // We create an all-zeros bit vector, mainly to capture the length of the + // value. This is compatible with our dummy implementation of + // _sym_build_float_to_bits. + return registerExpression( + g_expr_builder->createConstant(0, is_double ? 64 : 32)); +} + +SymExpr _sym_build_float_to_bits(SymExpr expr) { return expr; } + #define UNSUPPORTED(prototype) \ prototype { return nullptr; } -UNSUPPORTED(SymExpr _sym_build_float(double, int)) UNSUPPORTED(SymExpr _sym_build_fp_add(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_sub(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_mul(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_div(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_rem(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_fp_abs(SymExpr)) +UNSUPPORTED(SymExpr _sym_build_fp_neg(SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_ordered_greater_than(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_ordered_greater_equal(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_ordered_less_than(SymExpr, SymExpr)) @@ -334,7 +390,6 @@ UNSUPPORTED(SymExpr _sym_build_float_unordered_not_equal(SymExpr, SymExpr)) UNSUPPORTED(SymExpr _sym_build_int_to_float(SymExpr, int, int)) UNSUPPORTED(SymExpr _sym_build_float_to_float(SymExpr, int)) UNSUPPORTED(SymExpr _sym_build_bits_to_float(SymExpr, int)) -UNSUPPORTED(SymExpr _sym_build_float_to_bits(SymExpr)) UNSUPPORTED(SymExpr _sym_build_float_to_signed_integer(SymExpr, uint8_t)) UNSUPPORTED(SymExpr _sym_build_float_to_unsigned_integer(SymExpr, uint8_t)) @@ -417,3 +472,11 @@ void _sym_collect_garbage() { << " milliseconds)" << std::endl; #endif } + +// +// Test-case handling +// + +void symcc_set_test_case_handler(TestCaseHandler handler) { + g_test_case_handler = handler; +} diff --git a/runtime/qsym_backend/Runtime.h b/runtime/qsym_backend/Runtime.h index e0a23526..8f19d2af 100644 --- a/runtime/qsym_backend/Runtime.h +++ b/runtime/qsym_backend/Runtime.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef RUNTIME_H #define RUNTIME_H diff --git a/runtime/qsym_backend/pin.H b/runtime/qsym_backend/pin.H index 84e8f7c6..083d79a8 100644 --- a/runtime/qsym_backend/pin.H +++ b/runtime/qsym_backend/pin.H @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #ifndef PIN_H #define PIN_H diff --git a/runtime/simple_backend/CMakeLists.txt b/runtime/simple_backend/CMakeLists.txt index baaff20b..64822050 100644 --- a/runtime/simple_backend/CMakeLists.txt +++ b/runtime/simple_backend/CMakeLists.txt @@ -1,16 +1,17 @@ -# This file is part of SymCC. +# This file is part of the SymCC runtime. # -# SymCC is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. +# The SymCC runtime is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your option) +# any later version. # -# SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# The SymCC runtime is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +# for more details. # -# You should have received a copy of the GNU General Public License along with -# SymCC. If not, see . +# You should have received a copy of the GNU Lesser General Public License along +# with SymCC. If not, see . find_package(Z3 4 CONFIG) if (NOT Z3_FOUND) diff --git a/runtime/simple_backend/Runtime.cpp b/runtime/simple_backend/Runtime.cpp index 50dbc8df..53ca28cd 100644 --- a/runtime/simple_backend/Runtime.cpp +++ b/runtime/simple_backend/Runtime.cpp @@ -1,22 +1,24 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with the SymCC runtime. If not, see . #include #include #include #include +#include #include #include #include @@ -79,7 +81,7 @@ void handle_z3_error(Z3_context c [[maybe_unused]], Z3_error_code e) { } #endif -Z3_ast build_variable(const char *name, uint8_t bits) { +SymExpr build_variable(const char *name, uint8_t bits) { Z3_symbol sym = Z3_mk_string_symbol(g_context, name); auto *sort = Z3_mk_bv_sort(g_context, bits); Z3_inc_ref(g_context, (Z3_ast)sort); @@ -92,7 +94,7 @@ Z3_ast build_variable(const char *name, uint8_t bits) { /// The set of all expressions we have ever passed to client code. std::set allocatedExpressions; -SymExpr registerExpression(Z3_ast expr) { +SymExpr registerExpression(SymExpr expr) { if (allocatedExpressions.count(expr) == 0) { // We don't know this expression yet. Record it and increase the reference // counter. @@ -241,6 +243,10 @@ DEF_BINARY_EXPR_BUILDER(float_ordered_equal, fpa_eq) #undef DEF_BINARY_EXPR_BUILDER +Z3_ast _sym_build_ite(Z3_ast cond, Z3_ast a, Z3_ast b) { + return registerExpression(Z3_mk_ite(g_context, cond, a, b)); +} + Z3_ast _sym_build_fp_add(Z3_ast a, Z3_ast b) { return registerExpression(Z3_mk_fpa_add(g_context, g_rounding_mode, a, b)); } @@ -265,6 +271,10 @@ Z3_ast _sym_build_fp_abs(Z3_ast a) { return registerExpression(Z3_mk_fpa_abs(g_context, a)); } +Z3_ast _sym_build_fp_neg(Z3_ast a) { + return registerExpression(Z3_mk_fpa_neg(g_context, a)); +} + Z3_ast _sym_build_not(Z3_ast expr) { return registerExpression(Z3_mk_bvnot(g_context, expr)); } @@ -349,14 +359,21 @@ Z3_ast _sym_build_float_unordered_not_equal(Z3_ast a, Z3_ast b) { } Z3_ast _sym_build_sext(Z3_ast expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; return registerExpression(Z3_mk_sign_ext(g_context, bits, expr)); } Z3_ast _sym_build_zext(Z3_ast expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; return registerExpression(Z3_mk_zero_ext(g_context, bits, expr)); } Z3_ast _sym_build_trunc(Z3_ast expr, uint8_t bits) { + if (expr == nullptr) + return nullptr; + return registerExpression(Z3_mk_extract(g_context, bits - 1, 0, expr)); } @@ -408,9 +425,10 @@ Z3_ast _sym_build_float_to_unsigned_integer(Z3_ast expr, uint8_t bits) { } Z3_ast _sym_build_bool_to_bit(Z3_ast expr) { - return registerExpression(Z3_mk_ite(g_context, expr, - _sym_build_integer(1, 1), - _sym_build_integer(0, 1))); + if (expr == nullptr) + return nullptr; + return _sym_build_ite(expr, _sym_build_integer(1, 1), + _sym_build_integer(0, 1)); } void _sym_push_path_constraint(Z3_ast constraint, int taken, @@ -425,13 +443,13 @@ void _sym_push_path_constraint(Z3_ast constraint, int taken, "true" or "false", there is no point in trying to solve the negation or * pushing the constraint to the solver... */ - if (Z3_is_eq_ast(g_context, constraint, Z3_mk_true(g_context))) { + if (Z3_is_eq_ast(g_context, constraint, g_true)) { assert(taken && "We have taken an impossible branch"); Z3_dec_ref(g_context, constraint); return; } - if (Z3_is_eq_ast(g_context, constraint, Z3_mk_false(g_context))) { + if (Z3_is_eq_ast(g_context, constraint, g_false)) { assert(!taken && "We have taken an impossible branch"); Z3_dec_ref(g_context, constraint); return; @@ -544,3 +562,13 @@ void _sym_collect_garbage() { << " milliseconds)" << std::endl; #endif } + +/* Test-case handling */ +void symcc_set_test_case_handler(TestCaseHandler) { + // The simple backend doesn't support test-case handlers. However, let's not + // make this a fatal error; otherwise, users would have to change their + // programs to make them work with the simple backend. + fprintf( + g_log, + "Warning: test-case handlers aren't supported in the simple backend\n"); +} diff --git a/runtime/simple_backend/Runtime.h b/runtime/simple_backend/Runtime.h index 953d7db0..66fe8b91 100644 --- a/runtime/simple_backend/Runtime.h +++ b/runtime/simple_backend/Runtime.h @@ -1,16 +1,17 @@ -// This file is part of SymCC. +// This file is part of the SymCC runtime. // -// SymCC is free software: you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the Free Software -// Foundation, either version 3 of the License, or (at your option) any later -// version. +// The SymCC runtime is free software: you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License as published by the +// Free Software Foundation, either version 3 of the License, or (at your +// option) any later version. // -// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// The SymCC runtime is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. // -// You should have received a copy of the GNU General Public License along with -// SymCC. If not, see . +// You should have received a copy of the GNU Lesser General Public License +// along with SymCC. If not, see . #ifndef RUNTIME_H #define RUNTIME_H diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c9613e57..c6b0d1b5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -18,6 +18,13 @@ else() set(SYM_TEST_FILECHECK_ARGS "--check-prefix=SIMPLE --check-prefix=ANY") endif() +if (${LLVM_VERSION_MAJOR} VERSION_GREATER_EQUAL 14) + # FileCheck used to be fine with unused prefixes when more than one prefix was + # defined. This changed in LLVM version 14, requiring the new option + # "--allow-unused-prefixes" (added in LLVM 13) to restore the old behavior. + set(SYM_TEST_FILECHECK_ARGS "${SYM_TEST_FILECHECK_ARGS} --allow-unused-prefixes") +endif() + configure_file("lit.site.cfg.in" "lit.site.cfg") add_custom_target(check diff --git a/test/README b/test/README new file mode 120000 index 00000000..410eae0b --- /dev/null +++ b/test/README @@ -0,0 +1 @@ +../docs/Testing.txt \ No newline at end of file diff --git a/test/bcopy_bcmp_bzero.c b/test/bcopy_bcmp_bzero.c new file mode 100644 index 00000000..fff833f2 --- /dev/null +++ b/test/bcopy_bcmp_bzero.c @@ -0,0 +1,65 @@ +// This file is part of SymCC. +// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see . + +// RUN: %symcc -O2 %s -o %t +// RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s +// +// Test symbolic handling of bcopy, bcmp, and bzero. We copy symbolic data with +// bcmp, then compare it with bcmp, expecting the solver to be triggered +// (indicating that the two functions are represented correctly); then we bzero +// the region and perform another comparison, which should not result in a +// solver query (indicating that bzero concretized as expected). + +#include +#include +#include +#include + +void symcc_make_symbolic(const void *start, size_t byte_length); +typedef void (*TestCaseHandler)(const void *, size_t); +void symcc_set_test_case_handler(TestCaseHandler handler); + +int solved = 0; + +void handle_test_case(const void *data, size_t data_length) { + assert(data_length == 4); + assert(bcmp(data, "bar", 4) == 0); + solved = 1; +} + +int main(int argc, char *argv[]) { + symcc_set_test_case_handler(handle_test_case); + + const char input[] = "foo"; + symcc_make_symbolic(input, 4); + + // Make a copy and compare it in order to trigger the solver. + char copy[4]; + bcopy(input, copy, 4); + int bcmp_result = bcmp(copy, "bar", 4); + assert(bcmp_result != 0); + + // Zero out the symbolic data and compare again (which should not trigger the + // solver this time). + bzero(copy, 4); + bcmp_result = bcmp(copy, "abc", 4); + assert(bcmp_result != 0); + + // The simple backend doesn't support test-case handlers, so we only expect a + // solution with the QSYM backend. + printf("Solved: %d\n", solved); + // SIMPLE: Solved: 0 + // QSYM: Solved: 1 + return 0; +} diff --git a/test/concrete_structs.ll b/test/concrete_structs.ll new file mode 100644 index 00000000..e3d8748c --- /dev/null +++ b/test/concrete_structs.ll @@ -0,0 +1,107 @@ +; This file is part of SymCC. +; +; SymCC is free software: you can redistribute it and/or modify it under the +; terms of the GNU General Public License as published by the Free Software +; Foundation, either version 3 of the License, or (at your option) any later +; version. +; +; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +; A PARTICULAR PURPOSE. See the GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License along with +; SymCC. If not, see . + +; Verify that we create correct expressions from struct values. For each kind of +; value, we trigger expression creation by inserting a symbolic value into the +; struct. Compiling this code with SymCC and verifying that the resulting binary +; exits cleanly shows that SymCC's instrumentation doesn't break the execution +; of the program. Moreover, we store a struct value to memory, load one of its +; elements back into a register, and branch based on it in order to trigger the +; solver; by checking the generated test case we can verify that the expression +; was correct. +; +; This test reproduces a bug where creating expressions for some structs would +; lead to a program crash. +; +; Since the bitcode is written by hand, we first run llc on it because it +; performs a validity check, whereas Clang doesn't. + +; RUN: llc %s -o /dev/null +; RUN: %symcc %s -o %t +; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s + +target triple = "x86_64-pc-linux-gnu" + +; The struct type which we'll create expressions for. Include a floating-point +; value and a Boolean because they're represented with non-bitvector solver +; variables (reproducing eurecom-s3/symcc#138). +%struct_type = type { i8, i32, i8, float, i1 } + +; Global variable to record whether we've found a solution. Since the simple +; backend doesn't support test-case handlers, we start with "true". +@solved = global i1 1 + +; Our test-case handler verifies that the new test case is a 32-bit integer +; with the value 42. +define void @test_case_handler(i8* %data, i64 %data_length) { + %correct_length = icmp eq i64 %data_length, 4 + br i1 %correct_length, label %check_data, label %failed + +check_data: + %value_pointer = bitcast i8* %data to i32* + %value = load i32, i32* %value_pointer + %correct_value = icmp eq i32 %value, 42 + br i1 %correct_value, label %all_good, label %failed + +all_good: + store i1 1, i1* @solved + ret void + +failed: + store i1 0, i1* @solved + ret void +} + +define i32 @main(i32 %argc, i8** %argv) { + ; Register our test-case handler. + call void @symcc_set_test_case_handler(void (i8*, i64)* @test_case_handler) + ; SIMPLE: Warning: test-case handlers + + ; Create a symbolic value that we can use to trigger the creation of struct + ; expressions. + %symbolic_value_mem = alloca i32 + store i32 1, i32* %symbolic_value_mem + call void @symcc_make_symbolic(i32* %symbolic_value_mem, i64 4) + %symbolic_value = load i32, i32* %symbolic_value_mem + %symbolic_byte = trunc i32 %symbolic_value to i8 + + ; Undef struct + insertvalue %struct_type undef, i32 %symbolic_value, 1 + + ; Struct with concrete value + insertvalue %struct_type { i8 1, i32 undef, i8 2, float undef, i1 undef }, i32 %symbolic_value, 1 + + ; Write a struct to memory and load one of its elements back into a register. + ; It's important to also insert a symbolic value into the struct, so that we + ; generate an expression in the first place. + %struct_mem = alloca %struct_type + %struct_value = insertvalue %struct_type { i8 0, i32 42, i8 undef, float undef, i1 undef }, i8 %symbolic_byte, 2 + store %struct_type %struct_value, %struct_type* %struct_mem + %value_address = getelementptr %struct_type, %struct_type* %struct_mem, i32 0, i32 1 + %value_loaded = load i32, i32* %value_address + %is_forty_two = icmp eq i32 %value_loaded, %symbolic_value + br i1 %is_forty_two, label %never_executed, label %done + ; QSYM: SMT + +never_executed: + br label %done + +done: + %solved = load i1, i1* @solved + %result = select i1 %solved, i32 0, i32 1 + ret i32 %result +} + +declare void @symcc_make_symbolic(i32*, i64) +declare void @symcc_set_test_case_handler(void (i8*, i64)*) diff --git a/test/load_store.ll b/test/load_store.ll new file mode 100644 index 00000000..6dc25689 --- /dev/null +++ b/test/load_store.ll @@ -0,0 +1,45 @@ +; This file is part of SymCC. +; +; SymCC is free software: you can redistribute it and/or modify it under the +; terms of the GNU General Public License as published by the Free Software +; Foundation, either version 3 of the License, or (at your option) any later +; version. +; +; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +; A PARTICULAR PURPOSE. See the GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License along with +; SymCC. If not, see . + +; Verify that loading and storing concrete values of various types works. For +; each type, we allocate space on the stack, then store a constant value into +; it, and finally load it back. Compiling this code with SymCC and verifying +; that the resulting binary exits cleanly shows that SymCC's instrumentation +; doesn't break the load/store operations. +; +; This test reproduces a bug where loading a concrete Boolean would lead to a +; program crash. +; +; Since the bitcode is written by hand, we first run llc on it because it +; performs a validity check, whereas Clang doesn't. +; +; RUN: llc %s -o /dev/null +; RUN: %symcc %s -o %t +; RUN: %t 2>&1 + +target triple = "x86_64-pc-linux-gnu" + +define i32 @main(i32 %argc, i8** %argv) { + ; Load and store a Boolean. + %stack_bool = alloca i1 + store i1 0, i1* %stack_bool + %copy_of_stack_bool = load i1, i1* %stack_bool + + ; Load and store a float. + %stack_float = alloca float + store float 0.0, float* %stack_float + %copy_of_stack_float = load float, float* %stack_float + + ret i32 0 +} diff --git a/test/memory_input.c b/test/memory_input.c index 58f55702..9e2282ab 100644 --- a/test/memory_input.c +++ b/test/memory_input.c @@ -18,7 +18,7 @@ #include #include -void symcc_make_symbolic(void *start, size_t byte_length); +void symcc_make_symbolic(const void *start, size_t byte_length); uint64_t g_value = 0xaaaabbbbccccdddd; diff --git a/test/regression/cxa_vector.ll b/test/regression/cxa_vector.ll index 65a0c3be..40e7a546 100644 --- a/test/regression/cxa_vector.ll +++ b/test/regression/cxa_vector.ll @@ -3,11 +3,6 @@ ; This file exposed a bug in our handling of "invoke" instructions that would ; lead to invalid byte code. -; ModuleID = '/home/seba/work/compiler/llvm-project/libcxxabi/src/cxa_vector.cpp' -source_filename = "/home/seba/work/compiler/llvm-project/libcxxabi/src/cxa_vector.cpp" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-linux-gnu" - $__clang_call_terminate = comdat any ; Function Attrs: sspstrong uwtable diff --git a/test/symbolic_structs.ll b/test/symbolic_structs.ll new file mode 100644 index 00000000..93a112b4 --- /dev/null +++ b/test/symbolic_structs.ll @@ -0,0 +1,52 @@ +; This file is part of SymCC. +; +; SymCC is free software: you can redistribute it and/or modify it under the +; terms of the GNU General Public License as published by the Free Software +; Foundation, either version 3 of the License, or (at your option) any later +; version. +; +; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +; A PARTICULAR PURPOSE. See the GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License along with +; SymCC. If not, see . + +; Verify that we correctly insert into symbolic struct values. We insert values +; of various types into a symbolic struct, thus triggering expression updates. +; Compiling this code with SymCC and verifying that the resulting binary exits +; cleanly shows that SymCC's instrumentation doesn't break the execution of the +; program. +; +; This test reproduces a bug where inserting a concrete floating-point value +; into a symbolic struct would lead to a program crash (eurecom-s3/symcc#138). +; +; Since the bitcode is written by hand, we first run llc on it because it +; performs a validity check, whereas Clang doesn't. + +; RUN: llc %s -o /dev/null +; RUN: %symcc %s -o %t +; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 + +target triple = "x86_64-pc-linux-gnu" + +; The struct type of our symbolic value. Include a floating-point value and a +; Boolean because they're represented with non-bitvector solver variables +; (reproducing eurecom-s3/symcc#138). +%struct_type = type { i8, i32, i8, float, i1 } + +define i32 @main(i32 %argc, i8** %argv) { + ; Create a symbolic struct value that we can subsequently insert values into. + %struct_value_mem = alloca %struct_type + call void @symcc_make_symbolic(%struct_type* %struct_value_mem, i64 20) + %symbolic_struct = load %struct_type, %struct_type* %struct_value_mem + + ; Insert values of various types, triggering the creation of new expressions. + insertvalue %struct_type %symbolic_struct, i32 5, 1 + insertvalue %struct_type %symbolic_struct, float 42.0, 3 + insertvalue %struct_type %symbolic_struct, i1 1, 4 + + ret i32 0 +} + +declare void @symcc_make_symbolic(%struct_type*, i64) diff --git a/test/test_case_handler.c b/test/test_case_handler.c new file mode 100644 index 00000000..414c8843 --- /dev/null +++ b/test/test_case_handler.c @@ -0,0 +1,59 @@ +// This file is part of SymCC. +// +// SymCC is free software: you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the Free Software +// Foundation, either version 3 of the License, or (at your option) any later +// version. +// +// SymCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with +// SymCC. If not, see . + +// RUN: %symcc -O2 %s -o %t +// RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s +#include +#include +#include + +#define MAGIC 0xab + +void symcc_make_symbolic(const void *start, size_t byte_length); +typedef void (*TestCaseHandler)(const void *, size_t); +void symcc_set_test_case_handler(TestCaseHandler handler); + +int solved = 0; +int num_test_cases = 0; + +void handle_test_case(const void *data, size_t data_length) { + num_test_cases++; + if (data_length == 1 && ((const uint8_t *)data)[0] == MAGIC) + solved = 1; +} + +int main(int argc, char *argv[]) { + symcc_set_test_case_handler(handle_test_case); + // SIMPLE: Warning: test-case handlers + + uint8_t input = 0; + symcc_make_symbolic(&input, sizeof(input)); + + fprintf(stderr, "%s\n", (input == MAGIC) ? "yes" : "no"); + // SIMPLE: Trying to solve + // SIMPLE: Found diverging input + // SIMPLE: stdin0 -> #xab + // QSYM: SMT + // ANY: no + + fprintf(stderr, "%d\n", solved); + // QSYM: 1 + // SIMPLE: 0 + + fprintf(stderr, "%d\n", num_test_cases); + // QSYM: 1 + // SIMPLE: 0 + + return 0; +} diff --git a/test/uadd_sat.ll b/test/uadd_sat.ll new file mode 100644 index 00000000..4248fa33 --- /dev/null +++ b/test/uadd_sat.ll @@ -0,0 +1,50 @@ +; RUN: %symcc -O2 %s -o %t +; RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@g_value = dso_local local_unnamed_addr global i16 40, align 2 +@stderr = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8 +@.str = private unnamed_addr constant [18 x i8] c"Failed to read x\0A\00", align 1 +@.str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"yes\00", align 1 +@.str.3 = private unnamed_addr constant [3 x i8] c"no\00", align 1 + +; Function Attrs: nofree nounwind uwtable +define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { +entry: + %x = alloca i16, align 2 + %0 = bitcast i16* %x to i8* + %call = call i64 @read(i32 0, i8* nonnull %0, i64 2) #5 + %cmp.not = icmp eq i64 %call, 2 + %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 + br i1 %cmp.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %2 = call i64 @fwrite(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str, i64 0, i64 0), i64 17, i64 1, %struct._IO_FILE* %1) #6 + br label %cleanup + +if.end: ; preds = %entry + %3 = load i16, i16* %x, align 2 + %4 = load i16, i16* @g_value, align 2 + %add = call i16 @llvm.uadd.sat.i16(i16 %3, i16 %4) + %cmp = icmp eq i16 %add, 43981 + %cond = select i1 %cmp, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0) + ; SIMPLE: Trying to solve + ; SIMPLE: Found diverging input + ; SIMPLE-DAG: stdin0 -> #xa5 + ; SIMPLE-DAG: stdin1 -> #xab + ; ANY: no + %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i8* %cond) #6 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ -1, %if.then ], [ 0, %if.end ] + ret i32 %retval.0 +} + +declare i64 @read(i32, i8* nocapture, i64) +declare i32 @fprintf(%struct._IO_FILE* nocapture , i8* nocapture readonly, ...) +declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) +declare i16 @llvm.uadd.sat.i16(i16, i16) diff --git a/test/uadd_sat.test32 b/test/uadd_sat.test32 new file mode 100644 index 00000000..647cc3c7 --- /dev/null +++ b/test/uadd_sat.test32 @@ -0,0 +1,2 @@ +RUN: %symcc -m32 -O2 %S/uadd_sat.ll -o %t_32 +RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %s diff --git a/test/usub_sat.ll b/test/usub_sat.ll new file mode 100644 index 00000000..62fe200a --- /dev/null +++ b/test/usub_sat.ll @@ -0,0 +1,50 @@ +; RUN: %symcc -O2 %s -o %t +; RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@g_value = dso_local local_unnamed_addr global i16 40, align 2 +@stderr = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8 +@.str = private unnamed_addr constant [18 x i8] c"Failed to read x\0A\00", align 1 +@.str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"yes\00", align 1 +@.str.3 = private unnamed_addr constant [3 x i8] c"no\00", align 1 + +; Function Attrs: nofree nounwind uwtable +define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { +entry: + %x = alloca i16, align 2 + %0 = bitcast i16* %x to i8* + %call = call i64 @read(i32 0, i8* nonnull %0, i64 2) #5 + %cmp.not = icmp eq i64 %call, 2 + %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 + br i1 %cmp.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %2 = call i64 @fwrite(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str, i64 0, i64 0), i64 17, i64 1, %struct._IO_FILE* %1) #6 + br label %cleanup + +if.end: ; preds = %entry + %3 = load i16, i16* %x, align 2 + %4 = load i16, i16* @g_value, align 2 + %add = call i16 @llvm.usub.sat.i16(i16 %3, i16 %4) + %cmp = icmp eq i16 %add, 43981 + %cond = select i1 %cmp, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0) + ; SIMPLE: Trying to solve + ; SIMPLE: Found diverging input + ; SIMPLE-DAG: stdin0 -> #xf5 + ; SIMPLE-DAG: stdin1 -> #xab + ; ANY: no + %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i8* %cond) #6 + br label %cleanup + +cleanup: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ -1, %if.then ], [ 0, %if.end ] + ret i32 %retval.0 +} + +declare i64 @read(i32, i8* nocapture, i64) +declare i32 @fprintf(%struct._IO_FILE* nocapture , i8* nocapture readonly, ...) +declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) +declare i16 @llvm.usub.sat.i16(i16, i16) diff --git a/test/usub_sat.test32 b/test/usub_sat.test32 new file mode 100644 index 00000000..16de7355 --- /dev/null +++ b/test/usub_sat.test32 @@ -0,0 +1,2 @@ +RUN: %symcc -m32 -O2 %S/usub_sat.ll -o %t_32 +RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %s