diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f474606 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +main.o +syscall_nr.h +ministrace diff --git a/Makefile b/Makefile index 979ab86..d87bcc0 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,12 @@ all: ministrace -ministrace: syscall_nr.h main.o +.PHONY: run + +ministrace: syscall_nr.h main.c $(CC) -o $@ $^ %.o: %.c $(CC) -c $^ syscall_nr.h: - python get_syscalls.py /usr/include/asm/unistd_64.h > syscall_nr.h + ./get_syscalls.sh diff --git a/README.md b/README.md index 59ef6cd..dcdee63 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,9 @@ This repository contains a basic reimplementation of the `strace` tool. ## Building +Before building, ensure that you have the Linux headers installed on your system. +On Debian-based systems, try installing them with apt: `sudo apt install linux-headers-xxxx`. + Run `make all` to build `ministrace`. Run `./ministrace <program>` to trace `program`. @@ -15,3 +18,5 @@ macros in `unistd_64.h` to generate a mapping. Note that we have no information on the number and types of arguments to syscalls. The two possible approaches here seem to be parsing syscall implementations or manually mapping syscall numbers to argument count and types. + +NOTE: The syscall extraction was tested on Debian, Fedora, and NixOS.
diff --git a/get_syscalls.py b/get_syscalls.py index 236f320..91f3904 100644 --- a/get_syscalls.py +++ b/get_syscalls.py @@ -1,5 +1,6 @@ import sys + def main(): syscalls = dict() with open(sys.argv[1], "r") as fp: @@ -9,14 +10,14 @@ def main(): definition = line[13:] parts = definition.split() syscalls[int(parts[1])] = parts[0] - print("void populate_syscalls() {") + print("#define SYSCALL_NAME(nr) SYSCALLS[nr]") print(f"char *SYSCALLS[{max(syscalls.keys())}];") - print("}") print("void populate_syscalls() {") for (k, v) in syscalls.items(): print(f" SYSCALLS[{k}] = \"{v}\";") print("}") + if __name__ == "__main__": main() diff --git a/get_syscalls.sh b/get_syscalls.sh new file mode 100755 index 0000000..17ec90e --- /dev/null +++ b/get_syscalls.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash + +GCC="gcc" +SED="sed" + +HEADER_FILE="unistd_64.h" + +ensure_avail() { + command=$1 + if ! which "${command}" > /dev/null; then + echo "ERROR: the ${command} is not available on your system." + exit 1 + fi + + echo "[OK] ${command} available." +} + +get_header_locations() { + RAW=$(LC_ALL=C "${GCC}" -v -E -xc - < /dev/null 2>&1 | LC_ALL=C "${SED}" -ne '/starts here/,/End of/p') + + for word in $RAW; do + if [[ $word == /* ]]; then + echo "$word" + fi + done +} + +ensure_avail gcc +ensure_avail sed + +headers=$(get_header_locations) + +CANDIDATES="" + +for header in $headers; do + echo "CHECKING include dir ${header}..." + matches=$(find "${header}" -name "${HEADER_FILE}") + + if [ "${#matches}" != "0" ]; then + echo "[OK] FOUND ${matches}" + CANDIDATES+=" ${matches}" + fi + +done + + +if [ "${#CANDIDATES}" = "0" ]; then + echo "[ERR] none of the search directories contains ${HEADER_FILE}" + exit 1 +fi + +success=0 + +echo "CAN: $CANDIDATES" + +for candidate in $CANDIDATES; do + OUTPUT="" + + if ! OUTPUT=$(python3 ./get_syscalls.py "${candidate}" 2>&1 ); then + echo "[ERR] File ${candidate} is invalid." + else + echo "[OK] File ${candidate} is valid." 
+ success=1 + break + fi +done + +if [ $success -eq 0 ]; then + echo "${OUTPUT}" + echo "[ERR] could not extract syscalls: all sources failed" + exit 1 +else + echo "$OUTPUT" > ./syscall_nr.h +fi