From 92e66b6c64fb18f2f17c7e6d582e2bedcf27b7e8 Mon Sep 17 00:00:00 2001 From: Nick Zavaritsky Date: Fri, 3 May 2019 19:14:22 +0000 Subject: [PATCH] Compressed syscall database with O(1) lookup libkafel.so 5x smaller (x86_64, stripped): down to 88KiB from 440KiB. Closes #20 --- .gitignore | 3 + src/Makefile | 31 +- src/context.c | 1 - src/context.h | 2 +- src/kafel.c | 4 +- src/parser.y | 2 +- src/syscall.c | 78 ++--- src/syscall.h | 10 +- src/syscalldb.h | 115 ++++++++ src/syscalldb.inl | 84 ++++++ src/syscalls/Makefile | 43 +++ src/syscalls/syscalldb_generator.c | 441 +++++++++++++++++++++++++++++ 12 files changed, 735 insertions(+), 79 deletions(-) create mode 100644 src/syscalldb.h create mode 100644 src/syscalldb.inl create mode 100644 src/syscalls/Makefile create mode 100644 src/syscalls/syscalldb_generator.c diff --git a/.gitignore b/.gitignore index 3f5a6ee..2756d50 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,9 @@ /src/lexer.c /src/parser.h /src/parser.c +/src/syscalldb.gperf +/src/syscalldb.c +/src/syscalls/syscalldb_generator # Backup files *.bak diff --git a/src/Makefile b/src/Makefile index 86f2809..5c7e769 100644 --- a/src/Makefile +++ b/src/Makefile @@ -17,18 +17,14 @@ # limitations under the License. 
# +SUBDIRS:=syscalls + OBJCOPY?=objcopy CFLAGS+=-fPIC -fvisibility=hidden -GENERATED_SRCS:=lexer.c parser.c +GENERATED_SRCS:=lexer.c parser.c syscalldb.c GENERATED:=lexer.h parser.h ${GENERATED_SRCS} -TEMPORARY:=libkafel_r.o libkafel.o -SYSCALL_LISTS:=amd64_syscalls.c \ - i386_syscalls.c \ - aarch64_syscalls.c \ - mipso32_syscalls.c \ - mips64_syscalls.c \ - arm_syscalls.c +TEMPORARY:=libkafel_r.o libkafel.o syscalldb.gperf SRCS:=kafel.c \ context.c \ codegen.c \ @@ -37,8 +33,7 @@ SRCS:=kafel.c \ policy.c \ range_rules.c \ syscall.c \ - ${GENERATED_SRCS} \ - $(SYSCALL_LISTS:%.c=syscalls/%.c) + ${GENERATED_SRCS} DYNAMIC_TARGET:=${PROJECT_ROOT}libkafel.so STATIC_TARGET:=${PROJECT_ROOT}libkafel.a TARGET=${DYNAMIC_TARGET} ${STATIC_TARGET} @@ -65,6 +60,13 @@ lexer.h lexer.c: lexer.l parser.h parser.c: parser.y bison $< +syscalldb.c: syscalls/syscalldb_generator + ./syscalls/syscalldb_generator > ./syscalldb.gperf + gperf -m10 --output-file=./syscalldb.c ./syscalldb.gperf + +syscalls/syscalldb_generator: syscalls + true + # DO NOT DELETE THIS LINE -- make depend depends on it. 
kafel.o: codegen.h context.h includes.h policy.h expression.h syscall.h @@ -76,14 +78,9 @@ expression.o: expression.h common.h includes.o: includes.h common.h policy.o: policy.h expression.h common.h range_rules.o: range_rules.h policy.h expression.h common.h syscall.h -syscall.o: syscall.h common.h +syscall.o: syscall.h syscalldb.h common.h +syscalldb.o: syscall.h syscalldb.h syscalldb.inl lexer.o: parser.h context.h includes.h policy.h expression.h syscall.h lexer.o: common.h parser.o: parser.h context.h includes.h policy.h expression.h syscall.h parser.o: lexer.h -syscalls/amd64_syscalls.o: syscall.h -syscalls/i386_syscalls.o: syscall.h -syscalls/aarch64_syscalls.o: syscall.h -syscalls/mipso32_syscalls.o: syscall.h -syscalls/mips64_syscalls.o: syscall.h -syscalls/arm_syscalls.o: syscall.h diff --git a/src/context.c b/src/context.c index ec4acf3..5c4caf5 100644 --- a/src/context.c +++ b/src/context.c @@ -70,7 +70,6 @@ void kafel_ctxt_reset(kafel_ctxt_t ctxt) { } ctxt->default_action = 0; ctxt->lexical_error = false; - ctxt->syscalls = NULL; } void kafel_ctxt_clean(kafel_ctxt_t ctxt) { diff --git a/src/context.h b/src/context.h index f93d4b3..407aece 100644 --- a/src/context.h +++ b/src/context.h @@ -46,7 +46,7 @@ struct kafel_ctxt { struct policy* main_policy; int default_action; uint32_t target_arch; - const struct syscall_list* syscalls; + uint32_t target_arch_mask; struct { enum { INPUT_NONE, diff --git a/src/kafel.c b/src/kafel.c index 34c2a34..36660f2 100644 --- a/src/kafel.c +++ b/src/kafel.c @@ -57,8 +57,8 @@ static int parse(struct kafel_ctxt* ctxt) { kafel_yyset_column(1, scanner); kafel_yyset_lineno(1, scanner); - ctxt->syscalls = syscalls_lookup(ctxt->target_arch); - if (ctxt->syscalls == NULL) { + ctxt->target_arch_mask = syscall_get_arch_mask(ctxt->target_arch); + if (!ctxt->target_arch_mask) { append_error(ctxt, "Cannot resolve syscall list for architecture %#x\n", ctxt->target_arch); kafel_yylex_destroy(scanner); diff --git a/src/parser.y 
b/src/parser.y index 82e78ac..9123fab 100644 --- a/src/parser.y +++ b/src/parser.y @@ -298,7 +298,7 @@ syscall_id $$ = syscall_custom(value); } else { $$ = (struct syscall_descriptor*) - syscall_lookup(ctxt->syscalls, $1); + syscall_lookup(ctxt->target_arch_mask, $1); if ($$ == NULL) { emit_error(@1, "Undefined syscall `%s'", $1); free($1); $1 = NULL; diff --git a/src/syscall.c b/src/syscall.c index 6163643..71bf394 100644 --- a/src/syscall.c +++ b/src/syscall.c @@ -25,75 +25,57 @@ #include #include "common.h" +#include "syscalldb.h" // Fix for Linux <3.12 #ifndef EM_ARM #define EM_ARM 40 #endif -#define SYSCALL_LIST_DECL(arch) \ - extern const struct syscall_descriptor arch##_syscall_list[]; \ - extern const size_t arch##_syscall_list_size; - -#define SYSCALL_LIST(audit_arch, arch) \ - { audit_arch, arch##_syscall_list, &arch##_syscall_list_size } - -SYSCALL_LIST_DECL(arm) -SYSCALL_LIST_DECL(aarch64) -SYSCALL_LIST_DECL(amd64) -SYSCALL_LIST_DECL(mipso32) -SYSCALL_LIST_DECL(mips64) -SYSCALL_LIST_DECL(i386) +struct syscall_descriptor* syscall_custom(uint32_t nr) { + struct syscall_descriptor* rv = calloc(1, sizeof(*rv)); + rv->nr = nr; + return rv; +} -const struct syscall_list syscall_lists[] = { +uint32_t syscall_get_arch_mask(uint32_t arch) { + switch (arch) { + default: + return 0; #ifdef AUDIT_ARCH_ARM - SYSCALL_LIST(AUDIT_ARCH_ARM, arm), + case AUDIT_ARCH_ARM: + return SYSCALLDB_ARCH_ARM_FLAG; #endif #ifdef AUDIT_ARCH_AARCH64 - SYSCALL_LIST(AUDIT_ARCH_AARCH64, aarch64), + case AUDIT_ARCH_AARCH64: + return SYSCALLDB_ARCH_AARCH64_FLAG; #endif #ifdef AUDIT_ARCH_X86_64 - SYSCALL_LIST(AUDIT_ARCH_X86_64, amd64), + case AUDIT_ARCH_X86_64: + return SYSCALLDB_ARCH_X86_64_FLAG; #endif #ifdef AUDIT_ARCH_MIPS - SYSCALL_LIST(AUDIT_ARCH_MIPS, mipso32), + case AUDIT_ARCH_MIPS: + return SYSCALLDB_ARCH_MIPS_FLAG; #endif #ifdef AUDIT_ARCH_MIPS64 - SYSCALL_LIST(AUDIT_ARCH_MIPS64, mips64), + case AUDIT_ARCH_MIPS64: + return SYSCALLDB_ARCH_MIPS64_FLAG; #endif #ifdef AUDIT_ARCH_I386 
- SYSCALL_LIST(AUDIT_ARCH_I386, i386), + case AUDIT_ARCH_I386: + return SYSCALLDB_ARCH_I386_FLAG; #endif -}; - -struct syscall_descriptor* syscall_custom(uint32_t nr) { - struct syscall_descriptor* rv = calloc(1, sizeof(*rv)); - rv->nr = nr; - rv->is_custom = true; - return rv; -} - -const struct syscall_list* syscalls_lookup(uint32_t arch) { - for (size_t i = 0; i < sizeof(syscall_lists) / sizeof(syscall_lists[0]); - ++i) { - if (syscall_lists[i].arch == arch) { - return &syscall_lists[i]; - } } - return NULL; } -const struct syscall_descriptor* syscall_lookup(const struct syscall_list* list, +const struct syscall_descriptor* syscall_lookup(uint32_t mask, const char* name) { - ASSERT(list != NULL); - ASSERT(name != NULL); - /* TODO use binary search if syscalls can be guaranteed to be - * sorted alphabetically - */ - for (size_t i = 0; i < *list->size; ++i) { - if (strcmp(name, list->syscalls[i].name) == 0) { - return &list->syscalls[i]; - } + const struct syscalldb_definition* def = syscalldb_lookup(name); + if (def && mask & def->arch_mask) { + struct syscall_descriptor* rv = calloc(1, sizeof(*rv)); + syscalldb_unpack(def, mask, rv); + return rv; } return NULL; } @@ -102,8 +84,6 @@ void syscall_descriptor_destroy(struct syscall_descriptor** desc) { ASSERT(desc != NULL); ASSERT((*desc) != NULL); - if ((*desc)->is_custom) { - free(*desc); - } + free(*desc); (*desc) = NULL; } diff --git a/src/syscall.h b/src/syscall.h index c826474..90b81cf 100644 --- a/src/syscall.h +++ b/src/syscall.h @@ -40,15 +40,9 @@ struct syscall_descriptor { struct syscall_arg args[SYSCALL_MAX_ARGS]; }; -struct syscall_list { - uint32_t arch; - const struct syscall_descriptor* const syscalls; - const size_t* const size; -}; - struct syscall_descriptor* syscall_custom(uint32_t nr); -const struct syscall_list* syscalls_lookup(uint32_t arch); -const struct syscall_descriptor* syscall_lookup(const struct syscall_list* list, +uint32_t syscall_get_arch_mask(uint32_t arch); +const struct 
syscall_descriptor* syscall_lookup(uint32_t arch_mask, const char* name); void syscall_descriptor_destroy(struct syscall_descriptor** desc); diff --git a/src/syscalldb.h b/src/syscalldb.h new file mode 100644 index 0000000..b35613c --- /dev/null +++ b/src/syscalldb.h @@ -0,0 +1,115 @@ +/* + Kafel - syscall database + ----------------------------------------- + + Copyright 2019 Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +*/ + +#ifndef KAFEL_SYSCALLDB_H +#define KAFEL_SYSCALLDB_H + +#include +#include +#include + +struct syscalldb_definition; +struct syscall_descriptor; + +enum { + SYSCALLDB_ARCH_ARM_FLAG = 0x01, + SYSCALLDB_ARCH_AARCH64_FLAG = 0x02, + SYSCALLDB_ARCH_X86_64_FLAG = 0x04, + SYSCALLDB_ARCH_MIPS_FLAG = 0x08, + SYSCALLDB_ARCH_MIPS64_FLAG = 0x10, + SYSCALLDB_ARCH_I386_FLAG = 0x20, +}; + +const struct syscalldb_definition* syscalldb_lookup(const char* name); +const char* syscalldb_reverse_lookup(uint32_t arch_mask, uint32_t nr); + +void syscalldb_unpack(const struct syscalldb_definition* definition, + uint32_t arch_mask, struct syscall_descriptor* dest); + +/* + internals + + Generated from individual syscall lists, has O(1) lookups and takes + advantage of the redundancy in the data set to reduce footprint + dramatically. + + O(1) lookups are courtesy of the perfect hash function generated with + GNU gperf. PHF maps a name to an index in the table of + (name, definition_offset) tuples. If names match, syscall definition is found at the given + offset. 
+ + Syscall definitions are of variable length and stored back to + back. For details, consult syscalldb_definition struct. + +*/ + +#define SYSCALLDB_MAX_ARGTYPE 8 +#define SYSCALLDB_MAX_ARGNAME 0xffff + +#define SYSCALLDB_ARGNO(no) (((uint32_t)(no)) << 24) +#define SYSCALLDB_ARGTYPE(type) (((uint32_t)(type)) << 16) +#define SYSCALLDB_ARGNAME(name) ((uint32_t)(name)) + +#define SYSCALLDB_GET_ARGNO(x) (((x)&UINT32_C(0xff000000)) >> 24) +#define SYSCALLDB_GET_ARGTYPE(x) (((x)&UINT32_C(0x00ff0000)) >> 16) +#define SYSCALLDB_GET_ARGNAME(x) (((x)&UINT32_C(0x0000ffff))) + +struct syscalldb_entry { + uint16_t name; + uint16_t definition_offset; +}; + +/* + Observations: + + (1) very few syscalls are arch-specific; + + (2) syscall numbers vary wildly across archs; + + (3) argument names and sizes (modulo pointer size differences) are the same + across archs with a few notable exceptions (ex: clone). + + Last but not least, avoid pointers in static data structures with + initializers! Due to PIC requirements every single one of these + requires relocation, which increases the footprint and has runtime overhead. + +*/ +struct syscalldb_definition { + uint32_t arch_mask; /* archs providing this syscall */ + uint32_t n_arg_info; /* if >INT32_MAX, consult ext_arg_info; + it has -n_arg_info entries */ + union { + uint32_t arg_info[1]; /* argno, argtype, argname */ + struct { + uint32_t arch_mask; /* archs this entry applies to */ + uint32_t arg_info; /* argno, argtype, argname */ + } ext_arg_info[1]; + }; + /* uint32_t nr[]; syscall numbers, one value per bit set in arch_mask */ +}; + +#define SYSCALLDB_DEFINITION_NR(d) \ + (&(d)->arch_mask + 2 + \ + ((d)->n_arg_info > INT32_MAX ? 
2 * -(d)->n_arg_info : (d)->n_arg_info)) + +#define SYSCALLDB_DEFINITION_NEXT(d) \ + (typeof(d))(SYSCALLDB_DEFINITION_NR(d) + __builtin_popcount((d)->arch_mask)) + +#endif /* KAFEL_SYSCALLDB_H */ diff --git a/src/syscalldb.inl b/src/syscalldb.inl new file mode 100644 index 0000000..70eefb9 --- /dev/null +++ b/src/syscalldb.inl @@ -0,0 +1,84 @@ +/* + Kafel - syscall database helper routines + ----------------------------------------- + + Copyright 2019 Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +*/ + +#include "syscall.h" + +/* O(1) */ +const struct syscalldb_definition* syscalldb_lookup(const char *name) { + const struct syscalldb_entry *entry; + if (!(entry = syscalldb_lookup_internal(name, strlen(name)))) return NULL; + return (const struct syscalldb_definition*)( + syscalldb_definitions+entry->definition_offset); +} + +static inline uint32_t get_nr( + const struct syscalldb_definition* def, uint32_t mask +) { + uint32_t match = mask & def->arch_mask; + return SYSCALLDB_DEFINITION_NR(def)[ + __builtin_popcount(def->arch_mask & (match^(match-1))) - 1]; +} + +/* O(n) */ +const char* syscalldb_reverse_lookup(uint32_t mask, uint32_t nr) { + const struct syscalldb_definition* def = (typeof(def))syscalldb_definitions; + for (; def->arch_mask; def=SYSCALLDB_DEFINITION_NEXT(def)) { + if (mask&def->arch_mask && get_nr(def, mask)==nr) { + uint32_t offset = (uint32_t)( + (const uint32_t*)def-syscalldb_definitions); + const struct syscalldb_entry* entry = syscalldb_entries; + while (entry->definition_offset!=offset) ++entry; + return syscalldb_name_pool+entry->name; + } + } + return NULL; +} + +void syscalldb_unpack( + const struct syscalldb_definition* def, uint32_t mask, + struct syscall_descriptor *dest) { + + memset(dest, 0, sizeof *dest); + dest->nr = get_nr(def, mask); + if (def->n_arg_info<=INT32_MAX) { + for (uint32_t i=def->n_arg_info; i--; ) { + int argno = SYSCALLDB_GET_ARGNO(def->arg_info[i]); + dest->args[argno].size = SYSCALLDB_GET_ARGTYPE(def->arg_info[i]); + dest->args[argno].name = syscalldb_arg_name_pool + +SYSCALLDB_GET_ARGNAME(def->arg_info[i]); + } + } else { + for (uint32_t i=-def->n_arg_info; i--; ) { + if (mask & def->ext_arg_info[i].arch_mask) { + int argno = SYSCALLDB_GET_ARGNO(def->ext_arg_info[i].arg_info); + dest->args[argno].size = + SYSCALLDB_GET_ARGTYPE(def->ext_arg_info[i].arg_info); + dest->args[argno].name = syscalldb_arg_name_pool + +SYSCALLDB_GET_ARGNAME(def->ext_arg_info[i].arg_info); + } + } + } + for (int i=0; 
i!=SYSCALL_MAX_ARGS; ++i) { + if (dest->args[i].name && !dest->args[i].size) { + dest->args[i].size = syscalldb_pointer_size[ + __builtin_ctz(mask&def->arch_mask)]; + } + } +} diff --git a/src/syscalls/Makefile b/src/syscalls/Makefile new file mode 100644 index 0000000..b56ea18 --- /dev/null +++ b/src/syscalls/Makefile @@ -0,0 +1,43 @@ +# +# Kafel - Makefile +# ----------------------------------------- +# +# Copyright 2019 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +CFLAGS+=-I.. +SRCS:=syscalldb_generator.c\ + amd64_syscalls.c \ + i386_syscalls.c \ + aarch64_syscalls.c \ + mipso32_syscalls.c \ + mips64_syscalls.c \ + arm_syscalls.c +TARGET=syscalldb_generator + +include ${PROJECT_ROOT}build/Makefile.mk + +syscalldb_generator: ${OBJECTS} + +# DO NOT DELETE THIS LINE -- make depend depends on it. + +syscalldb_generator.o: ../syscall.h ../syscalldb.h ../common.h +amd64_syscalls.o: ../syscall.h +i386_syscalls.o: ../syscall.h +aarch64_syscalls.o: ../syscall.h +mipso32_syscalls.o: ../syscall.h +mips64_syscalls.o: ../syscall.h +arm_syscalls.o: ../syscall.h + diff --git a/src/syscalls/syscalldb_generator.c b/src/syscalls/syscalldb_generator.c new file mode 100644 index 0000000..a1f84dd --- /dev/null +++ b/src/syscalls/syscalldb_generator.c @@ -0,0 +1,441 @@ +/* + Kafel - syscall database generator + ----------------------------------------- + + Copyright 2019 Google Inc. All Rights Reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +*/ + +#define _GNU_SOURCE /* memmem() */ +#include "syscall.h" +#include "syscalldb.h" + +#include +#include +#include +#include +#include + +#include + +#include "common.h" + +// Fix for Linux <3.12 +#ifndef EM_ARM +#define EM_ARM 40 +#endif + +#define SYSCALL_LIST_DECL(arch) \ + extern const struct syscall_descriptor arch##_syscall_list[]; \ + extern const size_t arch##_syscall_list_size; + +#define SYSCALL_LIST(flag, arch, pointer_size) \ + { flag, pointer_size, arch##_syscall_list, &arch##_syscall_list_size } + +SYSCALL_LIST_DECL(arm) +SYSCALL_LIST_DECL(aarch64) +SYSCALL_LIST_DECL(amd64) +SYSCALL_LIST_DECL(mipso32) +SYSCALL_LIST_DECL(mips64) +SYSCALL_LIST_DECL(i386) + +struct syscall_list { + uint32_t arch_mask; + int pointer_size; + const struct syscall_descriptor* const syscalls; + const size_t* const size; +}; + +const struct syscall_list syscall_lists[] = { +#ifdef AUDIT_ARCH_ARM + SYSCALL_LIST(SYSCALLDB_ARCH_ARM_FLAG, arm, 4), +#endif +#ifdef AUDIT_ARCH_AARCH64 + SYSCALL_LIST(SYSCALLDB_ARCH_AARCH64_FLAG, aarch64, 8), +#endif +#ifdef AUDIT_ARCH_X86_64 + SYSCALL_LIST(SYSCALLDB_ARCH_X86_64_FLAG, amd64, 8), +#endif +#ifdef AUDIT_ARCH_MIPS + SYSCALL_LIST(SYSCALLDB_ARCH_MIPS_FLAG, mipso32, 4), +#endif +#ifdef AUDIT_ARCH_MIPS64 + SYSCALL_LIST(SYSCALLDB_ARCH_MIPS64_FLAG, mips64, 8), +#endif +#ifdef AUDIT_ARCH_I386 + SYSCALL_LIST(SYSCALLDB_ARCH_I386_FLAG, i386, 4), +#endif +}; + +enum { NARCH = sizeof(syscall_lists) / 
sizeof(syscall_lists[0]) }; + +struct entry { + const char* name; + uint32_t definition_offset; +}; + +struct ctx { + struct arch_ctx { + uint32_t arch_mask; + int pointer_size; + const struct syscall_descriptor** syscall; + } arch[NARCH]; + struct entry* syscall_entries; + uint32_t syscall_entries_size, syscall_entries_capacity; + uint32_t* syscall_definitions; + uint32_t syscall_definitions_size; + uint32_t syscall_definitions_capacity; + char* arg_name_pool; + uint32_t arg_name_pool_size; + uint32_t arg_name_pool_capacity; +}; + +static int syscall_descriptor_name_cmp(const void* lhs, const void* rhs) { + const char* syscall = (*(const struct syscall_descriptor**)lhs)->name; + int cmp = strcmp(syscall, (*(const struct syscall_descriptor**)rhs)->name); + if (!cmp) { + fprintf(stderr, + "Sanity check failed: multiple entries found for syscall '%s' " + "in one list\n", + syscall); + exit(EXIT_FAILURE); + } + return cmp; +} + +static int arch_ctx_arch_mask_cmp(const void* lhs, const void* rhs) { + uint32_t larch_mask = ((const struct arch_ctx*)lhs)->arch_mask; + uint32_t rarch_mask = ((const struct arch_ctx*)rhs)->arch_mask; + if (larch_mask == rarch_mask) { + fprintf(stderr, "Sanity check failed: non-unique arch_mask %" PRIx32 "\n", + larch_mask); + exit(EXIT_FAILURE); + } + return larch_mask < rarch_mask ? 
-1 : 1; +} + +static void init(struct ctx* ctx) { + for (size_t i = 0; i < NARCH; ++i) { + const size_t size = *syscall_lists[i].size; + const struct syscall_descriptor** p; + p = malloc(sizeof(p[0]) * (size + 1)); // NULL-terminated + for (size_t j = 0; j < size; ++j) p[j] = &syscall_lists[i].syscalls[j]; + qsort(p, size, sizeof(p[0]), syscall_descriptor_name_cmp); + p[size] = NULL; + ctx->arch[i].arch_mask = syscall_lists[i].arch_mask; + ctx->arch[i].pointer_size = syscall_lists[i].pointer_size; + ctx->arch[i].syscall = p; + if (__builtin_popcount(ctx->arch[i].arch_mask) != 1) { + fprintf(stderr, + "Sanity check failed: invalid arch_mask %" PRIx32 + ", must have a single set bit\n", + ctx->arch[i].arch_mask); + exit(EXIT_FAILURE); + } + } + qsort(ctx->arch, NARCH, sizeof(ctx->arch[0]), arch_ctx_arch_mask_cmp); +} + +#define CHECK_CAPACITY(ctx, name, size) \ + do { \ + while ((ctx)->name##_size + size > (ctx)->name##_capacity) { \ + if (!(ctx)->name##_capacity) \ + (ctx)->name##_capacity = 1024; \ + else \ + (ctx)->name##_capacity *= 2; \ + (ctx)->name = realloc((ctx)->name, \ + sizeof((ctx)->name[0]) * (ctx)->name##_capacity); \ + } \ + } while (0) + +static void syscall_entries_push(struct ctx* ctx, const char* name, + uint32_t offset) { + CHECK_CAPACITY(ctx, syscall_entries, 1); + ctx->syscall_entries[ctx->syscall_entries_size].name = name; + ctx->syscall_entries[ctx->syscall_entries_size].definition_offset = offset; + ++ctx->syscall_entries_size; +} + +static void syscall_definitions_push(struct ctx* ctx, uint32_t v) { + CHECK_CAPACITY(ctx, syscall_definitions, 1); + ctx->syscall_definitions[ctx->syscall_definitions_size++] = v; +} + +static uint32_t arg_name_intern(struct ctx* ctx, const char* str) { + uint32_t result, size = 1 + (uint32_t)strlen(str); + char* existing = + memmem(ctx->arg_name_pool, ctx->arg_name_pool_size, str, size); + if (existing) return (uint32_t)(existing - ctx->arg_name_pool); + CHECK_CAPACITY(ctx, arg_name_pool, size); + 
memcpy(ctx->arg_name_pool + (result = ctx->arg_name_pool_size), str, size); + ctx->arg_name_pool_size += size; + return result; +} + +// Find the lexicographically-minimal name in syscall descriptors +// pointed by ctx->arch[i].syscall; return the union of arch_mask-s +// of the architectures providing this syscall. +static uint32_t begin_syscall(const struct ctx* ctx, + const char** syscall_name) { + static const char sentinel[] = {CHAR_MAX, 0}; + uint32_t mask = 0; + const char* name_min = sentinel; + for (size_t i = 0; i != NARCH; ++i) { + int cmp; + const char* name; + if (!*ctx->arch[i].syscall) continue; + cmp = strcmp(name_min, name = (*ctx->arch[i].syscall)->name); + if (!cmp) { + mask |= ctx->arch[i].arch_mask; + } else if (cmp > 0) { + mask = ctx->arch[i].arch_mask; + name_min = name; + } + } + *syscall_name = name_min; + return mask; +} + +// Extend syscall_definitions with syscall numbers from the subset of +// syscall descriptors pointed by ctx->arch[i].syscall as indicated by +// mask; advance ctx->arch[i].syscall pointers. 
+static void complete_syscall(struct ctx* ctx, uint32_t mask) { + for (size_t i = 0; i < NARCH; ++i) { + if (ctx->arch[i].arch_mask & mask) { + syscall_definitions_push(ctx, (*ctx->arch[i].syscall)->nr); + ++ctx->arch[i].syscall; + } + } +} + +static uint32_t get_arg_name(const struct ctx* ctx, uint32_t mask, int argno, + const char** arg_name) { + size_t i = 0; + const char* name; + uint32_t result; + while (!(ctx->arch[i].arch_mask & mask) || + !(name = (*ctx->arch[i].syscall)->args[argno].name)) { + if (++i == NARCH) return 0; + } + result = ctx->arch[i].arch_mask; + *arg_name = name; + while (++i != NARCH) { + if (ctx->arch[i].arch_mask & mask && + (*ctx->arch[i].syscall)->args[argno].name && + !strcmp(name, (*ctx->arch[i].syscall)->args[argno].name)) { + result |= ctx->arch[i].arch_mask; + } + } + return result; +}; + +static bool is_ptr_sized_arg(const struct ctx* ctx, uint32_t mask, int argno) { + for (size_t i = 0; i != NARCH; ++i) { + if (ctx->arch[i].arch_mask & mask && + (*ctx->arch[i].syscall)->args[argno].size != ctx->arch[i].pointer_size) + return false; + } + return true; +} + +static uint32_t get_arg_type(const struct ctx* ctx, uint32_t mask, int argno, + int* arg_type) { + size_t i = 0; + int type; + uint32_t result; + while (!(ctx->arch[i].arch_mask & mask)) { + if (++i == NARCH) return 0; + } + result = ctx->arch[i].arch_mask; + *arg_type = type = (*ctx->arch[i].syscall)->args[argno].size; + while (++i != NARCH) { + if (ctx->arch[i].arch_mask & mask && + (*ctx->arch[i].syscall)->args[argno].size == type) + result |= ctx->arch[i].arch_mask; + } + return result; +} + +static void do_arg(struct ctx* ctx, uint32_t mask, int argno) { + uint32_t namemask; + const char* name; + while ((namemask = get_arg_name(ctx, mask, argno, &name))) { + uint32_t iname = arg_name_intern(ctx, name); + uint32_t typemask; + int type; + mask &= ~namemask; + if (is_ptr_sized_arg(ctx, namemask, argno)) { + syscall_definitions_push(ctx, namemask); + 
syscall_definitions_push(ctx, SYSCALLDB_ARGNO(argno) | + SYSCALLDB_ARGTYPE(0) | + SYSCALLDB_ARGNAME(iname)); + continue; + } + while ((typemask = get_arg_type(ctx, namemask, argno, &type))) { + namemask &= ~typemask; + if (type <= 0 || type > SYSCALLDB_MAX_ARGTYPE) { + fprintf(stderr, + "Syscall %s, argument #%d (%s): " + "invalid argument size: %d\n", + ctx->syscall_entries[ctx->syscall_entries_size - 1].name, argno, + name, type); + exit(EXIT_FAILURE); + } + syscall_definitions_push(ctx, typemask); + syscall_definitions_push(ctx, SYSCALLDB_ARGNO(argno) | + SYSCALLDB_ARGTYPE(type) | + SYSCALLDB_ARGNAME(iname)); + } + } +} + +static uint32_t compress_args(struct ctx* ctx, uint32_t mask, + uint32_t firstargoff) { + uint32_t narg = (ctx->syscall_definitions_size - firstargoff) / 2; + for (uint32_t i = firstargoff; i != ctx->syscall_definitions_size; i += 2) + if (ctx->syscall_definitions[i] != mask) return -narg; + for (uint32_t i = 0; i != narg; ++i) { + ctx->syscall_definitions[firstargoff + i] = + ctx->syscall_definitions[firstargoff + i * 2 + 1]; + } + ctx->syscall_definitions_size = firstargoff + narg; + return narg; +} + +static void write_pointer_size(const struct ctx* ctx) { + fputs("static const int syscalldb_pointer_size[] = {\n ", stdout); + for (int i = 0; i != NARCH; ++i) { + printf(", [%d] = %d" + !i, __builtin_ctz(ctx->arch[i].arch_mask), + syscall_lists[i].pointer_size); + } + fputs("\n};\n\n", stdout); +} + +static void write_syscall_definitions(const struct ctx* ctx) { + const struct syscalldb_definition* def = + (typeof(def))ctx->syscall_definitions; + const struct entry* entry = ctx->syscall_entries; + fputs("static const uint32_t syscalldb_definitions[] = {\n\n", stdout); + for (; def->arch_mask; def = SYSCALLDB_DEFINITION_NEXT(def), ++entry) { + printf(" // %s\n %#" PRIx32 ", ", entry->name, def->arch_mask); + if (def->n_arg_info <= INT32_MAX) { + printf("%" PRId32 ",\n", def->n_arg_info); + for (uint32_t i = 0; i != def->n_arg_info; ++i) { + 
printf(" ARGNO(%d) | ARGTYPE(%d) | ARGNAME(%d), // %s\n", + (int)SYSCALLDB_GET_ARGNO(def->arg_info[i]), + (int)SYSCALLDB_GET_ARGTYPE(def->arg_info[i]), + (int)SYSCALLDB_GET_ARGNAME(def->arg_info[i]), + ctx->arg_name_pool + SYSCALLDB_GET_ARGNAME(def->arg_info[i])); + } + } else { + printf("-%" PRId32 ",\n", -def->n_arg_info); + for (uint32_t i = 0; i != -def->n_arg_info; ++i) { + printf(" %#" PRIx32 ", ARGNO(%d) | ARGTYPE(%d) | ARGNAME(%d), // %s\n", + def->ext_arg_info[i].arch_mask, + (int)SYSCALLDB_GET_ARGNO(def->ext_arg_info[i].arg_info), + (int)SYSCALLDB_GET_ARGTYPE(def->ext_arg_info[i].arg_info), + (int)SYSCALLDB_GET_ARGNAME(def->ext_arg_info[i].arg_info), + ctx->arg_name_pool + + SYSCALLDB_GET_ARGNAME(def->ext_arg_info[i].arg_info)); + } + } + for (int i = 0; i != __builtin_popcount(def->arch_mask); ++i) { + printf(" %" PRId32 "," + (i != 0), SYSCALLDB_DEFINITION_NR(def)[i]); + } + fputs("\n\n", stdout); + } + fputs(" 0\n};\n\n", stdout); +} + +static void write_arg_name_pool(const struct ctx* ctx) { + enum { MAX_WIDTH = 72, INDENT = 1, QUOTATION_AND_DELIMITER_CHARS = 5 }; + fputs("static const char syscalldb_arg_name_pool[] =\n ", stdout); + uint32_t i = 0; + int pos = INDENT; + while (i != ctx->arg_name_pool_size) { + size_t len = strlen(ctx->arg_name_pool + i); + pos += QUOTATION_AND_DELIMITER_CHARS + (int)len; + if (pos >= MAX_WIDTH) { + fputs("\n ", stdout); + pos = INDENT + QUOTATION_AND_DELIMITER_CHARS + (int)len; + } + printf(" \"%s\\0\"", ctx->arg_name_pool + i); + i += (uint32_t)len + 1; + } + fputs(";\n\n", stdout); +} + +int main() { + struct ctx ctx = {}; + uint32_t mask; + const char* name; + init(&ctx); + while ((mask = begin_syscall(&ctx, &name))) { + uint32_t firstargoff; + syscall_entries_push(&ctx, name, ctx.syscall_definitions_size); + syscall_definitions_push(&ctx, mask); + syscall_definitions_push(&ctx, 0); + firstargoff = ctx.syscall_definitions_size; + for (int argno = 0; argno < SYSCALL_MAX_ARGS; ++argno) { + do_arg(&ctx, mask, argno); 
+ } + ctx.syscall_definitions[firstargoff - 1] = + compress_args(&ctx, mask, firstargoff); + complete_syscall(&ctx, mask); + } + syscall_definitions_push(&ctx, 0); + if (ctx.arg_name_pool_size > SYSCALLDB_MAX_ARGNAME) { + fprintf(stderr, + "String pool size exceeds %d, " + "consider increasing SYSCALLDB_MAX_ARGNAME\n", + (int)SYSCALLDB_MAX_ARGNAME); + exit(EXIT_FAILURE); + } + // Produce output in gperf format + fputs( + "%{\n" + "#include \"syscalldb.h\"\n" + "\n" + "#define ARGNO(no) SYSCALLDB_ARGNO(no)\n" + "#define ARGTYPE(type) SYSCALLDB_ARGTYPE(type)\n" + "#define ARGNAME(name) SYSCALLDB_ARGNAME(name)\n" + "\n", + stdout); + write_pointer_size(&ctx); + write_syscall_definitions(&ctx); + write_arg_name_pool(&ctx); + fputs( + "%}\n" + "%struct-type\n" + "%readonly-tables\n" + "%global-table\n" + "%pic\n" + "%define initializer-suffix ,-1\n" + "%define word-array-name syscalldb_entries\n" + "%define string-pool-name syscalldb_name_pool\n" + "%define lookup-function-name syscalldb_lookup_internal\n" + "struct syscalldb_entry;\n" + "%%\n", + stdout); + for (uint32_t i = 0; i != ctx.syscall_entries_size; ++i) { + printf("%s, %u\n", ctx.syscall_entries[i].name, + ctx.syscall_entries[i].definition_offset); + } + fputs( + "%%\n" + "#include \"syscalldb.inl\"\n", + stdout); + return 0; +}