Skip to content

Commit

Permalink
Compressed syscall database with O(1) lookup
Browse files Browse the repository at this point in the history
libkafel.so 5x smaller (x86_64, stripped): down to 88KiB from 440KiB.

Closes #20
  • Loading branch information
mejedi committed May 5, 2019
1 parent d907da7 commit c466073
Show file tree
Hide file tree
Showing 11 changed files with 735 additions and 79 deletions.
31 changes: 14 additions & 17 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,14 @@
# limitations under the License.
#

SUBDIRS:=syscalls

OBJCOPY?=objcopy

CFLAGS+=-fPIC -fvisibility=hidden
GENERATED_SRCS:=lexer.c parser.c
GENERATED_SRCS:=lexer.c parser.c syscalldb.c
GENERATED:=lexer.h parser.h ${GENERATED_SRCS}
TEMPORARY:=libkafel_r.o libkafel.o
SYSCALL_LISTS:=amd64_syscalls.c \
i386_syscalls.c \
aarch64_syscalls.c \
mipso32_syscalls.c \
mips64_syscalls.c \
arm_syscalls.c
TEMPORARY:=libkafel_r.o libkafel.o syscalldb.gperf
SRCS:=kafel.c \
context.c \
codegen.c \
Expand All @@ -37,8 +33,7 @@ SRCS:=kafel.c \
policy.c \
range_rules.c \
syscall.c \
${GENERATED_SRCS} \
$(SYSCALL_LISTS:%.c=syscalls/%.c)
${GENERATED_SRCS}
DYNAMIC_TARGET:=${PROJECT_ROOT}libkafel.so
STATIC_TARGET:=${PROJECT_ROOT}libkafel.a
TARGET=${DYNAMIC_TARGET} ${STATIC_TARGET}
Expand All @@ -65,6 +60,13 @@ lexer.h lexer.c: lexer.l
parser.h parser.c: parser.y
bison $<

syscalldb.c: syscalls/syscalldb_generator
./syscalls/syscalldb_generator > ./syscalldb.gperf
gperf -m10 --output-file=./syscalldb.c ./syscalldb.gperf

syscalls/syscalldb_generator: syscalls
true

# DO NOT DELETE THIS LINE -- make depend depends on it.

kafel.o: codegen.h context.h includes.h policy.h expression.h syscall.h
Expand All @@ -76,14 +78,9 @@ expression.o: expression.h common.h
includes.o: includes.h common.h
policy.o: policy.h expression.h common.h
range_rules.o: range_rules.h policy.h expression.h common.h syscall.h
syscall.o: syscall.h common.h
syscall.o: syscall.h syscalldb.h common.h
syscalldb.o: syscall.h syscalldb.h syscalldb.inl
lexer.o: parser.h context.h includes.h policy.h expression.h syscall.h
lexer.o: common.h
parser.o: parser.h context.h includes.h policy.h expression.h syscall.h
parser.o: lexer.h
syscalls/amd64_syscalls.o: syscall.h
syscalls/i386_syscalls.o: syscall.h
syscalls/aarch64_syscalls.o: syscall.h
syscalls/mipso32_syscalls.o: syscall.h
syscalls/mips64_syscalls.o: syscall.h
syscalls/arm_syscalls.o: syscall.h
1 change: 0 additions & 1 deletion src/context.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ void kafel_ctxt_reset(kafel_ctxt_t ctxt) {
}
ctxt->default_action = 0;
ctxt->lexical_error = false;
ctxt->syscalls = NULL;
}

void kafel_ctxt_clean(kafel_ctxt_t ctxt) {
Expand Down
2 changes: 1 addition & 1 deletion src/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ struct kafel_ctxt {
struct policy* main_policy;
int default_action;
uint32_t target_arch;
const struct syscall_list* syscalls;
uint32_t target_arch_mask;
struct {
enum {
INPUT_NONE,
Expand Down
4 changes: 2 additions & 2 deletions src/kafel.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ static int parse(struct kafel_ctxt* ctxt) {
kafel_yyset_column(1, scanner);
kafel_yyset_lineno(1, scanner);

ctxt->syscalls = syscalls_lookup(ctxt->target_arch);
if (ctxt->syscalls == NULL) {
ctxt->target_arch_mask = syscall_get_arch_mask(ctxt->target_arch);
if (!ctxt->target_arch_mask) {
append_error(ctxt, "Cannot resolve syscall list for architecture %#x\n",
ctxt->target_arch);
kafel_yylex_destroy(scanner);
Expand Down
2 changes: 1 addition & 1 deletion src/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ syscall_id
$$ = syscall_custom(value);
} else {
$$ = (struct syscall_descriptor*)
syscall_lookup(ctxt->syscalls, $1);
syscall_lookup(ctxt->target_arch_mask, $1);
if ($$ == NULL) {
emit_error(@1, "Undefined syscall `%s'", $1);
free($1); $1 = NULL;
Expand Down
78 changes: 29 additions & 49 deletions src/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,75 +25,57 @@
#include <string.h>

#include "common.h"
#include "syscalldb.h"

// Fix for Linux <3.12
#ifndef EM_ARM
#define EM_ARM 40
#endif

#define SYSCALL_LIST_DECL(arch) \
extern const struct syscall_descriptor arch##_syscall_list[]; \
extern const size_t arch##_syscall_list_size;

#define SYSCALL_LIST(audit_arch, arch) \
{ audit_arch, arch##_syscall_list, &arch##_syscall_list_size }

SYSCALL_LIST_DECL(arm)
SYSCALL_LIST_DECL(aarch64)
SYSCALL_LIST_DECL(amd64)
SYSCALL_LIST_DECL(mipso32)
SYSCALL_LIST_DECL(mips64)
SYSCALL_LIST_DECL(i386)
/*
 * Creates a descriptor for a syscall that is referenced by its raw number
 * rather than by name.
 *
 * The descriptor is heap-allocated and zero-initialised (calloc), then its
 * `nr` field is set to the requested number. The caller owns the result;
 * syscall_descriptor_destroy() releases it with free().
 *
 * Returns NULL if the allocation fails instead of dereferencing a null
 * pointer.
 */
struct syscall_descriptor* syscall_custom(uint32_t nr) {
  struct syscall_descriptor* rv = calloc(1, sizeof(*rv));
  if (rv == NULL) {
    return NULL;
  }
  rv->nr = nr;
  return rv;
}

const struct syscall_list syscall_lists[] = {
uint32_t syscall_get_arch_mask(uint32_t arch) {
switch (arch) {
default:
return 0;
#ifdef AUDIT_ARCH_ARM
SYSCALL_LIST(AUDIT_ARCH_ARM, arm),
case AUDIT_ARCH_ARM:
return SYSCALLDB_ARCH_ARM_FLAG;
#endif
#ifdef AUDIT_ARCH_AARCH64
SYSCALL_LIST(AUDIT_ARCH_AARCH64, aarch64),
case AUDIT_ARCH_AARCH64:
return SYSCALLDB_ARCH_AARCH64_FLAG;
#endif
#ifdef AUDIT_ARCH_X86_64
SYSCALL_LIST(AUDIT_ARCH_X86_64, amd64),
case AUDIT_ARCH_X86_64:
return SYSCALLDB_ARCH_X86_64_FLAG;
#endif
#ifdef AUDIT_ARCH_MIPS
SYSCALL_LIST(AUDIT_ARCH_MIPS, mipso32),
case AUDIT_ARCH_MIPS:
return SYSCALLDB_ARCH_MIPS_FLAG;
#endif
#ifdef AUDIT_ARCH_MIPS64
SYSCALL_LIST(AUDIT_ARCH_MIPS64, mips64),
case AUDIT_ARCH_MIPS64:
return SYSCALLDB_ARCH_MIPS64_FLAG;
#endif
#ifdef AUDIT_ARCH_I386
SYSCALL_LIST(AUDIT_ARCH_I386, i386),
case AUDIT_ARCH_I386:
return SYSCALLDB_ARCH_I386_FLAG;
#endif
};

/*
 * Creates a descriptor for a syscall that is referenced by its raw number
 * rather than by name.
 *
 * The descriptor is heap-allocated and zero-initialised (calloc); `nr` is
 * set to the requested number and `is_custom` is raised so that
 * syscall_descriptor_destroy() knows the descriptor must be freed.
 *
 * Returns NULL if the allocation fails instead of dereferencing a null
 * pointer.
 */
struct syscall_descriptor* syscall_custom(uint32_t nr) {
  struct syscall_descriptor* rv = calloc(1, sizeof(*rv));
  if (rv == NULL) {
    return NULL;
  }
  rv->nr = nr;
  rv->is_custom = true;
  return rv;
}

const struct syscall_list* syscalls_lookup(uint32_t arch) {
for (size_t i = 0; i < sizeof(syscall_lists) / sizeof(syscall_lists[0]);
++i) {
if (syscall_lists[i].arch == arch) {
return &syscall_lists[i];
}
}
return NULL;
}

const struct syscall_descriptor* syscall_lookup(const struct syscall_list* list,
const struct syscall_descriptor* syscall_lookup(uint32_t mask,
const char* name) {
ASSERT(list != NULL);
ASSERT(name != NULL);
/* TODO use binary search if syscalls can be guaranteed to be
* sorted alphabetically
*/
for (size_t i = 0; i < *list->size; ++i) {
if (strcmp(name, list->syscalls[i].name) == 0) {
return &list->syscalls[i];
}
const struct syscalldb_definition* def = syscalldb_lookup(name);
if (def && mask & def->arch_mask) {
struct syscall_descriptor* rv = calloc(1, sizeof(*rv));
syscalldb_unpack(def, mask, rv);
return rv;
}
return NULL;
}
Expand All @@ -102,8 +84,6 @@ void syscall_descriptor_destroy(struct syscall_descriptor** desc) {
ASSERT(desc != NULL);
ASSERT((*desc) != NULL);

if ((*desc)->is_custom) {
free(*desc);
}
free(*desc);
(*desc) = NULL;
}
10 changes: 2 additions & 8 deletions src/syscall.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,9 @@ struct syscall_descriptor {
struct syscall_arg args[SYSCALL_MAX_ARGS];
};

struct syscall_list {
uint32_t arch;
const struct syscall_descriptor* const syscalls;
const size_t* const size;
};

struct syscall_descriptor* syscall_custom(uint32_t nr);
const struct syscall_list* syscalls_lookup(uint32_t arch);
const struct syscall_descriptor* syscall_lookup(const struct syscall_list* list,
uint32_t syscall_get_arch_mask(uint32_t arch);
const struct syscall_descriptor* syscall_lookup(uint32_t arch_mask,
const char* name);
void syscall_descriptor_destroy(struct syscall_descriptor** desc);

Expand Down
115 changes: 115 additions & 0 deletions src/syscalldb.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
Kafel - syscall database
-----------------------------------------
Copyright 2019 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#ifndef KAFEL_SYSCALLDB_H
#define KAFEL_SYSCALLDB_H

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct syscalldb_definition;
struct syscall_descriptor;

/*
   One bit per supported architecture. The bits are OR-ed together in the
   arch_mask fields of the database, and syscall_get_arch_mask() translates
   an AUDIT_ARCH_* value into the matching single flag.
*/
enum {
  SYSCALLDB_ARCH_ARM_FLAG = 1 << 0,
  SYSCALLDB_ARCH_AARCH64_FLAG = 1 << 1,
  SYSCALLDB_ARCH_X86_64_FLAG = 1 << 2,
  SYSCALLDB_ARCH_MIPS_FLAG = 1 << 3,
  SYSCALLDB_ARCH_MIPS64_FLAG = 1 << 4,
  SYSCALLDB_ARCH_I386_FLAG = 1 << 5,
};

/*
   Looks up the packed definition of the syscall called `name`.
   The caller in syscall.c treats a NULL result as "no such syscall" and
   additionally checks the definition's arch_mask against the target
   architecture before using it.
*/
const struct syscalldb_definition* syscalldb_lookup(const char* name);
/*
   NOTE(review): implementation is not visible from this header; presumably
   maps a syscall number on the architecture(s) in arch_mask back to the
   syscall name -- confirm what is returned for an unknown number.
*/
const char* syscalldb_reverse_lookup(uint32_t arch_mask, uint32_t nr);

/*
   Expands a packed definition into the syscall_descriptor pointed to by
   `dest`, for the architecture selected by arch_mask. The caller in
   syscall.c passes a freshly calloc'ed descriptor.
   NOTE(review): behaviour when arch_mask has several bits set is not
   visible here -- confirm against the implementation.
*/
void syscalldb_unpack(const struct syscalldb_definition* definition,
uint32_t arch_mask, struct syscall_descriptor* dest);

/*
internals
Generated from individual syscall lists, has O(1) lookups and takes
advantage of the redundancy in the data set to reduce footprint
dramatically.
O(1) lookups are courtesy of the perfect hash function generated with
GNU gperf. PHF maps a name to an index in the table of <name, offset>
tuples. If names match, syscall definition is found at the given
offset.
Syscall definitions are of variable length and are stored back to
back. For details, consult the syscalldb_definition struct.
*/

/*
   Limits for the argtype and argname fields of an arg_info word.
   NOTE(review): presumably the largest values the generator may emit --
   confirm against syscalldb_generator.
*/
#define SYSCALLDB_MAX_ARGTYPE 8
#define SYSCALLDB_MAX_ARGNAME 0xffff

/*
   arg_info word layout (32 bits):
     bits 31..24  argument number
     bits 23..16  argument type
     bits 15..0   argument name
   The three builders below each place one field; OR them together to form
   a complete word.
*/
#define SYSCALLDB_ARGNO(no) ((uint32_t)(no) << 24)
#define SYSCALLDB_ARGTYPE(type) ((uint32_t)(type) << 16)
#define SYSCALLDB_ARGNAME(name) ((uint32_t)(name))

/* Field extractors, the inverse of the builders above. */
#define SYSCALLDB_GET_ARGNO(x) (((uint32_t)(x) >> 24) & UINT32_C(0xff))
#define SYSCALLDB_GET_ARGTYPE(x) (((uint32_t)(x) >> 16) & UINT32_C(0xff))
#define SYSCALLDB_GET_ARGNAME(x) ((uint32_t)(x) & UINT32_C(0xffff))

/*
   One <name, offset> tuple of the table indexed by the perfect hash
   function: if the name stored here matches the name being looked up, the
   syscall definition is found at definition_offset.
*/
struct syscalldb_entry {
/* Reference to the syscall name.
   NOTE(review): presumably an offset into a string pool rather than a
   pointer, in line with the "avoid pointers" remark in this header --
   confirm against the generated table. */
uint16_t name;
/* Location of the matching syscalldb_definition.
   NOTE(review): unit (bytes vs. 32-bit words) is not visible here. */
uint16_t definition_offset;
};

/*
Observations:
(1) very few syscalls are arch-specific;
(2) syscall numbers vary wildly across archs;
(3) argument names and sizes (modulo pointer size differences) are the same
across archs with a few notable exceptions (ex: clone).
Last but not least, avoid pointers in static data structures with
initializers! Due to PIC requirements every single one of these
requires a relocation, which increases the footprint and adds runtime overhead.
*/
/*
   Packed, variable-length syscall definition. In memory a definition is a
   run of 32-bit words:

     arch_mask, n_arg_info, <argument info words>, <syscall numbers>

   Plain form (n_arg_info <= INT32_MAX): n_arg_info arg_info words follow.
   Extended form (n_arg_info > INT32_MAX): -n_arg_info ext_arg_info entries
   of two words each follow, each carrying its own arch_mask.
   The syscall numbers come last, one per bit set in arch_mask.
*/
struct syscalldb_definition {
  uint32_t arch_mask;  /* archs providing this syscall */
  uint32_t n_arg_info; /* if > INT32_MAX, consult ext_arg_info;
                          it has -n_arg_info entries */
  union {
    uint32_t arg_info[1]; /* argno, argtype, argname */
    struct {
      uint32_t arch_mask; /* archs this entry applies to */
      uint32_t arg_info;  /* argno, argtype, argname */
    } ext_arg_info[1];
  };
  /* uint32_t nr[]; syscall numbers, one value per bit set in arch_mask */
};

/*
   Pointer to the first syscall number of definition `d`: skips the two
   header words and then either n_arg_info plain words or -n_arg_info
   two-word extended entries. The negation is done in unsigned arithmetic,
   so it yields the entry count directly.
   Note: `d` is evaluated more than once; pass a side-effect-free expression.
*/
#define SYSCALLDB_DEFINITION_NR(d)                                  \
  (&(d)->arch_mask + 2 +                                            \
   ((d)->n_arg_info > (uint32_t)INT32_MAX ? 2 * -(d)->n_arg_info    \
                                          : (d)->n_arg_info))

/*
   Pointer to the definition stored immediately after `d`, i.e. just past
   d's syscall numbers (one per bit set in arch_mask). The result has the
   same type as `d`.
   The expansion is fully parenthesized so that member access on the result
   (NEXT(d)->field) applies to the cast pointer, and __typeof__ is used
   because plain `typeof` is not a keyword in strict ISO C modes.
*/
#define SYSCALLDB_DEFINITION_NEXT(d)              \
  ((__typeof__(d))(SYSCALLDB_DEFINITION_NR(d) +   \
                   __builtin_popcount((d)->arch_mask)))

#endif /* KAFEL_SYSCALLDB_H */
Loading

0 comments on commit c466073

Please sign in to comment.