Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compressed syscall database with O(1) lookup #21

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
/src/lexer.c
/src/parser.h
/src/parser.c
/src/syscalldb.gperf
/src/syscalldb.c
/src/syscalls/syscalldb_generator

# Backup files
*.bak
Expand Down
31 changes: 14 additions & 17 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,14 @@
# limitations under the License.
#

SUBDIRS:=syscalls

OBJCOPY?=objcopy

CFLAGS+=-fPIC -fvisibility=hidden
GENERATED_SRCS:=lexer.c parser.c
GENERATED_SRCS:=lexer.c parser.c syscalldb.c
GENERATED:=lexer.h parser.h ${GENERATED_SRCS}
TEMPORARY:=libkafel_r.o libkafel.o
SYSCALL_LISTS:=amd64_syscalls.c \
i386_syscalls.c \
aarch64_syscalls.c \
mipso32_syscalls.c \
mips64_syscalls.c \
arm_syscalls.c
TEMPORARY:=libkafel_r.o libkafel.o syscalldb.gperf
SRCS:=kafel.c \
context.c \
codegen.c \
Expand All @@ -37,8 +33,7 @@ SRCS:=kafel.c \
policy.c \
range_rules.c \
syscall.c \
${GENERATED_SRCS} \
$(SYSCALL_LISTS:%.c=syscalls/%.c)
${GENERATED_SRCS}
DYNAMIC_TARGET:=${PROJECT_ROOT}libkafel.so
STATIC_TARGET:=${PROJECT_ROOT}libkafel.a
TARGET=${DYNAMIC_TARGET} ${STATIC_TARGET}
Expand All @@ -65,6 +60,13 @@ lexer.h lexer.c: lexer.l
parser.h parser.c: parser.y
bison $<

syscalldb.c: syscalls/syscalldb_generator
./syscalls/syscalldb_generator > ./syscalldb.gperf
gperf -m10 --output-file=./syscalldb.c ./syscalldb.gperf

syscalls/syscalldb_generator: syscalls
true
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unneeded


# DO NOT DELETE THIS LINE -- make depend depends on it.

kafel.o: codegen.h context.h includes.h policy.h expression.h syscall.h
Expand All @@ -76,14 +78,9 @@ expression.o: expression.h common.h
includes.o: includes.h common.h
policy.o: policy.h expression.h common.h
range_rules.o: range_rules.h policy.h expression.h common.h syscall.h
syscall.o: syscall.h common.h
syscall.o: syscall.h syscalldb.h common.h
syscalldb.o: syscall.h syscalldb.h syscalldb.inl
lexer.o: parser.h context.h includes.h policy.h expression.h syscall.h
lexer.o: common.h
parser.o: parser.h context.h includes.h policy.h expression.h syscall.h
parser.o: lexer.h
syscalls/amd64_syscalls.o: syscall.h
syscalls/i386_syscalls.o: syscall.h
syscalls/aarch64_syscalls.o: syscall.h
syscalls/mipso32_syscalls.o: syscall.h
syscalls/mips64_syscalls.o: syscall.h
syscalls/arm_syscalls.o: syscall.h
1 change: 0 additions & 1 deletion src/context.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ void kafel_ctxt_reset(kafel_ctxt_t ctxt) {
}
ctxt->default_action = 0;
ctxt->lexical_error = false;
ctxt->syscalls = NULL;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should clear target_arch_mask

}

void kafel_ctxt_clean(kafel_ctxt_t ctxt) {
Expand Down
2 changes: 1 addition & 1 deletion src/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ struct kafel_ctxt {
struct policy* main_policy;
int default_action;
uint32_t target_arch;
const struct syscall_list* syscalls;
uint32_t target_arch_mask;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use just int instead of uint32_t
name is a little bit misleading, maybe use syscalldb_arch

struct {
enum {
INPUT_NONE,
Expand Down
4 changes: 2 additions & 2 deletions src/kafel.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ static int parse(struct kafel_ctxt* ctxt) {
kafel_yyset_column(1, scanner);
kafel_yyset_lineno(1, scanner);

ctxt->syscalls = syscalls_lookup(ctxt->target_arch);
if (ctxt->syscalls == NULL) {
ctxt->target_arch_mask = syscall_get_arch_mask(ctxt->target_arch);
if (!ctxt->target_arch_mask) {
append_error(ctxt, "Cannot resolve syscall list for architecture %#x\n",
ctxt->target_arch);
kafel_yylex_destroy(scanner);
Expand Down
2 changes: 1 addition & 1 deletion src/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ syscall_id
$$ = syscall_custom(value);
} else {
$$ = (struct syscall_descriptor*)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cast not needed anymore

syscall_lookup(ctxt->syscalls, $1);
syscall_lookup(ctxt->target_arch_mask, $1);
if ($$ == NULL) {
emit_error(@1, "Undefined syscall `%s'", $1);
free($1); $1 = NULL;
Expand Down
78 changes: 29 additions & 49 deletions src/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,75 +25,57 @@
#include <string.h>

#include "common.h"
#include "syscalldb.h"

// Fix for Linux <3.12
#ifndef EM_ARM
#define EM_ARM 40
#endif

#define SYSCALL_LIST_DECL(arch) \
extern const struct syscall_descriptor arch##_syscall_list[]; \
extern const size_t arch##_syscall_list_size;

#define SYSCALL_LIST(audit_arch, arch) \
{ audit_arch, arch##_syscall_list, &arch##_syscall_list_size }

SYSCALL_LIST_DECL(arm)
SYSCALL_LIST_DECL(aarch64)
SYSCALL_LIST_DECL(amd64)
SYSCALL_LIST_DECL(mipso32)
SYSCALL_LIST_DECL(mips64)
SYSCALL_LIST_DECL(i386)
// Creates a heap-allocated, zero-initialized descriptor for a syscall that is
// identified only by its raw number `nr` (no name or argument information).
//
// Returns NULL when the allocation fails instead of dereferencing a null
// pointer. The returned object is owned by the caller and is released with
// syscall_descriptor_destroy().
struct syscall_descriptor* syscall_custom(uint32_t nr) {
  struct syscall_descriptor* rv = calloc(1, sizeof(*rv));
  if (rv == NULL) {
    return NULL;
  }
  rv->nr = nr;
  return rv;
}

const struct syscall_list syscall_lists[] = {
uint32_t syscall_get_arch_mask(uint32_t arch) {
switch (arch) {
default:
return 0;
#ifdef AUDIT_ARCH_ARM
SYSCALL_LIST(AUDIT_ARCH_ARM, arm),
case AUDIT_ARCH_ARM:
return SYSCALLDB_ARCH_ARM_FLAG;
#endif
#ifdef AUDIT_ARCH_AARCH64
SYSCALL_LIST(AUDIT_ARCH_AARCH64, aarch64),
case AUDIT_ARCH_AARCH64:
return SYSCALLDB_ARCH_AARCH64_FLAG;
#endif
#ifdef AUDIT_ARCH_X86_64
SYSCALL_LIST(AUDIT_ARCH_X86_64, amd64),
case AUDIT_ARCH_X86_64:
return SYSCALLDB_ARCH_X86_64_FLAG;
#endif
#ifdef AUDIT_ARCH_MIPS
SYSCALL_LIST(AUDIT_ARCH_MIPS, mipso32),
case AUDIT_ARCH_MIPS:
return SYSCALLDB_ARCH_MIPS_FLAG;
#endif
#ifdef AUDIT_ARCH_MIPS64
SYSCALL_LIST(AUDIT_ARCH_MIPS64, mips64),
case AUDIT_ARCH_MIPS64:
return SYSCALLDB_ARCH_MIPS64_FLAG;
#endif
#ifdef AUDIT_ARCH_I386
SYSCALL_LIST(AUDIT_ARCH_I386, i386),
case AUDIT_ARCH_I386:
return SYSCALLDB_ARCH_I386_FLAG;
#endif
};

struct syscall_descriptor* syscall_custom(uint32_t nr) {
struct syscall_descriptor* rv = calloc(1, sizeof(*rv));
rv->nr = nr;
rv->is_custom = true;
return rv;
}

const struct syscall_list* syscalls_lookup(uint32_t arch) {
for (size_t i = 0; i < sizeof(syscall_lists) / sizeof(syscall_lists[0]);
++i) {
if (syscall_lists[i].arch == arch) {
return &syscall_lists[i];
}
}
return NULL;
}

const struct syscall_descriptor* syscall_lookup(const struct syscall_list* list,
const struct syscall_descriptor* syscall_lookup(uint32_t mask,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove const from return type

const char* name) {
ASSERT(list != NULL);
ASSERT(name != NULL);
/* TODO use binary search if syscalls can be guaranteed to be
* sorted alphabetically
*/
for (size_t i = 0; i < *list->size; ++i) {
if (strcmp(name, list->syscalls[i].name) == 0) {
return &list->syscalls[i];
}
const struct syscalldb_definition* def = syscalldb_lookup(name);
if (def && mask & def->arch_mask) {
struct syscall_descriptor* rv = calloc(1, sizeof(*rv));
syscalldb_unpack(def, mask, rv);
return rv;
}
return NULL;
}
Expand All @@ -102,8 +84,6 @@ void syscall_descriptor_destroy(struct syscall_descriptor** desc) {
ASSERT(desc != NULL);
ASSERT((*desc) != NULL);

if ((*desc)->is_custom) {
free(*desc);
}
free(*desc);
(*desc) = NULL;
}
10 changes: 2 additions & 8 deletions src/syscall.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,9 @@ struct syscall_descriptor {
struct syscall_arg args[SYSCALL_MAX_ARGS];
};

struct syscall_list {
uint32_t arch;
const struct syscall_descriptor* const syscalls;
const size_t* const size;
};

struct syscall_descriptor* syscall_custom(uint32_t nr);
const struct syscall_list* syscalls_lookup(uint32_t arch);
const struct syscall_descriptor* syscall_lookup(const struct syscall_list* list,
uint32_t syscall_get_arch_mask(uint32_t arch);
const struct syscall_descriptor* syscall_lookup(uint32_t arch_mask,
const char* name);
void syscall_descriptor_destroy(struct syscall_descriptor** desc);

Expand Down
115 changes: 115 additions & 0 deletions src/syscalldb.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
Kafel - syscall database
-----------------------------------------

Copyright 2019 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

*/

#ifndef KAFEL_SYSCALLDB_H
#define KAFEL_SYSCALLDB_H

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct syscalldb_definition;
struct syscall_descriptor;

/* One bit per supported architecture. The bits are OR-ed together in
   syscalldb_definition.arch_mask to record which architectures provide a
   given syscall. */
enum {
  SYSCALLDB_ARCH_ARM_FLAG = 1 << 0,
  SYSCALLDB_ARCH_AARCH64_FLAG = 1 << 1,
  SYSCALLDB_ARCH_X86_64_FLAG = 1 << 2,
  SYSCALLDB_ARCH_MIPS_FLAG = 1 << 3,
  SYSCALLDB_ARCH_MIPS64_FLAG = 1 << 4,
  SYSCALLDB_ARCH_I386_FLAG = 1 << 5,
};

/* Looks up the database definition for the syscall called `name`.
   The caller in syscall.c treats a NULL result as "no such syscall". */
const struct syscalldb_definition* syscalldb_lookup(const char* name);
/* NOTE(review): the implementation is not visible here and this function was
   flagged as unused during review. Presumably it maps a syscall number back
   to its name for the architecture(s) in arch_mask -- confirm or remove. */
const char* syscalldb_reverse_lookup(uint32_t arch_mask, uint32_t nr);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unused, please remove


/* Expands the packed `definition` into the caller-provided descriptor `dest`
   for the architecture selected by `arch_mask`. syscall.c passes a freshly
   calloc'ed descriptor as `dest`.
   NOTE(review): presumably arch_mask must contain exactly one
   SYSCALLDB_ARCH_*_FLAG bit; combined masks were reported as unsupported in
   review -- confirm against the implementation. */
void syscalldb_unpack(const struct syscalldb_definition* definition,
uint32_t arch_mask, struct syscall_descriptor* dest);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it does not make sense to call it with a mask like SYSCALLDB_ARCH_ARM_FLAG|SYSCALLDB_ARCH_AARCH64_FLAG and it won't work in current implementation.
Maybe just use masks internally and externally just consecutive values for arch enum or even AUDIT_ARCH_*.


/*
internals
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move internals out of the header


Generated from individual syscall lists, has O(1) lookups and takes
advantage of the redundancy in the data set to reduce footprint
dramatically.

O(1) lookups are courtesy of the perfect hash function generated with
GNU gperf. PHF maps a name to an index in the table of <name, offset>
tuples. If names match, syscall definition is found at the given
offset.

Syscall definitions are of variable length and are stored back to
back. For details, consult the syscalldb_definition struct.

*/

/*
  Packed arg_info word layout (32 bits):

    bits 31..24  argument number
    bits 23..16  argument type
    bits 15..0   argument name

  SYSCALLDB_MAX_ARGNAME is the largest value that fits the 16-bit name field.
  NOTE(review): SYSCALLDB_MAX_ARGTYPE is presumably the upper bound for the
  type field values -- confirm against the generator.
*/
#define SYSCALLDB_MAX_ARGTYPE 8
#define SYSCALLDB_MAX_ARGNAME 0xffff

/* Field encoders: place a value at its position inside an arg_info word. */
#define SYSCALLDB_ARGNO(no) ((uint32_t)(no) << 24)
#define SYSCALLDB_ARGTYPE(type) ((uint32_t)(type) << 16)
#define SYSCALLDB_ARGNAME(name) ((uint32_t)(name))

/* Field decoders: move the field down to bit 0, then keep only its width. */
#define SYSCALLDB_GET_ARGNO(x) (((x) >> 24) & UINT32_C(0xff))
#define SYSCALLDB_GET_ARGTYPE(x) (((x) >> 16) & UINT32_C(0xff))
#define SYSCALLDB_GET_ARGNAME(x) ((x) & UINT32_C(0xffff))

/* One slot of the <name, offset> table that the perfect hash function indexes
   into. Both members are 16-bit values rather than pointers. */
struct syscalldb_entry {
/* Identifies the syscall name. NOTE(review): presumably an offset into a
   string pool -- confirm against the generated table. */
uint16_t name;
/* Offset at which the matching syscalldb_definition is found. */
uint16_t definition_offset;
};

/*
Observations:

(1) very few syscalls are arch-specific;

(2) syscall numbers vary wildly across archs;

(3) argument names and sizes (modulo pointer size differences) are the same
across archs with a few notable exceptions (ex: clone).

Last but not least, avoid pointers in static data structures with
initializers! Due to PIC requirements every single one of these
requires a relocation, which increases the footprint and adds runtime overhead.

*/
/* Header of one variable-length syscall record. Records are stored back to
   back as a sequence of 32-bit words: arch_mask, n_arg_info, the argument
   info words, then the syscall numbers. The one-element arrays below only
   name the start of the variable-length part; use SYSCALLDB_DEFINITION_NR()
   and SYSCALLDB_DEFINITION_NEXT() to step over it. */
struct syscalldb_definition {
uint32_t arch_mask; /* archs providing this syscall */
uint32_t n_arg_info; /* number of arg_info words; if > INT32_MAX, consult
ext_arg_info instead: it has -n_arg_info entries
(two 32-bit words each) */
union {
uint32_t arg_info[1]; /* argno, argtype, argname */
struct {
uint32_t arch_mask; /* archs this entry applies to */
uint32_t arg_info; /* argno, argtype, argname */
} ext_arg_info[1];
};
/* uint32_t nr[]; syscall numbers, one value per bit set in arch_mask */
};

/*
  SYSCALLDB_DEFINITION_NR(d): pointer to the first syscall number of the
  definition `d`.

  The numbers follow the two header words (arch_mask, n_arg_info) and the
  argument info. When n_arg_info <= INT32_MAX there are n_arg_info plain
  arg_info words. Otherwise n_arg_info holds the negated count of
  ext_arg_info entries, each two words long; the unsigned negation recovers
  that count.
*/
#define SYSCALLDB_DEFINITION_NR(d) \
  (&(d)->arch_mask + 2 +           \
   ((d)->n_arg_info > INT32_MAX ? 2 * -(d)->n_arg_info : (d)->n_arg_info))

/*
  SYSCALLDB_DEFINITION_NEXT(d): pointer to the definition stored right after
  `d`, with the same pointer type as `d`.

  A definition ends after one syscall number per bit set in arch_mask.
  __typeof__ is used instead of typeof so the macro also compiles in strict
  ISO modes (e.g. -std=c11). The expansion is fully parenthesized so that
  SYSCALLDB_DEFINITION_NEXT(d)->field parses as intended.
*/
#define SYSCALLDB_DEFINITION_NEXT(d) \
  ((__typeof__(d))(SYSCALLDB_DEFINITION_NR(d) + \
                   __builtin_popcount((d)->arch_mask)))

#endif /* KAFEL_SYSCALLDB_H */
Loading