Skip to content

Commit

Permalink
Enable hardware stack zeroing.
Browse files Browse the repository at this point in the history
This replaces the software stack zeroing path with offload to a hardware
engine, when available.  The hardware zeroing pipeline is controlled by
the ZTOP SCR (currently assumed to be number 27).  Writing a capability
to ZTOP starts zeroing from that capability's address down to its base.

We assume that short-lived cross-compartment calls or returns from
deeply nested calls will result in ranges where each is a subset of
another, so we try to coalesce.  We could probably do better by also
aggregating ranges that overlap but are not subsets of one another.
  • Loading branch information
davidchisnall committed Jan 26, 2024
1 parent 1966ac9 commit 3b4b670
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 36 deletions.
1 change: 1 addition & 0 deletions sdk/boards/ibex-safe-simulator.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
"tickrate_hz" : 10,
"revoker" : "hardware",
"stack_high_water_mark" : true,
"fast_stack_zeroing" : true,
"simulation": true,
"simulator" : "${sdk}/../scripts/run-ibex-safe-sim.sh"
}
2 changes: 1 addition & 1 deletion sdk/core/loader/boot.cc
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,7 @@ namespace
threadTStack->mstatus =
(priv::MSTATUS_MPIE |
(priv::MSTATUS_PRV_M << priv::MSTATUS_MPP_SHIFT));
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
threadTStack->mshwm = stack.top();
threadTStack->mshwmb = stack.base();
#endif
Expand Down
97 changes: 89 additions & 8 deletions sdk/core/switcher/entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
*/
#define CSR_MSHWMB 0xbc2

/**
* The special capability register for the fast zeroing control.
* Writing a capability here starts zeroing from that capability's address
* down to its base.
*/
#define SCR_ZTOP 0x1b

#define MAX_FAULTS_PER_COMPARTMENT_CALL 1024

#define SPILL_SLOT_cs0 0
Expand Down Expand Up @@ -128,6 +135,54 @@ switcher_scheduler_entry_csp:
* and integer register. All three registers are clobbered.
*/
.macro zero_stack base top scratch
#ifdef CHERIOT_HAS_ZTOP
// Derive the capability to the range that should be zeroed.
// This is stored in base, leaving top as a second scratch register to use
csub \top, c\top, c\base
// Inexact bounds here may round up, but will still be within the bounds of
// the stack. We must ensure that we zero the entire used region of the
// stack; zeroing slightly more is fine, but only in the downward direction.
// Data on the stack that we must not corrupt is *above* the range that we
// are zeroing. The hardware zeroing zeroes from the capability's address,
// downwards, and the address is set to the top of the to-zero range. The
// inexact bounds may give us a base that is lower or a top that is higher.
// Imprecision at the base can only zero already-zeroed parts of the stack.
csetbounds c\base, c\base, \top
cincoffset c\base, c\base, \top
// Wait for any prior zeroing to finish.
cspecialr c\scratch, SCR_ZTOP
// If ztop is untagged, no zeroing is happening and our attempt to store
// relative to ztop will trap.
cgettag \top, c\scratch
beqz \top, 2f
// If we've already reached the bottom then do nothing.
// NOTE: This can be deleted if ZTOP's tag is cleared when zeroing finishes.
cgetbase \top, c\scratch
beq \top, \scratch, 2f
// If the current zeroing range is a subset of the range that we're about
// to zero, don't bother waiting, just zero from the start.
ctestsubset \top, c\base, c\scratch
bnez \top, 2f
// If the requested range is a subset of the previous requested range,
// restart zeroing that range instead.
// NOTE: This may result in zeroing things too many times; we're trading
// some throughput for latency here. This might be the wrong choice.
ctestsubset \top, c\scratch, c\base
bnez \top, 1f
// Store a byte at the bottom of the zeroed region. This will stall the
// CPU pipeline until we've finished the zeroing.
cgetbase \top, c\scratch
csetaddr c\top, c\scratch, \top
csb zero, 0(c\top)
j 2f
1:
cmove c\base, c\scratch
2:
// Set the new value
cspecialw SCR_ZTOP, c\base

#else

addi \scratch, \top, -32
addi \top, \top, -16
bgt \base, \scratch, 1f
Expand All @@ -145,6 +200,8 @@ switcher_scheduler_entry_csp:
csc cnull, 0(c\base)
csc cnull, 8(c\base)
2:

#endif
.endm

/**
Expand Down Expand Up @@ -214,7 +271,7 @@ __Z26compartment_switcher_entryz:
sub s1, s0, s1
csetboundsexact ct2, csp, s1
csetaddr csp, ct2, s0
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
// Read and align the stack high water mark
csrr gp, CSR_MSHWM
and gp, gp, ~0xf
Expand All @@ -227,7 +284,7 @@ __Z26compartment_switcher_entryz:
#endif
zero_stack t2, s0, gp
after_zero:
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
// store new stack top as stack high water mark
csrw CSR_MSHWM, sp
#endif
Expand Down Expand Up @@ -361,11 +418,20 @@ exception_entry_asm:
csc ct0, TrustedStack_offset_mepcc(csp)
csrr t1, mstatus
csw t1, TrustedStack_offset_mstatus(csp)
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
csrr t1, CSR_MSHWM
csw t1, TrustedStack_offset_mshwm(csp)
csrr t1, CSR_MSHWMB
csw t1, TrustedStack_offset_mshwmb(csp)
# ifdef CHERIOT_HAS_ZTOP
// Stop zeroing and capture the current zeroing value. Note: cspecialr is
// encoded as cspecialrw and so we can't just use cnull as the source. We
// probably could use the current ct1 value, since it's guaranteed to be
// untagged at this point in the code.
cmove ct1, cnull
cspecialrw ct1, SCR_ZTOP, ct1
csc ct1, TrustedStack_offset_ztop(csp)
# endif
#endif
csrr t1, mcause
csw t1, TrustedStack_offset_mcause(csp)
Expand Down Expand Up @@ -453,17 +519,32 @@ exception_entry_asm:
.Linstall_context:
clw x1, TrustedStack_offset_mstatus(csp)
csrw mstatus, x1
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
clw x1, TrustedStack_offset_mshwm(csp)
csrw CSR_MSHWM, x1
clw x1, TrustedStack_offset_mshwmb(csp)
csrw CSR_MSHWMB, x1
#endif
#ifdef CHERIOT_HAS_ZTOP
clc c1, TrustedStack_offset_ztop(csp)
cspecialw SCR_ZTOP, c1
#endif
cspecialw mepcc, ct2
csb zero, TrustedStack_offset_inForcedUnwind(csp)
// c2 is csp, which will be loaded last and will overwrite the trusted
// stack pointer with the thread's stack pointer.
#ifdef CHERIOT_HAS_ZTOP
// If we have ZTOP, each of these loads will take an extra cycle while the
// zeroing is running. We know that they aren't part of the stack (if the
// trusted stack and the stack overlap, everything is broken) so restart
// the zeroing as late as possible. We are reloading via csp, so it must
// be restored last.
reloadRegisters cgp, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15
cspecialw SCR_ZTOP, c1
reloadRegisters c1, csp
#else
reloadRegisters c1, cgp, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, csp
#endif
mret

// If we detect an invalid entry and there is no error handler installed, we want
Expand Down Expand Up @@ -678,7 +759,7 @@ exception_entry_asm:

// Load the trusted stack pointer to ct1
cspecialr ct1, mtdc
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
// Update the spilled copy of the stack high water mark to ensure that we
// will clear all of the stack used by the error handler and the spilled
// context.
Expand Down Expand Up @@ -721,7 +802,7 @@ exception_entry_asm:
j .Linstall_context

.Lhandle_injected_error:
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
clw x1, TrustedStack_offset_mshwm(csp)
csrw CSR_MSHWM, x1
clw x1, TrustedStack_offset_mshwmb(csp)
Expand Down Expand Up @@ -792,7 +873,7 @@ exception_entry_asm:
clc cgp, SPILL_SLOT_cgp(csp)
cincoffset csp, csp, SPILL_SLOT_SIZE
#ifndef CONFIG_NO_SWITCHER_SAFETY
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
// read and align the stack high water mark
// we will use this as base address for stack clearing
// note that it cannot be greater than stack top as we
Expand All @@ -805,7 +886,7 @@ exception_entry_asm:
cgetaddr t1, csp
csetaddr ct2, csp, tp
zero_stack t2, t1, tp
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
csrw CSR_MSHWM, sp
#endif
#endif // CONFIG_NO_SWITCHER_SAFETY
Expand Down
21 changes: 14 additions & 7 deletions sdk/core/switcher/trusted-stack-assembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,21 @@ EXPORT_ASSEMBLY_OFFSET(TrustedStack, c13, 13 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, c14, 14 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, c15, 15 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, hazardPointers, 16 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mstatus, 17 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mcause, (17 * 8) + 4)
#ifdef CONFIG_MSHWM
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mshwm, 18 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mshwmb, (18 * 8) + 4)

#ifdef CHERIOT_HAS_ZTOP
EXPORT_ASSEMBLY_OFFSET(TrustedStack, ztop, 17 * 8)
# define TSTACK_ZTOP_WORDS 1
#else
# define TSTACK_ZTOP_WORDS 0
#endif
EXPORT_ASSEMBLY_OFFSET(TrustedStack,
mstatus,
(17 + TSTACK_ZTOP_WORDS) * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mcause, TrustedStack_offset_mstatus + 4)
#ifdef CHERIOT_HAS_MSHWM
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mshwm, (18 + TSTACK_ZTOP_WORDS) * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mshwmb, TrustedStack_offset_mshwm + 4)
// Size of everything up to this point
# define TSTACK_REGFRAME_SZ (19 * 8)
# define TSTACK_REGFRAME_SZ ((19 + TSTACK_ZTOP_WORDS) * 8)
// frameoffset, inForcedUnwind and padding
# define TSTACK_HEADER_SZ 16
#else
Expand Down
45 changes: 26 additions & 19 deletions sdk/core/switcher/tstack.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,36 @@ struct TrustedStackFrame
uint16_t errorHandlerCount;
};

#if defined(CHERIOT_HAS_ZTOP) && !defined(CHERIOT_HAS_MSHWM)
# error Platforms with ZTOP must have MSHWM
#endif

template<size_t NFrames>
struct TrustedStackGeneric
{
void *mepcc;
void *c1;
void *csp;
void *cgp;
void *c4;
void *c5;
void *c6;
void *c7;
void *c8;
void *c9;
void *c10;
void *c11;
void *c12;
void *c13;
void *c14;
void *c15;
void *hazardPointers;
void *mepcc;
void *c1;
void *csp;
void *cgp;
void *c4;
void *c5;
void *c6;
void *c7;
void *c8;
void *c9;
void *c10;
void *c11;
void *c12;
void *c13;
void *c14;
void *c15;
void *hazardPointers;
#ifdef CHERIOT_HAS_ZTOP
void *ztop;
#endif
size_t mstatus;
size_t mcause;
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
uint32_t mshwm;
uint32_t mshwmb;
#endif
Expand All @@ -66,7 +73,7 @@ struct TrustedStackGeneric
uint8_t inForcedUnwind;
// Padding up to multiple of 16-bytes.
uint8_t padding[
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
11
#else
3
Expand Down
10 changes: 9 additions & 1 deletion sdk/xmake.lua
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,16 @@ rule("firmware")

local loader = target:deps()['cheriot.loader'];

if board.fast_stack_zeroing and not board.stack_high_water_mark then
error("Fast stack zeroing requires the stack high-water mark")
end
if board.stack_high_water_mark then
add_defines("CONFIG_MSHWM")
add_defines("CHERIOT_HAS_MSHWM")
if (board.fast_stack_zeroing) then
add_defines("CHERIOT_HAS_ZTOP")
-- If we have ztop, we need space to spill and reload it.
loader:set('loader_trusted_stack_size', loader:get('loader_trusted_stack_size') + 8)
end
else
-- If we don't have the stack high watermark, the trusted stack is smaller.
loader:set('loader_trusted_stack_size', 176)
Expand Down

0 comments on commit 3b4b670

Please sign in to comment.