Skip to content

Commit

Permalink
Enable hardware stack zeroing.
Browse files Browse the repository at this point in the history
This replaces the software stack zeroing path with offload to a hardware
engine, when available.  The hardware zeroing pipeline is controlled by
the ZTOP SCR (currently assumed to be number 27).  This zeroes from the
top of the capability in ZTOP to the bottom.

We assume that short-lived cross-compartment calls or returns from
deeply nested calls will result in ranges where each is a subset of
another, so we try to coalesce.  We could probably do better aggregating
overlapping but not subset ranges.
  • Loading branch information
davidchisnall committed Sep 28, 2023
1 parent 452d404 commit cfcc489
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 38 deletions.
2 changes: 1 addition & 1 deletion sdk/core/loader/boot.cc
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,7 @@ namespace
threadTStack->mstatus =
(priv::MSTATUS_MPIE |
(priv::MSTATUS_PRV_M << priv::MSTATUS_MPP_SHIFT));
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
threadTStack->mshwm = stack.top();
threadTStack->mshwmb = stack.base();
#endif
Expand Down
86 changes: 79 additions & 7 deletions sdk/core/switcher/entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
*/
#define CSR_MSHWMB 0xbc2

/**
* The special capability register for the fast zeroing control.
* Writing a capability here starts zeroing from the address of this register
* down to the base.
*/
#define SCR_ZTOP 0x1b

#define MAX_FAULTS_PER_COMPARTMENT_CALL 1024

#define SPILL_SLOT_cs0 0
Expand Down Expand Up @@ -126,6 +133,46 @@ switcher_scheduler_entry_csp:
* and integer register. All three registers are clobbered.
*/
.macro zero_stack base top scratch
#ifdef CHERIOT_HAS_ZTOP
// Derive the capability to the range that should be zeroed.
// This is stored in base, leaving top as a second scratch register to use
csub \top, c\top, c\base
csetboundsexact c\base, c\base, \top

// Wait for any prior zeroing to finish.
cspecialr c\scratch, SCR_ZTOP
// If ztop is untagged, no zeroing is happening and our attempt to store
// relative to ztop will trap.
cgettag \top, c\scratch
beqz \top, 2f
// If we've already reached the bottom then do nothing.
// NOTE: This can be deleted if ZTOP's tag is cleared when zeroing finishes.
cgetbase \top, c\scratch
beq \top, \scratch, 2f
// If the current zeroing range is a subset of range that we're about to
// zero, don't bother waiting, just zero from the start.
ctestsubset \top, c\base, c\scratch
bnez \top, 2f
// If the requested range is a subset of the previous requested range,
// restart zeroing that instead
// NOTE: This may result in zeroing things too many times, we're trading
// some throughput for latency here. This might be the wrong choice.
ctestsubset \top, c\scratch, c\base
bnez \top, 1f
// Store a byte at the bottom of the zeroed region. This will stall the
// CPU pipeline until we've finished the zeroing.
cgetbase \top, c\scratch
csetaddr c\top, c\scratch, \top
csb zero, 0(c\top)
j 2f
1:
cmove c\base, c\scratch
2:
// Set the new value
cspecialw SCR_ZTOP, c\base

#else

addi \scratch, \top, -32
addi \top, \top, -16
bgt \base, \scratch, 1f
Expand All @@ -143,6 +190,8 @@ switcher_scheduler_entry_csp:
csc cnull, 0(c\base)
csc cnull, 8(c\base)
2:

#endif
.endm

.section .text, "ax", @progbits
Expand Down Expand Up @@ -200,7 +249,7 @@ compartment_switcher_entry:
sub s1, s0, s1
csetboundsexact ct2, csp, s1
csetaddr csp, ct2, s0
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
// Read and align the stack high water mark
csrr gp, CSR_MSHWM
and gp, gp, ~0xf
Expand All @@ -213,7 +262,7 @@ compartment_switcher_entry:
#endif
zero_stack t2, s0, gp
after_zero:
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
// store new stack top as stack high water mark
csrw CSR_MSHWM, sp
#endif
Expand Down Expand Up @@ -347,11 +396,20 @@ exception_entry_asm:
csc ct0, TrustedStack_offset_mepcc(csp)
csrr t1, mstatus
csw t1, TrustedStack_offset_mstatus(csp)
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
csrr t1, CSR_MSHWM
csw t1, TrustedStack_offset_mshwm(csp)
csrr t1, CSR_MSHWMB
csw t1, TrustedStack_offset_mshwmb(csp)
# ifdef CHERIOT_HAS_ZTOP
// Stop zeroing and capture the current zeroing value. Note: cspecialr is
// encoded as cspecialrw and so we can't just use cnull as the source. We
// probably could use the current ct1 value, since it's guaranteed to be
// untagged at this point in the code.
cmove ct1, cnull
cspecialrw ct1, SCR_ZTOP, ct1
csc ct1, TrustedStack_offset_ztop(csp)
# endif
#endif
csrr t1, mcause
csw t1, TrustedStack_offset_mcause(csp)
Expand Down Expand Up @@ -432,17 +490,31 @@ exception_entry_asm:
.Linstall_context:
clw x1, TrustedStack_offset_mstatus(csp)
csrw mstatus, x1
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
clw x1, TrustedStack_offset_mshwm(csp)
csrw CSR_MSHWM, x1
clw x1, TrustedStack_offset_mshwmb(csp)
csrw CSR_MSHWMB, x1
#endif
#ifdef CHERIOT_HAS_ZTOP
clc c1, TrustedStack_offset_ztop(csp)
cspecialw SCR_ZTOP, c1
#endif
cspecialw mepcc, ct2
csb zero, TrustedStack_offset_inForcedUnwind(csp)
// c2 is csp, which will be loaded last and will overwrite the trusted
// stack pointer with the thread's stack pointer.
#ifdef CHERIOT_HAS_ZTOP
// If we have ZTOP, each of these loads will take an extra cycle while the
// zeroing is running. We know that they aren't part of the stack (if the
// trusted stack and the stack overlap, everything is broken) so restart
// the zeroing as late as possible.
reloadRegisters cgp, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, csp
cspecialw SCR_ZTOP, c1
reloadOne c1
#else
reloadRegisters c1, cgp, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, csp
#endif
mret

// If we detect an invalid entry and there is no error handler installed, we want
Expand Down Expand Up @@ -657,7 +729,7 @@ exception_entry_asm:

// Load the trusted stack pointer to ct1
cspecialr ct1, mtdc
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
// Update the spilled copy of the stack high water mark to ensure that we
// will clear all of the stack used by the error handler and the spilled
// context.
Expand Down Expand Up @@ -761,7 +833,7 @@ exception_entry_asm:
clc cgp, SPILL_SLOT_cgp(csp)
cincoffset csp, csp, SPILL_SLOT_SIZE
#ifndef CONFIG_NO_SWITCHER_SAFETY
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
// read and align the stack high water mark
// we will use this as base address for stack clearing
// note that it cannot be greater than stack top as we
Expand All @@ -774,7 +846,7 @@ exception_entry_asm:
cgetaddr t1, csp
csetaddr ct2, csp, tp
zero_stack t2, t1, tp
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
csrw CSR_MSHWM, sp
#endif
#endif // CONFIG_NO_SWITCHER_SAFETY
Expand Down
33 changes: 22 additions & 11 deletions sdk/core/switcher/trusted-stack-assembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,39 @@ EXPORT_ASSEMBLY_OFFSET(TrustedStack, c12, 12 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, c13, 13 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, c14, 14 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, c15, 15 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mstatus, 16 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mcause, (16 * 8) + 4)
#ifdef CONFIG_MSHWM
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mshwm, 17 * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mshwmb, (17 * 8) + 4)
#ifdef CHERIOT_HAS_ZTOP
EXPORT_ASSEMBLY_OFFSET(TrustedStack, ztop, 16 * 8)
# define TSTACK_HAS_ZTOP 1
#else
# define TSTACK_HAS_ZTOP 0
#endif
EXPORT_ASSEMBLY_OFFSET(TrustedStack,
mstatus,
(16 + TSTACK_HAS_ZTOP) * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mcause, TrustedStack_offset_mstatus + 4)
#ifdef CHERIOT_HAS_MSHWM
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mshwm, (17 + TSTACK_HAS_ZTOP) * 8)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, mshwmb, TrustedStack_offset_mshwm + 4)

// Size of everything up to this point
#define TSTACK_REGFRAME_SZ (18 * 8)
# define TSTACK_REGFRAME_SZ ((18 + TSTACK_HAS_ZTOP) * 8)
// frameoffset, inForcedUnwind and padding
#define TSTACK_HEADER_SZ 16
# define TSTACK_HEADER_SZ 16
#else
// Size of everything up to this point
#define TSTACK_REGFRAME_SZ ((16 * 8) + (2* 4))
# define TSTACK_REGFRAME_SZ ((16 * 8) + (2 * 4))
// frameoffset, inForcedUnwind and padding
#define TSTACK_HEADER_SZ 8
# define TSTACK_HEADER_SZ 8
#endif
// The basic trusted stack is the size of the save area, 8 bytes of state for
// unwinding information, and then a single trusted stack frame used for the
// unwind state of the initial thread. (8 * 3) is the size of TrustedStackFrame
// and will match the value below.
EXPORT_ASSEMBLY_SIZE(TrustedStack, TSTACK_REGFRAME_SZ + TSTACK_HEADER_SZ + (8 * 3))
EXPORT_ASSEMBLY_OFFSET(TrustedStack, frames, TSTACK_REGFRAME_SZ + TSTACK_HEADER_SZ)
EXPORT_ASSEMBLY_SIZE(TrustedStack,
TSTACK_REGFRAME_SZ + TSTACK_HEADER_SZ + (8 * 3))
EXPORT_ASSEMBLY_OFFSET(TrustedStack,
frames,
TSTACK_REGFRAME_SZ + TSTACK_HEADER_SZ)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, frameoffset, TSTACK_REGFRAME_SZ)
EXPORT_ASSEMBLY_OFFSET(TrustedStack, inForcedUnwind, TSTACK_REGFRAME_SZ + 2)

Expand Down
43 changes: 25 additions & 18 deletions sdk/core/switcher/tstack.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,28 +28,35 @@ struct TrustedStackFrame
uint16_t errorHandlerCount;
};

#if defined(CHERIOT_HAS_ZTOP) && !defined(CHERIOT_HAS_MSHWM)
# error Platforms with ZTOP must have MSHWM
#endif

template<size_t NFrames>
struct TrustedStackGeneric
{
void *mepcc;
void *c1;
void *csp;
void *cgp;
void *c4;
void *c5;
void *c6;
void *c7;
void *c8;
void *c9;
void *c10;
void *c11;
void *c12;
void *c13;
void *c14;
void *c15;
void *mepcc;
void *c1;
void *csp;
void *cgp;
void *c4;
void *c5;
void *c6;
void *c7;
void *c8;
void *c9;
void *c10;
void *c11;
void *c12;
void *c13;
void *c14;
void *c15;
#ifdef CHERIOT_HAS_ZTOP
void *ztop;
#endif
size_t mstatus;
size_t mcause;
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
uint32_t mshwm;
uint32_t mshwmb;
#endif
Expand All @@ -61,7 +68,7 @@ struct TrustedStackGeneric
uint8_t inForcedUnwind;
// Padding up to multiple of 16-bytes.
uint8_t padding[
#ifdef CONFIG_MSHWM
#ifdef CHERIOT_HAS_MSHWM
13
#else
5
Expand Down
16 changes: 15 additions & 1 deletion sdk/xmake.lua
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,22 @@ rule("firmware")

local loader = target:deps()['cheriot.loader'];

if board.fast_stack_zeroing and not board.stack_high_water_mark then
error("Fast stack zeroing requires the stack high-water mark")
end
if board.stack_high_water_mark then
add_defines("CHERIOT_HAS_MSHWM")
if (board.fast_stack_zeroing) then
add_defines("CHERIOT_HAS_ZTOP")
-- If we have ztop, we need space to spill and reload it.
loader:set('loader_trusted_stack_size', loader:get('loader_trusted_stack_size') + 8)
end
else
-- If we don't have the stack high watermark, the trusted stack is smaller.
loader:set('loader_trusted_stack_size', 168)
end
if board.stack_high_water_mark then
add_defines("CONFIG_MSHWM")
add_defines("CHERIOT_HAS_MSHWM")
else
-- If we don't have the stack high watermark, the trusted stack is smaller.
loader:set('loader_trusted_stack_size', 168)
Expand Down

0 comments on commit cfcc489

Please sign in to comment.