Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

making thread_local fiber_local #618

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions glibc-2.40.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -210,7 +210,7 @@ advise_stack_range (void *mem, size_t size, uintptr_t pd, size_t guardsize)
new stack or reusing a cached stack of sufficient size.
ATTR must be non-NULL and point to a valid pthread_attr.
PDP must be non-NULL. */
-static int
+int
Copy link
Collaborator Author

@lihuiba lihuiba Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes the internal function allocate_stack() in libpthread public, so that it is accessible from, say, fiber_create().

allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
void **stack, size_t *stacksize)
{
71 changes: 42 additions & 29 deletions thread/thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ limitations under the License.
#include <thread>
#include <mutex>
#include <condition_variable>
#include <pthread.h>

#ifdef _WIN64
#include <processthreadsapi.h>
Expand All @@ -53,6 +54,10 @@ inline int posix_memalign(void** memptr, size_t alignment, size_t size) {
#include <photon/thread/thread-key.h>
#include <photon/thread/arch.h>

struct pthread_attr;
extern "C" int allocate_stack(const struct pthread_attr *attr,
struct pthread **pdp, void **stack, size_t *stacksize);

/* notes on the scheduler:

1. runq (denoted by CURRENT) and sleepq are completely private,
Expand Down Expand Up @@ -136,14 +141,15 @@ namespace photon
{
public:
template<typename F>
void init(void* ptr, F ret2func, thread* th)
void init(void* ptr, F ret2func, thread* th, void* tcb_tls)
{
_ptr = ptr;
assert((uint64_t)_ptr % 16 == 0);
push(0);
push(0);
push(ret2func);
push(th); // rbp <== th
push(tcb_tls); // fs <== tcb_tls
}
void** pointer_ref()
{
Expand Down Expand Up @@ -534,14 +540,7 @@ namespace photon
vcpu = current->get_vcpu();
(plock = &vcpu->runq_lock) -> foreground_lock();
}
mutable bool update_current = false;
void set_current(thread* th) const {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no longer any need to set/switch the thread-local variable CURRENT as part of context switching, given that it has become a fiber-local variable.

current = th;
update_current = true;
}
~AtomicRunQ() {
if (update_current)
*pc = current;
plock->foreground_unlock();
}
static void prefetch_context(thread* from, thread* to)
Expand All @@ -560,7 +559,6 @@ namespace photon
assert(!current->single());
auto from = current;
auto to = from->remove_from_list();
set_current(to);
prefetch_context(from, to);
from->state = new_state;
to->state = states::RUNNING;
Expand All @@ -571,7 +569,6 @@ namespace photon
prefetch_context(from, to);
from->state = states::READY;
to->state = states::RUNNING;
set_current(to);
return {from, to};
}
Switch goto_next() const {
Expand Down Expand Up @@ -648,42 +645,38 @@ namespace photon
to->get_vcpu()->switch_count++;
}

static void _photon_thread_die(thread* th) asm("_photon_thread_die");

#if defined(__x86_64__)
#if !defined(_WIN64)
asm(
DEF_ASM_FUNC(_photon_switch_context) // (void** rdi_to, void** rsi_from)
R"(
rdfsbase %rax
Copy link
Collaborator Author

@lihuiba lihuiba Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Save and restore the fs register as part of context switching. It points to the thread control block (TCB) and thread-local storage (TLS).

X86_64 only, for now.

push %rbp
push %rax
mov %rsp, (%rsi)
mov (%rdi), %rsp
pop %rax
pop %rbp
wrfsbase %rax
ret
)"

DEF_ASM_FUNC(_photon_switch_context_defer) // (void* rdi_arg, void (*rsi_defer)(void*), void** rdx_to, void** rcx_from)
R"(
rdfsbase %rax
push %rbp
push %rax
mov %rsp, (%rcx)
)"

DEF_ASM_FUNC(_photon_switch_context_defer_die) // (void* rdi_arg, void (*rsi_defer)(void*), void** rdx_to_th)
R"(
mov (%rdx), %rsp
pop %rax
pop %rbp
wrfsbase %rax
jmp *%rsi
)"

DEF_ASM_FUNC(_photon_thread_stub)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_photon_thread_stub() has become more complex and is better implemented in C++.

R"(
mov 0x40(%rbp), %rdi
movq $0, 0x40(%rbp)
call *0x48(%rbp)
mov %rax, 0x48(%rbp)
mov %rbp, %rdi
call _photon_thread_die
)"
);

inline void switch_context(thread* from, thread* to) {
Expand Down Expand Up @@ -896,14 +889,17 @@ R"(
_photon_switch_context_defer_die(
arg, func, sw.to->stack.pointer_ref());
}
static __attribute__((used, noreturn))
void _photon_thread_die(thread* th) {
static __attribute__((noreturn))
void _photon_thread_stub() {
register thread* th asm("rbp");
Copy link
Collaborator Author

@lihuiba lihuiba Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On entry rbp points to struct thread.

CURRENT = th; // CURRENT is now fiber-local
auto arg = th->arg;
th->tls = 0; // union with th->arg
th->retval = th->start(arg);
assert(th == CURRENT);
th->die();
}

extern "C" void _photon_thread_stub() asm ("_photon_thread_stub");

thread* thread_create(thread_entry start, void* arg,
uint64_t stack_size, uint16_t reserved_space) {
RunQ rq;
Expand All @@ -922,7 +918,24 @@ R"(
stack_size, least_stack_size, least_stack_size);
stack_size = least_stack_size;
}
char* ptr = (char*)photon_thread_alloc(stack_size);
// char* ptr = (char*)photon_thread_alloc(stack_size);
struct pthread* pd;
char* ptr; // ptr to stack
size_t pstacksize;
pthread_attr_t attr;
pthread_attr_init(&attr);
allocate_stack((struct pthread_attr*)&attr, &pd, (void**)&ptr, &pstacksize);
Copy link
Collaborator Author

@lihuiba lihuiba Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We must use allocate_stack() in libpthread to allocate the stack, the TCB (so that it is compatible with the threading facilities) and the TLS (for all modules, whether loaded at startup time or at run time).


// this should be moved into allocate_stack(),
// as we don't have the definition of *pd
#if TLS_TCB_AT_TP
/* Reference to the TCB itself. */
pd->header.self = pd;

/* Self-reference for TLS. */
pd->header.tcb = pd;
#endif

if (unlikely(!ptr))
return nullptr;
uint64_t p = (uint64_t)ptr + stack_size - sizeof(thread) - randomizer;
Expand All @@ -934,7 +947,7 @@ R"(
th->stack_size = stack_size;
th->arg = arg;
auto sp = align_down(p - reserved_space, 64);
th->stack.init((void*)sp, &_photon_thread_stub, th);
th->stack.init((void*)sp, &_photon_thread_stub, th, pd);
AtomicRunQ arq(rq);
th->vcpu = arq.vcpu;
arq.vcpu->nthreads++;
Expand Down Expand Up @@ -1405,7 +1418,7 @@ R"(
}
void thread_exit(void* retval) {
CURRENT->retval = retval;
_photon_thread_die(CURRENT);
CURRENT->die();
}

int thread_shutdown(thread* th, bool flag)
Expand Down
Loading