-
Notifications
You must be signed in to change notification settings - Fork 120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
making thread_local fiber_local #618
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
--- a/nptl/allocatestack.c | ||
+++ b/nptl/allocatestack.c | ||
@@ -210,7 +210,7 @@ advise_stack_range (void *mem, size_t size, uintptr_t pd, size_t guardsize) | ||
new stack or reusing a cached stack of sufficient size. | ||
ATTR must be non-NULL and point to a valid pthread_attr. | ||
PDP must be non-NULL. */ | ||
-static int | ||
+int | ||
allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, | ||
void **stack, size_t *stacksize) | ||
{ | ||
@@ -403,6 +403,18 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, | ||
} | ||
} | ||
|
||
+ /* Initialize the TCB. All initializations with zero should be | ||
+ performed in 'get_cached_stack'. This way we avoid doing this if | ||
+ the stack freshly allocated with 'mmap'. */ | ||
+ | ||
+#if TLS_TCB_AT_TP | ||
+ /* Reference to the TCB itself. */ | ||
+ pd->header.self = pd; | ||
+ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These lines are moved here from pthread_create() because fiber_create() does not have the definition of *pd and so cannot perform this initialization outside allocate_stack(). |
||
+ /* Self-reference for TLS. */ | ||
+ pd->header.tcb = pd; | ||
+#endif | ||
+ | ||
/* Remember the stack-related values. */ | ||
pd->stackblock = mem; | ||
pd->stackblock_size = size; | ||
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c | ||
index 1d3665d5..6ee79035 100644 | ||
--- a/nptl/pthread_create.c | ||
+++ b/nptl/pthread_create.c | ||
@@ -665,18 +665,6 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr, | ||
} | ||
|
||
|
||
- /* Initialize the TCB. All initializations with zero should be | ||
- performed in 'get_cached_stack'. This way we avoid doing this if | ||
- the stack freshly allocated with 'mmap'. */ | ||
- | ||
-#if TLS_TCB_AT_TP | ||
- /* Reference to the TCB itself. */ | ||
- pd->header.self = pd; | ||
- | ||
- /* Self-reference for TLS. */ | ||
- pd->header.tcb = pd; | ||
-#endif | ||
- | ||
/* Store the address of the start routine and the parameter. Since | ||
we do not start the function directly the stillborn thread will | ||
get the information from its thread descriptor. */ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,6 +31,7 @@ limitations under the License. | |
#include <thread> | ||
#include <mutex> | ||
#include <condition_variable> | ||
#include <pthread.h> | ||
|
||
#ifdef _WIN64 | ||
#include <processthreadsapi.h> | ||
|
@@ -53,6 +54,10 @@ inline int posix_memalign(void** memptr, size_t alignment, size_t size) { | |
#include <photon/thread/thread-key.h> | ||
#include <photon/thread/arch.h> | ||
|
||
struct pthread_attr; | ||
extern "C" int allocate_stack(const struct pthread_attr *attr, | ||
struct pthread **pdp, void **stack, size_t *stacksize); | ||
|
||
/* notes on the scheduler: | ||
|
||
1. runq (denoted by CURRENT) and sleepq are completely private,
|
@@ -173,6 +178,7 @@ namespace photon | |
struct thread : public intrusive_list_node<thread> { | ||
volatile vcpu_t* vcpu; | ||
Stack stack; | ||
pthread_t tcb_or_tp; | ||
// offset 32B | ||
int idx = -1; /* index in the sleep queue array */ | ||
int error_number = 0; | ||
|
@@ -258,7 +264,8 @@ namespace photon | |
stack_size = stack_high - stack_low; | ||
#elif defined(__linux__) | ||
pthread_attr_t gattr; | ||
pthread_getattr_np(pthread_self(), &gattr); | ||
tcb_or_tp = pthread_self(); | ||
pthread_getattr_np(tcb_or_tp, &gattr); | ||
pthread_attr_getstack(&gattr, | ||
(void**)&stackful_alloc_top, &stack_size); | ||
pthread_attr_destroy(&gattr); | ||
|
@@ -287,6 +294,7 @@ namespace photon | |
#pragma GCC diagnostic ignored "-Winvalid-offsetof" | ||
static_assert(offsetof(thread, vcpu) == offsetof(partial_thread, vcpu), "..."); | ||
static_assert(offsetof(thread, tls) == offsetof(partial_thread, tls), "..."); | ||
static_assert(offsetof(thread, tcb_or_tp) - offsetof(thread, stack) == 8, "..."); | ||
#pragma GCC diagnostic pop | ||
|
||
struct thread_list : public intrusive_list<thread> | ||
|
@@ -534,14 +542,7 @@ namespace photon | |
vcpu = current->get_vcpu(); | ||
(plock = &vcpu->runq_lock) -> foreground_lock(); | ||
} | ||
mutable bool update_current = false; | ||
void set_current(thread* th) const { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is no longer any need to set/switch the thread-local variable CURRENT as part of context switching, given that it has become a fiber-local variable. |
||
current = th; | ||
update_current = true; | ||
} | ||
~AtomicRunQ() { | ||
if (update_current) | ||
*pc = current; | ||
plock->foreground_unlock(); | ||
} | ||
static void prefetch_context(thread* from, thread* to) | ||
|
@@ -560,7 +561,6 @@ namespace photon | |
assert(!current->single()); | ||
auto from = current; | ||
auto to = from->remove_from_list(); | ||
set_current(to); | ||
prefetch_context(from, to); | ||
from->state = new_state; | ||
to->state = states::RUNNING; | ||
|
@@ -571,7 +571,6 @@ namespace photon | |
prefetch_context(from, to); | ||
from->state = states::READY; | ||
to->state = states::RUNNING; | ||
set_current(to); | ||
return {from, to}; | ||
} | ||
Switch goto_next() const { | ||
|
@@ -618,12 +617,6 @@ namespace photon | |
} | ||
}; | ||
|
||
#pragma GCC diagnostic push | ||
#pragma GCC diagnostic ignored "-Winvalid-offsetof" | ||
static_assert(offsetof(thread, arg) == 0x40, "..."); | ||
static_assert(offsetof(thread, start) == 0x48, "..."); | ||
#pragma GCC diagnostic pop | ||
|
||
inline void thread::dequeue_ready_atomic(states newstat) | ||
{ | ||
assert("this is not in runq, and this->lock is locked"); | ||
|
@@ -648,8 +641,6 @@ namespace photon | |
to->get_vcpu()->switch_count++; | ||
} | ||
|
||
static void _photon_thread_die(thread* th) asm("_photon_thread_die"); | ||
|
||
#if defined(__x86_64__) | ||
#if !defined(_WIN64) | ||
asm( | ||
|
@@ -659,6 +650,8 @@ R"( | |
mov %rsp, (%rsi) | ||
mov (%rdi), %rsp | ||
pop %rbp | ||
mov 8(%rdi), %rax | ||
wrfsbase %rax | ||
ret | ||
)" | ||
|
||
|
@@ -672,18 +665,10 @@ DEF_ASM_FUNC(_photon_switch_context_defer_die) // (void* rdi_arg, void (*rsi_def | |
R"( | ||
mov (%rdx), %rsp | ||
pop %rbp | ||
mov 8(%rdx), %rax | ||
wrfsbase %rax | ||
jmp *%rsi | ||
)" | ||
|
||
DEF_ASM_FUNC(_photon_thread_stub) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. _photon_thread_stub() has become more complex and is better implemented in C++. |
||
R"( | ||
mov 0x40(%rbp), %rdi | ||
movq $0, 0x40(%rbp) | ||
call *0x48(%rbp) | ||
mov %rax, 0x48(%rbp) | ||
mov %rbp, %rdi | ||
call _photon_thread_die | ||
)" | ||
); | ||
|
||
inline void switch_context(thread* from, thread* to) { | ||
|
@@ -896,14 +881,17 @@ R"( | |
_photon_switch_context_defer_die( | ||
arg, func, sw.to->stack.pointer_ref()); | ||
} | ||
static __attribute__((used, noreturn)) | ||
void _photon_thread_die(thread* th) { | ||
static __attribute__((noreturn)) | ||
void _photon_thread_stub() { | ||
register thread* th asm("rbp"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On entry |
||
CURRENT = th; // CURRENT is now fiber-local | ||
auto arg = th->arg; | ||
th->tls = 0; // union with th->arg | ||
th->retval = th->start(arg); | ||
assert(th == CURRENT); | ||
th->die(); | ||
} | ||
|
||
extern "C" void _photon_thread_stub() asm ("_photon_thread_stub"); | ||
|
||
thread* thread_create(thread_entry start, void* arg, | ||
uint64_t stack_size, uint16_t reserved_space) { | ||
RunQ rq; | ||
|
@@ -922,7 +910,14 @@ R"( | |
stack_size, least_stack_size, least_stack_size); | ||
stack_size = least_stack_size; | ||
} | ||
char* ptr = (char*)photon_thread_alloc(stack_size); | ||
// char* ptr = (char*)photon_thread_alloc(stack_size); | ||
struct pthread* pd; | ||
char* ptr; // ptr to stack | ||
size_t pstacksize; | ||
pthread_attr_t attr; | ||
pthread_attr_init(&attr); | ||
allocate_stack((struct pthread_attr*)&attr, &pd, (void**)&ptr, &pstacksize); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We must make use of |
||
|
||
if (unlikely(!ptr)) | ||
return nullptr; | ||
uint64_t p = (uint64_t)ptr + stack_size - sizeof(thread) - randomizer; | ||
|
@@ -934,6 +929,7 @@ R"( | |
th->stack_size = stack_size; | ||
th->arg = arg; | ||
auto sp = align_down(p - reserved_space, 64); | ||
th->tcb_or_tp = (pthread_t)pd; | ||
th->stack.init((void*)sp, &_photon_thread_stub, th); | ||
AtomicRunQ arq(rq); | ||
th->vcpu = arq.vcpu; | ||
|
@@ -1405,7 +1401,7 @@ R"( | |
} | ||
void thread_exit(void* retval) { | ||
CURRENT->retval = retval; | ||
_photon_thread_die(CURRENT); | ||
CURRENT->die(); | ||
} | ||
|
||
int thread_shutdown(thread* th, bool flag) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Making the internal function
allocate_stack()
in libpthread public and accessible by, say, fiber_create().