diff --git a/glibc-2.40.patch b/glibc-2.40.patch new file mode 100644 index 00000000..c1bcf1e6 --- /dev/null +++ b/glibc-2.40.patch @@ -0,0 +1,53 @@ +--- a/nptl/allocatestack.c ++++ b/nptl/allocatestack.c +@@ -210,7 +210,7 @@ advise_stack_range (void *mem, size_t size, uintptr_t pd, size_t guardsize) + new stack or reusing a cached stack of sufficient size. + ATTR must be non-NULL and point to a valid pthread_attr. + PDP must be non-NULL. */ +-static int ++int + allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, + void **stack, size_t *stacksize) + { +@@ -403,6 +403,18 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp, + } + } + ++ /* Initialize the TCB. All initializations with zero should be ++ performed in 'get_cached_stack'. This way we avoid doing this if ++ the stack freshly allocated with 'mmap'. */ ++ ++#if TLS_TCB_AT_TP ++ /* Reference to the TCB itself. */ ++ pd->header.self = pd; ++ ++ /* Self-reference for TLS. */ ++ pd->header.tcb = pd; ++#endif ++ + /* Remember the stack-related values. */ + pd->stackblock = mem; + pd->stackblock_size = size; +diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c +index 1d3665d5..6ee79035 100644 +--- a/nptl/pthread_create.c ++++ b/nptl/pthread_create.c +@@ -665,18 +665,6 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr, + } + + +- /* Initialize the TCB. All initializations with zero should be +- performed in 'get_cached_stack'. This way we avoid doing this if +- the stack freshly allocated with 'mmap'. */ +- +-#if TLS_TCB_AT_TP +- /* Reference to the TCB itself. */ +- pd->header.self = pd; +- +- /* Self-reference for TLS. */ +- pd->header.tcb = pd; +-#endif +- + /* Store the address of the start routine and the parameter. Since + we do not start the function directly the stillborn thread will + get the information from its thread descriptor. */ diff --git a/thread/thread.cpp b/thread/thread.cpp index f3595163..ed721f40 100644 --- a/thread/thread.cpp +++ b/thread/thread.cpp @@ -31,6 +31,7 @@ limitations under the License. #include #include #include +#include #ifdef _WIN64 #include @@ -53,6 +54,10 @@ inline int posix_memalign(void** memptr, size_t alignment, size_t size) { #include #include +struct pthread_attr; +extern "C" int allocate_stack(const struct pthread_attr *attr, + struct pthread **pdp, void **stack, size_t *stacksize); + /* notes on the scheduler: 1. runq (denoted by CURRENT) and sleepq are compeltely private, @@ -173,6 +178,7 @@ namespace photon struct thread : public intrusive_list_node { volatile vcpu_t* vcpu; Stack stack; + pthread_t tcb_or_tp; // offset 32B int idx = -1; /* index in the sleep queue array */ int error_number = 0; @@ -258,7 +264,8 @@ namespace photon stack_size = stack_high - stack_low; #elif defined(__linux__) pthread_attr_t gattr; - pthread_getattr_np(pthread_self(), &gattr); + tcb_or_tp = pthread_self(); + pthread_getattr_np(tcb_or_tp, &gattr); pthread_attr_getstack(&gattr, (void**)&stackful_alloc_top, &stack_size); pthread_attr_destroy(&gattr); @@ -287,6 +294,7 @@ namespace photon #pragma GCC diagnostic ignored "-Winvalid-offsetof" static_assert(offsetof(thread, vcpu) == offsetof(partial_thread, vcpu), "..."); static_assert(offsetof(thread, tls) == offsetof(partial_thread, tls), "..."); + static_assert(offsetof(thread, tcb_or_tp) - offsetof(thread, stack) == 8, "..."); #pragma GCC diagnostic pop struct thread_list : public intrusive_list @@ -534,14 +542,7 @@ namespace photon vcpu = current->get_vcpu(); (plock = &vcpu->runq_lock) -> foreground_lock(); } - mutable bool update_current = false; - void set_current(thread* th) const { - current = th; - update_current = true; - } ~AtomicRunQ() { - if (update_current) - *pc = current; plock->foreground_unlock(); } static void prefetch_context(thread* from, thread* to) @@ -560,7 +561,6 @@ namespace photon assert(!current->single()); auto from = current; auto to = from->remove_from_list(); - set_current(to); prefetch_context(from, to); from->state = new_state; to->state = states::RUNNING; @@ -571,7 +571,6 @@ namespace photon prefetch_context(from, to); from->state = states::READY; to->state = states::RUNNING; - set_current(to); return {from, to}; } Switch goto_next() const { @@ -618,12 +617,6 @@ namespace photon } }; -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Winvalid-offsetof" - static_assert(offsetof(thread, arg) == 0x40, "..."); - static_assert(offsetof(thread, start) == 0x48, "..."); -#pragma GCC diagnostic pop - inline void thread::dequeue_ready_atomic(states newstat) { assert("this is not in runq, and this->lock is locked"); @@ -648,8 +641,6 @@ namespace photon to->get_vcpu()->switch_count++; } - static void _photon_thread_die(thread* th) asm("_photon_thread_die"); - #if defined(__x86_64__) #if !defined(_WIN64) asm( @@ -659,6 +650,8 @@ R"( mov %rsp, (%rsi) mov (%rdi), %rsp pop %rbp + mov 8(%rdi), %rax + wrfsbase %rax ret )" @@ -672,18 +665,10 @@ DEF_ASM_FUNC(_photon_switch_context_defer_die) // (void* rdi_arg, void (*rsi_def R"( mov (%rdx), %rsp pop %rbp + mov 8(%rdx), %rax + wrfsbase %rax jmp *%rsi )" - -DEF_ASM_FUNC(_photon_thread_stub) -R"( - mov 0x40(%rbp), %rdi - movq $0, 0x40(%rbp) - call *0x48(%rbp) - mov %rax, 0x48(%rbp) - mov %rbp, %rdi - call _photon_thread_die -)" ); inline void switch_context(thread* from, thread* to) { @@ -896,14 +881,17 @@ R"( _photon_switch_context_defer_die( arg, func, sw.to->stack.pointer_ref()); } - static __attribute__((used, noreturn)) - void _photon_thread_die(thread* th) { + static __attribute__((noreturn)) + void _photon_thread_stub() { + register thread* th asm("rbp"); + CURRENT = th; // CURRENT is now fiber-local + auto arg = th->arg; + th->tls = 0; // union with th->arg + th->retval = th->start(arg); assert(th == CURRENT); th->die(); } - extern "C" void _photon_thread_stub() asm ("_photon_thread_stub"); - thread* thread_create(thread_entry start, void* arg, uint64_t stack_size, uint16_t reserved_space) { RunQ rq; @@ -922,7 +910,14 @@ R"( stack_size, least_stack_size, least_stack_size); stack_size = least_stack_size; } - char* ptr = (char*)photon_thread_alloc(stack_size); + // char* ptr = (char*)photon_thread_alloc(stack_size); + struct pthread* pd; + char* ptr; // ptr to stack + size_t pstacksize; + pthread_attr_t attr; + pthread_attr_init(&attr); + allocate_stack((struct pthread_attr*)&attr, &pd, (void**)&ptr, &pstacksize); + if (unlikely(!ptr)) return nullptr; uint64_t p = (uint64_t)ptr + stack_size - sizeof(thread) - randomizer; @@ -934,6 +929,7 @@ R"( th->stack_size = stack_size; th->arg = arg; auto sp = align_down(p - reserved_space, 64); + th->tcb_or_tp = (pthread_t)pd; th->stack.init((void*)sp, &_photon_thread_stub, th); AtomicRunQ arq(rq); th->vcpu = arq.vcpu; @@ -1405,7 +1401,7 @@ R"( } void thread_exit(void* retval) { CURRENT->retval = retval; - _photon_thread_die(CURRENT); + CURRENT->die(); } int thread_shutdown(thread* th, bool flag) diff --git a/thread/thread.h b/thread/thread.h index e99999eb..069bdce5 100644 --- a/thread/thread.h +++ b/thread/thread.h @@ -145,7 +145,7 @@ namespace photon struct partial_thread { uint64_t _, __; volatile vcpu_base* vcpu; - uint64_t ___[5]; + uint64_t ___[6]; void* tls; };