From 2db5f84f11151a52f065bccf056611a937ba2285 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Wed, 15 Jul 2020 16:12:43 -0400 Subject: [PATCH] chore: Initial buggy port --- runtime/include/arch/aarch64/context.h | 1 + runtime/include/arch/common.h | 17 +++- runtime/include/arch/context.h | 91 +++++++++++------ runtime/include/arch/x86_64/context.h | 25 +++-- runtime/include/sandbox.h | 2 +- runtime/src/local_runqueue_minheap.c | 14 ++- runtime/src/sandbox.c | 4 +- runtime/src/software_interrupt.c | 12 ++- runtime/src/worker_thread.c | 134 +++++++++++++++++-------- 9 files changed, 207 insertions(+), 93 deletions(-) diff --git a/runtime/include/arch/aarch64/context.h b/runtime/include/arch/aarch64/context.h index c7b2e7c..4b98314 100644 --- a/runtime/include/arch/aarch64/context.h +++ b/runtime/include/arch/aarch64/context.h @@ -20,6 +20,7 @@ arch_context_init(struct arch_context *actx, reg_t ip, reg_t sp) actx->regs[UREG_RSP] = sp; actx->regs[UREG_RIP] = ip; + actx->variant = ARCH_CONTEXT_QUICK; } /** diff --git a/runtime/include/arch/common.h b/runtime/include/arch/common.h index a8e9612..343107c 100644 --- a/runtime/include/arch/common.h +++ b/runtime/include/arch/common.h @@ -22,9 +22,20 @@ enum UREGS UREG_COUNT }; +/* The enum is compared directly in assembly, so maintain integral values! */ +enum ARCH_CONTEXT +{ + ARCH_CONTEXT_UNUSED = 0, + ARCH_CONTEXT_QUICK = 1, + ARCH_CONTEXT_SLOW = 2, + ARCH_CONTEXT_RUNNING = 3 +}; + + struct arch_context { - reg_t regs[UREG_COUNT]; - mcontext_t mctx; + enum ARCH_CONTEXT variant; + reg_t regs[UREG_COUNT]; + mcontext_t mctx; }; /* @@ -35,3 +46,5 @@ extern __thread struct arch_context worker_thread_base_context; /* Cannot be inlined because called in Assembly */ void __attribute__((noinline)) __attribute__((noreturn)) arch_context_mcontext_restore(void); + +extern __thread volatile bool worker_thread_is_switching_context; diff --git a/runtime/include/arch/context.h b/runtime/include/arch/context.h index aeef5c9..c0b2d68 100644 --- a/runtime/include/arch/context.h +++ b/runtime/include/arch/context.h @@ -21,48 +21,81 @@ /** - * Preempt the current sandbox and start executing the next sandbox - * @param mc - the context of the current thread of execution - * @param ctx - the context that we want to restore - * @return Return code in {0,1} - * 0 = context restored successfully. - * 1 = special processing because thread was last in a user-level context switch state + * Restore a full mcontext + * Writes sandbox_context to active_context and then zeroes sandbox_context out + * @param active_context - the context of the current worker thread + * @param sandbox_context - the context that we want to restore */ -static inline int -arch_mcontext_restore(mcontext_t *mc, struct arch_context *ctx) +static inline void +arch_mcontext_restore(mcontext_t *active_context, struct arch_context *sandbox_context) { - assert(ctx != &worker_thread_base_context); assert(!software_interrupt_is_enabled()); - /* if ctx->regs[0] is set, this was last in a user-level context switch state! - * else restore mcontext.. 
- */
-	bool did_user_level_context_switch = ctx->regs[UREG_RSP];
-	if (did_user_level_context_switch) {
-		mc->gregs[REG_RSP] = ctx->regs[UREG_RSP];
-		mc->gregs[REG_RIP] = ctx->regs[UREG_RIP] + ARCH_SIG_JMP_OFF;
-		ctx->regs[UREG_RSP] = 0;
+	assert(active_context != NULL);
+	assert(sandbox_context != NULL);
+
+	/* Validate that the sandbox_context is well formed */
+	assert(sandbox_context->variant == ARCH_CONTEXT_SLOW);
+	assert(sandbox_context->mctx.gregs[REG_RSP] != 0);
+	assert(sandbox_context->mctx.gregs[REG_RIP] != 0);
 
-		return 1;
-	}
+	assert(sandbox_context != &worker_thread_base_context);
 
 	/* Restore mcontext */
-	memcpy(mc, &ctx->mctx, sizeof(mcontext_t));
-	memset(&ctx->mctx, 0, sizeof(mcontext_t));
+	memcpy(active_context, &sandbox_context->mctx, sizeof(mcontext_t));
+	memset(&sandbox_context->mctx, 0, sizeof(mcontext_t));
+}
+
+/**
+ * Restore a sandbox context that was saved by a user-level context switch or initialized by
+ * arch_context_init. This method restores only the instruction pointer and stack pointer
+ * registers rather than a full mcontext, so it is less expensive than arch_mcontext_restore.
+ * @param active_context - the context of the current worker thread
+ * @param sandbox_context - the context that we want to restore
+ */
+static inline void
+arch_context_restore(mcontext_t *active_context, struct arch_context *sandbox_context)
+{
+	assert(active_context != NULL);
+	assert(sandbox_context != NULL);
+	assert(sandbox_context->variant == ARCH_CONTEXT_QUICK);
+	assert(sandbox_context != &worker_thread_base_context);
 
-	return 0;
+	/* TODO: Phani explained that we need to be able to restore a sandbox with an IP of 0. Why is this? */
+	assert(sandbox_context->regs[UREG_RSP]);
+
+	active_context->gregs[REG_RSP] = sandbox_context->regs[UREG_RSP];
+	active_context->gregs[REG_RIP] = sandbox_context->regs[UREG_RIP] + ARCH_SIG_JMP_OFF;
+	sandbox_context->regs[UREG_RSP] = 0;
+	sandbox_context->regs[UREG_RIP] = 0;
 }
 
 /**
- * Save the context of the currently executing process
- * @param ctx - destination
- * @param mc - source
+ * Save the full mcontext of the currently executing process
+ * @param sandbox_context - destination
+ * @param active_context - source
  */
 static inline void
-arch_mcontext_save(struct arch_context *ctx, mcontext_t *mc)
+arch_mcontext_save(struct arch_context *sandbox_context, const mcontext_t *active_context)
 {
-	assert(ctx != &worker_thread_base_context);
+	assert(sandbox_context != NULL);
+	assert(active_context != NULL);
+
+	/* Assumption: Only called indirectly via signal handler, so interrupts should be disabled */
+	assert(!software_interrupt_is_enabled());
+
+	/* Assumption: The base context should never be modified */
+	assert(sandbox_context != &worker_thread_base_context);
+
+	/* Assumption: The executing process has sane IP and SP values */
+	assert(active_context->gregs[REG_RIP] != 0);
+	assert(active_context->gregs[REG_RSP] != 0);
+
+	/* Set variant to slow */
+	sandbox_context->variant = ARCH_CONTEXT_SLOW;
+	sandbox_context->regs[UREG_RSP] = 0;
+	sandbox_context->regs[UREG_RIP] = 0;
 
-	ctx->regs[UREG_RSP] = 0;
-	memcpy(&ctx->mctx, mc, sizeof(mcontext_t));
+	/* Copy mcontext */
+	memcpy(&sandbox_context->mctx, active_context, sizeof(mcontext_t));
 }
diff --git a/runtime/include/arch/x86_64/context.h b/runtime/include/arch/x86_64/context.h
index ddb151c..6ce1759 100644
--- a/runtime/include/arch/x86_64/context.h
+++ b/runtime/include/arch/x86_64/context.h
@@ -37,6 +37,7 @@ static void __attribute__((noinline)) arch_context_init(struct arch_context *act
 	actx->regs[UREG_RSP] = sp;
 	actx->regs[UREG_RIP] = ip;
+	actx->variant = ARCH_CONTEXT_QUICK;
 }
 
 
@@ -64,6 +65,9 @@ arch_context_switch(struct arch_context *current, struct arch_context *next)
 	if (current == NULL) current = &worker_thread_base_context;
 	if (next == NULL) next = &worker_thread_base_context;
 
+	/* Assumption: The context we are switching to should have saved a context in some form */
+	assert(next->variant != ARCH_CONTEXT_UNUSED);
+
 	reg_t *current_registers = current->regs, *next_registers = next->regs;
 	assert(current_registers && next_registers);
 
@@ -77,12 +81,13 @@ arch_context_switch(struct arch_context *current, struct arch_context *next)
 	 */
 	"movq $2f, 8(%%rax)\n\t"  /* Write the address of label 2 to current_registers[1] (instruction_pointer). */
 	"movq %%rsp, (%%rax)\n\t" /* current_registers[0] (stack_pointer) = stack_pointer */
+	"movq $1, (%%rcx)\n\t"    /* current->variant = ARCH_CONTEXT_QUICK; */
 
 	/*
 	 * Check if the variant of the context we're trying to switch to is SLOW (mcontext-based)
 	 * If it is, jump to label 1 to restore the preempted sandbox
 	 */
-	"cmpq $0, (%%rbx)\n\t"    /* if (stack pointer == 0) */
+	"cmpq $2, (%%rdx)\n\t"    /* if (next->variant == ARCH_CONTEXT_SLOW) */
 	"je 1f\n\t"               /* goto 1; restore the existing sandbox using mcontext */
 
 	/*
@@ -94,7 +99,7 @@ arch_context_switch(struct arch_context *current, struct arch_context *next)
 
 	/*
 	 * Slow Path
-	 * If the stack pointer equaled 0, that means the sandbox was preempted and we need to
+	 * If the variant is ARCH_CONTEXT_SLOW, that means the sandbox was preempted and we need to
 	 * fallback to a full mcontext-based context switch. We do this by invoking
 	 * arch_context_mcontext_restore, which fires a SIGUSR1 signal. The SIGUSR1 signal handler
 	 * executes the mcontext-based context switch.
@@ -104,21 +109,23 @@ arch_context_switch(struct arch_context *current, struct arch_context *next)
 	".align 8\n\t"
 
 	/*
-	 * Where preempted sandbox resumes
-	 * rbx contains the preempted sandbox's IP and SP in this context
+	 * This label was written to the instruction pointer of the sandbox that was switched away from
+	 * The sandbox resumes at either label 2 or label 3, depending on whether an offset of 8 is used.
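+	 * A quick restore via arch_context_restore adds ARCH_SIG_JMP_OFF (8 bytes) to the saved IP,
+	 * so the restored sandbox resumes at label 3, skipping the variant write at label 2.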
 	 */
 	"2:\n\t"
-	"movq $0, (%%rbx)\n\t"    /* stack pointer = 0 */
+	"movq $3, (%%rdx)\n\t"    /* next->variant = ARCH_CONTEXT_RUNNING; */
 	".align 8\n\t"
 
 	/* This label is used in conjunction with a static offset */
 	"3:\n\t"
+	/* TODO: Should we set next->variant = ARCH_CONTEXT_SLOW here? */
 	"popq %%rbp\n\t" /* base_pointer = stack[--stack_len]; Base Pointer is restored */
 	:
-	: "a"(current_registers), "b"(next_registers)
-	: "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0",
-	  "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13",
-	  "xmm14", "xmm15");
+	: "a"(current_registers), "b"(next_registers), "c"(&current->variant), "d"(&next->variant)
+	: "memory", "cc", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2",
+	  "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
+	  "xmm15");
 
+	worker_thread_is_switching_context = false;
 	return 0;
 }
diff --git a/runtime/include/sandbox.h b/runtime/include/sandbox.h
index 25cb029..dacb002 100644
--- a/runtime/include/sandbox.h
+++ b/runtime/include/sandbox.h
@@ -81,7 +81,7 @@ struct sandbox {
  **************************/
 
-extern __thread struct arch_context *worker_thread_next_context;
+extern __thread volatile bool worker_thread_is_switching_context;
 
 extern void worker_thread_block_current_sandbox(void);
 extern void worker_thread_on_sandbox_exit(struct sandbox *sandbox);
diff --git a/runtime/src/local_runqueue_minheap.c b/runtime/src/local_runqueue_minheap.c
index 045bb3f..05d3639 100644
--- a/runtime/src/local_runqueue_minheap.c
+++ b/runtime/src/local_runqueue_minheap.c
@@ -161,6 +161,8 @@ local_runqueue_minheap_preempt(ucontext_t *user_context)
 		next_sandbox->state = SANDBOX_RUNNABLE;
 		local_runqueue_add(next_sandbox);
 
+		worker_thread_is_switching_context = true;
+
 		/* Save the context of the currently executing sandbox before switching from it */
 		arch_mcontext_save(&current_sandbox->ctxt, &user_context->uc_mcontext);
 
@@ -168,12 +170,16 @@
 		current_sandbox_set(next_sandbox);
 
 		/*
-		 * And load the context of this new sandbox
-		 * RC of 1 indicates that sandbox was last in a user-level context switch state,
-		 * so do not enable software interrupts.
+		 * Restore the context of this new sandbox
+		 * If it was last in a user-level (quick) context switch state,
+		 * do not re-enable software interrupts here
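+		 * (a quick-restored sandbox resumes inside arch_context_switch, and the caller
+		 * of that switch re-enables software interrupts itself)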
 		 */
-		if (arch_mcontext_restore(&user_context->uc_mcontext, &next_sandbox->ctxt) == 1)
+		if (next_sandbox->ctxt.variant == ARCH_CONTEXT_SLOW) {
+			arch_mcontext_restore(&user_context->uc_mcontext, &next_sandbox->ctxt);
+		} else {
+			arch_context_restore(&user_context->uc_mcontext, &next_sandbox->ctxt);
 			should_enable_software_interrupt = false;
+		}
 	}
 done:
 	if (should_enable_software_interrupt) software_interrupt_enable();
diff --git a/runtime/src/sandbox.c b/runtime/src/sandbox.c
index 73ad543..ee988a1 100644
--- a/runtime/src/sandbox.c
+++ b/runtime/src/sandbox.c
@@ -252,7 +252,7 @@ current_sandbox_main(void)
 	assert(!software_interrupt_is_enabled());
 
 	arch_context_init(&sandbox->ctxt, 0, 0);
-	worker_thread_next_context = NULL;
+	worker_thread_is_switching_context = false;
 	software_interrupt_enable();
 
 	sandbox_initialize_io_handles_and_file_descriptors(sandbox);
@@ -410,7 +410,7 @@ sandbox_allocate(struct sandbox_request *sandbox_request)
 	sandbox->state = SANDBOX_INITIALIZING;
 
 	/* Allocate the Stack */
-	if (sandbox_allocate_stack(sandbox) == -1) {
+	if (sandbox_allocate_stack(sandbox) < 0) {
 		error_message = "failed to allocate sandbox heap and linear memory";
 		goto err_stack_allocation_failed;
 	}
diff --git a/runtime/src/software_interrupt.c b/runtime/src/software_interrupt.c
index fc4c249..8fb6dd6 100644
--- a/runtime/src/software_interrupt.c
+++ b/runtime/src/software_interrupt.c
@@ -88,7 +88,7 @@ software_interrupt_handle_signals(int signal_type, siginfo_t *signal_info, void
 	if (!software_interrupt_is_enabled()) return;
 
 	/* Do not allow more than one layer of preemption */
-	if (worker_thread_next_context) return;
+	if (worker_thread_is_switching_context) return;
 
 	/*
 	 * if a SIGALRM fires while the worker thread is between sandboxes, executing libuv, completion queue
@@ -122,13 +122,19 @@
 	assert(!software_interrupt_is_enabled());
 
 	/* Assumption: Caller sets current_sandbox to the preempted sandbox */
-	assert(worker_thread_next_context && (&current_sandbox->ctxt == worker_thread_next_context));
+	assert(current_sandbox);
+
+	/* Extra checks to verify that preemption properly set context state */
+	assert(current_sandbox->ctxt.variant == ARCH_CONTEXT_SLOW);
+	assert(current_sandbox->ctxt.mctx.gregs[REG_RSP] != 0);
+	assert(current_sandbox->ctxt.mctx.gregs[REG_RIP] != 0);
 
 	software_interrupt_SIGUSR_count++;
 	debuglog("usr1:%d\n", software_interrupt_SIGUSR_count);
 
 	arch_mcontext_restore(&user_context->uc_mcontext, &current_sandbox->ctxt);
-	worker_thread_next_context = NULL;
+
+	worker_thread_is_switching_context = false;
 	software_interrupt_enable();
 
 	return;
diff --git a/runtime/src/worker_thread.c b/runtime/src/worker_thread.c
index 066028b..3d71ba5 100644
--- a/runtime/src/worker_thread.c
+++ b/runtime/src/worker_thread.c
@@ -20,9 +20,6 @@
  * Worker Thread State *
  **************************/
 
-/* context pointer used to store and restore a preempted sandbox. SIGUSR1 */
-__thread struct arch_context *worker_thread_next_context = NULL;
-
 /* context of the runtime thread before running sandboxes or to resume its "main".
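+ * Worker threads fall back to this context (see worker_thread_switch_to_base_context)
+ * when the local runqueue has no runnable sandbox.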
  */
 __thread struct arch_context worker_thread_base_context;
 
@@ -32,14 +29,18 @@ __thread uv_loop_t worker_thread_uvio_handle;
 /* Flag to signify if the thread is currently running callbacks in the libuv event loop */
 static __thread bool worker_thread_is_in_libuv_event_loop = false;
 
+/* Flag to signify if the thread is currently undergoing a context switch */
+__thread volatile bool worker_thread_is_switching_context = false;
+
 /***********************
  * Worker Thread Logic *
  **********************/
 
 /**
 * @brief Switches to the next sandbox, placing the current sandbox on the completion queue if in SANDBOX_RETURNED state
- * @param next_sandbox The Sandbox Context to switch to or NULL, which forces return to base context
- * @return void
+ * @param next_sandbox The sandbox to switch to
+ *
+ * TODO: Confirm that this can gracefully resume sandboxes in a PREEMPTED state
  */
 static inline void
 worker_thread_switch_to_sandbox(struct sandbox *next_sandbox)
 {
 	/* Assumption: The caller disables interrupts */
 	assert(software_interrupt_is_disabled);
 
-	struct arch_context *next_register_context = NULL;
-	if (next_sandbox != NULL) next_register_context = &next_sandbox->ctxt;
+	assert(next_sandbox != NULL);
+	struct arch_context *next_context = &next_sandbox->ctxt;
+
+	worker_thread_is_switching_context = true;
 
 	/* Get the old sandbox we're switching from */
-	struct sandbox * previous_sandbox = current_sandbox_get();
-	struct arch_context *previous_register_context = NULL;
-	if (previous_sandbox != NULL) previous_register_context = &previous_sandbox->ctxt;
-
-	/* Set the current sandbox to the next */
-	current_sandbox_set(next_sandbox);
-
-	/* ...and switch to the associated context.
-	 * Save the context pointer to worker_thread_next_context in case of preemption
-	 */
-	worker_thread_next_context = next_register_context;
-	arch_context_switch(previous_register_context, next_register_context);
-
-	/* If previous sandbox is not NULL, ensure a valid state */
-	assert(previous_sandbox == NULL || previous_sandbox->state != SANDBOX_INITIALIZING);
-
-	/* If the current sandbox we're switching from is in a SANDBOX_RETURNED state, add to completion queue */
-	if (previous_sandbox != NULL && previous_sandbox->state == SANDBOX_RETURNED) {
-		panic("Unexpectedly returned to a sandbox in a RETURNED state\n");
-	} else if (previous_sandbox != NULL) {
-		debuglog("Resumed a sandbox in state %d\n", previous_sandbox->state);
+	struct sandbox *current_sandbox = current_sandbox_get();
+
+	if (current_sandbox == NULL) {
+		/* Switching from "Base Context" */
+		current_sandbox_set(next_sandbox);
+
+		debuglog("Thread %lu | Switching from Base Context to Sandbox %lu\n", pthread_self(),
+		         next_sandbox->allocation_timestamp);
+
+		arch_context_switch(NULL, next_context);
+
+		/* Reload the current sandbox; the scheduler may have changed it before resuming here */
+		current_sandbox = current_sandbox_get();
+		if (current_sandbox != NULL && current_sandbox->state == SANDBOX_RETURNED) {
+			panic("Unexpectedly returned to a sandbox in a RETURNED state\n");
+		}
+	} else {
+		/* Set the current sandbox to the next */
+		assert(next_sandbox != current_sandbox);
+
+		struct arch_context *current_context = &current_sandbox->ctxt;
+
+		current_sandbox_set(next_sandbox);
+
+		debuglog("Thread %lu | Switching from Sandbox %lu to Sandbox %lu\n", pthread_self(),
+		         current_sandbox->allocation_timestamp, next_sandbox->allocation_timestamp);
+
+		/* Switch to the associated context.
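+		 * arch_context_switch saves this sandbox's stack and instruction pointers into
+		 * current_context and marks it ARCH_CONTEXT_QUICK before restoring next_context.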
+		 */
+		arch_context_switch(current_context, next_context);
 	}
 
 	software_interrupt_enable();
 }
 
+/**
+ * @brief Switches to the base context, placing the current sandbox on the completion queue if in RETURNED state
+ */
+static inline void
+worker_thread_switch_to_base_context(void)
+{
+	software_interrupt_disable();
+	assert(worker_thread_is_switching_context == false);
+	worker_thread_is_switching_context = true;
+
+	struct sandbox *current_sandbox = current_sandbox_get();
+
+	/* Assumption: Base Context should never switch to Base Context */
+	assert(current_sandbox != NULL);
+	assert(&current_sandbox->ctxt != &worker_thread_base_context);
+
+	current_sandbox_set(NULL);
+
+	debuglog("Thread %lu | Switching from Sandbox %lu to Base Context\n", pthread_self(),
+	         current_sandbox->allocation_timestamp);
+
+	arch_context_switch(&current_sandbox->ctxt, &worker_thread_base_context);
+
+	software_interrupt_enable();
+	worker_thread_is_switching_context = false;
+}
+
 /**
  * Mark a blocked sandbox as runnable and add it to the runqueue
  * @param sandbox the sandbox to check and update if blocked
@@ -99,9 +136,12 @@ done:
 
 /**
- * Mark the currently executing sandbox as blocked, remove it from the local runqueue, and pull the sandbox at the head
- * of the runqueue
- */
+ * Mark the currently executing sandbox as blocked, remove it from the local runqueue,
+ * and pull the sandbox at the head of the runqueue
+ *
+ * FIXME: What happens if we block on a sandbox that has preempted something? Should we try to restore it first?
+ * Is this accomplished by the runqueue design?
+ */
 void
 worker_thread_block_current_sandbox(void)
 {
 	software_interrupt_disable();
 
 	/* Remove the sandbox we were just executing from the runqueue and mark as blocked */
-	struct sandbox *previous_sandbox = current_sandbox_get();
-	local_runqueue_delete(previous_sandbox);
-	previous_sandbox->state = SANDBOX_BLOCKED;
+	struct sandbox *current_sandbox = current_sandbox_get();
+	local_runqueue_delete(current_sandbox);
+	current_sandbox->state = SANDBOX_BLOCKED;
 
 	/* Switch to the next sandbox */
 	struct sandbox *next_sandbox = local_runqueue_get_next();
-	debuglog("[%p: %next_sandbox, %p: %next_sandbox]\n", previous_sandbox, previous_sandbox->module->name,
+	debuglog("[%p: %s, %p: %s]\n", current_sandbox, current_sandbox->module->name,
 	         next_sandbox, next_sandbox ? next_sandbox->module->name : "");
-	worker_thread_switch_to_sandbox(next_sandbox);
+
+	/* If able to get one, switch to it. Otherwise, return to base context */
+	if (next_sandbox == NULL) {
+		worker_thread_switch_to_base_context();
+	} else {
+		debuglog("[%p: %s, %p: %s]\n", current_sandbox, current_sandbox->module->name, next_sandbox,
+		         next_sandbox ? next_sandbox->module->name : "");
+		// TODO: Looks like a zombie: software_interrupt_enable();
+		worker_thread_switch_to_sandbox(next_sandbox);
+	}
 }
 
 
@@ -129,9 +178,9 @@ worker_thread_process_io(void)
 {
 #ifdef USE_HTTP_UVIO
 #ifdef USE_HTTP_SYNC
-	/* realistically, we're processing all async I/O on this core when a sandbox blocks on http processing, not
-	 * great! if there is a way (TODO), perhaps RUN_ONCE and check if your I/O is processed, if yes, return else do
-	 * async block! */
+	/* realistically, we're processing all async I/O on this core when a sandbox blocks on http processing,
+	 * not great! if there is a way (TODO), perhaps RUN_ONCE and check if your I/O is processed, if yes,
+	 * return else do async block!
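+	 * (UV_RUN_DEFAULT runs the loop until no active and referenced handles or requests remain)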
+	 */
 	uv_run(worker_thread_get_libuv_handle(), UV_RUN_DEFAULT);
 #else /* USE_HTTP_SYNC */
 	worker_thread_block_current_sandbox();
@@ -178,7 +227,7 @@ worker_thread_main(void *return_code)
 	/* Initialize Flags */
 	software_interrupt_is_disabled       = false;
 	worker_thread_is_in_libuv_event_loop = false;
-	worker_thread_next_context          = NULL;
+	worker_thread_is_switching_context   = false;
 
 	/* Unmask signals */
 #ifndef PREEMPT_DISABLE
@@ -227,9 +276,8 @@ worker_thread_on_sandbox_exit(struct sandbox *exiting_sandbox)
 
 	/* Because the stack is still in use, only unmap linear memory and defer free resources until
 	 * "main function execution" */
-	errno = 0;
 	int rc = munmap(exiting_sandbox->linear_memory_start, SBOX_MAX_MEM + PAGE_SIZE);
-	if (rc == -1) panic("worker_thread_on_sandbox_exit - munmap failed with errno - %s\n", strerror(errno));
+	if (rc == -1) perror("worker_thread_on_sandbox_exit - munmap failed");
 
 	/* TODO: I do not understand when software interrupts must be disabled? */
 	software_interrupt_disable();
@@ -238,7 +286,7 @@
 	local_completion_queue_add(exiting_sandbox);
 
 	/* This should force return to main event loop */
-	worker_thread_switch_to_sandbox(NULL);
+	worker_thread_switch_to_base_context();
 	assert(0);
 }
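
A minimal illustrative sketch, not part of the patch: it condenses the variant dispatch that
local_runqueue_minheap_preempt performs when resuming a sandbox. The helper name
resume_sandbox_context is hypothetical; arch_mcontext_restore, arch_context_restore, and
enum ARCH_CONTEXT are the definitions introduced above.

#include <assert.h>
#include <stdbool.h>
#include <ucontext.h>

#include "arch/context.h"

/* Hypothetical helper: restore `target` into the interrupted thread's mcontext.
 * Returns true if the caller may re-enable software interrupts afterwards. */
static bool
resume_sandbox_context(mcontext_t *active_context, struct arch_context *target)
{
	switch (target->variant) {
	case ARCH_CONTEXT_SLOW:
		/* The sandbox was preempted by a signal, so arch_mcontext_save captured its
		 * full mcontext, which must be restored wholesale */
		arch_mcontext_restore(active_context, target);
		return true;
	case ARCH_CONTEXT_QUICK:
		/* The sandbox switched away cooperatively (or was freshly initialized), so
		 * only its instruction and stack pointers need to be written back; the code
		 * it resumes re-enables software interrupts itself */
		arch_context_restore(active_context, target);
		return false;
	default:
		/* UNUSED and RUNNING contexts must never be restored */
		assert(0);
		return false;
	}
}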