From 3a60134d4476cd9eb0745583cb86c7a3d13af439 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Mon, 15 Nov 2021 20:23:14 -0500 Subject: [PATCH] refactor: scheduler cleanup --- .vscode/settings.json | 11 +- runtime/Makefile | 3 + runtime/include/current_sandbox.h | 3 + runtime/include/sandbox_exit.h | 36 --- runtime/include/sandbox_functions.h | 46 +--- runtime/include/sandbox_print_perf.h | 34 +++ runtime/include/sandbox_receive_request.h | 2 +- runtime/include/sandbox_send_response.h | 2 +- ...t_as_blocked.h => sandbox_set_as_asleep.h} | 18 +- runtime/include/sandbox_set_as_complete.h | 2 + runtime/include/sandbox_set_as_error.h | 2 + runtime/include/sandbox_set_as_initialized.h | 1 + runtime/include/sandbox_set_as_preempted.h | 7 +- runtime/include/sandbox_set_as_returned.h | 1 + runtime/include/sandbox_set_as_runnable.h | 13 +- .../include/sandbox_set_as_running_kernel.h | 11 +- runtime/include/sandbox_set_as_running_user.h | 7 +- runtime/include/sandbox_state.h | 2 +- runtime/include/sandbox_state_history.h | 14 ++ runtime/include/sandbox_types.h | 1 - runtime/include/scheduler.h | 234 ++++++++---------- ..._loop.h => scheduler_execute_epoll_loop.h} | 8 +- runtime/include/software_interrupt.h | 2 +- runtime/src/current_sandbox.c | 80 +++++- runtime/src/libc/syscall.c | 5 +- runtime/src/main.c | 2 +- runtime/src/runtime.c | 2 +- runtime/src/sandbox_state.c | 2 +- runtime/src/software_interrupt.c | 51 ++-- runtime/src/worker_thread.c | 18 +- 30 files changed, 337 insertions(+), 283 deletions(-) delete mode 100644 runtime/include/sandbox_exit.h create mode 100644 runtime/include/sandbox_print_perf.h rename runtime/include/{sandbox_set_as_blocked.h => sandbox_set_as_asleep.h} (69%) create mode 100644 runtime/include/sandbox_state_history.h rename runtime/include/{worker_thread_execute_epoll_loop.h => scheduler_execute_epoll_loop.h} (91%) diff --git a/.vscode/settings.json b/.vscode/settings.json index 1084464..554a940 100644 --- a/.vscode/settings.json +++ 
b/.vscode/settings.json @@ -87,12 +87,19 @@ "sandbox_set_as_error.h": "c", "likely.h": "c", "debuglog.h": "c", - "worker_thread_execute_epoll_loop.h": "c", + "scheduler_execute_epoll_loop.h": "c", "sandbox_set_as_running.h": "c", "sandbox_summarize_page_allocations.h": "c", "wasm_types.h": "c", "atomic": "c", - "sandbox_set_as_running_kernel.h": "c" + "sandbox_set_as_running_kernel.h": "c", + "stdbool.h": "c", + "sandbox_set_as_asleep.h": "c", + "sandbox_print_perf.h": "c", + "sandbox_state_history.h": "c", + "sandbox_set_as_running_user.h": "c", + "scheduler.h": "c", + "sandbox_set_as_returned.h": "c" }, "files.exclude": { "**/.git": true, diff --git a/runtime/Makefile b/runtime/Makefile index 7c4ed62..0a38294 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -61,6 +61,9 @@ BINARY_NAME=sledgert # The history trucates when the number of elements equal SANDBOX_STATE_HISTORY_CAPACITY # CFLAGS += -DLOG_STATE_CHANGES +# Stores the max number of deferred SIGALRMS for each worker +# CFLAGS += -DLOG_DEFERRED_SIGALRM_MAX + # This dumps per module *.csv files containing the cycle a sandbox has been in RUNNING when each # page is allocated. This helps understand the relationship to memory allocation and execution time. 
# CFLAGS += -DLOG_SANDBOX_MEMORY_PROFILE diff --git a/runtime/include/current_sandbox.h b/runtime/include/current_sandbox.h index d204eb4..988eb42 100644 --- a/runtime/include/current_sandbox.h +++ b/runtime/include/current_sandbox.h @@ -49,3 +49,6 @@ current_sandbox_set(struct sandbox *sandbox) runtime_worker_threads_deadline[worker_thread_idx] = sandbox->absolute_deadline; } } + + +extern void current_sandbox_sleep(); diff --git a/runtime/include/sandbox_exit.h b/runtime/include/sandbox_exit.h deleted file mode 100644 index 54ffb05..0000000 --- a/runtime/include/sandbox_exit.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#include -#include - -#include "panic.h" -#include "sandbox_state.h" -#include "sandbox_set_as_complete.h" - -/** - * Conditionally triggers appropriate state changes for exiting sandboxes - * @param exiting_sandbox - The sandbox that ran to completion - */ -static inline void -sandbox_exit(struct sandbox *exiting_sandbox) -{ - assert(exiting_sandbox != NULL); - - switch (exiting_sandbox->state) { - case SANDBOX_RETURNED: - /* - * We draw a distinction between RETURNED and COMPLETED because a sandbox cannot add itself to the - * completion queue - * TODO: I think this executes when running inside the sandbox, as it hasn't yet yielded - * See Issue #224 at https://github.com/gwsystems/sledge-serverless-framework/issues/224 - */ - sandbox_set_as_complete(exiting_sandbox, SANDBOX_RETURNED); - break; - case SANDBOX_BLOCKED: - case SANDBOX_ERROR: - break; - default: - panic("Cooperatively switching from a sandbox in a non-terminal %s state\n", - sandbox_state_stringify(exiting_sandbox->state)); - } -} diff --git a/runtime/include/sandbox_functions.h b/runtime/include/sandbox_functions.h index 4c45fef..6bc0798 100644 --- a/runtime/include/sandbox_functions.h +++ b/runtime/include/sandbox_functions.h @@ -59,6 +59,12 @@ sandbox_get_priority(void *element) return sandbox->absolute_deadline; }; +static inline bool +sandbox_is_preemptable(struct sandbox 
*sandbox) +{ + return sandbox && sandbox->state == SANDBOX_RUNNING_USER; +}; + static inline void sandbox_open_http(struct sandbox *sandbox) { @@ -77,43 +83,3 @@ sandbox_open_http(struct sandbox *sandbox) &accept_evt); if (unlikely(rc < 0)) panic_err(); } - -/** - * Prints key performance metrics for a sandbox to runtime_sandbox_perf_log - * This is defined by an environment variable - * @param sandbox - */ -static inline void -sandbox_print_perf(struct sandbox *sandbox) -{ - /* If the log was not defined by an environment variable, early out */ - if (runtime_sandbox_perf_log == NULL) return; - - uint64_t queued_duration = sandbox->timestamp_of.allocation - sandbox->timestamp_of.request_arrival; - - /* - * Assumption: A sandbox is never able to free pages. If linear memory management - * becomes more intelligent, then peak linear memory size needs to be tracked - * seperately from current linear memory size. - */ - fprintf(runtime_sandbox_perf_log, - "%lu,%s,%d,%s,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%u,%u\n", sandbox->id, - sandbox->module->name, sandbox->module->port, sandbox_state_stringify(sandbox->state), - sandbox->module->relative_deadline, sandbox->total_time, queued_duration, - sandbox->duration_of_state[SANDBOX_UNINITIALIZED], sandbox->duration_of_state[SANDBOX_ALLOCATED], - sandbox->duration_of_state[SANDBOX_INITIALIZED], sandbox->duration_of_state[SANDBOX_RUNNABLE], - sandbox->duration_of_state[SANDBOX_PREEMPTED], sandbox->duration_of_state[SANDBOX_RUNNING_KERNEL], - sandbox->duration_of_state[SANDBOX_RUNNING_USER], sandbox->duration_of_state[SANDBOX_BLOCKED], - sandbox->duration_of_state[SANDBOX_RETURNED], sandbox->duration_of_state[SANDBOX_COMPLETE], - sandbox->duration_of_state[SANDBOX_ERROR], runtime_processor_speed_MHz, sandbox->memory.size); -} - -static inline void -sandbox_state_history_append(struct sandbox *sandbox, sandbox_state_t state) -{ -#ifdef LOG_STATE_CHANGES - if (likely(sandbox->state_history_count < 
SANDBOX_STATE_HISTORY_CAPACITY)) { - sandbox->state_history[sandbox->state_history_count++] = state; - } -#endif -} diff --git a/runtime/include/sandbox_print_perf.h b/runtime/include/sandbox_print_perf.h new file mode 100644 index 0000000..e673433 --- /dev/null +++ b/runtime/include/sandbox_print_perf.h @@ -0,0 +1,34 @@ +#pragma once + +#include "runtime.h" +#include "sandbox_types.h" + +/** + * Prints key performance metrics for a sandbox to runtime_sandbox_perf_log + * This is defined by an environment variable + * @param sandbox + */ +static inline void +sandbox_print_perf(struct sandbox *sandbox) +{ + /* If the log was not defined by an environment variable, early out */ + if (runtime_sandbox_perf_log == NULL) return; + + uint64_t queued_duration = sandbox->timestamp_of.allocation - sandbox->timestamp_of.request_arrival; + + /* + * Assumption: A sandbox is never able to free pages. If linear memory management + * becomes more intelligent, then peak linear memory size needs to be tracked + * seperately from current linear memory size. 
+ */ + fprintf(runtime_sandbox_perf_log, + "%lu,%s,%d,%s,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%u,%u\n", sandbox->id, + sandbox->module->name, sandbox->module->port, sandbox_state_stringify(sandbox->state), + sandbox->module->relative_deadline, sandbox->total_time, queued_duration, + sandbox->duration_of_state[SANDBOX_UNINITIALIZED], sandbox->duration_of_state[SANDBOX_ALLOCATED], + sandbox->duration_of_state[SANDBOX_INITIALIZED], sandbox->duration_of_state[SANDBOX_RUNNABLE], + sandbox->duration_of_state[SANDBOX_PREEMPTED], sandbox->duration_of_state[SANDBOX_RUNNING_KERNEL], + sandbox->duration_of_state[SANDBOX_RUNNING_USER], sandbox->duration_of_state[SANDBOX_ASLEEP], + sandbox->duration_of_state[SANDBOX_RETURNED], sandbox->duration_of_state[SANDBOX_COMPLETE], + sandbox->duration_of_state[SANDBOX_ERROR], runtime_processor_speed_MHz, sandbox->memory.size); +} diff --git a/runtime/include/sandbox_receive_request.h b/runtime/include/sandbox_receive_request.h index 85c3233..3189f2b 100644 --- a/runtime/include/sandbox_receive_request.h +++ b/runtime/include/sandbox_receive_request.h @@ -47,7 +47,7 @@ sandbox_receive_request(struct sandbox *sandbox) if (bytes_received == -1) { if (errno == EAGAIN) { - scheduler_block(); + current_sandbox_sleep(); continue; } else { debuglog("Error reading socket %d - %s\n", sandbox->client_socket_descriptor, diff --git a/runtime/include/sandbox_send_response.h b/runtime/include/sandbox_send_response.h index 8ccc323..27b90dc 100644 --- a/runtime/include/sandbox_send_response.h +++ b/runtime/include/sandbox_send_response.h @@ -54,7 +54,7 @@ sandbox_send_response(struct sandbox *sandbox) rc = write(sandbox->client_socket_descriptor, response_header, response_size - sent); if (rc < 0) { if (errno == EAGAIN) - scheduler_block(); + current_sandbox_sleep(); else { perror("write"); goto err; diff --git a/runtime/include/sandbox_set_as_blocked.h b/runtime/include/sandbox_set_as_asleep.h similarity index 69% rename from 
runtime/include/sandbox_set_as_blocked.h rename to runtime/include/sandbox_set_as_asleep.h index f003526..4b52e6f 100644 --- a/runtime/include/sandbox_set_as_blocked.h +++ b/runtime/include/sandbox_set_as_asleep.h @@ -7,9 +7,10 @@ #include "local_runqueue.h" #include "sandbox_types.h" #include "sandbox_state.h" +#include "sandbox_state_history.h" /** - * Transitions a sandbox to the SANDBOX_BLOCKED state. + * Transitions a sandbox to the SANDBOX_ASLEEP state. * This occurs when a sandbox is executing and it makes a blocking API call of some kind. * Automatically removes the sandbox from the runqueue * @param sandbox the blocking sandbox @@ -17,10 +18,10 @@ * enable the compiler to perform constant propagation optimizations. */ static inline void -sandbox_set_as_blocked(struct sandbox *sandbox, sandbox_state_t last_state) +sandbox_set_as_asleep(struct sandbox *sandbox, sandbox_state_t last_state) { assert(sandbox); - sandbox->state = SANDBOX_BLOCKED; + sandbox->state = SANDBOX_ASLEEP; uint64_t now = __getcycles(); switch (last_state) { @@ -37,7 +38,14 @@ sandbox_set_as_blocked(struct sandbox *sandbox, sandbox_state_t last_state) /* State Change Bookkeeping */ sandbox->duration_of_state[last_state] += (now - sandbox->timestamp_of.last_state_change); sandbox->timestamp_of.last_state_change = now; - sandbox_state_history_append(sandbox, SANDBOX_BLOCKED); - runtime_sandbox_total_increment(SANDBOX_BLOCKED); + sandbox_state_history_append(sandbox, SANDBOX_ASLEEP); + runtime_sandbox_total_increment(SANDBOX_ASLEEP); runtime_sandbox_total_decrement(last_state); } + +static inline void +sandbox_sleep(struct sandbox *sandbox) +{ + assert(sandbox->state == SANDBOX_RUNNING_KERNEL); + sandbox_set_as_asleep(sandbox, SANDBOX_RUNNING_KERNEL); +} diff --git a/runtime/include/sandbox_set_as_complete.h b/runtime/include/sandbox_set_as_complete.h index f2b6b3f..c87d3d7 100644 --- a/runtime/include/sandbox_set_as_complete.h +++ b/runtime/include/sandbox_set_as_complete.h @@ -7,7 +7,9 @@ 
#include "panic.h" #include "local_completion_queue.h" #include "sandbox_functions.h" +#include "sandbox_print_perf.h" #include "sandbox_state.h" +#include "sandbox_state_history.h" #include "sandbox_summarize_page_allocations.h" #include "sandbox_types.h" diff --git a/runtime/include/sandbox_set_as_error.h b/runtime/include/sandbox_set_as_error.h index bad7ab2..3828eb6 100644 --- a/runtime/include/sandbox_set_as_error.h +++ b/runtime/include/sandbox_set_as_error.h @@ -8,6 +8,8 @@ #include "local_runqueue.h" #include "sandbox_state.h" #include "sandbox_functions.h" +#include "sandbox_print_perf.h" +#include "sandbox_state_history.h" #include "sandbox_summarize_page_allocations.h" #include "panic.h" diff --git a/runtime/include/sandbox_set_as_initialized.h b/runtime/include/sandbox_set_as_initialized.h index 0b7f8f9..484fa52 100644 --- a/runtime/include/sandbox_set_as_initialized.h +++ b/runtime/include/sandbox_set_as_initialized.h @@ -8,6 +8,7 @@ #include "current_sandbox.h" #include "ps_list.h" #include "sandbox_request.h" +#include "sandbox_state_history.h" #include "sandbox_types.h" /** diff --git a/runtime/include/sandbox_set_as_preempted.h b/runtime/include/sandbox_set_as_preempted.h index 87e9884..37ac2e2 100644 --- a/runtime/include/sandbox_set_as_preempted.h +++ b/runtime/include/sandbox_set_as_preempted.h @@ -6,14 +6,13 @@ #include "arch/getcycles.h" #include "local_runqueue.h" #include "panic.h" +#include "sandbox_state_history.h" #include "sandbox_types.h" /** * Transitions a sandbox to the SANDBOX_PREEMPTED state. * - * This occurs in the following scenarios: - * - A sandbox in the SANDBOX_INITIALIZED state completes initialization and is ready to be run - * - A sandbox in the SANDBOX_BLOCKED state completes what was blocking it and is ready to be run + * This occurs when a sandbox is preempted in the SIGALRM handler * * @param sandbox * @param last_state the state the sandbox is transitioning from. 
This is expressed as a constant to @@ -27,7 +26,7 @@ sandbox_set_as_preempted(struct sandbox *sandbox, sandbox_state_t last_state) uint64_t now = __getcycles(); switch (last_state) { - case SANDBOX_RUNNING_KERNEL: { + case SANDBOX_RUNNING_USER: { current_sandbox_set(NULL); break; } diff --git a/runtime/include/sandbox_set_as_returned.h b/runtime/include/sandbox_set_as_returned.h index 90bedd4..cfe4410 100644 --- a/runtime/include/sandbox_set_as_returned.h +++ b/runtime/include/sandbox_set_as_returned.h @@ -8,6 +8,7 @@ #include "panic.h" #include "sandbox_functions.h" #include "sandbox_state.h" +#include "sandbox_state_history.h" #include "sandbox_types.h" /** diff --git a/runtime/include/sandbox_set_as_runnable.h b/runtime/include/sandbox_set_as_runnable.h index b043e0f..edb0eb5 100644 --- a/runtime/include/sandbox_set_as_runnable.h +++ b/runtime/include/sandbox_set_as_runnable.h @@ -6,6 +6,7 @@ #include "arch/getcycles.h" #include "local_runqueue.h" #include "panic.h" +#include "sandbox_state_history.h" #include "sandbox_types.h" /** @@ -13,7 +14,7 @@ * * This occurs in the following scenarios: * - A sandbox in the SANDBOX_INITIALIZED state completes initialization and is ready to be run - * - A sandbox in the SANDBOX_BLOCKED state completes what was blocking it and is ready to be run + * - A sandbox in the SANDBOX_ASLEEP state completes what was blocking it and is ready to be run * * @param sandbox * @param last_state the state the sandbox is transitioning from. 
This is expressed as a constant to @@ -31,7 +32,7 @@ sandbox_set_as_runnable(struct sandbox *sandbox, sandbox_state_t last_state) local_runqueue_add(sandbox); break; } - case SANDBOX_BLOCKED: { + case SANDBOX_ASLEEP: { local_runqueue_add(sandbox); break; } @@ -48,3 +49,11 @@ sandbox_set_as_runnable(struct sandbox *sandbox, sandbox_state_t last_state) runtime_sandbox_total_increment(SANDBOX_RUNNABLE); runtime_sandbox_total_decrement(last_state); } + + +static inline void +sandbox_wakeup(struct sandbox *sandbox) +{ + assert(sandbox->state == SANDBOX_ASLEEP); + sandbox_set_as_runnable(sandbox, SANDBOX_ASLEEP); +} diff --git a/runtime/include/sandbox_set_as_running_kernel.h b/runtime/include/sandbox_set_as_running_kernel.h index 39311e6..c097fa8 100644 --- a/runtime/include/sandbox_set_as_running_kernel.h +++ b/runtime/include/sandbox_set_as_running_kernel.h @@ -6,8 +6,9 @@ #include "arch/getcycles.h" #include "current_sandbox.h" #include "panic.h" -#include "sandbox_types.h" #include "sandbox_functions.h" +#include "sandbox_state_history.h" +#include "sandbox_types.h" static inline void sandbox_set_as_running_kernel(struct sandbox *sandbox, sandbox_state_t last_state) @@ -29,14 +30,6 @@ sandbox_set_as_running_kernel(struct sandbox *sandbox, sandbox_state_t last_stat * can fix this by breakout out SANDBOX_RUNNABLE and SANDBOX_PREEMPTED */ break; } - case SANDBOX_PREEMPTED: { - assert(sandbox); - assert(sandbox->interrupted_state == SANDBOX_RUNNING_USER); - current_sandbox_set(sandbox); - /* Does not handle context switch because the caller knows if we need to use fast or slow switched. 
We - * can fix this by breakout out SANDBOX_RUNNABLE and SANDBOX_PREEMPTED */ - break; - } default: { panic("Sandbox %lu | Illegal transition from %s to Running Kernel\n", sandbox->id, sandbox_state_stringify(last_state)); diff --git a/runtime/include/sandbox_set_as_running_user.h b/runtime/include/sandbox_set_as_running_user.h index 16f861c..13ee3d6 100644 --- a/runtime/include/sandbox_set_as_running_user.h +++ b/runtime/include/sandbox_set_as_running_user.h @@ -6,6 +6,7 @@ #include "arch/getcycles.h" #include "current_sandbox.h" #include "panic.h" +#include "sandbox_state_history.h" #include "sandbox_types.h" #include "sandbox_functions.h" @@ -20,7 +21,11 @@ sandbox_set_as_running_user(struct sandbox *sandbox, sandbox_state_t last_state) case SANDBOX_RUNNING_KERNEL: { assert(sandbox == current_sandbox_get()); assert(runtime_worker_threads_deadline[worker_thread_idx] == sandbox->absolute_deadline); - + break; + } + case SANDBOX_PREEMPTED: { + assert(sandbox); + current_sandbox_set(sandbox); break; } default: { diff --git a/runtime/include/sandbox_state.h b/runtime/include/sandbox_state.h index bf10f49..14568ee 100644 --- a/runtime/include/sandbox_state.h +++ b/runtime/include/sandbox_state.h @@ -15,7 +15,7 @@ typedef enum SANDBOX_PREEMPTED, SANDBOX_RUNNING_KERNEL, SANDBOX_RUNNING_USER, - SANDBOX_BLOCKED, + SANDBOX_ASLEEP, SANDBOX_RETURNED, SANDBOX_COMPLETE, SANDBOX_ERROR, diff --git a/runtime/include/sandbox_state_history.h b/runtime/include/sandbox_state_history.h new file mode 100644 index 0000000..11e2afd --- /dev/null +++ b/runtime/include/sandbox_state_history.h @@ -0,0 +1,14 @@ +#pragma once + +#include "sandbox_state.h" +#include "sandbox_types.h" + +static inline void +sandbox_state_history_append(struct sandbox *sandbox, sandbox_state_t state) +{ +#ifdef LOG_STATE_CHANGES + if (likely(sandbox->state_history_count < SANDBOX_STATE_HISTORY_CAPACITY)) { + sandbox->state_history[sandbox->state_history_count++] = state; + } +#endif +} diff --git 
a/runtime/include/sandbox_types.h b/runtime/include/sandbox_types.h index d384253..59ebedc 100644 --- a/runtime/include/sandbox_types.h +++ b/runtime/include/sandbox_types.h @@ -74,7 +74,6 @@ struct sandbox_buffer { struct sandbox { uint64_t id; sandbox_state_t state; - sandbox_state_t interrupted_state; #ifdef LOG_STATE_CHANGES sandbox_state_t state_history[SANDBOX_STATE_HISTORY_CAPACITY]; diff --git a/runtime/include/scheduler.h b/runtime/include/scheduler.h index 2c190f5..6ecde79 100644 --- a/runtime/include/scheduler.h +++ b/runtime/include/scheduler.h @@ -14,14 +14,13 @@ #include "local_runqueue_list.h" #include "panic.h" #include "sandbox_request.h" -#include "sandbox_exit.h" #include "sandbox_functions.h" #include "sandbox_types.h" -#include "sandbox_set_as_blocked.h" #include "sandbox_set_as_preempted.h" #include "sandbox_set_as_runnable.h" #include "sandbox_set_as_running_kernel.h" -#include "worker_thread_execute_epoll_loop.h" +#include "sandbox_set_as_running_user.h" +#include "scheduler_execute_epoll_loop.h" enum SCHEDULER { @@ -101,6 +100,14 @@ err: static inline struct sandbox * scheduler_get_next() { +#ifdef LOG_DEFERRED_SIGALRM_MAX + if (unlikely(software_interrupt_deferred_sigalrm + > software_interrupt_deferred_sigalrm_max[worker_thread_idx])) { + software_interrupt_deferred_sigalrm_max[worker_thread_idx] = software_interrupt_deferred_sigalrm; + } +#endif + + atomic_store(&software_interrupt_deferred_sigalrm, 0); switch (scheduler) { case SCHEDULER_EDF: return scheduler_edf_get_next(); @@ -141,82 +148,6 @@ scheduler_runqueue_initialize() } } -/** - * Called by the SIGALRM handler after a quantum - * Assumes the caller validates that there is something to preempt - * @param interrupted_context - The context of our user-level Worker thread - * @returns the sandbox that the scheduler chose to run - */ -static inline struct sandbox * -scheduler_preempt(ucontext_t *interrupted_context) -{ - assert(interrupted_context != NULL); - - /* Process epoll to 
make sure that all runnable jobs are considered for execution */ - worker_thread_execute_epoll_loop(); - - struct sandbox *current = current_sandbox_get(); - assert(current != NULL); - assert(current->state == SANDBOX_RUNNING_KERNEL); - - struct sandbox *next = scheduler_get_next(); - /* Assumption: the current sandbox is on the runqueue, so the scheduler should always return something */ - assert(next != NULL); - - /* If current equals next, no switch is necessary, so resume execution */ - if (current == next) return current; - -#ifdef LOG_PREEMPTION - debuglog("Preempting sandbox %lu to run sandbox %lu\n", current->id, next->id); -#endif - - /* Save the context of the currently executing sandbox before switching from it */ - - /* How do I switch back to "user running" when this is resumed? */ - sandbox_set_as_preempted(current, SANDBOX_RUNNING_KERNEL); - arch_context_save_slow(&current->ctxt, &interrupted_context->uc_mcontext); - - /* Update current_sandbox to the next sandbox */ - // assert(next->state == SANDBOX_RUNNABLE); - - switch (next->ctxt.variant) { - case ARCH_CONTEXT_VARIANT_FAST: { - assert(next->state == SANDBOX_RUNNABLE); - sandbox_set_as_running_kernel(next, SANDBOX_RUNNABLE); - arch_context_restore_fast(&interrupted_context->uc_mcontext, &next->ctxt); - break; - } - case ARCH_CONTEXT_VARIANT_SLOW: { - /* Our scheduler restores a fast context when switching to a sandbox that cooperatively yielded - * (probably by blocking) or when switching to a freshly allocated sandbox that hasn't yet run. - * These conditions can occur in either EDF or FIFO. - * - * A scheduler restores a slow context when switching to a sandbox that was preempted previously. - * Under EDF, a sandbox is only ever preempted by an earlier deadline that either had blocked and since - * become runnable or was just freshly allocated. This means that such EDF preemption context switches - * should always use a fast context.
- * This is not true under FIFO, where there is no innate ordering between sandboxes. A runqueue is - * normally only a single sandbox, but it may have multiple sandboxes when one blocks and the worker - * pulls an addition request. When the blocked sandbox becomes runnable, the executing sandbox can be - * preempted yielding a slow context. This means that FIFO preemption context switches might cause - * either a fast or a slow context to be restored during "round robin" execution. - */ - assert(scheduler != SCHEDULER_EDF); - assert(next->state == SANDBOX_PREEMPTED); - arch_context_restore_slow(&interrupted_context->uc_mcontext, &next->ctxt); - sandbox_set_as_running_kernel(next, SANDBOX_PREEMPTED); - break; - } - default: { - panic("Unexpectedly tried to switch to a context in %s state\n", - arch_context_variant_print(next->ctxt.variant)); - } - } - - return next; -} - static inline char * scheduler_print(enum SCHEDULER variant) { @@ -237,6 +168,10 @@ scheduler_log_sandbox_switch(struct sandbox *current_sandbox, struct sandbox *ne debuglog("Base Context (@%p) (%s) > Sandbox %lu (@%p) (%s)\n", &worker_thread_base_context, arch_context_variant_print(worker_thread_base_context.variant), next_sandbox->id, &next_sandbox->ctxt, arch_context_variant_print(next_sandbox->ctxt.variant)); + } else if (next_sandbox == NULL) { + debuglog("Sandbox %lu (@%p) (%s) > Base Context (@%p) (%s)\n", current_sandbox->id, + &current_sandbox->ctxt, arch_context_variant_print(current_sandbox->ctxt.variant), + &worker_thread_base_context, arch_context_variant_print(worker_thread_base_context.variant)); } else { debuglog("Sandbox %lu (@%p) (%s) > Sandbox %lu (@%p) (%s)\n", current_sandbox->id, &current_sandbox->ctxt, arch_context_variant_print(current_sandbox->ctxt.variant), @@ -245,77 +180,120 @@ scheduler_log_sandbox_switch(struct sandbox *current_sandbox, struct sandbox *ne #endif } +static inline void +scheduler_preemptive_switch_to(ucontext_t *interrupted_context, struct sandbox *next) +{ + /*
Switch to next sandbox */ + switch (next->ctxt.variant) { + case ARCH_CONTEXT_VARIANT_FAST: { + assert(next->state == SANDBOX_RUNNABLE); + arch_context_restore_fast(&interrupted_context->uc_mcontext, &next->ctxt); + sandbox_set_as_running_kernel(next, SANDBOX_RUNNABLE); + break; + } + case ARCH_CONTEXT_VARIANT_SLOW: { + assert(next->state == SANDBOX_PREEMPTED); + arch_context_restore_slow(&interrupted_context->uc_mcontext, &next->ctxt); + sandbox_set_as_running_user(next, SANDBOX_PREEMPTED); + break; + } + default: { + panic("Unexpectedly tried to switch to a context in %s state\n", + arch_context_variant_print(next->ctxt.variant)); + } + } +} + /** - * @brief Switches to the next sandbox, placing the current sandbox on the completion queue if in - * SANDBOX_RETURNED state - * @param next_sandbox The Sandbox Context to switch to + * Called by the SIGALRM handler after a quantum + * Assumes the caller validates that there is something to preempt + * @param interrupted_context - The context of our user-level Worker thread + * @returns the sandbox that the scheduler chose to run */ static inline void -scheduler_switch_to(struct sandbox *next_sandbox) +scheduler_preemptive_sched(ucontext_t *interrupted_context) { - assert(next_sandbox != NULL); - assert(next_sandbox->state == SANDBOX_RUNNABLE || next_sandbox->state == SANDBOX_PREEMPTED); - struct arch_context *next_context = &next_sandbox->ctxt; + assert(interrupted_context != NULL); - /* Get the old sandbox we're switching from. 
- * This is null if switching from base context - */ - struct sandbox *current_sandbox = current_sandbox_get(); - assert(next_sandbox != current_sandbox); + /* Process epoll to make sure that all runnable jobs are considered for execution */ + scheduler_execute_epoll_loop(); - struct arch_context *current_context = NULL; - if (current_sandbox != NULL) { - current_context = &current_sandbox->ctxt; - sandbox_exit(current_sandbox); - } + struct sandbox *current = current_sandbox_get(); + assert(current != NULL); + assert(current->state == SANDBOX_RUNNING_USER); - scheduler_log_sandbox_switch(current_sandbox, next_sandbox); - sandbox_set_as_running_kernel(next_sandbox, next_sandbox->state); - arch_context_switch(current_context, next_context); -} + struct sandbox *next = scheduler_get_next(); + /* Assumption: the current sandbox is on the runqueue, so the scheduler should always return something */ + assert(next != NULL); + + /* If current equals next, no switch is necessary, so resume execution */ + if (current == next) return; + +#ifdef LOG_PREEMPTION + debuglog("Preempting sandbox %lu to run sandbox %lu\n", current->id, next->id); +#endif + + scheduler_log_sandbox_switch(current, next); + /* Preempt executing sandbox */ + sandbox_set_as_preempted(current, SANDBOX_RUNNING_USER); + arch_context_save_slow(&current->ctxt, &interrupted_context->uc_mcontext); + + scheduler_preemptive_switch_to(interrupted_context, next); +} /** - * @brief Switches to the base context, placing the current sandbox on the completion queue if in RETURNED state + * @brief Switches to the next sandbox + * Assumption: only called by the "base context" + * @param next_sandbox The Sandbox to switch to */ static inline void -scheduler_yield() +scheduler_cooperative_switch_to(struct sandbox *next_sandbox) { - struct sandbox *current_sandbox = current_sandbox_get(); - assert(current_sandbox != NULL); + assert(current_sandbox_get() == NULL); - struct arch_context *current_context = &current_sandbox->ctxt; - - /*
Assumption: Base Context should never switch to Base Context */ - assert(current_context != &worker_thread_base_context); + struct arch_context *next_context = &next_sandbox->ctxt; -#ifdef LOG_CONTEXT_SWITCHES - debuglog("Sandbox %lu (@%p) (%s) > Base Context (@%p) (%s)\n", current_sandbox->id, current_context, - arch_context_variant_print(current_sandbox->ctxt.variant), &worker_thread_base_context, - arch_context_variant_print(worker_thread_base_context.variant)); -#endif + scheduler_log_sandbox_switch(NULL, next_sandbox); - sandbox_exit(current_sandbox); - current_sandbox_set(NULL); + /* Switch to next sandbox */ + switch (next_sandbox->state) { + case SANDBOX_RUNNABLE: { + assert(next_context->variant == ARCH_CONTEXT_VARIANT_FAST); + sandbox_set_as_running_kernel(next_sandbox, SANDBOX_RUNNABLE); + break; + } + case SANDBOX_PREEMPTED: { + assert(next_context->variant == ARCH_CONTEXT_VARIANT_SLOW); + /* arch_context_switch triggers a SIGUSR1, which transitions next_sandbox to running_user */ + current_sandbox_set(next_sandbox); + break; + } + default: { + panic("Unexpectedly tried to switch to a sandbox in %s state\n", + sandbox_state_stringify(next_sandbox->state)); + } + } - /* Assumption: Base Worker context should never be preempted */ - assert(worker_thread_base_context.variant == ARCH_CONTEXT_VARIANT_FAST); - arch_context_switch(current_context, &worker_thread_base_context); + arch_context_switch(&worker_thread_base_context, next_context); } -/** - * Mark the currently executing sandbox as blocked, remove it from the local runqueue, - * and switch to base context +/* A sandbox cannot execute the scheduler directly. 
It must yield to the base context, and then the context calls this + * within its idle loop */ static inline void -scheduler_block(void) +scheduler_cooperative_sched() { - /* Remove the sandbox we were just executing from the runqueue and mark as blocked */ - struct sandbox *current_sandbox = current_sandbox_get(); + /* Assumption: only called by the "base context" */ + assert(current_sandbox_get() == NULL); + + /* Try to wakeup sleeping sandboxes */ + scheduler_execute_epoll_loop(); - assert(current_sandbox->state == SANDBOX_RUNNING_KERNEL); - sandbox_set_as_blocked(current_sandbox, SANDBOX_RUNNING_KERNEL); - generic_thread_dump_lock_overhead(); + /* Switch to a sandbox if one is ready to run */ + struct sandbox *next_sandbox = scheduler_get_next(); + if (next_sandbox != NULL) scheduler_cooperative_switch_to(next_sandbox); - scheduler_yield(); + /* Clear the completion queue */ + local_completion_queue_free(); } diff --git a/runtime/include/worker_thread_execute_epoll_loop.h b/runtime/include/scheduler_execute_epoll_loop.h similarity index 91% rename from runtime/include/worker_thread_execute_epoll_loop.h rename to runtime/include/scheduler_execute_epoll_loop.h index c9d86de..07c9ffe 100644 --- a/runtime/include/worker_thread_execute_epoll_loop.h +++ b/runtime/include/scheduler_execute_epoll_loop.h @@ -18,7 +18,7 @@ * Run all outstanding events in the local thread's epoll loop */ static inline void -worker_thread_execute_epoll_loop(void) +scheduler_execute_epoll_loop(void) { while (true) { struct epoll_event epoll_events[RUNTIME_MAX_EPOLL_EVENTS]; @@ -35,13 +35,11 @@ worker_thread_execute_epoll_loop(void) for (int i = 0; i < descriptor_count; i++) { if (epoll_events[i].events & (EPOLLIN | EPOLLOUT)) { - /* Re-add to runqueue if blocked */ + /* Re-add to runqueue if asleep */ struct sandbox *sandbox = (struct sandbox *)epoll_events[i].data.ptr; assert(sandbox); - if (sandbox->state == SANDBOX_BLOCKED) { - sandbox_set_as_runnable(sandbox, SANDBOX_BLOCKED); - } + if 
(sandbox->state == SANDBOX_ASLEEP) { sandbox_wakeup(sandbox); } } else if (epoll_events[i].events & (EPOLLERR | EPOLLHUP)) { /* Mystery: This seems to never fire. Why? Issue #130 */ diff --git a/runtime/include/software_interrupt.h b/runtime/include/software_interrupt.h index 684e340..b1f4eac 100644 --- a/runtime/include/software_interrupt.h +++ b/runtime/include/software_interrupt.h @@ -82,5 +82,5 @@ void software_interrupt_initialize(void); void software_interrupt_arm_timer(void); void software_interrupt_disarm_timer(void); void software_interrupt_set_interval_duration(uint64_t cycles); +void software_interrupt_deferred_sigalrm_max_free(void); void software_interrupt_deferred_sigalrm_max_print(void); -void software_interrupt_cleanup(); diff --git a/runtime/src/current_sandbox.c b/runtime/src/current_sandbox.c index e4f8a36..4082e16 100644 --- a/runtime/src/current_sandbox.c +++ b/runtime/src/current_sandbox.c @@ -4,8 +4,10 @@ #include "sandbox_functions.h" #include "sandbox_receive_request.h" #include "sandbox_send_response.h" +#include "sandbox_set_as_asleep.h" #include "sandbox_set_as_error.h" #include "sandbox_set_as_returned.h" +#include "sandbox_set_as_complete.h" #include "sandbox_set_as_running_user.h" #include "sandbox_set_as_running_kernel.h" #include "sandbox_setup_arguments.h" @@ -23,6 +25,82 @@ thread_local struct sandbox_context_cache local_sandbox_context_cache = { .module_indirect_table = NULL, }; +/** + * @brief Puts the executing sandbox to sleep and switches to the worker thread base context + * + * The sandbox must be in the RUNNING_KERNEL state (it is passed to sandbox_sleep); any other state panics + */ +void +current_sandbox_sleep() +{ + struct sandbox *sandbox = current_sandbox_get(); + assert(sandbox != NULL); + struct arch_context *current_context = &sandbox->ctxt; + + scheduler_log_sandbox_switch(sandbox, NULL); + generic_thread_dump_lock_overhead(); + + assert(sandbox != NULL); + + switch (sandbox->state) { + case SANDBOX_RUNNING_KERNEL: { + sandbox_sleep(sandbox); + break; +
} + default: + panic("Cooperatively switching from a sandbox in a non-terminal %s state\n", + sandbox_state_stringify(sandbox->state)); + } + + current_sandbox_set(NULL); + + /* Assumption: Base Worker context should never be preempted */ + assert(worker_thread_base_context.variant == ARCH_CONTEXT_VARIANT_FAST); + arch_context_switch(current_context, &worker_thread_base_context); +} + +/** + * @brief Exits the executing sandbox by switching to the worker thread base context + * + * A RETURNED sandbox is first marked complete; an ERROR sandbox is left as is; any other state panics + */ +void +current_sandbox_exit() +{ + struct sandbox *sandbox = current_sandbox_get(); + assert(sandbox != NULL); + struct arch_context *current_context = &sandbox->ctxt; + + scheduler_log_sandbox_switch(sandbox, NULL); + generic_thread_dump_lock_overhead(); + + assert(sandbox != NULL); + + switch (sandbox->state) { + case SANDBOX_RETURNED: + /* + * We draw a distinction between RETURNED and COMPLETED because a sandbox cannot add itself to the + * completion queue + * TODO: I think this executes when running inside the sandbox, as it hasn't yet yielded + * See Issue #224 at https://github.com/gwsystems/sledge-serverless-framework/issues/224 + */ + sandbox_set_as_complete(sandbox, SANDBOX_RETURNED); + break; + case SANDBOX_ERROR: + break; + default: + panic("Cooperatively switching from a sandbox in a non-terminal %s state\n", + sandbox_state_stringify(sandbox->state)); + } + + current_sandbox_set(NULL); + + /* Assumption: Base Worker context should never be preempted */ + assert(worker_thread_base_context.variant == ARCH_CONTEXT_VARIANT_FAST); + arch_context_switch(current_context, &worker_thread_base_context); +} + + /** * Sandbox execution logic * Handles setup, request parsing, WebAssembly initialization, function execution, response building and @@ -82,7 +160,7 @@ current_sandbox_start(void) done: /* Cleanup connection and exit sandbox */ generic_thread_dump_lock_overhead(); - scheduler_yield(); +
current_sandbox_exit(); /* This assert prevents a segfault discussed in * https://github.com/phanikishoreg/awsm-Serverless-Framework/issues/66 diff --git a/runtime/src/libc/syscall.c b/runtime/src/libc/syscall.c index 9c97399..121e519 100644 --- a/runtime/src/libc/syscall.c +++ b/runtime/src/libc/syscall.c @@ -88,10 +88,11 @@ stub_init(int32_t offset) uint32_t wasm_read(int32_t filedes, int32_t buf_offset, int32_t nbyte) { + struct sandbox *current_sandbox = current_sandbox_get(); + /* Non-blocking copy on stdin */ if (filedes == 0) { char * buffer = worker_thread_get_memory_ptr_void(buf_offset, nbyte); - struct sandbox * current_sandbox = current_sandbox_get(); struct http_request *current_request = &current_sandbox->http_request; if (current_request->body_length <= 0) return 0; int bytes_to_read = nbyte > current_request->body_length ? current_request->body_length : nbyte; @@ -109,7 +110,7 @@ wasm_read(int32_t filedes, int32_t buf_offset, int32_t nbyte) int32_t length_read = (int32_t)read(filedes, buf, nbyte); if (length_read < 0) { if (errno == EAGAIN) - scheduler_block(); + current_sandbox_sleep(); else { /* All other errors */ debuglog("Error reading socket %d - %s\n", filedes, strerror(errno)); diff --git a/runtime/src/main.c b/runtime/src/main.c index 15f6eb1..2cfcac2 100644 --- a/runtime/src/main.c +++ b/runtime/src/main.c @@ -225,7 +225,7 @@ runtime_configure() runtime_sandbox_perf_log = fopen(runtime_sandbox_perf_log_path, "w"); if (runtime_sandbox_perf_log == NULL) { perror("sandbox perf log"); } fprintf(runtime_sandbox_perf_log, "id,module,port,state,deadline,actual,queued,initializing,runnable," - "running,blocked,returned,proc_MHz,memory\n"); + "running,asleep,returned,proc_MHz,memory\n"); } else { printf("\tSandbox Performance Log: Disabled\n"); } diff --git a/runtime/src/runtime.c b/runtime/src/runtime.c index 290f34c..1d9e13f 100644 --- a/runtime/src/runtime.c +++ b/runtime/src/runtime.c @@ -47,7 +47,7 @@ runtime_cleanup() if (runtime_worker_threads)
free(runtime_worker_threads); software_interrupt_deferred_sigalrm_max_print(); - software_interrupt_cleanup(); + software_interrupt_deferred_sigalrm_max_free(); exit(EXIT_SUCCESS); } diff --git a/runtime/src/sandbox_state.c b/runtime/src/sandbox_state.c index 72b366a..52954af 100644 --- a/runtime/src/sandbox_state.c +++ b/runtime/src/sandbox_state.c @@ -16,7 +16,7 @@ const char *sandbox_state_labels[SANDBOX_STATE_COUNT] = { [SANDBOX_PREEMPTED] = "Preempted", [SANDBOX_RUNNING_KERNEL] = "Running Kernel", [SANDBOX_RUNNING_USER] = "Running User", - [SANDBOX_BLOCKED] = "Blocked", + [SANDBOX_ASLEEP] = "Asleep", [SANDBOX_RETURNED] = "Returned", [SANDBOX_COMPLETE] = "Complete", [SANDBOX_ERROR] = "Error" diff --git a/runtime/src/software_interrupt.c b/runtime/src/software_interrupt.c index e6d181c..e95ac62 100644 --- a/runtime/src/software_interrupt.c +++ b/runtime/src/software_interrupt.c @@ -42,20 +42,32 @@ thread_local _Atomic volatile sig_atomic_t software_interrupt_signal_dept _Atomic volatile sig_atomic_t *software_interrupt_deferred_sigalrm_max; +void +software_interrupt_deferred_sigalrm_max_alloc() +{ +#ifdef LOG_DEFERRED_SIGALRM_MAX /* allocate one zero-initialized counter per worker thread */ + software_interrupt_deferred_sigalrm_max = calloc(runtime_worker_threads_count, sizeof(_Atomic(sig_atomic_t))); +#endif /* LOG_DEFERRED_SIGALRM_MAX */ +} + +void +software_interrupt_deferred_sigalrm_max_free() +{ +#ifdef LOG_DEFERRED_SIGALRM_MAX /* release the counters; skipped when the pointer is NULL */ + if (software_interrupt_deferred_sigalrm_max) free((void *)software_interrupt_deferred_sigalrm_max); +#endif /* LOG_DEFERRED_SIGALRM_MAX */ +} + void software_interrupt_deferred_sigalrm_max_print() { +#ifdef LOG_DEFERRED_SIGALRM_MAX /* print each worker's counter to stdout; compiled out otherwise */ printf("Max Deferred Sigalrms\n"); for (int i = 0; i < runtime_worker_threads_count; i++) { printf("Worker %d: %d\n", i, software_interrupt_deferred_sigalrm_max[i]); } fflush(stdout); -} - -void -software_interrupt_cleanup() -{ - if (software_interrupt_deferred_sigalrm_max) free((void *)software_interrupt_deferred_sigalrm_max); +#endif /* LOG_DEFERRED_SIGALRM_MAX */ } /*************************************** @@ -149,24 +161,19 @@
software_interrupt_handle_signals(int signal_type, siginfo_t *signal_info, void case SIGALRM: { sigalrm_propagate_workers(signal_info); - /* Current Sandbox is NULL when the base worker context is active. This already executes scheduling - * logic, so just return. */ - if (!current_sandbox) goto done; - - /* We need to track what state was interrupted to conditionally restore user running after preemption */ - current_sandbox->interrupted_state = current_sandbox->state; - - if (current_sandbox->state == SANDBOX_RUNNING_USER) { - sandbox_set_as_running_kernel(current_sandbox, SANDBOX_RUNNING_USER); - atomic_store(&software_interrupt_deferred_sigalrm, 0); - current_sandbox = scheduler_preempt(interrupted_context); - } else { + /* Current sandbox is not preemptable: count the SIGALRM as deferred instead of preempting */ + if (!sandbox_is_preemptable(current_sandbox)) { atomic_fetch_add(&software_interrupt_deferred_sigalrm, 1); + goto done; } + + scheduler_preemptive_sched(interrupted_context); + goto done; } case SIGUSR1: { assert(current_sandbox); + assert(current_sandbox->state == SANDBOX_PREEMPTED); assert(current_sandbox->ctxt.variant == ARCH_CONTEXT_VARIANT_SLOW); atomic_fetch_add(&software_interrupt_SIGUSR_count, 1); @@ -175,9 +182,8 @@ software_interrupt_handle_signals(int signal_type, siginfo_t *signal_info, void debuglog("Restoring sandbox: %lu, Stack %llu\n", current_sandbox->id, current_sandbox->ctxt.mctx.gregs[REG_RSP]); #endif - /* Overwrites the interrupted context with the context of the worker thread's current sandbox */ /* It is the responsibility of the caller to invoke current_sandbox_set before triggering the SIGUSR1 */ - arch_context_restore_slow(&interrupted_context->uc_mcontext, &current_sandbox->ctxt); + scheduler_preemptive_switch_to(interrupted_context, current_sandbox); goto done; } default: { @@ -194,9 +200,6 @@ software_interrupt_handle_signals(int signal_type, siginfo_t *signal_info, void } done: atomic_fetch_sub(&software_interrupt_signal_depth, 1); - if (current_sandbox &&
current_sandbox->interrupted_state == SANDBOX_RUNNING_USER) { - sandbox_set_as_running_user(current_sandbox, SANDBOX_RUNNING_KERNEL); - } return; } @@ -275,7 +278,7 @@ software_interrupt_initialize(void) } } - software_interrupt_deferred_sigalrm_max = calloc(runtime_worker_threads_count, sizeof(_Atomic(sig_atomic_t))); + software_interrupt_deferred_sigalrm_max_alloc(); } void diff --git a/runtime/src/worker_thread.c b/runtime/src/worker_thread.c index 9f9ff1b..82f86a7 100644 --- a/runtime/src/worker_thread.c +++ b/runtime/src/worker_thread.c @@ -15,7 +15,6 @@ #include "runtime.h" #include "scheduler.h" #include "worker_thread.h" -#include "worker_thread_execute_epoll_loop.h" /*************************** * Worker Thread State * @@ -67,21 +66,8 @@ worker_thread_main(void *argument) software_interrupt_unmask_signal(SIGUSR1); } - /* Begin Worker Execution Loop */ - struct sandbox *next_sandbox = NULL; - while (true) { - /* Assumption: current_sandbox should be unset at start of loop */ - assert(current_sandbox_get() == NULL); - - worker_thread_execute_epoll_loop(); - - /* Switch to a sandbox if one is ready to run */ - next_sandbox = scheduler_get_next(); - if (next_sandbox != NULL) { scheduler_switch_to(next_sandbox); } - - /* Clear the completion queue */ - local_completion_queue_free(); - } + /* Idle loop: run the cooperative scheduler forever */ + while (true) scheduler_cooperative_sched(); panic("Worker Thread unexpectedly completed run loop."); }