refactor: scheduler cleanup

master
Sean McBride 4 years ago
parent 54848b765a
commit 3a60134d44

@ -87,12 +87,19 @@
"sandbox_set_as_error.h": "c",
"likely.h": "c",
"debuglog.h": "c",
"worker_thread_execute_epoll_loop.h": "c",
"scheduler_execute_epoll_loop.h": "c",
"sandbox_set_as_running.h": "c",
"sandbox_summarize_page_allocations.h": "c",
"wasm_types.h": "c",
"atomic": "c",
"sandbox_set_as_running_kernel.h": "c"
"sandbox_set_as_running_kernel.h": "c",
"stdbool.h": "c",
"sandbox_set_as_asleep.h": "c",
"sandbox_print_perf.h": "c",
"sandbox_state_history.h": "c",
"sandbox_set_as_running_user.h": "c",
"scheduler.h": "c",
"sandbox_set_as_returned.h": "c"
},
"files.exclude": {
"**/.git": true,

@ -61,6 +61,9 @@ BINARY_NAME=sledgert
# The history truncates when the number of elements equals SANDBOX_STATE_HISTORY_CAPACITY
# CFLAGS += -DLOG_STATE_CHANGES
# Stores the max number of deferred SIGALRMs for each worker
# CFLAGS += -DLOG_DEFERRED_SIGALRM_MAX
# This dumps per-module *.csv files containing the cycle count a sandbox had spent in RUNNING when each
# page was allocated. This helps in understanding the relationship between memory allocation and execution time.
# CFLAGS += -DLOG_SANDBOX_MEMORY_PROFILE

@ -49,3 +49,6 @@ current_sandbox_set(struct sandbox *sandbox)
runtime_worker_threads_deadline[worker_thread_idx] = sandbox->absolute_deadline;
}
}
extern void current_sandbox_sleep();

@ -1,36 +0,0 @@
#pragma once
#include <assert.h>
#include <stddef.h>
#include "panic.h"
#include "sandbox_state.h"
#include "sandbox_set_as_complete.h"
/**
* Conditionally triggers appropriate state changes for exiting sandboxes
* @param exiting_sandbox - The sandbox that ran to completion
*/
static inline void
sandbox_exit(struct sandbox *exiting_sandbox)
{
assert(exiting_sandbox != NULL);
switch (exiting_sandbox->state) {
case SANDBOX_RETURNED:
/*
* We draw a distinction between RETURNED and COMPLETED because a sandbox cannot add itself to the
* completion queue
* TODO: I think this executes when running inside the sandbox, as it hasn't yet yielded
* See Issue #224 at https://github.com/gwsystems/sledge-serverless-framework/issues/224
*/
sandbox_set_as_complete(exiting_sandbox, SANDBOX_RETURNED);
break;
case SANDBOX_BLOCKED:
case SANDBOX_ERROR:
break;
default:
panic("Cooperatively switching from a sandbox in a non-terminal %s state\n",
sandbox_state_stringify(exiting_sandbox->state));
}
}

@ -59,6 +59,12 @@ sandbox_get_priority(void *element)
return sandbox->absolute_deadline;
};
static inline bool
sandbox_is_preemptable(struct sandbox *sandbox)
{
return sandbox && sandbox->state == SANDBOX_RUNNING_USER;
};
static inline void
sandbox_open_http(struct sandbox *sandbox)
{
@ -77,43 +83,3 @@ sandbox_open_http(struct sandbox *sandbox)
&accept_evt);
if (unlikely(rc < 0)) panic_err();
}
/**
* Prints key performance metrics for a sandbox to runtime_sandbox_perf_log
* The path of this log is set by an environment variable
* @param sandbox
*/
static inline void
sandbox_print_perf(struct sandbox *sandbox)
{
/* If the log was not defined by an environment variable, early out */
if (runtime_sandbox_perf_log == NULL) return;
uint64_t queued_duration = sandbox->timestamp_of.allocation - sandbox->timestamp_of.request_arrival;
/*
* Assumption: A sandbox is never able to free pages. If linear memory management
* becomes more intelligent, then peak linear memory size needs to be tracked
* separately from current linear memory size.
*/
fprintf(runtime_sandbox_perf_log,
"%lu,%s,%d,%s,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%u,%u\n", sandbox->id,
sandbox->module->name, sandbox->module->port, sandbox_state_stringify(sandbox->state),
sandbox->module->relative_deadline, sandbox->total_time, queued_duration,
sandbox->duration_of_state[SANDBOX_UNINITIALIZED], sandbox->duration_of_state[SANDBOX_ALLOCATED],
sandbox->duration_of_state[SANDBOX_INITIALIZED], sandbox->duration_of_state[SANDBOX_RUNNABLE],
sandbox->duration_of_state[SANDBOX_PREEMPTED], sandbox->duration_of_state[SANDBOX_RUNNING_KERNEL],
sandbox->duration_of_state[SANDBOX_RUNNING_USER], sandbox->duration_of_state[SANDBOX_BLOCKED],
sandbox->duration_of_state[SANDBOX_RETURNED], sandbox->duration_of_state[SANDBOX_COMPLETE],
sandbox->duration_of_state[SANDBOX_ERROR], runtime_processor_speed_MHz, sandbox->memory.size);
}
static inline void
sandbox_state_history_append(struct sandbox *sandbox, sandbox_state_t state)
{
#ifdef LOG_STATE_CHANGES
if (likely(sandbox->state_history_count < SANDBOX_STATE_HISTORY_CAPACITY)) {
sandbox->state_history[sandbox->state_history_count++] = state;
}
#endif
}

@ -0,0 +1,34 @@
#pragma once
#include "runtime.h"
#include "sandbox_types.h"
/**
* Prints key performance metrics for a sandbox to runtime_sandbox_perf_log
* The path of this log is set by an environment variable
* @param sandbox
*/
static inline void
sandbox_print_perf(struct sandbox *sandbox)
{
/* If the log was not defined by an environment variable, early out */
if (runtime_sandbox_perf_log == NULL) return;
uint64_t queued_duration = sandbox->timestamp_of.allocation - sandbox->timestamp_of.request_arrival;
/*
* Assumption: A sandbox is never able to free pages. If linear memory management
* becomes more intelligent, then peak linear memory size needs to be tracked
* separately from current linear memory size.
*/
fprintf(runtime_sandbox_perf_log,
"%lu,%s,%d,%s,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu,%u,%u\n", sandbox->id,
sandbox->module->name, sandbox->module->port, sandbox_state_stringify(sandbox->state),
sandbox->module->relative_deadline, sandbox->total_time, queued_duration,
sandbox->duration_of_state[SANDBOX_UNINITIALIZED], sandbox->duration_of_state[SANDBOX_ALLOCATED],
sandbox->duration_of_state[SANDBOX_INITIALIZED], sandbox->duration_of_state[SANDBOX_RUNNABLE],
sandbox->duration_of_state[SANDBOX_PREEMPTED], sandbox->duration_of_state[SANDBOX_RUNNING_KERNEL],
sandbox->duration_of_state[SANDBOX_RUNNING_USER], sandbox->duration_of_state[SANDBOX_ASLEEP],
sandbox->duration_of_state[SANDBOX_RETURNED], sandbox->duration_of_state[SANDBOX_COMPLETE],
sandbox->duration_of_state[SANDBOX_ERROR], runtime_processor_speed_MHz, sandbox->memory.size);
}
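Every state-transition helper in this commit feeds the duration_of_state columns above with the same bookkeeping pattern; restated here for clarity (this is the pattern already used throughout the diff, not a new helper):

uint64_t now = __getcycles();
/* Charge the cycles since the last transition to the state being left */
sandbox->duration_of_state[last_state] += (now - sandbox->timestamp_of.last_state_change);
/* Stamp the time of this transition */
sandbox->timestamp_of.last_state_change = now;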

@ -47,7 +47,7 @@ sandbox_receive_request(struct sandbox *sandbox)
if (bytes_received == -1) {
if (errno == EAGAIN) {
scheduler_block();
current_sandbox_sleep();
continue;
} else {
debuglog("Error reading socket %d - %s\n", sandbox->client_socket_descriptor,

@ -54,7 +54,7 @@ sandbox_send_response(struct sandbox *sandbox)
rc = write(sandbox->client_socket_descriptor, response_header, response_size - sent);
if (rc < 0) {
if (errno == EAGAIN)
scheduler_block();
current_sandbox_sleep();
else {
perror("write");
goto err;

@ -7,9 +7,10 @@
#include "local_runqueue.h"
#include "sandbox_types.h"
#include "sandbox_state.h"
#include "sandbox_state_history.h"
/**
* Transitions a sandbox to the SANDBOX_BLOCKED state.
* Transitions a sandbox to the SANDBOX_ASLEEP state.
* This occurs when a sandbox is executing and it makes a blocking API call of some kind.
* Automatically removes the sandbox from the runqueue
* @param sandbox the sandbox being put to sleep
@ -17,10 +18,10 @@
* enable the compiler to perform constant propagation optimizations.
*/
static inline void
sandbox_set_as_blocked(struct sandbox *sandbox, sandbox_state_t last_state)
sandbox_set_as_asleep(struct sandbox *sandbox, sandbox_state_t last_state)
{
assert(sandbox);
sandbox->state = SANDBOX_BLOCKED;
sandbox->state = SANDBOX_ASLEEP;
uint64_t now = __getcycles();
switch (last_state) {
@ -37,7 +38,14 @@ sandbox_set_as_blocked(struct sandbox *sandbox, sandbox_state_t last_state)
/* State Change Bookkeeping */
sandbox->duration_of_state[last_state] += (now - sandbox->timestamp_of.last_state_change);
sandbox->timestamp_of.last_state_change = now;
sandbox_state_history_append(sandbox, SANDBOX_BLOCKED);
runtime_sandbox_total_increment(SANDBOX_BLOCKED);
sandbox_state_history_append(sandbox, SANDBOX_ASLEEP);
runtime_sandbox_total_increment(SANDBOX_ASLEEP);
runtime_sandbox_total_decrement(last_state);
}
static inline void
sandbox_sleep(struct sandbox *sandbox)
{
assert(sandbox->state == SANDBOX_RUNNING_KERNEL);
sandbox_set_as_asleep(sandbox, SANDBOX_RUNNING_KERNEL);
}

@ -7,7 +7,9 @@
#include "panic.h"
#include "local_completion_queue.h"
#include "sandbox_functions.h"
#include "sandbox_print_perf.h"
#include "sandbox_state.h"
#include "sandbox_state_history.h"
#include "sandbox_summarize_page_allocations.h"
#include "sandbox_types.h"

@ -8,6 +8,8 @@
#include "local_runqueue.h"
#include "sandbox_state.h"
#include "sandbox_functions.h"
#include "sandbox_print_perf.h"
#include "sandbox_state_history.h"
#include "sandbox_summarize_page_allocations.h"
#include "panic.h"

@ -8,6 +8,7 @@
#include "current_sandbox.h"
#include "ps_list.h"
#include "sandbox_request.h"
#include "sandbox_state_history.h"
#include "sandbox_types.h"
/**

@ -6,14 +6,13 @@
#include "arch/getcycles.h"
#include "local_runqueue.h"
#include "panic.h"
#include "sandbox_state_history.h"
#include "sandbox_types.h"
/**
* Transitions a sandbox to the SANDBOX_PREEMPTED state.
*
* This occurs in the following scenarios:
* - A sandbox in the SANDBOX_INITIALIZED state completes initialization and is ready to be run
* - A sandbox in the SANDBOX_BLOCKED state completes what was blocking it and is ready to be run
* This occurs when a sandbox is preempted in the SIGALRM handler
*
* @param sandbox
* @param last_state the state the sandbox is transitioning from. This is expressed as a constant to
@ -27,7 +26,7 @@ sandbox_set_as_preempted(struct sandbox *sandbox, sandbox_state_t last_state)
uint64_t now = __getcycles();
switch (last_state) {
case SANDBOX_RUNNING_KERNEL: {
case SANDBOX_RUNNING_USER: {
current_sandbox_set(NULL);
break;
}

@ -8,6 +8,7 @@
#include "panic.h"
#include "sandbox_functions.h"
#include "sandbox_state.h"
#include "sandbox_state_history.h"
#include "sandbox_types.h"
/**

@ -6,6 +6,7 @@
#include "arch/getcycles.h"
#include "local_runqueue.h"
#include "panic.h"
#include "sandbox_state_history.h"
#include "sandbox_types.h"
/**
@ -13,7 +14,7 @@
*
* This occurs in the following scenarios:
* - A sandbox in the SANDBOX_INITIALIZED state completes initialization and is ready to be run
* - A sandbox in the SANDBOX_BLOCKED state completes what was blocking it and is ready to be run
* - A sandbox in the SANDBOX_ASLEEP state completes what was blocking it and is ready to be run
*
* @param sandbox
* @param last_state the state the sandbox is transitioning from. This is expressed as a constant to
@ -31,7 +32,7 @@ sandbox_set_as_runnable(struct sandbox *sandbox, sandbox_state_t last_state)
local_runqueue_add(sandbox);
break;
}
case SANDBOX_BLOCKED: {
case SANDBOX_ASLEEP: {
local_runqueue_add(sandbox);
break;
}
@ -48,3 +49,11 @@ sandbox_set_as_runnable(struct sandbox *sandbox, sandbox_state_t last_state)
runtime_sandbox_total_increment(SANDBOX_RUNNABLE);
runtime_sandbox_total_decrement(last_state);
}
static inline void
sandbox_wakeup(struct sandbox *sandbox)
{
assert(sandbox->state == SANDBOX_ASLEEP);
sandbox_set_as_runnable(sandbox, SANDBOX_ASLEEP);
}
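sandbox_sleep and sandbox_wakeup are the two halves of the round trip that replaces the old BLOCKED machinery. A minimal sketch of the intended flow, modeled on the sandbox_receive_request hunk above (example_blocking_read is hypothetical and not part of this commit):

#include <errno.h>
#include <unistd.h>
#include "current_sandbox.h"
#include "sandbox_types.h"

/* Runs inside a sandbox in the RUNNING_KERNEL state */
static inline ssize_t
example_blocking_read(struct sandbox *sandbox, void *buf, size_t len)
{
	ssize_t rc;
	while ((rc = read(sandbox->client_socket_descriptor, buf, len)) < 0) {
		if (errno != EAGAIN) return rc;
		/* RUNNING_KERNEL -> ASLEEP, then switch to the base context */
		current_sandbox_sleep();
		/* Resumes only after scheduler_execute_epoll_loop() saw the
		 * socket become ready and called sandbox_wakeup():
		 * ASLEEP -> RUNNABLE -> dispatched -> RUNNING_KERNEL */
	}
	return rc;
}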

@ -6,8 +6,9 @@
#include "arch/getcycles.h"
#include "current_sandbox.h"
#include "panic.h"
#include "sandbox_types.h"
#include "sandbox_functions.h"
#include "sandbox_state_history.h"
#include "sandbox_types.h"
static inline void
sandbox_set_as_running_kernel(struct sandbox *sandbox, sandbox_state_t last_state)
@ -29,14 +30,6 @@ sandbox_set_as_running_kernel(struct sandbox *sandbox, sandbox_state_t last_state)
* can fix this by breaking out SANDBOX_RUNNABLE and SANDBOX_PREEMPTED */
break;
}
case SANDBOX_PREEMPTED: {
assert(sandbox);
assert(sandbox->interrupted_state == SANDBOX_RUNNING_USER);
current_sandbox_set(sandbox);
/* Does not handle the context switch because the caller knows whether to use a fast or slow switch. We
* can fix this by breaking out SANDBOX_RUNNABLE and SANDBOX_PREEMPTED */
break;
}
default: {
panic("Sandbox %lu | Illegal transition from %s to Running Kernel\n", sandbox->id,
sandbox_state_stringify(last_state));

@ -6,6 +6,7 @@
#include "arch/getcycles.h"
#include "current_sandbox.h"
#include "panic.h"
#include "sandbox_state_history.h"
#include "sandbox_types.h"
#include "sandbox_functions.h"
@ -20,7 +21,11 @@ sandbox_set_as_running_user(struct sandbox *sandbox, sandbox_state_t last_state)
case SANDBOX_RUNNING_KERNEL: {
assert(sandbox == current_sandbox_get());
assert(runtime_worker_threads_deadline[worker_thread_idx] == sandbox->absolute_deadline);
break;
}
case SANDBOX_PREEMPTED: {
assert(sandbox);
current_sandbox_set(sandbox);
break;
}
default: {

@ -15,7 +15,7 @@ typedef enum
SANDBOX_PREEMPTED,
SANDBOX_RUNNING_KERNEL,
SANDBOX_RUNNING_USER,
SANDBOX_BLOCKED,
SANDBOX_ASLEEP,
SANDBOX_RETURNED,
SANDBOX_COMPLETE,
SANDBOX_ERROR,

@ -0,0 +1,14 @@
#pragma once
#include "sandbox_state.h"
#include "sandbox_types.h"
static inline void
sandbox_state_history_append(struct sandbox *sandbox, sandbox_state_t state)
{
#ifdef LOG_STATE_CHANGES
if (likely(sandbox->state_history_count < SANDBOX_STATE_HISTORY_CAPACITY)) {
sandbox->state_history[sandbox->state_history_count++] = state;
}
#endif
}
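Nothing in this commit reads the recorded history back; a dump helper one might pair with it could look like the following (sandbox_state_history_print is hypothetical; assumes state_history_count is an integral counter):

#include "debuglog.h"
#include "sandbox_state.h"
#include "sandbox_types.h"

static inline void
sandbox_state_history_print(struct sandbox *sandbox)
{
#ifdef LOG_STATE_CHANGES
	for (int i = 0; i < sandbox->state_history_count; i++) {
		debuglog("Sandbox %lu: %s\n", sandbox->id,
		         sandbox_state_stringify(sandbox->state_history[i]));
	}
#endif
}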

@ -74,7 +74,6 @@ struct sandbox_buffer {
struct sandbox {
uint64_t id;
sandbox_state_t state;
sandbox_state_t interrupted_state;
#ifdef LOG_STATE_CHANGES
sandbox_state_t state_history[SANDBOX_STATE_HISTORY_CAPACITY];

@ -14,14 +14,13 @@
#include "local_runqueue_list.h"
#include "panic.h"
#include "sandbox_request.h"
#include "sandbox_exit.h"
#include "sandbox_functions.h"
#include "sandbox_types.h"
#include "sandbox_set_as_blocked.h"
#include "sandbox_set_as_preempted.h"
#include "sandbox_set_as_runnable.h"
#include "sandbox_set_as_running_kernel.h"
#include "worker_thread_execute_epoll_loop.h"
#include "sandbox_set_as_running_user.h"
#include "scheduler_execute_epoll_loop.h"
enum SCHEDULER
{
@ -101,6 +100,14 @@ err:
static inline struct sandbox *
scheduler_get_next()
{
#ifdef LOG_DEFERRED_SIGALRM_MAX
if (unlikely(software_interrupt_deferred_sigalrm
> software_interrupt_deferred_sigalrm_max[worker_thread_idx])) {
software_interrupt_deferred_sigalrm_max[worker_thread_idx] = software_interrupt_deferred_sigalrm;
}
#endif
atomic_store(&software_interrupt_deferred_sigalrm, 0);
switch (scheduler) {
case SCHEDULER_EDF:
return scheduler_edf_get_next();
@ -141,82 +148,6 @@ scheduler_runqueue_initialize()
}
}
/**
* Called by the SIGALRM handler after a quantum
* Assumes the caller validates that there is something to preempt
* @param interrupted_context - The context of our user-level Worker thread
* @returns the sandbox that the scheduler chose to run
*/
static inline struct sandbox *
scheduler_preempt(ucontext_t *interrupted_context)
{
assert(interrupted_context != NULL);
/* Process epoll to make sure that all runnable jobs are considered for execution */
worker_thread_execute_epoll_loop();
struct sandbox *current = current_sandbox_get();
assert(current != NULL);
assert(current->state == SANDBOX_RUNNING_KERNEL);
struct sandbox *next = scheduler_get_next();
/* Assumption: the current sandbox is on the runqueue, so the scheduler should always return something */
assert(next != NULL);
/* If current equals next, no switch is necessary, so resume execution */
if (current == next) return current;
#ifdef LOG_PREEMPTION
debuglog("Preempting sandbox %lu to run sandbox %lu\n", current->id, next->id);
#endif
/* Save the context of the currently executing sandbox before switching from it */
/* How do I switch back to "user running" when this is resumed? */
sandbox_set_as_preempted(current, SANDBOX_RUNNING_KERNEL);
arch_context_save_slow(&current->ctxt, &interrupted_context->uc_mcontext);
/* Update current_sandbox to the next sandbox */
// assert(next->state == SANDBOX_RUNNABLE);
switch (next->ctxt.variant) {
case ARCH_CONTEXT_VARIANT_FAST: {
assert(next->state == SANDBOX_RUNNABLE);
sandbox_set_as_running_kernel(next, SANDBOX_RUNNABLE);
arch_context_restore_fast(&interrupted_context->uc_mcontext, &next->ctxt);
break;
}
case ARCH_CONTEXT_VARIANT_SLOW: {
/* Our scheduler restores a fast context when switching to a sandbox that cooperatively yielded
* (probably by blocking) or when switching to a freshly allocated sandbox that hasn't yet run.
* These conditions can occur in either EDF or FIFO.
*
* A scheduler restores a slow context when switching to a sandbox that was preempted previously.
* Under EDF, a sandbox is only ever preempted by a sandbox with an earlier deadline that either had
* blocked and since become runnable or was just freshly allocated. This means that such EDF preemption
* context switches should always use a fast context.
*
* This is not true under FIFO, where there is no innate ordering between sandboxes. A runqueue
* normally holds only a single sandbox, but it may have multiple sandboxes when one blocks and the worker
* pulls an additional request. When the blocked sandbox becomes runnable, the executing sandbox can be
* preempted, yielding a slow context. This means that FIFO preemption context switches might cause
* either a fast or a slow context to be restored during "round robin" execution.
*/
assert(scheduler != SCHEDULER_EDF);
assert(next->state == SANDBOX_PREEMPTED);
arch_context_restore_slow(&interrupted_context->uc_mcontext, &next->ctxt);
sandbox_set_as_running_kernel(next, SANDBOX_PREEMPTED);
break;
}
default: {
panic("Unexpectedly tried to switch to a context in %s state\n",
arch_context_variant_print(next->ctxt.variant));
}
}
return next;
}
static inline char *
scheduler_print(enum SCHEDULER variant)
{
@ -237,6 +168,10 @@ scheduler_log_sandbox_switch(struct sandbox *current_sandbox, struct sandbox *next_sandbox)
debuglog("Base Context (@%p) (%s) > Sandbox %lu (@%p) (%s)\n", &worker_thread_base_context,
arch_context_variant_print(worker_thread_base_context.variant), next_sandbox->id,
&next_sandbox->ctxt, arch_context_variant_print(next_sandbox->ctxt.variant));
} else if (next_sandbox == NULL) {
debuglog("Sandbox %lu (@%p) (%s) > Base Context (@%p) (%s)\n", current_sandbox->id,
&current_sandbox->ctxt, arch_context_variant_print(current_sandbox->ctxt.variant),
&worker_thread_base_context, arch_context_variant_print(worker_thread_base_context.variant));
} else {
debuglog("Sandbox %lu (@%p) (%s) > Sandbox %lu (@%p) (%s)\n", current_sandbox->id,
&current_sandbox->ctxt, arch_context_variant_print(current_sandbox->ctxt.variant),
@ -245,77 +180,120 @@ scheduler_log_sandbox_switch(struct sandbox *current_sandbox, struct sandbox *next_sandbox)
#endif
}
static inline void
scheduler_preemptive_switch_to(ucontext_t *interrupted_context, struct sandbox *next)
{
/* Switch to next sandbox */
switch (next->ctxt.variant) {
case ARCH_CONTEXT_VARIANT_FAST: {
assert(next->state == SANDBOX_RUNNABLE);
arch_context_restore_fast(&interrupted_context->uc_mcontext, &next->ctxt);
sandbox_set_as_running_kernel(next, SANDBOX_RUNNABLE);
break;
}
case ARCH_CONTEXT_VARIANT_SLOW: {
assert(next->state == SANDBOX_PREEMPTED);
arch_context_restore_slow(&interrupted_context->uc_mcontext, &next->ctxt);
sandbox_set_as_running_user(next, SANDBOX_PREEMPTED);
break;
}
default: {
panic("Unexpectedly tried to switch to a context in %s state\n",
arch_context_variant_print(next->ctxt.variant));
}
}
}
/**
* @brief Switches to the next sandbox, placing the current sandbox on the completion queue if in
* SANDBOX_RETURNED state
* @param next_sandbox The Sandbox Context to switch to
* Called by the SIGALRM handler after a quantum
* Assumes the caller validates that there is something to preempt
* @param interrupted_context - The context of our user-level Worker thread
* @returns the sandbox that the scheduler chose to run
*/
static inline void
scheduler_switch_to(struct sandbox *next_sandbox)
scheduler_preemptive_sched(ucontext_t *interrupted_context)
{
assert(next_sandbox != NULL);
assert(next_sandbox->state == SANDBOX_RUNNABLE || next_sandbox->state == SANDBOX_PREEMPTED);
struct arch_context *next_context = &next_sandbox->ctxt;
assert(interrupted_context != NULL);
/* Get the old sandbox we're switching from.
* This is null if switching from base context
*/
struct sandbox *current_sandbox = current_sandbox_get();
assert(next_sandbox != current_sandbox);
/* Process epoll to make sure that all runnable jobs are considered for execution */
scheduler_execute_epoll_loop();
struct arch_context *current_context = NULL;
if (current_sandbox != NULL) {
current_context = &current_sandbox->ctxt;
sandbox_exit(current_sandbox);
}
struct sandbox *current = current_sandbox_get();
assert(current != NULL);
assert(current->state == SANDBOX_RUNNING_USER);
scheduler_log_sandbox_switch(current_sandbox, next_sandbox);
sandbox_set_as_running_kernel(next_sandbox, next_sandbox->state);
arch_context_switch(current_context, next_context);
}
struct sandbox *next = scheduler_get_next();
/* Assumption: the current sandbox is on the runqueue, so the scheduler should always return something */
assert(next != NULL);
/* If current equals next, no switch is necessary, so resume execution */
if (current == next) return;
#ifdef LOG_PREEMPTION
debuglog("Preempting sandbox %lu to run sandbox %lu\n", current->id, next->id);
#endif
scheduler_log_sandbox_switch(current, next);
/* Preempt executing sandbox */
sandbox_set_as_preempted(current, SANDBOX_RUNNING_USER);
arch_context_save_slow(&current->ctxt, &interrupted_context->uc_mcontext);
scheduler_preemptive_switch_to(interrupted_context, next);
}
/**
* @brief Switches to the base context, placing the current sandbox on the completion queue if in RETURNED state
* @brief Switches to the next sandbox
* Assumption: only called by the "base context"
* @param next_sandbox The Sandbox to switch to
*/
static inline void
scheduler_yield()
scheduler_cooperative_switch_to(struct sandbox *next_sandbox)
{
struct sandbox *current_sandbox = current_sandbox_get();
assert(current_sandbox != NULL);
assert(current_sandbox_get() == NULL);
struct arch_context *current_context = &current_sandbox->ctxt;
/* Assumption: Base Context should never switch to Base Context */
assert(current_context != &worker_thread_base_context);
struct arch_context *next_context = &next_sandbox->ctxt;
#ifdef LOG_CONTEXT_SWITCHES
debuglog("Sandbox %lu (@%p) (%s) > Base Context (@%p) (%s)\n", current_sandbox->id, current_context,
arch_context_variant_print(current_sandbox->ctxt.variant), &worker_thread_base_context,
arch_context_variant_print(worker_thread_base_context.variant));
#endif
scheduler_log_sandbox_switch(NULL, next_sandbox);
sandbox_exit(current_sandbox);
current_sandbox_set(NULL);
/* Switch to next sandbox */
switch (next_sandbox->state) {
case SANDBOX_RUNNABLE: {
assert(next_context->variant == ARCH_CONTEXT_VARIANT_FAST);
sandbox_set_as_running_kernel(next_sandbox, SANDBOX_RUNNABLE);
break;
}
case SANDBOX_PREEMPTED: {
assert(next_context->variant == ARCH_CONTEXT_VARIANT_SLOW);
/* arch_context_switch triggers a SIGUSR1, which transitions next_sandbox to running_user */
current_sandbox_set(next_sandbox);
break;
}
default: {
panic("Unexpectedly tried to switch to a sandbox in %s state\n",
sandbox_state_stringify(next_sandbox->state));
}
}
/* Assumption: Base Worker context should never be preempted */
assert(worker_thread_base_context.variant == ARCH_CONTEXT_VARIANT_FAST);
arch_context_switch(current_context, &worker_thread_base_context);
arch_context_switch(&worker_thread_base_context, next_context);
}
/**
* Mark the currently executing sandbox as blocked, remove it from the local runqueue,
* and switch to base context
/* A sandbox cannot execute the scheduler directly. It must yield to the base context, which then calls this
* function within its idle loop
*/
static inline void
scheduler_block(void)
scheduler_cooperative_sched()
{
/* Remove the sandbox we were just executing from the runqueue and mark as blocked */
struct sandbox *current_sandbox = current_sandbox_get();
/* Assumption: only called by the "base context" */
assert(current_sandbox_get() == NULL);
/* Try to wake up sleeping sandboxes */
scheduler_execute_epoll_loop();
assert(current_sandbox->state == SANDBOX_RUNNING_KERNEL);
sandbox_set_as_blocked(current_sandbox, SANDBOX_RUNNING_KERNEL);
generic_thread_dump_lock_overhead();
/* Switch to a sandbox if one is ready to run */
struct sandbox *next_sandbox = scheduler_get_next();
if (next_sandbox != NULL) scheduler_cooperative_switch_to(next_sandbox);
scheduler_yield();
/* Clear the completion queue */
local_completion_queue_free();
}
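Tracing the cooperative path end to end (a summary of the pieces already in this diff, not new code):

/* 1. A sandbox in RUNNING_KERNEL calls current_sandbox_sleep() or
 *    current_sandbox_exit(), which switches to worker_thread_base_context.
 * 2. The base context resumes in worker_thread_main's idle loop:
 *    while (true) scheduler_cooperative_sched();
 * 3. scheduler_cooperative_sched() runs the epoll loop (waking any ASLEEP
 *    sandboxes whose descriptors are ready), switches to the next runnable
 *    sandbox if one exists, and frees the completion queue. */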

@ -18,7 +18,7 @@
* Process all outstanding events in the local thread's epoll loop
*/
static inline void
worker_thread_execute_epoll_loop(void)
scheduler_execute_epoll_loop(void)
{
while (true) {
struct epoll_event epoll_events[RUNTIME_MAX_EPOLL_EVENTS];
@ -35,13 +35,11 @@ worker_thread_execute_epoll_loop(void)
for (int i = 0; i < descriptor_count; i++) {
if (epoll_events[i].events & (EPOLLIN | EPOLLOUT)) {
/* Re-add to runqueue if blocked */
/* Re-add to runqueue if asleep */
struct sandbox *sandbox = (struct sandbox *)epoll_events[i].data.ptr;
assert(sandbox);
if (sandbox->state == SANDBOX_BLOCKED) {
sandbox_set_as_runnable(sandbox, SANDBOX_BLOCKED);
}
if (sandbox->state == SANDBOX_ASLEEP) { sandbox_wakeup(sandbox); }
} else if (epoll_events[i].events & (EPOLLERR | EPOLLHUP)) {
/* Mystery: This seems to never fire. Why? Issue #130 */

@ -82,5 +82,5 @@ void software_interrupt_initialize(void);
void software_interrupt_arm_timer(void);
void software_interrupt_disarm_timer(void);
void software_interrupt_set_interval_duration(uint64_t cycles);
void software_interrupt_deferred_sigalrm_max_free(void);
void software_interrupt_deferred_sigalrm_max_print(void);
void software_interrupt_cleanup();

@ -4,8 +4,10 @@
#include "sandbox_functions.h"
#include "sandbox_receive_request.h"
#include "sandbox_send_response.h"
#include "sandbox_set_as_asleep.h"
#include "sandbox_set_as_error.h"
#include "sandbox_set_as_returned.h"
#include "sandbox_set_as_complete.h"
#include "sandbox_set_as_running_user.h"
#include "sandbox_set_as_running_kernel.h"
#include "sandbox_setup_arguments.h"
@ -23,6 +25,82 @@ thread_local struct sandbox_context_cache local_sandbox_context_cache = {
.module_indirect_table = NULL,
};
/**
* @brief Puts the current sandbox to sleep and switches to the worker thread base context
*
* This transitions the current sandbox to the SANDBOX_ASLEEP state before switching
*/
void
current_sandbox_sleep()
{
struct sandbox *sandbox = current_sandbox_get();
assert(sandbox != NULL);
struct arch_context *current_context = &sandbox->ctxt;
scheduler_log_sandbox_switch(sandbox, NULL);
generic_thread_dump_lock_overhead();
switch (sandbox->state) {
case SANDBOX_RUNNING_KERNEL: {
sandbox_sleep(sandbox);
break;
}
default:
panic("Cooperatively switching from a sandbox in a non-terminal %s state\n",
sandbox_state_stringify(sandbox->state));
}
current_sandbox_set(NULL);
/* Assumption: Base Worker context should never be preempted */
assert(worker_thread_base_context.variant == ARCH_CONTEXT_VARIANT_FAST);
arch_context_switch(current_context, &worker_thread_base_context);
}
/**
* @brief Switches from an executing sandbox to the worker thread base context
*
* This places the current sandbox on the completion queue if in RETURNED state
*/
void
current_sandbox_exit()
{
struct sandbox *sandbox = current_sandbox_get();
assert(sandbox != NULL);
struct arch_context *current_context = &sandbox->ctxt;
scheduler_log_sandbox_switch(sandbox, NULL);
generic_thread_dump_lock_overhead();
switch (sandbox->state) {
case SANDBOX_RETURNED:
/*
* We draw a distinction between RETURNED and COMPLETED because a sandbox cannot add itself to the
* completion queue
* TODO: I think this executes when running inside the sandbox, as it hasn't yet yielded
* See Issue #224 at https://github.com/gwsystems/sledge-serverless-framework/issues/224
*/
sandbox_set_as_complete(sandbox, SANDBOX_RETURNED);
break;
case SANDBOX_ERROR:
break;
default:
panic("Cooperatively switching from a sandbox in a non-terminal %s state\n",
sandbox_state_stringify(sandbox->state));
}
current_sandbox_set(NULL);
/* Assumption: Base Worker context should never be preempted */
assert(worker_thread_base_context.variant == ARCH_CONTEXT_VARIANT_FAST);
arch_context_switch(current_context, &worker_thread_base_context);
}
/**
* Sandbox execution logic
* Handles setup, request parsing, WebAssembly initialization, function execution, response building and
@ -82,7 +160,7 @@ current_sandbox_start(void)
done:
/* Cleanup connection and exit sandbox */
generic_thread_dump_lock_overhead();
scheduler_yield();
current_sandbox_exit();
/* This assert prevents a segfault discussed in
* https://github.com/phanikishoreg/awsm-Serverless-Framework/issues/66

@ -88,10 +88,11 @@ stub_init(int32_t offset)
uint32_t
wasm_read(int32_t filedes, int32_t buf_offset, int32_t nbyte)
{
struct sandbox *current_sandbox = current_sandbox_get();
/* Non-blocking copy on stdin */
if (filedes == 0) {
char * buffer = worker_thread_get_memory_ptr_void(buf_offset, nbyte);
struct sandbox * current_sandbox = current_sandbox_get();
struct http_request *current_request = &current_sandbox->http_request;
if (current_request->body_length <= 0) return 0;
int bytes_to_read = nbyte > current_request->body_length ? current_request->body_length : nbyte;
@ -109,7 +110,7 @@ wasm_read(int32_t filedes, int32_t buf_offset, int32_t nbyte)
int32_t length_read = (int32_t)read(filedes, buf, nbyte);
if (length_read < 0) {
if (errno == EAGAIN)
scheduler_block();
current_sandbox_sleep();
else {
/* All other errors */
debuglog("Error reading socket %d - %s\n", filedes, strerror(errno));

@ -225,7 +225,7 @@ runtime_configure()
runtime_sandbox_perf_log = fopen(runtime_sandbox_perf_log_path, "w");
if (runtime_sandbox_perf_log == NULL) { perror("sandbox perf log"); }
fprintf(runtime_sandbox_perf_log, "id,module,port,state,deadline,actual,queued,initializing,runnable,"
"running,blocked,returned,proc_MHz,memory\n");
"running,asleep,returned,proc_MHz,memory\n");
} else {
printf("\tSandbox Performance Log: Disabled\n");
}

@ -47,7 +47,7 @@ runtime_cleanup()
if (runtime_worker_threads) free(runtime_worker_threads);
software_interrupt_deferred_sigalrm_max_print();
software_interrupt_cleanup();
software_interrupt_deferred_sigalrm_max_free();
exit(EXIT_SUCCESS);
}

@ -16,7 +16,7 @@ const char *sandbox_state_labels[SANDBOX_STATE_COUNT] = {
[SANDBOX_PREEMPTED] = "Preempted",
[SANDBOX_RUNNING_KERNEL] = "Running Kernel",
[SANDBOX_RUNNING_USER] = "Running User",
[SANDBOX_BLOCKED] = "Blocked",
[SANDBOX_ASLEEP] = "Asleep",
[SANDBOX_RETURNED] = "Returned",
[SANDBOX_COMPLETE] = "Complete",
[SANDBOX_ERROR] = "Error"

@ -42,20 +42,32 @@ thread_local _Atomic volatile sig_atomic_t software_interrupt_signal_depth
_Atomic volatile sig_atomic_t *software_interrupt_deferred_sigalrm_max;
void
software_interrupt_deferred_sigalrm_max_alloc()
{
#ifdef LOG_DEFERRED_SIGALRM_MAX
software_interrupt_deferred_sigalrm_max = calloc(runtime_worker_threads_count, sizeof(_Atomic(sig_atomic_t)));
#endif
}
void
software_interrupt_deferred_sigalrm_max_free()
{
#ifdef LOG_DEFERRED_SIGALRM_MAX
if (software_interrupt_deferred_sigalrm_max) free((void *)software_interrupt_deferred_sigalrm_max);
#endif
}
void
software_interrupt_deferred_sigalrm_max_print()
{
#ifdef LOG_DEFERRED_SIGALRM_MAX
printf("Max Deferred Sigalrms\n");
for (int i = 0; i < runtime_worker_threads_count; i++) {
printf("Worker %d: %d\n", i, software_interrupt_deferred_sigalrm_max[i]);
}
fflush(stdout);
}
void
software_interrupt_cleanup()
{
if (software_interrupt_deferred_sigalrm_max) free((void *)software_interrupt_deferred_sigalrm_max);
#endif
}
/***************************************
@ -149,24 +161,19 @@ software_interrupt_handle_signals(int signal_type, siginfo_t *signal_info, void
case SIGALRM: {
sigalrm_propagate_workers(signal_info);
/* current_sandbox is NULL when the base worker context is active. The base context already executes
* scheduling logic, so just return. */
if (!current_sandbox) goto done;
/* We need to track what state was interrupted to conditionally restore user running after preemption */
current_sandbox->interrupted_state = current_sandbox->state;
if (current_sandbox->state == SANDBOX_RUNNING_USER) {
sandbox_set_as_running_kernel(current_sandbox, SANDBOX_RUNNING_USER);
atomic_store(&software_interrupt_deferred_sigalrm, 0);
current_sandbox = scheduler_preempt(interrupted_context);
} else {
/* Nonpreemptive, so defer */
if (!sandbox_is_preemptable(current_sandbox)) {
atomic_fetch_add(&software_interrupt_deferred_sigalrm, 1);
goto done;
}
scheduler_preemptive_sched(interrupted_context);
goto done;
}
case SIGUSR1: {
assert(current_sandbox);
assert(current_sandbox->state == SANDBOX_PREEMPTED);
assert(current_sandbox->ctxt.variant == ARCH_CONTEXT_VARIANT_SLOW);
atomic_fetch_add(&software_interrupt_SIGUSR_count, 1);
@ -175,9 +182,8 @@ software_interrupt_handle_signals(int signal_type, siginfo_t *signal_info, void
debuglog("Restoring sandbox: %lu, Stack %llu\n", current_sandbox->id,
current_sandbox->ctxt.mctx.gregs[REG_RSP]);
#endif
/* Overwrites the interrupted context with the context of the worker thread's current sandbox */
/* It is the responsibility of the caller to invoke current_sandbox_set before triggering the SIGUSR1 */
arch_context_restore_slow(&interrupted_context->uc_mcontext, &current_sandbox->ctxt);
scheduler_preemptive_switch_to(interrupted_context, current_sandbox);
goto done;
}
default: {
@ -194,9 +200,6 @@ software_interrupt_handle_signals(int signal_type, siginfo_t *signal_info, void
}
done:
atomic_fetch_sub(&software_interrupt_signal_depth, 1);
if (current_sandbox && current_sandbox->interrupted_state == SANDBOX_RUNNING_USER) {
sandbox_set_as_running_user(current_sandbox, SANDBOX_RUNNING_KERNEL);
}
return;
}
@ -275,7 +278,7 @@ software_interrupt_initialize(void)
}
}
software_interrupt_deferred_sigalrm_max = calloc(runtime_worker_threads_count, sizeof(_Atomic(sig_atomic_t)));
software_interrupt_deferred_sigalrm_max_alloc();
}
void

@ -15,7 +15,6 @@
#include "runtime.h"
#include "scheduler.h"
#include "worker_thread.h"
#include "worker_thread_execute_epoll_loop.h"
/***************************
* Worker Thread State *
@ -67,21 +66,8 @@ worker_thread_main(void *argument)
software_interrupt_unmask_signal(SIGUSR1);
}
/* Begin Worker Execution Loop */
struct sandbox *next_sandbox = NULL;
while (true) {
/* Assumption: current_sandbox should be unset at start of loop */
assert(current_sandbox_get() == NULL);
worker_thread_execute_epoll_loop();
/* Switch to a sandbox if one is ready to run */
next_sandbox = scheduler_get_next();
if (next_sandbox != NULL) { scheduler_switch_to(next_sandbox); }
/* Clear the completion queue */
local_completion_queue_free();
}
/* Idle Loop */
while (true) scheduler_cooperative_sched();
panic("Worker Thread unexpectedly completed run loop.");
}
