feat: enable direct fast switch between sandboxes

4 years ago · 45e44d51de
parent 74a715febb
commit 45e44d51de
8 changed files with 101 additions and 75 deletions
--- a/runtime/experiments/bimodal/edf_preemption.env
+++ b/runtime/experiments/bimodal/edf_preemption.env
@ -1,4 +1,3 @@
 SLEDGE_SCHEDULER=EDF
 SLEDGE_DISABLE_PREEMPTION=false
 SLEDGE_SIGALRM_HANDLER=TRIAGED
-SLEDGE_NWORKERS=4
--- a/runtime/include/arch/x86_64/context.h
+++ b/runtime/include/arch/x86_64/context.h
@ -74,22 +74,16 @@ arch_context_restore_fast(mcontext_t *active_context, struct arch_context *sandb
 * @param b - the registers and context of what we're switching to
 * @return always returns 0, indicating success
 *
- * NULL in either of these values indicates the "no sandbox to execute" state,
- * which defaults to resuming execution of main
 */
 static inline int
 arch_context_switch(struct arch_context *a, struct arch_context *b)
 {
-	/* if both a and b are NULL, there is no state change */
-	assert(a != NULL || b != NULL);
+	assert(a != NULL);
+	assert(b != NULL);

 	/* Assumption: The caller does not switch to itself */
 	assert(a != b);

-	/* Set any NULLs to worker_thread_base_context to resume execution of main */
-	if (a == NULL) a = &worker_thread_base_context;
-	if (b == NULL) b = &worker_thread_base_context;
-
 	/* A Transition {Running} -> Fast */
 	assert(a->variant == ARCH_CONTEXT_VARIANT_RUNNING);

--- a/runtime/include/sandbox_set_as_complete.h
+++ b/runtime/include/sandbox_set_as_complete.h
@ -54,8 +54,7 @@ sandbox_set_as_complete(struct sandbox *sandbox, sandbox_state_t last_state)
 	sandbox_perf_log_print_entry(sandbox);
 	sandbox_summarize_page_allocations(sandbox);

-	/* Do not touch sandbox state after adding to completion queue to avoid use-after-free bugs */
-	local_completion_queue_add(sandbox);
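+	/* The sandbox is deliberately not added to the completion queue here. The cooperative scheduler adds it just
+	 * before switching away, which avoids touching the sandbox after it has been queued to be freed */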
 }

 static inline void
--- a/runtime/include/sandbox_set_as_error.h
+++ b/runtime/include/sandbox_set_as_error.h
@ -61,8 +61,7 @@ sandbox_set_as_error(struct sandbox *sandbox, sandbox_state_t last_state)
 	sandbox_perf_log_print_entry(sandbox);
 	sandbox_summarize_page_allocations(sandbox);

-	/* Do not touch sandbox after adding to completion queue to avoid use-after-free bugs */
-	local_completion_queue_add(sandbox);
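+	/* The sandbox is deliberately not added to the completion queue here. The cooperative scheduler adds it just
+	 * before switching away, which avoids touching the sandbox after it has been queued to be freed */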
 }

 static inline void
--- a/runtime/include/scheduler.h
+++ b/runtime/include/scheduler.h
@ -213,28 +213,28 @@ scheduler_preemptive_sched(ucontext_t *interrupted_context)
 	/* Process epoll to make sure that all runnable jobs are considered for execution */
 	scheduler_execute_epoll_loop();

-	struct sandbox *current = current_sandbox_get();
-	assert(current != NULL);
-	assert(current->state == SANDBOX_INTERRUPTED);
+	struct sandbox *interrupted_sandbox = current_sandbox_get();
+	assert(interrupted_sandbox != NULL);
+	assert(interrupted_sandbox->state == SANDBOX_INTERRUPTED);

 	struct sandbox *next = scheduler_get_next();
 	/* Assumption: the current sandbox is on the runqueue, so the scheduler should always return something */
 	assert(next != NULL);

 	/* If current equals next, no switch is necessary, so resume execution */
-	if (current == next) {
-		sandbox_interrupt_return(current, SANDBOX_RUNNING_USER);
+	if (interrupted_sandbox == next) {
+		sandbox_interrupt_return(interrupted_sandbox, SANDBOX_RUNNING_USER);
 		return;
 	}

 #ifdef LOG_PREEMPTION
-	debuglog("Preempting sandbox %lu to run sandbox %lu\n", current->id, next->id);
+	debuglog("Preempting sandbox %lu to run sandbox %lu\n", interrupted_sandbox->id, next->id);
 #endif

 	/* Preempt executing sandbox */
-	scheduler_log_sandbox_switch(current, next);
-	sandbox_preempt(current);
-	arch_context_save_slow(&current->ctxt, &interrupted_context->uc_mcontext);
+	scheduler_log_sandbox_switch(interrupted_sandbox, next);
+	sandbox_preempt(interrupted_sandbox);
+	arch_context_save_slow(&interrupted_sandbox->ctxt, &interrupted_context->uc_mcontext);
 	scheduler_preemptive_switch_to(interrupted_context, next);
 }

@ -244,14 +244,12 @@ scheduler_preemptive_sched(ucontext_t *interrupted_context)
 * @param next_sandbox The Sandbox to switch to
 */
 static inline void
-scheduler_cooperative_switch_to(struct sandbox *next_sandbox)
+scheduler_cooperative_switch_to(struct arch_context *current_context, struct sandbox *next_sandbox)
 {
 	assert(current_sandbox_get() == NULL);

 	struct arch_context *next_context = &next_sandbox->ctxt;

-	scheduler_log_sandbox_switch(NULL, next_sandbox);
-
 	/* Switch to next sandbox */
 	switch (next_sandbox->state) {
 	case SANDBOX_RUNNABLE: {
@ -271,17 +269,64 @@ scheduler_cooperative_switch_to(struct sandbox *next_sandbox)
 		      sandbox_state_stringify(next_sandbox->state));
 	}
 	}
+	arch_context_switch(current_context, next_context);
+}

-	arch_context_switch(&worker_thread_base_context, next_context);
+static inline void
+scheduler_switch_to_base_context(struct arch_context *current_context)
+{
+	/* Switches from current_context to the worker thread base context (worker_thread_base_context).
+	 * current_context must be non-NULL and must not be the base context itself, because arch_context_switch
+	 * asserts both.
+	 *
+	 * Assumption: Base Worker context should never be preempted, so the state saved for it is expected to be
+	 * the FAST variant */
+	assert(worker_thread_base_context.variant == ARCH_CONTEXT_VARIANT_FAST);
+	arch_context_switch(current_context, &worker_thread_base_context);
 }

-/* A sandbox cannot execute the scheduler directly. It must yield to the base context, and then the context calls this
- * within its idle loop
+
+/**
+ * Idle loop of a worker thread, executed by the base context (i.e. while no sandbox is set as current).
+ * It is entered from worker_thread_main and loops forever, so it never returns. Sandboxes must not call this
+ * directly; a sleeping or exiting sandbox goes through scheduler_cooperative_sched instead.
+ */
+static inline void
+scheduler_idle_loop()
+{
+	while (true) {
+		/* Assumption: only called by the "base context", so no sandbox may be set as current */
+		assert(current_sandbox_get() == NULL);
+
+		/* Deferred signals should have been cleared by this point */
+		assert(deferred_sigalrm == 0);
+
+		/* Try to wakeup sleeping sandboxes */
+		scheduler_execute_epoll_loop();
+
+		/* Switch to a sandbox if one is ready to run. The base context is passed as the context being
+		 * switched away from; presumably execution resumes after this call once something switches back to
+		 * the base context (see scheduler_switch_to_base_context) - TODO confirm */
+		struct sandbox *next_sandbox = scheduler_get_next();
+		if (next_sandbox != NULL) {
+			scheduler_cooperative_switch_to(&worker_thread_base_context, next_sandbox);
+		}
+
+		/* Clear the completion queue. This runs on every iteration, whether or not a sandbox was run */
+		local_completion_queue_free();
+	}
+}
+
+/**
+ * @brief Used to cooperatively switch sandboxes when a sandbox sleeps or exits
+ * To avoid use-after-free bugs that interfere with our loggers, an exiting sandbox is not added to the completion
+ * queue until it is about to switch away. When a sandbox exits and switches away never to return, the boolean
+ * add_to_completion_queue needs to be set to true. Otherwise, we will leak sandboxes.
+ * @param add_to_completion_queue - Indicates that the sandbox should be added to the completion queue before switching
+ * away
 */
 static inline void
-scheduler_cooperative_sched()
+scheduler_cooperative_sched(bool add_to_completion_queue)
 {
-	/* Assumption: only called by the "base context" */
+	struct sandbox *exiting_sandbox = current_sandbox_get();
+	assert(exiting_sandbox != NULL);
+
+	/* Clearing current sandbox indicates we are entering the cooperative scheduler */
+	current_sandbox_set(NULL);
+	barrier();
+	software_interrupt_deferred_sigalrm_clear();
+
+	struct arch_context *exiting_context = &exiting_sandbox->ctxt;
+
+	/* Assumption: Called by an exiting or sleeping sandbox, which was cleared as the current sandbox above */
 	assert(current_sandbox_get() == NULL);

 	/* Deferred signals should have been cleared by this point */
@ -290,12 +335,29 @@ scheduler_cooperative_sched()
 	/* Try to wakeup sleeping sandboxes */
 	scheduler_execute_epoll_loop();

+	/* The exiting sandbox has not been added to the completion queue yet, so freeing the queue here cannot free
+	 * the sandbox we are still executing on */
+	local_completion_queue_free();
+
 	/* Switch to a sandbox if one is ready to run */
 	struct sandbox *next_sandbox = scheduler_get_next();
-	if (next_sandbox != NULL) scheduler_cooperative_switch_to(next_sandbox);

-	/* Clear the completion queue */
-	local_completion_queue_free();
+	/* If our sandbox slept and immediately woke up, we can just return */
+	if (next_sandbox == exiting_sandbox) {
+		sandbox_set_as_running_sys(next_sandbox, SANDBOX_RUNNABLE);
+		current_sandbox_set(next_sandbox);
+		return;
+	}
+
+	scheduler_log_sandbox_switch(exiting_sandbox, next_sandbox);
+
+	if (add_to_completion_queue) local_completion_queue_add(exiting_sandbox);
+	/* Do not touch sandbox struct after this point! */
+
+	if (next_sandbox != NULL) {
+		scheduler_cooperative_switch_to(exiting_context, next_sandbox);
+	} else {
+		scheduler_switch_to_base_context(exiting_context);
+	}
 }


@ -307,15 +369,3 @@ scheduler_worker_would_preempt(int worker_idx)
 	uint64_t global_deadline = global_request_scheduler_peek();
 	return global_deadline < local_deadline;
 }
-
-static inline void
-scheduler_switch_to_base_context(struct arch_context *current_context)
-{
-	/* Clear any deferred sigalrms we hit while cleaning up sandbox. We'll run the scheduler cooperatively
-	in the base context */
-	software_interrupt_deferred_sigalrm_clear();
-
-	/* Assumption: Base Worker context should never be preempted */
-	assert(worker_thread_base_context.variant == ARCH_CONTEXT_VARIANT_FAST);
-	arch_context_switch(current_context, &worker_thread_base_context);
-}
--- a/runtime/src/current_sandbox.c
+++ b/runtime/src/current_sandbox.c
@ -33,28 +33,22 @@ thread_local struct sandbox_context_cache local_sandbox_context_cache = {
 void
 current_sandbox_sleep()
 {
-	struct sandbox *sandbox = current_sandbox_get();
-	current_sandbox_set(NULL);
-
-	assert(sandbox != NULL);
-	struct arch_context *current_context = &sandbox->ctxt;
+	struct sandbox *sleeping_sandbox = current_sandbox_get();
+	assert(sleeping_sandbox != NULL);

-	scheduler_log_sandbox_switch(sandbox, NULL);
 	generic_thread_dump_lock_overhead();

-	assert(sandbox != NULL);
-
-	switch (sandbox->state) {
+	switch (sleeping_sandbox->state) {
 	case SANDBOX_RUNNING_SYS: {
-		sandbox_sleep(sandbox);
+		sandbox_sleep(sleeping_sandbox);
 		break;
 	}
 	default:
 		panic("Cooperatively switching from a sandbox in a non-terminal %s state\n",
-		      sandbox_state_stringify(sandbox->state));
+		      sandbox_state_stringify(sleeping_sandbox->state));
 	}

-	scheduler_switch_to_base_context(current_context);
+	scheduler_cooperative_sched(false);
 }

 /**
@ -65,31 +59,24 @@ current_sandbox_sleep()
 void
 current_sandbox_exit()
 {
-	struct sandbox *sandbox = current_sandbox_get();
-	current_sandbox_set(NULL);
-
-	assert(sandbox != NULL);
-	struct arch_context *current_context = &sandbox->ctxt;
+	struct sandbox *exiting_sandbox = current_sandbox_get();
+	assert(exiting_sandbox != NULL);

-	scheduler_log_sandbox_switch(sandbox, NULL);
 	generic_thread_dump_lock_overhead();

-	assert(sandbox != NULL);
-
-	switch (sandbox->state) {
+	switch (exiting_sandbox->state) {
 	case SANDBOX_RETURNED:
-		sandbox_exit_success(sandbox);
+		sandbox_exit_success(exiting_sandbox);
 		break;
 	case SANDBOX_RUNNING_SYS:
-		sandbox_exit_error(sandbox);
+		sandbox_exit_error(exiting_sandbox);
 		break;
 	default:
 		panic("Cooperatively switching from a sandbox in a non-terminal %s state\n",
-		      sandbox_state_stringify(sandbox->state));
+		      sandbox_state_stringify(exiting_sandbox->state));
 	}
-	/* Do not access sandbox after this, as it is on the completion queue! */

-	scheduler_switch_to_base_context(current_context);
+	scheduler_cooperative_sched(true);

 	/* The scheduler should never switch back to completed sandboxes */
 	assert(0);
--- a/runtime/src/software_interrupt.c
+++ b/runtime/src/software_interrupt.c
@ -133,7 +133,6 @@ software_interrupt_handle_signals(int signal_type, siginfo_t *signal_info, void

 		software_interrupt_counts_sigusr_increment();
 #ifdef LOG_PREEMPTION
-		debuglog("Total SIGUSR1 Received: %d\n", sigusr_count);
 		debuglog("Restoring sandbox: %lu, Stack %llu\n", current_sandbox->id,
 		         current_sandbox->ctxt.mctx.gregs[REG_RSP]);
 #endif
--- a/runtime/src/worker_thread.c
+++ b/runtime/src/worker_thread.c
@ -66,8 +66,7 @@ worker_thread_main(void *argument)
 		software_interrupt_unmask_signal(SIGUSR1);
 	}

-	/* Idle Loop */
-	while (true) scheduler_cooperative_sched();
+	scheduler_idle_loop();

-	panic("Worker Thread unexpectedly completed run loop.");
+	panic("Worker Thread unexpectedly completed idle loop.");
 }