From cb09ed51eaff9478d92f01b2e8dcc8b90416c9b5 Mon Sep 17 00:00:00 2001
From: Emil
Date: Tue, 19 Jul 2022 16:31:55 -0400
Subject: [PATCH] Updated deferrable server (#352)

* refactor: use var across the Makefile instead of sledgert string
* refactor bash libraries
  - remove hash symbol for scv_to_dat
  - add set_print_pretty for gdb mode
  - add logging for client
  - change printf format for perf log in table from float to int since usec
* cleanup: generalize redundant files in the experiments:
  - remove individual gitignore, have single gitignore in parent (tests)
  - remove individual env files, have single copies in 'common' dir
  - remove individual install.sh, have install_tools.sh (in bash libs)
* - add comment into install_tools.sh
  - rename mts to mtds
* update the env files dir to common
* move multi-tenancy env files to another directory (temp)
* apply the deferrable server to the new master
* add the deferrable server env file to GitHub CI test
* clang-format
* add deferrable server attributes to all experiment specs
* remove previously added generic interface that was only for Def Serv
* Accommodated important changes Sean requested:
  - remove unnecessary assertions from priority_queue.h
  - set the runtime queue size to MAX_TENANT
  - add the scheduler updates back to the sandbox_interrupt state transition
---
 .vscode/settings.json | 3 +-
 applications/wasm_apps | 2 +-
 runtime/Makefile | 2 +-
 .../include/global_request_scheduler_mtds.h | 12 +
 runtime/include/local_runqueue_mtds.h | 9 +
 runtime/include/priority_queue.h | 3 -
 runtime/include/route_config.h | 2 +-
 runtime/include/runtime.h | 4 +-
 runtime/include/sandbox_functions.h | 10 +-
 runtime/include/sandbox_perf_log.h | 2 +-
 runtime/include/sandbox_set_as_error.h | 2 +
 runtime/include/sandbox_set_as_interrupted.h | 6 +
 runtime/include/sandbox_set_as_returned.h | 2 +
 runtime/include/sandbox_set_as_running_sys.h | 3 +
 runtime/include/sandbox_set_as_running_user.h | 3 +
 runtime/include/sandbox_total.h | 2 +-
 runtime/include/scheduler.h | 92 ++++-
 runtime/include/scheduler_options.h | 6 +-
 runtime/include/tenant.h | 46 +++
 runtime/include/tenant_config.h | 34 +-
 runtime/include/tenant_config_parse.h | 20 +-
 runtime/include/tenant_functions.h | 82 +++++
 runtime/src/current_sandbox.c | 2 +-
 runtime/src/global_request_scheduler_mtds.c | 331 ++++++++++++++++++
 runtime/src/local_runqueue_minheap.c | 3 +-
 runtime/src/local_runqueue_mtds.c | 262 ++++++++++++++
 runtime/src/main.c | 27 +-
 runtime/src/software_interrupt.c | 13 +-
 runtime/src/worker_thread.c | 9 +
 .../CMSIS_5_NN/imageclassification/spec.json | 2 +
 tests/TinyEKF/by_iteration/spec.json | 2 +
 tests/TinyEKF/one_iteration/spec.json | 2 +
 tests/common/mtds_preemption.env | 3 +
 tests/deadline_description/spec.json | 4 +-
 tests/empty/concurrency/spec.json | 2 +
 tests/fibonacci/bimodal/spec.json | 6 +-
 tests/fibonacci/concurrency/README.md | 26 ++
 tests/fibonacci/concurrency/latency.gnuplot | 21 ++
 tests/fibonacci/concurrency/run_hey.sh | 163 +++++++++
 tests/fibonacci/concurrency/run_lt.sh | 163 +++++++++
 tests/fibonacci/concurrency/spec.json | 19 +
 tests/fibonacci/concurrency/success.gnuplot | 12 +
 .../fibonacci/concurrency/throughput.gnuplot | 12 +
 tests/gocr/by_dpi/spec.json | 4 +-
 tests/gocr/by_font/spec.json | 4 +-
 tests/gocr/by_word/spec.json | 2 +
 tests/gocr/fivebyeight/spec.json | 4 +-
 tests/gocr/handwriting/spec.json | 4 +-
 tests/gocr/hyde/spec.json | 4 +-
 tests/html/spec.json | 2 +
 tests/scratch_storage/spec.json | 2 +
 .../sod/image_resize/by_resolution/spec.json | 4 +-
tests/sod/image_resize/test/spec.json | 2 + tests/sod/lpd/by_plate_count/spec.json | 2 + tests/speechtotext/spec.json | 2 + tests/stack_overflow/spec.json | 2 + tests/traps/spec.json | 2 + tests/workload_mix/spec.json | 2 + tests/workload_mix_realworld/spec.json | 2 + 59 files changed, 1444 insertions(+), 31 deletions(-) create mode 100644 runtime/include/global_request_scheduler_mtds.h create mode 100644 runtime/include/local_runqueue_mtds.h create mode 100644 runtime/src/global_request_scheduler_mtds.c create mode 100644 runtime/src/local_runqueue_mtds.c create mode 100644 tests/common/mtds_preemption.env create mode 100644 tests/fibonacci/concurrency/README.md create mode 100644 tests/fibonacci/concurrency/latency.gnuplot create mode 100755 tests/fibonacci/concurrency/run_hey.sh create mode 100755 tests/fibonacci/concurrency/run_lt.sh create mode 100644 tests/fibonacci/concurrency/spec.json create mode 100644 tests/fibonacci/concurrency/success.gnuplot create mode 100644 tests/fibonacci/concurrency/throughput.gnuplot diff --git a/.vscode/settings.json b/.vscode/settings.json index c850152..5ad4c9a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -123,7 +123,8 @@ "route_config_parse.h": "c", "route.h": "c", "pool.h": "c", - "local_cleanup_queue.h": "c" + "local_cleanup_queue.h": "c", + "sandbox_state_transition.h": "c" }, "files.exclude": { "**/.git": true, diff --git a/applications/wasm_apps b/applications/wasm_apps index 20a7c88..0b9f67d 160000 --- a/applications/wasm_apps +++ b/applications/wasm_apps @@ -1 +1 @@ -Subproject commit 20a7c88816c8f8882e03d42c76ff8c1e72bfeaec +Subproject commit 0b9f67d75fd9dab652e1995e7adf91806080523b diff --git a/runtime/Makefile b/runtime/Makefile index f770742..2bbcfd1 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -131,7 +131,7 @@ clean: thirdparty.clean runtime.clean bin/${BINARY_NAME}: ${HEADER_DEPS} ${CFILES} @echo "Compiling runtime" @mkdir -p bin/ - ${CC} ${INCLUDES} ${CFLAGS} ${LDFLAGS} ${JSMNCFLAGS} -L/usr/lib/ ${CFILES} -o bin/${BINARY_NAME} + @${CC} ${INCLUDES} ${CFLAGS} ${LDFLAGS} ${JSMNCFLAGS} -L/usr/lib/ ${CFILES} -o bin/${BINARY_NAME} .PHONY: runtime runtime: bin/${BINARY_NAME} diff --git a/runtime/include/global_request_scheduler_mtds.h b/runtime/include/global_request_scheduler_mtds.h new file mode 100644 index 0000000..4a73782 --- /dev/null +++ b/runtime/include/global_request_scheduler_mtds.h @@ -0,0 +1,12 @@ +#pragma once + +#include "global_request_scheduler.h" + +void global_request_scheduler_mtds_initialize(); +int global_request_scheduler_mtds_remove_with_mt_class(struct sandbox **, uint64_t, enum MULTI_TENANCY_CLASS); +uint64_t global_request_scheduler_mtds_guaranteed_peek(); +uint64_t global_request_scheduler_mtds_default_peek(); +void global_timeout_queue_add(struct tenant *); +void global_request_scheduler_mtds_promote_lock(struct tenant_global_request_queue *); +void global_request_scheduler_mtds_demote_nolock(struct tenant_global_request_queue *); +void global_timeout_queue_process_promotions(); diff --git a/runtime/include/local_runqueue_mtds.h b/runtime/include/local_runqueue_mtds.h new file mode 100644 index 0000000..09d6253 --- /dev/null +++ b/runtime/include/local_runqueue_mtds.h @@ -0,0 +1,9 @@ +#pragma once + +#include "tenant.h" + +void local_runqueue_mtds_initialize(); +void local_runqueue_mtds_promote(struct perworker_tenant_sandbox_queue *); +void local_runqueue_mtds_demote(struct perworker_tenant_sandbox_queue *); +void local_timeout_queue_add(struct tenant *); +void 
local_timeout_queue_process_promotions(); diff --git a/runtime/include/priority_queue.h b/runtime/include/priority_queue.h index ae92366..dff49d9 100644 --- a/runtime/include/priority_queue.h +++ b/runtime/include/priority_queue.h @@ -332,7 +332,6 @@ static inline int priority_queue_length_nolock(struct priority_queue *priority_queue) { assert(priority_queue != NULL); - assert(!listener_thread_is_running()); assert(!priority_queue->use_lock || LOCK_IS_LOCKED(&priority_queue->lock)); return priority_queue->size; @@ -404,7 +403,6 @@ priority_queue_delete_nolock(struct priority_queue *priority_queue, void *value) { assert(priority_queue != NULL); assert(value != NULL); - assert(!listener_thread_is_running()); assert(!priority_queue->use_lock || LOCK_IS_LOCKED(&priority_queue->lock)); for (int i = 1; i <= priority_queue->size; i++) { @@ -470,7 +468,6 @@ priority_queue_top_nolock(struct priority_queue *priority_queue, void **dequeued assert(priority_queue != NULL); assert(dequeued_element != NULL); assert(priority_queue->get_priority_fn != NULL); - assert(!listener_thread_is_running()); assert(!priority_queue->use_lock || LOCK_IS_LOCKED(&priority_queue->lock)); int return_code; diff --git a/runtime/include/route_config.h b/runtime/include/route_config.h index aeb214e..af44241 100644 --- a/runtime/include/route_config.h +++ b/runtime/include/route_config.h @@ -75,7 +75,7 @@ route_config_validate(struct route_config *config, bool *did_set) config->http_resp_content_type = "text/plain"; } - if (scheduler == SCHEDULER_EDF) { + if (scheduler != SCHEDULER_FIFO) { if (did_set[route_config_member_relative_deadline_us] == false) { fprintf(stderr, "relative_deadline_us is required\n"); return -1; diff --git a/runtime/include/runtime.h b/runtime/include/runtime.h index 71f0df2..5da9a05 100644 --- a/runtime/include/runtime.h +++ b/runtime/include/runtime.h @@ -23,9 +23,10 @@ #define RUNTIME_LOG_FILE "sledge.log" #define RUNTIME_MAX_EPOLL_EVENTS 128 -#define RUNTIME_MAX_TENANT_COUNT 32 /* Static buffer size for per-worker globals */ +#define RUNTIME_MAX_TENANT_COUNT 32 #define RUNTIME_RELATIVE_DEADLINE_US_MAX 3600000000 /* One Hour. Fits in uint32_t */ #define RUNTIME_RUNQUEUE_SIZE 256 /* Minimum guaranteed size. Might grow! 
*/ +#define RUNTIME_TENANT_QUEUE_SIZE 4096 enum RUNTIME_SIGALRM_HANDLER { @@ -41,6 +42,7 @@ extern pthread_t *runtime_worker_threads; extern uint32_t runtime_worker_threads_count; extern int *runtime_worker_threads_argument; extern uint64_t *runtime_worker_threads_deadline; +extern uint64_t runtime_boot_timestamp; extern void runtime_initialize(void); extern void runtime_set_pthread_prio(pthread_t thread, unsigned int nice); diff --git a/runtime/include/sandbox_functions.h b/runtime/include/sandbox_functions.h index af6b28b..754d40c 100644 --- a/runtime/include/sandbox_functions.h +++ b/runtime/include/sandbox_functions.h @@ -49,4 +49,12 @@ sandbox_get_priority(void *element) { struct sandbox *sandbox = (struct sandbox *)element; return sandbox->absolute_deadline; -}; +} + +static inline void +sandbox_process_scheduler_updates(struct sandbox *sandbox) +{ + if (tenant_is_paid(sandbox->tenant)) { + atomic_fetch_sub(&sandbox->tenant->remaining_budget, sandbox->last_state_duration); + } +} diff --git a/runtime/include/sandbox_perf_log.h b/runtime/include/sandbox_perf_log.h index 6becea3..23b55a9 100644 --- a/runtime/include/sandbox_perf_log.h +++ b/runtime/include/sandbox_perf_log.h @@ -13,7 +13,7 @@ static inline void sandbox_perf_log_print_header() { if (sandbox_perf_log == NULL) { perror("sandbox perf log"); } - fprintf(sandbox_perf_log, "id,module,port,state,deadline,actual,queued,uninitialized,allocated,initialized," + fprintf(sandbox_perf_log, "id,tenant,route,state,deadline,actual,queued,uninitialized,allocated,initialized," "runnable,interrupted,preempted," "running_sys,running_user,asleep,returned,complete,error,proc_MHz,memory\n"); } diff --git a/runtime/include/sandbox_set_as_error.h b/runtime/include/sandbox_set_as_error.h index 4525264..2f259d1 100644 --- a/runtime/include/sandbox_set_as_error.h +++ b/runtime/include/sandbox_set_as_error.h @@ -79,4 +79,6 @@ sandbox_exit_error(struct sandbox *sandbox) { assert(sandbox->state == SANDBOX_RUNNING_SYS); sandbox_set_as_error(sandbox, SANDBOX_RUNNING_SYS); + + sandbox_process_scheduler_updates(sandbox); } diff --git a/runtime/include/sandbox_set_as_interrupted.h b/runtime/include/sandbox_set_as_interrupted.h index b9ca48a..e2ee369 100644 --- a/runtime/include/sandbox_set_as_interrupted.h +++ b/runtime/include/sandbox_set_as_interrupted.h @@ -40,6 +40,8 @@ static inline void sandbox_interrupt(struct sandbox *sandbox) { sandbox_set_as_interrupted(sandbox, sandbox->state); + + sandbox_process_scheduler_updates(sandbox); } @@ -63,6 +65,10 @@ sandbox_interrupt_return(struct sandbox *sandbox, sandbox_state_t interrupted_st sandbox_state_totals_increment(interrupted_state); sandbox_state_totals_decrement(SANDBOX_INTERRUPTED); + if (sandbox->absolute_deadline < now) { + // printf("Interrupted Sandbox missed deadline already!\n"); + } + barrier(); /* WARNING: Code after this assignment may be preemptable */ sandbox->state = interrupted_state; diff --git a/runtime/include/sandbox_set_as_returned.h b/runtime/include/sandbox_set_as_returned.h index 88cc472..6f7c3ee 100644 --- a/runtime/include/sandbox_set_as_returned.h +++ b/runtime/include/sandbox_set_as_returned.h @@ -59,4 +59,6 @@ sandbox_set_as_returned(struct sandbox *sandbox, sandbox_state_t last_state) /* State Change Hooks */ sandbox_state_transition_from_hook(sandbox, last_state); sandbox_state_transition_to_hook(sandbox, SANDBOX_RETURNED); + + sandbox_process_scheduler_updates(sandbox); } diff --git a/runtime/include/sandbox_set_as_running_sys.h 
b/runtime/include/sandbox_set_as_running_sys.h index b9ee4aa..5c00013 100644 --- a/runtime/include/sandbox_set_as_running_sys.h +++ b/runtime/include/sandbox_set_as_running_sys.h @@ -42,6 +42,7 @@ sandbox_set_as_running_sys(struct sandbox *sandbox, sandbox_state_t last_state) assert(now > sandbox->timestamp_of.last_state_change); sandbox->last_state_duration = now - sandbox->timestamp_of.last_state_change; sandbox->duration_of_state[last_state] += sandbox->last_state_duration; + sandbox->timestamp_of.last_state_change = now; sandbox_state_history_append(&sandbox->state_history, SANDBOX_RUNNING_SYS); sandbox_state_totals_increment(SANDBOX_RUNNING_SYS); sandbox_state_totals_decrement(last_state); @@ -56,4 +57,6 @@ sandbox_syscall(struct sandbox *sandbox) { assert(sandbox->state == SANDBOX_RUNNING_USER); sandbox_set_as_running_sys(sandbox, SANDBOX_RUNNING_USER); + + sandbox_process_scheduler_updates(sandbox); } diff --git a/runtime/include/sandbox_set_as_running_user.h b/runtime/include/sandbox_set_as_running_user.h index 8559ec7..bf6ade7 100644 --- a/runtime/include/sandbox_set_as_running_user.h +++ b/runtime/include/sandbox_set_as_running_user.h @@ -47,6 +47,9 @@ sandbox_set_as_running_user(struct sandbox *sandbox, sandbox_state_t last_state) sandbox_state_transition_from_hook(sandbox, last_state); sandbox_state_transition_to_hook(sandbox, SANDBOX_RUNNING_USER); + if (last_state == SANDBOX_RUNNING_SYS) + sandbox_process_scheduler_updates(sandbox); // TODO: is this code preemptable? Ok to be? + barrier(); sandbox->state = SANDBOX_RUNNING_USER; /* WARNING: All code after this assignment is preemptable */ diff --git a/runtime/include/sandbox_total.h b/runtime/include/sandbox_total.h index 6adf746..d381852 100644 --- a/runtime/include/sandbox_total.h +++ b/runtime/include/sandbox_total.h @@ -9,7 +9,7 @@ extern _Atomic uint32_t sandbox_total; static inline void sandbox_total_initialize() { - atomic_init(&sandbox_total, 0); + atomic_init(&sandbox_total, 1); /* keep zero for error-handling purposes */ } static inline uint32_t diff --git a/runtime/include/scheduler.h b/runtime/include/scheduler.h index 7c3caf8..206a391 100644 --- a/runtime/include/scheduler.h +++ b/runtime/include/scheduler.h @@ -8,10 +8,12 @@ #include "global_request_scheduler.h" #include "global_request_scheduler_deque.h" #include "global_request_scheduler_minheap.h" +#include "global_request_scheduler_mtds.h" #include "local_runqueue.h" #include "local_runqueue_minheap.h" #include "local_runqueue_list.h" #include "local_cleanup_queue.h" +#include "local_runqueue_mtds.h" #include "panic.h" #include "sandbox_functions.h" #include "sandbox_types.h" @@ -62,11 +64,53 @@ * initialize a sandbox. */ +static inline struct sandbox * +scheduler_mtdbf_get_next() +{ + return NULL; +} + +static inline struct sandbox * +scheduler_mtds_get_next() +{ + /* Get the deadline of the sandbox at the head of the local queue */ + struct sandbox *local = local_runqueue_get_next(); + uint64_t local_deadline = local == NULL ? 
UINT64_MAX : local->absolute_deadline; + enum MULTI_TENANCY_CLASS local_mt_class = MT_DEFAULT; + struct sandbox *global = NULL; + + if (local) local_mt_class = local->tenant->pwt_sandboxes[worker_thread_idx].mt_class; + + uint64_t global_guaranteed_deadline = global_request_scheduler_mtds_guaranteed_peek(); + uint64_t global_default_deadline = global_request_scheduler_mtds_default_peek(); + + /* Try to pull and allocate from the global queue if earlier + * This will be placed at the head of the local runqueue */ + switch (local_mt_class) { + case MT_GUARANTEED: + if (global_guaranteed_deadline >= local_deadline) goto done; + break; + case MT_DEFAULT: + if (global_guaranteed_deadline == UINT64_MAX && global_default_deadline >= local_deadline) goto done; + break; + } + + if (global_request_scheduler_mtds_remove_with_mt_class(&global, local_deadline, local_mt_class) == 0) { + assert(global != NULL); + sandbox_prepare_execution_environment(global); + assert(global->state == SANDBOX_INITIALIZED); + sandbox_set_as_runnable(global, SANDBOX_INITIALIZED); + } + +/* Return what is at the head of the local runqueue or NULL if empty */ +done: + return local_runqueue_get_next(); +} static inline struct sandbox * scheduler_edf_get_next() { - /* Get the deadline of the sandbox at the head of the local request queue */ + /* Get the deadline of the sandbox at the head of the local queue */ struct sandbox *local = local_runqueue_get_next(); uint64_t local_deadline = local == NULL ? UINT64_MAX : local->absolute_deadline; struct sandbox *global = NULL; @@ -116,6 +160,10 @@ static inline struct sandbox * scheduler_get_next() { switch (scheduler) { + case SCHEDULER_MTDBF: + return scheduler_mtdbf_get_next(); + case SCHEDULER_MTDS: + return scheduler_mtds_get_next(); case SCHEDULER_EDF: return scheduler_edf_get_next(); case SCHEDULER_FIFO: @@ -129,6 +177,12 @@ static inline void scheduler_initialize() { switch (scheduler) { + case SCHEDULER_MTDBF: + // global_request_scheduler_mtdbf_initialize(); + break; + case SCHEDULER_MTDS: + global_request_scheduler_mtds_initialize(); + break; case SCHEDULER_EDF: global_request_scheduler_minheap_initialize(); break; @@ -144,6 +198,12 @@ static inline void scheduler_runqueue_initialize() { switch (scheduler) { + case SCHEDULER_MTDBF: + // local_runqueue_mtdbf_initialize(); + break; + case SCHEDULER_MTDS: + local_runqueue_mtds_initialize(); + break; case SCHEDULER_EDF: local_runqueue_minheap_initialize(); break; @@ -163,6 +223,10 @@ scheduler_print(enum SCHEDULER variant) return "FIFO"; case SCHEDULER_EDF: return "EDF"; + case SCHEDULER_MTDS: + return "MTDS"; + case SCHEDULER_MTDBF: + return "MTDBF"; } } @@ -213,6 +277,30 @@ scheduler_preemptive_switch_to(ucontext_t *interrupted_context, struct sandbox * } } +/** + * Call either at preemptions or blockings to update the scheduler-specific + * properties for the given tenant. 
+ */ +static inline void +scheduler_process_policy_specific_updates_on_interrupts(struct sandbox *interrupted_sandbox) +{ + switch (scheduler) { + case SCHEDULER_FIFO: + return; + case SCHEDULER_EDF: + return; + case SCHEDULER_MTDS: + local_timeout_queue_process_promotions(); + return; + case SCHEDULER_MTDBF: + // scheduler_check_messages_from_listener(); + if (interrupted_sandbox->state != SANDBOX_ERROR) { + sandbox_process_scheduler_updates(interrupted_sandbox); + } + return; + } +} + /** * Called by the SIGALRM handler after a quantum * Assumes the caller validates that there is something to preempt @@ -230,6 +318,8 @@ scheduler_preemptive_sched(ucontext_t *interrupted_context) struct sandbox *interrupted_sandbox = current_sandbox_get(); assert(interrupted_sandbox != NULL); assert(interrupted_sandbox->state == SANDBOX_INTERRUPTED); + // printf ("Worker #%d interrupted sandbox #%lu\n", worker_thread_idx, interrupted_sandbox->id); + scheduler_process_policy_specific_updates_on_interrupts(interrupted_sandbox); struct sandbox *next = scheduler_get_next(); /* Assumption: the current sandbox is on the runqueue, so the scheduler should always return something */ diff --git a/runtime/include/scheduler_options.h b/runtime/include/scheduler_options.h index 882476d..930dbd6 100644 --- a/runtime/include/scheduler_options.h +++ b/runtime/include/scheduler_options.h @@ -2,8 +2,10 @@ enum SCHEDULER { - SCHEDULER_FIFO = 0, - SCHEDULER_EDF = 1 + SCHEDULER_FIFO = 0, + SCHEDULER_EDF = 1, + SCHEDULER_MTDS = 2, + SCHEDULER_MTDBF = 3 }; extern enum SCHEDULER scheduler; diff --git a/runtime/include/tenant.h b/runtime/include/tenant.h index 06fa730..596c477 100644 --- a/runtime/include/tenant.h +++ b/runtime/include/tenant.h @@ -5,10 +5,56 @@ #include "module_database.h" #include "tcp_server.h" +enum MULTI_TENANCY_CLASS +{ + MT_DEFAULT, + MT_GUARANTEED +}; + +struct tenant_timeout { + uint64_t timeout; + struct tenant *tenant; + struct perworker_tenant_sandbox_queue *pwt; +}; + +struct perworker_tenant_sandbox_queue { + struct priority_queue *sandboxes; + struct tenant *tenant; // to be able to find the RB/MB/RP/RT. 
+ struct tenant_timeout tenant_timeout; + enum MULTI_TENANCY_CLASS mt_class; // check whether the corresponding PWM has been demoted +} __attribute__((aligned(CACHE_PAD))); + +struct tenant_global_request_queue { + struct priority_queue *sandbox_requests; + struct tenant *tenant; + struct tenant_timeout tenant_timeout; + _Atomic volatile enum MULTI_TENANCY_CLASS mt_class; +}; + struct tenant { char *name; struct tcp_server tcp_server; http_router_t router; struct module_database module_db; struct map scratch_storage; + + /* Deferrable Server Attributes */ + uint64_t replenishment_period; /* cycles, not changing after init */ + uint64_t max_budget; /* cycles, not changing after init */ + _Atomic volatile int64_t remaining_budget; /* cycles left till next replenishment, can be negative */ + + struct perworker_tenant_sandbox_queue *pwt_sandboxes; + struct tenant_global_request_queue *tgrq_requests; }; + + +/** + * Check whether a tenant is a paid tenant + * @param tenant tenant + * @returns true if the tenant is paid, false otherwise + */ +static inline uint64_t +tenant_is_paid(struct tenant *tenant) +{ + return tenant->replenishment_period > 0; +} diff --git a/runtime/include/tenant_config.h b/runtime/include/tenant_config.h index fa3eda3..d907718 100644 --- a/runtime/include/tenant_config.h +++ b/runtime/include/tenant_config.h @@ -12,6 +12,8 @@ enum tenant_config_member { tenant_config_member_name, tenant_config_member_port, + tenant_config_member_replenishment_period_us, + tenant_config_member_max_budget_us, tenant_config_member_routes, tenant_config_member_len }; @@ -19,6 +21,8 @@ enum tenant_config_member struct tenant_config { char *name; uint16_t port; + uint32_t replenishment_period_us; + uint32_t max_budget_us; struct route_config *routes; size_t routes_len; }; @@ -27,7 +31,9 @@ static inline void tenant_config_deinit(struct tenant_config *config) { if (config->name != NULL) free(config->name); - config->name = NULL; + config->name = NULL; + config->replenishment_period_us = 0; + config->max_budget_us = 0; for (int i = 0; i < config->routes_len; i++) { route_config_deinit(&config->routes[i]); } free(config->routes); config->routes = NULL; @@ -39,6 +45,8 @@ tenant_config_print(struct tenant_config *config) { printf("[Tenant] Name: %s\n", config->name); printf("[Tenant] Path: %d\n", config->port); + printf("[Tenant] Replenishment Period (us): %u\n", config->replenishment_period_us); + printf("[Tenant] Max Budget (us): %u\n", config->max_budget_us); printf("[Tenant] Routes Size: %zu\n", config->routes_len); for (int i = 0; i < config->routes_len; i++) { route_config_print(&config->routes[i]); } } @@ -61,6 +69,30 @@ tenant_config_validate(struct tenant_config *config, bool *did_set) return -1; } + if (scheduler == SCHEDULER_MTDS) { + if (did_set[tenant_config_member_replenishment_period_us] == false) { + fprintf(stderr, "replenishment-period-us field is required\n"); + return -1; + } + + if (config->replenishment_period_us > (uint32_t)RUNTIME_RELATIVE_DEADLINE_US_MAX) { + fprintf(stderr, "Relative-deadline-us must be between 0 and %u, was %u\n", + (uint32_t)RUNTIME_RELATIVE_DEADLINE_US_MAX, config->replenishment_period_us); + return -1; + } + + if (did_set[tenant_config_member_max_budget_us] == false) { + fprintf(stderr, "max-budget-us field is required\n"); + return -1; + } + + if (config->max_budget_us > (uint32_t)RUNTIME_RELATIVE_DEADLINE_US_MAX) { + fprintf(stderr, "Max-budget-us must be between 0 and %u, was %u\n", + (uint32_t)RUNTIME_RELATIVE_DEADLINE_US_MAX, 
config->max_budget_us); + return -1; + } + } + if (config->routes_len == 0) { fprintf(stderr, "one or more routes are required\n"); return -1; diff --git a/runtime/include/tenant_config_parse.h b/runtime/include/tenant_config_parse.h index 9c5cec3..14b7aef 100644 --- a/runtime/include/tenant_config_parse.h +++ b/runtime/include/tenant_config_parse.h @@ -9,7 +9,8 @@ #include "route_config_parse.h" #include "tenant_config.h" -static const char *tenant_config_json_keys[tenant_config_member_len] = { "name", "port", "routes" }; +static const char *tenant_config_json_keys[tenant_config_member_len] = { "name", "port", "replenishment-period-us", + "max-budget-us", "routes" }; static inline int tenant_config_set_key_once(bool *did_set, enum tenant_config_member member) @@ -61,6 +62,23 @@ tenant_config_parse(struct tenant_config *config, const char *json_buf, jsmntok_ int rc = parse_uint16_t(tokens[i], json_buf, tenant_config_json_keys[tenant_config_member_port], &config->port); if (rc < 0) return -1; + } else if (strcmp(key, tenant_config_json_keys[tenant_config_member_replenishment_period_us]) == 0) { + if (!has_valid_type(tokens[i], key, JSMN_PRIMITIVE, json_buf)) return -1; + if (tenant_config_set_key_once(did_set, tenant_config_member_replenishment_period_us) == -1) + return -1; + + int rc = parse_uint32_t(tokens[i], json_buf, + tenant_config_json_keys[tenant_config_member_replenishment_period_us], + &config->replenishment_period_us); + if (rc < 0) return -1; + } else if (strcmp(key, tenant_config_json_keys[tenant_config_member_max_budget_us]) == 0) { + if (!has_valid_type(tokens[i], key, JSMN_PRIMITIVE, json_buf)) return -1; + if (tenant_config_set_key_once(did_set, tenant_config_member_max_budget_us) == -1) return -1; + + int rc = parse_uint32_t(tokens[i], json_buf, + tenant_config_json_keys[tenant_config_member_max_budget_us], + &config->max_budget_us); + if (rc < 0) return -1; } else if (strcmp(key, tenant_config_json_keys[tenant_config_member_routes]) == 0) { if (!has_valid_type(tokens[i], key, JSMN_ARRAY, json_buf)) return -1; if (tenant_config_set_key_once(did_set, tenant_config_member_routes) == -1) return -1; diff --git a/runtime/include/tenant_functions.h b/runtime/include/tenant_functions.h index 89bb6f8..cb76651 100644 --- a/runtime/include/tenant_functions.h +++ b/runtime/include/tenant_functions.h @@ -11,6 +11,8 @@ #include "scheduler_options.h" #include "tenant.h" #include "tenant_config.h" +#include "priority_queue.h" +#include "sandbox_functions.h" int tenant_database_add(struct tenant *tenant); struct tenant *tenant_database_find_by_name(char *name); @@ -18,6 +20,57 @@ struct tenant *tenant_database_find_by_socket_descriptor(int socket_descriptor); struct tenant *tenant_database_find_by_port(uint16_t port); struct tenant *tenant_database_find_by_ptr(void *ptr); +static inline int +tenant_policy_specific_init(struct tenant *tenant, struct tenant_config *config) +{ + switch (scheduler) { + case SCHEDULER_FIFO: + break; + case SCHEDULER_EDF: + break; + case SCHEDULER_MTDS: + /* Deferable Server Initialization */ + tenant->replenishment_period = (uint64_t)config->replenishment_period_us * runtime_processor_speed_MHz; + tenant->max_budget = (uint64_t)config->max_budget_us * runtime_processor_speed_MHz; + tenant->remaining_budget = tenant->max_budget; + + tenant->pwt_sandboxes = (struct perworker_tenant_sandbox_queue *)malloc( + runtime_worker_threads_count * sizeof(struct perworker_tenant_sandbox_queue)); + if (!tenant->pwt_sandboxes) { + fprintf(stderr, "Failed to allocate 
tenant_sandboxes array: %s\n", strerror(errno)); + return -1; + }; + + memset(tenant->pwt_sandboxes, 0, + runtime_worker_threads_count * sizeof(struct perworker_tenant_sandbox_queue)); + + for (int i = 0; i < runtime_worker_threads_count; i++) { + tenant->pwt_sandboxes[i].sandboxes = priority_queue_initialize(RUNTIME_TENANT_QUEUE_SIZE, false, + sandbox_get_priority); + tenant->pwt_sandboxes[i].tenant = tenant; + tenant->pwt_sandboxes[i].mt_class = (tenant->replenishment_period == 0) ? MT_DEFAULT + : MT_GUARANTEED; + tenant->pwt_sandboxes[i].tenant_timeout.tenant = tenant; + tenant->pwt_sandboxes[i].tenant_timeout.pwt = &tenant->pwt_sandboxes[i]; + } + + /* Initialize the tenant's global request queue */ + tenant->tgrq_requests = malloc(sizeof(struct tenant_global_request_queue)); + tenant->tgrq_requests->sandbox_requests = priority_queue_initialize(RUNTIME_TENANT_QUEUE_SIZE, false, + sandbox_get_priority); + tenant->tgrq_requests->tenant = tenant; + tenant->tgrq_requests->mt_class = (tenant->replenishment_period == 0) ? MT_DEFAULT : MT_GUARANTEED; + tenant->tgrq_requests->tenant_timeout.tenant = tenant; + tenant->tgrq_requests->tenant_timeout.pwt = NULL; + break; + + case SCHEDULER_MTDBF: + break; + } + + return 0; +} + static inline struct tenant * tenant_alloc(struct tenant_config *config) { @@ -39,6 +92,9 @@ tenant_alloc(struct tenant_config *config) module_database_init(&tenant->module_db); map_init(&tenant->scratch_storage); + /* Deferrable Server init */ + tenant_policy_specific_init(tenant, config); + for (int i = 0; i < config->routes_len; i++) { struct module *module = module_database_find_by_path(&tenant->module_db, config->routes[i].path); if (module == NULL) { @@ -69,6 +125,32 @@ tenant_alloc(struct tenant_config *config) return tenant; } +/** + * Get Timeout priority for Priority Queue ordering + * @param element tenant_timeout + * @returns the priority of the tenant _timeout element + */ +static inline uint64_t +tenant_timeout_get_priority(void *element) +{ + return ((struct tenant_timeout *)element)->timeout; +} + +/** + * Compute the next timeout given a tenant's replenishment period + * @param m_replenishment_period + * @return given tenant's next timeout + */ +static inline uint64_t +get_next_timeout_of_tenant(uint64_t replenishment_period) +{ + assert(replenishment_period != 0); + uint64_t now = __getcycles(); + return runtime_boot_timestamp + + ((now - runtime_boot_timestamp) / replenishment_period + 1) * replenishment_period; +} + + /** * Start the tenant as a server listening at tenant->port * @param tenant diff --git a/runtime/src/current_sandbox.c b/runtime/src/current_sandbox.c index 1d3f43e..792558c 100644 --- a/runtime/src/current_sandbox.c +++ b/runtime/src/current_sandbox.c @@ -46,7 +46,7 @@ current_sandbox_sleep() /** * @brief Switches from an executing sandbox to the worker thread base context * - * This places the current sandbox on the completion queue if in RETURNED or RUNNING_SYS state + * This places the current sandbox on the cleanup queue if in RETURNED or RUNNING_SYS state */ void current_sandbox_exit() diff --git a/runtime/src/global_request_scheduler_mtds.c b/runtime/src/global_request_scheduler_mtds.c new file mode 100644 index 0000000..09e82cb --- /dev/null +++ b/runtime/src/global_request_scheduler_mtds.c @@ -0,0 +1,331 @@ +#include +#include + +#include "global_request_scheduler.h" +#include "listener_thread.h" +#include "panic.h" +#include "priority_queue.h" +#include "runtime.h" +#include "tenant_functions.h" + +static struct priority_queue 
*global_request_scheduler_mtds_guaranteed; +static struct priority_queue *global_request_scheduler_mtds_default; +static struct priority_queue *global_tenant_timeout_queue; +static lock_t global_lock; + +static inline uint64_t +tenant_request_queue_get_priority(void *element) +{ + struct tenant_global_request_queue *tgrq = (struct tenant_global_request_queue *)element; + struct sandbox *sandbox = NULL; + priority_queue_top_nolock(tgrq->sandbox_requests, (void **)&sandbox); + return (sandbox) ? sandbox->absolute_deadline : UINT64_MAX; +} + +/** + * Demotes the given tenant's request queue, which means deletes the TGRQ from the Guaranteed queue + * and adds to the Default queue. + */ +void +global_request_scheduler_mtds_demote_nolock(struct tenant_global_request_queue *tgrq) +{ + assert(tgrq != NULL); + + if (tgrq->mt_class == MT_DEFAULT) return; + + /* Delete the corresponding TGRQ from the Guaranteed queue */ + int rc = priority_queue_delete_nolock(global_request_scheduler_mtds_guaranteed, tgrq); + if (rc == -1) { + panic("Tried to delete a non-present TGRQ from the Global Guaranteed queue. Already deleted?, ITS " + "SIZE: %d", + priority_queue_length_nolock(tgrq->sandbox_requests)); + } + + /* Add the corresponding TGRQ to the Default queue */ + rc = priority_queue_enqueue_nolock(global_request_scheduler_mtds_default, tgrq); + if (rc == -ENOSPC) panic("Global Default queue is full!\n"); +} + +/** + * Pushes a sandbox request to the global runqueue + * @param sandbox + * @returns pointer to request if added. NULL otherwise + */ +static struct sandbox * +global_request_scheduler_mtds_add(struct sandbox *sandbox) +{ + assert(sandbox); + assert(global_request_scheduler_mtds_guaranteed && global_request_scheduler_mtds_default); + if (unlikely(!listener_thread_is_running())) panic("%s is only callable by the listener thread\n", __func__); + + struct tenant_global_request_queue *tgrq = sandbox->tenant->tgrq_requests; + + LOCK_LOCK(&global_lock); + + struct priority_queue *destination_queue = global_request_scheduler_mtds_default; + if (sandbox->tenant->tgrq_requests->mt_class == MT_GUARANTEED) { + destination_queue = global_request_scheduler_mtds_guaranteed; + } + + uint64_t last_mrq_deadline = priority_queue_peek(tgrq->sandbox_requests); + + int rc = priority_queue_enqueue_nolock(tgrq->sandbox_requests, sandbox); + if (rc == -ENOSPC) panic("Tenant's Request Queue is full\n"); + // debuglog("Added a sandbox to the TGRQ"); + + /* Maintain the minheap structure by Removing and Adding the TGRQ from and to the global runqueue. + * Do this only when the TGRQ's priority is updated. + */ + if (priority_queue_peek(tgrq->sandbox_requests) < last_mrq_deadline) { + priority_queue_delete_nolock(destination_queue, tgrq); + + rc = priority_queue_enqueue_nolock(destination_queue, tgrq); + if (rc == -ENOSPC) panic("Global Runqueue is full!\n"); + // debuglog("Added the TGRQ back to the Global runqueue - %s to Heapify", QUEUE_NAME); + } + + LOCK_UNLOCK(&global_lock); + + return sandbox; +} + +/** + * @param pointer to the pointer that we want to set to the address of the removed sandbox request + * @returns 0 if successful, -ENOENT if empty + */ +int +global_request_scheduler_mtds_remove(struct sandbox **removed_sandbox) +{ + /* This function won't be used with the MTDS scheduler. Keeping merely for the polymorhism. 
*/ + return -1; +} + + +/** + * @param removed_sandbox pointer to set to removed sandbox request + * @param target_deadline the deadline that the request must be earlier than to dequeue + * @returns 0 if successful, -ENOENT if empty or if request isn't earlier than target_deadline + */ +int +global_request_scheduler_mtds_remove_if_earlier(struct sandbox **removed_sandbox, uint64_t target_deadline) +{ + /* This function won't be used with the MTDS scheduler. Keeping merely for the polymorhism. */ + return -1; +} + +/** + * @param removed_sandbox pointer to set to removed sandbox request + * @param target_deadline the deadline that the request must be earlier than to dequeue + * @param mt_class the multi-tenancy class of the global request to compare the target deadline against + * @returns 0 if successful, -ENOENT if empty or if request isn't earlier than target_deadline + */ +int +global_request_scheduler_mtds_remove_with_mt_class(struct sandbox **removed_sandbox, uint64_t target_deadline, + enum MULTI_TENANCY_CLASS target_mt_class) +{ + int rc = -ENOENT; + ; + + LOCK_LOCK(&global_lock); + + /* Avoid unnessary locks when the target_deadline is tighter than the head of the Global runqueue */ + uint64_t global_guaranteed_deadline = priority_queue_peek(global_request_scheduler_mtds_guaranteed); + uint64_t global_default_deadline = priority_queue_peek(global_request_scheduler_mtds_default); + + switch (target_mt_class) { + case MT_GUARANTEED: + if (global_guaranteed_deadline >= target_deadline) goto done; + break; + case MT_DEFAULT: + if (global_guaranteed_deadline == UINT64_MAX && global_default_deadline >= target_deadline) goto done; + break; + } + + struct tenant_global_request_queue *top_tgrq = NULL; + struct priority_queue *destination_queue = global_request_scheduler_mtds_guaranteed; + + /* Spot the Tenant Global Request Queue (TGRQ) to remove the sandbox request from */ + rc = priority_queue_top_nolock(destination_queue, (void **)&top_tgrq); + if (rc == -ENOENT) { + if (target_mt_class == MT_GUARANTEED) goto done; + + destination_queue = global_request_scheduler_mtds_default; + + rc = priority_queue_top_nolock(destination_queue, (void **)&top_tgrq); + if (rc == -ENOENT) goto done; + } else { + if (top_tgrq->mt_class == MT_GUARANTEED && top_tgrq->tenant->remaining_budget <= 0) { + global_request_scheduler_mtds_demote_nolock(top_tgrq); + // debuglog("Demoted '%s' GLOBALLY", top_tgrq->tenant->name); + top_tgrq->mt_class = MT_DEFAULT; + + rc = -ENOENT; + goto done; + } + } + + assert(top_tgrq); + + /* Remove the sandbox from the corresponding TGRQ */ + rc = priority_queue_dequeue_nolock(top_tgrq->sandbox_requests, (void **)removed_sandbox); + assert(rc == 0); + + /* Delete the TGRQ from the global runqueue completely if TGRQ is empty, re-add otherwise to heapify */ + if (priority_queue_delete_nolock(destination_queue, top_tgrq) == -1) { + panic("Tried to delete an TGRQ from the Global runqueue, but was not present"); + } + + if (priority_queue_length_nolock(top_tgrq->sandbox_requests) > 0) { + priority_queue_enqueue_nolock(destination_queue, top_tgrq); + } + +done: + LOCK_UNLOCK(&global_lock); + return rc; +} + +/** + * Peek at the priority of the highest priority task without having to take the lock + * Because this is a min-heap PQ, the highest priority is the lowest 64-bit integer + * This is used to store an absolute deadline + * @returns value of highest priority value in queue or ULONG_MAX if empty + */ +static uint64_t +global_request_scheduler_mtds_peek(void) +{ + uint64_t val = 
priority_queue_peek(global_request_scheduler_mtds_guaranteed); + if (val == UINT64_MAX) val = priority_queue_peek(global_request_scheduler_mtds_default); + + return val; +} + + +uint64_t +global_request_scheduler_mtds_guaranteed_peek(void) +{ + return priority_queue_peek(global_request_scheduler_mtds_guaranteed); +} + +uint64_t +global_request_scheduler_mtds_default_peek(void) +{ + return priority_queue_peek(global_request_scheduler_mtds_default); +} + + +/** + * Initializes the variant and registers against the polymorphic interface + */ +void +global_request_scheduler_mtds_initialize() +{ + global_request_scheduler_mtds_guaranteed = priority_queue_initialize(RUNTIME_MAX_TENANT_COUNT, false, + tenant_request_queue_get_priority); + global_request_scheduler_mtds_default = priority_queue_initialize(RUNTIME_MAX_TENANT_COUNT, false, + tenant_request_queue_get_priority); + + global_tenant_timeout_queue = priority_queue_initialize(RUNTIME_MAX_TENANT_COUNT, false, + tenant_timeout_get_priority); + + LOCK_INIT(&global_lock); + + struct global_request_scheduler_config config = { + .add_fn = global_request_scheduler_mtds_add, + .remove_fn = global_request_scheduler_mtds_remove, + .remove_if_earlier_fn = global_request_scheduler_mtds_remove_if_earlier, + .peek_fn = global_request_scheduler_mtds_peek + }; + + global_request_scheduler_initialize(&config); +} + +void +global_request_scheduler_mtds_free() +{ + priority_queue_free(global_request_scheduler_mtds_guaranteed); + priority_queue_free(global_request_scheduler_mtds_default); + priority_queue_free(global_tenant_timeout_queue); +} + + +void +global_timeout_queue_add(struct tenant *tenant) +{ + tenant->tgrq_requests->tenant_timeout.timeout = get_next_timeout_of_tenant(tenant->replenishment_period); + priority_queue_enqueue_nolock(global_tenant_timeout_queue, &tenant->tgrq_requests->tenant_timeout); +} + +/** + * Promotes the given tenant, which means deletes from the Default queue + * and adds to the Guaranteed queue. + */ +void +global_request_scheduler_mtds_promote_lock(struct tenant_global_request_queue *tgrq) +{ + assert(tgrq != NULL); + // assert(priority_queue_length_nolock(tgrq->sandbox_requests) == 0); + + LOCK_LOCK(&global_lock); + + if (tgrq->mt_class == MT_GUARANTEED) goto done; + if (priority_queue_length_nolock(tgrq->sandbox_requests) == 0) goto done; + + /* Delete the corresponding TGRQ from the Guaranteed queue */ + int rc = priority_queue_delete_nolock(global_request_scheduler_mtds_default, tgrq); + if (rc == -1) { + panic("Tried to delete a non-present TGRQ from the Global Default queue. Already deleted?, ITS SIZE: " + "%d", + priority_queue_length_nolock(tgrq->sandbox_requests)); + } + + /* Add the corresponding TGRQ to the Default queue */ + rc = priority_queue_enqueue_nolock(global_request_scheduler_mtds_guaranteed, tgrq); + if (rc == -ENOSPC) panic("Global Guaranteed queue is full!\n"); + +done: + LOCK_UNLOCK(&global_lock); +} + +/* + * Checks if there are any tenant timers that run out in the GLOBAL queue, + * if so promote that tenant. 
+ */ +void +global_timeout_queue_process_promotions() +{ + struct tenant_timeout *top_tenant_timeout = NULL; + + /* Check the timeout queue for a potential tenant to get PRomoted */ + priority_queue_top_nolock(global_tenant_timeout_queue, (void **)&top_tenant_timeout); + if (top_tenant_timeout == NULL) return; // no guaranteed tenants + + struct tenant *tenant = NULL; + struct tenant_global_request_queue *tgrq_to_promote = NULL; + uint64_t now = __getcycles(); + int64_t prev_budget; + + while (now >= top_tenant_timeout->timeout) { + tenant = top_tenant_timeout->tenant; + tgrq_to_promote = tenant->tgrq_requests; + + if (tgrq_to_promote->mt_class == MT_DEFAULT) { + if (priority_queue_length_nolock(tgrq_to_promote->sandbox_requests) > 0) + global_request_scheduler_mtds_promote_lock(tgrq_to_promote); + tgrq_to_promote->mt_class = MT_GUARANTEED; + // debuglog("Promoted '%s' GLOBALLY", tenant->name); + } + + // TODO: We need a smarter technique to reset budget to consider budget overusage: + prev_budget = atomic_load(&tenant->remaining_budget); + while (!atomic_compare_exchange_strong(&tenant->remaining_budget, &prev_budget, tenant->max_budget)) + ; + + /* Reheapify the timeout queue with the updated timeout value of the tenant */ + priority_queue_delete_nolock(global_tenant_timeout_queue, top_tenant_timeout); + top_tenant_timeout->timeout = get_next_timeout_of_tenant(tenant->replenishment_period); + priority_queue_enqueue_nolock(global_tenant_timeout_queue, top_tenant_timeout); + + priority_queue_top_nolock(global_tenant_timeout_queue, (void **)&top_tenant_timeout); + now = __getcycles(); + } +} diff --git a/runtime/src/local_runqueue_minheap.c b/runtime/src/local_runqueue_minheap.c index ba90f3d..d4d92a0 100644 --- a/runtime/src/local_runqueue_minheap.c +++ b/runtime/src/local_runqueue_minheap.c @@ -57,10 +57,9 @@ local_runqueue_minheap_delete(struct sandbox *sandbox) /** * This function determines the next sandbox to run. - * This is either the head of the runqueue or the head of the request queue + * This is the head of the local runqueue * * Execute the sandbox at the head of the thread local runqueue - * If the runqueue is empty, pull a fresh batch of sandbox requests, instantiate them, and then execute the new head * @return the sandbox to execute or NULL if none are available */ struct sandbox * diff --git a/runtime/src/local_runqueue_mtds.c b/runtime/src/local_runqueue_mtds.c new file mode 100644 index 0000000..6f7c0ca --- /dev/null +++ b/runtime/src/local_runqueue_mtds.c @@ -0,0 +1,262 @@ +#include +#include + +#include "arch/context.h" +#include "current_sandbox.h" +#include "debuglog.h" +#include "global_request_scheduler.h" +#include "local_runqueue.h" +#include "local_runqueue_mtds.h" +#include "panic.h" +#include "priority_queue.h" +#include "sandbox_functions.h" +#include "tenant_functions.h" +#include "runtime.h" + +thread_local static struct priority_queue *local_runqueue_mtds_guaranteed; +thread_local static struct priority_queue *local_runqueue_mtds_default; + +extern __thread struct priority_queue *worker_thread_timeout_queue; + +/** + * Get Per-Worker-Tenant priority for Priority Queue ordering + * @param element the PWT + * @returns the priority of the head of the PWT + */ +static inline uint64_t +perworker_tenant_get_priority(void *element) +{ + struct perworker_tenant_sandbox_queue *pwt = (struct perworker_tenant_sandbox_queue *)element; + struct sandbox *sandbox = NULL; + priority_queue_top_nolock(pwt->sandboxes, (void **)&sandbox); + return (sandbox) ? 
sandbox->absolute_deadline : UINT64_MAX; +} + +/** + * Checks if the run queue is empty + * @returns true if empty. false otherwise + */ +bool +local_runqueue_mtds_is_empty() +{ + return priority_queue_length_nolock(local_runqueue_mtds_guaranteed) == 0 + && priority_queue_length_nolock(local_runqueue_mtds_default) == 0; +} + +/** + * Adds a sandbox to the run queue + * @param sandbox + * @returns pointer to sandbox added + */ +void +local_runqueue_mtds_add(struct sandbox *sandbox) +{ + assert(sandbox != NULL); + + struct perworker_tenant_sandbox_queue *pwt = &sandbox->tenant->pwt_sandboxes[worker_thread_idx]; + struct priority_queue *destination_queue = pwt->mt_class == MT_GUARANTEED ? local_runqueue_mtds_guaranteed + : local_runqueue_mtds_default; + + uint64_t prev_pwt_deadline = priority_queue_peek(pwt->sandboxes); + + /* Add the sandbox to the per-worker-tenant (pwt) queue */ + int rc = priority_queue_enqueue_nolock(pwt->sandboxes, sandbox); + if (rc == -ENOSPC) panic("Per Worker Tenant queue is full!\n"); + // debuglog("Added a sandbox to the PWT of tenant '%s'", sandbox->tenant->name); + + /* If the tenant was not in the local runqueue, then since we are the first sandbox for it in this worker, add + * the tenant. */ + if (priority_queue_length_nolock(pwt->sandboxes) == 1) { + /* Add sandbox tenant to the worker's timeout queue if guaranteed tenant and only if first sandbox*/ + if (tenant_is_paid(sandbox->tenant)) { + pwt->tenant_timeout.timeout = get_next_timeout_of_tenant(sandbox->tenant->replenishment_period); + priority_queue_enqueue_nolock(worker_thread_timeout_queue, &pwt->tenant_timeout); + } + + rc = priority_queue_enqueue_nolock(destination_queue, pwt); + + return; + } + + if (sandbox->absolute_deadline < prev_pwt_deadline) { + /* Maintain the minheap structure by deleting & adding the pwt. + * Do this only when the pwt's priority is updated. 
*/ + rc = priority_queue_delete_nolock(destination_queue, pwt); + assert(rc == 0); + rc = priority_queue_enqueue_nolock(destination_queue, pwt); + if (rc == -ENOSPC) panic("Worker Local Runqueue is full!\n"); + } +} + +/** + * Deletes a sandbox from the runqueue + * @param sandbox to delete + */ +static void +local_runqueue_mtds_delete(struct sandbox *sandbox) +{ + assert(sandbox != NULL); + struct perworker_tenant_sandbox_queue *pwt = &sandbox->tenant->pwt_sandboxes[worker_thread_idx]; + + /* Delete the sandbox from the corresponding Per-Worker-Tenant queue */ + if (priority_queue_delete_nolock(pwt->sandboxes, sandbox) == -1) { + panic("Tried to delete sandbox %lu from PWT queue, but was not present\n", sandbox->id); + } + + struct priority_queue *destination_queue = local_runqueue_mtds_default; + + if (pwt->mt_class == MT_GUARANTEED) { destination_queue = local_runqueue_mtds_guaranteed; } + + + /* Delete the PWT from the local runqueue completely if pwt is empty, re-add otherwise to heapify */ + if (priority_queue_delete_nolock(destination_queue, pwt) == -1) { + // TODO: Apply the composite way of PQ deletion O(logn) + panic("Tried to delete a PWT of %s from local runqueue, but was not present\n", pwt->tenant->name); + } + + /* Add the PWT back to the local runqueue if it still has other sandboxes inside */ + if (priority_queue_length_nolock(pwt->sandboxes) > 0) { + priority_queue_enqueue_nolock(destination_queue, pwt); + } else if (tenant_is_paid(pwt->tenant)) { + priority_queue_delete_nolock(worker_thread_timeout_queue, &pwt->tenant_timeout); + pwt->mt_class = MT_GUARANTEED; + } +} + +/** + * This function determines the next sandbox to run. + * This is the head of the runqueue + * + * Execute the sandbox at the head of the thread local runqueue + * @return the sandbox to execute or NULL if none are available + */ +struct sandbox * +local_runqueue_mtds_get_next() +{ + /* Get the deadline of the sandbox at the head of the local request queue */ + struct perworker_tenant_sandbox_queue *next_pwt = NULL; + struct priority_queue *dq = local_runqueue_mtds_guaranteed; + + /* Check the local guaranteed queue for any potential demotions */ + int rc = priority_queue_top_nolock(dq, (void **)&next_pwt); + while (rc != -ENOENT && next_pwt->tenant->remaining_budget <= 0) { // next_pwt->mt_class==MT_DEFAULT){ + local_runqueue_mtds_demote(next_pwt); + // debuglog("Demoted '%s' locally in GetNext", next_pwt->tenant->name); + next_pwt->mt_class = MT_DEFAULT; + rc = priority_queue_top_nolock(dq, (void **)&next_pwt); + } + + if (rc == -ENOENT) { + dq = local_runqueue_mtds_default; + rc = priority_queue_top_nolock(dq, (void **)&next_pwt); + if (rc == -ENOENT) return NULL; + } + + struct sandbox *next_sandbox = NULL; + priority_queue_top_nolock(next_pwt->sandboxes, (void **)&next_sandbox); + assert(next_sandbox); + + return next_sandbox; +} + +/** + * Registers the PS variant with the polymorphic interface + */ +void +local_runqueue_mtds_initialize() +{ + /* Initialize local state */ + local_runqueue_mtds_guaranteed = priority_queue_initialize(RUNTIME_MAX_TENANT_COUNT, false, + perworker_tenant_get_priority); + local_runqueue_mtds_default = priority_queue_initialize(RUNTIME_MAX_TENANT_COUNT, false, + perworker_tenant_get_priority); + + /* Register Function Pointers for Abstract Scheduling API */ + struct local_runqueue_config config = { .add_fn = local_runqueue_mtds_add, + .is_empty_fn = local_runqueue_mtds_is_empty, + .delete_fn = local_runqueue_mtds_delete, + .get_next_fn = local_runqueue_mtds_get_next }; 
+ + local_runqueue_initialize(&config); +} + +/** + * Promotes the given per-worker-tenant queue, which means deletes from the Default queue + * and adds to the Guaranteed queue. + */ +void +local_runqueue_mtds_promote(struct perworker_tenant_sandbox_queue *pwt) +{ + assert(pwt != NULL); + + /* Delete the corresponding PWT from the Guaranteed queue */ + int rc = priority_queue_delete_nolock(local_runqueue_mtds_default, pwt); + if (rc == -1) { + panic("Tried to delete a non-present PWT of %s from the Local Default queue. Already deleted? , ITS " + "SIZE: %d", + pwt->tenant->name, priority_queue_length_nolock(pwt->sandboxes)); + } + + /* Add the corresponding PWT to the Default queue */ + rc = priority_queue_enqueue_nolock(local_runqueue_mtds_guaranteed, pwt); + if (rc == -ENOSPC) panic("Local Guaranteed queue is full!\n"); +} + +/** + * Demotes the given per-worker-tenant queue, which means deletes from the Guaranteed queue + * and adds to the Default queue. + */ +void +local_runqueue_mtds_demote(struct perworker_tenant_sandbox_queue *pwt) +{ + assert(pwt != NULL); + + /* Delete the corresponding PWT from the Guaranteed queue */ + int rc = priority_queue_delete_nolock(local_runqueue_mtds_guaranteed, pwt); + if (rc == -1) { + panic("Tried to delete a non-present PWT of %s from the Local Guaranteed queue. Already deleted? , ITS " + "SIZE: %d", + pwt->tenant->name, priority_queue_length_nolock(pwt->sandboxes)); + } + + /* Add the corresponding PWT to the Default queue */ + rc = priority_queue_enqueue_nolock(local_runqueue_mtds_default, pwt); + if (rc == -ENOSPC) panic("Local Default queue is full!\n"); +} + + +/* + * Checks if there are any tenant timers that run out in the LOCAL queue, + * if so promote that tenant. + */ +void +local_timeout_queue_process_promotions() +{ + struct tenant_timeout *top_tenant_timeout = NULL; + + /* Check the timeout queue for a potential tenant to get PRomoted */ + priority_queue_top_nolock(worker_thread_timeout_queue, (void **)&top_tenant_timeout); + if (top_tenant_timeout == NULL) return; // no guaranteed tenants + + struct perworker_tenant_sandbox_queue *pwt_to_promote = NULL; + uint64_t now = __getcycles(); + + while (now >= top_tenant_timeout->timeout) { + pwt_to_promote = top_tenant_timeout->pwt; + assert(priority_queue_length_nolock(pwt_to_promote->sandboxes) > 0); + + if (pwt_to_promote->mt_class == MT_DEFAULT) { + local_runqueue_mtds_promote(pwt_to_promote); + pwt_to_promote->mt_class = MT_GUARANTEED; + // debuglog("Promoted '%s' locally", top_tenant_timeout->tenant->name); + } + + /* Reheapify the timeout queue with the updated timeout value of the tenant */ + priority_queue_delete_nolock(worker_thread_timeout_queue, top_tenant_timeout); + top_tenant_timeout->timeout = get_next_timeout_of_tenant( + top_tenant_timeout->tenant->replenishment_period); + priority_queue_enqueue_nolock(worker_thread_timeout_queue, top_tenant_timeout); + + priority_queue_top_nolock(worker_thread_timeout_queue, (void **)&top_tenant_timeout); + } +} diff --git a/runtime/src/main.c b/runtime/src/main.c index 902ff46..6772016 100644 --- a/runtime/src/main.c +++ b/runtime/src/main.c @@ -38,6 +38,7 @@ enum RUNTIME_SIGALRM_HANDLER runtime_sigalrm_handler = RUNTIME_SIGALRM_HANDLER_B bool runtime_preemption_enabled = true; uint32_t runtime_quantum_us = 5000; /* 5ms */ +uint64_t runtime_boot_timestamp; /** * Returns instructions on use of CLI if used incorrectly @@ -178,12 +179,19 @@ runtime_configure() /* Scheduler Policy */ char *scheduler_policy = getenv("SLEDGE_SCHEDULER"); if 
(scheduler_policy == NULL) scheduler_policy = "EDF"; - if (strcmp(scheduler_policy, "EDF") == 0) { + if (strcmp(scheduler_policy, "MTDBF") == 0) { +#ifndef TRAFFIC_CONTROL + panic("This scheduler requires the TRAFFIC_CONTROL toggle ON!"); +#endif + scheduler = SCHEDULER_MTDBF; + } else if (strcmp(scheduler_policy, "MTDS") == 0) { + scheduler = SCHEDULER_MTDS; + } else if (strcmp(scheduler_policy, "EDF") == 0) { scheduler = SCHEDULER_EDF; } else if (strcmp(scheduler_policy, "FIFO") == 0) { scheduler = SCHEDULER_FIFO; } else { - panic("Invalid scheduler policy: %s. Must be {EDF|FIFO}\n", scheduler_policy); + panic("Invalid scheduler policy: %s. Must be {MTDBF|MTDS|EDF|FIFO}\n", scheduler_policy); } pretty_print_key_value("Scheduler Policy", "%s\n", scheduler_print(scheduler)); @@ -284,6 +292,12 @@ log_compiletime_config() pretty_print_key_disabled("Log Lock Overhead"); #endif +#ifdef LOG_TENANT_LOADING + pretty_print_key_enabled("Log Tenant Loading"); +#else + pretty_print_key_disabled("Log Tenant Loading"); +#endif + #ifdef LOG_PREEMPTION pretty_print_key_enabled("Log Preemption"); #else @@ -377,6 +391,9 @@ load_file_into_buffer(const char *file_name, char **file_buffer) /* Read the file into the buffer and check that the buffer size equals the file size */ size_t total_chars_read = fread(*file_buffer, sizeof(char), stat_buffer.st_size, file); +#ifdef LOG_TENANT_LOADING + debuglog("size read: %d content: %s\n", total_chars_read, *file_buffer); +#endif if (total_chars_read != stat_buffer.st_size) { fprintf(stderr, "Attempt to read %s into buffer failed: %s\n", file_name, strerror(errno)); goto fread_err; @@ -414,7 +431,6 @@ main(int argc, char **argv) printf("Starting the Sledge runtime\n"); - log_compiletime_config(); runtime_process_debug_log_behavior(); @@ -437,6 +453,9 @@ main(int argc, char **argv) runtime_start_runtime_worker_threads(); software_interrupt_arm_timer(); +#ifdef LOG_TENANT_LOADING + debuglog("Parsing file [%s]\n", argv[1]); +#endif const char *json_path = argv[1]; char *json_buf = NULL; size_t json_buf_len = load_file_into_buffer(json_path, &json_buf); @@ -461,6 +480,8 @@ main(int argc, char **argv) if (rc < 0) exit(-1); } + runtime_boot_timestamp = __getcycles(); + for (int tenant_idx = 0; tenant_idx < tenant_config_vec_len; tenant_idx++) { tenant_config_deinit(&tenant_config_vec[tenant_idx]); } diff --git a/runtime/src/software_interrupt.c b/runtime/src/software_interrupt.c index a61ab2a..e263900 100644 --- a/runtime/src/software_interrupt.c +++ b/runtime/src/software_interrupt.c @@ -117,16 +117,27 @@ software_interrupt_handle_signals(int signal_type, siginfo_t *signal_info, void if (worker_thread_is_running_cooperative_scheduler()) { /* There is no benefit to deferring SIGALRMs that occur when we are already in the cooperative * scheduler, so just propagate and return */ + if (scheduler == SCHEDULER_MTDS && signal_info->si_code == SI_KERNEL) { + /* Global tenant promotions */ + global_timeout_queue_process_promotions(); + } propagate_sigalrm(signal_info); } else if (current_sandbox_is_preemptable()) { /* Preemptable, so run scheduler. 
The scheduler handles outgoing state changes */ sandbox_interrupt(current_sandbox); + if (scheduler == SCHEDULER_MTDS && signal_info->si_code == SI_KERNEL) { + /* Global tenant promotions */ + global_timeout_queue_process_promotions(); + } propagate_sigalrm(signal_info); scheduler_preemptive_sched(interrupted_context); } else { /* We transition the sandbox to an interrupted state to exclude time propagating signals and * running the scheduler from per-sandbox accounting */ - sandbox_state_t interrupted_state = current_sandbox->state; + if (scheduler == SCHEDULER_MTDS && signal_info->si_code == SI_KERNEL) { + /* Global tenant promotions */ + global_timeout_queue_process_promotions(); + } propagate_sigalrm(signal_info); atomic_fetch_add(&deferred_sigalrm, 1); } diff --git a/runtime/src/worker_thread.c b/runtime/src/worker_thread.c index cfd9535..c719109 100644 --- a/runtime/src/worker_thread.c +++ b/runtime/src/worker_thread.c @@ -15,6 +15,8 @@ #include "runtime.h" #include "scheduler.h" #include "worker_thread.h" +#include "tenant_functions.h" +#include "priority_queue.h" /*************************** * Worker Thread State * @@ -28,6 +30,8 @@ thread_local int worker_thread_epoll_file_descriptor; /* Used to index into global arguments and deadlines arrays */ thread_local int worker_thread_idx; +/* Used to track tenants' timeouts */ +thread_local struct priority_queue *worker_thread_timeout_queue; /*********************** * Worker Thread Logic * **********************/ @@ -55,6 +59,11 @@ worker_thread_main(void *argument) /* Initialize Cleanup Queue */ local_cleanup_queue_initialize(); + if (scheduler == SCHEDULER_MTDS) { + worker_thread_timeout_queue = priority_queue_initialize(RUNTIME_MAX_TENANT_COUNT, false, + tenant_timeout_get_priority); + } + /* Initialize epoll */ worker_thread_epoll_file_descriptor = epoll_create1(0); if (unlikely(worker_thread_epoll_file_descriptor < 0)) panic_err(); diff --git a/tests/CMSIS_5_NN/imageclassification/spec.json b/tests/CMSIS_5_NN/imageclassification/spec.json index d7cf677..7bb6b2d 100644 --- a/tests/CMSIS_5_NN/imageclassification/spec.json +++ b/tests/CMSIS_5_NN/imageclassification/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/rand", diff --git a/tests/TinyEKF/by_iteration/spec.json b/tests/TinyEKF/by_iteration/spec.json index 8c7b55e..4955fb4 100644 --- a/tests/TinyEKF/by_iteration/spec.json +++ b/tests/TinyEKF/by_iteration/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/ekf_first_iter", diff --git a/tests/TinyEKF/one_iteration/spec.json b/tests/TinyEKF/one_iteration/spec.json index 2ac8327..f4f6773 100644 --- a/tests/TinyEKF/one_iteration/spec.json +++ b/tests/TinyEKF/one_iteration/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/ekf", diff --git a/tests/common/mtds_preemption.env b/tests/common/mtds_preemption.env new file mode 100644 index 0000000..cf8670b --- /dev/null +++ b/tests/common/mtds_preemption.env @@ -0,0 +1,3 @@ +SLEDGE_SCHEDULER=MTDS +SLEDGE_DISABLE_PREEMPTION=false +SLEDGE_SANDBOX_PERF_LOG=perf.log diff --git a/tests/deadline_description/spec.json b/tests/deadline_description/spec.json index 75ed801..73c7785 100644 --- a/tests/deadline_description/spec.json +++ b/tests/deadline_description/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + 
"replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/ekf", @@ -45,4 +47,4 @@ } ] } -] \ No newline at end of file +] diff --git a/tests/empty/concurrency/spec.json b/tests/empty/concurrency/spec.json index d16e5ad..5383bde 100644 --- a/tests/empty/concurrency/spec.json +++ b/tests/empty/concurrency/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/empty", diff --git a/tests/fibonacci/bimodal/spec.json b/tests/fibonacci/bimodal/spec.json index a841aae..bc49a1d 100644 --- a/tests/fibonacci/bimodal/spec.json +++ b/tests/fibonacci/bimodal/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10010, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/fib", @@ -26,6 +28,8 @@ { "name": "conix", "port": 10020, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/fib", @@ -38,4 +42,4 @@ } ] } -] \ No newline at end of file +] diff --git a/tests/fibonacci/concurrency/README.md b/tests/fibonacci/concurrency/README.md new file mode 100644 index 0000000..156efbd --- /dev/null +++ b/tests/fibonacci/concurrency/README.md @@ -0,0 +1,26 @@ +# Concurrency + +## Question + +_How does increasing levels of concurrent client requests affect tail latency, throughput, and the success/error rate of sandbox execution?_ + +## Independent Variable + +- The number of concurrent client requests made at a given time + +## Dependent Variables + +- p50, p90, p99, and p100 latency measured in ms +- throughput measures in requests/second +- success rate, measures in % of requests that return a 200 + +## Assumptions about test environment + +- You have a modern bash shell. My Linux environment shows version 4.4.20(1)-release +- `hey` (https://github.com/rakyll/hey) is available in your PATH +- You have compiled `sledgert` and the `empty.so` test workload + +## TODO + +- Harden scripts to validate assumptions +- Improve error handling in scripts. 
If `sledgert` crashes, the scripts charge forward until they hit a divide-by-zero error when attempting to clean data that doesn't exist
diff --git a/tests/fibonacci/concurrency/latency.gnuplot b/tests/fibonacci/concurrency/latency.gnuplot
new file mode 100644
index 0000000..4958803
--- /dev/null
+++ b/tests/fibonacci/concurrency/latency.gnuplot
@@ -0,0 +1,21 @@
+reset
+
+set term jpeg
+set output "latency.jpg"
+
+set xlabel "Concurrency"
+set ylabel "Latency (us)"
+
+set key left top
+
+set xrange [-5:]
+set yrange [0:]
+
+set style histogram columnstacked
+
+plot 'latency.dat' using 1:8 title 'p100' linetype 0 linecolor 0 with linespoints, \
+ 'latency.dat' using 1:7 title 'p99' lt 1 lc 1 w lp, \
+ 'latency.dat' using 1:6 title 'p90' lt 2 lc 2 w lp, \
+ 'latency.dat' using 1:5 title 'p50' lt 3 lc 3 w lp, \
+ 'latency.dat' using 1:4 title 'mean' lt 4 lc 4 w lp, \
+ 'latency.dat' using 1:3 title 'min' lt 5 lc 5 w lp
diff --git a/tests/fibonacci/concurrency/run_hey.sh b/tests/fibonacci/concurrency/run_hey.sh
new file mode 100755
index 0000000..1cf8934
--- /dev/null
+++ b/tests/fibonacci/concurrency/run_hey.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+
+# This experiment is intended to document how the level of concurrent requests influences the latency, throughput, and success rate
+# Success - The percentage of requests that complete by their deadlines
+# Throughput - The mean number of successful requests per second
+# Latency - the round-trip response time (us) of successful requests at the p50, p90, p99, and p100 percentiles
+
+# Add bash_libraries directory to path
+__run_sh__base_path="$(dirname "$(realpath --logical "${BASH_SOURCE[0]}")")"
+__run_sh__bash_libraries_relative_path="../../bash_libraries"
+__run_sh__bash_libraries_absolute_path=$(cd "$__run_sh__base_path" && cd "$__run_sh__bash_libraries_relative_path" && pwd)
+export PATH="$__run_sh__bash_libraries_absolute_path:$PATH"
+
+source csv_to_dat.sh || exit 1
+source framework.sh || exit 1
+source generate_gnuplots.sh || exit 1
+source get_result_count.sh || exit 1
+source panic.sh || exit 1
+source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
+
+validate_dependencies hey gnuplot jq
+
+declare -gi iterations=10000
+declare -gi duration_sec=5
+declare -ga concurrency=(1 18 20 24 28 32 36 40 50)
+
+# Execute the experiments for each concurrency level
+# $1 (hostname)
+# $2 (results_directory) - a directory where we will store our results
+run_experiments() {
+ if (($# != 2)); then
+ panic "invalid number of arguments ($#, expected 2)"
+ return 1
+ elif [[ -z "$1" ]]; then
+ panic "hostname \"$1\" was empty"
+ return 1
+ elif [[ !
-d "$2" ]]; then + panic "directory \"$2\" does not exist" + return 1 + fi + + local hostname="$1" + local results_directory="$2" + + + printf "Running Experiments:\n" + for con in "${concurrency[@]}"; do + printf "\t%d Concurrency: " "$con" + hey -disable-compression -disable-keepalive -disable-redirects -z "$duration_sec"s -n "$iterations" -c "$con" -o csv -m GET -d "30\n" "http://$hostname:10030/fib" > "$results_directory/con$con.csv" 2> /dev/null || { + printf "[ERR]\n" + panic "experiment failed" + return 1 + } + get_result_count "$results_directory/con$con.csv" || { + printf "[ERR]\n" + panic "con$con.csv unexpectedly has zero requests" + return 1 + } + printf "[OK]\n" + done + + return 0 +} + +# Process the experimental results and generate human-friendly results for success rate, throughput, and latency +process_client_results() { + if (($# != 1)); then + error_msg "invalid number of arguments ($#, expected 1)" + return 1 + elif ! [[ -d "$1" ]]; then + error_msg "directory $1 does not exist" + return 1 + fi + + local -r results_directory="$1" + + printf "Processing Results: " + + # Write headers to CSVs + printf "Concurrency,Success_Rate\n" >> "$results_directory/success.csv" + printf "Concurrency,Throughput\n" >> "$results_directory/throughput.csv" + percentiles_table_header "$results_directory/latency.csv" "Con" + + for con in "${concurrency[@]}"; do + + if [[ ! -f "$results_directory/con$con.csv" ]]; then + printf "[ERR]\n" + error_msg "Missing $results_directory/con$con.csv" + return 1 + fi + + # Calculate Success Rate for csv (percent of requests that return 200) + # P.S. When using hey -z option, this result is meaningless + awk -F, ' + $7 == 200 {ok++} + END{printf "'"$con"',%3.2f\n", (ok / (NR - 1) * 100)} + ' < "$results_directory/con$con.csv" >> "$results_directory/success.csv" + + # Filter on 200s, convert from s to us, and sort + awk -F, '$7 == 200 {print ($1 * 1000000)}' < "$results_directory/con$con.csv" \ + | sort -g > "$results_directory/con$con-response.csv" + + # Get Number of 200s + oks=$(wc -l < "$results_directory/con$con-response.csv") + ((oks == 0)) && continue # If all errors, skip line + + # We determine duration by looking at the timestamp of the last complete request + # TODO: Should this instead just use the client-side synthetic duration_sec value? 
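+ # (Per hey's CSV output, the 8th field appears to be each request's offset in seconds from the
+ # start of the run, so the final row approximates the wall-clock time actually spent issuing requests.)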
+ duration=$(tail -n1 "$results_directory/con$con.csv" | cut -d, -f8)
+
+ # Throughput is calculated as the mean number of successful requests per second
+ throughput=$(echo "$oks/$duration" | bc)
+ printf "%d,%d\n" "$con" "$throughput" >> "$results_directory/throughput.csv"
+
+ # Generate Latency Data for csv
+ percentiles_table_row "$results_directory/con$con-response.csv" "$results_directory/latency.csv" "$con"
+
+ # Delete scratch file used for sorting/counting
+ rm -rf "$results_directory/con$con-response.csv"
+ done
+
+ # Transform csvs to dat files for gnuplot
+ csv_to_dat "$results_directory/success.csv" "$results_directory/throughput.csv" "$results_directory/latency.csv"
+ rm "$results_directory/success.csv" "$results_directory/throughput.csv" "$results_directory/latency.csv"
+
+ # Generate gnuplots
+ generate_gnuplots "$results_directory" "$__run_sh__base_path" || {
+ printf "[ERR]\n"
+ panic "failed to generate gnuplots"
+ }
+
+ printf "[OK]\n"
+ return 0
+}
+
+experiment_server_post() {
+ local -r results_directory="$1"
+
+ # Only process data if SLEDGE_SANDBOX_PERF_LOG was set when running sledgert
+ if [[ -n "$SLEDGE_SANDBOX_PERF_LOG" ]]; then
+ if [[ -f "$__run_sh__base_path/$SLEDGE_SANDBOX_PERF_LOG" ]]; then
+ mv "$__run_sh__base_path/$SLEDGE_SANDBOX_PERF_LOG" "$results_directory/perf.log"
+ # process_server_results "$results_directory" || return 1
+ else
+ echo "Perf Log was set, but perf.log not found!"
+ fi
+ fi
+}
+
+# Expected Symbol used by the framework
+experiment_client() {
+ local -r target_hostname="$1"
+ local -r results_directory="$2"
+
+ #run_samples "$target_hostname" || return 1
+ run_experiments "$target_hostname" "$results_directory" || return 1
+ process_client_results "$results_directory" || return 1
+
+ return 0
+}
+
+framework_init "$@"
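The loadtest variant below derives its offered request rate from the route's relative deadline rather than from a fixed iteration count: rps = (1,000,000 / deadline_us) * concurrency, evaluated with bash integer arithmetic. With deadline_us=16000 (16 ms, matching this test's spec.json "relative-deadline-us") and a concurrency of 40, that is (1000000 / 16000) * 40 = 62 * 40 = 2480 requests per second; the integer division truncates 62.5 down to 62.

diff --git a/tests/fibonacci/concurrency/run_lt.sh b/tests/fibonacci/concurrency/run_lt.sh
new file mode 100755
index 0000000..3b66efc
--- /dev/null
+++ b/tests/fibonacci/concurrency/run_lt.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+
+# This experiment is intended to document how the level of concurrent requests influences the latency, throughput, and success/failure rate
+# Success - The percentage of requests that complete out of the total expected
+# Throughput - The mean number of successful requests per second
+# Latency - the round-trip response time (us) of successful requests at the p50, p90, p99, and p100 percentiles
+
+# Add bash_libraries directory to path
+__run_sh__base_path="$(dirname "$(realpath --logical "${BASH_SOURCE[0]}")")"
+__run_sh__bash_libraries_relative_path="../../bash_libraries"
+__run_sh__bash_libraries_absolute_path=$(cd "$__run_sh__base_path" && cd "$__run_sh__bash_libraries_relative_path" && pwd)
+export PATH="$__run_sh__bash_libraries_absolute_path:$PATH"
+
+source csv_to_dat.sh || exit 1
+source framework.sh || exit 1
+source generate_gnuplots.sh || exit 1
+source get_result_count.sh || exit 1
+source panic.sh || exit 1
+source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
+
+validate_dependencies loadtest gnuplot
+
+# The global configs for the scripts
+declare -gi iterations=10000
+declare -gi duration_sec=5
+declare -ga concurrency=(1 9 18 20 30 40 60 80 100)
+declare -gi deadline_us=16000 #16ms for fib30
+
+# Execute the experiments for each concurrency level
+# $1 (hostname)
+# $2 (results_directory) - a directory where we will store our results
+run_experiments() {
+ if (($# != 2)); then
+ panic "invalid number of arguments ($#, expected 2)"
+ return 1
+ elif [[ -z "$1" ]]; then
+ panic "hostname 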
\"$1\" was empty" + return 1 + elif [[ ! -d "$2" ]]; then + panic "directory \"$2\" does not exist" + return 1 + fi + + local hostname="$1" + local results_directory="$2" + + + printf "Running Experiments:\n" + + for con in "${concurrency[@]}"; do + printf "\t%d Concurrency: " "$con" + + rps=$((1000000/$deadline_us*$con)) + + loadtest -t "$duration_sec" -d "$(($deadline_us/1000))" -c "$con" --rps "$rps" -P "30" "http://$hostname:10030/fib" > "$results_directory/con$con.txt" || { #-n "$iterations" + printf "[ERR]\n" + panic "experiment failed" + return 1 + } + get_result_count "$results_directory/con$con.txt" || { + printf "[ERR]\n" + panic "con$con unexpectedly has zero requests" + return 1 + } + printf "[OK]\n" + done + + return 0 +} + +# Process the experimental results and generate human-friendly results for success rate, throughput, and latency +process_client_results() { + if (($# != 1)); then + error_msg "invalid number of arguments ($#, expected 1)" + return 1 + elif ! [[ -d "$1" ]]; then + error_msg "directory $1 does not exist" + return 1 + fi + + local -r results_directory="$1" + + printf "Processing Results: " + + # Write headers to CSVs + printf "Concurrency,Success_Rate\n" >> "$results_directory/success.csv" + printf "Concurrency,Throughput\n" >> "$results_directory/throughput.csv" + percentiles_table_header "$results_directory/latency.csv" "Con" + + for con in "${concurrency[@]}"; do + + if [[ ! -f "$results_directory/con$con.txt" ]]; then + printf "[ERR]\n" + error_msg "Missing $results_directory/con$con.txt" + return 1 + fi + + # Get Number of 200s and then calculate Success Rate (percent of requests that return 200) + # If using loadtest -n option (not -t), then use ok/iterations instead of ok/total. + total=$(grep "Completed requests:" "$results_directory/con$con.txt" | tr -s ' ' | cut -d ' ' -f 14) + missed=$(grep "Total errors:" "$results_directory/con$con.txt" | tr -s ' ' | cut -d ' ' -f 13) + ok=$((total-missed)) + ((ok == 0)) && continue # If all errors, skip line + success_rate=$(echo "scale=2; $ok/$total*100"|bc) + printf "%d,%3.1f\n" "$con" "$success_rate" >> "$results_directory/success.csv" + + # Throughput is calculated as the mean number of successful requests per second + throughput=$(grep "Requests per second" "$results_directory/con$con.txt" | cut -d ' ' -f 14 | tail -n 1) + printf "%d,%d\n" "$con" "$throughput" >> "$results_directory/throughput.csv" + + # Generate Latency Data + min=0 + p50=$(echo 1000*"$(grep 50% "$results_directory/con$con.txt" | tr -s ' ' | cut -d ' ' -f 12)" | bc) + p90=$(echo 1000*"$(grep 90% "$results_directory/con$con.txt" | tr -s ' ' | cut -d ' ' -f 12)" | bc) + p99=$(echo 1000*"$(grep 99% "$results_directory/con$con.txt" | tr -s ' ' | cut -d ' ' -f 12)" | bc) + p100=$(echo 1000*"$(grep 100% "$results_directory/con$con.txt" | tr -s ' ' | cut -d ' ' -f 12 | tail -n 1)" | bc) + mean=$(echo 1000*"$(grep "Mean latency:" "$results_directory/con$con.txt" | tr -s ' ' | cut -d ' ' -f 13)" | bc) + + printf "%d,%d,%d,%.2f,%d,%d,%d,%d\n" "$con" "$ok" "$min" "$mean" "$p50" "$p90" "$p99" "$p100" >> "$results_directory/latency.csv" + done + + # Transform csvs to dat files for gnuplot + csv_to_dat "$results_directory/success.csv" "$results_directory/throughput.csv" "$results_directory/latency.csv" + rm "$results_directory/success.csv" "$results_directory/throughput.csv" "$results_directory/latency.csv" + + # Generate gnuplots + generate_gnuplots "$results_directory" "$__run_sh__base_path" || { + printf "[ERR]\n" + panic "failed to generate 
gnuplots" + } + + printf "[OK]\n" + return 0 +} + +experiment_server_post() { + local -r results_directory="$1" + + # Only process data if SLEDGE_SANDBOX_PERF_LOG was set when running sledgert + if [[ -n "$SLEDGE_SANDBOX_PERF_LOG" ]]; then + if [[ -f "$__run_sh__base_path/$SLEDGE_SANDBOX_PERF_LOG" ]]; then + mv "$__run_sh__base_path/$SLEDGE_SANDBOX_PERF_LOG" "$results_directory/perf.log" + # process_server_results "$results_directory" || return 1 + else + echo "Perf Log was set, but perf.log not found!" + fi + fi +} + +# Expected Symbol used by the framework +experiment_client() { + local -r target_hostname="$1" + local -r results_directory="$2" + + #run_samples "$target_hostname" || return 1 + run_experiments "$target_hostname" "$results_directory" || return 1 + process_client_results "$results_directory" || return 1 + + return 0 +} + +framework_init "$@" diff --git a/tests/fibonacci/concurrency/spec.json b/tests/fibonacci/concurrency/spec.json new file mode 100644 index 0000000..72f39fe --- /dev/null +++ b/tests/fibonacci/concurrency/spec.json @@ -0,0 +1,19 @@ +[ + { + "name": "gwu", + "port": 10030, + "replenishment-period-us": 0, + "max-budget-us": 0, + "routes": [ + { + "route": "/fib", + "path": "fibonacci.wasm.so", + "admissions-percentile": 70, + "expected-execution-us": 4000, + "relative-deadline-us": 16000, + "http-resp-size": 1024, + "http-resp-content-type": "text/plain" + } + ] + } +] diff --git a/tests/fibonacci/concurrency/success.gnuplot b/tests/fibonacci/concurrency/success.gnuplot new file mode 100644 index 0000000..0e08247 --- /dev/null +++ b/tests/fibonacci/concurrency/success.gnuplot @@ -0,0 +1,12 @@ +reset + +set term jpeg +set output "success.jpg" + +set xlabel "Concurrency" +set ylabel "Success (RC=200) rate %" + +set xrange [0:] +set yrange [0:110] + +plot 'success.dat' using 1:2 title 'Success rate' linetype 1 linecolor 1 with linespoints diff --git a/tests/fibonacci/concurrency/throughput.gnuplot b/tests/fibonacci/concurrency/throughput.gnuplot new file mode 100644 index 0000000..f3e79d3 --- /dev/null +++ b/tests/fibonacci/concurrency/throughput.gnuplot @@ -0,0 +1,12 @@ +reset + +set term jpeg +set output "throughput.jpg" + +set xlabel "Concurrency" +set ylabel "Requests/sec" + +set xrange [-5:] +set yrange [0:] + +plot 'throughput.dat' using 1:2 title 'Reqs/sec' linetype 1 linecolor 1 with linespoints diff --git a/tests/gocr/by_dpi/spec.json b/tests/gocr/by_dpi/spec.json index d83fb7a..0adeb39 100644 --- a/tests/gocr/by_dpi/spec.json +++ b/tests/gocr/by_dpi/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/gocr_72_dpi", @@ -29,4 +31,4 @@ } ] } -] \ No newline at end of file +] diff --git a/tests/gocr/by_font/spec.json b/tests/gocr/by_font/spec.json index 70baa4d..85dac11 100644 --- a/tests/gocr/by_font/spec.json +++ b/tests/gocr/by_font/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/gocr_mono", @@ -29,4 +31,4 @@ } ] } -] \ No newline at end of file +] diff --git a/tests/gocr/by_word/spec.json b/tests/gocr/by_word/spec.json index f5fc63e..a9c5622 100644 --- a/tests/gocr/by_word/spec.json +++ b/tests/gocr/by_word/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/gocr_1_word", diff --git a/tests/gocr/fivebyeight/spec.json b/tests/gocr/fivebyeight/spec.json index 6b2e815..c18d0c4 100644 --- 
a/tests/gocr/fivebyeight/spec.json +++ b/tests/gocr/fivebyeight/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/gocr", @@ -13,4 +15,4 @@ } ] } -] \ No newline at end of file +] diff --git a/tests/gocr/handwriting/spec.json b/tests/gocr/handwriting/spec.json index 6b2e815..c18d0c4 100644 --- a/tests/gocr/handwriting/spec.json +++ b/tests/gocr/handwriting/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/gocr", @@ -13,4 +15,4 @@ } ] } -] \ No newline at end of file +] diff --git a/tests/gocr/hyde/spec.json b/tests/gocr/hyde/spec.json index 9802ece..f814f6d 100644 --- a/tests/gocr/hyde/spec.json +++ b/tests/gocr/hyde/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/gocr", @@ -13,4 +15,4 @@ } ] } -] \ No newline at end of file +] diff --git a/tests/html/spec.json b/tests/html/spec.json index bfa0af4..5ab14c1 100644 --- a/tests/html/spec.json +++ b/tests/html/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 1337, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/index.html", diff --git a/tests/scratch_storage/spec.json b/tests/scratch_storage/spec.json index 79ee5d1..12f3e3f 100644 --- a/tests/scratch_storage/spec.json +++ b/tests/scratch_storage/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 1337, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/set", diff --git a/tests/sod/image_resize/by_resolution/spec.json b/tests/sod/image_resize/by_resolution/spec.json index 1836c8a..909979b 100644 --- a/tests/sod/image_resize/by_resolution/spec.json +++ b/tests/sod/image_resize/by_resolution/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/resize_small", @@ -29,4 +31,4 @@ } ] } -] \ No newline at end of file +] diff --git a/tests/sod/image_resize/test/spec.json b/tests/sod/image_resize/test/spec.json index 456c911..4e1e780 100644 --- a/tests/sod/image_resize/test/spec.json +++ b/tests/sod/image_resize/test/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/resize", diff --git a/tests/sod/lpd/by_plate_count/spec.json b/tests/sod/lpd/by_plate_count/spec.json index 4a568fe..c595066 100644 --- a/tests/sod/lpd/by_plate_count/spec.json +++ b/tests/sod/lpd/by_plate_count/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/lpd1", diff --git a/tests/speechtotext/spec.json b/tests/speechtotext/spec.json index 227b6b7..a1c5f42 100644 --- a/tests/speechtotext/spec.json +++ b/tests/speechtotext/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/hello_ps", diff --git a/tests/stack_overflow/spec.json b/tests/stack_overflow/spec.json index e78789e..4a1e6ac 100644 --- a/tests/stack_overflow/spec.json +++ b/tests/stack_overflow/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/stack_overflow", diff --git a/tests/traps/spec.json b/tests/traps/spec.json index a7ed752..ca84754 100644 --- a/tests/traps/spec.json +++ b/tests/traps/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", 
"port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/divide", diff --git a/tests/workload_mix/spec.json b/tests/workload_mix/spec.json index ab5d351..b59d586 100644 --- a/tests/workload_mix/spec.json +++ b/tests/workload_mix/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/fibonacci_10", diff --git a/tests/workload_mix_realworld/spec.json b/tests/workload_mix_realworld/spec.json index 6e4ff48..efd7281 100644 --- a/tests/workload_mix_realworld/spec.json +++ b/tests/workload_mix_realworld/spec.json @@ -2,6 +2,8 @@ { "name": "gwu", "port": 10000, + "replenishment-period-us": 0, + "max-budget-us": 0, "routes": [ { "route": "/cifar10_1.5",