From 71c99e91bba54c5c96d0d62c5f6d9f2070ea3442 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 22 Jul 2022 16:55:21 -0400 Subject: [PATCH 01/23] feat: Initial metrics server --- runtime/include/metrics_server.h | 10 +++++ runtime/src/listener_thread.c | 49 ++++++++++++++++++++++- runtime/src/metrics_server.c | 67 ++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 runtime/include/metrics_server.h create mode 100644 runtime/src/metrics_server.c diff --git a/runtime/include/metrics_server.h b/runtime/include/metrics_server.h new file mode 100644 index 00000000..540d1a59 --- /dev/null +++ b/runtime/include/metrics_server.h @@ -0,0 +1,10 @@ +#pragma once + +#include "tcp_server.h" + +extern struct tcp_server metrics_server; + +void metrics_server_init(); +int metrics_server_listen(); +int metrics_server_close(); +void metrics_server_handler(int client_socket); diff --git a/runtime/src/listener_thread.c b/runtime/src/listener_thread.c index f615db80..af28dc31 100644 --- a/runtime/src/listener_thread.c +++ b/runtime/src/listener_thread.c @@ -5,6 +5,7 @@ #include "global_request_scheduler.h" #include "generic_thread.h" #include "listener_thread.h" +#include "metrics_server.h" #include "module.h" #include "runtime.h" #include "sandbox_functions.h" @@ -134,6 +135,23 @@ listener_thread_register_tenant(struct tenant *tenant) return rc; } +int +listener_thread_register_metrics_server() +{ + if (unlikely(listener_thread_epoll_file_descriptor == 0)) { + panic("Attempting to register metrics_server before listener thread initialization"); + } + + int rc = 0; + struct epoll_event accept_evt; + accept_evt.data.ptr = (void *)&metrics_server; + accept_evt.events = EPOLLIN; + rc = epoll_ctl(listener_thread_epoll_file_descriptor, EPOLL_CTL_ADD, metrics_server.socket_descriptor, + &accept_evt); + + return rc; +} + static void panic_on_epoll_error(struct epoll_event *evt) { @@ -325,6 +343,29 @@ on_tenant_socket_epoll_event(struct epoll_event *evt) } } +static void +on_metrics_server_epoll_event(struct epoll_event *evt) +{ + assert((evt->events & EPOLLIN) == EPOLLIN); + + /* Accept Client Request as a nonblocking socket, saving address information */ + struct sockaddr_in client_address; + socklen_t address_length = sizeof(client_address); + + /* Accept as many clients requests as possible, returning when we would have blocked */ + while (true) { + int client_socket = accept4(metrics_server.socket_descriptor, (struct sockaddr *)&client_address, + &address_length, SOCK_NONBLOCK); + if (unlikely(client_socket < 0)) { + if (errno == EWOULDBLOCK || errno == EAGAIN) return; + + panic("accept4: %s", strerror(errno)); + } + + metrics_server_handler(client_socket); + } +} + static void on_client_socket_epoll_event(struct epoll_event *evt) { @@ -370,6 +411,10 @@ listener_thread_main(void *dummy) generic_thread_initialize(); + metrics_server_init(); + metrics_server_listen(); + listener_thread_register_metrics_server(); + /* Set my priority */ // runtime_set_pthread_prio(pthread_self(), 2); pthread_setschedprio(pthread_self(), -20); @@ -390,7 +435,9 @@ listener_thread_main(void *dummy) for (int i = 0; i < descriptor_count; i++) { panic_on_epoll_error(&epoll_events[i]); - if (tenant_database_find_by_ptr(epoll_events[i].data.ptr) != NULL) { + if (epoll_events[i].data.ptr == &metrics_server) { + on_metrics_server_epoll_event(&epoll_events[i]); + } else if (tenant_database_find_by_ptr(epoll_events[i].data.ptr) != NULL) { on_tenant_socket_epoll_event(&epoll_events[i]); } else { on_client_socket_epoll_event(&epoll_events[i]); diff --git a/runtime/src/metrics_server.c b/runtime/src/metrics_server.c new file mode 100644 index 00000000..344708d7 --- /dev/null +++ b/runtime/src/metrics_server.c @@ -0,0 +1,67 @@ +#include +#include + +#include "tcp_server.h" +#include "http_total.h" + +struct tcp_server metrics_server; + +void +metrics_server_init() +{ + tcp_server_init(&metrics_server, 1776); +} + +int +metrics_server_listen() +{ + return tcp_server_listen(&metrics_server); +} + +int +metrics_server_close() +{ + return tcp_server_close(&metrics_server); +} + +void +metrics_server_handler(int client_socket) +{ + int rc = 0; + + char *ostream_base = NULL; + size_t ostream_size = 0; + FILE *ostream = open_memstream(&ostream_base, &ostream_size); + assert(ostream != NULL); + + uint32_t total_reqs = atomic_load(&http_total_requests); + uint32_t total_5XX = atomic_load(&http_total_5XX); + + fprintf(ostream, "HTTP/1.1 200 OK\r\n\r\n"); + + fprintf(ostream, "# TYPE total_requests counter\n"); + fprintf(ostream, "total_requests: %d\n", total_reqs); + + fprintf(ostream, "# TYPE total_rejections counter\n"); + fprintf(ostream, "total_rejections: %d\n", total_5XX); + fflush(ostream); + + rewind(ostream); + + char buf[256] = { 0 }; + size_t nread = 0; + do { + nread = fread(buf, 1, 255, ostream); + buf[nread] = '\0'; + /* TODO: Deal with blocking here! */ + write(client_socket, buf, nread); + } while (nread > 0); + + rc = fclose(ostream); + assert(rc == 0); + + free(ostream_base); + ostream_size = 0; + + close(client_socket); +} From ccbee1a41ea42095b839caef9ed3d41159e8df9c Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Mon, 1 Aug 2022 17:14:15 -0400 Subject: [PATCH 02/23] feat: Additional counters and gauges --- .vscode/settings.json | 3 +- runtime/include/admissions_control.h | 3 + runtime/include/http_session.h | 2 + runtime/include/http_total.h | 16 +++++ runtime/include/route_config.h | 5 +- runtime/src/admissions_control.c | 3 + runtime/src/admissions_info.c | 2 + runtime/src/metrics_server.c | 100 +++++++++++++++++++++++---- 8 files changed, 119 insertions(+), 15 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 09ae2d4a..d49fa085 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -125,7 +125,8 @@ "pool.h": "c", "local_cleanup_queue.h": "c", "sandbox_state_transition.h": "c", - "http_session_perf_log.h": "c" + "http_session_perf_log.h": "c", + "perf_window.h": "c" }, "files.exclude": { "**/.git": true, diff --git a/runtime/include/admissions_control.h b/runtime/include/admissions_control.h index 03302cd3..0ecbd340 100644 --- a/runtime/include/admissions_control.h +++ b/runtime/include/admissions_control.h @@ -5,6 +5,9 @@ #define ADMISSIONS_CONTROL_GRANULARITY 1000000 +extern _Atomic uint64_t admissions_control_admitted; +extern uint64_t admissions_control_capacity; + void admissions_control_initialize(void); void admissions_control_add(uint64_t admissions_estimate); void admissions_control_subtract(uint64_t admissions_estimate); diff --git a/runtime/include/http_session.h b/runtime/include/http_session.h index 2252f64a..baeb685e 100644 --- a/runtime/include/http_session.h +++ b/runtime/include/http_session.h @@ -14,6 +14,7 @@ #include "http_request.h" #include "http_parser.h" #include "http_parser_settings.h" +#include "http_total.h" #include "tenant.h" #include "vec.h" #include "http_session_perf_log.h" @@ -173,6 +174,7 @@ http_session_set_response_header(struct http_session *session, int status_code, { assert(session != NULL); assert(status_code >= 200 && status_code <= 599); + http_total_increment(status_code); if (status_code == 200) { session->response_header_length = snprintf(session->response_header, diff --git a/runtime/include/http_total.h b/runtime/include/http_total.h index 09fcf6f3..dc697456 100644 --- a/runtime/include/http_total.h +++ b/runtime/include/http_total.h @@ -55,3 +55,19 @@ http_total_increment_5XX() { atomic_fetch_add(&http_total_5XX, 1); } + +static inline void +http_total_increment(int status_code) +{ +#ifdef LOG_TOTAL_REQS_RESPS + if (status_code >= 200 && status_code <= 299) { + http_total_increment_2XX(); + } else if (status_code >= 400 && status_code <= 499) { + http_total_increment_4XX(); + } else if (status_code >= 500 && status_code <= 599) { + http_total_increment_5XX(); + } +#else + if (status_code >= 500 && status_code <= 599) { http_total_increment_5XX(); } +#endif +} diff --git a/runtime/include/route_config.h b/runtime/include/route_config.h index af44241f..a7ad9505 100644 --- a/runtime/include/route_config.h +++ b/runtime/include/route_config.h @@ -4,6 +4,7 @@ #include #include +#include "admissions_control.h" #include "runtime.h" #include "scheduler_options.h" @@ -100,12 +101,12 @@ route_config_validate(struct route_config *config, bool *did_set) if (config->admissions_percentile > 99 || config->admissions_percentile < 50) { fprintf(stderr, "admissions-percentile must be > 50 and <= 99 but was %u\n", - route_config->admissions_percentile); + config->admissions_percentile); return -1; } /* If the ratio is too big, admissions control is too coarse */ - uint32_t ratio = route_config->relative_deadline_us / route_config->expected_execution_us; + uint32_t ratio = config->relative_deadline_us / config->expected_execution_us; if (ratio > ADMISSIONS_CONTROL_GRANULARITY) { fprintf(stderr, "Ratio of Deadline to Execution time cannot exceed admissions control " diff --git a/runtime/src/admissions_control.c b/runtime/src/admissions_control.c index 022da508..b5bb530a 100644 --- a/runtime/src/admissions_control.c +++ b/runtime/src/admissions_control.c @@ -1,8 +1,11 @@ +#include #include #include #include "admissions_control.h" #include "debuglog.h" +#include "panic.h" +#include "runtime.h" /* * Unitless estimate of the instantaneous fraction of system capacity required to complete all previously diff --git a/runtime/src/admissions_info.c b/runtime/src/admissions_info.c index 0f59cf78..f8ea0900 100644 --- a/runtime/src/admissions_info.c +++ b/runtime/src/admissions_info.c @@ -1,5 +1,7 @@ +#include "admissions_control.h" #include "admissions_info.h" #include "debuglog.h" +#include "perf_window.h" /** * Initializes perf window diff --git a/runtime/src/metrics_server.c b/runtime/src/metrics_server.c index 344708d7..5c981cb9 100644 --- a/runtime/src/metrics_server.c +++ b/runtime/src/metrics_server.c @@ -1,8 +1,11 @@ #include #include +#include "admissions_control.h" #include "tcp_server.h" #include "http_total.h" +#include "sandbox_total.h" +#include "sandbox_state.h" struct tcp_server metrics_server; @@ -37,25 +40,98 @@ metrics_server_handler(int client_socket) uint32_t total_reqs = atomic_load(&http_total_requests); uint32_t total_5XX = atomic_load(&http_total_5XX); +#ifdef LOG_TOTAL_REQS_RESPS + uint32_t total_2XX = atomic_load(&http_total_2XX); + uint32_t total_4XX = atomic_load(&http_total_4XX); +#endif + + uint32_t total_sandboxes = atomic_load(&sandbox_total); + +#ifdef SANDBOX_STATE_TOTALS + uint32_t total_sandboxes_uninitialized = atomic_load(&sandbox_state_totals[SANDBOX_UNINITIALIZED]); + uint32_t total_sandboxes_allocated = atomic_load(&sandbox_state_totals[SANDBOX_ALLOCATED]); + uint32_t total_sandboxes_initialized = atomic_load(&sandbox_state_totals[SANDBOX_INITIALIZED]); + uint32_t total_sandboxes_runnable = atomic_load(&sandbox_state_totals[SANDBOX_RUNNABLE]); + uint32_t total_sandboxes_preempted = atomic_load(&sandbox_state_totals[SANDBOX_PREEMPTED]); + uint32_t total_sandboxes_running_sys = atomic_load(&sandbox_state_totals[SANDBOX_RUNNING_SYS]); + uint32_t total_sandboxes_running_user = atomic_load(&sandbox_state_totals[SANDBOX_RUNNING_USER]); + uint32_t total_sandboxes_interrupted = atomic_load(&sandbox_state_totals[SANDBOX_INTERRUPTED]); + uint32_t total_sandboxes_asleep = atomic_load(&sandbox_state_totals[SANDBOX_ASLEEP]); + uint32_t total_sandboxes_returned = atomic_load(&sandbox_state_totals[SANDBOX_RETURNED]); + uint32_t total_sandboxes_complete = atomic_load(&sandbox_state_totals[SANDBOX_COMPLETE]); + uint32_t total_sandboxes_error = atomic_load(&sandbox_state_totals[SANDBOX_ERROR]); +#endif + +#ifdef ADMISSIONS_CONTROL + uint32_t work_admitted = atomic_load(&admissions_control_admitted); + double work_admitted_percentile = (double)work_admitted / admissions_control_capacity * 100; +#endif + fprintf(ostream, "HTTP/1.1 200 OK\r\n\r\n"); fprintf(ostream, "# TYPE total_requests counter\n"); fprintf(ostream, "total_requests: %d\n", total_reqs); - fprintf(ostream, "# TYPE total_rejections counter\n"); - fprintf(ostream, "total_rejections: %d\n", total_5XX); - fflush(ostream); +#ifdef ADMISSIONS_CONTROL + fprintf(ostream, "# TYPE work_admitted_percentile gauge\n"); + fprintf(ostream, "work_admitted_percentile: %f\n", work_admitted_percentile); +#endif + + fprintf(ostream, "# TYPE total_5XX counter\n"); + fprintf(ostream, "total_5XX: %d\n", total_5XX); + +#ifdef LOG_TOTAL_REQS_RESPS + fprintf(ostream, "# TYPE total_2XX counter\n"); + fprintf(ostream, "total_2XX: %d\n", total_2XX); + + fprintf(ostream, "# TYPE total_4XX counter\n"); + fprintf(ostream, "total_4XX: %d\n", total_4XX); +#endif + + // This global is padded by 1 for error handling, so decrement here for true value + fprintf(ostream, "# TYPE total_sandboxes counter\n"); + fprintf(ostream, "total_sandboxes: %d\n", total_sandboxes - 1); + +#ifdef SANDBOX_STATE_TOTALS + fprintf(ostream, "# TYPE total_sandboxes_uninitialized gauge\n"); + fprintf(ostream, "total_sandboxes_uninitialized: %d\n", total_sandboxes_uninitialized); + + fprintf(ostream, "# TYPE total_sandboxes_allocated gauge\n"); + fprintf(ostream, "total_sandboxes_allocated: %d\n", total_sandboxes_allocated); + + fprintf(ostream, "# TYPE total_sandboxes_initialized gauge\n"); + fprintf(ostream, "total_sandboxes_initialized: %d\n", total_sandboxes_initialized); - rewind(ostream); + fprintf(ostream, "# TYPE total_sandboxes_runnable gauge\n"); + fprintf(ostream, "total_sandboxes_runnable: %d\n", total_sandboxes_runnable); - char buf[256] = { 0 }; - size_t nread = 0; - do { - nread = fread(buf, 1, 255, ostream); - buf[nread] = '\0'; - /* TODO: Deal with blocking here! */ - write(client_socket, buf, nread); - } while (nread > 0); + fprintf(ostream, "# TYPE total_sandboxes_preempted gauge\n"); + fprintf(ostream, "total_sandboxes_preempted: %d\n", total_sandboxes_preempted); + + fprintf(ostream, "# TYPE total_sandboxes_running_sys gauge\n"); + fprintf(ostream, "total_sandboxes_running_sys: %d\n", total_sandboxes_running_sys); + + fprintf(ostream, "# TYPE total_sandboxes_running_user gauge\n"); + fprintf(ostream, "total_sandboxes_running_user: %d\n", total_sandboxes_running_user); + + fprintf(ostream, "# TYPE total_sandboxes_interrupted gauge\n"); + fprintf(ostream, "total_sandboxes_interrupted: %d\n", total_sandboxes_interrupted); + + fprintf(ostream, "# TYPE total_sandboxes_asleep gauge\n"); + fprintf(ostream, "total_sandboxes_asleep: %d\n", total_sandboxes_asleep); + + fprintf(ostream, "# TYPE total_sandboxes_returned gauge\n"); + fprintf(ostream, "total_sandboxes_returned: %d\n", total_sandboxes_returned); + + fprintf(ostream, "# TYPE total_sandboxes_complete gauge\n"); + fprintf(ostream, "total_sandboxes_complete: %d\n", total_sandboxes_complete); + + fprintf(ostream, "# TYPE total_sandboxes_error gauge\n"); + fprintf(ostream, "total_sandboxes_error: %d\n", total_sandboxes_error); +#endif + + fflush(ostream); + write(client_socket, ostream_base, ostream_size); rc = fclose(ostream); assert(rc == 0); From e40d139536e2fbae6f861905a1a3a00e058085f6 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Tue, 2 Aug 2022 15:51:48 -0400 Subject: [PATCH 03/23] feat: metrics thread and proc stat --- runtime/include/metrics_server.h | 3 +- runtime/include/proc_stat.h | 103 +++++++++++++++++++++++++++++++ runtime/include/runtime.h | 2 + runtime/src/listener_thread.c | 6 +- runtime/src/main.c | 4 ++ runtime/src/metrics_server.c | 96 +++++++++++++++++++++++++--- 6 files changed, 199 insertions(+), 15 deletions(-) create mode 100644 runtime/include/proc_stat.h diff --git a/runtime/include/metrics_server.h b/runtime/include/metrics_server.h index 540d1a59..48a754e3 100644 --- a/runtime/include/metrics_server.h +++ b/runtime/include/metrics_server.h @@ -5,6 +5,5 @@ extern struct tcp_server metrics_server; void metrics_server_init(); -int metrics_server_listen(); +void metrics_server_thread_spawn(int client_socket); int metrics_server_close(); -void metrics_server_handler(int client_socket); diff --git a/runtime/include/proc_stat.h b/runtime/include/proc_stat.h new file mode 100644 index 00000000..f36e9599 --- /dev/null +++ b/runtime/include/proc_stat.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "runtime.h" /* For runtime_pid */ + +/* Used to read process-level metrics associated with sledgert from procfs + * The parsing behavior is based on prtstat -r + */ + + +enum PROC_STAT +{ + PROC_STAT_PID = 0, /* Process ID */ + PROC_STAT_COMM = 1, /* Process Name */ + PROC_STAT_STATE = 2, /* State */ + PROC_STAT_PPID, /* Parent Process ID */ + PROC_STAT_PGRP, /* Group ID */ + PROC_STAT_SESSION, /* Session ID */ + PROC_STAT_TTY_NR, /* ??? */ + PROC_STAT_TPGID, /* ??? */ + PROC_STAT_FLAGS, /* ??? */ + PROC_STAT_MINFLT, /* Minor Page Faults */ + PROC_STAT_CMINFLT, /* Minor Page Faults of children */ + PROC_STAT_MAJFLT, /* Major Page Faults */ + PROC_STAT_CMAJFLT, /* Major Page Faults of children */ + PROC_STAT_UTIME, /* User Time */ + PROC_STAT_STIME, /* System Time */ + PROC_STAT_CUTIME, /* Child User Time */ + PROC_STAT_CSTIME, /* Child System Time */ + PROC_STAT_PRIORITY, + PROC_STAT_NICE, + PROC_STAT_NUM_THREADS, + PROC_STAT_ITREALVALUE, + PROC_STAT_STARTTIME, /* Start Time */ + PROC_STAT_VSIZE, /* Virtual Memory */ + PROC_STAT_RSS, + PROC_STAT_RSSLIM, + PROC_STAT_STARTCODE, + PROC_STAT_ENDCODE, + PROC_STAT_STARTSTACK, + PROC_STAT_KSTKESP, + PROC_STAT_KSTKEIP, + PROC_STAT_WCHAN, + PROC_STAT_NSWAP, + PROC_STAT_CNSWAP, + PROC_STAT_EXIT_SIGNAL, + PROC_STAT_PROCESSOR, + PROC_STAT_RT_PRIORITY, + PROC_STAT_POLICY, + PROC_STAT_DELAYACCR_BLKIO_TICKS, + PROC_STAT_GUEST_TIME, + PROC_STAT_CGUEST_TIME, + PROC_STAT_COUNT +}; + +struct proc_stat_metrics { + uint64_t minor_page_faults; + uint64_t major_page_faults; + uint64_t child_minor_page_faults; + uint64_t child_major_page_faults; + uint64_t user_time; + uint64_t system_time; + uint64_t guest_time; +}; + +static inline void +proc_stat_metrics_init(struct proc_stat_metrics *stat) +{ + assert(runtime_pid > 0); + + // Open sledgert's stat file in procfs + char path[256]; + snprintf(path, 256, "/proc/%d/stat", runtime_pid); + FILE *proc_stat = fopen(path, "r"); + + /* Read stat file into in-memory buffer */ + char buf[BUFSIZ]; + fgets(buf, BUFSIZ, proc_stat); + fclose(proc_stat); + + /* Parse into an array of tokens with indices aligning to the PROC_STAT enum */ + char *pos = NULL; + char *proc_stat_values[PROC_STAT_COUNT]; + for (int i = 0; i < PROC_STAT_COUNT; i++) { + char *tok = i == 0 ? strtok_r(buf, " ", &pos) : strtok_r(NULL, " ", &pos); + proc_stat_values[i] = tok; + } + + /* Fill the proc_state_metrics struct with metrics of interest */ + /* Minor Page Faults, Major Page Faults, Vsize, User, System, Guest, Uptime */ + stat->minor_page_faults = strtoul(proc_stat_values[PROC_STAT_MINFLT], NULL, 10); + stat->major_page_faults = strtoul(proc_stat_values[PROC_STAT_MAJFLT], NULL, 10); + stat->child_minor_page_faults = strtoul(proc_stat_values[PROC_STAT_CMINFLT], NULL, 10); + stat->child_major_page_faults = strtoul(proc_stat_values[PROC_STAT_CMAJFLT], NULL, 10); + stat->user_time = strtoul(proc_stat_values[PROC_STAT_UTIME], NULL, 10); + stat->system_time = strtoul(proc_stat_values[PROC_STAT_STIME], NULL, 10); + stat->guest_time = strtoul(proc_stat_values[PROC_STAT_GUEST_TIME], NULL, 10); +} diff --git a/runtime/include/runtime.h b/runtime/include/runtime.h index 5da9a050..91570c53 100644 --- a/runtime/include/runtime.h +++ b/runtime/include/runtime.h @@ -2,6 +2,7 @@ #include #include /* for epoll_create1(), epoll_ctl(), struct epoll_event */ +#include /* for pid_t */ #include #include @@ -34,6 +35,7 @@ enum RUNTIME_SIGALRM_HANDLER RUNTIME_SIGALRM_HANDLER_TRIAGED = 1 }; +extern pid_t runtime_pid; extern bool runtime_preemption_enabled; extern uint32_t runtime_processor_speed_MHz; extern uint32_t runtime_quantum_us; diff --git a/runtime/src/listener_thread.c b/runtime/src/listener_thread.c index af28dc31..9aa1b73c 100644 --- a/runtime/src/listener_thread.c +++ b/runtime/src/listener_thread.c @@ -354,15 +354,16 @@ on_metrics_server_epoll_event(struct epoll_event *evt) /* Accept as many clients requests as possible, returning when we would have blocked */ while (true) { + /* We accept the client connection with blocking semantics because we spawn ephemeral worker threads */ int client_socket = accept4(metrics_server.socket_descriptor, (struct sockaddr *)&client_address, - &address_length, SOCK_NONBLOCK); + &address_length, 0); if (unlikely(client_socket < 0)) { if (errno == EWOULDBLOCK || errno == EAGAIN) return; panic("accept4: %s", strerror(errno)); } - metrics_server_handler(client_socket); + metrics_server_thread_spawn(client_socket); } } @@ -412,7 +413,6 @@ listener_thread_main(void *dummy) generic_thread_initialize(); metrics_server_init(); - metrics_server_listen(); listener_thread_register_metrics_server(); /* Set my priority */ diff --git a/runtime/src/main.c b/runtime/src/main.c index ff04f3ba..69324283 100644 --- a/runtime/src/main.c +++ b/runtime/src/main.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #ifdef LOG_TO_FILE @@ -39,6 +40,7 @@ enum RUNTIME_SIGALRM_HANDLER runtime_sigalrm_handler = RUNTIME_SIGALRM_HANDLER_B bool runtime_preemption_enabled = true; uint32_t runtime_quantum_us = 5000; /* 5ms */ uint64_t runtime_boot_timestamp; +pid_t runtime_pid = 0; /** * Returns instructions on use of CLI if used incorrectly @@ -430,6 +432,8 @@ main(int argc, char **argv) exit(-1); } + runtime_pid = getpid(); + printf("Starting the Sledge runtime\n"); log_compiletime_config(); diff --git a/runtime/src/metrics_server.c b/runtime/src/metrics_server.c index 5c981cb9..e0ba75ac 100644 --- a/runtime/src/metrics_server.c +++ b/runtime/src/metrics_server.c @@ -1,24 +1,39 @@ +#include +#include #include +#include #include #include "admissions_control.h" -#include "tcp_server.h" +#include "debuglog.h" +#include "http.h" #include "http_total.h" +#include "proc_stat.h" +#include "runtime.h" #include "sandbox_total.h" #include "sandbox_state.h" +#include "tcp_server.h" + +/* We run threads on the "reserved OS core" using blocking semantics */ +#define METRICS_SERVER_CORE_ID 0 -struct tcp_server metrics_server; +static pthread_attr_t metrics_server_thread_settings; +struct tcp_server metrics_server; +static void *metrics_server_handler(void *arg); void metrics_server_init() { tcp_server_init(&metrics_server, 1776); -} + int rc = tcp_server_listen(&metrics_server); + assert(rc == 0); -int -metrics_server_listen() -{ - return tcp_server_listen(&metrics_server); + /* Configure pthread attributes to pin metrics server threads to CPU 0 */ + pthread_attr_init(&metrics_server_thread_settings); + cpu_set_t cs; + CPU_ZERO(&cs); + CPU_SET(METRICS_SERVER_CORE_ID, &cs); + pthread_attr_setaffinity_np(&metrics_server_thread_settings, sizeof(cpu_set_t), &cs); } int @@ -28,8 +43,40 @@ metrics_server_close() } void -metrics_server_handler(int client_socket) +metrics_server_thread_spawn(int client_socket) { + /* Duplicate fd so fclose doesn't close the actual client_socket */ + int temp_fd = dup(client_socket); + FILE *req_body = fdopen(temp_fd, "r"); + + /* Basic L7 routing to filter out favicon requests */ + char http_status_code_buf[256]; + fgets(http_status_code_buf, 256, req_body); + fclose(req_body); + + if (strncmp(http_status_code_buf, "GET / HTTP", 10) != 0) { + write(client_socket, http_header_build(404), http_header_len(404)); + close(client_socket); + return; + } + + /* Fire and forget, so we don't save the thread handles */ + pthread_t metrics_server_thread; + int rc = pthread_create(&metrics_server_thread, &metrics_server_thread_settings, metrics_server_handler, + (void *)(long)client_socket); + + if (rc != 0) { + debuglog("Metrics Server failed to spawn pthread with %s\n", strerror(rc)); + close(client_socket); + } +} + +static void * +metrics_server_handler(void *arg) +{ + /* Intermediate cast to integral value of 64-bit width to silence compiler nits */ + int client_socket = (int)(long)arg; + int rc = 0; char *ostream_base = NULL; @@ -130,14 +177,43 @@ metrics_server_handler(int client_socket) fprintf(ostream, "total_sandboxes_error: %d\n", total_sandboxes_error); #endif - fflush(ostream); - write(client_socket, ostream_base, ostream_size); + struct proc_stat_metrics stat; + proc_stat_metrics_init(&stat); + + fprintf(ostream, "# TYPE os_proc_major_page_faults counter\n"); + fprintf(ostream, "os_proc_major_page_faults: %lu\n", stat.major_page_faults); + + fprintf(ostream, "# TYPE os_proc_minor_page_faults counter\n"); + fprintf(ostream, "os_proc_minor_page_faults: %lu\n", stat.minor_page_faults); + + fprintf(ostream, "# TYPE os_proc_child_major_page_faults counter\n"); + fprintf(ostream, "os_proc_child_major_page_faults: %lu\n", stat.child_major_page_faults); + + fprintf(ostream, "# TYPE os_proc_child_minor_page_faults counter\n"); + fprintf(ostream, "os_proc_child_minor_page_faults: %lu\n", stat.child_minor_page_faults); + + fprintf(ostream, "# TYPE os_proc_user_time counter\n"); + fprintf(ostream, "os_proc_user_time: %lu\n", stat.user_time); + fprintf(ostream, "# TYPE os_proc_sys_time counter\n"); + fprintf(ostream, "os_proc_sys_time: %lu\n", stat.system_time); + + fprintf(ostream, "# TYPE os_proc_guest_time counter\n"); + fprintf(ostream, "os_proc_guest_time: %lu\n", stat.guest_time); + + fflush(ostream); + assert(ostream_size > 0); rc = fclose(ostream); assert(rc == 0); + /* Closing the memstream does not close the generated buffer */ + ssize_t nwritten = write(client_socket, ostream_base, ostream_size); + assert(nwritten == ostream_size); + free(ostream_base); ostream_size = 0; close(client_socket); + + pthread_exit(NULL); } From 36edac74257165d87c0e0d24e0a2bd53354c91bd Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Tue, 2 Aug 2022 18:37:35 -0400 Subject: [PATCH 04/23] feat: route level metrics --- runtime/include/http_router.h | 8 +++ runtime/include/http_session.h | 5 ++ runtime/include/perf_window.h | 3 +- runtime/include/route.h | 6 +- runtime/include/route_metrics.h | 55 ++++++++++++++++ runtime/include/tenant_functions.h | 3 + runtime/src/listener_thread.c | 2 + runtime/src/metrics_server.c | 4 ++ .../src/metrics_server_route_level_metrics.c | 62 +++++++++++++++++++ runtime/src/tenant_database.c | 9 +++ 10 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 runtime/include/route_metrics.h create mode 100644 runtime/src/metrics_server_route_level_metrics.c diff --git a/runtime/include/http_router.h b/runtime/include/http_router.h index 13c96f29..1291ad94 100644 --- a/runtime/include/http_router.h +++ b/runtime/include/http_router.h @@ -37,6 +37,8 @@ http_router_add_route(http_router_t *router, struct route_config *config, struct .response_size = config->http_resp_size, .response_content_type = config->http_resp_content_type }; + route_metrics_init(&route.metrics); + /* Admissions Control */ uint64_t expected_execution = (uint64_t)config->expected_execution_us * runtime_processor_speed_MHz; admissions_info_initialize(&route.admissions_info, config->admissions_percentile, expected_execution, @@ -59,3 +61,9 @@ http_router_match_route(http_router_t *router, char *route) return NULL; } + +static inline void +http_router_foreach(http_router_t *router, void (*cb)(route_t *, void *, void *), void *arg_one, void *arg_two) +{ + for (int i = 0; i < router->length; i++) { cb(&router->buffer[i], arg_one, arg_two); } +} diff --git a/runtime/include/http_session.h b/runtime/include/http_session.h index baeb685e..8dc91b45 100644 --- a/runtime/include/http_session.h +++ b/runtime/include/http_session.h @@ -15,6 +15,8 @@ #include "http_parser.h" #include "http_parser_settings.h" #include "http_total.h" +#include "route.h" +#include "route_metrics.h" #include "tenant.h" #include "vec.h" #include "http_session_perf_log.h" @@ -54,6 +56,7 @@ struct http_session { struct vec_u8 response_buffer; size_t response_buffer_written; struct tenant *tenant; /* Backlink required when read blocks on listener core */ + struct route *route; /* Backlink required to handle http metrics */ uint64_t request_arrival_timestamp; uint64_t request_downloaded_timestamp; uint64_t response_takeoff_timestamp; @@ -90,6 +93,7 @@ http_session_init(struct http_session *session, int socket_descriptor, const str assert(socket_address != NULL); session->tenant = tenant; + session->route = NULL; session->socket = socket_descriptor; session->request_arrival_timestamp = request_arrival_timestamp; memcpy(&session->client_address, socket_address, sizeof(struct sockaddr)); @@ -175,6 +179,7 @@ http_session_set_response_header(struct http_session *session, int status_code, assert(session != NULL); assert(status_code >= 200 && status_code <= 599); http_total_increment(status_code); + route_metrics_increment(&session->route->metrics, status_code); if (status_code == 200) { session->response_header_length = snprintf(session->response_header, diff --git a/runtime/include/perf_window.h b/runtime/include/perf_window.h index 0045a971..50cc0549 100644 --- a/runtime/include/perf_window.h +++ b/runtime/include/perf_window.h @@ -149,7 +149,8 @@ perf_window_get_percentile(struct perf_window *perf_window, uint8_t percentile, { assert(perf_window != NULL); assert(percentile >= 50 && percentile <= 99); - assert(perf_window->count > 0); + + if (unlikely(perf_window->count == 0)) return 0; if (likely(perf_window->count >= PERF_WINDOW_BUFFER_SIZE)) return perf_window->by_duration[precomputed_index].execution_time; diff --git a/runtime/include/route.h b/runtime/include/route.h index af4b490f..8b01aa6a 100644 --- a/runtime/include/route.h +++ b/runtime/include/route.h @@ -5,11 +5,13 @@ #include "admissions_info.h" #include "module.h" +#include "route_metrics.h" /* Assumption: entrypoint is always _start. This should be enhanced later */ struct route { - char *route; - struct module *module; + char *route; + struct route_metrics metrics; + struct module *module; /* HTTP State */ uint32_t relative_deadline_us; uint64_t relative_deadline; /* cycles */ diff --git a/runtime/include/route_metrics.h b/runtime/include/route_metrics.h new file mode 100644 index 00000000..d7480ad5 --- /dev/null +++ b/runtime/include/route_metrics.h @@ -0,0 +1,55 @@ +#pragma once + +#include + +struct route_metrics { + atomic_ulong total_requests; + atomic_ulong total_2XX; + atomic_ulong total_4XX; + atomic_ulong total_5XX; +}; + +static inline void +route_metrics_init(struct route_metrics *rm) +{ + atomic_init(&rm->total_requests, 0); + atomic_init(&rm->total_2XX, 0); + atomic_init(&rm->total_4XX, 0); + atomic_init(&rm->total_5XX, 0); +} + +static inline void +route_metrics_increment_request(struct route_metrics *rm) +{ + atomic_fetch_add(&rm->total_requests, 1); +} + +static inline void +route_metrics_increment_2XX(struct route_metrics *rm) +{ + atomic_fetch_add(&rm->total_2XX, 1); +} + +static inline void +route_metrics_increment_4XX(struct route_metrics *rm) +{ + atomic_fetch_add(&rm->total_4XX, 1); +} + +static inline void +route_metrics_increment_5XX(struct route_metrics *rm) +{ + atomic_fetch_add(&rm->total_5XX, 1); +} + +static inline void +route_metrics_increment(struct route_metrics *rm, int status_code) +{ + if (status_code >= 200 && status_code <= 299) { + route_metrics_increment_2XX(rm); + } else if (status_code >= 400 && status_code <= 499) { + route_metrics_increment_4XX(rm); + } else if (status_code >= 500 && status_code <= 599) { + route_metrics_increment_5XX(rm); + } +} diff --git a/runtime/include/tenant_functions.h b/runtime/include/tenant_functions.h index cb76651b..be9d3554 100644 --- a/runtime/include/tenant_functions.h +++ b/runtime/include/tenant_functions.h @@ -20,6 +20,9 @@ struct tenant *tenant_database_find_by_socket_descriptor(int socket_descriptor); struct tenant *tenant_database_find_by_port(uint16_t port); struct tenant *tenant_database_find_by_ptr(void *ptr); +typedef void (*tenant_database_foreach_cb_t)(struct tenant *, void *, void *); +void tenant_database_foreach(tenant_database_foreach_cb_t, void *, void *); + static inline int tenant_policy_specific_init(struct tenant *tenant, struct tenant_config *config) { diff --git a/runtime/src/listener_thread.c b/runtime/src/listener_thread.c index 9aa1b73c..93b887d6 100644 --- a/runtime/src/listener_thread.c +++ b/runtime/src/listener_thread.c @@ -233,6 +233,8 @@ on_client_request_received(struct http_session *session) return; } + session->route = route; + /* * Perform admissions control. * If 0, workload was rejected, so close with 429 "Too Many Requests" and continue diff --git a/runtime/src/metrics_server.c b/runtime/src/metrics_server.c index e0ba75ac..b8cd1576 100644 --- a/runtime/src/metrics_server.c +++ b/runtime/src/metrics_server.c @@ -21,6 +21,8 @@ static pthread_attr_t metrics_server_thread_settings; struct tcp_server metrics_server; static void *metrics_server_handler(void *arg); +extern void metrics_server_route_level_metrics_render(FILE *ostream); + void metrics_server_init() { @@ -201,6 +203,8 @@ metrics_server_handler(void *arg) fprintf(ostream, "# TYPE os_proc_guest_time counter\n"); fprintf(ostream, "os_proc_guest_time: %lu\n", stat.guest_time); + metrics_server_route_level_metrics_render(ostream); + fflush(ostream); assert(ostream_size > 0); rc = fclose(ostream); diff --git a/runtime/src/metrics_server_route_level_metrics.c b/runtime/src/metrics_server_route_level_metrics.c new file mode 100644 index 00000000..2936a5dc --- /dev/null +++ b/runtime/src/metrics_server_route_level_metrics.c @@ -0,0 +1,62 @@ +#include +#include + +#include "perf_window.h" +#include "tenant_functions.h" + +// tenant_database_foreach_cb_t + +static const int p50_idx = PERF_WINDOW_BUFFER_SIZE * 50 / 100; +static const int p90_idx = PERF_WINDOW_BUFFER_SIZE * 90 / 100; + +void +render_routes(struct route *route, void *arg_one, void *arg_two) +{ + FILE *ostream = (FILE *)arg_one; + struct tenant *tenant = (struct tenant *)arg_two; + +#ifdef ADMISSIONS_CONTROL + uint64_t latency_p50 = perf_window_get_percentile(&route->admissions_info.perf_window, 50, p50_idx); + uint64_t latency_p90 = perf_window_get_percentile(&route->admissions_info.perf_window, 90, p90_idx); +#endif + + uint64_t total_requests = atomic_load(&route->metrics.total_requests); + uint64_t total_2XX = atomic_load(&route->metrics.total_2XX); + uint64_t total_4XX = atomic_load(&route->metrics.total_4XX); + uint64_t total_5XX = atomic_load(&route->metrics.total_5XX); + + fprintf(ostream, "# TYPE %s_%s_total_requests counter\n", tenant->name, route->route); + fprintf(ostream, "%s_%s_total_requests: %lu\n", tenant->name, route->route, total_requests); + + fprintf(ostream, "# TYPE %s_%s_total_2XX counter\n", tenant->name, route->route); + fprintf(ostream, "%s_%s_total_2XX: %lu\n", tenant->name, route->route, total_2XX); + + fprintf(ostream, "# TYPE %s_%s_total_4XX counter\n", tenant->name, route->route); + fprintf(ostream, "%s_%s_total_4XX: %lu\n", tenant->name, route->route, total_4XX); + + fprintf(ostream, "# TYPE %s_%s_total_5XX counter\n", tenant->name, route->route); + fprintf(ostream, "%s_%s_total_5XX: %lu\n", tenant->name, route->route, total_5XX); + +#ifdef ADMISSIONS_CONTROL + fprintf(ostream, "# TYPE %s_%s_latency_p50 gauge\n", tenant->name, route->route); + fprintf(ostream, "%s_%s_latency_p50: %lu\n", tenant->name, route->route, latency_p50); + + fprintf(ostream, "# TYPE %s_%s_latency_p90 gauge\n", tenant->name, route->route); + fprintf(ostream, "%s_%s_latency_p90: %lu\n", tenant->name, route->route, latency_p90); +#endif +} + +void +render_tenant_routers(struct tenant *tenant, void *arg_one, void *arg_two) +{ + FILE *ostream = (FILE *)arg_one; + char *name = tenant->name; + + http_router_foreach(&tenant->router, render_routes, ostream, tenant); +} + +void +metrics_server_route_level_metrics_render(FILE *ostream) +{ + tenant_database_foreach(render_tenant_routers, ostream, NULL); +} diff --git a/runtime/src/tenant_database.c b/runtime/src/tenant_database.c index 78440721..db909802 100644 --- a/runtime/src/tenant_database.c +++ b/runtime/src/tenant_database.c @@ -93,3 +93,12 @@ tenant_database_find_by_ptr(void *ptr) } return NULL; } + +void +tenant_database_foreach(void (*cb)(struct tenant *, void *), void *arg) +{ + for (size_t i = 0; i < tenant_database_count; i++) { + assert(tenant_database[i]); + cb(tenant_database[i], arg); + } +} From 8b2b26ffe06be7371d3923987d22887320cee9b4 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Thu, 4 Aug 2022 18:57:07 -0400 Subject: [PATCH 05/23] fix: Correct routes and URLs to please prometheus --- runtime/src/metrics_server.c | 2 +- .../src/metrics_server_route_level_metrics.c | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/runtime/src/metrics_server.c b/runtime/src/metrics_server.c index b8cd1576..a4f4d4a7 100644 --- a/runtime/src/metrics_server.c +++ b/runtime/src/metrics_server.c @@ -56,7 +56,7 @@ metrics_server_thread_spawn(int client_socket) fgets(http_status_code_buf, 256, req_body); fclose(req_body); - if (strncmp(http_status_code_buf, "GET / HTTP", 10) != 0) { + if (strncmp(http_status_code_buf, "GET /metrics HTTP", 10) != 0) { write(client_socket, http_header_build(404), http_header_len(404)); close(client_socket); return; diff --git a/runtime/src/metrics_server_route_level_metrics.c b/runtime/src/metrics_server_route_level_metrics.c index 2936a5dc..b5178742 100644 --- a/runtime/src/metrics_server_route_level_metrics.c +++ b/runtime/src/metrics_server_route_level_metrics.c @@ -25,17 +25,20 @@ render_routes(struct route *route, void *arg_one, void *arg_two) uint64_t total_4XX = atomic_load(&route->metrics.total_4XX); uint64_t total_5XX = atomic_load(&route->metrics.total_5XX); - fprintf(ostream, "# TYPE %s_%s_total_requests counter\n", tenant->name, route->route); - fprintf(ostream, "%s_%s_total_requests: %lu\n", tenant->name, route->route, total_requests); + // Strip leading / + const char *route_label = &route->route[1]; - fprintf(ostream, "# TYPE %s_%s_total_2XX counter\n", tenant->name, route->route); - fprintf(ostream, "%s_%s_total_2XX: %lu\n", tenant->name, route->route, total_2XX); + fprintf(ostream, "# TYPE %s_%s_total_requests counter\n", tenant->name, route_label); + fprintf(ostream, "%s_%s_total_requests: %lu\n", tenant->name, route_label, total_requests); - fprintf(ostream, "# TYPE %s_%s_total_4XX counter\n", tenant->name, route->route); - fprintf(ostream, "%s_%s_total_4XX: %lu\n", tenant->name, route->route, total_4XX); + fprintf(ostream, "# TYPE %s_%s_total_2XX counter\n", tenant->name, route_label); + fprintf(ostream, "%s_%s_total_2XX: %lu\n", tenant->name, route_label, total_2XX); - fprintf(ostream, "# TYPE %s_%s_total_5XX counter\n", tenant->name, route->route); - fprintf(ostream, "%s_%s_total_5XX: %lu\n", tenant->name, route->route, total_5XX); + fprintf(ostream, "# TYPE %s_%s_total_4XX counter\n", tenant->name, route_label); + fprintf(ostream, "%s_%s_total_4XX: %lu\n", tenant->name, route_label, total_4XX); + + fprintf(ostream, "# TYPE %s_%s_total_5XX counter\n", tenant->name, route_label); + fprintf(ostream, "%s_%s_total_5XX: %lu\n", tenant->name, route_label, total_5XX); #ifdef ADMISSIONS_CONTROL fprintf(ostream, "# TYPE %s_%s_latency_p50 gauge\n", tenant->name, route->route); From dac3322b97f98e3475118bf893740f0a51496ebb Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 5 Aug 2022 14:04:23 -0400 Subject: [PATCH 06/23] fix: skip route accounting on no match --- runtime/include/http_session.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/runtime/include/http_session.h b/runtime/include/http_session.h index 8dc91b45..21840df2 100644 --- a/runtime/include/http_session.h +++ b/runtime/include/http_session.h @@ -179,7 +179,9 @@ http_session_set_response_header(struct http_session *session, int status_code, assert(session != NULL); assert(status_code >= 200 && status_code <= 599); http_total_increment(status_code); - route_metrics_increment(&session->route->metrics, status_code); + + /* We might not have actually matched a route */ + if (likely(session->route != NULL)) { route_metrics_increment(&session->route->metrics, status_code); } if (status_code == 200) { session->response_header_length = snprintf(session->response_header, From d19820ac4ac7aa5d54c1873832489b8e34b17df6 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 5 Aug 2022 14:04:37 -0400 Subject: [PATCH 07/23] fix: HTTP nits --- runtime/include/http.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/runtime/include/http.h b/runtime/include/http.h index cab6f346..c3e3a0d0 100644 --- a/runtime/include/http.h +++ b/runtime/include/http.h @@ -5,8 +5,8 @@ #include "http_total.h" #include "panic.h" -#define HTTP_MAX_HEADER_COUNT 16 -#define HTTP_MAX_HEADER_LENGTH 32 +#define HTTP_MAX_HEADER_COUNT 32 +#define HTTP_MAX_HEADER_LENGTH 64 #define HTTP_MAX_HEADER_VALUE_LENGTH 256 #define HTTP_MAX_FULL_URL_LENGTH 256 @@ -33,31 +33,41 @@ #define HTTP_RESPONSE_404_NOT_FOUND \ "HTTP/1.1 404 Not Found\r\n" \ "Server: SLEdge\r\n" \ + "Content-Length: 0\r\n" \ "Connection: close\r\n" \ + "\r\n" \ "\r\n" #define HTTP_RESPONSE_413_PAYLOAD_TOO_LARGE \ "HTTP/1.1 413 Payload Too Large\r\n" \ "Server: SLEdge\r\n" \ + "Content-Length: 0\r\n" \ "Connection: close\r\n" \ + "\r\n" \ "\r\n" #define HTTP_RESPONSE_429_TOO_MANY_REQUESTS \ "HTTP/1.1 429 Too Many Requests\r\n" \ "Server: SLEdge\r\n" \ + "Content-Length: 0\r\n" \ "Connection: close\r\n" \ + "\r\n" \ "\r\n" #define HTTP_RESPONSE_500_INTERNAL_SERVER_ERROR \ "HTTP/1.1 500 Internal Server Error\r\n" \ "Server: SLEdge\r\n" \ + "Content-Length: 0\r\n" \ "Connection: close\r\n" \ + "\r\n" \ "\r\n" #define HTTP_RESPONSE_503_SERVICE_UNAVAILABLE \ "HTTP/1.1 503 Service Unavailable\r\n" \ "Server: SLEdge\r\n" \ + "Content-Length: 0\r\n" \ "Connection: close\r\n" \ + "\r\n" \ "\r\n" static inline const char * From 4939ba32fb4b41d7df8e0c7358bd5cb9192f3442 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 5 Aug 2022 14:05:01 -0400 Subject: [PATCH 08/23] fix: Correct admissions control prometheus keys --- runtime/src/metrics_server_route_level_metrics.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/runtime/src/metrics_server_route_level_metrics.c b/runtime/src/metrics_server_route_level_metrics.c index b5178742..bda2b0a8 100644 --- a/runtime/src/metrics_server_route_level_metrics.c +++ b/runtime/src/metrics_server_route_level_metrics.c @@ -41,11 +41,11 @@ render_routes(struct route *route, void *arg_one, void *arg_two) fprintf(ostream, "%s_%s_total_5XX: %lu\n", tenant->name, route_label, total_5XX); #ifdef ADMISSIONS_CONTROL - fprintf(ostream, "# TYPE %s_%s_latency_p50 gauge\n", tenant->name, route->route); - fprintf(ostream, "%s_%s_latency_p50: %lu\n", tenant->name, route->route, latency_p50); + fprintf(ostream, "# TYPE %s_%s_latency_p50 gauge\n", tenant->name, route_label); + fprintf(ostream, "%s_%s_latency_p50: %lu\n", tenant->name, route_label, latency_p50); - fprintf(ostream, "# TYPE %s_%s_latency_p90 gauge\n", tenant->name, route->route); - fprintf(ostream, "%s_%s_latency_p90: %lu\n", tenant->name, route->route, latency_p90); + fprintf(ostream, "# TYPE %s_%s_latency_p90 gauge\n", tenant->name, route_label); + fprintf(ostream, "%s_%s_latency_p90: %lu\n", tenant->name, route_label, latency_p90); #endif } From 1230e0bfd930fdbaa97f1d529f8b3c2eebc98fee Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 13:28:07 -0400 Subject: [PATCH 09/23] fix: merge conflicts from pulling master --- runtime/src/listener_thread.c | 2 -- runtime/src/metrics_server_route_level_metrics.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/runtime/src/listener_thread.c b/runtime/src/listener_thread.c index 2630a950..18d3bd48 100644 --- a/runtime/src/listener_thread.c +++ b/runtime/src/listener_thread.c @@ -410,8 +410,6 @@ listener_thread_main(void *dummy) { struct epoll_event epoll_events[RUNTIME_MAX_EPOLL_EVENTS]; - generic_thread_initialize(); - metrics_server_init(); listener_thread_register_metrics_server(); diff --git a/runtime/src/metrics_server_route_level_metrics.c b/runtime/src/metrics_server_route_level_metrics.c index bda2b0a8..bb54d4a4 100644 --- a/runtime/src/metrics_server_route_level_metrics.c +++ b/runtime/src/metrics_server_route_level_metrics.c @@ -6,8 +6,8 @@ // tenant_database_foreach_cb_t -static const int p50_idx = PERF_WINDOW_BUFFER_SIZE * 50 / 100; -static const int p90_idx = PERF_WINDOW_BUFFER_SIZE * 90 / 100; +static const int p50_idx = perf_window_capacity * 50 / 100; +static const int p90_idx = perf_window_capacity * 90 / 100; void render_routes(struct route *route, void *arg_one, void *arg_two) From 89fc0b26fd9888b17498b77c3f1b8ce593b856d6 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 13:46:04 -0400 Subject: [PATCH 10/23] fix: HTTP header truncation --- runtime/include/http.h | 10 ---------- runtime/include/http_session.h | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/runtime/include/http.h b/runtime/include/http.h index c3e3a0d0..7d31bdb3 100644 --- a/runtime/include/http.h +++ b/runtime/include/http.h @@ -33,41 +33,31 @@ #define HTTP_RESPONSE_404_NOT_FOUND \ "HTTP/1.1 404 Not Found\r\n" \ "Server: SLEdge\r\n" \ - "Content-Length: 0\r\n" \ "Connection: close\r\n" \ - "\r\n" \ "\r\n" #define HTTP_RESPONSE_413_PAYLOAD_TOO_LARGE \ "HTTP/1.1 413 Payload Too Large\r\n" \ "Server: SLEdge\r\n" \ - "Content-Length: 0\r\n" \ "Connection: close\r\n" \ - "\r\n" \ "\r\n" #define HTTP_RESPONSE_429_TOO_MANY_REQUESTS \ "HTTP/1.1 429 Too Many Requests\r\n" \ "Server: SLEdge\r\n" \ - "Content-Length: 0\r\n" \ "Connection: close\r\n" \ - "\r\n" \ "\r\n" #define HTTP_RESPONSE_500_INTERNAL_SERVER_ERROR \ "HTTP/1.1 500 Internal Server Error\r\n" \ "Server: SLEdge\r\n" \ - "Content-Length: 0\r\n" \ "Connection: close\r\n" \ - "\r\n" \ "\r\n" #define HTTP_RESPONSE_503_SERVICE_UNAVAILABLE \ "HTTP/1.1 503 Service Unavailable\r\n" \ "Server: SLEdge\r\n" \ - "Content-Length: 0\r\n" \ "Connection: close\r\n" \ - "\r\n" \ "\r\n" static inline const char * diff --git a/runtime/include/http_session.h b/runtime/include/http_session.h index fc7973e9..5a0a24bb 100644 --- a/runtime/include/http_session.h +++ b/runtime/include/http_session.h @@ -194,7 +194,7 @@ http_session_set_response_header(struct http_session *session, int status_code, ? HTTP_SESSION_RESPONSE_HEADER_CAPACITY : header_len; - strncpy(session->response_header, http_header_build(status_code), to_copy - 1); + strncpy(session->response_header, http_header_build(status_code), to_copy); session->response_header_length = to_copy; } From 2b5957c394ad2b022f7eba6a335c1559f174df3c Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 14:06:24 -0400 Subject: [PATCH 11/23] refactor: HTTP total counters --- runtime/include/http.h | 28 +++++------------------ runtime/include/http_session.h | 2 +- runtime/include/http_total.h | 41 ++++++++-------------------------- runtime/src/http_total.c | 25 +++++++++------------ runtime/src/main.c | 6 ++--- runtime/src/metrics_server.c | 19 ++++++++-------- 6 files changed, 39 insertions(+), 82 deletions(-) diff --git a/runtime/include/http.h b/runtime/include/http.h index 7d31bdb3..9d4300fc 100644 --- a/runtime/include/http.h +++ b/runtime/include/http.h @@ -63,38 +63,22 @@ static inline const char * http_header_build(int status_code) { - const char *response; - int rc; switch (status_code) { case 400: - response = HTTP_RESPONSE_400_BAD_REQUEST; - http_total_increment_4XX(); - break; + return HTTP_RESPONSE_400_BAD_REQUEST; case 404: - response = HTTP_RESPONSE_404_NOT_FOUND; - http_total_increment_4XX(); - break; + return HTTP_RESPONSE_404_NOT_FOUND; case 413: - response = HTTP_RESPONSE_413_PAYLOAD_TOO_LARGE; - http_total_increment_4XX(); - break; + return HTTP_RESPONSE_413_PAYLOAD_TOO_LARGE; case 429: - response = HTTP_RESPONSE_429_TOO_MANY_REQUESTS; - http_total_increment_4XX(); - break; + return HTTP_RESPONSE_429_TOO_MANY_REQUESTS; case 500: - response = HTTP_RESPONSE_500_INTERNAL_SERVER_ERROR; - http_total_increment_5XX(); - break; + return HTTP_RESPONSE_500_INTERNAL_SERVER_ERROR; case 503: - response = HTTP_RESPONSE_503_SERVICE_UNAVAILABLE; - http_total_increment_5XX(); - break; + return HTTP_RESPONSE_503_SERVICE_UNAVAILABLE; default: panic("%d is not a valid status code\n", status_code); } - - return response; } static inline size_t diff --git a/runtime/include/http_session.h b/runtime/include/http_session.h index 5a0a24bb..ca7e1d12 100644 --- a/runtime/include/http_session.h +++ b/runtime/include/http_session.h @@ -179,7 +179,7 @@ http_session_set_response_header(struct http_session *session, int status_code, { assert(session != NULL); assert(status_code >= 200 && status_code <= 599); - http_total_increment(status_code); + http_total_increment_response(status_code); /* We might not have actually matched a route */ if (likely(session->route != NULL)) { route_metrics_increment(&session->route->metrics, status_code); } diff --git a/runtime/include/http_total.h b/runtime/include/http_total.h index dc697456..455b0546 100644 --- a/runtime/include/http_total.h +++ b/runtime/include/http_total.h @@ -9,10 +9,9 @@ * behind a compiler flag. 2XX and 4XX can be incremented by worker cores, so they are behind a flag because * of concerns about contention */ +#ifdef HTTP_TOTAL_COUNTERS extern _Atomic uint32_t http_total_requests; extern _Atomic uint32_t http_total_5XX; - -#ifdef LOG_TOTAL_REQS_RESPS extern _Atomic uint32_t http_total_2XX; extern _Atomic uint32_t http_total_4XX; #endif @@ -20,54 +19,32 @@ extern _Atomic uint32_t http_total_4XX; static inline void http_total_init() { +#ifdef HTTP_TOTAL_COUNTERS atomic_init(&http_total_requests, 0); - atomic_init(&http_total_5XX, 0); -#ifdef LOG_TOTAL_REQS_RESPS atomic_init(&http_total_2XX, 0); atomic_init(&http_total_4XX, 0); + atomic_init(&http_total_5XX, 0); #endif } static inline void http_total_increment_request() { +#ifdef HTTP_TOTAL_COUNTERS atomic_fetch_add(&http_total_requests, 1); -} - -static inline void -http_total_increment_2XX() -{ -#ifdef LOG_TOTAL_REQS_RESPS - atomic_fetch_add(&http_total_2XX, 1); -#endif -} - -static inline void -http_total_increment_4XX() -{ -#ifdef LOG_TOTAL_REQS_RESPS - atomic_fetch_add(&http_total_4XX, 1); #endif } static inline void -http_total_increment_5XX() -{ - atomic_fetch_add(&http_total_5XX, 1); -} - -static inline void -http_total_increment(int status_code) +http_total_increment_response(int status_code) { -#ifdef LOG_TOTAL_REQS_RESPS +#ifdef HTTP_TOTAL_COUNTERS if (status_code >= 200 && status_code <= 299) { - http_total_increment_2XX(); + atomic_fetch_add(&http_total_2XX, 1); } else if (status_code >= 400 && status_code <= 499) { - http_total_increment_4XX(); + atomic_fetch_add(&http_total_4XX, 1); } else if (status_code >= 500 && status_code <= 599) { - http_total_increment_5XX(); + atomic_fetch_add(&http_total_5XX, 1); } -#else - if (status_code >= 500 && status_code <= 599) { http_total_increment_5XX(); } #endif } diff --git a/runtime/src/http_total.c b/runtime/src/http_total.c index 68d4ca94..c655bf92 100644 --- a/runtime/src/http_total.c +++ b/runtime/src/http_total.c @@ -5,31 +5,26 @@ /* 2XX + 4XX should equal sandboxes */ /* Listener Core Bookkeeping */ +#ifdef HTTP_TOTAL_COUNTERS _Atomic uint32_t http_total_requests = 0; _Atomic uint32_t http_total_5XX = 0; - -#ifdef LOG_TOTAL_REQS_RESPS -_Atomic uint32_t http_total_2XX = 0; -_Atomic uint32_t http_total_4XX = 0; +_Atomic uint32_t http_total_2XX = 0; +_Atomic uint32_t http_total_4XX = 0; #endif /* Primarily intended to be called via GDB */ void http_total_log() { - uint32_t total_reqs = atomic_load(&http_total_requests); - uint32_t total_5XX = atomic_load(&http_total_5XX); - -#ifdef LOG_TOTAL_REQS_RESPS - uint32_t total_2XX = atomic_load(&http_total_2XX); - uint32_t total_4XX = atomic_load(&http_total_4XX); - - int64_t total_responses = total_2XX + total_4XX + total_5XX; - int64_t outstanding_requests = (int64_t)total_reqs - total_responses; +#ifdef HTTP_TOTAL_COUNTERS + uint32_t total_reqs = atomic_load(&http_total_requests); + uint32_t total_2XX = atomic_load(&http_total_2XX); + uint32_t total_4XX = atomic_load(&http_total_4XX); + uint32_t total_5XX = atomic_load(&http_total_5XX); + int64_t total_responses = total_2XX + total_4XX + total_5XX; + int64_t outstanding_requests = (int64_t)total_reqs - total_responses; debuglog("Requests: %u (%ld outstanding)\n\tResponses: %ld\n\t\t2XX: %u\n\t\t4XX: %u\n\t\t5XX: %u\n", total_reqs, outstanding_requests, total_responses, total_2XX, total_4XX, total_5XX); -#else - debuglog("Requests: %u\n\tResponses:\n\t\t\t5XX: %u\n", total_reqs, total_5XX); #endif }; diff --git a/runtime/src/main.c b/runtime/src/main.c index 2f4cd3c0..769a439e 100644 --- a/runtime/src/main.c +++ b/runtime/src/main.c @@ -319,10 +319,10 @@ log_compiletime_config() pretty_print_key_disabled("Log State Changes"); #endif -#ifdef LOG_TOTAL_REQS_RESPS - pretty_print_key_enabled("Log Total Reqs/Resps"); +#ifdef HTTP_TOTAL_COUNTERS + pretty_print_key_enabled("HTTP Total Counters"); #else - pretty_print_key_disabled("Log Total Reqs/Resps"); + pretty_print_key_disabled("HTTP Total Counters"); #endif #ifdef SANDBOX_STATE_TOTALS diff --git a/runtime/src/metrics_server.c b/runtime/src/metrics_server.c index a4f4d4a7..a729734e 100644 --- a/runtime/src/metrics_server.c +++ b/runtime/src/metrics_server.c @@ -86,12 +86,11 @@ metrics_server_handler(void *arg) FILE *ostream = open_memstream(&ostream_base, &ostream_size); assert(ostream != NULL); +#ifdef HTTP_TOTAL_COUNTERS uint32_t total_reqs = atomic_load(&http_total_requests); uint32_t total_5XX = atomic_load(&http_total_5XX); - -#ifdef LOG_TOTAL_REQS_RESPS - uint32_t total_2XX = atomic_load(&http_total_2XX); - uint32_t total_4XX = atomic_load(&http_total_4XX); + uint32_t total_2XX = atomic_load(&http_total_2XX); + uint32_t total_4XX = atomic_load(&http_total_4XX); #endif uint32_t total_sandboxes = atomic_load(&sandbox_total); @@ -118,23 +117,25 @@ metrics_server_handler(void *arg) fprintf(ostream, "HTTP/1.1 200 OK\r\n\r\n"); - fprintf(ostream, "# TYPE total_requests counter\n"); - fprintf(ostream, "total_requests: %d\n", total_reqs); #ifdef ADMISSIONS_CONTROL fprintf(ostream, "# TYPE work_admitted_percentile gauge\n"); fprintf(ostream, "work_admitted_percentile: %f\n", work_admitted_percentile); #endif - fprintf(ostream, "# TYPE total_5XX counter\n"); - fprintf(ostream, "total_5XX: %d\n", total_5XX); -#ifdef LOG_TOTAL_REQS_RESPS +#ifdef HTTP_TOTAL_COUNTERS + fprintf(ostream, "# TYPE total_requests counter\n"); + fprintf(ostream, "total_requests: %d\n", total_reqs); + fprintf(ostream, "# TYPE total_2XX counter\n"); fprintf(ostream, "total_2XX: %d\n", total_2XX); fprintf(ostream, "# TYPE total_4XX counter\n"); fprintf(ostream, "total_4XX: %d\n", total_4XX); + + fprintf(ostream, "# TYPE total_5XX counter\n"); + fprintf(ostream, "total_5XX: %d\n", total_5XX); #endif // This global is padded by 1 for error handling, so decrement here for true value From edf98efa51e60e5b16a1e0b0e59e70e5257a082a Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 14:09:01 -0400 Subject: [PATCH 12/23] refactor: http total counters Makefile --- runtime/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/Makefile b/runtime/Makefile index 2fe298e8..28cb3588 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -78,7 +78,7 @@ BINARY_NAME=sledgert # This flag dumps totals of incoming requests and outgoing responses, broken out by status code # family, such as 2XX, 4XX, 5XX. It is useful to debug clients hanging waiting for a response. # To log, run `call http_total_log()` while in GDB -# CFLAGS += -DLOG_TOTAL_REQS_RESPS +CFLAGS += -DHTTP_TOTAL_COUNTERS # This flag tracks the total number of sandboxes in the various states # It is useful to debug if sandboxes are "getting caught" in a particular state From 876634b87bbf12703f8a6d8ad9a3856227c59339 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 14:19:00 -0400 Subject: [PATCH 13/23] refactor: admissions control flags --- runtime/Makefile | 2 +- runtime/include/admissions_control.h | 3 ++- runtime/src/admissions_control.c | 8 ++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/runtime/Makefile b/runtime/Makefile index 28cb3588..c2f2049e 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -78,7 +78,7 @@ BINARY_NAME=sledgert # This flag dumps totals of incoming requests and outgoing responses, broken out by status code # family, such as 2XX, 4XX, 5XX. It is useful to debug clients hanging waiting for a response. # To log, run `call http_total_log()` while in GDB -CFLAGS += -DHTTP_TOTAL_COUNTERS +# CFLAGS += -DHTTP_TOTAL_COUNTERS # This flag tracks the total number of sandboxes in the various states # It is useful to debug if sandboxes are "getting caught" in a particular state diff --git a/runtime/include/admissions_control.h b/runtime/include/admissions_control.h index 0ecbd340..c7dbcab2 100644 --- a/runtime/include/admissions_control.h +++ b/runtime/include/admissions_control.h @@ -3,10 +3,11 @@ #include #include +#ifdef ADMISSIONS_CONTROL #define ADMISSIONS_CONTROL_GRANULARITY 1000000 - extern _Atomic uint64_t admissions_control_admitted; extern uint64_t admissions_control_capacity; +#endif void admissions_control_initialize(void); void admissions_control_add(uint64_t admissions_estimate); diff --git a/runtime/src/admissions_control.c b/runtime/src/admissions_control.c index 6f857d4e..77d67051 100644 --- a/runtime/src/admissions_control.c +++ b/runtime/src/admissions_control.c @@ -18,10 +18,12 @@ * These estimates are incremented on request acceptance and decremented on request completion (either * success or failure) */ + +#ifdef ADMISSIONS_CONTROL _Atomic uint64_t admissions_control_admitted; uint64_t admissions_control_capacity; - -const double admissions_control_overhead = 0.2; +const double admissions_control_overhead = 0.2; +#endif void admissions_control_initialize() @@ -93,10 +95,12 @@ admissions_control_calculate_estimate_us(uint32_t estimated_execution_us, uint32 void admissions_control_log_decision(uint64_t admissions_estimate, bool admitted) { +#ifdef ADMISSIONS_CONTROL #ifdef LOG_ADMISSIONS_CONTROL debuglog("Admitted: %lu, Capacity: %lu, Estimate: %lu, Admitted? %s\n", admissions_control_admitted, admissions_control_capacity, admissions_estimate, admitted ? "yes" : "no"); #endif /* LOG_ADMISSIONS_CONTROL */ +#endif /* ADMISSIONS_CONTROL */ } uint64_t From 9458c5ad7fde2d5602b348485dd58157d9c4d8c6 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 14:45:11 -0400 Subject: [PATCH 14/23] refactor: http route total counters --- runtime/Makefile | 8 ++- runtime/include/http_route_total.h | 48 ++++++++++++++++ runtime/include/http_router.h | 2 +- runtime/include/http_session.h | 4 +- runtime/include/route.h | 8 +-- runtime/include/route_metrics.h | 55 ------------------- .../src/metrics_server_route_level_metrics.c | 5 ++ 7 files changed, 66 insertions(+), 64 deletions(-) create mode 100644 runtime/include/http_route_total.h delete mode 100644 runtime/include/route_metrics.h diff --git a/runtime/Makefile b/runtime/Makefile index c2f2049e..7fee2cb3 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -75,11 +75,15 @@ BINARY_NAME=sledgert # page is allocated. This helps understand the relationship to memory allocation and execution time. # CFLAGS += -DLOG_SANDBOX_MEMORY_PROFILE -# This flag dumps totals of incoming requests and outgoing responses, broken out by status code -# family, such as 2XX, 4XX, 5XX. It is useful to debug clients hanging waiting for a response. +# This flag enables HTTP-level counters of incoming requests and outgoing responses, broken out by status code +# family, such as 2XX, 4XX, 5XX. # To log, run `call http_total_log()` while in GDB # CFLAGS += -DHTTP_TOTAL_COUNTERS +# This flag enables per-route counters of incoming requests and outgoing responses, broken out by status code +# family, such as 2XX, 4XX, 5XX. +CFLAGS += -DHTTP_ROUTE_TOTAL_COUNTERS + # This flag tracks the total number of sandboxes in the various states # It is useful to debug if sandboxes are "getting caught" in a particular state # CFLAGS += -DSANDBOX_STATE_TOTALS diff --git a/runtime/include/http_route_total.h b/runtime/include/http_route_total.h new file mode 100644 index 00000000..8f30579c --- /dev/null +++ b/runtime/include/http_route_total.h @@ -0,0 +1,48 @@ +#pragma once + +#include + +#ifdef HTTP_ROUTE_TOTAL_COUNTERS +struct http_route_total { + atomic_ulong total_requests; + atomic_ulong total_2XX; + atomic_ulong total_4XX; + atomic_ulong total_5XX; +}; +#else +struct http_route_total { +}; +#endif + +static inline void +http_route_total_init(struct http_route_total *rm) +{ +#ifdef HTTP_ROUTE_TOTAL_COUNTERS + atomic_init(&rm->total_requests, 0); + atomic_init(&rm->total_2XX, 0); + atomic_init(&rm->total_4XX, 0); + atomic_init(&rm->total_5XX, 0); +#endif +} + +static inline void +http_route_total_increment_request(struct http_route_total *rm) +{ +#ifdef HTTP_ROUTE_TOTAL_COUNTERS + atomic_fetch_add(&rm->total_requests, 1); +#endif +} + +static inline void +http_route_total_increment(struct http_route_total *rm, int status_code) +{ +#ifdef HTTP_ROUTE_TOTAL_COUNTERS + if (status_code >= 200 && status_code <= 299) { + atomic_fetch_add(&rm->total_2XX, 1); + } else if (status_code >= 400 && status_code <= 499) { + atomic_fetch_add(&rm->total_4XX, 1); + } else if (status_code >= 500 && status_code <= 599) { + atomic_fetch_add(&rm->total_5XX, 1); + } +#endif +} diff --git a/runtime/include/http_router.h b/runtime/include/http_router.h index 1291ad94..7039525d 100644 --- a/runtime/include/http_router.h +++ b/runtime/include/http_router.h @@ -37,7 +37,7 @@ http_router_add_route(http_router_t *router, struct route_config *config, struct .response_size = config->http_resp_size, .response_content_type = config->http_resp_content_type }; - route_metrics_init(&route.metrics); + http_route_total_init(&route.metrics); /* Admissions Control */ uint64_t expected_execution = (uint64_t)config->expected_execution_us * runtime_processor_speed_MHz; diff --git a/runtime/include/http_session.h b/runtime/include/http_session.h index ca7e1d12..9a0e0925 100644 --- a/runtime/include/http_session.h +++ b/runtime/include/http_session.h @@ -16,7 +16,7 @@ #include "http_parser_settings.h" #include "http_total.h" #include "route.h" -#include "route_metrics.h" +#include "http_route_total.h" #include "tenant.h" #include "vec.h" #include "http_session_perf_log.h" @@ -182,7 +182,7 @@ http_session_set_response_header(struct http_session *session, int status_code, http_total_increment_response(status_code); /* We might not have actually matched a route */ - if (likely(session->route != NULL)) { route_metrics_increment(&session->route->metrics, status_code); } + if (likely(session->route != NULL)) { http_route_total_increment(&session->route->metrics, status_code); } if (status_code == 200) { session->response_header_length = snprintf(session->response_header, diff --git a/runtime/include/route.h b/runtime/include/route.h index 8b01aa6a..853c44fa 100644 --- a/runtime/include/route.h +++ b/runtime/include/route.h @@ -5,13 +5,13 @@ #include "admissions_info.h" #include "module.h" -#include "route_metrics.h" +#include "http_route_total.h" /* Assumption: entrypoint is always _start. This should be enhanced later */ struct route { - char *route; - struct route_metrics metrics; - struct module *module; + char *route; + struct http_route_total metrics; + struct module *module; /* HTTP State */ uint32_t relative_deadline_us; uint64_t relative_deadline; /* cycles */ diff --git a/runtime/include/route_metrics.h b/runtime/include/route_metrics.h deleted file mode 100644 index d7480ad5..00000000 --- a/runtime/include/route_metrics.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -#include - -struct route_metrics { - atomic_ulong total_requests; - atomic_ulong total_2XX; - atomic_ulong total_4XX; - atomic_ulong total_5XX; -}; - -static inline void -route_metrics_init(struct route_metrics *rm) -{ - atomic_init(&rm->total_requests, 0); - atomic_init(&rm->total_2XX, 0); - atomic_init(&rm->total_4XX, 0); - atomic_init(&rm->total_5XX, 0); -} - -static inline void -route_metrics_increment_request(struct route_metrics *rm) -{ - atomic_fetch_add(&rm->total_requests, 1); -} - -static inline void -route_metrics_increment_2XX(struct route_metrics *rm) -{ - atomic_fetch_add(&rm->total_2XX, 1); -} - -static inline void -route_metrics_increment_4XX(struct route_metrics *rm) -{ - atomic_fetch_add(&rm->total_4XX, 1); -} - -static inline void -route_metrics_increment_5XX(struct route_metrics *rm) -{ - atomic_fetch_add(&rm->total_5XX, 1); -} - -static inline void -route_metrics_increment(struct route_metrics *rm, int status_code) -{ - if (status_code >= 200 && status_code <= 299) { - route_metrics_increment_2XX(rm); - } else if (status_code >= 400 && status_code <= 499) { - route_metrics_increment_4XX(rm); - } else if (status_code >= 500 && status_code <= 599) { - route_metrics_increment_5XX(rm); - } -} diff --git a/runtime/src/metrics_server_route_level_metrics.c b/runtime/src/metrics_server_route_level_metrics.c index bb54d4a4..714dd52f 100644 --- a/runtime/src/metrics_server_route_level_metrics.c +++ b/runtime/src/metrics_server_route_level_metrics.c @@ -12,6 +12,7 @@ static const int p90_idx = perf_window_capacity * 90 / 100; void render_routes(struct route *route, void *arg_one, void *arg_two) { +#ifdef HTTP_ROUTE_TOTAL_COUNTERS FILE *ostream = (FILE *)arg_one; struct tenant *tenant = (struct tenant *)arg_two; @@ -47,6 +48,8 @@ render_routes(struct route *route, void *arg_one, void *arg_two) fprintf(ostream, "# TYPE %s_%s_latency_p90 gauge\n", tenant->name, route_label); fprintf(ostream, "%s_%s_latency_p90: %lu\n", tenant->name, route_label, latency_p90); #endif + +#endif /* HTTP_ROUTE_TOTAL_COUNTERS */ } void @@ -61,5 +64,7 @@ render_tenant_routers(struct tenant *tenant, void *arg_one, void *arg_two) void metrics_server_route_level_metrics_render(FILE *ostream) { +#ifdef HTTP_ROUTE_TOTAL_COUNTERS tenant_database_foreach(render_tenant_routers, ostream, NULL); +#endif } From cdca93f31753959418ec89d9ff886ee737809f02 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 14:51:18 -0400 Subject: [PATCH 15/23] chore: log HTTP Route Total Counters status --- runtime/src/main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/runtime/src/main.c b/runtime/src/main.c index 769a439e..bc732f9b 100644 --- a/runtime/src/main.c +++ b/runtime/src/main.c @@ -325,6 +325,12 @@ log_compiletime_config() pretty_print_key_disabled("HTTP Total Counters"); #endif +#ifdef HTTP_ROUTE_TOTAL_COUNTERS + pretty_print_key_enabled("HTTP Route Total Counters"); +#else + pretty_print_key_disabled("HTTP Route Total Counters"); +#endif + #ifdef SANDBOX_STATE_TOTALS pretty_print_key_enabled("Log Sandbox State Count"); #else From 66aa87a152aa09a20d50ecd1ee9a93703bd5aea9 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 14:52:29 -0400 Subject: [PATCH 16/23] refactor: Metrics server port --- runtime/src/metrics_server.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/src/metrics_server.c b/runtime/src/metrics_server.c index a729734e..0ef64ead 100644 --- a/runtime/src/metrics_server.c +++ b/runtime/src/metrics_server.c @@ -16,6 +16,7 @@ /* We run threads on the "reserved OS core" using blocking semantics */ #define METRICS_SERVER_CORE_ID 0 +#define METRICS_SERVER_PORT 1776 static pthread_attr_t metrics_server_thread_settings; struct tcp_server metrics_server; @@ -26,7 +27,7 @@ extern void metrics_server_route_level_metrics_render(FILE *ostream); void metrics_server_init() { - tcp_server_init(&metrics_server, 1776); + tcp_server_init(&metrics_server, METRICS_SERVER_PORT); int rc = tcp_server_listen(&metrics_server); assert(rc == 0); From cd73d7d172cac369f9493897a5d96e84e9bc17e0 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 15:09:54 -0400 Subject: [PATCH 17/23] refactor: reorder metrics --- runtime/Makefile | 5 +++- runtime/src/metrics_server.c | 56 +++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/runtime/Makefile b/runtime/Makefile index 7fee2cb3..97706418 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -75,6 +75,9 @@ BINARY_NAME=sledgert # page is allocated. This helps understand the relationship to memory allocation and execution time. # CFLAGS += -DLOG_SANDBOX_MEMORY_PROFILE +# This flag enables runtime-level metrics from procfs +# CFLAGS += -DPROC_STAT_METRICS + # This flag enables HTTP-level counters of incoming requests and outgoing responses, broken out by status code # family, such as 2XX, 4XX, 5XX. # To log, run `call http_total_log()` while in GDB @@ -82,7 +85,7 @@ BINARY_NAME=sledgert # This flag enables per-route counters of incoming requests and outgoing responses, broken out by status code # family, such as 2XX, 4XX, 5XX. -CFLAGS += -DHTTP_ROUTE_TOTAL_COUNTERS +# CFLAGS += -DHTTP_ROUTE_TOTAL_COUNTERS # This flag tracks the total number of sandboxes in the various states # It is useful to debug if sandboxes are "getting caught" in a particular state diff --git a/runtime/src/metrics_server.c b/runtime/src/metrics_server.c index 0ef64ead..215a8353 100644 --- a/runtime/src/metrics_server.c +++ b/runtime/src/metrics_server.c @@ -116,15 +116,41 @@ metrics_server_handler(void *arg) double work_admitted_percentile = (double)work_admitted / admissions_control_capacity * 100; #endif +#ifdef PROC_STAT_METRICS + struct proc_stat_metrics stat; + proc_stat_metrics_init(&stat); +#endif + fprintf(ostream, "HTTP/1.1 200 OK\r\n\r\n"); +#ifdef PROC_STAT_METRICS + fprintf(ostream, "# TYPE os_proc_major_page_faults counter\n"); + fprintf(ostream, "os_proc_major_page_faults: %lu\n", stat.major_page_faults); + + fprintf(ostream, "# TYPE os_proc_minor_page_faults counter\n"); + fprintf(ostream, "os_proc_minor_page_faults: %lu\n", stat.minor_page_faults); + + fprintf(ostream, "# TYPE os_proc_child_major_page_faults counter\n"); + fprintf(ostream, "os_proc_child_major_page_faults: %lu\n", stat.child_major_page_faults); + + fprintf(ostream, "# TYPE os_proc_child_minor_page_faults counter\n"); + fprintf(ostream, "os_proc_child_minor_page_faults: %lu\n", stat.child_minor_page_faults); + + fprintf(ostream, "# TYPE os_proc_user_time counter\n"); + fprintf(ostream, "os_proc_user_time: %lu\n", stat.user_time); + + fprintf(ostream, "# TYPE os_proc_sys_time counter\n"); + fprintf(ostream, "os_proc_sys_time: %lu\n", stat.system_time); + + fprintf(ostream, "# TYPE os_proc_guest_time counter\n"); + fprintf(ostream, "os_proc_guest_time: %lu\n", stat.guest_time); +#endif /* PROC_STAT_METRICS */ #ifdef ADMISSIONS_CONTROL fprintf(ostream, "# TYPE work_admitted_percentile gauge\n"); fprintf(ostream, "work_admitted_percentile: %f\n", work_admitted_percentile); #endif - #ifdef HTTP_TOTAL_COUNTERS fprintf(ostream, "# TYPE total_requests counter\n"); fprintf(ostream, "total_requests: %d\n", total_reqs); @@ -139,6 +165,8 @@ metrics_server_handler(void *arg) fprintf(ostream, "total_5XX: %d\n", total_5XX); #endif + metrics_server_route_level_metrics_render(ostream); + // This global is padded by 1 for error handling, so decrement here for true value fprintf(ostream, "# TYPE total_sandboxes counter\n"); fprintf(ostream, "total_sandboxes: %d\n", total_sandboxes - 1); @@ -181,32 +209,6 @@ metrics_server_handler(void *arg) fprintf(ostream, "total_sandboxes_error: %d\n", total_sandboxes_error); #endif - struct proc_stat_metrics stat; - proc_stat_metrics_init(&stat); - - fprintf(ostream, "# TYPE os_proc_major_page_faults counter\n"); - fprintf(ostream, "os_proc_major_page_faults: %lu\n", stat.major_page_faults); - - fprintf(ostream, "# TYPE os_proc_minor_page_faults counter\n"); - fprintf(ostream, "os_proc_minor_page_faults: %lu\n", stat.minor_page_faults); - - fprintf(ostream, "# TYPE os_proc_child_major_page_faults counter\n"); - fprintf(ostream, "os_proc_child_major_page_faults: %lu\n", stat.child_major_page_faults); - - fprintf(ostream, "# TYPE os_proc_child_minor_page_faults counter\n"); - fprintf(ostream, "os_proc_child_minor_page_faults: %lu\n", stat.child_minor_page_faults); - - fprintf(ostream, "# TYPE os_proc_user_time counter\n"); - fprintf(ostream, "os_proc_user_time: %lu\n", stat.user_time); - - fprintf(ostream, "# TYPE os_proc_sys_time counter\n"); - fprintf(ostream, "os_proc_sys_time: %lu\n", stat.system_time); - - fprintf(ostream, "# TYPE os_proc_guest_time counter\n"); - fprintf(ostream, "os_proc_guest_time: %lu\n", stat.guest_time); - - metrics_server_route_level_metrics_render(ostream); - fflush(ostream); assert(ostream_size > 0); rc = fclose(ostream); From 1981c0133825735608cb0fdd4ef61ea348a95b4b Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 15:12:38 -0400 Subject: [PATCH 18/23] refactor: PERF_WINDOW_CAPACITY --- runtime/include/perf_window.h | 22 +++++++++---------- runtime/include/perf_window_t.h | 12 +++++----- runtime/src/admissions_info.c | 2 +- .../src/metrics_server_route_level_metrics.c | 4 ++-- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/runtime/include/perf_window.h b/runtime/include/perf_window.h index bfa32e49..cb54a5a9 100644 --- a/runtime/include/perf_window.h +++ b/runtime/include/perf_window.h @@ -21,8 +21,8 @@ perf_window_initialize(struct perf_window *perf_window) lock_init(&perf_window->lock); perf_window->count = 0; - memset(perf_window->by_duration, 0, sizeof(struct execution_node) * perf_window_capacity); - memset(perf_window->by_termination, 0, sizeof(uint16_t) * perf_window_capacity); + memset(perf_window->by_duration, 0, sizeof(struct execution_node) * PERF_WINDOW_CAPACITY); + memset(perf_window->by_termination, 0, sizeof(uint16_t) * PERF_WINDOW_CAPACITY); } @@ -38,8 +38,8 @@ perf_window_swap(struct perf_window *perf_window, uint16_t first_by_duration_idx { assert(lock_is_locked(&perf_window->lock)); assert(perf_window != NULL); - assert(first_by_duration_idx < perf_window_capacity); - assert(second_by_duration_idx < perf_window_capacity); + assert(first_by_duration_idx < PERF_WINDOW_CAPACITY); + assert(second_by_duration_idx < PERF_WINDOW_CAPACITY); uint16_t first_by_termination_idx = perf_window->by_duration[first_by_duration_idx].by_termination_idx; uint16_t second_by_termination_idx = perf_window->by_duration[second_by_duration_idx].by_termination_idx; @@ -70,12 +70,12 @@ perf_window_swap(struct perf_window *perf_window, uint16_t first_by_duration_idx static inline void perf_window_fill(struct perf_window *perf_window, uint64_t newest_execution_time) { - for (uint16_t i = 0; i < perf_window_capacity; i++) { + for (uint16_t i = 0; i < PERF_WINDOW_CAPACITY; i++) { perf_window->by_termination[i] = i; perf_window->by_duration[i] = (struct execution_node){ .execution_time = newest_execution_time, .by_termination_idx = i }; } - perf_window->count = perf_window_capacity; + perf_window->count = PERF_WINDOW_CAPACITY; } /** @@ -104,7 +104,7 @@ perf_window_add(struct perf_window *perf_window, uint64_t newest_execution_time) } /* If full, replace the oldest execution_time. Save the old execution time to know which direction to swap */ - idx_to_replace = perf_window->by_termination[perf_window->count % perf_window_capacity]; + idx_to_replace = perf_window->by_termination[perf_window->count % PERF_WINDOW_CAPACITY]; previous_execution_time = perf_window->by_duration[idx_to_replace].execution_time; perf_window->by_duration[idx_to_replace].execution_time = newest_execution_time; @@ -112,7 +112,7 @@ perf_window_add(struct perf_window *perf_window, uint64_t newest_execution_time) * right. We can determine which direction to shift by comparing with the previous execution time. */ if (newest_execution_time > previous_execution_time) { for (uint16_t i = idx_to_replace; - i + 1 < perf_window_capacity + i + 1 < PERF_WINDOW_CAPACITY && perf_window->by_duration[i + 1].execution_time < perf_window->by_duration[i].execution_time; i++) { perf_window_swap(perf_window, i, i + 1); @@ -127,13 +127,13 @@ perf_window_add(struct perf_window *perf_window, uint64_t newest_execution_time) } /* The idx that we replaces should still point to the same newest_execution_time */ - assert(perf_window->by_duration[perf_window->by_termination[perf_window->count % perf_window_capacity]] + assert(perf_window->by_duration[perf_window->by_termination[perf_window->count % PERF_WINDOW_CAPACITY]] .execution_time == newest_execution_time); /* The by_duration array should be ordered by execution time */ #ifndef NDEBUG - for (int i = 1; i < perf_window_capacity; i++) { + for (int i = 1; i < PERF_WINDOW_CAPACITY; i++) { assert(perf_window->by_duration[i - 1].execution_time <= perf_window->by_duration[i].execution_time); } #endif @@ -160,7 +160,7 @@ perf_window_get_percentile(struct perf_window *perf_window, uint8_t percentile, if (unlikely(perf_window->count == 0)) return 0; int idx = precomputed_index; - if (unlikely(perf_window->count < perf_window_capacity)) idx = perf_window->count * percentile / 100; + if (unlikely(perf_window->count < PERF_WINDOW_CAPACITY)) idx = perf_window->count * percentile / 100; return perf_window->by_duration[idx].execution_time; } diff --git a/runtime/include/perf_window_t.h b/runtime/include/perf_window_t.h index 0cc0f3e5..50d0a96c 100644 --- a/runtime/include/perf_window_t.h +++ b/runtime/include/perf_window_t.h @@ -7,13 +7,13 @@ enum { - perf_window_capacity = 32 + PERF_WINDOW_CAPACITY = 32 }; -static_assert(perf_window_capacity && !(perf_window_capacity & (perf_window_capacity - 1)), - "perf_window_capacity must be power of 2!"); +static_assert(PERF_WINDOW_CAPACITY && !(PERF_WINDOW_CAPACITY & (PERF_WINDOW_CAPACITY - 1)), + "PERF_WINDOW_CAPACITY must be power of 2!"); -static_assert(perf_window_capacity <= UINT16_MAX, "perf_window_capacity must be indexable by a 16-bit unsigned int"); +static_assert(PERF_WINDOW_CAPACITY <= UINT16_MAX, "PERF_WINDOW_CAPACITY must be indexable by a 16-bit unsigned int"); /* * The by_duration array sorts the last N executions by execution time @@ -28,8 +28,8 @@ struct execution_node { }; struct perf_window { - struct execution_node by_duration[perf_window_capacity]; - uint16_t by_termination[perf_window_capacity]; + struct execution_node by_duration[PERF_WINDOW_CAPACITY]; + uint16_t by_termination[PERF_WINDOW_CAPACITY]; uint64_t count; lock_t lock; }; diff --git a/runtime/src/admissions_info.c b/runtime/src/admissions_info.c index 8ef76bc9..63c9ee1e 100644 --- a/runtime/src/admissions_info.c +++ b/runtime/src/admissions_info.c @@ -24,7 +24,7 @@ admissions_info_initialize(struct admissions_info *admissions_info, uint8_t perc if (unlikely(percentile < 50 || percentile > 99)) panic("Invalid admissions percentile"); admissions_info->percentile = percentile; - admissions_info->control_index = perf_window_capacity * percentile / 100; + admissions_info->control_index = PERF_WINDOW_CAPACITY * percentile / 100; #ifdef LOG_ADMISSIONS_CONTROL debuglog("Percentile: %u\n", admissions_info->percentile); debuglog("Control Index: %d\n", admissions_info->control_index); diff --git a/runtime/src/metrics_server_route_level_metrics.c b/runtime/src/metrics_server_route_level_metrics.c index 714dd52f..9bacb57b 100644 --- a/runtime/src/metrics_server_route_level_metrics.c +++ b/runtime/src/metrics_server_route_level_metrics.c @@ -6,8 +6,8 @@ // tenant_database_foreach_cb_t -static const int p50_idx = perf_window_capacity * 50 / 100; -static const int p90_idx = perf_window_capacity * 90 / 100; +static const int p50_idx = PERF_WINDOW_CAPACITY * 50 / 100; +static const int p90_idx = PERF_WINDOW_CAPACITY * 90 / 100; void render_routes(struct route *route, void *arg_one, void *arg_two) From 199914d69de8ab8a8d48d1443aaf50e006808493 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 15:22:23 -0400 Subject: [PATCH 19/23] refactor: log procfs metrics status --- runtime/src/main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/runtime/src/main.c b/runtime/src/main.c index bc732f9b..96b87968 100644 --- a/runtime/src/main.c +++ b/runtime/src/main.c @@ -331,6 +331,12 @@ log_compiletime_config() pretty_print_key_disabled("HTTP Route Total Counters"); #endif +#ifdef PROC_STAT_METRICS + pretty_print_key_enabled("procfs Metrics"); +#else + pretty_print_key_disabled("procfs Metrics"); +#endif + #ifdef SANDBOX_STATE_TOTALS pretty_print_key_enabled("Log Sandbox State Count"); #else From 61baa40ba44c8e8a5c18705950f20d6cdf5f76be Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 15:24:22 -0400 Subject: [PATCH 20/23] refactor: zombie comment --- runtime/src/metrics_server_route_level_metrics.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/runtime/src/metrics_server_route_level_metrics.c b/runtime/src/metrics_server_route_level_metrics.c index 9bacb57b..2a74f211 100644 --- a/runtime/src/metrics_server_route_level_metrics.c +++ b/runtime/src/metrics_server_route_level_metrics.c @@ -4,8 +4,6 @@ #include "perf_window.h" #include "tenant_functions.h" -// tenant_database_foreach_cb_t - static const int p50_idx = PERF_WINDOW_CAPACITY * 50 / 100; static const int p90_idx = PERF_WINDOW_CAPACITY * 90 / 100; From 63e38f0e7c9d2d03bd40c299aacfd857d9f34b04 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 15:36:39 -0400 Subject: [PATCH 21/23] chore: PERF_WINDOW_CAPACITY scripts --- tests/empty/concurrency/run.sh | 12 ++++++------ tests/fibonacci/bimodal/run.sh | 14 +++++++------- tests/workload_mix/run.sh | 14 +++++++------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/empty/concurrency/run.sh b/tests/empty/concurrency/run.sh index 28167707..f074a7ec 100755 --- a/tests/empty/concurrency/run.sh +++ b/tests/empty/concurrency/run.sh @@ -38,16 +38,16 @@ run_samples() { # Scrape the perf window size from the source if possible # TODO: Make a util function local -r perf_window_path="$(path_join "$__run_sh__base_path" ../../../runtime/include/perf_window_t.h)" - local -i perf_window_capacity - if ! perf_window_capacity=$(grep "perf_window_capacity =" < "$perf_window_path" | cut -d\ -f3); then - printf "Failed to scrape perf_window_capacity from ../../include/perf_window.h\n" + local -i PERF_WINDOW_CAPACITY + if ! PERF_WINDOW_CAPACITY=$(grep "PERF_WINDOW_CAPACITY =" < "$perf_window_path" | cut -d\ -f3); then + printf "Failed to scrape PERF_WINDOW_CAPACITY from ../../include/perf_window.h\n" printf "Defaulting to 16\n" - perf_window_capacity=16 + PERF_WINDOW_CAPACITY=16 fi - local -ir perf_window_capacity + local -ir PERF_WINDOW_CAPACITY printf "Running Samples: " - hey -disable-compression -disable-keepalive -disable-redirects -n "$perf_window_capacity" -c "$perf_window_capacity" -q 200 -cpus 3 -o csv -m GET "http://${hostname}:10000/empty" 1> /dev/null 2> /dev/null || { + hey -disable-compression -disable-keepalive -disable-redirects -n "$PERF_WINDOW_CAPACITY" -c "$PERF_WINDOW_CAPACITY" -q 200 -cpus 3 -o csv -m GET "http://${hostname}:10000/empty" 1> /dev/null 2> /dev/null || { printf "[ERR]\n" panic "samples failed" return 1 diff --git a/tests/fibonacci/bimodal/run.sh b/tests/fibonacci/bimodal/run.sh index c95b6943..acdb15e1 100755 --- a/tests/fibonacci/bimodal/run.sh +++ b/tests/fibonacci/bimodal/run.sh @@ -41,22 +41,22 @@ run_samples() { # Scrape the perf window size from the source if possible # TODO: Make a util function local -r perf_window_path="$(path_join "$__run_sh__base_path" ../../../runtime/include/perf_window_t.h)" - local -i perf_window_capacity - if ! perf_window_capacity=$(grep "perf_window_capacity =" < "$perf_window_path" | cut -d\ -f3); then - printf "Failed to scrape perf_window_capacity from ../../../runtime/include/perf_window.h\n" + local -i PERF_WINDOW_CAPACITY + if ! PERF_WINDOW_CAPACITY=$(grep "PERF_WINDOW_CAPACITY =" < "$perf_window_path" | cut -d\ -f3); then + printf "Failed to scrape PERF_WINDOW_CAPACITY from ../../../runtime/include/perf_window.h\n" printf "Defaulting to 16\n" - perf_window_capacity=16 + PERF_WINDOW_CAPACITY=16 fi - local -ir perf_window_capacity + local -ir PERF_WINDOW_CAPACITY printf "Running Samples: " - hey -disable-compression -disable-keepalive -disable-redirects -n "$perf_window_capacity" -c "$perf_window_capacity" -cpus 3 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10010/fib2" 1> /dev/null 2> /dev/null || { + hey -disable-compression -disable-keepalive -disable-redirects -n "$PERF_WINDOW_CAPACITY" -c "$PERF_WINDOW_CAPACITY" -cpus 3 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10010/fib2" 1> /dev/null 2> /dev/null || { printf "[ERR]\n" panic "fib40 samples failed with $?" return 1 } - hey -disable-compression -disable-keepalive -disable-redirects -n "$perf_window_capacity" -c "$perf_window_capacity" -cpus 3 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:100010/fib" 1> /dev/null 2> /dev/null || { + hey -disable-compression -disable-keepalive -disable-redirects -n "$PERF_WINDOW_CAPACITY" -c "$PERF_WINDOW_CAPACITY" -cpus 3 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:100010/fib" 1> /dev/null 2> /dev/null || { printf "[ERR]\n" panic "fib10 samples failed with $?" return 1 diff --git a/tests/workload_mix/run.sh b/tests/workload_mix/run.sh index 86e54b98..afe14397 100755 --- a/tests/workload_mix/run.sh +++ b/tests/workload_mix/run.sh @@ -38,22 +38,22 @@ run_samples() { # Scrape the perf window size from the source if possible # TODO: Make a util function local -r perf_window_path="$(path_join "$__run_sh__base_path" ../../runtime/include/perf_window_t.h)" - local -i perf_window_capacity - if ! perf_window_capacity=$(grep "perf_window_capacity =" < "$perf_window_path" | cut -d\ -f3); then - printf "Failed to scrape perf_window_capacity from ../../runtime/include/perf_window.h\n" + local -i PERF_WINDOW_CAPACITY + if ! PERF_WINDOW_CAPACITY=$(grep "PERF_WINDOW_CAPACITY =" < "$perf_window_path" | cut -d\ -f3); then + printf "Failed to scrape PERF_WINDOW_CAPACITY from ../../runtime/include/perf_window.h\n" printf "Defaulting to 16\n" - perf_window_capacity=16 + PERF_WINDOW_CAPACITY=16 fi - local -ir perf_window_capacity + local -ir PERF_WINDOW_CAPACITY printf "Running Samples: " - hey -disable-compression -disable-keepalive -disable-redirects -n "$perf_window_capacity" -c "$perf_window_capacity" -cpus 3 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10000/fibonacci_10" 1> /dev/null 2> /dev/null || { + hey -disable-compression -disable-keepalive -disable-redirects -n "$PERF_WINDOW_CAPACITY" -c "$PERF_WINDOW_CAPACITY" -cpus 3 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10000/fibonacci_10" 1> /dev/null 2> /dev/null || { printf "[ERR]\n" panic "fibonacci_40 samples failed with $?" return 1 } - hey -disable-compression -disable-keepalive -disable-redirects -n "$perf_window_capacity" -c "$perf_window_capacity" -cpus 3 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:10000/fibonacci_10" 1> /dev/null 2> /dev/null || { + hey -disable-compression -disable-keepalive -disable-redirects -n "$PERF_WINDOW_CAPACITY" -c "$PERF_WINDOW_CAPACITY" -cpus 3 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:10000/fibonacci_10" 1> /dev/null 2> /dev/null || { printf "[ERR]\n" panic "fibonacci_10 samples failed with $?" return 1 From 38494da400fc45b3a95fb84875885fac862f05ca Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 16:29:40 -0400 Subject: [PATCH 22/23] feat: epoll tags --- runtime/include/epoll_tag.h | 9 +++++++++ runtime/include/http_session.h | 11 +++++++---- runtime/include/metrics_server.h | 10 +++++++++- runtime/include/tenant.h | 2 ++ runtime/include/tenant_functions.h | 1 + runtime/src/listener_thread.c | 20 ++++++++++++++------ runtime/src/metrics_server.c | 17 +++++++++-------- 7 files changed, 51 insertions(+), 19 deletions(-) create mode 100644 runtime/include/epoll_tag.h diff --git a/runtime/include/epoll_tag.h b/runtime/include/epoll_tag.h new file mode 100644 index 00000000..bb44dfef --- /dev/null +++ b/runtime/include/epoll_tag.h @@ -0,0 +1,9 @@ +#pragma once + +enum epoll_tag +{ + EPOLL_TAG_INVALID = 0, + EPOLL_TAG_TENANT_SERVER_SOCKET = 1, + EPOLL_TAG_METRICS_SERVER_SOCKET, + EPOLL_TAG_HTTP_SESSION_CLIENT_SOCKET, +}; diff --git a/runtime/include/http_session.h b/runtime/include/http_session.h index 9a0e0925..dccf9861 100644 --- a/runtime/include/http_session.h +++ b/runtime/include/http_session.h @@ -9,17 +9,18 @@ #include #include -#include "tcp_session.h" #include "debuglog.h" -#include "http_request.h" +#include "epoll_tag.h" #include "http_parser.h" #include "http_parser_settings.h" +#include "http_request.h" +#include "http_route_total.h" +#include "http_session_perf_log.h" #include "http_total.h" #include "route.h" -#include "http_route_total.h" +#include "tcp_session.h" #include "tenant.h" #include "vec.h" -#include "http_session_perf_log.h" #define HTTP_SESSION_DEFAULT_REQUEST_RESPONSE_SIZE (PAGE_SIZE) #define HTTP_SESSION_RESPONSE_HEADER_CAPACITY 256 @@ -45,6 +46,7 @@ enum http_session_state }; struct http_session { + enum epoll_tag tag; enum http_session_state state; struct sockaddr client_address; /* client requesting connection! */ int socket; @@ -93,6 +95,7 @@ http_session_init(struct http_session *session, int socket_descriptor, const str assert(socket_descriptor >= 0); assert(socket_address != NULL); + session->tag = EPOLL_TAG_HTTP_SESSION_CLIENT_SOCKET; session->tenant = tenant; session->route = NULL; session->socket = socket_descriptor; diff --git a/runtime/include/metrics_server.h b/runtime/include/metrics_server.h index 48a754e3..f1805795 100644 --- a/runtime/include/metrics_server.h +++ b/runtime/include/metrics_server.h @@ -1,8 +1,16 @@ #pragma once +#include "epoll_tag.h" #include "tcp_server.h" -extern struct tcp_server metrics_server; +struct metrics_server { + enum epoll_tag tag; + struct tcp_server tcp; + pthread_attr_t thread_settings; +}; + + +extern struct metrics_server metrics_server; void metrics_server_init(); void metrics_server_thread_spawn(int client_socket); diff --git a/runtime/include/tenant.h b/runtime/include/tenant.h index 596c4773..615bcead 100644 --- a/runtime/include/tenant.h +++ b/runtime/include/tenant.h @@ -1,5 +1,6 @@ #pragma once +#include "epoll_tag.h" #include "http_router.h" #include "map.h" #include "module_database.h" @@ -32,6 +33,7 @@ struct tenant_global_request_queue { }; struct tenant { + enum epoll_tag tag; /* Tag must be first member */ char *name; struct tcp_server tcp_server; http_router_t router; diff --git a/runtime/include/tenant_functions.h b/runtime/include/tenant_functions.h index be9d3554..423f6b6d 100644 --- a/runtime/include/tenant_functions.h +++ b/runtime/include/tenant_functions.h @@ -87,6 +87,7 @@ tenant_alloc(struct tenant_config *config) struct tenant *tenant = (struct tenant *)calloc(1, sizeof(struct tenant)); /* Move name */ + tenant->tag = EPOLL_TAG_TENANT_SERVER_SOCKET; tenant->name = config->name; config->name = NULL; diff --git a/runtime/src/listener_thread.c b/runtime/src/listener_thread.c index 18d3bd48..55b30074 100644 --- a/runtime/src/listener_thread.c +++ b/runtime/src/listener_thread.c @@ -145,7 +145,7 @@ listener_thread_register_metrics_server() struct epoll_event accept_evt; accept_evt.data.ptr = (void *)&metrics_server; accept_evt.events = EPOLLIN; - rc = epoll_ctl(listener_thread_epoll_file_descriptor, EPOLL_CTL_ADD, metrics_server.socket_descriptor, + rc = epoll_ctl(listener_thread_epoll_file_descriptor, EPOLL_CTL_ADD, metrics_server.tcp.socket_descriptor, &accept_evt); return rc; @@ -355,7 +355,7 @@ on_metrics_server_epoll_event(struct epoll_event *evt) /* Accept as many clients requests as possible, returning when we would have blocked */ while (true) { /* We accept the client connection with blocking semantics because we spawn ephemeral worker threads */ - int client_socket = accept4(metrics_server.socket_descriptor, (struct sockaddr *)&client_address, + int client_socket = accept4(metrics_server.tcp.socket_descriptor, (struct sockaddr *)&client_address, &address_length, 0); if (unlikely(client_socket < 0)) { if (errno == EWOULDBLOCK || errno == EAGAIN) return; @@ -433,12 +433,20 @@ listener_thread_main(void *dummy) for (int i = 0; i < descriptor_count; i++) { panic_on_epoll_error(&epoll_events[i]); - if (epoll_events[i].data.ptr == &metrics_server) { - on_metrics_server_epoll_event(&epoll_events[i]); - } else if (tenant_database_find_by_ptr(epoll_events[i].data.ptr) != NULL) { + enum epoll_tag *tag = (enum epoll_tag *)epoll_events[i].data.ptr; + + switch (*tag) { + case EPOLL_TAG_TENANT_SERVER_SOCKET: on_tenant_socket_epoll_event(&epoll_events[i]); - } else { + break; + case EPOLL_TAG_HTTP_SESSION_CLIENT_SOCKET: on_client_socket_epoll_event(&epoll_events[i]); + break; + case EPOLL_TAG_METRICS_SERVER_SOCKET: + on_metrics_server_epoll_event(&epoll_events[i]); + break; + default: + panic("Unknown epoll type!"); } } } diff --git a/runtime/src/metrics_server.c b/runtime/src/metrics_server.c index 215a8353..e554a531 100644 --- a/runtime/src/metrics_server.c +++ b/runtime/src/metrics_server.c @@ -8,6 +8,7 @@ #include "debuglog.h" #include "http.h" #include "http_total.h" +#include "metrics_server.h" #include "proc_stat.h" #include "runtime.h" #include "sandbox_total.h" @@ -18,8 +19,7 @@ #define METRICS_SERVER_CORE_ID 0 #define METRICS_SERVER_PORT 1776 -static pthread_attr_t metrics_server_thread_settings; -struct tcp_server metrics_server; +struct metrics_server metrics_server; static void *metrics_server_handler(void *arg); extern void metrics_server_route_level_metrics_render(FILE *ostream); @@ -27,22 +27,23 @@ extern void metrics_server_route_level_metrics_render(FILE *ostream); void metrics_server_init() { - tcp_server_init(&metrics_server, METRICS_SERVER_PORT); - int rc = tcp_server_listen(&metrics_server); + metrics_server.tag = EPOLL_TAG_METRICS_SERVER_SOCKET; + tcp_server_init(&metrics_server.tcp, METRICS_SERVER_PORT); + int rc = tcp_server_listen(&metrics_server.tcp); assert(rc == 0); /* Configure pthread attributes to pin metrics server threads to CPU 0 */ - pthread_attr_init(&metrics_server_thread_settings); + pthread_attr_init(&metrics_server.thread_settings); cpu_set_t cs; CPU_ZERO(&cs); CPU_SET(METRICS_SERVER_CORE_ID, &cs); - pthread_attr_setaffinity_np(&metrics_server_thread_settings, sizeof(cpu_set_t), &cs); + pthread_attr_setaffinity_np(&metrics_server.thread_settings, sizeof(cpu_set_t), &cs); } int metrics_server_close() { - return tcp_server_close(&metrics_server); + return tcp_server_close(&metrics_server.tcp); } void @@ -65,7 +66,7 @@ metrics_server_thread_spawn(int client_socket) /* Fire and forget, so we don't save the thread handles */ pthread_t metrics_server_thread; - int rc = pthread_create(&metrics_server_thread, &metrics_server_thread_settings, metrics_server_handler, + int rc = pthread_create(&metrics_server_thread, &metrics_server.thread_settings, metrics_server_handler, (void *)(long)client_socket); if (rc != 0) { From 0f9e01dad1e5272a22d75a0415e971b4be8e6601 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 16:35:30 -0400 Subject: [PATCH 23/23] refactor: deref tag --- runtime/src/listener_thread.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/src/listener_thread.c b/runtime/src/listener_thread.c index 55b30074..d0756114 100644 --- a/runtime/src/listener_thread.c +++ b/runtime/src/listener_thread.c @@ -433,9 +433,9 @@ listener_thread_main(void *dummy) for (int i = 0; i < descriptor_count; i++) { panic_on_epoll_error(&epoll_events[i]); - enum epoll_tag *tag = (enum epoll_tag *)epoll_events[i].data.ptr; + enum epoll_tag tag = *(enum epoll_tag *)epoll_events[i].data.ptr; - switch (*tag) { + switch (tag) { case EPOLL_TAG_TENANT_SERVER_SOCKET: on_tenant_socket_epoll_event(&epoll_events[i]); break;