feat: Latency perflog (#364)

* feat: latency perf_window * chore: Increase perf window capacity * chore: fix Makefile * fix: Restore route_latency precomputed idx * fix: clang-format nit * fix: clang-format * fix: Restore accidental deletion * fixed: perf_window_get_percentile was not considering cases where perf_count<CAP * - removed the globals for p50_idx and p90_idx - changed latency unit from ms to us * undo renaming perf_window * upgrade sandbox_total type to u64 make sure sandbox_id starts from 1 * improve Makefile comments Co-authored-by: emil <emil916@gmail.com>
3 years ago · 0c3b661301
parent 56f32ec44b
commit 0c3b661301
12 changed files with 91 additions and 41 deletions
--- a/runtime/Makefile
+++ b/runtime/Makefile
@ -87,6 +87,11 @@ BINARY_NAME=sledgert
 # family, such as 2XX, 4XX, 5XX.
 # CFLAGS += -DHTTP_ROUTE_TOTAL_COUNTERS

+# This flag enables per-route latency perf-logs. 
+# This has a perf impact due to the use of locks.
+# This flag has a dependency on the flag HTTP_ROUTE_TOTAL_COUNTERS
+# CFLAGS += -DROUTE_LATENCY
+
 # This flag tracks the total number of sandboxes in the various states
 # It is useful to debug if sandboxes are "getting caught" in a particular state
 # CFLAGS += -DSANDBOX_STATE_TOTALS
--- a/runtime/include/http_router.h
+++ b/runtime/include/http_router.h
@ -5,6 +5,7 @@

 #include "http.h"
 #include "module.h"
+#include "route_latency.h"
 #include "route.h"
 #include "route_config.h"
 #include "vec.h"
@ -36,6 +37,7 @@ http_router_add_route(http_router_t *router, struct route_config *config, struct
 		                                    * runtime_processor_speed_MHz,
 		               .response_content_type = config->http_resp_content_type };

+	route_latency_init(&route.latency);
 	http_route_total_init(&route.metrics);

 	/* Admissions Control */
--- a/runtime/include/perf_window.h
+++ b/runtime/include/perf_window.h
@ -159,8 +159,13 @@ perf_window_get_percentile(struct perf_window *perf_window, uint8_t percentile,

 	if (unlikely(perf_window->count == 0)) return 0;

+	uint64_t perf_window_size = PERF_WINDOW_CAPACITY;
+	if (unlikely(perf_window->count < PERF_WINDOW_CAPACITY)) perf_window_size = perf_window->count;
+
 	int idx = precomputed_index;
-	if (unlikely(perf_window->count < PERF_WINDOW_CAPACITY)) idx = perf_window->count * percentile / 100;
+	if (precomputed_index == 0 || perf_window->count < PERF_WINDOW_CAPACITY) {
+		idx = perf_window_size * percentile / 100;
+	}

 	return perf_window->by_duration[idx].execution_time;
 }
--- a/runtime/include/perf_window_t.h
+++ b/runtime/include/perf_window_t.h
@ -7,7 +7,7 @@

 enum
 {
-	PERF_WINDOW_CAPACITY = 32
+	PERF_WINDOW_CAPACITY = 256
 };

 static_assert(PERF_WINDOW_CAPACITY && !(PERF_WINDOW_CAPACITY & (PERF_WINDOW_CAPACITY - 1)),
--- a/runtime/include/route.h
+++ b/runtime/include/route.h
@ -6,6 +6,7 @@
 #include "admissions_info.h"
 #include "module.h"
 #include "http_route_total.h"
+#include "perf_window.h"

 /* Assumption: entrypoint is always _start. This should be enhanced later */
 struct route {
@ -17,4 +18,5 @@ struct route {
 	uint64_t               relative_deadline; /* cycles */
 	char                  *response_content_type;
 	struct admissions_info admissions_info;
+	struct perf_window     latency;
 };
--- a/runtime/include/route_latency.h
+++ b/runtime/include/route_latency.h
@ -0,0 +1,38 @@
+#pragma once
+
+#include <stdint.h>
+
+#include "perf_window.h"
+
+static inline void
+route_latency_init(struct perf_window *route_latency)
+{
+#ifdef ROUTE_LATENCY
+	perf_window_initialize(route_latency);
+#endif
+}
+
+static inline uint64_t
+route_latency_get(struct perf_window *route_latency, uint8_t percentile, int precomputed_index)
+{
+#ifdef ROUTE_LATENCY
+	lock_node_t node = {};
+	lock_lock(&route_latency->lock, &node);
+	uint64_t res = perf_window_get_percentile(route_latency, percentile, precomputed_index);
+	lock_unlock(&route_latency->lock, &node);
+	return res;
+#else
+	return 0;
+#endif
+}
+
+static inline void
+route_latency_add(struct perf_window *route_latency, uint64_t value)
+{
+#ifdef ROUTE_LATENCY
+	lock_node_t node = {};
+	lock_lock(&route_latency->lock, &node);
+	perf_window_add(route_latency, value);
+	lock_unlock(&route_latency->lock, &node);
+#endif
+}
--- a/runtime/include/sandbox_set_as_complete.h
+++ b/runtime/include/sandbox_set_as_complete.h
@ -54,6 +54,7 @@ sandbox_set_as_complete(struct sandbox *sandbox, sandbox_state_t last_state)
 	/* Terminal State Logging for Sandbox */
 	sandbox_perf_log_print_entry(sandbox);
 	sandbox_summarize_page_allocations(sandbox);
+	route_latency_add(&sandbox->route->latency, sandbox->total_time);

 	/* State Change Hooks */
 	sandbox_state_transition_from_hook(sandbox, last_state);
--- a/runtime/include/sandbox_total.h
+++ b/runtime/include/sandbox_total.h
@ -4,16 +4,16 @@
 #include <stdint.h>

 /* Count of the total number of requests we've ever allocated. Never decrements as it is used to generate IDs */
-extern _Atomic uint32_t sandbox_total;
+extern _Atomic uint64_t sandbox_total;

 static inline void
 sandbox_total_initialize()
 {
-	atomic_init(&sandbox_total, 1); /* keep zero for error-handling purposes */
+	atomic_init(&sandbox_total, 0);
 }

-static inline uint32_t
+static inline uint64_t
 sandbox_total_postfix_increment()
 {
-	return atomic_fetch_add(&sandbox_total, 1);
+	return atomic_fetch_add(&sandbox_total, 1) + 1;
 }
--- a/runtime/src/admissions_info.c
+++ b/runtime/src/admissions_info.c
@ -45,12 +45,12 @@ admissions_info_update(struct admissions_info *admissions_info, uint64_t executi
 	struct perf_window *perf_window = &admissions_info->perf_window;

 	lock_node_t node = {};
-	lock_lock(&admissions_info->perf_window.lock, &node);
+	lock_lock(&perf_window->lock, &node);
 	perf_window_add(perf_window, execution_duration);
 	uint64_t estimated_execution = perf_window_get_percentile(perf_window, admissions_info->percentile,
 	                                                          admissions_info->control_index);
 	admissions_info->estimate    = admissions_control_calculate_estimate(estimated_execution,
 	                                                                     admissions_info->relative_deadline);
-	lock_unlock(&admissions_info->perf_window.lock, &node);
+	lock_unlock(&perf_window->lock, &node);
 #endif
 }
--- a/runtime/src/metrics_server.c
+++ b/runtime/src/metrics_server.c
@ -49,22 +49,6 @@ metrics_server_close()
 void
 metrics_server_thread_spawn(int client_socket)
 {
-	/* Duplicate fd so fclose doesn't close the actual client_socket */
-	int   temp_fd  = dup(client_socket);
-	FILE *req_body = fdopen(temp_fd, "r");
-
-	/* Basic L7 routing to filter out favicon requests */
-	char http_status_code_buf[256];
-	fgets(http_status_code_buf, 256, req_body);
-	fclose(req_body);
-
-	if (strncmp(http_status_code_buf, "GET /metrics HTTP", 10) != 0) {
-		write(client_socket, http_header_build(404), http_header_len(404));
-		write(client_socket, HTTP_RESPONSE_TERMINATOR, HTTP_RESPONSE_TERMINATOR_LENGTH);
-		close(client_socket);
-		return;
-	}
-
 	/* Fire and forget, so we don't save the thread handles */
 	pthread_t metrics_server_thread;
 	int       rc = pthread_create(&metrics_server_thread, &metrics_server.thread_settings, metrics_server_handler,
@ -82,6 +66,21 @@ metrics_server_handler(void *arg)
 	/* Intermediate cast to integral value of 64-bit width to silence compiler nits */
 	int client_socket = (int)(long)arg;

+	/* Duplicate fd so fclose doesn't close the actual client_socket */
+	int   temp_fd  = dup(client_socket);
+	FILE *req_body = fdopen(temp_fd, "r");
+
+	/* Basic L7 routing to filter out favicon requests */
+	char http_status_code_buf[256];
+	fgets(http_status_code_buf, 256, req_body);
+	fclose(req_body);
+
+	if (strncmp(http_status_code_buf, "GET /metrics HTTP", 10) != 0) {
+		write(client_socket, http_header_build(404), http_header_len(404));
+		close(client_socket);
+		pthread_exit(NULL);
+	}
+
 	int rc = 0;

 	char  *ostream_base = NULL;
@ -96,7 +95,7 @@ metrics_server_handler(void *arg)
 	uint32_t total_4XX  = atomic_load(&http_total_4XX);
 #endif

-	uint32_t total_sandboxes = atomic_load(&sandbox_total);
+	uint64_t total_sandboxes = atomic_load(&sandbox_total);

 #ifdef SANDBOX_STATE_TOTALS
 	uint32_t total_sandboxes_uninitialized = atomic_load(&sandbox_state_totals[SANDBOX_UNINITIALIZED]);
@ -169,9 +168,8 @@ metrics_server_handler(void *arg)

 	metrics_server_route_level_metrics_render(ostream);

-	// This global is padded by 1 for error handling, so decrement here for true value
 	fprintf(ostream, "# TYPE total_sandboxes counter\n");
-	fprintf(ostream, "total_sandboxes: %d\n", total_sandboxes - 1);
+	fprintf(ostream, "total_sandboxes: %lu\n", total_sandboxes);

 #ifdef SANDBOX_STATE_TOTALS
 	fprintf(ostream, "# TYPE total_sandboxes_uninitialized gauge\n");
--- a/runtime/src/metrics_server_route_level_metrics.c
+++ b/runtime/src/metrics_server_route_level_metrics.c
@ -4,9 +4,6 @@
 #include "perf_window.h"
 #include "tenant_functions.h"

-static const int p50_idx = PERF_WINDOW_CAPACITY * 50 / 100;
-static const int p90_idx = PERF_WINDOW_CAPACITY * 90 / 100;
-
 void
 render_routes(struct route *route, void *arg_one, void *arg_two)
 {
@ -14,10 +11,10 @@ render_routes(struct route *route, void *arg_one, void *arg_two)
 	FILE          *ostream = (FILE *)arg_one;
 	struct tenant *tenant  = (struct tenant *)arg_two;

-#ifdef ADMISSIONS_CONTROL
-	uint64_t latency_p50 = perf_window_get_percentile(&route->admissions_info.perf_window, 50, p50_idx);
-	uint64_t latency_p90 = perf_window_get_percentile(&route->admissions_info.perf_window, 90, p90_idx);
-#endif
+#ifdef ROUTE_LATENCY
+	uint64_t latency_p50 = route_latency_get(&route->latency, 50, 0);
+	uint64_t latency_p90 = route_latency_get(&route->latency, 90, 0);
+#endif /* ROUTE_LATENCY */

 	uint64_t total_requests = atomic_load(&route->metrics.total_requests);
 	uint64_t total_2XX      = atomic_load(&route->metrics.total_2XX);
@ -39,13 +36,15 @@ render_routes(struct route *route, void *arg_one, void *arg_two)
 	fprintf(ostream, "# TYPE %s_%s_total_5XX counter\n", tenant->name, route_label);
 	fprintf(ostream, "%s_%s_total_5XX: %lu\n", tenant->name, route_label, total_5XX);

-#ifdef ADMISSIONS_CONTROL
-	fprintf(ostream, "# TYPE %s_%s_latency_p50 gauge\n", tenant->name, route_label);
-	fprintf(ostream, "%s_%s_latency_p50: %lu\n", tenant->name, route_label, latency_p50);
+#ifdef ROUTE_LATENCY
+	fprintf(ostream, "# TYPE %s_%s_latency_us_p50 gauge\n", tenant->name, route_label);
+	fprintf(ostream, "%s_%s_latency_us_p50: %lu\n", tenant->name, route_label,
+	        latency_p50 / runtime_processor_speed_MHz);

-	fprintf(ostream, "# TYPE %s_%s_latency_p90 gauge\n", tenant->name, route_label);
-	fprintf(ostream, "%s_%s_latency_p90: %lu\n", tenant->name, route_label, latency_p90);
-#endif
+	fprintf(ostream, "# TYPE %s_%s_latency_us_p90 gauge\n", tenant->name, route_label);
+	fprintf(ostream, "%s_%s_latency_us_p90: %lu\n", tenant->name, route_label,
+	        latency_p90 / runtime_processor_speed_MHz);
+#endif /* ROUTE_LATENCY */

 #endif /* HTTP_ROUTE_TOTAL_COUNTERS */
 }
--- a/runtime/src/sandbox.c
+++ b/runtime/src/sandbox.c
@ -15,7 +15,7 @@
 #include "wasm_memory.h"
 #include "wasm_stack.h"

-_Atomic uint32_t sandbox_total = 0;
+_Atomic uint64_t sandbox_total = 0;

 static inline void
 sandbox_log_allocation(struct sandbox *sandbox)