feat: Latency perflog (#364)

* feat: latency perf_window

* chore: Increase perf window capacity

* chore: fix Makefile

* fix: Restore route_latency precomputed idx

* fix: clang-format nit

* fix: clang-format

* fix: Restore accidental deletion

* fixed: perf_window_get_percentile was not considering cases where perf_count<CAP

* - removed the globals for p50_idx and p90_idx
- changed latency unit from ms to us

* undo renaming perf_window

* upgrade sandbox_total type to u64
make sure sandbox_id starts from 1

* improve Makefile comments

Co-authored-by: emil <emil916@gmail.com>
master
Sean McBride 2 years ago committed by GitHub
parent 56f32ec44b
commit 0c3b661301
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -87,6 +87,11 @@ BINARY_NAME=sledgert
# family, such as 2XX, 4XX, 5XX.
# CFLAGS += -DHTTP_ROUTE_TOTAL_COUNTERS
# This flag enables per-route latency perf-logs.
# This has a perf impact due to the use of locks.
# This flag has a dependency on the flag HTTP_ROUTE_TOTAL_COUNTERS
# CFLAGS += -DROUTE_LATENCY
# This flag tracks the total number of sandboxes in the various states
# It is useful to debug if sandboxes are "getting caught" in a particular state
# CFLAGS += -DSANDBOX_STATE_TOTALS

@ -5,6 +5,7 @@
#include "http.h"
#include "module.h"
#include "route_latency.h"
#include "route.h"
#include "route_config.h"
#include "vec.h"
@ -36,6 +37,7 @@ http_router_add_route(http_router_t *router, struct route_config *config, struct
* runtime_processor_speed_MHz,
.response_content_type = config->http_resp_content_type };
route_latency_init(&route.latency);
http_route_total_init(&route.metrics);
/* Admissions Control */

@ -159,8 +159,13 @@ perf_window_get_percentile(struct perf_window *perf_window, uint8_t percentile,
if (unlikely(perf_window->count == 0)) return 0;
uint64_t perf_window_size = PERF_WINDOW_CAPACITY;
if (unlikely(perf_window->count < PERF_WINDOW_CAPACITY)) perf_window_size = perf_window->count;
int idx = precomputed_index;
if (unlikely(perf_window->count < PERF_WINDOW_CAPACITY)) idx = perf_window->count * percentile / 100;
if (precomputed_index == 0 || perf_window->count < PERF_WINDOW_CAPACITY) {
idx = perf_window_size * percentile / 100;
}
return perf_window->by_duration[idx].execution_time;
}

@ -7,7 +7,7 @@
enum
{
PERF_WINDOW_CAPACITY = 32
PERF_WINDOW_CAPACITY = 256
};
static_assert(PERF_WINDOW_CAPACITY && !(PERF_WINDOW_CAPACITY & (PERF_WINDOW_CAPACITY - 1)),

@ -6,6 +6,7 @@
#include "admissions_info.h"
#include "module.h"
#include "http_route_total.h"
#include "perf_window.h"
/* Assumption: entrypoint is always _start. This should be enhanced later */
struct route {
@ -17,4 +18,5 @@ struct route {
uint64_t relative_deadline; /* cycles */
char *response_content_type;
struct admissions_info admissions_info;
struct perf_window latency;
};

@ -0,0 +1,38 @@
#pragma once
#include <stdint.h>
#include "perf_window.h"
static inline void
route_latency_init(struct perf_window *route_latency)
{
#ifdef ROUTE_LATENCY
perf_window_initialize(route_latency);
#endif
}
static inline uint64_t
route_latency_get(struct perf_window *route_latency, uint8_t percentile, int precomputed_index)
{
#ifdef ROUTE_LATENCY
lock_node_t node = {};
lock_lock(&route_latency->lock, &node);
uint64_t res = perf_window_get_percentile(route_latency, percentile, precomputed_index);
lock_unlock(&route_latency->lock, &node);
return res;
#else
return 0;
#endif
}
static inline void
route_latency_add(struct perf_window *route_latency, uint64_t value)
{
#ifdef ROUTE_LATENCY
lock_node_t node = {};
lock_lock(&route_latency->lock, &node);
perf_window_add(route_latency, value);
lock_unlock(&route_latency->lock, &node);
#endif
}

@ -54,6 +54,7 @@ sandbox_set_as_complete(struct sandbox *sandbox, sandbox_state_t last_state)
/* Terminal State Logging for Sandbox */
sandbox_perf_log_print_entry(sandbox);
sandbox_summarize_page_allocations(sandbox);
route_latency_add(&sandbox->route->latency, sandbox->total_time);
/* State Change Hooks */
sandbox_state_transition_from_hook(sandbox, last_state);

@ -4,16 +4,16 @@
#include <stdint.h>
/* Count of the total number of requests we've ever allocated. Never decrements as it is used to generate IDs */
extern _Atomic uint32_t sandbox_total;
extern _Atomic uint64_t sandbox_total;
static inline void
sandbox_total_initialize()
{
atomic_init(&sandbox_total, 1); /* keep zero for error-handling purposes */
atomic_init(&sandbox_total, 0);
}
static inline uint32_t
static inline uint64_t
sandbox_total_postfix_increment()
{
return atomic_fetch_add(&sandbox_total, 1);
return atomic_fetch_add(&sandbox_total, 1) + 1;
}

@ -45,12 +45,12 @@ admissions_info_update(struct admissions_info *admissions_info, uint64_t executi
struct perf_window *perf_window = &admissions_info->perf_window;
lock_node_t node = {};
lock_lock(&admissions_info->perf_window.lock, &node);
lock_lock(&perf_window->lock, &node);
perf_window_add(perf_window, execution_duration);
uint64_t estimated_execution = perf_window_get_percentile(perf_window, admissions_info->percentile,
admissions_info->control_index);
admissions_info->estimate = admissions_control_calculate_estimate(estimated_execution,
admissions_info->relative_deadline);
lock_unlock(&admissions_info->perf_window.lock, &node);
lock_unlock(&perf_window->lock, &node);
#endif
}

@ -49,22 +49,6 @@ metrics_server_close()
void
metrics_server_thread_spawn(int client_socket)
{
/* Duplicate fd so fclose doesn't close the actual client_socket */
int temp_fd = dup(client_socket);
FILE *req_body = fdopen(temp_fd, "r");
/* Basic L7 routing to filter out favicon requests */
char http_status_code_buf[256];
fgets(http_status_code_buf, 256, req_body);
fclose(req_body);
if (strncmp(http_status_code_buf, "GET /metrics HTTP", 10) != 0) {
write(client_socket, http_header_build(404), http_header_len(404));
write(client_socket, HTTP_RESPONSE_TERMINATOR, HTTP_RESPONSE_TERMINATOR_LENGTH);
close(client_socket);
return;
}
/* Fire and forget, so we don't save the thread handles */
pthread_t metrics_server_thread;
int rc = pthread_create(&metrics_server_thread, &metrics_server.thread_settings, metrics_server_handler,
@ -82,6 +66,21 @@ metrics_server_handler(void *arg)
/* Intermediate cast to integral value of 64-bit width to silence compiler nits */
int client_socket = (int)(long)arg;
/* Duplicate fd so fclose doesn't close the actual client_socket */
int temp_fd = dup(client_socket);
FILE *req_body = fdopen(temp_fd, "r");
/* Basic L7 routing to filter out favicon requests */
char http_status_code_buf[256];
fgets(http_status_code_buf, 256, req_body);
fclose(req_body);
if (strncmp(http_status_code_buf, "GET /metrics HTTP", 10) != 0) {
write(client_socket, http_header_build(404), http_header_len(404));
close(client_socket);
pthread_exit(NULL);
}
int rc = 0;
char *ostream_base = NULL;
@ -96,7 +95,7 @@ metrics_server_handler(void *arg)
uint32_t total_4XX = atomic_load(&http_total_4XX);
#endif
uint32_t total_sandboxes = atomic_load(&sandbox_total);
uint64_t total_sandboxes = atomic_load(&sandbox_total);
#ifdef SANDBOX_STATE_TOTALS
uint32_t total_sandboxes_uninitialized = atomic_load(&sandbox_state_totals[SANDBOX_UNINITIALIZED]);
@ -169,9 +168,8 @@ metrics_server_handler(void *arg)
metrics_server_route_level_metrics_render(ostream);
// This global is padded by 1 for error handling, so decrement here for true value
fprintf(ostream, "# TYPE total_sandboxes counter\n");
fprintf(ostream, "total_sandboxes: %d\n", total_sandboxes - 1);
fprintf(ostream, "total_sandboxes: %lu\n", total_sandboxes);
#ifdef SANDBOX_STATE_TOTALS
fprintf(ostream, "# TYPE total_sandboxes_uninitialized gauge\n");

@ -4,9 +4,6 @@
#include "perf_window.h"
#include "tenant_functions.h"
static const int p50_idx = PERF_WINDOW_CAPACITY * 50 / 100;
static const int p90_idx = PERF_WINDOW_CAPACITY * 90 / 100;
void
render_routes(struct route *route, void *arg_one, void *arg_two)
{
@ -14,10 +11,10 @@ render_routes(struct route *route, void *arg_one, void *arg_two)
FILE *ostream = (FILE *)arg_one;
struct tenant *tenant = (struct tenant *)arg_two;
#ifdef ADMISSIONS_CONTROL
uint64_t latency_p50 = perf_window_get_percentile(&route->admissions_info.perf_window, 50, p50_idx);
uint64_t latency_p90 = perf_window_get_percentile(&route->admissions_info.perf_window, 90, p90_idx);
#endif
#ifdef ROUTE_LATENCY
uint64_t latency_p50 = route_latency_get(&route->latency, 50, 0);
uint64_t latency_p90 = route_latency_get(&route->latency, 90, 0);
#endif /* ROUTE_LATENCY */
uint64_t total_requests = atomic_load(&route->metrics.total_requests);
uint64_t total_2XX = atomic_load(&route->metrics.total_2XX);
@ -39,13 +36,15 @@ render_routes(struct route *route, void *arg_one, void *arg_two)
fprintf(ostream, "# TYPE %s_%s_total_5XX counter\n", tenant->name, route_label);
fprintf(ostream, "%s_%s_total_5XX: %lu\n", tenant->name, route_label, total_5XX);
#ifdef ADMISSIONS_CONTROL
fprintf(ostream, "# TYPE %s_%s_latency_p50 gauge\n", tenant->name, route_label);
fprintf(ostream, "%s_%s_latency_p50: %lu\n", tenant->name, route_label, latency_p50);
#ifdef ROUTE_LATENCY
fprintf(ostream, "# TYPE %s_%s_latency_us_p50 gauge\n", tenant->name, route_label);
fprintf(ostream, "%s_%s_latency_us_p50: %lu\n", tenant->name, route_label,
latency_p50 / runtime_processor_speed_MHz);
fprintf(ostream, "# TYPE %s_%s_latency_p90 gauge\n", tenant->name, route_label);
fprintf(ostream, "%s_%s_latency_p90: %lu\n", tenant->name, route_label, latency_p90);
#endif
fprintf(ostream, "# TYPE %s_%s_latency_us_p90 gauge\n", tenant->name, route_label);
fprintf(ostream, "%s_%s_latency_us_p90: %lu\n", tenant->name, route_label,
latency_p90 / runtime_processor_speed_MHz);
#endif /* ROUTE_LATENCY */
#endif /* HTTP_ROUTE_TOTAL_COUNTERS */
}

@ -15,7 +15,7 @@
#include "wasm_memory.h"
#include "wasm_stack.h"
_Atomic uint32_t sandbox_total = 0;
_Atomic uint64_t sandbox_total = 0;
static inline void
sandbox_log_allocation(struct sandbox *sandbox)

Loading…
Cancel
Save