From cd73d7d172cac369f9493897a5d96e84e9bc17e0 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 Aug 2022 15:09:54 -0400 Subject: [PATCH] refactor: reorder metrics --- runtime/Makefile | 5 +++- runtime/src/metrics_server.c | 56 +++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/runtime/Makefile b/runtime/Makefile index 7fee2cb..9770641 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -75,6 +75,9 @@ BINARY_NAME=sledgert # page is allocated. This helps understand the relationship to memory allocation and execution time. # CFLAGS += -DLOG_SANDBOX_MEMORY_PROFILE +# This flag enables runtime-level metrics from procfs +# CFLAGS += -DPROC_STAT_METRICS + # This flag enables HTTP-level counters of incoming requests and outgoing responses, broken out by status code # family, such as 2XX, 4XX, 5XX. # To log, run `call http_total_log()` while in GDB @@ -82,7 +85,7 @@ BINARY_NAME=sledgert # This flag enables per-route counters of incoming requests and outgoing responses, broken out by status code # family, such as 2XX, 4XX, 5XX. -CFLAGS += -DHTTP_ROUTE_TOTAL_COUNTERS +# CFLAGS += -DHTTP_ROUTE_TOTAL_COUNTERS # This flag tracks the total number of sandboxes in the various states # It is useful to debug if sandboxes are "getting caught" in a particular state diff --git a/runtime/src/metrics_server.c b/runtime/src/metrics_server.c index 0ef64ea..215a835 100644 --- a/runtime/src/metrics_server.c +++ b/runtime/src/metrics_server.c @@ -116,15 +116,41 @@ metrics_server_handler(void *arg) double work_admitted_percentile = (double)work_admitted / admissions_control_capacity * 100; #endif +#ifdef PROC_STAT_METRICS + struct proc_stat_metrics stat; + proc_stat_metrics_init(&stat); +#endif + fprintf(ostream, "HTTP/1.1 200 OK\r\n\r\n"); +#ifdef PROC_STAT_METRICS + fprintf(ostream, "# TYPE os_proc_major_page_faults counter\n"); + fprintf(ostream, "os_proc_major_page_faults: %lu\n", stat.major_page_faults); + + fprintf(ostream, "# TYPE os_proc_minor_page_faults counter\n"); + fprintf(ostream, "os_proc_minor_page_faults: %lu\n", stat.minor_page_faults); + + fprintf(ostream, "# TYPE os_proc_child_major_page_faults counter\n"); + fprintf(ostream, "os_proc_child_major_page_faults: %lu\n", stat.child_major_page_faults); + + fprintf(ostream, "# TYPE os_proc_child_minor_page_faults counter\n"); + fprintf(ostream, "os_proc_child_minor_page_faults: %lu\n", stat.child_minor_page_faults); + + fprintf(ostream, "# TYPE os_proc_user_time counter\n"); + fprintf(ostream, "os_proc_user_time: %lu\n", stat.user_time); + + fprintf(ostream, "# TYPE os_proc_sys_time counter\n"); + fprintf(ostream, "os_proc_sys_time: %lu\n", stat.system_time); + + fprintf(ostream, "# TYPE os_proc_guest_time counter\n"); + fprintf(ostream, "os_proc_guest_time: %lu\n", stat.guest_time); +#endif /* PROC_STAT_METRICS */ #ifdef ADMISSIONS_CONTROL fprintf(ostream, "# TYPE work_admitted_percentile gauge\n"); fprintf(ostream, "work_admitted_percentile: %f\n", work_admitted_percentile); #endif - #ifdef HTTP_TOTAL_COUNTERS fprintf(ostream, "# TYPE total_requests counter\n"); fprintf(ostream, "total_requests: %d\n", total_reqs); @@ -139,6 +165,8 @@ metrics_server_handler(void *arg) fprintf(ostream, "total_5XX: %d\n", total_5XX); #endif + metrics_server_route_level_metrics_render(ostream); + // This global is padded by 1 for error handling, so decrement here for true value fprintf(ostream, "# TYPE total_sandboxes counter\n"); fprintf(ostream, "total_sandboxes: %d\n", total_sandboxes - 1); @@ -181,32 +209,6 @@ metrics_server_handler(void *arg) fprintf(ostream, "total_sandboxes_error: %d\n", total_sandboxes_error); #endif - struct proc_stat_metrics stat; - proc_stat_metrics_init(&stat); - - fprintf(ostream, "# TYPE os_proc_major_page_faults counter\n"); - fprintf(ostream, "os_proc_major_page_faults: %lu\n", stat.major_page_faults); - - fprintf(ostream, "# TYPE os_proc_minor_page_faults counter\n"); - fprintf(ostream, "os_proc_minor_page_faults: %lu\n", stat.minor_page_faults); - - fprintf(ostream, "# TYPE os_proc_child_major_page_faults counter\n"); - fprintf(ostream, "os_proc_child_major_page_faults: %lu\n", stat.child_major_page_faults); - - fprintf(ostream, "# TYPE os_proc_child_minor_page_faults counter\n"); - fprintf(ostream, "os_proc_child_minor_page_faults: %lu\n", stat.child_minor_page_faults); - - fprintf(ostream, "# TYPE os_proc_user_time counter\n"); - fprintf(ostream, "os_proc_user_time: %lu\n", stat.user_time); - - fprintf(ostream, "# TYPE os_proc_sys_time counter\n"); - fprintf(ostream, "os_proc_sys_time: %lu\n", stat.system_time); - - fprintf(ostream, "# TYPE os_proc_guest_time counter\n"); - fprintf(ostream, "os_proc_guest_time: %lu\n", stat.guest_time); - - metrics_server_route_level_metrics_render(ostream); - fflush(ostream); assert(ostream_size > 0); rc = fclose(ostream);