diff --git a/runtime/include/metrics_server.h b/runtime/include/metrics_server.h
index 540d1a5..48a754e 100644
--- a/runtime/include/metrics_server.h
+++ b/runtime/include/metrics_server.h
@@ -5,6 +5,5 @@
 extern struct tcp_server metrics_server;
 
 void metrics_server_init();
-int metrics_server_listen();
+void metrics_server_thread_spawn(int client_socket);
 int metrics_server_close();
-void metrics_server_handler(int client_socket);
diff --git a/runtime/include/proc_stat.h b/runtime/include/proc_stat.h
new file mode 100644
index 0000000..f36e959
--- /dev/null
+++ b/runtime/include/proc_stat.h
@@ -0,0 +1,145 @@
+#pragma once
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "runtime.h" /* For runtime_pid */
+
+/* Used to read process-level metrics associated with sledgert from procfs
+ * The parsing behavior is based on prtstat -r
+ *
+ * Each enumerator is the zero-based position of a space-separated field of /proc/[pid]/stat in the order
+ * given by proc(5). Fields nobody reads still need an entry, because omitting one shifts every later index.
+ */
+
+
+enum PROC_STAT
+{
+	PROC_STAT_PID = 0, /* Process ID */
+	PROC_STAT_COMM = 1, /* Process Name, wrapped in parentheses */
+	PROC_STAT_STATE = 2, /* State */
+	PROC_STAT_PPID, /* Parent Process ID */
+	PROC_STAT_PGRP, /* Process Group ID */
+	PROC_STAT_SESSION, /* Session ID */
+	PROC_STAT_TTY_NR, /* Controlling terminal */
+	PROC_STAT_TPGID, /* Foreground process group ID of the controlling terminal */
+	PROC_STAT_FLAGS, /* Kernel flags word */
+	PROC_STAT_MINFLT, /* Minor Page Faults */
+	PROC_STAT_CMINFLT, /* Minor Page Faults of children */
+	PROC_STAT_MAJFLT, /* Major Page Faults */
+	PROC_STAT_CMAJFLT, /* Major Page Faults of children */
+	PROC_STAT_UTIME, /* User Time */
+	PROC_STAT_STIME, /* System Time */
+	PROC_STAT_CUTIME, /* Child User Time */
+	PROC_STAT_CSTIME, /* Child System Time */
+	PROC_STAT_PRIORITY,
+	PROC_STAT_NICE,
+	PROC_STAT_NUM_THREADS,
+	PROC_STAT_ITREALVALUE,
+	PROC_STAT_STARTTIME, /* Start Time */
+	PROC_STAT_VSIZE, /* Virtual Memory */
+	PROC_STAT_RSS,
+	PROC_STAT_RSSLIM,
+	PROC_STAT_STARTCODE,
+	PROC_STAT_ENDCODE,
+	PROC_STAT_STARTSTACK,
+	PROC_STAT_KSTKESP,
+	PROC_STAT_KSTKEIP,
+	PROC_STAT_SIGNAL, /* Pending signals bitmap (obsolete, but still present in the line) */
+	PROC_STAT_BLOCKED, /* Blocked signals bitmap (obsolete, but still present in the line) */
+	PROC_STAT_SIGIGNORE, /* Ignored signals bitmap (obsolete, but still present in the line) */
+	PROC_STAT_SIGCATCH, /* Caught signals bitmap (obsolete, but still present in the line) */
+	PROC_STAT_WCHAN,
+	PROC_STAT_NSWAP,
+	PROC_STAT_CNSWAP,
+	PROC_STAT_EXIT_SIGNAL,
+	PROC_STAT_PROCESSOR,
+	PROC_STAT_RT_PRIORITY,
+	PROC_STAT_POLICY,
+	PROC_STAT_DELAYACCR_BLKIO_TICKS,
+	PROC_STAT_GUEST_TIME,
+	PROC_STAT_CGUEST_TIME,
+	PROC_STAT_COUNT
+};
+
+struct proc_stat_metrics {
+	uint64_t minor_page_faults;
+	uint64_t major_page_faults;
+	uint64_t child_minor_page_faults;
+	uint64_t child_major_page_faults;
+	uint64_t user_time;
+	uint64_t system_time;
+	uint64_t guest_time;
+};
+
+/**
+ * Parses one numeric field of /proc/[pid]/stat
+ * @param token decimal text of the field, or NULL if the line ended before this field
+ * @returns the parsed value, or 0 for a missing field
+ */
+static inline uint64_t
+proc_stat_parse_field(const char *token)
+{
+	if (token == NULL) return 0;
+
+	return strtoull(token, NULL, 10);
+}
+
+/**
+ * Takes a snapshot of the page fault and CPU time counters in /proc/<runtime_pid>/stat
+ * Every member is left at zero if the stat file cannot be opened, read, or parsed
+ * @param stat destination for the snapshot
+ */
+static inline void
+proc_stat_metrics_init(struct proc_stat_metrics *stat)
+{
+	assert(stat != NULL);
+	assert(runtime_pid > 0);
+
+	memset(stat, 0, sizeof(*stat));
+
+	/* Open sledgert's stat file in procfs */
+	char path[256];
+	snprintf(path, sizeof(path), "/proc/%d/stat", runtime_pid);
+	FILE *proc_stat = fopen(path, "r");
+	if (proc_stat == NULL) return;
+
+	/* Read stat file into in-memory buffer */
+	char  buf[BUFSIZ];
+	char *line = fgets(buf, sizeof(buf), proc_stat);
+	fclose(proc_stat);
+	if (line == NULL) return;
+
+	/* comm is wrapped in parentheses and may itself contain spaces or parentheses, so the fields after it
+	 * are located relative to the last ')' instead of by splitting the whole line on spaces */
+	char *comm_begin = strchr(buf, '(');
+	char *comm_end   = strrchr(buf, ')');
+	if (comm_begin == NULL || comm_end == NULL || comm_end < comm_begin) return;
+	*comm_begin = '\0';
+	*comm_end   = '\0';
+
+	/* Parse into an array of tokens with indices aligning to the PROC_STAT enum */
+	char *pos                               = NULL;
+	char *proc_stat_values[PROC_STAT_COUNT] = { NULL };
+
+	proc_stat_values[PROC_STAT_PID]  = strtok_r(buf, " ", &pos);
+	proc_stat_values[PROC_STAT_COMM] = comm_begin + 1;
+	for (int i = PROC_STAT_STATE; i < PROC_STAT_COUNT; i++) {
+		char *tok = strtok_r(i == PROC_STAT_STATE ? comm_end + 1 : NULL, " \n", &pos);
+		/* The line ended early; the remaining entries stay NULL and are reported as 0 */
+		if (tok == NULL) break;
+		proc_stat_values[i] = tok;
+	}
+
+	/* Fill the proc_stat_metrics struct with metrics of interest */
+	stat->minor_page_faults       = proc_stat_parse_field(proc_stat_values[PROC_STAT_MINFLT]);
+	stat->major_page_faults       = proc_stat_parse_field(proc_stat_values[PROC_STAT_MAJFLT]);
+	stat->child_minor_page_faults = proc_stat_parse_field(proc_stat_values[PROC_STAT_CMINFLT]);
+	stat->child_major_page_faults = proc_stat_parse_field(proc_stat_values[PROC_STAT_CMAJFLT]);
+	stat->user_time               = proc_stat_parse_field(proc_stat_values[PROC_STAT_UTIME]);
+	stat->system_time             = proc_stat_parse_field(proc_stat_values[PROC_STAT_STIME]);
+	stat->guest_time              = proc_stat_parse_field(proc_stat_values[PROC_STAT_GUEST_TIME]);
+}
diff --git a/runtime/include/runtime.h b/runtime/include/runtime.h
index 5da9a05..91570c5 100644
--- a/runtime/include/runtime.h
+++ b/runtime/include/runtime.h
@@ -2,6 +2,7 @@
 
 #include
 #include <sys/epoll.h> /* for epoll_create1(), epoll_ctl(), struct epoll_event */
+#include <sys/types.h> /* for pid_t */
 #include
 #include
 
@@ -34,6 +35,7 @@ enum RUNTIME_SIGALRM_HANDLER
 	RUNTIME_SIGALRM_HANDLER_TRIAGED = 1
 };
 
+extern pid_t runtime_pid;
 extern bool runtime_preemption_enabled;
 extern uint32_t runtime_processor_speed_MHz;
 extern uint32_t runtime_quantum_us;
diff --git a/runtime/src/listener_thread.c b/runtime/src/listener_thread.c
index af28dc3..9aa1b73 100644
--- a/runtime/src/listener_thread.c
+++ b/runtime/src/listener_thread.c
@@ -354,15 +354,16 @@ on_metrics_server_epoll_event(struct epoll_event *evt)
 
 	/* Accept as many clients requests as possible, returning when we would have blocked */
 	while (true) {
+		/* We accept the client connection with blocking semantics because we spawn ephemeral worker threads */
 		int client_socket = accept4(metrics_server.socket_descriptor, (struct sockaddr *)&client_address,
-		                            &address_length, SOCK_NONBLOCK);
+		                            &address_length, 0);
 		if (unlikely(client_socket < 0)) {
 			if (errno == EWOULDBLOCK || errno == EAGAIN) return;
 
 			panic("accept4: %s", strerror(errno));
 		}
 
-		metrics_server_handler(client_socket);
+		metrics_server_thread_spawn(client_socket);
 	}
 }
 
@@ -412,7 +413,6 @@ listener_thread_main(void *dummy)
 	generic_thread_initialize();
 
 	metrics_server_init();
-	metrics_server_listen();
 	listener_thread_register_metrics_server();
 
 	/* Set my priority */
diff --git a/runtime/src/main.c b/runtime/src/main.c
index ff04f3b..6932428 100644
--- a/runtime/src/main.c
+++ b/runtime/src/main.c
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #ifdef LOG_TO_FILE
@@ -39,6 +40,7 @@ enum RUNTIME_SIGALRM_HANDLER runtime_sigalrm_handler = RUNTIME_SIGALRM_HANDLER_B
 bool runtime_preemption_enabled = true;
 uint32_t runtime_quantum_us = 5000; /* 5ms */
 uint64_t runtime_boot_timestamp;
+pid_t runtime_pid = 0;
 
 /**
  * Returns instructions on use of CLI if used incorrectly
@@ -430,6 +432,8 @@ main(int argc, char **argv)
 		exit(-1);
 	}
 
+	runtime_pid = getpid();
+
 	printf("Starting the Sledge runtime\n");
 
 	log_compiletime_config();
diff --git a/runtime/src/metrics_server.c b/runtime/src/metrics_server.c
index 5c981cb..e0ba75a 100644
--- a/runtime/src/metrics_server.c
+++ b/runtime/src/metrics_server.c
@@ -1,24 +1,46 @@
+#include
+#include
 #include
+#include
 #include
 
 #include "admissions_control.h"
-#include "tcp_server.h"
+#include "debuglog.h"
+#include "http.h"
 #include "http_total.h"
+#include "proc_stat.h"
+#include "runtime.h"
 #include "sandbox_total.h"
 #include "sandbox_state.h"
+#include "tcp_server.h"
+
+/* We run threads on the "reserved OS core" using blocking semantics */
+#define METRICS_SERVER_CORE_ID 0
 
-struct tcp_server metrics_server;
+static pthread_attr_t metrics_server_thread_settings;
+struct tcp_server metrics_server;
+static void *metrics_server_handler(void *arg);
 
+/**
+ * Starts listening on the metrics port and prepares the attributes shared by all handler threads
+ */
 void
 metrics_server_init()
 {
 	tcp_server_init(&metrics_server, 1776);
-}
+	int rc = tcp_server_listen(&metrics_server);
+	assert(rc == 0);
 
-int
-metrics_server_listen()
-{
-	return tcp_server_listen(&metrics_server);
+	/* Configure pthread attributes to pin metrics server threads to CPU 0 */
+	pthread_attr_init(&metrics_server_thread_settings);
+	cpu_set_t cs;
+	CPU_ZERO(&cs);
+	CPU_SET(METRICS_SERVER_CORE_ID, &cs);
+	pthread_attr_setaffinity_np(&metrics_server_thread_settings, sizeof(cpu_set_t), &cs);
+
+	/* Handler threads are never joined, so create them detached; a joinable thread that is never joined
+	 * keeps its stack and bookkeeping allocated after it exits */
+	pthread_attr_setdetachstate(&metrics_server_thread_settings, PTHREAD_CREATE_DETACHED);
 }
 
 int
@@ -28,8 +50,66 @@ metrics_server_close()
 }
 
+/**
+ * Screens a freshly accepted metrics client and, for "GET / HTTP...", hands its socket to a detached
+ * short-lived handler thread; anything else gets a 404. Runs on the caller's thread and blocks until the
+ * request line arrives or the client hangs up.
+ * @param client_socket accepted client connection; it is closed here or by the handler thread
+ */
 void
-metrics_server_handler(int client_socket)
+metrics_server_thread_spawn(int client_socket)
 {
+	/* Duplicate fd so fclose doesn't close the actual client_socket */
+	int temp_fd = dup(client_socket);
+	if (temp_fd < 0) {
+		debuglog("Metrics Server failed to dup client socket %d\n", client_socket);
+		close(client_socket);
+		return;
+	}
+
+	FILE *req_body = fdopen(temp_fd, "r");
+	if (req_body == NULL) {
+		debuglog("Metrics Server failed to fdopen client socket %d\n", client_socket);
+		close(temp_fd);
+		close(client_socket);
+		return;
+	}
+
+	/* Basic L7 routing to filter out favicon requests */
+	char  request_line[256] = { 0 };
+	char *line              = fgets(request_line, sizeof(request_line), req_body);
+	fclose(req_body);
+
+	/* A client that hung up before sending a line is turned away like any other unknown request */
+	if (line == NULL || strncmp(request_line, "GET / HTTP", 10) != 0) {
+		/* Best effort: the connection is closed whether or not the 404 is delivered */
+		ssize_t nwritten = write(client_socket, http_header_build(404), http_header_len(404));
+		(void)nwritten;
+		close(client_socket);
+		return;
+	}
+
+	/* Fire and forget, so we don't save the thread handles */
+	pthread_t metrics_server_thread;
+	int rc = pthread_create(&metrics_server_thread, &metrics_server_thread_settings, metrics_server_handler,
+	                        (void *)(long)client_socket);
+
+	if (rc != 0) {
+		debuglog("Metrics Server failed to spawn pthread with %s\n", strerror(rc));
+		close(client_socket);
+	}
+}
+
+/**
+ * Thread entry point that sends the metrics report to one client and then closes the connection
+ * @param arg the client socket descriptor, carried in the pointer-sized thread argument
+ * @returns nothing; the thread ends through pthread_exit
+ */
+static void *
+metrics_server_handler(void *arg)
+{
+	/* Intermediate cast to integral value of 64-bit width to silence compiler nits */
+	int client_socket = (int)(long)arg;
+
 	int rc = 0;
 
 	char *ostream_base = NULL;
@@ -130,14 +210,52 @@ metrics_server_handler(int client_socket)
 	fprintf(ostream, "total_sandboxes_error: %d\n", total_sandboxes_error);
 #endif
 
-	fflush(ostream);
-	write(client_socket, ostream_base, ostream_size);
+	struct proc_stat_metrics stat;
+	proc_stat_metrics_init(&stat);
+
+	fprintf(ostream, "# TYPE os_proc_major_page_faults counter\n");
+	fprintf(ostream, "os_proc_major_page_faults: %lu\n", stat.major_page_faults);
+
+	fprintf(ostream, "# TYPE os_proc_minor_page_faults counter\n");
+	fprintf(ostream, "os_proc_minor_page_faults: %lu\n", stat.minor_page_faults);
+
+	fprintf(ostream, "# TYPE os_proc_child_major_page_faults counter\n");
+	fprintf(ostream, "os_proc_child_major_page_faults: %lu\n", stat.child_major_page_faults);
+
+	fprintf(ostream, "# TYPE os_proc_child_minor_page_faults counter\n");
+	fprintf(ostream, "os_proc_child_minor_page_faults: %lu\n", stat.child_minor_page_faults);
+
+	fprintf(ostream, "# TYPE os_proc_user_time counter\n");
+	fprintf(ostream, "os_proc_user_time: %lu\n", stat.user_time);
 
+	fprintf(ostream, "# TYPE os_proc_sys_time counter\n");
+	fprintf(ostream, "os_proc_sys_time: %lu\n", stat.system_time);
+
+	fprintf(ostream, "# TYPE os_proc_guest_time counter\n");
+	fprintf(ostream, "os_proc_guest_time: %lu\n", stat.guest_time);
+
+	fflush(ostream);
+	assert(ostream_size > 0);
 	rc = fclose(ostream);
 	assert(rc == 0);
 
+	/* Closing the memstream does not close the generated buffer */
+	/* write may accept fewer bytes than asked, so loop until the whole report is sent. A failure only means
+	 * this client went away, which must not assert and take down the whole runtime */
+	size_t total_written = 0;
+	while (total_written < ostream_size) {
+		ssize_t nwritten = write(client_socket, ostream_base + total_written, ostream_size - total_written);
+		if (nwritten <= 0) {
+			debuglog("Metrics Server failed to write response to socket %d\n", client_socket);
+			break;
+		}
+		total_written += (size_t)nwritten;
+	}
+
 	free(ostream_base);
 	ostream_size = 0;
 
 	close(client_socket);
+
+	pthread_exit(NULL);
 }