feat: metrics thread and proc stat

master
Sean McBride 3 years ago
parent ccbee1a41e
commit e40d139536

@ -5,6 +5,5 @@
extern struct tcp_server metrics_server;
void metrics_server_init();
int metrics_server_listen();
void metrics_server_thread_spawn(int client_socket);
int metrics_server_close();
void metrics_server_handler(int client_socket);

@ -0,0 +1,103 @@
#pragma once
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "runtime.h" /* For runtime_pid */
/* Used to read process-level metrics associated with sledgert from procfs
* The parsing behavior is based on prtstat -r
*/
/**
 * Zero-based positions of the space-separated fields of /proc/[pid]/stat, in the order documented by proc(5).
 * The four signal bitmap fields between kstkeip and wchan must be present here: leaving them out shifts every
 * later index down by four, which makes PROC_STAT_GUEST_TIME select the "processor" field instead.
 */
enum PROC_STAT
{
	PROC_STAT_PID = 0,               /* Process ID */
	PROC_STAT_COMM,                  /* Executable name, wrapped in parentheses */
	PROC_STAT_STATE,                 /* Single-character process state */
	PROC_STAT_PPID,                  /* Parent Process ID */
	PROC_STAT_PGRP,                  /* Process Group ID */
	PROC_STAT_SESSION,               /* Session ID */
	PROC_STAT_TTY_NR,                /* Controlling terminal of the process */
	PROC_STAT_TPGID,                 /* Foreground process group ID of the controlling terminal */
	PROC_STAT_FLAGS,                 /* Kernel flags word of the process */
	PROC_STAT_MINFLT,                /* Minor Page Faults */
	PROC_STAT_CMINFLT,               /* Minor Page Faults of waited-for children */
	PROC_STAT_MAJFLT,                /* Major Page Faults */
	PROC_STAT_CMAJFLT,               /* Major Page Faults of waited-for children */
	PROC_STAT_UTIME,                 /* User Time, in clock ticks */
	PROC_STAT_STIME,                 /* System Time, in clock ticks */
	PROC_STAT_CUTIME,                /* Child User Time, in clock ticks */
	PROC_STAT_CSTIME,                /* Child System Time, in clock ticks */
	PROC_STAT_PRIORITY,              /* Scheduling priority */
	PROC_STAT_NICE,                  /* Nice value */
	PROC_STAT_NUM_THREADS,           /* Number of threads in the process */
	PROC_STAT_ITREALVALUE,           /* Jiffies until the next SIGALRM (obsolete field) */
	PROC_STAT_STARTTIME,             /* Start Time after system boot, in clock ticks */
	PROC_STAT_VSIZE,                 /* Virtual Memory size, in bytes */
	PROC_STAT_RSS,                   /* Resident Set Size, in pages */
	PROC_STAT_RSSLIM,                /* Soft limit on the RSS, in bytes */
	PROC_STAT_STARTCODE,             /* Address above which program text can run */
	PROC_STAT_ENDCODE,               /* Address below which program text can run */
	PROC_STAT_STARTSTACK,            /* Address of the start (bottom) of the stack */
	PROC_STAT_KSTKESP,               /* Current stack pointer */
	PROC_STAT_KSTKEIP,               /* Current instruction pointer */
	PROC_STAT_SIGNAL,                /* Bitmap of pending signals (obsolete field) */
	PROC_STAT_BLOCKED,               /* Bitmap of blocked signals (obsolete field) */
	PROC_STAT_SIGIGNORE,             /* Bitmap of ignored signals (obsolete field) */
	PROC_STAT_SIGCATCH,              /* Bitmap of caught signals (obsolete field) */
	PROC_STAT_WCHAN,                 /* "Channel" in which the process is waiting */
	PROC_STAT_NSWAP,                 /* Pages swapped (not maintained) */
	PROC_STAT_CNSWAP,                /* Cumulative nswap of children (not maintained) */
	PROC_STAT_EXIT_SIGNAL,           /* Signal sent to the parent when the process dies */
	PROC_STAT_PROCESSOR,             /* CPU number last executed on */
	PROC_STAT_RT_PRIORITY,           /* Real-time scheduling priority */
	PROC_STAT_POLICY,                /* Scheduling policy */
	PROC_STAT_DELAYACCR_BLKIO_TICKS, /* Aggregated block I/O delays, in clock ticks (proc(5): delayacct_blkio_ticks) */
	PROC_STAT_GUEST_TIME,            /* Guest Time (running a virtual CPU for a guest OS), in clock ticks */
	PROC_STAT_CGUEST_TIME,           /* Guest Time of children, in clock ticks */
	PROC_STAT_COUNT
};

/* proc(5) numbers fields from 1, so guest_time (field 43) must land on index 42 */
_Static_assert(PROC_STAT_GUEST_TIME == 42, "PROC_STAT must mirror the /proc/[pid]/stat field order");
/*
 * Subset of the /proc/[pid]/stat fields that proc_stat_metrics_init() copies out of procfs.
 * Each member holds the raw decimal value of the stat field named beside it; no unit conversion is applied.
 */
struct proc_stat_metrics {
uint64_t minor_page_faults; /* Parsed from the PROC_STAT_MINFLT field */
uint64_t major_page_faults; /* Parsed from the PROC_STAT_MAJFLT field */
uint64_t child_minor_page_faults; /* Parsed from the PROC_STAT_CMINFLT field */
uint64_t child_major_page_faults; /* Parsed from the PROC_STAT_CMAJFLT field */
uint64_t user_time; /* Parsed from the PROC_STAT_UTIME field */
uint64_t system_time; /* Parsed from the PROC_STAT_STIME field */
uint64_t guest_time; /* Parsed from the PROC_STAT_GUEST_TIME field */
};
static inline void
proc_stat_metrics_init(struct proc_stat_metrics *stat)
{
assert(runtime_pid > 0);
// Open sledgert's stat file in procfs
char path[256];
snprintf(path, 256, "/proc/%d/stat", runtime_pid);
FILE *proc_stat = fopen(path, "r");
/* Read stat file into in-memory buffer */
char buf[BUFSIZ];
fgets(buf, BUFSIZ, proc_stat);
fclose(proc_stat);
/* Parse into an array of tokens with indices aligning to the PROC_STAT enum */
char *pos = NULL;
char *proc_stat_values[PROC_STAT_COUNT];
for (int i = 0; i < PROC_STAT_COUNT; i++) {
char *tok = i == 0 ? strtok_r(buf, " ", &pos) : strtok_r(NULL, " ", &pos);
proc_stat_values[i] = tok;
}
/* Fill the proc_state_metrics struct with metrics of interest */
/* Minor Page Faults, Major Page Faults, Vsize, User, System, Guest, Uptime */
stat->minor_page_faults = strtoul(proc_stat_values[PROC_STAT_MINFLT], NULL, 10);
stat->major_page_faults = strtoul(proc_stat_values[PROC_STAT_MAJFLT], NULL, 10);
stat->child_minor_page_faults = strtoul(proc_stat_values[PROC_STAT_CMINFLT], NULL, 10);
stat->child_major_page_faults = strtoul(proc_stat_values[PROC_STAT_CMAJFLT], NULL, 10);
stat->user_time = strtoul(proc_stat_values[PROC_STAT_UTIME], NULL, 10);
stat->system_time = strtoul(proc_stat_values[PROC_STAT_STIME], NULL, 10);
stat->guest_time = strtoul(proc_stat_values[PROC_STAT_GUEST_TIME], NULL, 10);
}

@ -2,6 +2,7 @@
#include <pthread.h>
#include <sys/epoll.h> /* for epoll_create1(), epoll_ctl(), struct epoll_event */
#include <sys/types.h> /* for pid_t */
#include <stdatomic.h>
#include <stdbool.h>
@ -34,6 +35,7 @@ enum RUNTIME_SIGALRM_HANDLER
RUNTIME_SIGALRM_HANDLER_TRIAGED = 1
};
extern pid_t runtime_pid;
extern bool runtime_preemption_enabled;
extern uint32_t runtime_processor_speed_MHz;
extern uint32_t runtime_quantum_us;

@ -354,15 +354,16 @@ on_metrics_server_epoll_event(struct epoll_event *evt)
/* Accept as many clients requests as possible, returning when we would have blocked */
while (true) {
/* We accept the client connection with blocking semantics because we spawn ephemeral worker threads */
int client_socket = accept4(metrics_server.socket_descriptor, (struct sockaddr *)&client_address,
&address_length, SOCK_NONBLOCK);
&address_length, 0);
if (unlikely(client_socket < 0)) {
if (errno == EWOULDBLOCK || errno == EAGAIN) return;
panic("accept4: %s", strerror(errno));
}
metrics_server_handler(client_socket);
metrics_server_thread_spawn(client_socket);
}
}
@ -412,7 +413,6 @@ listener_thread_main(void *dummy)
generic_thread_initialize();
metrics_server_init();
metrics_server_listen();
listener_thread_register_metrics_server();
/* Set my priority */

@ -7,6 +7,7 @@
#include <sched.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#ifdef LOG_TO_FILE
@ -39,6 +40,7 @@ enum RUNTIME_SIGALRM_HANDLER runtime_sigalrm_handler = RUNTIME_SIGALRM_HANDLER_B
bool runtime_preemption_enabled = true;
uint32_t runtime_quantum_us = 5000; /* 5ms */
uint64_t runtime_boot_timestamp;
pid_t runtime_pid = 0;
/**
* Returns instructions on use of CLI if used incorrectly
@ -430,6 +432,8 @@ main(int argc, char **argv)
exit(-1);
}
runtime_pid = getpid();
printf("Starting the Sledge runtime\n");
log_compiletime_config();

@ -1,24 +1,39 @@
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "admissions_control.h"
#include "tcp_server.h"
#include "debuglog.h"
#include "http.h"
#include "http_total.h"
#include "proc_stat.h"
#include "runtime.h"
#include "sandbox_total.h"
#include "sandbox_state.h"
#include "tcp_server.h"
/* We run threads on the "reserved OS core" using blocking semantics */
#define METRICS_SERVER_CORE_ID 0
static pthread_attr_t metrics_server_thread_settings;
struct tcp_server metrics_server;
static void *metrics_server_handler(void *arg);
void
metrics_server_init()
{
tcp_server_init(&metrics_server, 1776);
}
int rc = tcp_server_listen(&metrics_server);
assert(rc == 0);
int
metrics_server_listen()
{
return tcp_server_listen(&metrics_server);
/* Configure pthread attributes to pin metrics server threads to CPU 0 */
pthread_attr_init(&metrics_server_thread_settings);
cpu_set_t cs;
CPU_ZERO(&cs);
CPU_SET(METRICS_SERVER_CORE_ID, &cs);
pthread_attr_setaffinity_np(&metrics_server_thread_settings, sizeof(cpu_set_t), &cs);
}
int
@ -28,8 +43,40 @@ metrics_server_close()
}
void
metrics_server_handler(int client_socket)
metrics_server_thread_spawn(int client_socket)
{
/* Duplicate fd so fclose doesn't close the actual client_socket */
int temp_fd = dup(client_socket);
FILE *req_body = fdopen(temp_fd, "r");
/* Basic L7 routing to filter out favicon requests */
char http_status_code_buf[256];
fgets(http_status_code_buf, 256, req_body);
fclose(req_body);
if (strncmp(http_status_code_buf, "GET / HTTP", 10) != 0) {
write(client_socket, http_header_build(404), http_header_len(404));
close(client_socket);
return;
}
/* Fire and forget, so we don't save the thread handles */
pthread_t metrics_server_thread;
int rc = pthread_create(&metrics_server_thread, &metrics_server_thread_settings, metrics_server_handler,
(void *)(long)client_socket);
if (rc != 0) {
debuglog("Metrics Server failed to spawn pthread with %s\n", strerror(rc));
close(client_socket);
}
}
static void *
metrics_server_handler(void *arg)
{
/* Intermediate cast to integral value of 64-bit width to silence compiler nits */
int client_socket = (int)(long)arg;
int rc = 0;
char *ostream_base = NULL;
@ -130,14 +177,43 @@ metrics_server_handler(int client_socket)
fprintf(ostream, "total_sandboxes_error: %d\n", total_sandboxes_error);
#endif
fflush(ostream);
write(client_socket, ostream_base, ostream_size);
struct proc_stat_metrics stat;
proc_stat_metrics_init(&stat);
fprintf(ostream, "# TYPE os_proc_major_page_faults counter\n");
fprintf(ostream, "os_proc_major_page_faults: %lu\n", stat.major_page_faults);
fprintf(ostream, "# TYPE os_proc_minor_page_faults counter\n");
fprintf(ostream, "os_proc_minor_page_faults: %lu\n", stat.minor_page_faults);
fprintf(ostream, "# TYPE os_proc_child_major_page_faults counter\n");
fprintf(ostream, "os_proc_child_major_page_faults: %lu\n", stat.child_major_page_faults);
fprintf(ostream, "# TYPE os_proc_child_minor_page_faults counter\n");
fprintf(ostream, "os_proc_child_minor_page_faults: %lu\n", stat.child_minor_page_faults);
fprintf(ostream, "# TYPE os_proc_user_time counter\n");
fprintf(ostream, "os_proc_user_time: %lu\n", stat.user_time);
fprintf(ostream, "# TYPE os_proc_sys_time counter\n");
fprintf(ostream, "os_proc_sys_time: %lu\n", stat.system_time);
fprintf(ostream, "# TYPE os_proc_guest_time counter\n");
fprintf(ostream, "os_proc_guest_time: %lu\n", stat.guest_time);
fflush(ostream);
assert(ostream_size > 0);
rc = fclose(ostream);
assert(rc == 0);
/* Closing the memstream does not close the generated buffer */
ssize_t nwritten = write(client_socket, ostream_base, ostream_size);
assert(nwritten == ostream_size);
free(ostream_base);
ostream_size = 0;
close(client_socket);
pthread_exit(NULL);
}

Loading…
Cancel
Save