feat: mcs locks and profiling

main
Sean McBride 4 years ago
parent c291b049c8
commit 6673734857

@@ -1,8 +1,11 @@
#pragma once
#include <spinlock/fas.h>
#include <spinlock/mcs.h>
#include <stdint.h>
#include "runtime.h"
#include "worker_thread.h"
/* Should be Power of 2! */
#define PERF_WINDOW_BUFFER_SIZE 16
@@ -13,7 +16,7 @@
struct perf_window {
uint64_t buffer[PERF_WINDOW_BUFFER_SIZE];
uint64_t count;
ck_spinlock_fas_t lock;
ck_spinlock_mcs_t queue;
double mean;
};
@@ -26,7 +29,7 @@ static inline void
perf_window_update_mean(struct perf_window *self)
{
assert(self != NULL);
assert(ck_spinlock_fas_locked(&self->lock));
assert(ck_spinlock_mcs_locked(&self->queue));
uint64_t limit = self->count;
if (limit > PERF_WINDOW_BUFFER_SIZE) { limit = PERF_WINDOW_BUFFER_SIZE; }
@@ -47,7 +50,7 @@ perf_window_initialize(struct perf_window *self)
{
assert(self != NULL);
ck_spinlock_fas_init(&self->lock);
ck_spinlock_mcs_init(&self->queue);
self->count = 0;
self->mean = 0;
memset(&self->buffer, 0, sizeof(uint64_t) * PERF_WINDOW_BUFFER_SIZE);
@@ -64,14 +67,18 @@ perf_window_add(struct perf_window *self, uint64_t value)
{
assert(self != NULL);
/* A successful invocation should run for a non-zero amount of time */
assert(value > 0);
ck_spinlock_fas_lock(&self->lock);
struct ck_spinlock_mcs lock;
uint64_t pre = __getcycles();
ck_spinlock_mcs_lock(&self->queue, &lock);
worker_thread_lock_duration += (__getcycles() - pre);
self->buffer[self->count++ % PERF_WINDOW_BUFFER_SIZE] = value;
perf_window_update_mean(self);
ck_spinlock_fas_unlock(&self->lock);
ck_spinlock_mcs_unlock(&self->queue, &lock);
}
/**
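
The locking change is the same everywhere in this commit: the fetch-and-store (fas) spinlock is replaced by an MCS lock, whose waiters queue up and each spin on their own stack-allocated node (FIFO ordering, no shared cache line to bounce under contention), and each acquisition is timed so the wait can be charged to a thread-local contention counter. A minimal standalone sketch of that pattern, assuming an x86-64 cycle counter; the example_ names are illustrative stand-ins for the runtime's __getcycles() and worker_thread_lock_duration, not code from this commit:

#include <stdint.h>
#include <x86intrin.h>
#include <spinlock/mcs.h> /* vendored Concurrency Kit MCS spinlock, as included by this repo */

/* Stand-in for the runtime's cycle counter (assumed to wrap rdtsc) */
static inline uint64_t
example_getcycles(void)
{
	return __rdtsc();
}

/* Stand-ins for the thread-local contention counter and a shared lock */
static __thread uint64_t example_lock_duration = 0;
static ck_spinlock_mcs_t example_queue         = NULL; /* NULL == unlocked, same effect as ck_spinlock_mcs_init */

static void
example_timed_critical_section(void)
{
	/* Each acquisition supplies its own queue node; waiters spin locally on it */
	struct ck_spinlock_mcs node;

	uint64_t pre = example_getcycles();
	ck_spinlock_mcs_lock(&example_queue, &node);
	example_lock_duration += example_getcycles() - pre;

	/* ... critical section ... */

	ck_spinlock_mcs_unlock(&example_queue, &node);
}

perf_window_add above and priority_queue_length/enqueue/delete below follow exactly this shape; priority_queue_dequeue and priority_queue_top use the trylock variant sketched after the priority queue changes.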

@@ -1,7 +1,10 @@
#ifndef PRIORITY_QUEUE_H
#define PRIORITY_QUEUE_H
#include <spinlock/fas.h>
#include <spinlock/mcs.h>
#include "runtime.h"
#include "worker_thread.h"
#define MAX 4096
@@ -17,7 +20,7 @@ typedef uint64_t (*priority_queue_get_priority_fn_t)(void *element);
/* We assume that priority is expressed in terms of a 64 bit unsigned integral */
struct priority_queue {
ck_spinlock_fas_t lock;
ck_spinlock_mcs_t queue;
uint64_t highest_priority;
void * items[MAX];
int first_free;

@@ -8,6 +8,8 @@
If there are fewer cores than this, main dynamically overrides this and uses all available */
#define WORKER_THREAD_CORE_COUNT (NCORES > 1 ? NCORES - 1 : NCORES)
extern __thread uint64_t worker_thread_lock_duration;
extern __thread uint64_t worker_thread_start_timestamp;
extern __thread uv_loop_t worker_thread_uvio_handle;
void *worker_thread_main(void *return_code);

@@ -22,7 +22,7 @@ static inline int
priority_queue_append(struct priority_queue *self, void *new_item)
{
assert(self != NULL);
assert(ck_spinlock_fas_locked(&self->lock));
assert(ck_spinlock_mcs_locked(&self->queue));
if (self->first_free >= MAX) return -ENOSPC;
@@ -39,7 +39,7 @@ priority_queue_percolate_up(struct priority_queue *self)
{
assert(self != NULL);
assert(self->get_priority_fn != NULL);
assert(ck_spinlock_fas_locked(&self->lock));
assert(ck_spinlock_mcs_locked(&self->queue));
for (int i = self->first_free - 1;
i / 2 != 0 && self->get_priority_fn(self->items[i]) < self->get_priority_fn(self->items[i / 2]); i /= 2) {
@@ -64,7 +64,7 @@ priority_queue_find_smallest_child(struct priority_queue *self, int parent_index
assert(self != NULL);
assert(parent_index >= 1 && parent_index < self->first_free);
assert(self->get_priority_fn != NULL);
assert(ck_spinlock_fas_locked(&self->lock));
assert(ck_spinlock_mcs_locked(&self->queue));
int left_child_index = 2 * parent_index;
int right_child_index = 2 * parent_index + 1;
@@ -92,7 +92,7 @@ priority_queue_percolate_down(struct priority_queue *self, int parent_index)
{
assert(self != NULL);
assert(self->get_priority_fn != NULL);
assert(ck_spinlock_fas_locked(&self->lock));
assert(ck_spinlock_mcs_locked(&self->queue));
int left_child_index = 2 * parent_index;
while (left_child_index >= 2 && left_child_index < self->first_free) {
@@ -120,7 +120,7 @@ static inline bool
priority_queue_is_empty_locked(struct priority_queue *self)
{
assert(self != NULL);
assert(ck_spinlock_fas_locked(&self->lock));
assert(ck_spinlock_mcs_locked(&self->queue));
return self->first_free == 1;
}
@@ -141,7 +141,7 @@ priority_queue_initialize(struct priority_queue *self, priority_queue_get_priori
memset(self->items, 0, sizeof(void *) * MAX);
ck_spinlock_fas_init(&self->lock);
ck_spinlock_mcs_init(&self->queue);
self->first_free = 1;
self->get_priority_fn = get_priority_fn;
@@ -157,9 +157,15 @@ int
priority_queue_length(struct priority_queue *self)
{
assert(self != NULL);
ck_spinlock_fas_lock(&self->lock);
struct ck_spinlock_mcs lock;
uint64_t pre = __getcycles();
ck_spinlock_mcs_lock(&self->queue, &lock);
worker_thread_lock_duration += (__getcycles() - pre);
int length = self->first_free - 1;
ck_spinlock_fas_unlock(&self->lock);
ck_spinlock_mcs_unlock(&self->queue, &lock);
return length;
}
@@ -172,7 +178,11 @@ int
priority_queue_enqueue(struct priority_queue *self, void *value)
{
assert(self != NULL);
ck_spinlock_fas_lock(&self->lock);
struct ck_spinlock_mcs lock;
uint64_t pre = __getcycles();
ck_spinlock_mcs_lock(&self->queue, &lock);
worker_thread_lock_duration += (__getcycles() - pre);
if (priority_queue_append(self, value) == -ENOSPC) return -ENOSPC;
@@ -182,7 +192,9 @@ priority_queue_enqueue(struct priority_queue *self, void *value)
} else {
priority_queue_percolate_up(self);
}
ck_spinlock_fas_unlock(&self->lock);
ck_spinlock_mcs_unlock(&self->queue, &lock);
return 0;
}
/**
@@ -194,7 +206,11 @@ int
priority_queue_delete(struct priority_queue *self, void *value)
{
assert(self != NULL);
ck_spinlock_fas_lock(&self->lock);
struct ck_spinlock_mcs lock;
uint64_t pre = __getcycles();
ck_spinlock_mcs_lock(&self->queue, &lock);
worker_thread_lock_duration += (__getcycles() - pre);
bool did_delete = false;
for (int i = 1; i < self->first_free; i++) {
@@ -206,7 +222,8 @@ priority_queue_delete(struct priority_queue *self, void *value)
}
}
ck_spinlock_fas_unlock(&self->lock);
ck_spinlock_mcs_unlock(&self->queue, &lock);
if (!did_delete) return -1;
return 0;
}
@@ -225,10 +242,14 @@ priority_queue_dequeue(struct priority_queue *self, void **dequeued_element)
int return_code;
if (ck_spinlock_fas_trylock(&self->lock) == false) {
struct ck_spinlock_mcs lock;
uint64_t pre = __getcycles();
if (ck_spinlock_mcs_trylock(&self->queue, &lock) == false) {
worker_thread_lock_duration += (__getcycles() - pre);
return_code = -EAGAIN;
goto done;
};
worker_thread_lock_duration += (__getcycles() - pre);
if (priority_queue_is_empty_locked(self)) {
return_code = -ENOENT;
@@ -250,7 +271,7 @@ priority_queue_dequeue(struct priority_queue *self, void **dequeued_element)
return_code = 0;
release_lock:
ck_spinlock_fas_unlock(&self->lock);
ck_spinlock_mcs_unlock(&self->queue, &lock);
done:
return return_code;
}
@@ -270,10 +291,14 @@ priority_queue_top(struct priority_queue *self, void **dequeued_element)
int return_code;
if (ck_spinlock_fas_trylock(&self->lock) == false) {
struct ck_spinlock_mcs lock;
uint64_t pre = __getcycles();
if (ck_spinlock_mcs_trylock(&self->queue, &lock) == false) {
worker_thread_lock_duration += (__getcycles() - pre);
return_code = -EAGAIN;
goto done;
};
worker_thread_lock_duration += (__getcycles() - pre);
if (priority_queue_is_empty_locked(self)) {
return_code = -ENOENT;
@@ -284,7 +309,7 @@ priority_queue_top(struct priority_queue *self, void **dequeued_element)
return_code = 0;
release_lock:
ck_spinlock_fas_unlock(&self->lock);
ck_spinlock_mcs_unlock(&self->queue, &lock);
done:
return return_code;
}
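
priority_queue_dequeue and priority_queue_top above take the non-blocking path instead: ck_spinlock_mcs_trylock either acquires the lock or the caller backs off with -EAGAIN, and the cycles spent in the attempt are charged to the counter in both cases. A sketch of that variant, reusing the illustrative example_ helpers from the earlier sketch:

#include <errno.h>
#include <stdbool.h>

static int
example_try_critical_section(void)
{
	struct ck_spinlock_mcs node;

	uint64_t pre = example_getcycles();
	if (ck_spinlock_mcs_trylock(&example_queue, &node) == false) {
		/* The failed attempt still counts toward lock overhead */
		example_lock_duration += example_getcycles() - pre;
		return -EAGAIN;
	}
	example_lock_duration += example_getcycles() - pre;

	/* ... critical section ... */

	ck_spinlock_mcs_unlock(&example_queue, &node);
	return 0;
}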

@@ -29,10 +29,29 @@ __thread uv_loop_t worker_thread_uvio_handle;
/* Flag to signify if the thread is currently running callbacks in the libuv event loop */
static __thread bool worker_thread_is_in_libuv_event_loop = false;
/* Total Lock Contention in Cycles */
__thread uint64_t worker_thread_lock_duration;
/* Timestamp when worker thread began executing */
__thread uint64_t worker_thread_start_timestamp;
/***********************
* Worker Thread Logic *
**********************/
/**
* Reports lock contention for the worker thread
*/
static inline void
worker_thread_dump_lock_overhead()
{
#ifdef DEBUG
uint64_t worker_duration = __getcycles() - worker_thread_start_timestamp;
debuglog("Locks consumed %lu / %lu cycles, or %f%%\n", worker_thread_lock_duration, worker_duration,
(double)worker_thread_lock_duration / worker_duration * 100);
#endif
}
/**
* Conditionally triggers appropriate state changes for exiting sandboxes
* @param exiting_sandbox - The sandbox that ran to completion
@@ -228,6 +247,10 @@ worker_thread_execute_libuv_event_loop(void)
void *
worker_thread_main(void *return_code)
{
/* Initialize Bookkeeping */
worker_thread_start_timestamp = __getcycles();
worker_thread_lock_duration = 0;
/* Initialize Base Context */
arch_context_init(&worker_thread_base_context, 0, 0);
@@ -286,6 +309,7 @@ worker_thread_on_sandbox_exit(struct sandbox *exiting_sandbox)
{
assert(exiting_sandbox);
software_interrupt_disable();
worker_thread_dump_lock_overhead();
worker_thread_switch_to_base_context();
assert(0);
}
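
With DEBUG defined, worker_thread_dump_lock_overhead() reports the accumulated ratio when a sandbox exits. As an illustration with made-up numbers: a worker that has been running for 1,000,000,000 cycles while worker_thread_lock_duration sits at 5,000,000 would log roughly "Locks consumed 5000000 / 1000000000 cycles, or 0.500000%" (modulo whatever prefix debuglog adds).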
