#pragma once #include #include "lock.h" #include "runtime.h" #include "worker_thread.h" /* Should be Power of 2! */ #define PERF_WINDOW_BUFFER_SIZE 16 #if ((PERF_WINDOW_BUFFER_SIZE == 0) || (PERF_WINDOW_BUFFER_SIZE & (PERF_WINDOW_BUFFER_SIZE - 1)) != 0) #error "PERF_WINDOW_BUFFER_SIZE must be power of 2!" #endif /* * The sorted array sorts the last N executions by execution time * The buffer array acts as a circular buffer of indices into the sorted array * * This provides a sorted circular buffer */ struct execution_node { uint32_t execution_time; uint16_t buffer_idx; /* Reverse Index back to the sorted bin equal to this index */ }; struct perf_window { struct execution_node sorted[PERF_WINDOW_BUFFER_SIZE]; uint16_t buffer[PERF_WINDOW_BUFFER_SIZE]; uint64_t count; lock_t lock; }; /** * Initializes perf window * @param self */ static inline void perf_window_initialize(struct perf_window *self) { assert(self != NULL); LOCK_INIT(&self->lock); self->count = 0; memset(&self->sorted, 0, sizeof(struct execution_node) * PERF_WINDOW_BUFFER_SIZE); memset(&self->buffer, 0, sizeof(uint16_t) * PERF_WINDOW_BUFFER_SIZE); } /** * Swaps two execution nodes in the sorted array, including updating the indices in the circular buffer * @param self * @param first_sorted_idx * @param second_sorted_idx */ static inline void perf_window_swap(struct perf_window *self, uint16_t first_sorted_idx, uint16_t second_sorted_idx) { assert(LOCK_IS_LOCKED(&self->lock)); assert(self != NULL); assert(first_sorted_idx >= 0 && first_sorted_idx < PERF_WINDOW_BUFFER_SIZE); assert(second_sorted_idx >= 0 && second_sorted_idx < PERF_WINDOW_BUFFER_SIZE); uint16_t first_buffer_idx = self->sorted[first_sorted_idx].buffer_idx; uint16_t second_buffer_idx = self->sorted[second_sorted_idx].buffer_idx; /* The execution node's buffer_idx points to a buffer cell equal to its own sorted index */ assert(self->buffer[first_buffer_idx] == first_sorted_idx); assert(self->buffer[second_buffer_idx] == second_sorted_idx); uint32_t first_execution_time = self->sorted[first_sorted_idx].execution_time; uint32_t second_execution_time = self->sorted[second_sorted_idx].execution_time; /* Swap Indices in Buffer*/ self->buffer[first_buffer_idx] = second_sorted_idx; self->buffer[second_buffer_idx] = first_sorted_idx; /* Swap buffer_idx */ struct execution_node tmp_node = self->sorted[first_sorted_idx]; self->sorted[first_sorted_idx] = self->sorted[second_sorted_idx]; self->sorted[second_sorted_idx] = tmp_node; /* The circular buffer indices should always point to the same execution times across all swaps */ assert(self->sorted[self->buffer[first_buffer_idx]].execution_time == first_execution_time); assert(self->sorted[self->buffer[second_buffer_idx]].execution_time == second_execution_time); } /** * Adds a new value to the perf window * Not intended to be called directly! * @param self * @param value */ static inline void perf_window_add(struct perf_window *self, uint32_t value) { assert(self != NULL); /* A successful invocation should run for a non-zero amount of time */ assert(value > 0); LOCK_LOCK(&self->lock); /* If count is 0, then fill entire array with initial execution times */ if (self->count == 0) { for (int i = 0; i < PERF_WINDOW_BUFFER_SIZE; i++) { self->buffer[i] = i; self->sorted[i] = (struct execution_node){ .execution_time = value, .buffer_idx = i }; } self->count = PERF_WINDOW_BUFFER_SIZE; goto done; } /* Otherwise, replace the oldest value, and then sort */ uint16_t idx_of_oldest = self->buffer[self->count % PERF_WINDOW_BUFFER_SIZE]; bool check_up = value > self->sorted[idx_of_oldest].execution_time; self->sorted[idx_of_oldest].execution_time = value; if (check_up) { for (uint16_t i = idx_of_oldest; i + 1 < PERF_WINDOW_BUFFER_SIZE && self->sorted[i + 1].execution_time < self->sorted[i].execution_time; i++) { perf_window_swap(self, i, i + 1); } } else { for (uint16_t i = idx_of_oldest; i - 1 >= 0 && self->sorted[i - 1].execution_time > self->sorted[i].execution_time; i--) { perf_window_swap(self, i, i - 1); } } /* The idx that we replaces should still point to the same value */ assert(self->sorted[self->buffer[self->count % PERF_WINDOW_BUFFER_SIZE]].execution_time == value); /* The sorted array should be ordered by execution time */ #ifndef NDEBUG for (int i = 1; i < PERF_WINDOW_BUFFER_SIZE; i++) { assert(self->sorted[i - 1].execution_time <= self->sorted[i].execution_time); } #endif self->count++; done: LOCK_UNLOCK(&self->lock); } /** * Returns pXX execution time * @param self * @param percentile represented by double between 0 and 1 * @returns execution time or -1 if buffer is empty */ static inline uint32_t perf_window_get_percentile(struct perf_window *self, double percentile) { assert(self != NULL); assert(percentile > 0 && percentile < 1); if (self->count == 0) return -1; return self->sorted[(int)(PERF_WINDOW_BUFFER_SIZE * percentile)].execution_time; } /** * Returns the total count of executions * @returns total count */ static inline uint64_t perf_window_get_count(struct perf_window *self) { assert(self != NULL); return self->count; }