feat: percentiles for execution estimates

4 years ago · d1d27a9162
parent c9bcf086fa
commit d1d27a9162
3 changed files with 104 additions and 42 deletions
--- a/runtime/include/perf_window.h
+++ b/runtime/include/perf_window.h
@ -13,51 +13,80 @@
 #error "PERF_WINDOW_BUFFER_SIZE must be power of 2!"
 #endif

+/*
+ * The sorted array holds the last N execution times, kept in ascending order of execution time
+ * The buffer array is a circular buffer, in arrival order, whose cells are indices into the sorted array
+ *
+ * This provides a sorted circular buffer
+ */
+struct execution_node {
+	uint64_t execution_time;
+	uint16_t buffer_idx; /* Reverse index: the circular buffer cell that stores this node's sorted index */
+};
+
 struct perf_window {
-	uint64_t buffer[PERF_WINDOW_BUFFER_SIZE];
+	struct execution_node sorted[PERF_WINDOW_BUFFER_SIZE];
+	uint16_t              buffer[PERF_WINDOW_BUFFER_SIZE];
 	uint64_t              count;
 	lock_t                lock;
-	double   mean;
 };

 /**
- * Iterates through the values in the buffer and updates the mean
- * Not intended to be called directly!
+ * Initializes perf window
 * @param self
 */
 static inline void
-perf_window_update_mean(struct perf_window *self)
+perf_window_initialize(struct perf_window *self)
 {
 	assert(self != NULL);
-	assert(LOCK_IS_LOCKED(&self->lock));

-	uint64_t limit = self->count;
-	if (limit > PERF_WINDOW_BUFFER_SIZE) { limit = PERF_WINDOW_BUFFER_SIZE; }
-
-	uint64_t sum = 0;
-	for (uint64_t i = 0; i < limit; i++) sum += self->buffer[i];
+	LOCK_INIT(&self->lock);
+	self->count = 0;
+	memset(&self->sorted, 0, sizeof(struct execution_node) * PERF_WINDOW_BUFFER_SIZE);
+	memset(&self->buffer, 0, sizeof(uint16_t) * PERF_WINDOW_BUFFER_SIZE);
+}

-	self->mean = (double)(sum) / limit;
-};

 /**
- * Iterates through the values in the buffer and updates the mean
- * Not intended to be called directly!
+ * Swaps two execution nodes in the sorted array, including updating the indices in the circular buffer
 * @param self
+ * @param first_sorted_idx
+ * @param second_sorted_idx
 */
 static inline void
-perf_window_initialize(struct perf_window *self)
+perf_window_swap(struct perf_window *self, uint16_t first_sorted_idx, uint16_t second_sorted_idx)
 {
+	assert(LOCK_IS_LOCKED(&self->lock));
 	assert(self != NULL);
+	assert(first_sorted_idx >= 0 && first_sorted_idx < PERF_WINDOW_BUFFER_SIZE);
+	assert(second_sorted_idx >= 0 && second_sorted_idx < PERF_WINDOW_BUFFER_SIZE);

-	LOCK_INIT(&self->lock);
-	self->count = 0;
-	self->mean  = 0;
-	memset(&self->buffer, 0, sizeof(uint64_t) * PERF_WINDOW_BUFFER_SIZE);
+	uint16_t first_buffer_idx  = self->sorted[first_sorted_idx].buffer_idx;
+	uint16_t second_buffer_idx = self->sorted[second_sorted_idx].buffer_idx;
+
+	/* The execution node's buffer_idx points to a buffer cell equal to its own sorted index  */
+	assert(self->buffer[first_buffer_idx] == first_sorted_idx);
+	assert(self->buffer[second_buffer_idx] == second_sorted_idx);
+
+	uint64_t first_execution_time  = self->sorted[first_sorted_idx].execution_time;
+	uint64_t second_execution_time = self->sorted[second_sorted_idx].execution_time;
+
+	/* Swap the sorted indices stored in the circular buffer */
+	self->buffer[first_buffer_idx]  = second_sorted_idx;
+	self->buffer[second_buffer_idx] = first_sorted_idx;
+
+	/* Swap the execution nodes themselves (execution_time and buffer_idx move together) */
+	struct execution_node tmp_node  = self->sorted[first_sorted_idx];
+	self->sorted[first_sorted_idx]  = self->sorted[second_sorted_idx];
+	self->sorted[second_sorted_idx] = tmp_node;
+
+	/* The circular buffer indices should always point to the same execution times across all swaps  */
+	assert(self->sorted[self->buffer[first_buffer_idx]].execution_time == first_execution_time);
+	assert(self->sorted[self->buffer[second_buffer_idx]].execution_time == second_execution_time);
 }

 /**
- * Iterates through the values in the buffer and updates the mean
+ * Adds a new value to the perf window
 * Not intended to be called directly!
 * @param self
 * @param value
@ -71,23 +100,65 @@ perf_window_add(struct perf_window *self, uint64_t value)
 	assert(value > 0);

 	LOCK_LOCK(&self->lock);
-	self->buffer[self->count++ % PERF_WINDOW_BUFFER_SIZE] = value;
-	perf_window_update_mean(self);
+
+	/* If count is 0, seed every slot with this first value so the window starts out full and sorted */
+	if (self->count == 0) {
+		for (int i = 0; i < PERF_WINDOW_BUFFER_SIZE; i++) {
+			self->buffer[i] = i;
+			self->sorted[i] = (struct execution_node){ .execution_time = value, .buffer_idx = i };
+		}
+		self->count = PERF_WINDOW_BUFFER_SIZE;
+		goto done;
+	}
+
+	/* Otherwise, replace the oldest value, and then sort */
+	uint16_t idx_of_oldest = self->buffer[self->count % PERF_WINDOW_BUFFER_SIZE];
+	bool     check_up      = value > self->sorted[idx_of_oldest].execution_time;
+
+	self->sorted[idx_of_oldest].execution_time = value;
+
+	if (check_up) {
+		for (uint16_t i = idx_of_oldest; i + 1 < PERF_WINDOW_BUFFER_SIZE
+		                                 && self->sorted[i + 1].execution_time < self->sorted[i].execution_time;
+		     i++) {
+			perf_window_swap(self, i, i + 1);
+		}
+	} else {
+		for (uint16_t i = idx_of_oldest;
+		     i - 1 >= 0 && self->sorted[i - 1].execution_time > self->sorted[i].execution_time; i--) {
+			perf_window_swap(self, i, i - 1);
+		}
+	}
+
+	/* The buffer cell for the entry we replaced should still resolve to the new value */
+	assert(self->sorted[self->buffer[self->count % PERF_WINDOW_BUFFER_SIZE]].execution_time == value);
+
+	/* The sorted array should be ordered by execution time */
+	for (int i = 1; i < PERF_WINDOW_BUFFER_SIZE; i++) {
+		assert(self->sorted[i - 1].execution_time <= self->sorted[i].execution_time);
+	}
+
+	self->count++;
+
+done:
 	LOCK_UNLOCK(&self->lock);
 }

 /**
- * Returns mean perf value across all executions
- * @returns mean or -1 if buffer is empty
+ * Returns pXX execution time
+ * @param self
+ * @param percentile fraction strictly between 0 and 1 (e.g. 0.5 for p50)
+ * @returns execution time at that percentile, or -1 (converted to uint64_t) if no executions are recorded
 */
-static inline double
-perf_window_get_mean(struct perf_window *self)
+static inline uint64_t
+perf_window_get_percentile(struct perf_window *self, double percentile)
 {
 	assert(self != NULL);
+	assert(percentile > 0 && percentile < 1);

 	if (self->count == 0) return -1;

-	return self->mean;
+	return self->sorted[(int)(PERF_WINDOW_BUFFER_SIZE * percentile)].execution_time;
 }

 /**
--- a/runtime/src/runtime.c
+++ b/runtime/src/runtime.c
@ -98,17 +98,10 @@ listener_thread_main(void *dummy)

 			/* Perform Admission Control */

-			/*
-			 * TODO: Enhance to use configurable percentiles rather than just mean. This can be policy
-			 * defined in the module specification
-			 */
-			uint64_t estimated_execution = perf_window_get_mean(&module->perf_window);
-
+			uint64_t estimated_execution = perf_window_get_percentile(&module->perf_window, 0.5);
 			/*
 			 * If this is the first execution, assume a default execution
 			 * TODO: Enhance module specification to provide "seed" value of estimated duration
-			 * TODO: Should we "rate limit" or only admit one request before we have actual data? Otherwise
-			 * we might be flooded with sandboxes that possibly underestimate
 			 */
 			if (estimated_execution == -1) estimated_execution = 1000;

@ -131,8 +124,7 @@ listener_thread_main(void *dummy)

 				/* Add to work accepted by the runtime */
 				runtime_admitted += admissions_estimate;
-				debuglog("Runtime Utilization: %f%%\n",
-				         runtime_admitted / runtime_worker_threads_count * 100);
+				debuglog("Runtime Admitted: %f / %u\n", runtime_admitted, runtime_worker_threads_count);
 			}
 		}
 	}
--- a/runtime/src/sandbox.c
+++ b/runtime/src/sandbox.c
@ -785,9 +785,8 @@ sandbox_set_as_complete(struct sandbox *sandbox, sandbox_state_t last_state)
 	perf_window_add(&sandbox->module->perf_window, sandbox->running_duration);

 	runtime_admitted -= sandbox->admissions_estimate;
-	assert(runtime_admitted >= 0);

-	debuglog("Runtime Utilization: %f%%\n", runtime_admitted / runtime_worker_threads_count * 100);
+	debuglog("Runtime Admitted: %f / %u\n", runtime_admitted, runtime_worker_threads_count);

 	sandbox_print_perf(sandbox);