parent
9204ab8f16
commit
d678e34ce3
@ -0,0 +1,15 @@
|
||||
# Bimodal Distribution
|
||||
|
||||
This experiment drives a bimodal distribution of long-running low-priority and short-running high-priority workloads.
|
||||
|
||||
Relative deadlines are tuned such that the scheduler should always preempt the low-priority workload in favor of the high-priority workload, unless preemption is disabled.
|
||||
|
||||
The two workloads are run separately as a baseline. They are then run concurrently, starting the low-priority long-running workload first such that the system begins execution and accumulates requests in the data structures. The high-priority short-running workload then begins.
|
||||
|
||||
## Independent Variable
|
||||
|
||||
The Scheduling Policy: EDF versus FIFO
|
||||
|
||||
## Dependent Variables
|
||||
|
||||
Latency of high priority workload
|
@ -0,0 +1,222 @@
|
||||
#!/bin/bash
|
||||
source ../common.sh
|
||||
|
||||
# This experiment is intended to document how the level of concurrent requests influence the latency, throughput, and success/failure rate
|
||||
# Success - The percentage of requests that complete by their deadlines
|
||||
# TODO: Does this handle non-200s?
|
||||
# Throughput - The mean number of successful requests per second
|
||||
# Latency - the round-trip response time (unit?) of successful requests at the p50, p90, p99, and p100 percentiles
|
||||
|
||||
# Sends requests until the per-module perf window buffers are full
# This ensures that Sledge has accurate estimates of execution time
# $1 (hostname="localhost") - an optional parameter that sets the hostname. Defaults to localhost
# Returns 0 if both sample runs succeed, 1 as soon as either hey invocation fails
run_samples() {
	local hostname="${1:-localhost}"

	# Scrape the perf window size from the source if possible.
	# The scraped text is validated explicitly because the exit status of a `grep | cut`
	# pipeline is the status of cut, which succeeds even when the header or the #define is missing.
	local -r perf_window_path="../../include/perf_window.h"
	local scraped_size=""
	if [[ -r "$perf_window_path" ]]; then
		scraped_size=$(awk '$1 == "#define" && $2 == "PERF_WINDOW_BUFFER_SIZE" { print $3; exit }' "$perf_window_path")
	fi
	if ! [[ "$scraped_size" =~ ^[1-9][0-9]*$ ]]; then
		echo "Failed to scrape PERF_WINDOW_BUFFER_SIZE from ../../include/perf_window.h"
		echo "Defaulting to 16"
		scraped_size=16
	fi
	local -ir perf_window_buffer_size="$scraped_size"

	echo -n "Running Samples: "

	# One request per perf window slot for each module
	hey -n "$perf_window_buffer_size" -c "$perf_window_buffer_size" -cpus 3 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10040" 1> /dev/null 2> /dev/null || {
		error_msg "fib40 samples failed"
		return 1
	}

	# fib10 listens on 10010, the same port run_experiments uses for this workload
	hey -n "$perf_window_buffer_size" -c "$perf_window_buffer_size" -cpus 3 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:10010" 1> /dev/null 2> /dev/null || {
		error_msg "fib10 samples failed"
		return 1
	}

	echo "[OK]"
	return 0
}
|
||||
|
||||
# Execute the fib10 and fib40 experiments sequentially and concurrently
# $1 (results_directory) - a directory where we will store our results
# $2 (hostname="localhost") - an optional parameter that sets the hostname. Defaults to localhost
# Writes fib40.csv, fib10.csv, fib40_con.csv, and fib10_con.csv into the results directory
# Returns 0 when all four runs produce results, 1 on the first failure
run_experiments() {
	if (($# < 1 || $# > 2)); then
		error_msg "invalid number of arguments ($#, expected 1 or 2)"
		return 1
	elif ! [[ -d "$1" ]]; then
		error_msg "directory \"$1\" does not exist"
		return 1
	fi

	local -r results_directory="$1"
	local -r hostname="${2:-localhost}"

	# The duration in seconds that we want the client to send requests
	local -ir duration_sec=15

	# The duration in seconds that the low priority task should run before the high priority task starts
	local -ir offset=5

	printf "Running Experiments\n"

	# Run each separately
	# The fib argument doubles as the port suffix: fib40 -> 10040, fib10 -> 10010
	local workload fib_arg
	for workload in fib40 fib10; do
		fib_arg="${workload#fib}"
		printf "\t%s: " "$workload"
		hey -z "${duration_sec}s" -cpus 4 -c 100 -t 0 -o csv -m GET -d "${fib_arg}\n" "http://${hostname}:100${fib_arg}" > "$results_directory/$workload.csv" 2> /dev/null || {
			printf "[ERR]\n"
			error_msg "$workload failed"
			return 1
		}
		get_result_count "$results_directory/$workload.csv" || {
			printf "[ERR]\n"
			error_msg "$workload unexpectedly has zero requests"
			return 1
		}
		printf "[OK]\n"
	done

	# Run concurrently
	# The lower priority has offsets to ensure it runs the entire time the high priority is trying to run
	# This asynchronously triggers jobs and then waits on their pids
	local fib40_con_PID
	local fib10_con_PID

	hey -z "$((duration_sec + 2 * offset))s" -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10040" > "$results_directory/fib40_con.csv" 2> /dev/null &
	fib40_con_PID="$!"

	sleep "$offset"

	hey -z "${duration_sec}s" -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:10010" > "$results_directory/fib10_con.csv" 2> /dev/null &
	fib10_con_PID="$!"

	# A plain wait blocks until the process terminates when job control is off (as in a script),
	# so the bash 5 only -f flag is not needed
	local -i fib10_con_failed=0
	if ! wait "$fib10_con_PID"; then
		printf "\tfib10_con: [ERR]\n"
		error_msg "fib10_con (pid ${fib10_con_PID}) exited with a non-zero status"
		fib10_con_failed=1
	elif ! get_result_count "$results_directory/fib10_con.csv"; then
		printf "\tfib10_con: [ERR]\n"
		error_msg "fib10_con has zero requests. This might be because fib40_con saturated the runtime"
		fib10_con_failed=1
	fi

	if ((fib10_con_failed)); then
		# Do not leave the low-priority load generator running after we give up
		kill "$fib40_con_PID" 2> /dev/null
		wait "$fib40_con_PID" 2> /dev/null
		return 1
	fi
	printf "\tfib10_con: [OK]\n"

	wait "$fib40_con_PID" || {
		printf "\tfib40_con: [ERR]\n"
		error_msg "fib40_con (pid ${fib40_con_PID}) exited with a non-zero status"
		return 1
	}
	get_result_count "$results_directory/fib40_con.csv" || {
		printf "\tfib40_con: [ERR]\n"
		error_msg "fib40_con has zero requests."
		return 1
	}
	printf "\tfib40_con: [OK]\n"

	return 0
}
|
||||
|
||||
# Process the experimental results and generate human-friendly results for success rate, throughput, and latency
# $1 (results_directory) - directory holding the fib10, fib10_con, fib40, and fib40_con CSVs written by hey
# Appends to success.csv, throughput.csv, and latency.csv in that directory and leaves a sorted
# <payload>-response.csv scratch file per payload
# Returns 1 on invalid arguments; otherwise the status of csv_to_dat
process_results() {
	if (($# != 1)); then
		error_msg "invalid number of arguments ($#, expected 1)"
		return 1
	elif ! [[ -d "$1" ]]; then
		error_msg "directory $1 does not exist"
		return 1
	fi

	local -r results_directory="$1"

	echo -n "Processing Results: "

	# Write headers to CSVs
	printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
	printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
	printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"

	# The four types of results that we are capturing.
	# fib10 and fib40 are run sequentially.
	# fib10_con and fib40_con are run concurrently
	local -ar payloads=(fib10 fib10_con fib40 fib40_con)

	# The deadlines for each of the workloads
	# TODO: Scrape these from spec.json
	local -Ar deadlines_ms=(
		[fib10]=2
		[fib40]=3000
	)

	local payload raw_csv sorted_csv duration throughput
	local -i deadline requests oks

	for payload in "${payloads[@]}"; do
		raw_csv="$results_directory/$payload.csv"
		sorted_csv="$results_directory/$payload-response.csv"

		# Strip the _con suffix when getting the deadline
		deadline=${deadlines_ms[${payload/_con/}]}

		# Get Number of Requests, subtracting the header
		requests=$(($(wc -l < "$raw_csv") - 1))
		if ((requests <= 0)); then
			echo "$payload unexpectedly has zero requests"
			continue
		fi

		# Calculate Success Rate for csv
		# Column 1 is the response time in seconds and column 7 the HTTP status code
		awk -F, -v deadline="$deadline" -v payload="$payload" '
			NR > 1 && $7 == 200 && ($1 * 1000) <= deadline { ok++ }
			END { printf "%s,%3.5f\n", payload, (ok / (NR - 1) * 100) }
		' < "$raw_csv" >> "$results_directory/success.csv"

		# Filter on 200s, convert from s to ms, and sort
		awk -F, '$7 == 200 {print ($1 * 1000)}' < "$raw_csv" \
			| sort -g > "$sorted_csv"

		# Get Number of 200s
		oks=$(wc -l < "$sorted_csv")
		((oks == 0)) && continue # If all errors, skip line

		# We determine duration by looking at the timestamp of the last complete request
		# TODO: Should this instead just use the client-side synthetic duration_sec value?
		duration=$(tail -n1 "$raw_csv" | cut -d, -f8)

		# Throughput is calculated as the mean number of successful requests per second
		# Computed in floating point so the fraction is kept; a zero or missing duration yields 0
		throughput=$(awk -v oks="$oks" -v duration="$duration" \
			'BEGIN { printf "%f", (duration + 0 > 0) ? oks / duration : 0 }')
		printf "%s,%s\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"

		# Generate Latency Data for csv
		# Ranks are clamped to 1 so that small samples still emit every percentile column
		awk -v oks="$oks" -v payload="$payload" '
			function rank(fraction,    r) {
				r = int(oks * fraction)
				return (r < 1) ? 1 : r
			}
			BEGIN {
				p50 = rank(0.5)
				p90 = rank(0.9)
				p99 = rank(0.99)
				p100 = oks
				printf "%s,", payload
			}
			NR == p50 { printf "%1.4f,", $0 }
			NR == p90 { printf "%1.4f,", $0 }
			NR == p99 { printf "%1.4f,", $0 }
			NR == p100 { printf "%1.4f\n", $0 }
		' < "$sorted_csv" >> "$results_directory/latency.csv"

		# Delete scratch file used for sorting/counting
		# rm -rf "$results_directory/$payload-response.csv"
	done

	# Transform csvs to dat files for gnuplot
	csv_to_dat "$results_directory/success.csv" "$results_directory/throughput.csv" "$results_directory/latency.csv"

	# Generate gnuplots. Commented out because we don't have *.gnuplots defined
	# generate_gnuplots
}
|
||||
|
||||
main "$@"
|
@ -1,19 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Executes the runtime in GDB
|
||||
# Substitutes the absolute path from the container with a path relatively derived from the location of this script
|
||||
# This allows debugging outside of the Docker container
|
||||
# Also disables pagination and stopping on SIGUSR1
|
||||
|
||||
# Resolve the directories this script depends on, relative to where it is invoked
experiment_directory="$(pwd)"
project_directory="$(cd ../.. && pwd)"
binary_directory="$(cd "${project_directory}/bin" && pwd)"

# Put the binary directory first on both lookup paths so gdb finds sledgert
LD_LIBRARY_PATH="${binary_directory}:${LD_LIBRARY_PATH}"
PATH="${binary_directory}:${PATH}"
export LD_LIBRARY_PATH PATH

# gdb commands, evaluated in order before control is handed to the user
gdb_commands=(
	--eval-command="handle SIGUSR1 nostop"
	--eval-command="handle SIGPIPE nostop"
	--eval-command="set pagination off"
	--eval-command="set substitute-path /sledge/runtime $project_directory"
	--eval-command="run $experiment_directory/spec.json"
)

gdb "${gdb_commands[@]}" sledgert
|
@ -1,14 +1,16 @@
|
||||
{
|
||||
"active": true,
|
||||
"name": "empty",
|
||||
"path": "empty_wasm.so",
|
||||
"port": 10000,
|
||||
"relative-deadline-us": 50000,
|
||||
"argsize": 1,
|
||||
"http-req-headers": [],
|
||||
"http-req-content-type": "text/plain",
|
||||
"http-req-size": 1024,
|
||||
"http-resp-headers": [],
|
||||
"http-resp-size": 1024,
|
||||
"http-resp-content-type": "text/plain"
|
||||
"active": true,
|
||||
"name": "empty",
|
||||
"path": "empty_wasm.so",
|
||||
"port": 10000,
|
||||
"expected-execution-us": 500,
|
||||
"admissions-percentile": 70,
|
||||
"relative-deadline-us": 50000,
|
||||
"argsize": 1,
|
||||
"http-req-headers": [],
|
||||
"http-req-content-type": "text/plain",
|
||||
"http-req-size": 1024,
|
||||
"http-resp-headers": [],
|
||||
"http-resp-size": 1024,
|
||||
"http-resp-content-type": "text/plain"
|
||||
}
|
||||
|
@ -1,48 +0,0 @@
|
||||
# Admissions Control
|
||||
|
||||
## Discussion of Implementation
|
||||
|
||||
The admissions control subsystem seeks to ensure that the system does not accept more work than it can execute while meeting the relative deadline defined in a module's JSON specification.
|
||||
|
||||
The system maintains an integral value expressing the capacity of the system as millionths of a worker core. This assumes that the runtime has "pinned" these workers to underlying processors and has no contention with other workloads.
|
||||
|
||||
The system maintains a second integral value expressing the total accepted work.
|
||||
|
||||
The module specification provides a relative deadline, an expected execution time, and a percentile target expressing the pXX latency that the admissions control system should use when making admissions decisions (tunable from 50% to 99%). Tuning this percentile expresses how conservative the system should be with regard to scheduling. Selecting a lower value, such as 50%, reserves less processor time and results in a higher likelihood that the relative deadline is not met. Selecting a higher value, such as 99%, reserves more processor time and provides a higher likelihood that the relative deadline will be met. The provided expected execution time is assumed to match the percentile provided.
|
||||
|
||||
Dividing the expected execution time by the relative deadline yields the fraction of a worker needed to meet the deadline.
|
||||
|
||||
If the existing accepted workload plus the required work of this new workload is less than the system capacity, the workload is accepted, and the integral value expressing the total accepted work is increased. The resulting sandbox request is tagged with the fraction of a worker it was calculated to use, and when the request completes, the total accepted work is decreased by this amount.
|
||||
|
||||
If the existing accepted workload plus the required work of this new workload is greater than the system capacity, the request is rejected and the runtime sends the client an HTTP 503 response.
|
||||
|
||||
While the module specification provides an expected execution time, the system does not trust this value and only uses it in the absence of better information. Each sandbox is profiled as it runs through the system, and the end-to-end execution times of successful sandbox requests are added to a specialized performance window data structure that stores the last N execution times sorted in order of execution time. This structure optimizes for quick lookups of a specific pXX percentile.
|
||||
|
||||
Once data is seeded into this data structure, the initial execution estimate provided in the module specification is ignored, and the pXX target is instead used to lookup the actual pXX performance metric.
|
||||
|
||||
Future Work:
|
||||
|
||||
Currently, the scheduler takes no action when an executing sandbox exceeds its pXX execution time or deadline.
|
||||
|
||||
In the case of the pXX workload, this means that a workload configured to target p50 during admissions control decisions with exceptionally poor p99 performance causes system-wide overheads that can cause other systems to miss their deadlines.
|
||||
|
||||
Even worse, when executing beyond the relative deadline, the request might be too stale for the client.
|
||||
|
||||
In the absolute worst case, one can imagine a client workload caught in an infinite loop that causes permanent head of line blocking because its deadline is earlier than the current time, such that nothing can possibly preempt the executing workload.
|
||||
|
||||
## Question
|
||||
|
||||
- Does Admissions Control guarantee that deadlines are met?
|
||||
|
||||
## Independent Variable
|
||||
|
||||
Deadline is disabled versus deadline is enabled
|
||||
|
||||
## Invariants
|
||||
|
||||
Single workload
|
||||
Use FIFO policy
|
||||
|
||||
## Dependent Variables
|
||||
|
||||
End-to-end execution time of a workload, measured from the client, relative to its deadline
|
@ -1,562 +0,0 @@
|
||||
#!/bin/bash
|
||||
source ../common.sh
|
||||
|
||||
# This experiment is intended to document how the level of concurrent requests influence the latency, throughput, and success/failure rate
|
||||
# Success - The percentage of requests that complete by their deadlines
|
||||
# TODO: Does this handle non-200s?
|
||||
# Throughput - The mean number of successful requests per second
|
||||
# Latency - the round-trip response time (unit?) of successful requests at the p50, p90, p99, and p100 percentiles
|
||||
|
||||
# Use -d flag if running under gdb
|
||||
# TODO: Just use ENV for policy and other runtime dynamic variables?
|
||||
# Prints the command-line help text to stdout, one line per option
usage() {
	printf '%s\n' \
		"$0 [options...]" \
		"" \
		"Options:" \
		" -t,--target=<target url> Execute as client against remote URL" \
		" -s,--serve=<EDF|FIFO> Serve with scheduling policy, but do not run client" \
		" -d,--debug=<EDF|FIFO> Debug under GDB with scheduling policy, but do not run client" \
		" -p,--perf=<EDF|FIFO> Run under perf with scheduling policy. Run on baremetal Linux host!"
}
|
||||
|
||||
# Declares application level global state
# Globals written (read-only): timestamp, experiment_directory, binary_directory, perf_window_buffer_size
# Globals written (mutable until parse_arguments runs): target, policy, role
# Exports: PATH and LD_LIBRARY_PATH (binary directory prepended), SLEDGE_NWORKERS
# Paths are resolved relative to the current working directory
initialize_globals() {
	# timestamp is used to name the results directory for a particular test run
	# shellcheck disable=SC2155
	declare -gir timestamp="$(date +%s)"

	# shellcheck disable=SC2155
	declare -gr experiment_directory="$(pwd)"

	# shellcheck disable=SC2155
	declare -gr binary_directory="$(cd ../../bin && pwd)"

	# Scrape the perf window size from the source if possible
	# The scraped text is validated explicitly because the exit status of a `grep | cut`
	# pipeline is the status of cut, which succeeds even when the header or the #define is missing.
	local -r perf_window_path="../../include/perf_window.h"
	local scraped_size=""
	if [[ -r "$perf_window_path" ]]; then
		scraped_size=$(awk '$1 == "#define" && $2 == "PERF_WINDOW_BUFFER_SIZE" { print $3; exit }' "$perf_window_path")
	fi
	if ! [[ "$scraped_size" =~ ^[1-9][0-9]*$ ]]; then
		echo "Failed to scrape PERF_WINDOW_BUFFER_SIZE from ../../include/perf_window.h"
		echo "Defaulting to 16"
		scraped_size=16
	fi
	# -g is required: a plain declare inside a function would only create a local
	declare -gir perf_window_buffer_size="$scraped_size"

	# Globals used by parse_arguments
	declare -g target=""
	declare -g policy=""
	declare -g role=""

	# Configure environment variables
	export PATH="$binary_directory:$PATH"
	export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH"
	export SLEDGE_NWORKERS=5
}
|
||||
|
||||
# Parses arguments from the user and sets associated global state
# Globals: role, policy, and target (written, then marked read-only on success)
# Arguments: the script's command-line options, each in -x=value or --long=value form
# Returns 0 on success and 1 (after printing usage) on an invalid or conflicting option
# -h,--help prints usage and exits the shell with status 0
parse_arguments() {
	local arg flags new_role
	for arg in "$@"; do
		case "$arg" in
			-t=* | --target=*)
				# The client role cannot be combined with any role that launches the runtime
				if [[ "$role" =~ ^(server|debug|perf)$ ]]; then
					echo "Cannot use -t,--target with -s,--serve, -d,--debug, or -p,--perf"
					usage
					return 1
				fi
				role=client
				target="${arg#*=}"
				;;
			-s=* | --serve=* | -d=* | --debug=* | -p=* | --perf=*)
				# These three options share their handling and differ only in the role they select
				case "$arg" in
					-s=* | --serve=*) flags="-s,--serve" new_role=server ;;
					-d=* | --debug=*) flags="-d,--debug" new_role=debug ;;
					*) flags="-p,--perf" new_role=perf ;;
				esac
				if [[ "$role" == "client" ]]; then
					echo "Cannot use $flags with -t,--target"
					usage
					return 1
				fi
				role="$new_role"
				policy="${arg#*=}"
				if [[ ! $policy =~ ^(EDF|FIFO)$ ]]; then
					echo "\"$policy\" is not a valid policy. EDF or FIFO allowed"
					usage
					return 1
				fi
				;;
			-h | --help)
				usage
				exit 0
				;;
			*)
				echo "$arg is not a valid option"
				usage
				return 1
				;;
		esac
	done

	# default to both if no arguments were passed
	if [[ -z "$role" ]]; then
		role="both"
	fi

	# Set globals as read only
	# readonly is used because declare -r inside a function would create new locals instead
	readonly target policy role
}
|
||||
|
||||
# Starts the Sledge Runtime
# $1 (scheduler) - the scheduling policy, EDF or FIFO, passed to the runtime via SLEDGE_SCHEDULER
# $2 (results_directory) - an existing directory that receives log.txt
# $3 (how_to_run="background") - optional: foreground or background. Defaults to background
# Globals: experiment_directory (read) locates spec.json
# Returns 1 on invalid arguments, 0 otherwise
start_runtime() {
	printf "Starting Runtime: "
	if (($# < 2 || $# > 3)); then
		printf "[ERR]\n"
		error_msg "invalid number of arguments ($#, expected 2 or 3)"
		return 1
	elif ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
		printf "[ERR]\n"
		error_msg "expected EDF or FIFO was \"$1\""
		return 1
	elif ! [[ -d "$2" ]]; then
		printf "[ERR]\n"
		error_msg "directory \"$2\" does not exist"
		return 1
	elif ! [[ ${3:-background} =~ ^(foreground|background)$ ]]; then
		# Validate the defaulted value so that omitting $3 is accepted
		printf "[ERR]\n"
		error_msg "expected foreground or background was \"$3\""
		return 1
	fi

	local -r scheduler="$1"
	local -r results_directory="$2"
	local -r how_to_run="${3:-background}"

	local -r log_name=log.txt
	local log="$results_directory/${log_name}"

	log_environment >> "$log"

	case "$how_to_run" in
		"background")
			# stdout and stderr of the runtime are both appended to the log
			SLEDGE_SCHEDULER="$scheduler" \
				sledgert "$experiment_directory/spec.json" >> "$log" 2>> "$log" &
			;;
		"foreground")
			SLEDGE_SCHEDULER="$scheduler" \
				sledgert "$experiment_directory/spec.json"
			;;
	esac

	printf "[OK]\n"
	return 0
}
|
||||
|
||||
# Sends requests until the per-module perf window buffers are full
# This ensures that Sledge has accurate estimates of execution time
# $1 (hostname="localhost") - an optional parameter that sets the hostname. Defaults to localhost
# Globals: perf_window_buffer_size (read) - number of requests sent per module; 16 is used if it is unset
# Returns 0 if both sample runs succeed, 1 as soon as either hey invocation fails
run_samples() {
	local hostname="${1:-localhost}"
	local -r sample_count="${perf_window_buffer_size:-16}"

	echo -n "Running Samples: "

	hey -n "$sample_count" -c "$sample_count" -cpus 3 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10040" 1> /dev/null 2> /dev/null || {
		error_msg "fib40 samples failed"
		return 1
	}

	# fib10 listens on 10010, the same port run_experiments uses for this workload
	hey -n "$sample_count" -c "$sample_count" -cpus 3 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:10010" 1> /dev/null 2> /dev/null || {
		error_msg "fib10 samples failed"
		return 1
	}

	echo "[OK]"
	return 0
}
|
||||
|
||||
# Execute the fib10 and fib40 experiments sequentially and concurrently
# $1 (results_directory) - a directory where we will store our results
# $2 (hostname="localhost") - an optional parameter that sets the hostname. Defaults to localhost
# Writes fib40.csv, fib10.csv, fib40_con.csv, and fib10_con.csv into the results directory
# Returns 0 when all four runs produce results, 1 on the first failure
run_experiments() {
	if (($# < 1 || $# > 2)); then
		error_msg "invalid number of arguments ($#, expected 1 or 2)"
		return 1
	elif ! [[ -d "$1" ]]; then
		error_msg "directory \"$1\" does not exist"
		return 1
	fi

	local -r results_directory="$1"
	local -r hostname="${2:-localhost}"

	# The duration in seconds that we want the client to send requests
	local -ir duration_sec=15

	# The duration in seconds that the low priority task should run before the high priority task starts
	local -ir offset=5

	printf "Running Experiments\n"

	# Run each separately
	# The fib argument doubles as the port suffix: fib40 -> 10040, fib10 -> 10010
	local workload fib_arg
	for workload in fib40 fib10; do
		fib_arg="${workload#fib}"
		printf "\t%s: " "$workload"
		hey -z "${duration_sec}s" -cpus 4 -c 100 -t 0 -o csv -m GET -d "${fib_arg}\n" "http://${hostname}:100${fib_arg}" > "$results_directory/$workload.csv" 2> /dev/null || {
			printf "[ERR]\n"
			error_msg "$workload failed"
			return 1
		}
		get_result_count "$results_directory/$workload.csv" || {
			printf "[ERR]\n"
			error_msg "$workload unexpectedly has zero requests"
			return 1
		}
		printf "[OK]\n"
	done

	# Run concurrently
	# The lower priority has offsets to ensure it runs the entire time the high priority is trying to run
	# This asynchronously triggers jobs and then waits on their pids
	local fib40_con_PID
	local fib10_con_PID

	hey -z "$((duration_sec + 2 * offset))s" -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10040" > "$results_directory/fib40_con.csv" 2> /dev/null &
	fib40_con_PID="$!"

	sleep "$offset"

	hey -z "${duration_sec}s" -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:10010" > "$results_directory/fib10_con.csv" 2> /dev/null &
	fib10_con_PID="$!"

	# A plain wait blocks until the process terminates when job control is off (as in a script),
	# so the bash 5 only -f flag is not needed
	local -i fib10_con_failed=0
	if ! wait "$fib10_con_PID"; then
		printf "\tfib10_con: [ERR]\n"
		error_msg "fib10_con (pid ${fib10_con_PID}) exited with a non-zero status"
		fib10_con_failed=1
	elif ! get_result_count "$results_directory/fib10_con.csv"; then
		printf "\tfib10_con: [ERR]\n"
		error_msg "fib10_con has zero requests. This might be because fib40_con saturated the runtime"
		fib10_con_failed=1
	fi

	if ((fib10_con_failed)); then
		# Do not leave the low-priority load generator running after we give up
		kill "$fib40_con_PID" 2> /dev/null
		wait "$fib40_con_PID" 2> /dev/null
		return 1
	fi
	printf "\tfib10_con: [OK]\n"

	wait "$fib40_con_PID" || {
		printf "\tfib40_con: [ERR]\n"
		error_msg "fib40_con (pid ${fib40_con_PID}) exited with a non-zero status"
		return 1
	}
	get_result_count "$results_directory/fib40_con.csv" || {
		printf "\tfib40_con: [ERR]\n"
		error_msg "fib40_con has zero requests."
		return 1
	}
	printf "\tfib40_con: [OK]\n"

	return 0
}
|
||||
|
||||
# Process the experimental results and generate human-friendly results for success rate, throughput, and latency
# $1 (results_directory) - directory holding the fib10, fib10_con, fib40, and fib40_con CSVs written by hey
# Appends to success.csv, throughput.csv, and latency.csv in that directory and leaves a sorted
# <payload>-response.csv scratch file per payload
# Returns 1 on invalid arguments; otherwise the status of csv_to_dat
process_results() {
	if (($# != 1)); then
		error_msg "invalid number of arguments ($#, expected 1)"
		return 1
	elif ! [[ -d "$1" ]]; then
		error_msg "directory $1 does not exist"
		return 1
	fi

	local -r results_directory="$1"

	echo -n "Processing Results: "

	# Write headers to CSVs
	printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
	printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
	printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"

	# The four types of results that we are capturing.
	# fib10 and fib40 are run sequentially.
	# fib10_con and fib40_con are run concurrently
	local -ar payloads=(fib10 fib10_con fib40 fib40_con)

	# The deadlines for each of the workloads
	# TODO: Scrape these from spec.json
	local -Ar deadlines_ms=(
		[fib10]=2
		[fib40]=3000
	)

	local payload raw_csv sorted_csv duration throughput
	local -i deadline requests oks

	for payload in "${payloads[@]}"; do
		raw_csv="$results_directory/$payload.csv"
		sorted_csv="$results_directory/$payload-response.csv"

		# Strip the _con suffix when getting the deadline
		deadline=${deadlines_ms[${payload/_con/}]}

		# Get Number of Requests, subtracting the header
		requests=$(($(wc -l < "$raw_csv") - 1))
		if ((requests <= 0)); then
			echo "$payload unexpectedly has zero requests"
			continue
		fi

		# Calculate Success Rate for csv
		# Column 1 is the response time in seconds and column 7 the HTTP status code
		awk -F, -v deadline="$deadline" -v payload="$payload" '
			NR > 1 && $7 == 200 && ($1 * 1000) <= deadline { ok++ }
			END { printf "%s,%3.5f\n", payload, (ok / (NR - 1) * 100) }
		' < "$raw_csv" >> "$results_directory/success.csv"

		# Filter on 200s, convert from s to ms, and sort
		awk -F, '$7 == 200 {print ($1 * 1000)}' < "$raw_csv" \
			| sort -g > "$sorted_csv"

		# Get Number of 200s
		oks=$(wc -l < "$sorted_csv")
		((oks == 0)) && continue # If all errors, skip line

		# We determine duration by looking at the timestamp of the last complete request
		# TODO: Should this instead just use the client-side synthetic duration_sec value?
		duration=$(tail -n1 "$raw_csv" | cut -d, -f8)

		# Throughput is calculated as the mean number of successful requests per second
		# Computed in floating point so the fraction is kept; a zero or missing duration yields 0
		throughput=$(awk -v oks="$oks" -v duration="$duration" \
			'BEGIN { printf "%f", (duration + 0 > 0) ? oks / duration : 0 }')
		printf "%s,%s\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"

		# Generate Latency Data for csv
		# Ranks are clamped to 1 so that small samples still emit every percentile column
		awk -v oks="$oks" -v payload="$payload" '
			function rank(fraction,    r) {
				r = int(oks * fraction)
				return (r < 1) ? 1 : r
			}
			BEGIN {
				p50 = rank(0.5)
				p90 = rank(0.9)
				p99 = rank(0.99)
				p100 = oks
				printf "%s,", payload
			}
			NR == p50 { printf "%1.4f,", $0 }
			NR == p90 { printf "%1.4f,", $0 }
			NR == p99 { printf "%1.4f,", $0 }
			NR == p100 { printf "%1.4f\n", $0 }
		' < "$sorted_csv" >> "$results_directory/latency.csv"

		# Delete scratch file used for sorting/counting
		# rm -rf "$results_directory/$payload-response.csv"
	done

	# Transform csvs to dat files for gnuplot
	csv_to_dat "$results_directory/success.csv" "$results_directory/throughput.csv" "$results_directory/latency.csv"

	# Generate gnuplots. Commented out because we don't have *.gnuplots defined
	# generate_gnuplots
}
|
||||
|
||||
# Starts the runtime for the given scheduling policy
# $1 (scheduler) - EDF or FIFO
# Globals: role, experiment_directory, timestamp (read)
# With role "both" the runtime is started in the background and logs under res/<timestamp>/<scheduler>;
# with role "server" it runs in the foreground and logs under res/<timestamp>
# Returns 0 on success, 1 on bad arguments, an unexpected role, or a start_runtime failure
run_server() {
	if (($# != 1)); then
		error_msg "invalid number of arguments \"$1\""
		return 1
	fi
	if ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
		error_msg "expected EDF or FIFO was \"$1\""
		return 1
	fi

	local -r sched="$1"
	local out_dir run_mode

	case "$role" in
		both)
			out_dir="$experiment_directory/res/$timestamp/$sched"
			run_mode="background"
			;;
		server)
			out_dir="$experiment_directory/res/$timestamp"
			run_mode="foreground"
			;;
		*)
			error_msg "Unexpected $role"
			return 1
			;;
	esac

	mkdir -p "$out_dir"

	# Capture the status explicitly so it can be reported before bailing out
	local -i rc=0
	start_runtime "$sched" "$out_dir" "$run_mode" || rc=$?
	if ((rc != 0)); then
		echo "start_runtime RC: $rc"
		error_msg "Error calling start_runtime $sched $out_dir"
		return 1
	fi

	return 0
}
|
||||
|
||||
# Runs the Sledge runtime under perf record with the given scheduling policy
# $1 (scheduler) - EDF or FIFO, passed to the runtime via SLEDGE_SCHEDULER
# Globals: experiment_directory (read) locates spec.json
# Returns 1 on invalid arguments or when perf is not available; otherwise the status of perf record
run_perf() {
	if (($# != 1)); then
		printf "[ERR]\n"
		error_msg "invalid number of arguments ($#, expected 1)"
		return 1
	elif ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
		printf "[ERR]\n"
		error_msg "expected EDF or FIFO was \"$1\""
		return 1
	fi

	# Only the exit status of command -v matters; discard the path it prints
	if ! command -v perf > /dev/null; then
		echo "perf is not present."
		return 1
	fi

	local -r scheduler="$1"

	SLEDGE_SCHEDULER="$scheduler" perf record -g -s sledgert "$experiment_directory/spec.json"
}
|
||||
|
||||
# Starts the Sledge Runtime under GDB
# $1 (scheduler) - EDF or FIFO, passed to the runtime via SLEDGE_SCHEDULER
# Globals: experiment_directory (read) locates spec.json
# When the project is not located at /sledge/runtime, gdb is told to substitute that path with
# the directory two levels above the current working directory
# Returns 1 on invalid arguments, 0 once gdb exits
run_debug() {
	# shellcheck disable=SC2155
	local project_directory=$(cd ../.. && pwd)

	if (($# != 1)); then
		printf "[ERR]\n"
		error_msg "invalid number of arguments \"$1\""
		return 1
	fi
	if ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
		printf "[ERR]\n"
		error_msg "expected EDF or FIFO was \"$1\""
		return 1
	fi

	local -r scheduler="$1"

	# gdb commands common to both environments, in evaluation order
	local -a gdb_commands=(
		--eval-command="handle SIGUSR1 nostop"
		--eval-command="handle SIGPIPE nostop"
		--eval-command="set pagination off"
	)

	if [[ "$project_directory" != "/sledge/runtime" ]]; then
		printf "It appears that you are not running in the container. Substituting path to match host environment\n"
		gdb_commands+=(--eval-command="set substitute-path /sledge/runtime $project_directory")
	fi

	# The run command must come last so the settings above are in effect
	gdb_commands+=(--eval-command="run $experiment_directory/spec.json")

	SLEDGE_SCHEDULER="$scheduler" gdb "${gdb_commands[@]}" sledgert

	return 0
}
|
||||
|
||||
# Runs the client side of the experiment: warm-up samples, the experiments, then result processing
# Globals read: role, target, experiment_directory, timestamp
# With role "both" it also reads scheduler, the loop variable set by run_both
# Results go to res/<timestamp>/<scheduler> (role "both") or res/<timestamp> (role "client")
# Returns 0 on success, 1 on an unexpected role or when any step fails
run_client() {
	local results_directory
	case "$role" in
		both)
			results_directory="$experiment_directory/res/$timestamp/$scheduler"
			;;
		client)
			results_directory="$experiment_directory/res/$timestamp"
			;;
		*)
			error_msg "${FUNCNAME[0]} Unexpected $role"
			return 1
			;;
	esac

	mkdir -p "$results_directory" || {
		error_msg "Error creating $results_directory"
		return 1
	}

	run_samples "$target" || {
		error_msg "Error calling run_samples $target"
		return 1
	}

	# Pass the target along so the experiments hit the same host as the samples
	# An empty target makes run_experiments fall back to localhost
	run_experiments "$results_directory" "$target" || {
		error_msg "Error calling run_experiments $results_directory"
		return 1
	}

	process_results "$results_directory" || {
		error_msg "Error calling process_results $results_directory"
		return 1
	}

	echo "[OK]"
	return 0
}
|
||||
|
||||
# Runs the server and the client back to back for each scheduling policy (EDF, then FIFO)
# The loop variable "scheduler" is deliberately not local-renamed: run_client reads it
# Returns 0 when every policy completes, 1 on the first failing step
run_both() {
	local -ar policies_to_run=(EDF FIFO)

	for scheduler in "${policies_to_run[@]}"; do
		printf "Running %s\n" "$scheduler"

		if ! run_server "$scheduler"; then
			error_msg "Error calling run_server"
			return 1
		fi

		if ! run_client; then
			error_msg "Error calling run_client"
			# Tear the runtime down before giving up
			kill_runtime
			return 1
		fi

		if ! kill_runtime; then
			error_msg "Error calling kill_runtime"
			return 1
		fi
	done

	return 0
}
|
||||
|
||||
# Entry point: sets up globals, parses the command line, and dispatches on the selected role
# Always exits the shell: status 1 when argument parsing fails or the role is unknown,
# otherwise the status of the function that ran
main() {
	initialize_globals

	if ! parse_arguments "$@"; then
		exit 1
	fi

	local -i rc=0
	case $role in
		both) run_both || rc=$? ;;
		server) run_server "$policy" || rc=$? ;;
		debug) run_debug "$policy" || rc=$? ;;
		perf) run_perf "$policy" || rc=$? ;;
		client) run_client || rc=$? ;;
		*)
			echo "Invalid state"
			rc=1
			;;
	esac

	exit "$rc"
}
|
||||
|
||||
main "$@"
|
@ -1 +0,0 @@
|
||||
res
|
@ -1,28 +0,0 @@
|
||||
# Preemption
|
||||
|
||||
## Question
|
||||
|
||||
- How do mixed criticality workloads perform under the Sledge scheduler policies?
|
||||
- How does the latency of a high criticality workload that triggers preemption on a system under load compare to being the only workload on the system?
|
||||
- What is the slowdown on the low priority workload?
|
||||
- How does this affect aggregate throughput?
|
||||
|
||||
## Setup
|
||||
|
||||
The system is configured with admission control disabled.
|
||||
|
||||
The driver script drives a bimodal distribution of long-running low-priority and short-running high-priority workloads
|
||||
|
||||
Relative Deadlines are tuned such that the scheduler should always preempt the low-priority workload for the high-priority workload.
|
||||
|
||||
A driver script runs the two workloads separately as a baseline.
|
||||
|
||||
It then runs them concurrently, starting the low-priority long-running workload first such that the system begins execution and accumulates requests in the data structures. The high-priority short-running workload then begins.
|
||||
|
||||
## Independent Variable
|
||||
|
||||
The Scheduling Policy: EDF versus FIFO
|
||||
|
||||
## Dependent Variables
|
||||
|
||||
Latency of high priority workload
|
@ -1,18 +0,0 @@
|
||||
#!/bin/bash
|
||||
source ../common.sh
|
||||
|
||||
# This experiment is intended to document how the level of concurrent requests influence the latency, throughput, and success/failure rate
|
||||
# Use -d flag if running under gdb
|
||||
|
||||
timestamp=$(date +%s)
|
||||
experiment_directory=$(pwd)
|
||||
binary_directory=$(cd ../../bin && pwd)
|
||||
|
||||
results_directory="$experiment_directory/res/$timestamp/$scheduler"
|
||||
log=log.txt
|
||||
|
||||
mkdir -p "$results_directory"
|
||||
log_environment >> "$results_directory/$log"
|
||||
|
||||
# Start the runtime
|
||||
PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" | tee -a "$results_directory/$log"
|
@ -1,111 +0,0 @@
|
||||
#!/bin/bash
|
||||
source ../common.sh
|
||||
|
||||
# This experiment is intended to document how the level of concurrent requests influence the latency, throughput, and success/failure rate
|
||||
# Modified to target a remote host
|
||||
|
||||
timestamp=$(date +%s)
|
||||
experiment_directory=$(pwd)
|
||||
host=192.168.1.13
|
||||
|
||||
results_directory="$experiment_directory/res/$timestamp"
|
||||
|
||||
mkdir -p "$results_directory"
|
||||
|
||||
# Start the runtime
|
||||
inputs=(40 10)
|
||||
duration_sec=30
|
||||
offset=5
|
||||
|
||||
# Execute workloads long enough for runtime to learn expected execution time
|
||||
echo -n "Running Samples: "
|
||||
for input in ${inputs[*]}; do
|
||||
hey -n 45 -c 4 -t 0 -o csv -m GET -d "$input\n" http://"$host":$((10000 + input))
|
||||
done
|
||||
echo "[DONE]"
|
||||
sleep 30
|
||||
|
||||
echo "Running Experiments"
|
||||
# Run each separately
|
||||
hey -z ${duration_sec}s -cpus 6 -c 100 -t 0 -o csv -m GET -d "10\n" "http://$host:10010" > "$results_directory/fib10.csv"
|
||||
echo "fib(10) Complete"
|
||||
sleep 60
|
||||
|
||||
hey -z ${duration_sec}s -cpus 6 -c 100 -t 0 -o csv -m GET -d "40\n" "http://$host:10040" > "$results_directory/fib40.csv"
|
||||
echo "fib(40) Complete"
|
||||
sleep 120
|
||||
|
||||
# Run lower priority first, then higher priority. The lower priority has offsets to ensure it runs the entire time the high priority is trying to run
|
||||
hey -z $((duration_sec + 2 * offset))s -cpus 3 -c 100 -t 0 -o csv -m GET -d "40\n" "http://$host:10040" > "$results_directory/fib40-con.csv" &
|
||||
sleep $offset
|
||||
hey -z ${duration_sec}s -cpus 3 -c 100 -t 0 -o csv -m GET -d "10\n" "http://$host:10010" > "$results_directory/fib10-con.csv" &
|
||||
sleep $((duration_sec + offset + 15))
|
||||
echo "fib(10) & fib(40) Complete"
|
||||
|
||||
# Generate *.csv and *.dat results
|
||||
echo -n "Parsing Results: "
|
||||
|
||||
printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
|
||||
printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
|
||||
printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
|
||||
|
||||
durations_s=(15 15 15 25)
|
||||
payloads=(fib10 fib10-con fib40 fib40-con)
|
||||
|
||||
# Summarize each payload's raw results CSV into success.csv, throughput.csv,
# and latency.csv under $results_directory.
# payloads and durations_s are parallel arrays, so they are walked by index.
# Iterating over the payload names alone never assigns i, which leaves the
# duration lookup (and the throughput division that uses it) broken.
for ((i = 0; i < ${#payloads[@]}; i++)); do
	payload=${payloads[$i]}
	duration=${durations_s[$i]}

	# Get Number of Requests (line count minus one, presumably the CSV header row)
	requests=$(($(wc -l < "$results_directory/$payload.csv") - 1))
	((requests == 0)) && continue

	# Calculate Success Rate for csv: percentage of rows whose 7th field is 200
	awk -F, -v payload="$payload" '
		$7 == 200 {ok++}
		END {printf "%s,%3.5f\n", payload, (ok / (NR - 1) * 100)}
	' < "$results_directory/$payload.csv" >> "$results_directory/success.csv"

	# Filter on 200s, convert from s to ms, and sort
	awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/$payload.csv" \
		| sort -g > "$results_directory/$payload-response.csv"

	# Get Number of 200s
	oks=$(wc -l < "$results_directory/$payload-response.csv")
	((oks == 0)) && continue # If all errors, skip line

	# Throughput: successful requests divided by this payload's duration
	# duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8)
	throughput=$(echo "$oks/$duration" | bc)
	printf "%s,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"

	# Generate Latency Data for csv
	# The response file is sorted ascending, so the row at index int(oks * p)
	# is used as the p-th percentile
	awk -v payload="$payload" -v oks="$oks" '
		BEGIN {
			p50 = int(oks * 0.5)
			p90 = int(oks * 0.9)
			p99 = int(oks * 0.99)
			p100 = oks + 0
			printf "%s,", payload
		}
		NR==p50 {printf "%1.4f,", $0}
		NR==p90 {printf "%1.4f,", $0}
		NR==p99 {printf "%1.4f,", $0}
		NR==p100 {printf "%1.4f\n", $0}
	' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"

	# Delete scratch file used for sorting/counting
	# rm -rf "$results_directory/$payload-response.csv"
done
|
||||
|
||||
# Transform csvs to dat files for gnuplot
|
||||
for file in success latency throughput; do
|
||||
echo -n "#" > "$results_directory/$file.dat"
|
||||
tr ',' ' ' < "$results_directory/$file.csv" | column -t >> "$results_directory/$file.dat"
|
||||
done
|
||||
|
||||
# Generate gnuplots. Commented out because we don't have *.gnuplots defined
|
||||
# generate_gnuplots
|
||||
|
||||
# Cleanup, if required
|
||||
echo "[DONE]"
|
@ -1,20 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Executes the runtime in GDB
|
||||
# Substitutes the absolute path from the container with a path relatively derived from the location of this script
|
||||
# This allows debugging outside of the Docker container
|
||||
# Also disables pagination and stopping on SIGUSR1
|
||||
|
||||
experiment_directory=$(pwd)
|
||||
project_directory=$(cd ../.. && pwd)
|
||||
binary_directory=$(cd "$project_directory"/bin && pwd)
|
||||
|
||||
export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH"
|
||||
export PATH="$binary_directory:$PATH"
|
||||
export SLEDGE_SCHEDULER="EDF"
|
||||
|
||||
gdb --eval-command="handle SIGUSR1 nostop" \
|
||||
--eval-command="handle SIGPIPE nostop" \
|
||||
--eval-command="set pagination off" \
|
||||
--eval-command="set substitute-path /sledge/runtime $project_directory" \
|
||||
--eval-command="run $experiment_directory/spec.json" \
|
||||
sledgert
|
@ -1,81 +0,0 @@
|
||||
#!/bin/bash
|
||||
source ../common.sh
|
||||
|
||||
# This experiment is intended to document how the level of concurrent requests influence the latency, throughput, and success/failure rate
|
||||
# Modified to target a remote host
|
||||
|
||||
timestamp=1606608313-FIFO
|
||||
experiment_directory=$(pwd)
|
||||
results_directory="$experiment_directory/res/$timestamp"
|
||||
|
||||
# Generate *.csv and *.dat results
|
||||
echo -n "Parsing Results: "
|
||||
|
||||
printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
|
||||
printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
|
||||
printf "Payload,p50,p90,p99,p998,p999,p100\n" >> "$results_directory/latency.csv"
|
||||
|
||||
durations_s=(15 15 15 25)
|
||||
payloads=(fib10 fib10-con fib40 fib40-con)
|
||||
|
||||
# Re-summarize each payload's raw results CSV into success.csv, throughput.csv,
# and latency.csv under $results_directory.
# payloads and durations_s are parallel arrays, so they are walked by index.
# Iterating over the payload names alone never assigns i, which leaves the
# duration lookup (and the throughput division that uses it) broken.
for ((i = 0; i < ${#payloads[@]}; i++)); do
	payload=${payloads[$i]}
	duration=${durations_s[$i]}

	# Get Number of Requests (line count minus one, presumably the CSV header row)
	requests=$(($(wc -l < "$results_directory/$payload.csv") - 1))
	((requests == 0)) && continue

	# Calculate Success Rate for csv: percentage of rows whose 7th field is 200
	awk -F, -v payload="$payload" '
		$7 == 200 {ok++}
		END {printf "%s,%3.5f\n", payload, (ok / (NR - 1) * 100)}
	' < "$results_directory/$payload.csv" >> "$results_directory/success.csv"

	# Filter on 200s, convert from s to ms, and sort
	awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/$payload.csv" \
		| sort -g > "$results_directory/$payload-response.csv"

	# Get Number of 200s
	oks=$(wc -l < "$results_directory/$payload-response.csv")
	((oks == 0)) && continue # If all errors, skip line

	# Throughput: successful requests divided by this payload's duration
	# duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8)
	throughput=$(echo "$oks/$duration" | bc)
	printf "%s,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"

	# Generate Latency Data for csv
	# The response file is sorted ascending, so the row at index int(oks * p)
	# is used as the p-th percentile
	awk -v payload="$payload" -v oks="$oks" '
		BEGIN {
			p50 = int(oks * 0.5)
			p90 = int(oks * 0.9)
			p99 = int(oks * 0.99)
			p998 = int(oks * 0.998)
			p999 = int(oks * 0.999)
			p100 = oks + 0
			printf "%s,", payload
		}
		NR==p50 {printf "%1.4f,", $0}
		NR==p90 {printf "%1.4f,", $0}
		NR==p99 {printf "%1.4f,", $0}
		NR==p998 {printf "%1.4f,", $0}
		NR==p999 {printf "%1.4f,", $0}
		NR==p100 {printf "%1.4f\n", $0}
	' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"

	# Delete scratch file used for sorting/counting
	# rm -rf "$results_directory/$payload-response.csv"
done
|
||||
|
||||
# Transform csvs to dat files for gnuplot
|
||||
for file in success latency throughput; do
|
||||
echo -n "#" > "$results_directory/$file.dat"
|
||||
tr ',' ' ' < "$results_directory/$file.csv" | column -t >> "$results_directory/$file.dat"
|
||||
done
|
||||
|
||||
# Generate gnuplots. Commented out because we don't have *.gnuplots defined
|
||||
# generate_gnuplots
|
||||
|
||||
# Cleanup, if required
|
||||
echo "[DONE]"
|
@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Executes the runtime in GDB
|
||||
# Substitutes the absolute path from the container with a path relatively derived from the location of this script
|
||||
# This allows debugging outside of the Docker container
|
||||
# Also disables pagination and stopping on SIGUSR1
|
||||
|
||||
experiment_directory=$(pwd)
|
||||
project_directory=$(cd ../.. && pwd)
|
||||
binary_directory=$(cd "$project_directory"/bin && pwd)
|
||||
|
||||
export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH"
|
||||
export PATH="$binary_directory:$PATH"
|
||||
|
||||
SLEDGE_NWORKERS=5 SLEDGE_SCHEDULER=EDF perf record -g -s sledgert "$experiment_directory/spec.json"
|
@ -1,134 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# This experiment is intended to document how the level of concurrent requests influence the latency, throughput, and success/failure rate
|
||||
# Use -d flag if running under gdb
|
||||
|
||||
source ../common.sh
|
||||
|
||||
# Validate dependencies
# Abort with a non-zero status when a required tool is missing. A bare `exit`
# here would reuse the status of the preceding echo (0) and report success.
declare -a -r dependencies=(awk hey wc)
for dependency in "${dependencies[@]}"; do
	if ! command -v "$dependency" &> /dev/null; then
		echo "$dependency could not be found" >&2
		exit 1
	fi
done
|
||||
|
||||
timestamp=$(date +%s)
|
||||
experiment_directory=$(pwd)
|
||||
binary_directory=$(cd ../../bin && pwd)
|
||||
|
||||
schedulers=(EDF FIFO)
|
||||
for scheduler in ${schedulers[*]}; do
|
||||
|
||||
results_directory="$experiment_directory/res/$timestamp/$scheduler"
|
||||
log=log.txt
|
||||
|
||||
mkdir -p "$results_directory"
|
||||
log_environment >> "$results_directory/$log"
|
||||
|
||||
# Start the runtime
|
||||
if [ "$1" != "-d" ]; then
|
||||
SLEDGE_NWORKERS=5 SLEDGE_SCHEDULER=$scheduler PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" >> "$results_directory/$log" 2>> "$results_directory/$log" &
|
||||
sleep 1
|
||||
else
|
||||
echo "Running under gdb"
|
||||
echo "Running under gdb" >> "$results_directory/$log"
|
||||
fi
|
||||
|
||||
inputs=(40 10)
|
||||
duration_sec=15
|
||||
offset=5
|
||||
|
||||
# Execute workloads long enough for runtime to learn expected execution time
|
||||
echo -n "Running Samples: "
|
||||
for input in ${inputs[*]}; do
|
||||
hey -z ${duration_sec}s -cpus 3 -t 0 -o csv -m GET -d "$input\n" http://localhost:$((10000 + input))
|
||||
done
|
||||
echo "[DONE]"
|
||||
sleep 5
|
||||
|
||||
echo "Running Experiments"
|
||||
# Run each separately
|
||||
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 > "$results_directory/fib40.csv"
|
||||
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 > "$results_directory/fib10.csv"
|
||||
|
||||
# Run lower priority first, then higher priority. The lower priority has offsets to ensure it runs the entire time the high priority is trying to run
|
||||
hey -z $((duration_sec + 2 * offset))s -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 > "$results_directory/fib40-con.csv" &
|
||||
sleep $offset
|
||||
hey -z ${duration_sec}s -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 > "$results_directory/fib10-con.csv" &
|
||||
sleep $((duration_sec + offset + 15))
|
||||
|
||||
# Stop the runtime if not in debug mode
|
||||
[ "$1" != "-d" ] && kill_runtime
|
||||
|
||||
# Generate *.csv and *.dat results
|
||||
echo -n "Parsing Results: "
|
||||
|
||||
printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
|
||||
printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
|
||||
printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
|
||||
|
||||
deadlines_ms=(2 2 3000 3000)
|
||||
payloads=(fib10 fib10-con fib40 fib40-con)
|
||||
|
||||
for ((i = 0; i < 4; i++)); do
|
||||
# for payload in ${payloads[*]}; do
|
||||
payload=${payloads[$i]}
|
||||
deadline=${deadlines_ms[$i]}
|
||||
|
||||
# Get Number of Requests
|
||||
requests=$(($(wc -l < "$results_directory/$payload.csv") - 1))
|
||||
((requests == 0)) && continue
|
||||
|
||||
# Calculate Success Rate for csv
|
||||
awk -F, '
|
||||
$7 == 200 && ($1 * 1000) <= '"$deadline"' {ok++}
|
||||
END{printf "'"$payload"',%3.5f\n", (ok / (NR - 1) * 100)}
|
||||
' < "$results_directory/$payload.csv" >> "$results_directory/success.csv"
|
||||
|
||||
# Filter on 200s, convert from s to ms, and sort
|
||||
awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/$payload.csv" \
|
||||
| sort -g > "$results_directory/$payload-response.csv"
|
||||
|
||||
# Get Number of 200s
|
||||
oks=$(wc -l < "$results_directory/$payload-response.csv")
|
||||
((oks == 0)) && continue # If all errors, skip line
|
||||
|
||||
# Get Latest Timestamp
|
||||
duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8)
|
||||
throughput=$(echo "$oks/$duration" | bc)
|
||||
printf "%s,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"
|
||||
|
||||
# Generate Latency Data for csv
|
||||
awk '
|
||||
BEGIN {
|
||||
sum = 0
|
||||
p50 = int('"$oks"' * 0.5)
|
||||
p90 = int('"$oks"' * 0.9)
|
||||
p99 = int('"$oks"' * 0.99)
|
||||
p100 = '"$oks"'
|
||||
printf "'"$payload"',"
|
||||
}
|
||||
NR==p50 {printf "%1.4f,", $0}
|
||||
NR==p90 {printf "%1.4f,", $0}
|
||||
NR==p99 {printf "%1.4f,", $0}
|
||||
NR==p100 {printf "%1.4f\n", $0}
|
||||
' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"
|
||||
|
||||
# Delete scratch file used for sorting/counting
|
||||
# rm -rf "$results_directory/$payload-response.csv"
|
||||
done
|
||||
|
||||
# Transform csvs to dat files for gnuplot
|
||||
for file in success latency throughput; do
|
||||
echo -n "#" > "$results_directory/$file.dat"
|
||||
tr ',' ' ' < "$results_directory/$file.csv" | column -t >> "$results_directory/$file.dat"
|
||||
done
|
||||
|
||||
# Generate gnuplots. Commented out because we don't have *.gnuplots defined
|
||||
# generate_gnuplots
|
||||
|
||||
# Cleanup, if required
|
||||
echo "[DONE]"
|
||||
done
|
@ -1,124 +0,0 @@
|
||||
#!/bin/bash
|
||||
source ../common.sh
|
||||
|
||||
# This experiment is intended to document how the level of concurrent requests influence the latency, throughput, and success/failure rate
|
||||
# Use -d flag if running under gdb
|
||||
|
||||
timestamp=$(date +%s)
|
||||
experiment_directory=$(pwd)
|
||||
binary_directory=$(cd ../../bin && pwd)
|
||||
|
||||
schedulers=(EDF FIFO)
|
||||
for scheduler in ${schedulers[*]}; do
|
||||
|
||||
results_directory="$experiment_directory/res/$timestamp/$scheduler"
|
||||
log=log.txt
|
||||
|
||||
mkdir -p "$results_directory"
|
||||
log_environment >> "$results_directory/$log"
|
||||
|
||||
# Start the runtime
|
||||
if [ "$1" != "-d" ]; then
|
||||
SLEDGE_NWORKERS=5 SLEDGE_SCHEDULER=$scheduler PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" >> "$results_directory/$log" 2>> "$results_directory/$log" &
|
||||
sleep 1
|
||||
else
|
||||
echo "Running under gdb"
|
||||
echo "Running under gdb" >> "$results_directory/$log"
|
||||
fi
|
||||
|
||||
inputs=(40 10)
|
||||
duration_sec=15
|
||||
offset=5
|
||||
|
||||
# Execute workloads long enough for runtime to learn expected execution time
|
||||
echo -n "Running Samples: "
|
||||
for input in ${inputs[*]}; do
|
||||
hey -z ${duration_sec}s -cpus 3 -t 0 -o csv -m GET -d "$input\n" http://localhost:$((10000 + input))
|
||||
done
|
||||
echo "[DONE]"
|
||||
sleep 5
|
||||
|
||||
echo "Running Experiments"
|
||||
# Run each separately
|
||||
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 > "$results_directory/fib40.csv"
|
||||
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 > "$results_directory/fib10.csv"
|
||||
|
||||
# Run lower priority first, then higher priority. The lower priority has offsets to ensure it runs the entire time the high priority is trying to run
|
||||
hey -z $((duration_sec + 2 * offset))s -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 > "$results_directory/fib40-con.csv" &
|
||||
sleep $offset
|
||||
hey -z ${duration_sec}s -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 > "$results_directory/fib10-con.csv" &
|
||||
sleep $((duration_sec + offset + 15))
|
||||
|
||||
# Stop the runtime if not in debug mode
|
||||
[ "$1" != "-d" ] && kill_runtime
|
||||
|
||||
# Generate *.csv and *.dat results
|
||||
echo -n "Parsing Results: "
|
||||
|
||||
printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
|
||||
printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
|
||||
printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
|
||||
|
||||
deadlines_ms=(2 2 3000 3000)
|
||||
payloads=(fib10 fib10-con fib40 fib40-con)
|
||||
|
||||
# Summarize each payload's raw results CSV into success.csv, throughput.csv,
# and latency.csv under $results_directory. Latencies are written as a
# percentage of the payload's deadline, not as absolute milliseconds.
for ((i = 0; i < 4; i++)); do
	# payloads and deadlines_ms are parallel arrays indexed together
	payload=${payloads[$i]}
	deadline=${deadlines_ms[$i]}

	# Get Number of Requests (line count minus one, presumably the CSV header row)
	requests=$(($(wc -l < "$results_directory/$payload.csv") - 1))
	((requests == 0)) && continue

	# Calculate Success Rate for csv: rows whose 7th field is 200 and whose
	# first field, scaled by 1000, is within the deadline
	awk -F, -v payload="$payload" -v deadline="$deadline" '
		$7 == 200 && ($1 * 1000) <= deadline {ok++}
		END {printf "%s,%3.5f\n", payload, (ok / (NR - 1) * 100)}
	' < "$results_directory/$payload.csv" >> "$results_directory/success.csv"

	# Filter on 200s, convert from s to ms, and sort
	awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/$payload.csv" \
		| sort -g > "$results_directory/$payload-response.csv"

	# Get Number of 200s
	oks=$(wc -l < "$results_directory/$payload-response.csv")
	((oks == 0)) && continue # If all errors, skip line

	# Get Latest Timestamp (8th field of the last row) and use it as the duration
	duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8)
	throughput=$(echo "$oks/$duration" | bc)
	printf "%s,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"

	# Generate Latency Data for csv
	# A literal percent sign in an awk printf format must be written as %%;
	# a lone % followed by "," or a newline is not a valid conversion
	awk -v payload="$payload" -v oks="$oks" -v deadline="$deadline" '
		BEGIN {
			p50 = int(oks * 0.5)
			p90 = int(oks * 0.9)
			p99 = int(oks * 0.99)
			p100 = oks + 0
			printf "%s,", payload
		}
		NR==p50 {printf "%1.4f%%,", $0 / deadline * 100}
		NR==p90 {printf "%1.4f%%,", $0 / deadline * 100}
		NR==p99 {printf "%1.4f%%,", $0 / deadline * 100}
		NR==p100 {printf "%1.4f%%\n", $0 / deadline * 100}
	' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"

	# Delete scratch file used for sorting/counting
	# rm -rf "$results_directory/$payload-response.csv"
done
|
||||
|
||||
# Transform csvs to dat files for gnuplot
|
||||
for file in success latency throughput; do
|
||||
echo -n "#" > "$results_directory/$file.dat"
|
||||
tr ',' ' ' < "$results_directory/$file.csv" | column -t >> "$results_directory/$file.dat"
|
||||
done
|
||||
|
||||
# Generate gnuplots. Commented out because we don't have *.gnuplots defined
|
||||
# generate_gnuplots
|
||||
|
||||
# Cleanup, if required
|
||||
echo "[DONE]"
|
||||
done
|
@ -1,5 +0,0 @@
|
||||
|
||||
|
||||
hey -n 200 -c 200 -t 0 -m GET -d "40\n" http://localhost:10040
|
||||
|
||||
hey -n 500 -c 500 -t 0 -m GET -d "10\n" http://localhost:10010
|
@ -1,30 +0,0 @@
|
||||
{
|
||||
"active": true,
|
||||
"name": "fibonacci_10",
|
||||
"path": "fibonacci_wasm.so",
|
||||
"port": 10010,
|
||||
"expected-execution-us": 600,
|
||||
"relative-deadline-us": 2000,
|
||||
"argsize": 1,
|
||||
"http-req-headers": [],
|
||||
"http-req-content-type": "text/plain",
|
||||
"http-req-size": 1024,
|
||||
"http-resp-headers": [],
|
||||
"http-resp-size": 1024,
|
||||
"http-resp-content-type": "text/plain"
|
||||
},
|
||||
{
|
||||
"active": true,
|
||||
"name": "fibonacci_40",
|
||||
"path": "fibonacci_wasm.so",
|
||||
"port": 10040,
|
||||
"expected-execution-us": 550000,
|
||||
"relative-deadline-us": 300000000,
|
||||
"argsize": 1,
|
||||
"http-req-headers": [],
|
||||
"http-req-content-type": "text/plain",
|
||||
"http-req-size": 1024,
|
||||
"http-resp-headers": [],
|
||||
"http-resp-size": 1024,
|
||||
"http-resp-content-type": "text/plain"
|
||||
}
|
Loading…
Reference in new issue