test: centralize bimodal and refactor concurrency

main
Sean McBride 4 years ago
parent 9204ab8f16
commit d678e34ce3

@ -0,0 +1,15 @@
# Bimodal Distribution
This experiment drives a bimodal distribution of long-running low-priority and short-running high-priority workloads.
Relative deadlines are tuned such that the scheduler should always preempt the low-priority workload for the high-priority workload when preemption is enabled.
The two workloads are first run separately as a baseline. They are then run concurrently, starting the low-priority long-running workload first so that the system begins execution and accumulates requests in its data structures. The high-priority short-running workload then begins.
## Independent Variable
The Scheduling Policy: EDF versus FIFO
## Dependent Variables
Latency of high priority workload
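For reference, the fibonacci spec removed elsewhere in this commit tuned the two workloads roughly as follows, so that fib10 should always preempt fib40 under EDF:
- fibonacci_10: port 10010, expected-execution-us 600, relative-deadline-us 2000
- fibonacci_40: port 10040, expected-execution-us 550000, relative-deadline-us 300000000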

@ -0,0 +1,222 @@
#!/bin/bash
source ../common.sh
# This experiment is intended to document how the level of concurrent requests influences the latency, throughput, and success/failure rate
# Success - The percentage of requests that complete by their deadlines
# TODO: Does this handle non-200s?
# Throughput - The mean number of successful requests per second
# Latency - the round-trip response time (unit?) of successful requests at the p50, p90, p99, and p100 percentiles
# Sends requests until the per-module perf window buffers are full
# This ensures that Sledge has accurate estimates of execution time
run_samples() {
local hostname="${1:-localhost}"
# Scrape the perf window size from the source if possible
local -r perf_window_path="../../include/perf_window.h"
local -i perf_window_buffer_size
if ! perf_window_buffer_size=$(grep "#define PERF_WINDOW_BUFFER_SIZE" < "$perf_window_path" | cut -d\ -f3); then
echo "Failed to scrape PERF_WINDOW_BUFFER_SIZE from ../../include/perf_window.h"
echo "Defaulting to 16"
perf_window_buffer_size=16
fi
local -ir perf_window_buffer_size
echo -n "Running Samples: "
hey -n "$perf_window_buffer_size" -c "$perf_window_buffer_size" -cpus 3 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10040" 1> /dev/null 2> /dev/null || {
error_msg "fib40 samples failed"
return 1
}
hey -n "$perf_window_buffer_size" -c "$perf_window_buffer_size" -cpus 3 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:100010" 1> /dev/null 2> /dev/null || {
error_msg "fib10 samples failed"
return 1
}
echo "[OK]"
return 0
}
# Execute the fib10 and fib40 experiments sequentially and concurrently
# $1 (results_directory) - a directory where we will store our results
# $2 (hostname="localhost") - an optional parameter that sets the hostname. Defaults to localhost
run_experiments() {
if (($# < 1 || $# > 2)); then
error_msg "invalid number of arguments ($#, expected 1 or 2)"
return 1
elif ! [[ -d "$1" ]]; then
error_msg "directory \"$1\" does not exist"
return 1
fi
local results_directory="$1"
local hostname="${2:-localhost}"
# The duration in seconds that we want the client to send requests
local -ir duration_sec=15
# The duration in seconds that the low priority task should run before the high priority task starts
local -ir offset=5
printf "Running Experiments\n"
# Run each separately
printf "\tfib40: "
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "40\n" "http://$hostname:10040" > "$results_directory/fib40.csv" 2> /dev/null || {
printf "[ERR]\n"
error_msg "fib40 failed"
return 1
}
get_result_count "$results_directory/fib40.csv" || {
printf "[ERR]\n"
error_msg "fib40 unexpectedly has zero requests"
return 1
}
printf "[OK]\n"
printf "\tfib10: "
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "10\n" "http://$hostname:10010" > "$results_directory/fib10.csv" 2> /dev/null || {
printf "[ERR]\n"
error_msg "fib10 failed"
return 1
}
get_result_count "$results_directory/fib10.csv" || {
printf "[ERR]\n"
error_msg "fib10 unexpectedly has zero requests"
return 1
}
printf "[OK]\n"
# Run concurrently
# The lower priority has offsets to ensure it runs the entire time the high priority is trying to run
# This asynchronously triggers jobs and then waits on their PIDs (wait -f requires bash 5+)
local fib40_con_PID
local fib10_con_PID
hey -z $((duration_sec + 2 * offset))s -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10040" > "$results_directory/fib40_con.csv" 2> /dev/null &
fib40_con_PID="$!"
sleep $offset
hey -z "${duration_sec}s" -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:10010" > "$results_directory/fib10_con.csv" 2> /dev/null &
fib10_con_PID="$!"
wait -f "$fib10_con_PID" || {
printf "\tfib10_con: [ERR]\n"
error_msg "failed to wait -f ${fib10_con_PID}"
return 1
}
get_result_count "$results_directory/fib10_con.csv" || {
printf "\tfib10_con: [ERR]\n"
error_msg "fib10_con has zero requests. This might be because fib40_con saturated the runtime"
return 1
}
printf "\tfib10_con: [OK]\n"
wait -f "$fib40_con_PID" || {
printf "\tfib40_con: [ERR]\n"
error_msg "failed to wait -f ${fib40_con_PID}"
return 1
}
get_result_count "$results_directory/fib40_con.csv" || {
printf "\tfib40_con: [ERR]\n"
error_msg "fib40_con has zero requests."
return 1
}
printf "\tfib40_con: [OK]\n"
return 0
}
# Process the experimental results and generate human-friendly results for success rate, throughput, and latency
process_results() {
if (($# != 1)); then
error_msg "invalid number of arguments ($#, expected 1)"
return 1
elif ! [[ -d "$1" ]]; then
error_msg "directory $1 does not exist"
return 1
fi
local -r results_directory="$1"
echo -n "Processing Results: "
# Write headers to CSVs
printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
# The four types of results that we are capturing.
# fib10 and fib40 are run sequentially.
# fib10_con and fib40_con are run concurrently
local -ar payloads=(fib10 fib10_con fib40 fib40_con)
# The deadlines for each of the workloads
# TODO: Scrape these from spec.json
local -Ar deadlines_ms=(
[fib10]=2
[fib40]=3000
)
for payload in "${payloads[@]}"; do
# Strip the _con suffix when getting the deadline
local -i deadline=${deadlines_ms[${payload/_con/}]}
# Get Number of Requests, subtracting the header
local -i requests=$(($(wc -l < "$results_directory/$payload.csv") - 1))
((requests == 0)) && {
echo "$payload unexpectedly has zero requests"
continue
}
# Calculate Success Rate for csv
awk -F, '
$7 == 200 && ($1 * 1000) <= '"$deadline"' {ok++}
END{printf "'"$payload"',%3.5f\n", (ok / (NR - 1) * 100)}
' < "$results_directory/$payload.csv" >> "$results_directory/success.csv"
# Filter on 200s, convert from s to ms, and sort
awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/$payload.csv" \
| sort -g > "$results_directory/$payload-response.csv"
# Get Number of 200s
oks=$(wc -l < "$results_directory/$payload-response.csv")
((oks == 0)) && continue # If all errors, skip line
# We determine duration by looking at the timestamp of the last complete request
# TODO: Should this instead just use the client-side synthetic duration_sec value?
duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8)
# Throughput is calculated as the mean number of successful requests per second
throughput=$(echo "scale=4; $oks/$duration" | bc) # bc defaults to scale=0 (integer division), so set a scale
printf "%s,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"
# Generate Latency Data for csv
awk '
BEGIN {
sum = 0
p50 = int('"$oks"' * 0.5)
p90 = int('"$oks"' * 0.9)
p99 = int('"$oks"' * 0.99)
p100 = '"$oks"'
printf "'"$payload"',"
}
NR==p50 {printf "%1.4f,", $0}
NR==p90 {printf "%1.4f,", $0}
NR==p99 {printf "%1.4f,", $0}
NR==p100 {printf "%1.4f\n", $0}
' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"
# Delete scratch file used for sorting/counting
# rm -rf "$results_directory/$payload-response.csv"
done
# Transform csvs to dat files for gnuplot
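# csv_to_dat is defined in common.sh; presumably it applies the same transformation
# the pre-refactor scripts performed inline for each CSV, along the lines of:
#   echo -n "#" > "${file/.csv/.dat}"
#   tr ',' ' ' < "$file" | column -t >> "${file/.csv/.dat}"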
csv_to_dat "$results_directory/success.csv" "$results_directory/throughput.csv" "$results_directory/latency.csv"
# Generate gnuplots. Commented out because we don't have *.gnuplots defined
# generate_gnuplots
}
main "$@"

@ -86,6 +86,352 @@ get_result_count() {
fi
}
usage() {
echo "$0 [options...]"
echo ""
echo "Options:"
echo " -t,--target=<target url> Execute as client against remote URL"
echo " -s,--serve=<EDF|FIFO> Serve with scheduling policy, but do not run client"
echo " -d,--debug=<EDF|FIFO> Debug under GDB with scheduling policy, but do not run client"
echo " -p,--perf=<EDF|FIFO> Run under perf with scheduling policy. Run on baremetal Linux host!"
}
# Parses arguments from the user and sets the associated global state
parse_arguments() {
for i in "$@"; do
case $i in
-t=* | --target=*)
if [[ "$role" == "server" ]]; then
echo "Cannot set target when server"
usage
return 1
fi
role=client
target="${i#*=}"
shift
;;
-s=* | --serve=*)
if [[ "$role" == "client" ]]; then
echo "Cannot use -s,--serve with -t,--target"
usage
return 1
fi
role=server
policy="${i#*=}"
if [[ ! $policy =~ ^(EDF|FIFO)$ ]]; then
echo "\"$policy\" is not a valid policy. EDF or FIFO allowed"
usage
return 1
fi
shift
;;
-d=* | --debug=*)
if [[ "$role" == "client" ]]; then
echo "Cannot use -d,--debug with -t,--target"
usage
return 1
fi
role=debug
policy="${i#*=}"
if [[ ! $policy =~ ^(EDF|FIFO)$ ]]; then
echo "\"$policy\" is not a valid policy. EDF or FIFO allowed"
usage
return 1
fi
shift
;;
-p=* | --perf=*)
if [[ "$role" == "perf" ]]; then
echo "Cannot use -p,--perf with -t,--target"
usage
return 1
fi
role=perf
policy="${i#*=}"
if [[ ! $policy =~ ^(EDF|FIFO)$ ]]; then
echo "\"$policy\" is not a valid policy. EDF or FIFO allowed"
usage
return 1
fi
shift
;;
-h | --help)
usage
exit 0
;;
*)
echo "$1 is a not a valid option"
usage
return 1
;;
esac
done
# default to both if no arguments were passed
if [[ -z "$role" ]]; then
role="both"
fi
# Set globals as read only
declare -r target
declare -r policy
declare -r role
}
# Declares application level global state
initialize_globals() {
# timestamp is used to name the results directory for a particular test run
# shellcheck disable=SC2155
# shellcheck disable=SC2034
declare -gir timestamp=$(date +%s)
# shellcheck disable=SC2155
declare -gr experiment_directory=$(pwd)
# shellcheck disable=SC2155
declare -gr binary_directory=$(cd ../../bin && pwd)
# Globals used by parse_arguments
declare -g target=""
declare -g policy=""
declare -g role=""
# Configure environment variables
export PATH=$binary_directory:$PATH
export LD_LIBRARY_PATH=$binary_directory:$LD_LIBRARY_PATH
export SLEDGE_NWORKERS=5
}
# $1 - Scheduler Variant (EDF|FIFO)
# $2 - Results Directory
# $3 - How to run (foreground|background)
# $4 - JSON specification
start_runtime() {
printf "Starting Runtime: "
if (($# != 4)); then
printf "[ERR]\n"
error_msg "invalid number of arguments ($#, expected 4)"
return 1
elif ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
printf "[ERR]\n"
error_msg "expected EDF or FIFO was \"$1\""
return 1
elif ! [[ -d "$2" ]]; then
printf "[ERR]\n"
error_msg "directory \"$2\" does not exist"
return 1
elif ! [[ $3 =~ ^(foreground|background)$ ]]; then
printf "[ERR]\n"
error_msg "expected foreground or background was \"$3\""
return 1
elif [[ ! -f "$4" || "$4" != *.json ]]; then
printf "[ERR]\n"
error_msg "\"$4\" does not exist or is not a JSON"
return 1
fi
local -r scheduler="$1"
local -r results_directory="$2"
local -r how_to_run="$3"
local -r specification="$4"
local -r log_name=log.txt
local log="$results_directory/${log_name}"
log_environment >> "$log"
case "$how_to_run" in
"background")
SLEDGE_SCHEDULER="$scheduler" \
sledgert "$specification" >> "$log" 2>> "$log" &
;;
"foreground")
SLEDGE_SCHEDULER="$scheduler" \
sledgert "$specification"
;;
esac
printf "[OK]\n"
return 0
}
run_server() {
if (($# != 1)); then
error_msg "invalid number of arguments ($#, expected 1)"
return 1
elif ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
error_msg "expected EDF or FIFO was \"$1\""
return 1
fi
local -r scheduler="$1"
if [[ "$role" == "both" ]]; then
local -r results_directory="$experiment_directory/res/$timestamp/$scheduler"
local -r how_to_run="background"
elif [[ "$role" == "server" ]]; then
local -r results_directory="$experiment_directory/res/$timestamp"
local -r how_to_run="foreground"
else
error_msg "Unexpected $role"
return 1
fi
mkdir -p "$results_directory"
start_runtime "$scheduler" "$results_directory" "$how_to_run" "$experiment_directory/spec.json" || {
echo "start_runtime RC: $?"
error_msg "Error calling start_runtime $scheduler $results_directory"
return 1
}
return 0
}
run_perf() {
if (($# != 1)); then
printf "[ERR]\n"
error_msg "invalid number of arguments ($#, expected 1)"
return 1
elif ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
printf "[ERR]\n"
error_msg "expected EDF or FIFO was \"$1\""
return 1
fi
if ! command -v perf &> /dev/null; then
echo "perf is not present."
exit 1
fi
local -r scheduler="$1"
SLEDGE_SCHEDULER="$scheduler" perf record -g -s sledgert "$experiment_directory/spec.json"
}
# Starts the Sledge Runtime under GDB
run_debug() {
# shellcheck disable=SC2155
local project_directory=$(cd ../.. && pwd)
if (($# != 1)); then
printf "[ERR]\n"
error_msg "invalid number of arguments ($#, expected 1)"
return 1
elif ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
printf "[ERR]\n"
error_msg "expected EDF or FIFO was \"$1\""
return 1
fi
local -r scheduler="$1"
if [[ "$project_directory" != "/sledge/runtime" ]]; then
printf "It appears that you are not running in the container. Substituting path to match host environment\n"
SLEDGE_SCHEDULER="$scheduler" gdb \
--eval-command="handle SIGUSR1 nostop" \
--eval-command="handle SIGPIPE nostop" \
--eval-command="set pagination off" \
--eval-command="set substitute-path /sledge/runtime $project_directory" \
--eval-command="run $experiment_directory/spec.json" \
sledgert
else
SLEDGE_SCHEDULER="$scheduler" gdb \
--eval-command="handle SIGUSR1 nostop" \
--eval-command="handle SIGPIPE nostop" \
--eval-command="set pagination off" \
--eval-command="run $experiment_directory/spec.json" \
sledgert
fi
return 0
}
run_client() {
if [[ "$role" == "both" ]]; then
local results_directory="$experiment_directory/res/$timestamp/$scheduler"
elif [[ "$role" == "client" ]]; then
local results_directory="$experiment_directory/res/$timestamp"
else
error_msg "${FUNCNAME[0]} Unexpected $role"
return 1
fi
mkdir -p "$results_directory"
run_samples "$target" || {
error_msg "Error calling run_samples $target"
return 1
}
run_experiments "$results_directory" || {
error_msg "Error calling run_experiments $results_directory"
return 1
}
process_results "$results_directory" || {
error_msg "Error calling process_results $results_directory"
return 1
}
return 0
}
run_both() {
local -ar schedulers=(EDF FIFO)
for scheduler in "${schedulers[@]}"; do
printf "Running %s\n" "$scheduler"
run_server "$scheduler" || {
error_msg "Error calling run_server"
return 1
}
run_client || {
error_msg "Error calling run_client"
kill_runtime
return 1
}
kill_runtime || {
error_msg "Error calling kill_runtime"
return 1
}
done
return 0
}
main() {
initialize_globals
parse_arguments "$@" || {
exit 1
}
case $role in
both)
run_both
;;
server)
run_server "$policy"
;;
debug)
run_debug "$policy"
;;
perf)
run_perf "$policy"
;;
client)
run_client
;;
*)
echo "Invalid state"
false
;;
esac
exit "$?"
}
kill_runtime() {
printf "Stopping Runtime: "
pkill sledgert > /dev/null 2> /dev/null
@ -93,6 +439,8 @@ kill_runtime() {
printf "[OK]\n"
}
# Takes a variadic number of *.gnuplot filenames and generates the resulting images
# Assumes that the gnuplot definitions are in the experiment directory
generate_gnuplots() {
if ! command -v gnuplot &> /dev/null; then
echo "${FUNCNAME[0]} error: gnuplot could not be found in path"
@ -111,9 +459,9 @@ generate_gnuplots() {
exit 1
fi
cd "$results_directory" || exit
gnuplot ../../latency.gnuplot
gnuplot ../../success.gnuplot
gnuplot ../../throughput.gnuplot
for gnuplot_file in "${@}"; do
gnuplot "$experiment_directory/$gnuplot_file.gnuplot"
done
cd "$experiment_directory" || exit
}
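After this refactor, an experiment script reduces to sourcing common.sh, defining the experiment-specific hooks, and delegating to main. A minimal sketch with the hook bodies elided:

#!/bin/bash
source ../common.sh

run_samples() { :; }     # fill the per-module perf window buffers
run_experiments() { :; } # drive hey against the runtime, writing CSVs into "$1"
process_results() { :; } # aggregate success, throughput, and latency CSVs from "$1"

main "$@"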

@ -20,16 +20,6 @@ _How does increasing levels of concurrent client requests affect tail latency, t
- `hey` (https://github.com/rakyll/hey) is available in your PATH
- You have compiled `sledgert` and the `empty.so` test workload
## To Execute
1. Run `./run.sh`
2. View the results in the newest timestamped directory in `./res`
## To Debug
1. Run `./debug.sh` in a tab
2. Run `./run.sh -d` in a second tab
## TODO
- Harden scripts to validate assumptions
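
With the flags this commit centralizes in common.sh, typical invocations look like the following (policies and target shown here are illustrative):

./run.sh                # default: run both EDF and FIFO end to end
./run.sh -s=EDF         # serve with the EDF policy; run no client
./run.sh -t=localhost   # run only the client against a running server
./run.sh -d=FIFO        # run the runtime under GDB with the FIFO policy
./run.sh -p=EDF         # run the runtime under perf (baremetal Linux host)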

@ -1,19 +0,0 @@
#!/bin/bash
# Executes the runtime in GDB
# Substitutes the absolute path from the container with a path relatively derived from the location of this script
# This allows debugging outside of the Docker container
# Also disables pagination and stopping on SIGUSR1
experiment_directory=$(pwd)
project_directory=$(cd ../.. && pwd)
binary_directory=$(cd "$project_directory"/bin && pwd)
export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH"
export PATH="$binary_directory:$PATH"
gdb --eval-command="handle SIGUSR1 nostop" \
--eval-command="handle SIGPIPE nostop" \
--eval-command="set pagination off" \
--eval-command="set substitute-path /sledge/runtime $project_directory" \
--eval-command="run $experiment_directory/spec.json" \
sledgert

@ -2,80 +2,101 @@
source ../common.sh
# This experiment is intended to document how the level of concurrent requests influences the latency, throughput, and success/failure rate
# Use -d flag if running under gdb
timestamp=$(date +%s)
experiment_directory=$(pwd)
binary_directory=$(cd ../../bin && pwd)
results_directory="$experiment_directory/res/$timestamp"
log=log.txt
mkdir -p "$results_directory"
log_environment >> "$results_directory/$log"
# Start the runtime
if [ "$1" != "-d" ]; then
PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" >> "$results_directory/$log" 2>> "$results_directory/$log" &
sleep 1
else
echo "Running under gdb"
echo "Running under gdb" >> "$results_directory/$log"
fi
iterations=10000
# Execute workloads long enough for runtime to learn expected execution time
echo -n "Running Samples: "
hey -n "$iterations" -c 3 -q 200 -o csv -m GET http://localhost:10000
sleep 5
echo "[DONE]"
declare -gi iterations=10000
declare -ga concurrency=(1 20 40 60 80 100)
run_samples() {
local hostname="${1:-localhost}"
# Scrape the perf window size from the source if possible
local -r perf_window_path="../../include/perf_window.h"
local -i perf_window_buffer_size
if ! perf_window_buffer_size=$(grep "#define PERF_WINDOW_BUFFER_SIZE" < "$perf_window_path" | cut -d\ -f3); then
echo "Failed to scrape PERF_WINDOW_BUFFER_SIZE from ../../include/perf_window.h"
echo "Defaulting to 16"
perf_window_buffer_size=16
fi
local -ir perf_window_buffer_size
printf "Running Samples: "
hey -n "$perf_window_buffer_size" -c "$perf_window_buffer_size" -q 200 -cpus 3 -o csv -m GET "http://${hostname}:10000" 1> /dev/null 2> /dev/null || {
printf "[ERR]\n"
error_msg "samples failed"
return 1
}
echo "[OK]"
return 0
}
# Execute the experiments
concurrency=(1 20 40 60 80 100)
echo "Running Experiments"
for conn in ${concurrency[*]}; do
printf "\t%d Concurrency: " "$conn"
hey -n "$iterations" -c "$conn" -cpus 2 -o csv -m GET http://localhost:10000 > "$results_directory/con$conn.csv"
echo "[DONE]"
done
# Stop the runtime
if [ "$1" != "-d" ]; then
sleep 5
kill_runtime
fi
# Generate *.csv and *.dat results
echo -n "Parsing Results: "
printf "Concurrency,Success_Rate\n" >> "$results_directory/success.csv"
printf "Concurrency,Throughput\n" >> "$results_directory/throughput.csv"
printf "Con,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
for conn in ${concurrency[*]}; do
# Calculate Success Rate for csv
awk -F, '
# $1 (results_directory) - a directory where we will store our results
# $2 (hostname="localhost") - an optional parameter that sets the hostname. Defaults to localhost
run_experiments() {
if (($# < 1 || $# > 2)); then
error_msg "invalid number of arguments ($#, expected 1 or 2)"
return 1
elif ! [[ -d "$1" ]]; then
error_msg "directory \"$1\" does not exist"
return 1
fi
local results_directory="$1"
local hostname="${2:-localhost}"
# Execute the experiments
echo "Running Experiments"
for conn in ${concurrency[*]}; do
printf "\t%d Concurrency: " "$conn"
hey -n "$iterations" -c "$conn" -cpus 2 -o csv -m GET "http://$hostname:10000" > "$results_directory/con$conn.csv" 2> /dev/null
echo "[OK]"
done
}
process_results() {
if (($# != 1)); then
error_msg "invalid number of arguments ($#, expected 1)"
return 1
elif ! [[ -d "$1" ]]; then
error_msg "directory $1 does not exist"
return 1
fi
local -r results_directory="$1"
echo -n "Processing Results: "
# Write headers to CSVs
printf "Concurrency,Success_Rate\n" >> "$results_directory/success.csv"
printf "Concurrency,Throughput\n" >> "$results_directory/throughput.csv"
printf "Con,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
for conn in ${concurrency[*]}; do
# Calculate Success Rate for csv (percent of requests resulting in 200)
awk -F, '
$7 == 200 {ok++}
END{printf "'"$conn"',%3.5f\n", (ok / '"$iterations"' * 100)}
' < "$results_directory/con$conn.csv" >> "$results_directory/success.csv"
# Filter on 200s, convert from s to ms, and sort
awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/con$conn.csv" \
| sort -g > "$results_directory/con$conn-response.csv"
# Filter on 200s, convert from s to ms, and sort
awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/con$conn.csv" \
| sort -g > "$results_directory/con$conn-response.csv"
# Get Number of 200s
oks=$(wc -l < "$results_directory/con$conn-response.csv")
((oks == 0)) && continue # If all errors, skip line
# Get Number of 200s
oks=$(wc -l < "$results_directory/con$conn-response.csv")
((oks == 0)) && continue # If all errors, skip line
# We determine duration by looking at the timestamp of the last complete request
# TODO: Should this instead just use the client-side synthetic duration_sec value?
duration=$(tail -n1 "$results_directory/con$conn.csv" | cut -d, -f8)
# Get Latest Timestamp
duration=$(tail -n1 "$results_directory/con$conn.csv" | cut -d, -f8)
throughput=$(echo "$oks/$duration" | bc)
printf "%d,%f\n" "$conn" "$throughput" >> "$results_directory/throughput.csv"
# Throughput is calculated as the mean number of successful requests per second
throughput=$(echo "$oks/$duration" | bc)
printf "%d,%f\n" "$conn" "$throughput" >> "$results_directory/throughput.csv"
# Generate Latency Data for csv
awk '
# Generate Latency Data for csv
awk '
BEGIN {
sum = 0
p50 = int('"$oks"' * 0.5)
@ -90,18 +111,22 @@ for conn in ${concurrency[*]}; do
NR==p100 {printf "%1.4f\n", $0}
' < "$results_directory/con$conn-response.csv" >> "$results_directory/latency.csv"
# Delete scratch file used for sorting/counting
rm -rf "$results_directory/con$conn-response.csv"
done
# Delete scratch file used for sorting/counting
rm -rf "$results_directory/con$conn-response.csv"
done
# Transform csvs to dat files for gnuplot
for file in success latency throughput; do
echo -n "#" > "$results_directory/$file.dat"
tr ',' ' ' < "$results_directory/$file.csv" | column -t >> "$results_directory/$file.dat"
done
# Generate gnuplots
generate_gnuplots latency success throughput
# Transform csvs to dat files for gnuplot
for file in success latency throughput; do
echo -n "#" > "$results_directory/$file.dat"
tr ',' ' ' < "$results_directory/$file.csv" | column -t >> "$results_directory/$file.dat"
done
# Cleanup, if required
echo "[OK]"
# Generate gnuplots
generate_gnuplots
}
# Cleanup, if required
echo "[DONE]"
main "$@"

@ -1,14 +1,16 @@
{
"active": true,
"name": "empty",
"path": "empty_wasm.so",
"port": 10000,
"relative-deadline-us": 50000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 1024,
"http-resp-headers": [],
"http-resp-size": 1024,
"http-resp-content-type": "text/plain"
"active": true,
"name": "empty",
"path": "empty_wasm.so",
"port": 10000,
"expected-execution-us": 500,
"admissions-percentile": 70,
"relative-deadline-us": 50000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 1024,
"http-resp-headers": [],
"http-resp-size": 1024,
"http-resp-content-type": "text/plain"
}
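
The two new fields feed the admissions control accounting. A quick sanity check of the fraction of a worker this spec reserves per in-flight request, assuming the millionths-of-a-worker accounting described in the admissions control notes:

echo $((500 * 1000000 / 50000)) # expected-execution-us / relative-deadline-us in millionths: 10000, i.e. 1% of one worker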

@ -1,48 +0,0 @@
# Admissions Control
## Discussion of Implementation
The admissions control subsystem seeks to ensure that the system does not accept more work than it can execute while meeting the relative deadline defined in a module's JSON specification.
The system maintains an integral value expressing the capacity of the system as millionths of a worker core. This assumes that the runtime has "pinned" these workers to underlying processors and has no contention with other workloads.
The system maintains a second integral value expressing the total accepted work.
The module specification provides a relative deadline, an expected execution time, and a percentile target expressing the pXX latency that the admissions control system should use when making admissions decisions (tunable from 50% to 99%). Tuning this percentile expresses how conservative the system should be with regard to scheduling. Selecting a lower value, such as 50%, reserves less processor time and results in a higher likelihood that the relative deadline is not met. Selecting a higher value, such as 99%, reserves more processor time and provides a higher likelihood that the relative deadline will be met. The provided expected execution time is assumed to match the percentile provided.
Dividing the expected execution time by the relative deadline yields the fraction of a worker needed to meet the deadline.
If the existing accepted workload plus the required work of this new workload is less than the system capacity, the workload is accepted, and the integral value expressing the total accepted work is increased. The resulting sandbox request is tagged with the fraction of a worker it was calculated to use, and when the request completes, the total accepted work is decreased by this amount.
If the existing accepted workload plus the required work of this new workload is greater than the system capacity, the request is rejected and the runtime sends the client an HTTP 503 response.
While the module specification provides an expected execution time, the system does not trust this value and only uses it in the absence of better information. Each sandbox is profiled as it runs through the system, and the end-to-end execution times of successful sandbox requests are added to a specialized performance window data structure that stores the last N execution times sorted in order of execution time. This structure optimizes for quick lookups of a specific pXX percentile.
Once data is seeded into this data structure, the initial execution estimate provided in the module specification is ignored, and the pXX target is instead used to look up the actual pXX performance metric.
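A minimal bash sketch of the admissions arithmetic described above (illustrative only: the function and variable names are invented here, the five-worker capacity is an assumption, and the real logic lives in the runtime rather than in shell):

# Capacity and accepted work are tracked in millionths of a worker core
declare -i capacity=$((5 * 1000000)) # assumes SLEDGE_NWORKERS=5 pinned workers
declare -i total_accepted=0

try_admit() {
	local -i expected_execution_us="$1"
	local -i relative_deadline_us="$2"
	# Fraction of a worker needed to meet the deadline, scaled to millionths
	local -i required=$((expected_execution_us * 1000000 / relative_deadline_us))
	if ((total_accepted + required <= capacity)); then
		total_accepted=$((total_accepted + required))
		return 0 # admitted; the same amount is subtracted when the request completes
	fi
	return 1 # rejected; the runtime would send the client an HTTP 503
}

try_admit 600 2000 # 600us expected, 2ms deadline -> reserves 300000 millionths (30% of one worker)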
## Future Work
Currently, the scheduler takes no action when an executing sandbox exceeds its pXX execution time or deadline.
In the pXX case, this means that a workload configured to target p50 during admissions control decisions but with exceptionally poor p99 performance causes system-wide overheads that can cause other workloads to miss their deadlines.
Even worse, when executing beyond the relative deadline, the request might be too stale for the client.
In the absolute worst case, one can imagine a client workload caught in an infinite loop that causes permanent head of line blocking because its deadline is earlier than the current time, such that nothing can possibly preempt the executing workload.
## Question
- Does Admissions Control guarantee that deadlines are met?
## Independent Variable
Deadline is disabled versus deadline is enabled
## Invariants
Single workload
Use FIFO policy
## Dependent Variables
End-to-end execution time of a workload, measured from the client, relative to its deadline

@ -1,562 +0,0 @@
#!/bin/bash
source ../common.sh
# This experiment is intended to document how the level of concurrent requests influences the latency, throughput, and success/failure rate
# Success - The percentage of requests that complete by their deadlines
# TODO: Does this handle non-200s?
# Throughput - The mean number of successful requests per second
# Latency - the round-trip response time (unit?) of successful requests at the p50, p90, p99, and p100 percentiles
# Use -d flag if running under gdb
# TODO: Just use ENV for policy and other runtime dynamic variables?
usage() {
echo "$0 [options...]"
echo ""
echo "Options:"
echo " -t,--target=<target url> Execute as client against remote URL"
echo " -s,--serve=<EDF|FIFO> Serve with scheduling policy, but do not run client"
echo " -d,--debug=<EDF|FIFO> Debug under GDB with scheduling policy, but do not run client"
echo " -p,--perf=<EDF|FIFO> Run under perf with scheduling policy. Run on baremetal Linux host!"
}
# Declares application level global state
initialize_globals() {
# timestamp is used to name the results directory for a particular test run
# shellcheck disable=SC2155
declare -gir timestamp=$(date +%s)
# shellcheck disable=SC2155
declare -gr experiment_directory=$(pwd)
# shellcheck disable=SC2155
declare -gr binary_directory=$(cd ../../bin && pwd)
# Scrape the perf window size from the source if possible
local -r perf_window_path="../../include/perf_window.h"
declare -gi perf_window_buffer_size
if ! perf_window_buffer_size=$(grep "#define PERF_WINDOW_BUFFER_SIZE" < "$perf_window_path" | cut -d\ -f3); then
echo "Failed to scrape PERF_WINDOW_BUFFER_SIZE from ../../include/perf_window.h"
echo "Defaulting to 16"
declare -ir perf_window_buffer_size=16
fi
declare -gir perf_window_buffer_size
# Globals used by parse_arguments
declare -g target=""
declare -g policy=""
declare -g role=""
# Configure environment variables
export PATH=$binary_directory:$PATH
export LD_LIBRARY_PATH=$binary_directory:$LD_LIBRARY_PATH
export SLEDGE_NWORKERS=5
}
# Parses arguments from the user and sets the associated global state
parse_arguments() {
for i in "$@"; do
case $i in
-t=* | --target=*)
if [[ "$role" == "server" ]]; then
echo "Cannot set target when server"
usage
return 1
fi
role=client
target="${i#*=}"
shift
;;
-s=* | --serve=*)
if [[ "$role" == "client" ]]; then
echo "Cannot use -s,--serve with -t,--target"
usage
return 1
fi
role=server
policy="${i#*=}"
if [[ ! $policy =~ ^(EDF|FIFO)$ ]]; then
echo "\"$policy\" is not a valid policy. EDF or FIFO allowed"
usage
return 1
fi
shift
;;
-d=* | --debug=*)
if [[ "$role" == "client" ]]; then
echo "Cannot use -d,--debug with -t,--target"
usage
return 1
fi
role=debug
policy="${i#*=}"
if [[ ! $policy =~ ^(EDF|FIFO)$ ]]; then
echo "\"$policy\" is not a valid policy. EDF or FIFO allowed"
usage
return 1
fi
shift
;;
-p=* | --perf=*)
if [[ "$role" == "perf" ]]; then
echo "Cannot use -p,--perf with -t,--target"
usage
return 1
fi
role=perf
policy="${i#*=}"
if [[ ! $policy =~ ^(EDF|FIFO)$ ]]; then
echo "\"$policy\" is not a valid policy. EDF or FIFO allowed"
usage
return 1
fi
shift
;;
-h | --help)
usage
exit 0
;;
*)
echo "$1 is a not a valid option"
usage
return 1
;;
esac
done
# default to both if no arguments were passed
if [[ -z "$role" ]]; then
role="both"
fi
# Set globals as read only
declare -r target
declare -r policy
declare -r role
}
# Starts the Sledge Runtime
start_runtime() {
printf "Starting Runtime: "
if (($# < 2 || $# > 3)); then
printf "[ERR]\n"
error_msg "invalid number of arguments \"$1\""
return 1
elif ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
printf "[ERR]\n"
error_msg "expected EDF or FIFO was \"$1\""
return 1
elif ! [[ -d "$2" ]]; then
printf "[ERR]\n"
error_msg "directory \"$2\" does not exist"
return 1
elif ! [[ $3 =~ ^(foreground|background)$ ]]; then
printf "[ERR]\n"
error_msg "expected foreground or background was \"$3\""
return 1
fi
local -r scheduler="$1"
local -r results_directory="$2"
local -r how_to_run="${3:-background}"
local -r log_name=log.txt
local log="$results_directory/${log_name}"
log_environment >> "$log"
case "$how_to_run" in
"background")
SLEDGE_SCHEDULER="$scheduler" \
sledgert "$experiment_directory/spec.json" >> "$log" 2>> "$log" &
;;
"foreground")
SLEDGE_SCHEDULER="$scheduler" \
sledgert "$experiment_directory/spec.json"
;;
esac
printf "[OK]\n"
return 0
}
# Sends requests until the per-module perf window buffers are full
# This ensures that Sledge has accurate estimates of execution time
run_samples() {
local hostname="${1:-localhost}"
echo -n "Running Samples: "
hey -n "$perf_window_buffer_size" -c "$perf_window_buffer_size" -cpus 3 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10040" 1> /dev/null 2> /dev/null || {
error_msg "fib40 samples failed"
return 1
}
hey -n "$perf_window_buffer_size" -c "$perf_window_buffer_size" -cpus 3 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:100010" 1> /dev/null 2> /dev/null || {
error_msg "fib10 samples failed"
return 1
}
echo "[OK]"
return 0
}
# Execute the fib10 and fib40 experiments sequentially and concurrently
# $1 (results_directory) - a directory where we will store our results
# $2 (hostname="localhost") - an optional parameter that sets the hostname. Defaults to localhost
run_experiments() {
if (($# < 1 || $# > 2)); then
error_msg "invalid number of arguments \"$1\""
return 1
elif ! [[ -d "$1" ]]; then
error_msg "directory \"$1\" does not exist"
return 1
fi
local results_directory="$1"
local hostname="${2:-localhost}"
# The duration in seconds that we want the client to send requests
local -ir duration_sec=15
# The duration in seconds that the low priority task should run before the high priority task starts
local -ir offset=5
printf "Running Experiments\n"
# Run each separately
printf "\tfib40: "
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "40\n" "http://$hostname:10040" > "$results_directory/fib40.csv" 2> /dev/null || {
printf "[ERR]\n"
error_msg "fib40 failed"
return 1
}
get_result_count "$results_directory/fib40.csv" || {
printf "[ERR]\n"
error_msg "fib40 unexpectedly has zero requests"
return 1
}
printf "[OK]\n"
printf "\tfib10: "
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "10\n" "http://$hostname:10010" > "$results_directory/fib10.csv" 2> /dev/null || {
printf "[ERR]\n"
error_msg "fib10 failed"
return 1
}
get_result_count "$results_directory/fib10.csv" || {
printf "[ERR]\n"
error_msg "fib10 unexpectedly has zero requests"
return 1
}
printf "[OK]\n"
# Run concurrently
# The lower priority has offsets to ensure it runs the entire time the high priority is trying to run
# This asynchronously triggers jobs and then waits on their PIDs
local fib40_con_PID
local fib10_con_PID
hey -z $((duration_sec + 2 * offset))s -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" "http://${hostname}:10040" > "$results_directory/fib40_con.csv" 2> /dev/null &
fib40_con_PID="$!"
sleep $offset
hey -z "${duration_sec}s" -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" "http://${hostname}:10010" > "$results_directory/fib10_con.csv" 2> /dev/null &
fib10_con_PID="$!"
wait -f "$fib10_con_PID" || {
printf "\tfib10_con: [ERR]\n"
error_msg "failed to wait -f ${fib10_con_PID}"
return 1
}
get_result_count "$results_directory/fib10_con.csv" || {
printf "\tfib10_con: [ERR]\n"
error_msg "fib10_con has zero requests. This might be because fib40_con saturated the runtime"
return 1
}
printf "\tfib10_con: [OK]\n"
wait -f "$fib40_con_PID" || {
printf "\tfib40_con: [ERR]\n"
error_msg "failed to wait -f ${fib40_con_PID}"
return 1
}
get_result_count "$results_directory/fib40_con.csv" || {
printf "\tfib40_con: [ERR]\n"
error_msg "fib40_con has zero requests."
return 1
}
printf "\tfib40_con: [OK]\n"
return 0
}
# Process the experimental results and generate human-friendly results for success rate, throughput, and latency
process_results() {
if (($# != 1)); then
error_msg "invalid number of arguments ($#, expected 1)"
return 1
elif ! [[ -d "$1" ]]; then
error_msg "directory $1 does not exist"
return 1
fi
local -r results_directory="$1"
echo -n "Processing Results: "
# Write headers to CSVs
printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
# The four types of results that we are capturing.
# fib10 and fib40 are run sequentially.
# fib10_con and fib40_con are run concurrently
local -ar payloads=(fib10 fib10_con fib40 fib40_con)
# The deadlines for each of the workloads
# TODO: Scrape these from spec.json
local -Ar deadlines_ms=(
[fib10]=2
[fib40]=3000
)
for payload in "${payloads[@]}"; do
# Strip the _con suffix when getting the deadline
local -i deadline=${deadlines_ms[${payload/_con/}]}
# Get Number of Requests, subtracting the header
local -i requests=$(($(wc -l < "$results_directory/$payload.csv") - 1))
((requests == 0)) && {
echo "$payload unexpectedly has zero requests"
continue
}
# Calculate Success Rate for csv
awk -F, '
$7 == 200 && ($1 * 1000) <= '"$deadline"' {ok++}
END{printf "'"$payload"',%3.5f\n", (ok / (NR - 1) * 100)}
' < "$results_directory/$payload.csv" >> "$results_directory/success.csv"
# Filter on 200s, convert from s to ms, and sort
awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/$payload.csv" \
| sort -g > "$results_directory/$payload-response.csv"
# Get Number of 200s
oks=$(wc -l < "$results_directory/$payload-response.csv")
((oks == 0)) && continue # If all errors, skip line
# We determine duration by looking at the timestamp of the last complete request
# TODO: Should this instead just use the client-side synthetic duration_sec value?
duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8)
# Throughput is calculated as the mean number of successful requests per second
throughput=$(echo "$oks/$duration" | bc)
printf "%s,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"
# Generate Latency Data for csv
awk '
BEGIN {
sum = 0
p50 = int('"$oks"' * 0.5)
p90 = int('"$oks"' * 0.9)
p99 = int('"$oks"' * 0.99)
p100 = '"$oks"'
printf "'"$payload"',"
}
NR==p50 {printf "%1.4f,", $0}
NR==p90 {printf "%1.4f,", $0}
NR==p99 {printf "%1.4f,", $0}
NR==p100 {printf "%1.4f\n", $0}
' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"
# Delete scratch file used for sorting/counting
# rm -rf "$results_directory/$payload-response.csv"
done
# Transform csvs to dat files for gnuplot
csv_to_dat "$results_directory/success.csv" "$results_directory/throughput.csv" "$results_directory/latency.csv"
# Generate gnuplots. Commented out because we don't have *.gnuplots defined
# generate_gnuplots
}
run_server() {
if (($# != 1)); then
error_msg "invalid number of arguments \"$1\""
return 1
elif ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
error_msg "expected EDF or FIFO was \"$1\""
return 1
fi
local -r scheduler="$1"
if [[ "$role" == "both" ]]; then
local -r results_directory="$experiment_directory/res/$timestamp/$scheduler"
local -r how_to_run="background"
elif [[ "$role" == "server" ]]; then
local -r results_directory="$experiment_directory/res/$timestamp"
local -r how_to_run="foreground"
else
error_msg "Unexpected $role"
return 1
fi
mkdir -p "$results_directory"
start_runtime "$scheduler" "$results_directory" "$how_to_run" || {
echo "start_runtime RC: $?"
error_msg "Error calling start_runtime $scheduler $results_directory"
return 1
}
return 0
}
run_perf() {
if (($# != 1)); then
printf "[ERR]\n"
error_msg "invalid number of arguments \"$1\""
return 1
elif ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
printf "[ERR]\n"
error_msg "expected EDF or FIFO was \"$1\""
return 1
fi
if ! command -v perf; then
echo "perf is not present."
exit 1
fi
local -r scheduler="$1"
SLEDGE_SCHEDULER="$scheduler" perf record -g -s sledgert "$experiment_directory/spec.json"
}
# Starts the Sledge Runtime under GDB
run_debug() {
# shellcheck disable=SC2155
local project_directory=$(cd ../.. && pwd)
if (($# != 1)); then
printf "[ERR]\n"
error_msg "invalid number of arguments \"$1\""
return 1
elif ! [[ $1 =~ ^(EDF|FIFO)$ ]]; then
printf "[ERR]\n"
error_msg "expected EDF or FIFO was \"$1\""
return 1
fi
local -r scheduler="$1"
if [[ "$project_directory" != "/sledge/runtime" ]]; then
printf "It appears that you are not running in the container. Substituting path to match host environment\n"
SLEDGE_SCHEDULER="$scheduler" gdb \
--eval-command="handle SIGUSR1 nostop" \
--eval-command="handle SIGPIPE nostop" \
--eval-command="set pagination off" \
--eval-command="set substitute-path /sledge/runtime $project_directory" \
--eval-command="run $experiment_directory/spec.json" \
sledgert
else
SLEDGE_SCHEDULER="$scheduler" gdb \
--eval-command="handle SIGUSR1 nostop" \
--eval-command="handle SIGPIPE nostop" \
--eval-command="set pagination off" \
--eval-command="run $experiment_directory/spec.json" \
sledgert
fi
return 0
}
run_client() {
if [[ "$role" == "both" ]]; then
local results_directory="$experiment_directory/res/$timestamp/$scheduler"
elif [[ "$role" == "client" ]]; then
local results_directory="$experiment_directory/res/$timestamp"
else
error_msg "${FUNCNAME[0]} Unexpected $role"
return 1
fi
mkdir -p "$results_directory"
run_samples "$target" || {
error_msg "Error calling run_samples $target"
return 1
}
run_experiments "$results_directory" || {
error_msg "Error calling run_experiments $results_directory"
return 1
}
process_results "$results_directory" || {
error_msg "Error calling process_results $results_directory"
return 1
}
echo "[OK]"
return 0
}
run_both() {
local -ar schedulers=(EDF FIFO)
for scheduler in "${schedulers[@]}"; do
printf "Running %s\n" "$scheduler"
run_server "$scheduler" || {
error_msg "Error calling run_server"
return 1
}
run_client || {
error_msg "Error calling run_client"
kill_runtime
return 1
}
kill_runtime || {
error_msg "Error calling kill_runtime"
return 1
}
done
return 0
}
main() {
initialize_globals
parse_arguments "$@" || {
exit 1
}
case $role in
both)
run_both
;;
server)
run_server "$policy"
;;
debug)
run_debug "$policy"
;;
perf)
run_perf "$policy"
;;
client)
run_client
;;
*)
echo "Invalid state"
false
;;
esac
exit "$?"
}
main "$@"

@ -1,28 +0,0 @@
# Preemption
## Question
- How do mixed criticality workloads perform under the Sledge scheduler policies?
- How does the latency of a high criticality workload that triggers preemption on a system under load compare to being the only workload on the system?
- What is the slowdown on the low priority workload?
- How does this affect aggregate throughput?
## Setup
The system is configured with admission control disabled.
The driver script drives a bimodal distribution of long-running low-priority and short-running high-priority workloads.
Relative deadlines are tuned such that the scheduler should always preempt the low-priority workload for the high-priority workload.
A driver script runs the two workloads separately as a baseline.
It then runs them concurrently, starting the low-priority long-running workload first such that the system begins execution and accumulates requests in the data structures. The high-priority short-running workload then begins.
## Independent Variable
The Scheduling Policy: EDF versus FIFO
## Dependent Variables
Latency of high priority workload

@ -1,18 +0,0 @@
#!/bin/bash
source ../common.sh
# This experiment is intended to document how the level of concurrent requests influences the latency, throughput, and success/failure rate
# Use -d flag if running under gdb
timestamp=$(date +%s)
experiment_directory=$(pwd)
binary_directory=$(cd ../../bin && pwd)
results_directory="$experiment_directory/res/$timestamp/$scheduler"
log=log.txt
mkdir -p "$results_directory"
log_environment >> "$results_directory/$log"
# Start the runtime
PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" | tee -a "$results_directory/$log"

@ -1,111 +0,0 @@
#!/bin/bash
source ../common.sh
# This experiment is intended to document how the level of concurrent requests influences the latency, throughput, and success/failure rate
# Modified to target a remote host
timestamp=$(date +%s)
experiment_directory=$(pwd)
host=192.168.1.13
results_directory="$experiment_directory/res/$timestamp"
mkdir -p "$results_directory"
# Start the runtime
inputs=(40 10)
duration_sec=30
offset=5
# Execute workloads long enough for runtime to learn expected execution time
echo -n "Running Samples: "
for input in ${inputs[*]}; do
hey -n 45 -c 4 -t 0 -o csv -m GET -d "$input\n" http://"$host":$((10000 + input))
done
echo "[DONE]"
sleep 30
echo "Running Experiments"
# Run each separately
hey -z ${duration_sec}s -cpus 6 -c 100 -t 0 -o csv -m GET -d "10\n" "http://$host:10010" > "$results_directory/fib10.csv"
echo "fib(10) Complete"
sleep 60
hey -z ${duration_sec}s -cpus 6 -c 100 -t 0 -o csv -m GET -d "40\n" "http://$host:10040" > "$results_directory/fib40.csv"
echo "fib(40) Complete"
sleep 120
# Run lower priority first, then higher priority. The lower priority has offsets to ensure it runs the entire time the high priority is trying to run
hey -z $((duration_sec + 2 * offset))s -cpus 3 -c 100 -t 0 -o csv -m GET -d "40\n" "http://$host:10040" > "$results_directory/fib40-con.csv" &
sleep $offset
hey -z ${duration_sec}s -cpus 3 -c 100 -t 0 -o csv -m GET -d "10\n" "http://$host:10010" > "$results_directory/fib10-con.csv" &
sleep $((duration_sec + offset + 15))
echo "fib(10) & fib(40) Complete"
# Generate *.csv and *.dat results
echo -n "Parsing Results: "
printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
durations_s=(15 15 15 25)
payloads=(fib10 fib10-con fib40 fib40-con)
for payload in ${payloads[*]}; do
# Get Number of Requests
requests=$(($(wc -l < "$results_directory/$payload.csv") - 1))
((requests == 0)) && continue
duration=${durations_s[$i]}
# Calculate Success Rate for csv
awk -F, '
$7 == 200 {ok++}
END{printf "'"$payload"',%3.5f\n", (ok / (NR - 1) * 100)}
' < "$results_directory/$payload.csv" >> "$results_directory/success.csv"
# Filter on 200s, convert from s to ms, and sort
awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/$payload.csv" \
| sort -g > "$results_directory/$payload-response.csv"
# Get Number of 200s
oks=$(wc -l < "$results_directory/$payload-response.csv")
((oks == 0)) && continue # If all errors, skip line
# Get Latest Timestamp
# duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8)
throughput=$(echo "$oks/$duration" | bc)
printf "%s,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"
# Generate Latency Data for csv
awk '
BEGIN {
sum = 0
p50 = int('"$oks"' * 0.5)
p90 = int('"$oks"' * 0.9)
p99 = int('"$oks"' * 0.99)
p100 = '"$oks"'
printf "'"$payload"',"
}
NR==p50 {printf "%1.4f,", $0}
NR==p90 {printf "%1.4f,", $0}
NR==p99 {printf "%1.4f,", $0}
NR==p100 {printf "%1.4f\n", $0}
' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"
# Delete scratch file used for sorting/counting
# rm -rf "$results_directory/$payload-response.csv"
done
# Transform csvs to dat files for gnuplot
for file in success latency throughput; do
echo -n "#" > "$results_directory/$file.dat"
tr ',' ' ' < "$results_directory/$file.csv" | column -t >> "$results_directory/$file.dat"
done
# Generate gnuplots. Commented out because we don't have *.gnuplots defined
# generate_gnuplots
# Cleanup, if required
echo "[DONE]"

@ -1,20 +0,0 @@
#!/bin/bash
# Executes the runtime in GDB
# Substitutes the absolute path from the container with a path relatively derived from the location of this script
# This allows debugging outside of the Docker container
# Also disables pagination and stopping on SIGUSR1
experiment_directory=$(pwd)
project_directory=$(cd ../.. && pwd)
binary_directory=$(cd "$project_directory"/bin && pwd)
export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH"
export PATH="$binary_directory:$PATH"
export SLEDGE_SCHEDULER="EDF"
gdb --eval-command="handle SIGUSR1 nostop" \
--eval-command="handle SIGPIPE nostop" \
--eval-command="set pagination off" \
--eval-command="set substitute-path /sledge/runtime $project_directory" \
--eval-command="run $experiment_directory/spec.json" \
sledgert

@ -1,81 +0,0 @@
#!/bin/bash
source ../common.sh
# This experiment is intended to document how the level of concurrent requests influences the latency, throughput, and success/failure rate
# Modified to target a remote host
timestamp=1606608313-FIFO
experiment_directory=$(pwd)
results_directory="$experiment_directory/res/$timestamp"
# Generate *.csv and *.dat results
echo -n "Parsing Results: "
printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
printf "Payload,p50,p90,p99,p998,p999,p100\n" >> "$results_directory/latency.csv"
durations_s=(15 15 15 25)
payloads=(fib10 fib10-con fib40 fib40-con)
for payload in ${payloads[*]}; do
# Get Number of Requests
requests=$(($(wc -l < "$results_directory/$payload.csv") - 1))
((requests == 0)) && continue
duration=${durations_s[$i]}
# Calculate Success Rate for csv
awk -F, '
$7 == 200 {ok++}
END{printf "'"$payload"',%3.5f\n", (ok / (NR - 1) * 100)}
' < "$results_directory/$payload.csv" >> "$results_directory/success.csv"
# Filter on 200s, convert from s to ms, and sort
awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/$payload.csv" \
| sort -g > "$results_directory/$payload-response.csv"
# Get Number of 200s
oks=$(wc -l < "$results_directory/$payload-response.csv")
((oks == 0)) && continue # If all errors, skip line
# Get Latest Timestamp
# duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8)
throughput=$(echo "$oks/$duration" | bc)
printf "%s,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"
# Generate Latency Data for csv
awk '
BEGIN {
sum = 0
p50 = int('"$oks"' * 0.5)
p90 = int('"$oks"' * 0.9)
p99 = int('"$oks"' * 0.99)
p998 = int('"$oks"' * 0.998)
p999 = int('"$oks"' * 0.999)
p100 = '"$oks"'
printf "'"$payload"',"
}
NR==p50 {printf "%1.4f,", $0}
NR==p90 {printf "%1.4f,", $0}
NR==p99 {printf "%1.4f,", $0}
NR==p998 {printf "%1.4f,", $0}
NR==p999 {printf "%1.4f,", $0}
NR==p100 {printf "%1.4f\n", $0}
' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"
# Delete scratch file used for sorting/counting
# rm -rf "$results_directory/$payload-response.csv"
done
# Transform csvs to dat files for gnuplot
for file in success latency throughput; do
echo -n "#" > "$results_directory/$file.dat"
tr ',' ' ' < "$results_directory/$file.csv" | column -t >> "$results_directory/$file.dat"
done
# Generate gnuplots. Commented out because we don't have *.gnuplots defined
# generate_gnuplots
# Cleanup, if required
echo "[DONE]"

@ -1,14 +0,0 @@
#!/bin/bash
# Executes the runtime in GDB
# Substitutes the absolute path from the container with a path relatively derived from the location of this script
# This allows debugging outside of the Docker container
# Also disables pagination and stopping on SIGUSR1
experiment_directory=$(pwd)
project_directory=$(cd ../.. && pwd)
binary_directory=$(cd "$project_directory"/bin && pwd)
export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH"
export PATH="$binary_directory:$PATH"
SLEDGE_NWORKERS=5 SLEDGE_SCHEDULER=EDF perf record -g -s sledgert "$experiment_directory/spec.json"

@ -1,134 +0,0 @@
#!/bin/bash
# This experiment is intended to document how the level of concurrent requests influences the latency, throughput, and success/failure rate
# Use -d flag if running under gdb
source ../common.sh
# Validate dependencies
declare -a -r dependencies=(awk hey wc)
for dependency in "${dependencies[@]}"; do
if ! command -v "$dependency" &> /dev/null; then
echo "$dependency could not be found"
exit
fi
done
timestamp=$(date +%s)
experiment_directory=$(pwd)
binary_directory=$(cd ../../bin && pwd)
schedulers=(EDF FIFO)
for scheduler in ${schedulers[*]}; do
results_directory="$experiment_directory/res/$timestamp/$scheduler"
log=log.txt
mkdir -p "$results_directory"
log_environment >> "$results_directory/$log"
# Start the runtime
if [ "$1" != "-d" ]; then
SLEDGE_NWORKERS=5 SLEDGE_SCHEDULER=$scheduler PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" >> "$results_directory/$log" 2>> "$results_directory/$log" &
sleep 1
else
echo "Running under gdb"
echo "Running under gdb" >> "$results_directory/$log"
fi
inputs=(40 10)
duration_sec=15
offset=5
# Execute workloads long enough for runtime to learn expected execution time
echo -n "Running Samples: "
for input in ${inputs[*]}; do
hey -z ${duration_sec}s -cpus 3 -t 0 -o csv -m GET -d "$input\n" http://localhost:$((10000 + input))
done
echo "[DONE]"
sleep 5
echo "Running Experiments"
# Run each separately
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 > "$results_directory/fib40.csv"
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 > "$results_directory/fib10.csv"
# Run lower priority first, then higher priority. The lower priority has offsets to ensure it runs the entire time the high priority is trying to run
hey -z $((duration_sec + 2 * offset))s -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 > "$results_directory/fib40-con.csv" &
sleep $offset
hey -z ${duration_sec}s -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 > "$results_directory/fib10-con.csv" &
sleep $((duration_sec + offset + 15))
# Stop the runtime if not in debug mode
[ "$1" != "-d" ] && kill_runtime
# Generate *.csv and *.dat results
echo -n "Parsing Results: "
printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
deadlines_ms=(2 2 3000 3000)
payloads=(fib10 fib10-con fib40 fib40-con)
for ((i = 0; i < 4; i++)); do
# for payload in ${payloads[*]}; do
payload=${payloads[$i]}
deadline=${deadlines_ms[$i]}
# Get Number of Requests
requests=$(($(wc -l < "$results_directory/$payload.csv") - 1))
((requests == 0)) && continue
# Calculate Success Rate for csv
awk -F, '
$7 == 200 && ($1 * 1000) <= '"$deadline"' {ok++}
END{printf "'"$payload"',%3.5f\n", (ok / (NR - 1) * 100)}
' < "$results_directory/$payload.csv" >> "$results_directory/success.csv"
# Filter on 200s, convert from s to ms, and sort
awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/$payload.csv" \
| sort -g > "$results_directory/$payload-response.csv"
# Get Number of 200s
oks=$(wc -l < "$results_directory/$payload-response.csv")
((oks == 0)) && continue # If all errors, skip line
# Get Latest Timestamp
duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8)
throughput=$(echo "$oks/$duration" | bc)
printf "%s,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"
# Generate Latency Data for csv
awk '
BEGIN {
sum = 0
p50 = int('"$oks"' * 0.5)
p90 = int('"$oks"' * 0.9)
p99 = int('"$oks"' * 0.99)
p100 = '"$oks"'
printf "'"$payload"',"
}
NR==p50 {printf "%1.4f,", $0}
NR==p90 {printf "%1.4f,", $0}
NR==p99 {printf "%1.4f,", $0}
NR==p100 {printf "%1.4f\n", $0}
' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"
# Delete scratch file used for sorting/counting
# rm -rf "$results_directory/$payload-response.csv"
done
# Transform csvs to dat files for gnuplot
for file in success latency throughput; do
echo -n "#" > "$results_directory/$file.dat"
tr ',' ' ' < "$results_directory/$file.csv" | column -t >> "$results_directory/$file.dat"
done
# Generate gnuplots. Commented out because we don't have *.gnuplots defined
# generate_gnuplots
# Cleanup, if required
echo "[DONE]"
done

@ -1,124 +0,0 @@
#!/bin/bash
source ../common.sh
# This experiment is intended to document how the level of concurrent requests influences the latency, throughput, and success/failure rate
# Use -d flag if running under gdb
timestamp=$(date +%s)
experiment_directory=$(pwd)
binary_directory=$(cd ../../bin && pwd)
schedulers=(EDF FIFO)
for scheduler in ${schedulers[*]}; do
results_directory="$experiment_directory/res/$timestamp/$scheduler"
log=log.txt
mkdir -p "$results_directory"
log_environment >> "$results_directory/$log"
# Start the runtime
if [ "$1" != "-d" ]; then
SLEDGE_NWORKERS=5 SLEDGE_SCHEDULER=$scheduler PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" >> "$results_directory/$log" 2>> "$results_directory/$log" &
sleep 1
else
echo "Running under gdb"
echo "Running under gdb" >> "$results_directory/$log"
fi
inputs=(40 10)
duration_sec=15
offset=5
# Execute workloads long enough for runtime to learn expected execution time
echo -n "Running Samples: "
for input in ${inputs[*]}; do
hey -z ${duration_sec}s -cpus 3 -t 0 -o csv -m GET -d "$input\n" http://localhost:$((10000 + input))
done
echo "[DONE]"
sleep 5
echo "Running Experiments"
# Run each separately
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 > "$results_directory/fib40.csv"
hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 > "$results_directory/fib10.csv"
# Run lower priority first, then higher priority. The lower priority has offsets to ensure it runs the entire time the high priority is trying to run
hey -z $((duration_sec + 2 * offset))s -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 > "$results_directory/fib40-con.csv" &
sleep $offset
hey -z ${duration_sec}s -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 > "$results_directory/fib10-con.csv" &
sleep $((duration_sec + offset + 15))
# Stop the runtime if not in debug mode
[ "$1" != "-d" ] && kill_runtime
# Generate *.csv and *.dat results
echo -n "Parsing Results: "
printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
deadlines_ms=(2 2 3000 3000)
payloads=(fib10 fib10-con fib40 fib40-con)
for ((i = 0; i < 4; i++)); do
# for payload in ${payloads[*]}; do
payload=${payloads[$i]}
deadline=${deadlines_ms[$i]}
# Get Number of Requests
requests=$(($(wc -l < "$results_directory/$payload.csv") - 1))
((requests == 0)) && continue
# Calculate Success Rate for csv
awk -F, '
$7 == 200 && ($1 * 1000) <= '"$deadline"' {ok++}
END{printf "'"$payload"',%3.5f\n", (ok / (NR - 1) * 100)}
' < "$results_directory/$payload.csv" >> "$results_directory/success.csv"
# Filter on 200s, convert from s to ms, and sort
awk -F, '$7 == 200 {print ($1 * 1000)}' < "$results_directory/$payload.csv" \
| sort -g > "$results_directory/$payload-response.csv"
# Get Number of 200s
oks=$(wc -l < "$results_directory/$payload-response.csv")
((oks == 0)) && continue # If all errors, skip line
# Get Latest Timestamp
duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8)
throughput=$(echo "$oks/$duration" | bc)
printf "%s,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"
# Generate Latency Data for csv
awk '
BEGIN {
sum = 0
p50 = int('"$oks"' * 0.5)
p90 = int('"$oks"' * 0.9)
p99 = int('"$oks"' * 0.99)
p100 = '"$oks"'
printf "'"$payload"',"
}
NR==p50 {printf "%1.4f%,", $0 / '"$deadline"' * 100}
NR==p90 {printf "%1.4f%,", $0 / '"$deadline"' * 100}
NR==p99 {printf "%1.4f%,", $0 / '"$deadline"' * 100}
NR==p100 {printf "%1.4f%\n", $0 / '"$deadline"' * 100}
' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"
# Delete scratch file used for sorting/counting
# rm -rf "$results_directory/$payload-response.csv"
done
# Transform csvs to dat files for gnuplot
for file in success latency throughput; do
echo -n "#" > "$results_directory/$file.dat"
tr ',' ' ' < "$results_directory/$file.csv" | column -t >> "$results_directory/$file.dat"
done
# Generate gnuplots. Commented out because we don't have *.gnuplots defined
# generate_gnuplots
# Cleanup, if required
echo "[DONE]"
done

@ -1,5 +0,0 @@
hey -n 200 -c 200 -t 0 -m GET -d "40\n" http://localhost:10040
hey -n 500 -c 500 -t 0 -m GET -d "10\n" http://localhost:10010

@ -1,30 +0,0 @@
{
"active": true,
"name": "fibonacci_10",
"path": "fibonacci_wasm.so",
"port": 10010,
"expected-execution-us": 600,
"relative-deadline-us": 2000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 1024,
"http-resp-headers": [],
"http-resp-size": 1024,
"http-resp-content-type": "text/plain"
},
{
"active": true,
"name": "fibonacci_40",
"path": "fibonacci_wasm.so",
"port": 10040,
"expected-execution-us": 550000,
"relative-deadline-us": 300000000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 1024,
"http-resp-headers": [],
"http-resp-size": 1024,
"http-resp-content-type": "text/plain"
}