From 57b60bbb7d6996142b055f55fa131806dcf9c94a Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Wed, 11 Nov 2020 09:13:08 -0500 Subject: [PATCH] chore: push update of experiments --- .../applications/fivebyeight/spec.json | 3 +- runtime/experiments/deadline/.gitignore | 1 + runtime/experiments/deadline/README.md | 48 +++++++ runtime/experiments/deadline/debug.sh | 19 +++ runtime/experiments/deadline/demo.sh | 34 +++++ runtime/experiments/deadline/perf.sh | 14 ++ runtime/experiments/deadline/run.sh | 124 ++++++++++++++++++ runtime/experiments/deadline/run_relative.sh | 124 ++++++++++++++++++ runtime/experiments/deadline/scratch.txt | 5 + runtime/experiments/deadline/spec.json | 90 +++++++++++++ runtime/experiments/payload/spec.json | 16 ++- runtime/experiments/payload/test.sh | 3 + runtime/experiments/preemption/README.md | 26 ++-- runtime/experiments/preemption/perf.sh | 14 ++ runtime/experiments/preemption/post.lua | 58 -------- runtime/experiments/preemption/run.sh | 22 ++-- .../experiments/preemption/run_relative.sh | 124 ++++++++++++++++++ runtime/experiments/preemption/scratch.txt | 5 + runtime/experiments/preemption/spec.json | 62 +-------- 19 files changed, 647 insertions(+), 145 deletions(-) create mode 100644 runtime/experiments/deadline/.gitignore create mode 100644 runtime/experiments/deadline/README.md create mode 100755 runtime/experiments/deadline/debug.sh create mode 100755 runtime/experiments/deadline/demo.sh create mode 100755 runtime/experiments/deadline/perf.sh create mode 100755 runtime/experiments/deadline/run.sh create mode 100755 runtime/experiments/deadline/run_relative.sh create mode 100644 runtime/experiments/deadline/scratch.txt create mode 100644 runtime/experiments/deadline/spec.json create mode 100755 runtime/experiments/payload/test.sh create mode 100755 runtime/experiments/preemption/perf.sh delete mode 100644 runtime/experiments/preemption/post.lua create mode 100755 runtime/experiments/preemption/run_relative.sh create mode 100644 
runtime/experiments/preemption/scratch.txt diff --git a/runtime/experiments/applications/fivebyeight/spec.json b/runtime/experiments/applications/fivebyeight/spec.json index e1357f3..49677a1 100644 --- a/runtime/experiments/applications/fivebyeight/spec.json +++ b/runtime/experiments/applications/fivebyeight/spec.json @@ -3,7 +3,8 @@ "name": "gocr", "path": "gocr.aso", "port": 10000, - "relative-deadline-us": 50000000000, + "relative-deadline-us": 500000000, + "expected-execution-us": 5000000, "argsize": 1, "http-req-headers": [], "http-req-content-type": "text/plain", diff --git a/runtime/experiments/deadline/.gitignore b/runtime/experiments/deadline/.gitignore new file mode 100644 index 0000000..64f722e --- /dev/null +++ b/runtime/experiments/deadline/.gitignore @@ -0,0 +1 @@ +res diff --git a/runtime/experiments/deadline/README.md b/runtime/experiments/deadline/README.md new file mode 100644 index 0000000..2275b7c --- /dev/null +++ b/runtime/experiments/deadline/README.md @@ -0,0 +1,48 @@ +# Admissions Control + +## Discussion of Implementation + +The admissions control subsystem seeks to ensure that the system does not accept more work than it can execute while meeting the relative deadline defined in a module's JSON specification. + +The system maintains an integral value expressing the capacity of the system as millionths of a worker core. This assumes that the runtime has "pinned" these workers to underlying processors and has no contention with other workloads. + +The system maintains a second integral value expressing the total accepted work. + +The module specification provides a relative deadline, an expected execution time, and a percentile target expressing the pXX latency that the admissions control system should use when making admissions decisions (tunable from 50% to 99%). Tuning this percentile expresses how conservative the system should be with regard to scheduling. 
Selecting a lower value, such as 50%, reserves less processor time and results in a higher likelihood that the relative deadline is not met. Selecting a higher value, such as 99%, reserves more processor time and provides a higher likelihood that the relative deadline will be met. The provided expected execution time is assumed to match the percentile provided. + +Dividing the expected execution time by the relative deadline yields the fraction of a worker needed to meet the deadline. + +If the existing accepted workload plus the required work of this new workload is less than the system capacity, the workload is accepted, and the integral value expressing the total accepted work is increased. The resulting sandbox request is tagged with the fraction of a worker it was calculated to use, and when the request completes, the total accepted work is decreased by this amount. + +If the existing accepted workload plus the required work of this new workload is greater than the system capacity, the request is rejected and the runtime sends the client an HTTP 503 response. + +While the module specification provides an expected execution time, the system does not trust this value and only uses it in the absence of better information. Each sandbox is profiled as it runs through the system, and the end-to-end execution times of successful sandbox requests are added to a specialized performance window data structure that stores the last N execution times sorted in order of execution time. This structure optimizes for quick lookups of a specific pXX percentile. + +Once data is seeded into this data structure, the initial execution estimate provided in the module specification is ignored, and the pXX target is instead used to look up the actual pXX performance metric. + +Future Work: + +Currently, the scheduler takes no action when an executing sandbox exceeds its pXX execution time or deadline.
+ +In the case of the pXX workload, this means that a workload with exceptionally poor p99 performance that is configured to target p50 during admissions control decisions causes system-wide overheads that can cause other systems to miss their deadlines. + +Even worse, when executing beyond the relative deadline, the request might be too stale for the client. + +In the absolute worst case, one can imagine a client workload caught in an infinite loop that causes permanent head-of-line blocking because its deadline is earlier than the current time, such that nothing can possibly preempt the executing workload. + +## Question + +- Does Admissions Control guarantee that deadlines are met? + +## Independent Variable + +Deadline is disabled versus deadline is enabled + +## Invariants + +Single workload +Use FIFO policy + +## Dependent Variables + +End-to-end execution time of a workload, measured from a client, relative to its deadline diff --git a/runtime/experiments/deadline/debug.sh b/runtime/experiments/deadline/debug.sh new file mode 100755 index 0000000..62c64e3 --- /dev/null +++ b/runtime/experiments/deadline/debug.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Executes the runtime in GDB +# Substitutes the absolute path from the container with a path relatively derived from the location of this script +# This allows debugging outside of the Docker container +# Also disables pagination and stopping on SIGUSR1 + +experiment_directory=$(pwd) +project_directory=$(cd ../..
&& pwd) +binary_directory=$(cd "$project_directory"/bin && pwd) + +export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" +export PATH="$binary_directory:$PATH" + +gdb --eval-command="handle SIGUSR1 nostop" \ + --eval-command="handle SIGPIPE nostop" \ + --eval-command="set pagination off" \ + --eval-command="set substitute-path /sledge/runtime $project_directory" \ + --eval-command="run $experiment_directory/spec.json" \ + sledgert diff --git a/runtime/experiments/deadline/demo.sh b/runtime/experiments/deadline/demo.sh new file mode 100755 index 0000000..3fdb1e9 --- /dev/null +++ b/runtime/experiments/deadline/demo.sh @@ -0,0 +1,34 @@ +#!/bin/bash +cd ../../bin +LD_LIBRARY_PATH="$(pwd):$LD_LIBRARY_PATH" ./sledgert ../tests/mixed_preemption/test_mixed_preemption.json & +cd ../tests/mixed_preemption/ + +# Run small samples on each port to let the runtime figure out the execution time +sleep 10 +echo "Running Samples" +wrk -d 20s -t1 -s post.lua http://localhost:10010 -- --delay 500 10\n +wrk -d 20s -t1 -s post.lua http://localhost:10020 -- --delay 500 20\n +wrk -d 20s -t1 -s post.lua http://localhost:10030 -- --delay 500 25\n + +# Run in Parallel +sleep 10 +echo "Running Experiments" +wrk -d 1m -t1 -s post.lua http://localhost:10010 -- --delay 125 10\n >./res/fib10.txt & +wrk -d 2m -t1 -s post.lua http://localhost:10020 -- --delay 250 20\n >./res/fib20.txt & +wrk -d 3m -t1 -s post.lua http://localhost:10025 -- --delay 500 25\n >./res/fib25.txt + +# Kill the Background Sledge processes +sleep 10 +echo "Running Cleanup" +pkill sledgert +pkill wrk + +# Extract the Latency CSV Data from the Log + +echo 'Fib10, Fib10' >./res/fib10.csv +grep -A200 -m1 -e 'Percentile, Latency' ./res/fib10.txt >>./res/fib10.csv +echo 'Fib20, Fib20' >./res/fib20.csv +grep -A200 -m1 -e 'Percentile, Latency' ./res/fib20.txt >>./res/fib20.csv +echo 'Fib25, Fib25' >./res/fib25.csv +grep -A200 -m1 -e 'Percentile, Latency' ./res/fib25.txt >>./res/fib25.csv +paste -d, ./res/fib10.csv 
./res/fib20.csv ./res/fib25.csv >./res/merged.csv diff --git a/runtime/experiments/deadline/perf.sh b/runtime/experiments/deadline/perf.sh new file mode 100755 index 0000000..c87504f --- /dev/null +++ b/runtime/experiments/deadline/perf.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Executes the runtime in GDB +# Substitutes the absolute path from the container with a path relatively derived from the location of this script +# This allows debugging outside of the Docker container +# Also disables pagination and stopping on SIGUSR1 + +experiment_directory=$(pwd) +project_directory=$(cd ../.. && pwd) +binary_directory=$(cd "$project_directory"/bin && pwd) + +export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" +export PATH="$binary_directory:$PATH" + +SLEDGE_NWORKERS=5 SLEDGE_SCHEDULER=EDF perf record -g -s sledgert "$experiment_directory/spec.json" diff --git a/runtime/experiments/deadline/run.sh b/runtime/experiments/deadline/run.sh new file mode 100755 index 0000000..bf38f2a --- /dev/null +++ b/runtime/experiments/deadline/run.sh @@ -0,0 +1,124 @@ +#!/bin/bash +source ../common.sh + +# This experiment is intended to document how the level of concurrent requests influence the latency, throughput, and success/failure rate +# Use -d flag if running under gdb + +timestamp=$(date +%s) +experiment_directory=$(pwd) +binary_directory=$(cd ../../bin && pwd) + +schedulers=(EDF FIFO) +for scheduler in ${schedulers[*]}; do + + results_directory="$experiment_directory/res/$timestamp/$scheduler" + log=log.txt + + mkdir -p "$results_directory" + log_environment >>"$results_directory/$log" + + # Start the runtime + if [ "$1" != "-d" ]; then + SLEDGE_NWORKERS=5 SLEDGE_SCHEDULER=$scheduler PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" >>"$results_directory/$log" 2>>"$results_directory/$log" & + sleep 1 + else + echo "Running under gdb" + echo "Running under gdb" >>"$results_directory/$log" + fi + + 
inputs=(40 10) + duration_sec=15 + offset=5 + + # Execute workloads long enough for runtime to learn excepted execution time + echo -n "Running Samples: " + for input in ${inputs[*]}; do + hey -z ${duration_sec}s -cpus 3 -t 0 -o csv -m GET -d "$input\n" http://localhost:$((10000 + input)) + done + echo "[DONE]" + sleep 5 + + echo "Running Experiments" + # Run each separately + hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 >"$results_directory/fib40.csv" + hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 >"$results_directory/fib10.csv" + + # Run lower priority first, then higher priority. The lower priority has offsets to ensure it runs the entire time the high priority is trying to run + hey -z $((duration_sec + 2 * offset))s -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 >"$results_directory/fib40-con.csv" & + sleep $offset + hey -z ${duration_sec}s -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 >"$results_directory/fib10-con.csv" & + sleep $((duration_sec + offset + 15)) + + # Stop the runtime if not in debug mode + [ "$1" != "-d" ] && kill_runtime + + # Generate *.csv and *.dat results + echo -n "Parsing Results: " + + printf "Payload,Success_Rate\n" >>"$results_directory/success.csv" + printf "Payload,Throughput\n" >>"$results_directory/throughput.csv" + printf "Payload,p50,p90,p99,p100\n" >>"$results_directory/latency.csv" + + deadlines_ms=(2 2 3000 3000) + payloads=(fib10 fib10-con fib40 fib40-con) + + for ((i = 0; i < 4; i++)); do + # for payload in ${payloads[*]}; do + payload=${payloads[$i]} + deadline=${deadlines_ms[$i]} + + # Get Number of Requests + requests=$(($(wc -l <"$results_directory/$payload.csv") - 1)) + ((requests == 0)) && continue + + # Calculate Success Rate for csv + awk -F, ' + $7 == 200 && ($1 * 1000) <= '"$deadline"' {ok++} + END{printf "'"$payload"',%3.5f%\n", (ok / (NR - 1) * 100)} + ' 
<"$results_directory/$payload.csv" >>"$results_directory/success.csv" + + # Filter on 200s, convery from s to ms, and sort + awk -F, '$7 == 200 {print ($1 * 1000)}' <"$results_directory/$payload.csv" | + sort -g >"$results_directory/$payload-response.csv" + + # Get Number of 200s + oks=$(wc -l <"$results_directory/$payload-response.csv") + ((oks == 0)) && continue # If all errors, skip line + + # Get Latest Timestamp + duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8) + throughput=$(echo "$oks/$duration" | bc) + printf "%s,%f\n" "$payload" "$throughput" >>"$results_directory/throughput.csv" + + # Generate Latency Data for csv + awk ' + BEGIN { + sum = 0 + p50 = int('"$oks"' * 0.5) + p90 = int('"$oks"' * 0.9) + p99 = int('"$oks"' * 0.99) + p100 = '"$oks"' + printf "'"$payload"'," + } + NR==p50 {printf "%1.4f,", $0} + NR==p90 {printf "%1.4f,", $0} + NR==p99 {printf "%1.4f,", $0} + NR==p100 {printf "%1.4f\n", $0} + ' <"$results_directory/$payload-response.csv" >>"$results_directory/latency.csv" + + # Delete scratch file used for sorting/counting + # rm -rf "$results_directory/$payload-response.csv" + done + + # Transform csvs to dat files for gnuplot + for file in success latency throughput; do + echo -n "#" >"$results_directory/$file.dat" + tr ',' ' ' <"$results_directory/$file.csv" | column -t >>"$results_directory/$file.dat" + done + + # Generate gnuplots. 
Commented out because we don't have *.gnuplots defined + # generate_gnuplots + + # Cleanup, if requires + echo "[DONE]" +done diff --git a/runtime/experiments/deadline/run_relative.sh b/runtime/experiments/deadline/run_relative.sh new file mode 100755 index 0000000..bd072e4 --- /dev/null +++ b/runtime/experiments/deadline/run_relative.sh @@ -0,0 +1,124 @@ +#!/bin/bash +source ../common.sh + +# This experiment is intended to document how the level of concurrent requests influence the latency, throughput, and success/failure rate +# Use -d flag if running under gdb + +timestamp=$(date +%s) +experiment_directory=$(pwd) +binary_directory=$(cd ../../bin && pwd) + +schedulers=(EDF FIFO) +for scheduler in ${schedulers[*]}; do + + results_directory="$experiment_directory/res/$timestamp/$scheduler" + log=log.txt + + mkdir -p "$results_directory" + log_environment >>"$results_directory/$log" + + # Start the runtime + if [ "$1" != "-d" ]; then + SLEDGE_NWORKERS=5 SLEDGE_SCHEDULER=$scheduler PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" >>"$results_directory/$log" 2>>"$results_directory/$log" & + sleep 1 + else + echo "Running under gdb" + echo "Running under gdb" >>"$results_directory/$log" + fi + + inputs=(40 10) + duration_sec=15 + offset=5 + + # Execute workloads long enough for runtime to learn excepted execution time + echo -n "Running Samples: " + for input in ${inputs[*]}; do + hey -z ${duration_sec}s -cpus 3 -t 0 -o csv -m GET -d "$input\n" http://localhost:$((10000 + input)) + done + echo "[DONE]" + sleep 5 + + echo "Running Experiments" + # Run each separately + hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 >"$results_directory/fib40.csv" + hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 >"$results_directory/fib10.csv" + + # Run lower priority first, then higher priority. 
The lower priority has offsets to ensure it runs the entire time the high priority is trying to run + hey -z $((duration_sec + 2 * offset))s -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 >"$results_directory/fib40-con.csv" & + sleep $offset + hey -z ${duration_sec}s -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 >"$results_directory/fib10-con.csv" & + sleep $((duration_sec + offset + 15)) + + # Stop the runtime if not in debug mode + [ "$1" != "-d" ] && kill_runtime + + # Generate *.csv and *.dat results + echo -n "Parsing Results: " + + printf "Payload,Success_Rate\n" >>"$results_directory/success.csv" + printf "Payload,Throughput\n" >>"$results_directory/throughput.csv" + printf "Payload,p50,p90,p99,p100\n" >>"$results_directory/latency.csv" + + deadlines_ms=(2 2 3000 3000) + payloads=(fib10 fib10-con fib40 fib40-con) + + for ((i = 0; i < 4; i++)); do + # for payload in ${payloads[*]}; do + payload=${payloads[$i]} + deadline=${deadlines_ms[$i]} + + # Get Number of Requests + requests=$(($(wc -l <"$results_directory/$payload.csv") - 1)) + ((requests == 0)) && continue + + # Calculate Success Rate for csv + awk -F, ' + $7 == 200 && ($1 * 1000) <= '"$deadline"' {ok++} + END{printf "'"$payload"',%3.5f%\n", (ok / (NR - 1) * 100)} + ' <"$results_directory/$payload.csv" >>"$results_directory/success.csv" + + # Filter on 200s, convery from s to ms, and sort + awk -F, '$7 == 200 {print ($1 * 1000)}' <"$results_directory/$payload.csv" | + sort -g >"$results_directory/$payload-response.csv" + + # Get Number of 200s + oks=$(wc -l <"$results_directory/$payload-response.csv") + ((oks == 0)) && continue # If all errors, skip line + + # Get Latest Timestamp + duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8) + throughput=$(echo "$oks/$duration" | bc) + printf "%s,%f\n" "$payload" "$throughput" >>"$results_directory/throughput.csv" + + # Generate Latency Data for csv + awk ' + BEGIN { + sum = 0 + p50 = int('"$oks"' * 
0.5) + p90 = int('"$oks"' * 0.9) + p99 = int('"$oks"' * 0.99) + p100 = '"$oks"' + printf "'"$payload"'," + } + NR==p50 {printf "%1.4f%,", $0 / '"$deadline"' * 100} + NR==p90 {printf "%1.4f%,", $0 / '"$deadline"' * 100} + NR==p99 {printf "%1.4f%,", $0 / '"$deadline"' * 100} + NR==p100 {printf "%1.4f%\n", $0 / '"$deadline"' * 100} + ' <"$results_directory/$payload-response.csv" >>"$results_directory/latency.csv" + + # Delete scratch file used for sorting/counting + # rm -rf "$results_directory/$payload-response.csv" + done + + # Transform csvs to dat files for gnuplot + for file in success latency throughput; do + echo -n "#" >"$results_directory/$file.dat" + tr ',' ' ' <"$results_directory/$file.csv" | column -t >>"$results_directory/$file.dat" + done + + # Generate gnuplots. Commented out because we don't have *.gnuplots defined + # generate_gnuplots + + # Cleanup, if requires + echo "[DONE]" +done diff --git a/runtime/experiments/deadline/scratch.txt b/runtime/experiments/deadline/scratch.txt new file mode 100644 index 0000000..2f45190 --- /dev/null +++ b/runtime/experiments/deadline/scratch.txt @@ -0,0 +1,5 @@ + + +hey -n 200 -c 200 -t 0 -m GET -d "40\n" http://localhost:10040 + +hey -n 500 -c 500 -t 0 -m GET -d "10\n" http://localhost:10010 diff --git a/runtime/experiments/deadline/spec.json b/runtime/experiments/deadline/spec.json new file mode 100644 index 0000000..1d64c1b --- /dev/null +++ b/runtime/experiments/deadline/spec.json @@ -0,0 +1,90 @@ +{ + "active": "yes", + "name": "fibonacci_10", + "path": "fibonacci_wasm.so", + "port": 10010, + "expected-execution-us": 600, + "relative-deadline-us": 2000, + "argsize": 1, + "http-req-headers": [], + "http-req-content-type": "text/plain", + "http-req-size": 1024, + "http-resp-headers": [], + "http-resp-size": 1024, + "http-resp-content-type": "text/plain" +}, +{ + "active": "yes", + "name": "fibonacci_20", + "path": "fibonacci_wasm.so", + "port": 10020, + "expected-execution-us": 900, + "relative-deadline-us": 
5000, + "argsize": 1, + "http-req-headers": [], + "http-req-content-type": "text/plain", + "http-req-size": 1024, + "http-resp-headers": [], + "http-resp-size": 1024, + "http-resp-content-type": "text/plain" +}, +{ + "active": "yes", + "name": "fibonacci_25", + "path": "fibonacci_wasm.so", + "port": 10025, + "expected-execution-us": 90000, + "relative-deadline-us": 200000, + "argsize": 1, + "http-req-headers": [], + "http-req-content-type": "text/plain", + "http-req-size": 1024, + "http-resp-headers": [], + "http-resp-size": 1024, + "http-resp-content-type": "text/plain" +}, +{ + "active": "yes", + "name": "fibonacci_30", + "path": "fibonacci_wasm.so", + "port": 10030, + "expected-execution-us": 9000, + "relative-deadline-us": 80000, + "argsize": 1, + "http-req-headers": [], + "http-req-content-type": "text/plain", + "http-req-size": 1024, + "http-resp-headers": [], + "http-resp-size": 1024, + "http-resp-content-type": "text/plain" +}, +{ + "active": "yes", + "name": "fibonacci_35", + "path": "fibonacci_wasm.so", + "port": 10035, + "expected-execution-us": 9000, + "relative-deadline-us": 53000, + "argsize": 1, + "http-req-headers": [], + "http-req-content-type": "text/plain", + "http-req-size": 1024, + "http-resp-headers": [], + "http-resp-size": 1024, + "http-resp-content-type": "text/plain" +}, +{ + "active": "yes", + "name": "fibonacci_40", + "path": "fibonacci_wasm.so", + "port": 10040, + "expected-execution-us": 550000, + "relative-deadline-us": 300000000, + "argsize": 1, + "http-req-headers": [], + "http-req-content-type": "text/plain", + "http-req-size": 1024, + "http-resp-headers": [], + "http-resp-size": 1024, + "http-resp-content-type": "text/plain" +} diff --git a/runtime/experiments/payload/spec.json b/runtime/experiments/payload/spec.json index 34b9040..fd1ff1c 100644 --- a/runtime/experiments/payload/spec.json +++ b/runtime/experiments/payload/spec.json @@ -3,13 +3,14 @@ "name": "work1k", "path": "work1k_wasm.so", "port": 10000, - 
"relative-deadline-us": 5000000, + "expected-execution-us": 400, + "relative-deadline-us": 2000, "argsize": 1, "http-req-headers": [], "http-req-content-type": "text/plain", - "http-req-size": 1048, + "http-req-size": 1548, "http-resp-headers": [], - "http-resp-size": 1048, + "http-resp-size": 1548, "http-resp-content-type": "text/plain" }, { @@ -17,7 +18,8 @@ "name": "work10k", "path": "work10k_wasm.so", "port": 10001, - "relative-deadline-us": 5000000, + "expected-execution-us": 600, + "relative-deadline-us": 2000, "argsize": 1, "http-req-headers": [], "http-req-content-type": "text/plain", @@ -31,7 +33,8 @@ "name": "work100k", "path": "work100k_wasm.so", "port": 10002, - "relative-deadline-us": 5000000, + "expected-execution-us": 700, + "relative-deadline-us": 2000, "argsize": 1, "http-req-headers": [], "http-req-content-type": "text/plain", @@ -45,7 +48,8 @@ "name": "work1m", "path": "work1m_wasm.so", "port": 10003, - "relative-deadline-us": 5000000, + "expected-execution-us": 2000, + "relative-deadline-us": 6000, "argsize": 1, "http-req-headers": [], "http-req-content-type": "text/plain", diff --git a/runtime/experiments/payload/test.sh b/runtime/experiments/payload/test.sh new file mode 100755 index 0000000..7115085 --- /dev/null +++ b/runtime/experiments/payload/test.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +hey -n 100 -c 3 -q 100 -m GET -D "./body/1024.txt" http://localhost:10000 diff --git a/runtime/experiments/preemption/README.md b/runtime/experiments/preemption/README.md index 44de926..c2160ef 100644 --- a/runtime/experiments/preemption/README.md +++ b/runtime/experiments/preemption/README.md @@ -2,17 +2,27 @@ ## Question -TODO +- How do mixed criticality workloads perform under the Sledge scheduler policies? +- How does the latency of a high criticality workload that triggers preemption on a system under load compare to being the only workload on the system? +- What is the slowdown on the low priority workload? +- How does this affect aggregate throughput? 
+ -## Independent Variable + +## Setup -TODO +The system is configured with admission control disabled. -## Dependent Variables + +The driver script drives a bimodal distribution of long-running low-priority and short-running high-priority workloads. + +Relative Deadlines are tuned such that the scheduler should always preempt the low-priority workload for the high-priority workload. + +A driver script runs the two workloads separately as a baseline. -TODO +It then runs them concurrently, starting the low-priority long-running workload first such that the system begins execution and accumulates requests in the data structures. The high-priority short-running workload then begins. -## TODO + +## Independent Variable + +The Scheduling Policy: EDF versus FIFO + +## Dependent Variables -- Clarify what this experiment is actually trying to do -- Replace `wrk` with `hey` using the patterns established in the `concurrency` experiment +Latency of high-priority workload diff --git a/runtime/experiments/preemption/perf.sh b/runtime/experiments/preemption/perf.sh new file mode 100755 index 0000000..c87504f --- /dev/null +++ b/runtime/experiments/preemption/perf.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Executes the runtime in GDB +# Substitutes the absolute path from the container with a path relatively derived from the location of this script +# This allows debugging outside of the Docker container +# Also disables pagination and stopping on SIGUSR1 + +experiment_directory=$(pwd) +project_directory=$(cd ../..
&& pwd) +binary_directory=$(cd "$project_directory"/bin && pwd) + +export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" +export PATH="$binary_directory:$PATH" + +SLEDGE_NWORKERS=5 SLEDGE_SCHEDULER=EDF perf record -g -s sledgert "$experiment_directory/spec.json" diff --git a/runtime/experiments/preemption/post.lua b/runtime/experiments/preemption/post.lua deleted file mode 100644 index 84d7779..0000000 --- a/runtime/experiments/preemption/post.lua +++ /dev/null @@ -1,58 +0,0 @@ --- Default to 1 request / second -wrk.method = "POST" -wrk.body = "10\n" -wrk.headers["Content-Type"] = "text/plain" -local delay_val = 1000 - -function init(args) - if #args == 0 then - io.write("[wrk stuff] -- --delay [delay in ms] [args ...]\n") - os.exit(); - end - - local current_arg = 1 - while current_arg <= #args do - if args[current_arg] == "--delay" then - delay_val = args[current_arg + 1] - current_arg = current_arg + 2; - io.write(string.format("Delay: %s\n", delay_val)) - else - -- Concatenate all remaining args - local buffer = "" - for i = current_arg, #args, 1 do - buffer = buffer .. 
args[i] - end - io.write(string.format("Buffer: %s\n", buffer)) - wrk.body = buffer - -- And exit loop - break; - end - end -end - --- Uncomment to dynamically generate a different request each time --- function request() --- return wrk.format(nil, nil, nil,tostring(math.random(10, 23)) .."\n") --- end - --- Wrk calls a function name delay to get the delay between requests (in ms) -function delay() - return delay_val -end - -function response(status, headers, body) - -- io.write(string.format("%s: %s\n", status, body)) -end - --- Done Phase - --- Called when complete, presenting aggregate results -function done(summary, latency, requests) - io.write("Percentile, Latency\n"); - for i = 1, 99 do - io.write(string.format("%d, %d\n", i, latency:percentile(i))) - end -end - - - diff --git a/runtime/experiments/preemption/run.sh b/runtime/experiments/preemption/run.sh index 2a4de74..bf38f2a 100755 --- a/runtime/experiments/preemption/run.sh +++ b/runtime/experiments/preemption/run.sh @@ -33,21 +33,21 @@ for scheduler in ${schedulers[*]}; do # Execute workloads long enough for runtime to learn excepted execution time echo -n "Running Samples: " for input in ${inputs[*]}; do - hey -z ${duration_sec}s -c 3 -o csv -m GET -d "$input\n" http://localhost:$((10000 + input)) + hey -z ${duration_sec}s -cpus 3 -t 0 -o csv -m GET -d "$input\n" http://localhost:$((10000 + input)) done echo "[DONE]" sleep 5 echo "Running Experiments" # Run each separately - hey -z ${duration_sec}s -cpus 3 -c 3 -o csv -m GET -d "40\n" http://localhost:10040 >"$results_directory/fib40.csv" - hey -z ${duration_sec}s -cpus 3 -c 3 -o csv -m GET -d "10\n" http://localhost:10010 >"$results_directory/fib10.csv" + hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 >"$results_directory/fib40.csv" + hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 >"$results_directory/fib10.csv" # Run lower priority first, then higher priority. 
The lower priority has offsets to ensure it runs the entire time the high priority is trying to run - hey -z $((duration_sec + 2 * offset))s -cpus 3 -c 3 -o csv -m GET -d "40\n" http://localhost:10040 >"$results_directory/fib40-con.csv" & + hey -z $((duration_sec + 2 * offset))s -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 >"$results_directory/fib40-con.csv" & sleep $offset - hey -z ${duration_sec}s -cpus 3 -c 3 -o csv -m GET -d "10\n" http://localhost:10010 >"$results_directory/fib10-con.csv" & - sleep $((duration_sec + offset + 5)) + hey -z ${duration_sec}s -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 >"$results_directory/fib10-con.csv" & + sleep $((duration_sec + offset + 15)) # Stop the runtime if not in debug mode [ "$1" != "-d" ] && kill_runtime @@ -59,7 +59,7 @@ for scheduler in ${schedulers[*]}; do printf "Payload,Throughput\n" >>"$results_directory/throughput.csv" printf "Payload,p50,p90,p99,p100\n" >>"$results_directory/latency.csv" - deadlines_ms=(1.5 1.5 660 660) + deadlines_ms=(2 2 3000 3000) payloads=(fib10 fib10-con fib40 fib40-con) for ((i = 0; i < 4; i++)); do @@ -100,10 +100,10 @@ for scheduler in ${schedulers[*]}; do p100 = '"$oks"' printf "'"$payload"'," } - NR==p50 {printf "%1.4f%,", $0 / '"$deadline"' * 100} - NR==p90 {printf "%1.4f%,", $0 / '"$deadline"' * 100} - NR==p99 {printf "%1.4f%,", $0 / '"$deadline"' * 100} - NR==p100 {printf "%1.4f%\n", $0 / '"$deadline"' * 100} + NR==p50 {printf "%1.4f,", $0} + NR==p90 {printf "%1.4f,", $0} + NR==p99 {printf "%1.4f,", $0} + NR==p100 {printf "%1.4f\n", $0} ' <"$results_directory/$payload-response.csv" >>"$results_directory/latency.csv" # Delete scratch file used for sorting/counting diff --git a/runtime/experiments/preemption/run_relative.sh b/runtime/experiments/preemption/run_relative.sh new file mode 100755 index 0000000..bd072e4 --- /dev/null +++ b/runtime/experiments/preemption/run_relative.sh @@ -0,0 +1,124 @@ +#!/bin/bash +source ../common.sh + +# 
This experiment is intended to document how the level of concurrent requests influence the latency, throughput, and success/failure rate +# Use -d flag if running under gdb + +timestamp=$(date +%s) +experiment_directory=$(pwd) +binary_directory=$(cd ../../bin && pwd) + +schedulers=(EDF FIFO) +for scheduler in ${schedulers[*]}; do + + results_directory="$experiment_directory/res/$timestamp/$scheduler" + log=log.txt + + mkdir -p "$results_directory" + log_environment >>"$results_directory/$log" + + # Start the runtime + if [ "$1" != "-d" ]; then + SLEDGE_NWORKERS=5 SLEDGE_SCHEDULER=$scheduler PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" >>"$results_directory/$log" 2>>"$results_directory/$log" & + sleep 1 + else + echo "Running under gdb" + echo "Running under gdb" >>"$results_directory/$log" + fi + + inputs=(40 10) + duration_sec=15 + offset=5 + + # Execute workloads long enough for runtime to learn excepted execution time + echo -n "Running Samples: " + for input in ${inputs[*]}; do + hey -z ${duration_sec}s -cpus 3 -t 0 -o csv -m GET -d "$input\n" http://localhost:$((10000 + input)) + done + echo "[DONE]" + sleep 5 + + echo "Running Experiments" + # Run each separately + hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 >"$results_directory/fib40.csv" + hey -z ${duration_sec}s -cpus 4 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 >"$results_directory/fib10.csv" + + # Run lower priority first, then higher priority. 
The lower priority has offsets to ensure it runs the entire time the high priority is trying to run + hey -z $((duration_sec + 2 * offset))s -cpus 2 -c 100 -t 0 -o csv -m GET -d "40\n" http://localhost:10040 >"$results_directory/fib40-con.csv" & + sleep $offset + hey -z ${duration_sec}s -cpus 2 -c 100 -t 0 -o csv -m GET -d "10\n" http://localhost:10010 >"$results_directory/fib10-con.csv" & + sleep $((duration_sec + offset + 15)) + + # Stop the runtime if not in debug mode + [ "$1" != "-d" ] && kill_runtime + + # Generate *.csv and *.dat results + echo -n "Parsing Results: " + + printf "Payload,Success_Rate\n" >>"$results_directory/success.csv" + printf "Payload,Throughput\n" >>"$results_directory/throughput.csv" + printf "Payload,p50,p90,p99,p100\n" >>"$results_directory/latency.csv" + + deadlines_ms=(2 2 3000 3000) + payloads=(fib10 fib10-con fib40 fib40-con) + + for ((i = 0; i < 4; i++)); do + # for payload in ${payloads[*]}; do + payload=${payloads[$i]} + deadline=${deadlines_ms[$i]} + + # Get Number of Requests + requests=$(($(wc -l <"$results_directory/$payload.csv") - 1)) + ((requests == 0)) && continue + + # Calculate Success Rate for csv + awk -F, ' + $7 == 200 && ($1 * 1000) <= '"$deadline"' {ok++} + END{printf "'"$payload"',%3.5f%%\n", (ok / (NR - 1) * 100)} + ' <"$results_directory/$payload.csv" >>"$results_directory/success.csv" + + # Filter on 200s, convert from s to ms, and sort + awk -F, '$7 == 200 {print ($1 * 1000)}' <"$results_directory/$payload.csv" | + sort -g >"$results_directory/$payload-response.csv" + + # Get Number of 200s + oks=$(wc -l <"$results_directory/$payload-response.csv") + ((oks == 0)) && continue # If all errors, skip line + + # Get Latest Timestamp + duration=$(tail -n1 "$results_directory/$payload.csv" | cut -d, -f8) + throughput=$(echo "$oks/$duration" | bc) + printf "%s,%f\n" "$payload" "$throughput" >>"$results_directory/throughput.csv" + + # Generate Latency Data for csv + awk ' + BEGIN { + sum = 0 + p50 = int('"$oks"' * 
0.5) + p90 = int('"$oks"' * 0.9) + p99 = int('"$oks"' * 0.99) + p100 = '"$oks"' + printf "'"$payload"'," + } + NR==p50 {printf "%1.4f%%,", $0 / '"$deadline"' * 100} + NR==p90 {printf "%1.4f%%,", $0 / '"$deadline"' * 100} + NR==p99 {printf "%1.4f%%,", $0 / '"$deadline"' * 100} + NR==p100 {printf "%1.4f%%\n", $0 / '"$deadline"' * 100} + ' <"$results_directory/$payload-response.csv" >>"$results_directory/latency.csv" + + # Delete scratch file used for sorting/counting + # rm -rf "$results_directory/$payload-response.csv" + done + + # Transform csvs to dat files for gnuplot + for file in success latency throughput; do + echo -n "#" >"$results_directory/$file.dat" + tr ',' ' ' <"$results_directory/$file.csv" | column -t >>"$results_directory/$file.dat" + done + + # Generate gnuplots. Commented out because we don't have *.gnuplots defined + # generate_gnuplots + + # Cleanup, if required + echo "[DONE]" +done diff --git a/runtime/experiments/preemption/scratch.txt b/runtime/experiments/preemption/scratch.txt new file mode 100644 index 0000000..2f45190 --- /dev/null +++ b/runtime/experiments/preemption/scratch.txt @@ -0,0 +1,5 @@ + + +hey -n 200 -c 200 -t 0 -m GET -d "40\n" http://localhost:10040 + +hey -n 500 -c 500 -t 0 -m GET -d "10\n" http://localhost:10010 diff --git a/runtime/experiments/preemption/spec.json b/runtime/experiments/preemption/spec.json index 1a5067c..f10f229 100644 --- a/runtime/experiments/preemption/spec.json +++ b/runtime/experiments/preemption/spec.json @@ -13,73 +13,13 @@ "http-resp-size": 1024, "http-resp-content-type": "text/plain" }, -{ - "active": "yes", - "name": "fibonacci_20", - "path": "fibonacci_wasm.so", - "port": 10020, - "expected-execution-us": 900, - "relative-deadline-us": 5000, - "argsize": 1, - "http-req-headers": [], - "http-req-content-type": "text/plain", - "http-req-size": 1024, - "http-resp-headers": [], - "http-resp-size": 1024, - "http-resp-content-type": "text/plain" -}, -{ - "active": "yes", - "name": "fibonacci_25", - 
"path": "fibonacci_wasm.so", - "port": 10025, - "expected-execution-us": 90000, - "relative-deadline-us": 200000, - "argsize": 1, - "http-req-headers": [], - "http-req-content-type": "text/plain", - "http-req-size": 1024, - "http-resp-headers": [], - "http-resp-size": 1024, - "http-resp-content-type": "text/plain" -}, -{ - "active": "yes", - "name": "fibonacci_30", - "path": "fibonacci_wasm.so", - "port": 10030, - "expected-execution-us": 9000, - "relative-deadline-us": 80000, - "argsize": 1, - "http-req-headers": [], - "http-req-content-type": "text/plain", - "http-req-size": 1024, - "http-resp-headers": [], - "http-resp-size": 1024, - "http-resp-content-type": "text/plain" -}, -{ - "active": "yes", - "name": "fibonacci_35", - "path": "fibonacci_wasm.so", - "port": 10035, - "expected-execution-us": 9000, - "relative-deadline-us": 53000, - "argsize": 1, - "http-req-headers": [], - "http-req-content-type": "text/plain", - "http-req-size": 1024, - "http-resp-headers": [], - "http-resp-size": 1024, - "http-resp-content-type": "text/plain" -}, { "active": "yes", "name": "fibonacci_40", "path": "fibonacci_wasm.so", "port": 10040, "expected-execution-us": 550000, - "relative-deadline-us": 1000000, + "relative-deadline-us": 300000000, "argsize": 1, "http-req-headers": [], "http-req-content-type": "text/plain",