feat: deadline description to real world workload mix

master
Sean McBride 4 years ago
parent 6dc172952c
commit 9ef5d732e0

@ -0,0 +1,6 @@
1. Start the server with the benchmark spec
2. From the client, run the benchmark client logic to generate sorted execution times
3. From the client, run the deadline_description script to generate spec.json
4. Manually copy spec.json to the server and restart it
5. Run the workload mix experiment against the regenerated spec
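
A rough shell sketch of the same flow (the runtime binary name, script arguments, and paths below are illustrative assumptions, not taken from this commit):

    # on the server: start the runtime against the benchmark spec (binary name assumed)
    sledgert spec.json
    # on the client: generate sorted execution times, then derive per-module deadlines
    ./benchmark/run.sh "$SERVER_HOSTNAME"
    ./deadline_description/run.sh "$SERVER_HOSTNAME"   # writes res/<timestamp>/spec.json
    # copy the generated spec to the server by hand, restart the runtime with it,
    # then run the workload mix experiment from the client
    scp res/<timestamp>/spec.json "$SERVER_HOSTNAME":<path-to-server-spec>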

@ -0,0 +1,3 @@
SLEDGE_SCHEDULER=FIFO
SLEDGE_DISABLE_PREEMPTION=true
SLEDGE_NWORKERS=1

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary image file (not shown): 177 KiB

Binary image file (not shown): 358 KiB

@ -0,0 +1,148 @@
#!/bin/bash
set -x
# Add bash_libraries directory to path
__run_sh__base_path="$(dirname "$(realpath --logical "${BASH_SOURCE[0]}")")"
__run_sh__bash_libraries_relative_path="../bash_libraries"
__run_sh__bash_libraries_absolute_path=$(cd "$__run_sh__base_path" && cd "$__run_sh__bash_libraries_relative_path" && pwd)
export PATH="$__run_sh__bash_libraries_absolute_path:$PATH"
source csv_to_dat.sh || exit 1
source framework.sh || exit 1
source get_result_count.sh || exit 1
source panic.sh || exit 1
source path_join.sh || exit 1
declare -a workloads=(ekf resize lpd gocr)
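# Profiles the workloads against a running server: 16 warm-up requests per module
# (discarded), then 256 timed requests whose hey CSV output is written to <results>/<module>/benchmark.csv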
profile() {
local hostname="$1"
local -r results_directory="$2"
echo "$results_directory/ekf/benchmark.csv"
# ekf
mkdir "$results_directory/ekf"
hey -disable-compression -disable-keepalive -disable-redirects -n 16 -c 1 -cpus 1 -t 0 -o csv -m GET -D "./ekf/initial_state.dat" "http://${hostname}:10000" > /dev/null
hey -disable-compression -disable-keepalive -disable-redirects -n 256 -c 1 -cpus 1 -t 0 -o csv -m GET -D "./ekf/initial_state.dat" "http://${hostname}:10000" > "$results_directory/ekf/benchmark.csv"
# Resize
mkdir "$results_directory/resize"
hey -disable-compression -disable-keepalive -disable-redirects -n 16 -c 1 -cpus 1 -t 0 -o csv -m GET -D "./resize/shrinking_man_large.jpg" "http://${hostname}:10001" > /dev/null
hey -disable-compression -disable-keepalive -disable-redirects -n 256 -c 1 -cpus 1 -t 0 -o csv -m GET -D "./resize/shrinking_man_large.jpg" "http://${hostname}:10001" > "$results_directory/resize/benchmark.csv"
# lpd
mkdir "$results_directory/lpd"
hey -disable-compression -disable-keepalive -disable-redirects -n 16 -c 1 -cpus 1 -t 0 -o csv -m GET -D "./lpd/Cars0.png" "http://${hostname}:10002" > /dev/null
hey -disable-compression -disable-keepalive -disable-redirects -n 256 -c 1 -cpus 1 -t 0 -o csv -m GET -D "./lpd/Cars0.png" "http://${hostname}:10002" > "$results_directory/lpd/benchmark.csv"
# gocr - Hit error. Commented out temporarily
# mkdir "$results_directory/gocr"
# hey -disable-compression -disable-keepalive -disable-redirects -n 16 -c 1 -cpus 1 -t 0 -o csv -H 'Expect:' -H "Content-Type: text/plain" -m GET -D "./gocr/hyde.pnm" "http://${hostname}:10003" > /dev/null
# hey -disable-compression -disable-keepalive -disable-redirects -n 256 -c 1 -cpus 1 -t 0 -o csv -m GET -D "./gocr/hyde.pnm" "http://${hostname}:10003" > "$results_directory/gocr/benchmark.csv"
}
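# Prints the execution time at the requested percentile from a module's sorted
# response_times_sorted.csv; returns 1 if the file is empty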
get_baseline_execution() {
local -r results_directory="$1"
local -r module="$2"
local -ir percentile="$3"
local response_times_file="$results_directory/$module/response_times_sorted.csv"
# Skip empty results
local -i oks
oks=$(wc -l < "$response_times_file")
((oks == 0)) && return 1
# Select the value at the requested percentile from the sorted response times
awk '
BEGIN {idx = int('"$oks"' * ('"$percentile"' / 100))}
NR==idx {printf "%1.4f\n", $0}
' < "$response_times_file"
}
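# Prints a random float in [lower, upper), seeded from the current nanosecond clock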
get_random_from_interval() {
local -r lower="$1"
local -r upper="$2"
awk "BEGIN { \"date +%N\" | getline seed; srand(seed); print rand() * ($upper - $lower) + $lower}"
}
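# Prints baseline * multiplier rounded to the nearest integer microsecond,
# e.g. a baseline of 40000 and a multiplier of 1.7 yield 68000 (illustrative values)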
calculate_relative_deadline() {
local -r baseline="$1"
local -r multiplier="$2"
awk "BEGIN { printf \"%.0f\n\", ($baseline * $multiplier)}"
}
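# Derives a workload's relative deadline: the p90 of its sorted execution times
# scaled by a random multiplier drawn from [1.5, 2.0)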
generate_relative_deadline() {
local -r results_directory="$1"
local -r workload="$2"
local baseline
local multiplier
local relative_deadline
local -ri percentile=90
baseline="$(get_baseline_execution "$results_directory" "$workload" $percentile)"
multiplier="$(get_random_from_interval 1.5 2.0)"
relative_deadline=$(calculate_relative_deadline "$baseline" "$multiplier")
echo "$relative_deadline"
}
generate_spec() {
local results_directory="$1"
# Run initial test run to get CDF of execution time for each app
# I have to do this out-of-band on the
local ekf_relative_deadline="$(generate_relative_deadline "$results_directory" ekf)"
local resize_relative_deadline="$(generate_relative_deadline "$results_directory" resize)"
local lpd_relative_deadline="$(generate_relative_deadline "$results_directory" lpd)"
# Excluding gocr because of the difficulty of using gocr with hey
# Our JSON format is not spec compliant. I have to hack in a wrapping array before jq and delete it afterwards
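# For example, a spec.json holding bare objects '{ ... }, { ... }' is wrapped into
# '[ { ... }, { ... } ]' so jq can map over .[]; the wrapping brackets emitted by jq
# are then stripped back off by the tail/head pipeline below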
{
echo "["
cat ./spec.json
echo "]"
} | jq "\
[ \
.[] | \
if (.name == \"ekf\") then . + { \"relative-deadline-us\": $ekf_relative_deadline} else . end | \
if (.name == \"resize\") then . + { \"relative-deadline-us\": $resize_relative_deadline} else . end | \
if (.name == \"lpd\") then . + { \"relative-deadline-us\": $lpd_relative_deadline} else . end \
]" | tail -n +2 | head -n-1 > "$results_directory/spec.json"
# Get the baseline execution using a target in this CDF
# generate a relative deadline per module
# Use JQ to template baseline as "expected-execution-us" and deadline at "relative-deadline-us"
# Execute experiment with deadlines and workload mix (all equal for now)
# Capture "sandbox state" log and perhaps other logs?
}
# Process the experimental results and generate human-friendly results for success rate, throughput, and latency
process_results() {
local results_directory="$1"
for workload in "${workloads[@]}"; do
# Filter on 200s, subtract DNS time, convert from s to us, and sort
awk -F, '$7 == 200 {print (($1 - $2) * 1000000)}' < "$results_directory/$workload/benchmark.csv" \
| sort -g > "$results_directory/$workload/response_times_sorted.csv"
done
generate_spec "$results_directory"
return 0
}
experiment_main() {
local -r hostname="$1"
local -r results_directory="$2"
profile "$hostname" "$results_directory" || return 1
process_results "$results_directory"
}
main "$@"

@ -0,0 +1,60 @@
{
"active": true,
"name": "ekf",
"path": "ekf_wasm.so",
"port": 10000,
"expected-execution-us": 5000,
"relative-deadline-us": 50000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "application/octet-stream",
"http-req-size": 1024000,
"http-resp-headers": [],
"http-resp-size": 1024000,
"http-resp-content-type": "application/octet-stream"
},
{
"active": true,
"name": "resize",
"path": "resize_wasm.so",
"port": 10001,
"expected-execution-us": 5000,
"relative-deadline-us": 50000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "image/jpeg",
"http-req-size": 1024000,
"http-resp-headers": [],
"http-resp-size": 1024000,
"http-resp-content-type": "image/png"
},
{
"active": true,
"name": "lpd",
"path": "lpd_wasm.so",
"port": 10002,
"expected-execution-us": 5000,
"relative-deadline-us": 50000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "image/jpeg",
"http-req-size": 1002400,
"http-resp-headers": [],
"http-resp-size": 1048576,
"http-resp-content-type": "text/plain"
},
{
"active": true,
"name": "gocr",
"path": "gocr_wasm.so",
"port": 10003,
"expected-execution-us": 5000,
"relative-deadline-us": 360000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 5335057,
"http-resp-headers": [],
"http-resp-size": 5335057,
"http-resp-content-type": "text/plain"
}

@ -0,0 +1,3 @@
res
perf.data
perf.data.old

@ -0,0 +1,2 @@
SLEDGE_SCHEDULER=EDF
SLEDGE_DISABLE_PREEMPTION=true

@ -0,0 +1,3 @@
SLEDGE_SCHEDULER=EDF
SLEDGE_DISABLE_PREEMPTION=false
SLEDGE_SIGALRM_HANDLER=TRIAGED

@ -0,0 +1,2 @@
SLEDGE_SCHEDULER=FIFO
SLEDGE_DISABLE_PREEMPTION=true

@ -0,0 +1,2 @@
SLEDGE_SCHEDULER=FIFO
SLEDGE_DISABLE_PREEMPTION=false

File diff suppressed because one or more lines are too long

@ -0,0 +1,13 @@
#!/bin/bash
if ! command -v hey > /dev/null; then
HEY_URL=https://hey-release.s3.us-east-2.amazonaws.com/hey_linux_amd64
wget $HEY_URL -O hey
chmod +x hey
if [[ $(whoami) == "root" ]]; then
mv hey /usr/bin/hey
else
sudo mv hey /usr/bin/hey
fi
fi

Binary image file (not shown): 177 KiB

@ -0,0 +1,3 @@
33,ekf
33,resize
33,lpd

Binary image file (not shown): 358 KiB

@ -0,0 +1,244 @@
#!/bin/bash
# This experiment is intended to document how the level of concurrent requests influences the latency, throughput, and success/failure rate
# Success - The percentage of requests that complete by their deadlines
# TODO: Does this handle non-200s?
# Throughput - The mean number of successful requests per second
# Latency - the round-trip response time (in ms) of successful requests at the p50, p90, p99, and p100 percentiles
# Add bash_libraries directory to path
__run_sh__base_path="$(dirname "$(realpath --logical "${BASH_SOURCE[0]}")")"
__run_sh__bash_libraries_relative_path="../bash_libraries"
__run_sh__bash_libraries_absolute_path=$(cd "$__run_sh__base_path" && cd "$__run_sh__bash_libraries_relative_path" && pwd)
export PATH="$__run_sh__bash_libraries_absolute_path:$PATH"
source csv_to_dat.sh || exit 1
source framework.sh || exit 1
# source generate_gnuplots.sh || exit 1
source get_result_count.sh || exit 1
source panic.sh || exit 1
source path_join.sh || exit 1
if ! command -v hey > /dev/null; then
echo "hey is not present."
exit 1
fi
# Sends requests until the per-module perf window buffers are full
# This ensures that Sledge has accurate estimates of execution time
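# (One PERF_WINDOW_BUFFER_SIZE worth of requests per module is assumed to be enough
# to fill the window; the size is scraped from the runtime headers below, defaulting to 16)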
run_samples() {
if (($# != 1)); then
panic "invalid number of arguments \"$1\""
return 1
elif [[ -z "$1" ]]; then
panic "hostname \"$1\" was empty"
return 1
fi
local hostname="${1}"
# Scrape the perf window size from the source if possible
# TODO: Make a util function
local -r perf_window_path="$(path_join "$__run_sh__base_path" ../../include/perf_window_t.h)"
local -i perf_window_buffer_size
if ! perf_window_buffer_size=$(grep "#define PERF_WINDOW_BUFFER_SIZE" < "$perf_window_path" | cut -d\ -f3); then
printf "Failed to scrape PERF_WINDOW_BUFFER_SIZE from ../../include/perf_window.h\n"
printf "Defaulting to 16\n"
perf_window_buffer_size=16
fi
local -ir perf_window_buffer_size
printf "Running Samples: "
# EKF
hey -disable-compression -disable-keepalive -disable-redirects -n "$perf_window_buffer_size" -c 1 -cpus 1 -t 0 -o csv -m GET -D "./ekf/initial_state.dat" "http://${hostname}:10000" 1> /dev/null 2> /dev/null || {
printf "[ERR]\n"
panic "ekf samples failed with $?"
return 1
}
# Resize
hey -disable-compression -disable-keepalive -disable-redirects -n "$perf_window_buffer_size" -c 1 -cpus 1 -t 0 -o csv -m GET -D "./resize/shrinking_man_large.jpg" "http://${hostname}:10001" 1> /dev/null 2> /dev/null || {
printf "[ERR]\n"
panic "resize samples failed with $?"
return 1
}
# lpd
hey -disable-compression -disable-keepalive -disable-redirects -n "$perf_window_buffer_size" -c 1 -cpus 1 -t 0 -o csv -m GET -D "./lpd/Cars0.png" "http://${hostname}:10002" 1> /dev/null 2> /dev/null || {
printf "[ERR]\n"
panic "resize samples failed with $?"
return 1
}
# TODO: gocr
printf "[OK]\n"
return 0
}
# Run the workload mix experiment: for each batch, sample a workload according to mix.csv and issue requests with hey
# $1 (hostname)
# $2 (results_directory) - a directory where we will store our results
run_experiments() {
if (($# != 2)); then
panic "invalid number of arguments \"$1\""
return 1
elif [[ -z "$1" ]]; then
panic "hostname \"$1\" was empty"
return 1
elif [[ ! -d "$2" ]]; then
panic "directory \"$2\" does not exist"
return 1
fi
local hostname="$1"
local results_directory="$2"
local -a workloads=()
local -Ar port=(
[ekf]=10000
[resize]=10001
[lpd]=10002
)
local -Ar body=(
[ekf]="-D ./ekf/initial_state.dat"
[resize]="-D ./resize/shrinking_man_large.jpg"
[lpd]="-D ./lpd/Cars0.png"
)
local -A floor=()
local -A length=()
local -i total=0
local -a buffer=()
local workload=""
local -i odds=0
while read -r line; do
# Read into buffer array, splitting on commas
readarray -t -d, buffer < <(echo -n "$line")
# Use human friendly names
odds="${buffer[0]}"
workload="${buffer[1]}"
# Update workload mix structures
workloads+=("$workload")
floor+=(["$workload"]=$total)
length+=(["$workload"]=$odds)
((total += odds))
done < mix.csv
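# Worked example with the 33/33/33 mix in mix.csv: total=99, floor[ekf]=0,
# floor[resize]=33, floor[lpd]=66, each with length 33, so a roll of 0-32 selects ekf,
# 33-65 selects resize, and 66-98 selects lpd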
declare -ir random_max=32767
# Validate Workload Mix
if ((total <= 0 || total > random_max)); then
echo "total must be between 1 and $random_max"
exit 1
fi
# TODO: Check that workload is in spec.json
local -ir batch_size=1
local -i batch_id=0
local -i roll=0
local -ir total_iterations=1000
local -ir worker_max=50
local pids
printf "Running Experiments: "
# Select a random workload using the workload mix and run command, writing output to disk
for ((i = 0; i < total_iterations; i += batch_size)); do
# Block waiting for a worker to finish if we are at our max
while (($(pgrep --count hey) >= worker_max)); do
wait -n $(pgrep hey | tr '\n' ' ')
done
roll=$((RANDOM % total))
((batch_id++))
for workload in "${workloads[@]}"; do
if ((roll >= floor[$workload] && roll < floor[$workload] + length[$workload])); then
set -x
hey -disable-compression -disable-keepalive -disable-redirects -n $batch_size -c 1 -cpus 1 -t 0 -o csv -m GET ${body[$workload]} "http://${hostname}:${port[$workload]}" > "$results_directory/${workload}_${batch_id}.csv" 2> /dev/null &
set +x
break
fi
done
done
pids=$(pgrep hey | tr '\n' ' ')
[[ -n $pids ]] && wait -f $pids
printf "[OK]\n"
for workload in "${workloads[@]}"; do
tail --quiet -n +2 "$results_directory/${workload}"_*.csv >> "$results_directory/${workload}.csv"
rm "$results_directory/${workload}"_*.csv
done
return 0
}
# Process the experimental results and generate human-friendly results for success rate, throughput, and latency
process_results() {
if (($# != 1)); then
error_msg "invalid number of arguments ($#, expected 1)"
return 1
elif ! [[ -d "$1" ]]; then
error_msg "directory $1 does not exist"
return 1
fi
local -r results_directory="$1"
printf "Processing Results: "
# Write header to latency.csv
printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
local -ar payloads=(ekf resize lpd)
for payload in "${payloads[@]}"; do
# Filter on 200s, subtract DNS time, convert from s to ms, and sort
awk -F, '$7 == 200 {print (($1 - $2) * 1000)}' < "$results_directory/$payload.csv" \
| sort -g > "$results_directory/$payload-response.csv"
oks=$(wc -l < "$results_directory/$payload-response.csv")
((oks == 0)) && continue # If all errors, skip line
# Generate Latency Data for csv
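# The percentile rows are indices into the sorted response times, e.g. with oks=200
# the p50 value is row 100, p90 row 180, p99 row 198, and p100 row 200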
awk '
BEGIN {
sum = 0
p50 = int('"$oks"' * 0.5)
p90 = int('"$oks"' * 0.9)
p99 = int('"$oks"' * 0.99)
p100 = '"$oks"'
printf "'"$payload"',"
}
NR==p50 {printf "%1.4f,", $0}
NR==p90 {printf "%1.4f,", $0}
NR==p99 {printf "%1.4f,", $0}
NR==p100 {printf "%1.4f\n", $0}
' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"
# Delete scratch file used for sorting/counting
rm -rf "$results_directory/$payload-response.csv"
done
# Transform csvs to dat files for gnuplot
csv_to_dat "$results_directory/latency.csv"
printf "[OK]\n"
return 0
}
# Expected Symbol used by the framework
experiment_main() {
local -r target_hostname="$1"
local -r results_directory="$2"
run_samples "$target_hostname" || return 1
run_experiments "$target_hostname" "$results_directory" || return 1
process_results "$results_directory" || return 1
return 0
}
main "$@"

@ -0,0 +1,60 @@
{
"active": true,
"name": "ekf",
"path": "ekf_wasm.so",
"port": 10000,
"expected-execution-us": 5000,
"relative-deadline-us": 354,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "application/octet-stream",
"http-req-size": 1024000,
"http-resp-headers": [],
"http-resp-size": 1024000,
"http-resp-content-type": "application/octet-stream"
},
{
"active": true,
"name": "resize",
"path": "resize_wasm.so",
"port": 10001,
"expected-execution-us": 5000,
"relative-deadline-us": 242058,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "image/jpeg",
"http-req-size": 1024000,
"http-resp-headers": [],
"http-resp-size": 1024000,
"http-resp-content-type": "image/png"
},
{
"active": true,
"name": "lpd",
"path": "lpd_wasm.so",
"port": 10002,
"expected-execution-us": 5000,
"relative-deadline-us": 52425,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "image/jpeg",
"http-req-size": 1002400,
"http-resp-headers": [],
"http-resp-size": 1048576,
"http-resp-content-type": "text/plain"
},
{
"active": true,
"name": "gocr",
"path": "gocr_wasm.so",
"port": 10003,
"expected-execution-us": 5000,
"relative-deadline-us": 360000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 5335057,
"http-resp-headers": [],
"http-resp-size": 5335057,
"http-resp-content-type": "text/plain"
}