From 7640cd5fb6e09e1e7c41eacad65322f953523d8d Mon Sep 17 00:00:00 2001
From: Sean McBride
Date: Mon, 14 Jun 2021 09:36:46 -0400
Subject: [PATCH] Percentile Cleanup, Processor Speed Hack, and Experimental
 Config (#257)

* chore: Hackily hardcode CPU speed
* chore: dump execution profile in spec generation
* chore: also generate dat
* fix: awk off-by-one error
* chore: write dummy row
* chore: Remove ekf, up concur, up iter
* chore: awk statistical significance
* fix: wrap awk string in quotes
* fix: add awk trailing delimiter
* chore: disable ekf in deadline description
* chore: Increase iters
* chore: generate deadline key
* chore: make percentile global
* chore: update mix for new multiples
* chore: deadline csv key
* docs: Better explain how to run experiments
* refactor: Initial percentile table cleanup
* refactor: additional percentile table cleanup
* refactor: payload percentiles table
* refactor: workload_mix percentile table
* feat: final percentiles row stuff
* chore: revert processor hack
---
 .../applications/ekf/by_iteration/run.sh      | 18 +----
 .../applications/imageclassification/run.sh   | 22 +-----
 .../imageresize/by_resolution/run.sh          | 18 +----
 .../licenseplate/by_plate_count/run.sh        | 21 +----
 .../applications/ocr/by_dpi/run.sh            | 26 ++-----
 .../applications/ocr/by_font/run.sh           | 24 +-----
 .../applications/ocr/by_word/run.sh           | 25 +-----
 .../bash_libraries/percentiles_table.sh       | 77 +++++++++++++++++++
 runtime/experiments/bimodal/run.sh            | 20 +----
 .../experiments/concurrency/latency.gnuplot   | 10 ++-
 runtime/experiments/concurrency/run.sh        | 18 +----
 .../deadline_description/README.md            | 17 ++--
 .../experiments/deadline_description/run.sh   | 32 +++++---
 runtime/experiments/payload/run.sh            | 26 ++-----
 runtime/experiments/workload_mix/run.sh       | 23 +-----
 .../workload_mix_realworld/mix.csv            | 30 ++------
 .../experiments/workload_mix_realworld/run.sh | 46 +++--------
 17 files changed, 176 insertions(+), 277 deletions(-)
 create mode 100644 runtime/experiments/bash_libraries/percentiles_table.sh

diff --git a/runtime/experiments/applications/ekf/by_iteration/run.sh b/runtime/experiments/applications/ekf/by_iteration/run.sh
index ca30430..b7f3b7d 100755
--- a/runtime/experiments/applications/ekf/by_iteration/run.sh
+++ b/runtime/experiments/applications/ekf/by_iteration/run.sh
@@ -11,6 +11,7 @@ source framework.sh || exit 1
 source get_result_count.sh || exit 1
 source panic.sh || exit 1
 source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
 
 run_functional_tests() {
 	local hostname="$1"
@@ -98,7 +99,7 @@ process_results() {
 	printf "Processing Results: "
 
 	# Write headers to CSVs
-	printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
+	percentiles_table_header "$results_directory/latency.csv"
 
 	for workload in "${workloads[@]}"; do
 
@@ -110,20 +111,7 @@ process_results() {
 		((oks == 0)) && continue # If all errors, skip line
 
 		# Generate Latency Data for csv
-		awk '
-			BEGIN {
-				sum = 0
-				p50 = int('"$oks"' * 0.5)
-				p90 = int('"$oks"' * 0.9)
-				p99 = int('"$oks"' * 0.99)
-				p100 = '"$oks"'
-				printf "'"$workload"',"
-			}
-			NR==p50 {printf "%1.4f,", $0}
-			NR==p90 {printf "%1.4f,", $0}
-			NR==p99 {printf "%1.4f,", $0}
-			NR==p100 {printf "%1.4f\n", $0}
-		' < "$results_directory/$workload-response.csv" >> "$results_directory/latency.csv"
+		percentiles_table_row "$results_directory/$workload-response.csv" "$results_directory/latency.csv" "$workload"
 
 		# Delete scratch file used for sorting/counting
 		rm -rf "$results_directory/$workload-response.csv"
diff --git a/runtime/experiments/applications/imageclassification/run.sh b/runtime/experiments/applications/imageclassification/run.sh
index 38fe425..5eb0b61 100755
--- a/runtime/experiments/applications/imageclassification/run.sh
+++ b/runtime/experiments/applications/imageclassification/run.sh
@@ -10,6 +10,7 @@ source framework.sh || exit 1
 source get_result_count.sh || exit 1
 source panic.sh || exit 1
 source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
 source validate_dependencies.sh || exit 1
 
 get_random_image() {
@@ -90,7 +91,7 @@ process_results() {
 	printf "Processing Results: \n"
 
 	# Write headers to CSVs
-	printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
+	percentiles_table_header "$results_directory/latency.csv"
 
 	for workload in "${workloads[@]}"; do
 
@@ -98,24 +99,7 @@ process_results() {
 		awk -F, '$7 == 200 {print (($1 - $2) * 1000)}' < "$results_directory/$workload.csv" \
 			| sort -g > "$results_directory/$workload-response.csv"
 
-		oks=$(wc -l < "$results_directory/$workload-response.csv")
-		((oks == 0)) && continue # If all errors, skip line
-
-		# Generate Latency Data for csv
-		awk '
-			BEGIN {
-				sum = 0
-				p50 = int('"$oks"' * 0.5)
-				p90 = int('"$oks"' * 0.9)
-				p99 = int('"$oks"' * 0.99)
-				p100 = '"$oks"'
-				printf "'"$workload"',"
-			}
-			NR==p50 {printf "%1.4f,", $0}
-			NR==p90 {printf "%1.4f,", $0}
-			NR==p99 {printf "%1.4f,", $0}
-			NR==p100 {printf "%1.4f\n", $0}
-		' < "$results_directory/$workload-response.csv" >> "$results_directory/latency.csv"
+		percentiles_table_row "$results_directory/$workload-response.csv" "$results_directory/latency.csv" "$workload"
 
 		# Delete scratch file used for sorting/counting
 		rm -rf "$results_directory/$workload-response.csv"

diff --git a/runtime/experiments/applications/imageresize/by_resolution/run.sh b/runtime/experiments/applications/imageresize/by_resolution/run.sh
index 29f2ff5..1593586 100755
--- a/runtime/experiments/applications/imageresize/by_resolution/run.sh
+++ b/runtime/experiments/applications/imageresize/by_resolution/run.sh
@@ -10,6 +10,7 @@ source framework.sh || exit 1
 source get_result_count.sh || exit 1
 source panic.sh || exit 1
 source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
 source validate_dependencies.sh || exit 1
 
 run_functional_tests() {
@@ -121,7 +122,7 @@ process_results() {
 	printf "Processing Results: "
 
 	# Write headers to CSVs
-	printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
+	percentiles_table_header "$results_directory/latency.csv"
 
 	for workload in "${workloads[@]}"; do
 
@@ -133,20 +134,7 @@ process_results() {
 		((oks == 0)) && continue # If all errors, skip line
 
 		# Generate Latency Data for csv
-		awk '
-			BEGIN {
-				sum = 0
-				p50 = int('"$oks"' * 0.5)
-				p90 = int('"$oks"' * 0.9)
-				p99 = int('"$oks"' * 0.99)
-				p100 = '"$oks"'
-				printf "'"$workload"',"
-			}
-			NR==p50 {printf "%1.4f,", $0}
-			NR==p90 {printf "%1.4f,", $0}
-			NR==p99 {printf "%1.4f,", $0}
-			NR==p100 {printf "%1.4f\n", $0}
-		' < "$results_directory/$workload-response.csv" >> "$results_directory/latency.csv"
+		percentiles_table_row "$results_directory/$workload-response.csv" "$results_directory/latency.csv" "$workload"
 
 		# Delete scratch file used for sorting/counting
 		rm -rf "$results_directory/$workload-response.csv"
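The scratch files consumed by `percentiles_table_row` above are produced by the same one-liner in each experiment: filter `hey`'s CSV output down to successful requests, subtract connection setup from total response time, convert to milliseconds, and sort ascending. A standalone sketch of that pipeline — the `$workload` file names are illustrative, and the column meanings (field 1 = response time in seconds, field 2 = DNS+dialup time, field 7 = HTTP status code) are this note's reading of `hey -o csv`, not something the patch states:

```sh
#!/bin/bash
# Illustrative only: build the sorted single-column input that percentiles_table_row expects
workload="fibonacci_10" # hypothetical workload name

# Keep 200s, convert (response - connection setup) seconds to milliseconds, sort numerically
awk -F, '$7 == 200 {print (($1 - $2) * 1000)}' < "$workload.csv" \
	| sort -g > "$workload-response.csv"
```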
diff --git a/runtime/experiments/applications/licenseplate/by_plate_count/run.sh b/runtime/experiments/applications/licenseplate/by_plate_count/run.sh
index 6770118..8ef3377 100755
--- a/runtime/experiments/applications/licenseplate/by_plate_count/run.sh
+++ b/runtime/experiments/applications/licenseplate/by_plate_count/run.sh
@@ -10,6 +10,7 @@ source framework.sh || exit 1
 source get_result_count.sh || exit 1
 source panic.sh || exit 1
 source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
 source validate_dependencies.sh || exit 1
 
 get_random_image() {
@@ -39,7 +40,7 @@ process_results() {
 	printf "Processing Results: "
 
 	# Write headers to CSVs
-	printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
+	percentiles_table_header "$results_directory/latency.csv"
 
 	for workload in "${workloads[@]}"; do
 
@@ -47,24 +48,8 @@ process_results() {
 		awk -F, '$7 == 200 {print (($1 - $2) * 1000)}' < "$results_directory/$workload.csv" \
 			| sort -g > "$results_directory/$workload-response.csv"
 
-		oks=$(wc -l < "$results_directory/$workload-response.csv")
-		((oks == 0)) && continue # If all errors, skip line
-
-		# Generate Latency Data for csv
-		awk '
-			BEGIN {
-				sum = 0
-				p50 = int('"$oks"' * 0.5)
-				p90 = int('"$oks"' * 0.9)
-				p99 = int('"$oks"' * 0.99)
-				p100 = '"$oks"'
-				printf "'"$workload"',"
-			}
-			NR==p50 {printf "%1.4f,", $0}
-			NR==p90 {printf "%1.4f,", $0}
-			NR==p99 {printf "%1.4f,", $0}
-			NR==p100 {printf "%1.4f\n", $0}
-		' < "$results_directory/$workload-response.csv" >> "$results_directory/latency.csv"
+		percentiles_table_row "$results_directory/$workload-response.csv" "$results_directory/latency.csv" "$workload"
 
 		# Delete scratch file used for sorting/counting
 		rm -rf "$results_directory/$workload-response.csv"

diff --git a/runtime/experiments/applications/ocr/by_dpi/run.sh b/runtime/experiments/applications/ocr/by_dpi/run.sh
index a656858..fca7a47 100755
--- a/runtime/experiments/applications/ocr/by_dpi/run.sh
+++ b/runtime/experiments/applications/ocr/by_dpi/run.sh
@@ -10,6 +10,7 @@ source framework.sh || exit 1
 source get_result_count.sh || exit 1
 source panic.sh || exit 1
 source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
 source validate_dependencies.sh || exit 1
 
 experiment_client() {
@@ -62,7 +63,8 @@ experiment_client() {
 	# Process Results
 	# Write Headers to CSV files
 	printf "DPI,Success_Rate\n" >> "$results_directory/success.csv"
-	printf "DPI,min,mean,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
+	percentiles_table_header "$results_directory/latency.csv" "dpi"
+
 	for dpi in "${dpis[@]}"; do
 		# Skip empty results
 		oks=$(wc -l < "$results_directory/${dpi}_time.txt")
@@ -83,26 +85,8 @@ experiment_client() {
 		awk -F, '{print ($0 * 1000)}' < "$results_directory/${dpi}_time.txt" | sort -g > "$results_directory/${dpi}_time_sorted.txt"
 
 		# Generate Latency Data for csv
-		awk '
-			BEGIN {
-				sum = 0
-				p50_idx = int('"$oks"' * 0.5)
-				p90_idx = int('"$oks"' * 0.9)
-				p99_idx = int('"$oks"' * 0.99)
-				p100_idx = '"$oks"'
-				printf "'"$dpi"',"
-			}
-			{sum += $0}
-			NR==1 {min = $0}
-			NR==p50_idx {p50 = $0}
-			NR==p90_idx {p90 = $0}
-			NR==p99_idx {p99 = $0}
-			NR==p100_idx {p100 = $0}
-			END {
-				mean = sum / NR
-				printf "%1.4f,%1.4f,%1.4f,%1.4f,%1.4f,%1.4f\n", min, mean, p50, p90, p99, p100
-			}
-		' < "$results_directory/${dpi}_time_sorted.txt" >> "$results_directory/latency.csv"
+		percentiles_table_row "$results_directory/${dpi}_time_sorted.txt" "$results_directory/latency.csv" "$dpi"
+
 	done
 
 	# Transform csvs to dat files for gnuplot

diff --git a/runtime/experiments/applications/ocr/by_font/run.sh b/runtime/experiments/applications/ocr/by_font/run.sh
index 73f1573..d373eb9 100755
--- a/runtime/experiments/applications/ocr/by_font/run.sh
+++ b/runtime/experiments/applications/ocr/by_font/run.sh
@@ -10,6 +10,7 @@ source framework.sh || exit 1
 source get_result_count.sh || exit 1
 source panic.sh || exit 1
 source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
 source validate_dependencies.sh || exit 1
 
 experiment_client() {
@@ -64,7 +65,7 @@ experiment_client() {
 	# Process Results
 	# Write Headers to CSV files
 	printf "font,Success_Rate\n" >> "$results_directory/success.csv"
-	printf "font,min,mean,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
+	percentiles_table_header "$results_directory/latency.csv"
 
 	for font in "${fonts[@]}"; do
 		font_file="${font// /_}"
@@ -87,26 +88,7 @@ experiment_client() {
 		awk -F, '{print ($0 * 1000)}' < "$results_directory/${font_file}_time.txt" | sort -g > "$results_directory/${font_file}_time_sorted.txt"
 
 		# Generate Latency Data for csv
-		awk '
-			BEGIN {
-				sum = 0
-				p50_idx = int('"$oks"' * 0.5)
-				p90_idx = int('"$oks"' * 0.9)
-				p99_idx = int('"$oks"' * 0.99)
-				p100_idx = '"$oks"'
-				printf "'"$font_file"',"
-			}
-			{sum += $0}
-			NR==1 {min = $0}
-			NR==p50_idx {p50 = $0}
-			NR==p90_idx {p90 = $0}
-			NR==p99_idx {p99 = $0}
-			NR==p100_idx {p100 = $0}
-			END {
-				mean = sum / NR
-				printf "%1.4f,%1.4f,%1.4f,%1.4f,%1.4f,%1.4f\n", min, mean, p50, p90, p99, p100
-			}
-		' < "$results_directory/${font_file}_time_sorted.txt" >> "$results_directory/latency.csv"
+		percentiles_table_row "$results_directory/${font_file}_time_sorted.txt" "$results_directory/latency.csv" "$font_file"
 	done
 
 	# Transform csvs to dat files for gnuplot

diff --git a/runtime/experiments/applications/ocr/by_word/run.sh b/runtime/experiments/applications/ocr/by_word/run.sh
index 12988ae..cf1a9eb 100755
--- a/runtime/experiments/applications/ocr/by_word/run.sh
+++ b/runtime/experiments/applications/ocr/by_word/run.sh
@@ -11,6 +11,7 @@ source framework.sh || exit 1
 source get_result_count.sh || exit 1
 source panic.sh || exit 1
 source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
 source validate_dependencies.sh || exit 1
 
 experiment_client() {
@@ -68,7 +69,8 @@ experiment_client() {
 	# Process Results
 	printf "words,Success_Rate\n" >> "$results_directory/success.csv"
-	printf "words,min,mean,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
+	percentiles_table_header "$results_directory/latency.csv" "words"
+
 	for word_count in "${word_counts[@]}"; do
 		word_count_file="${word_count}_words"
@@ -91,26 +93,7 @@ experiment_client() {
 		awk -F, '{print ($0 * 1000)}' < "$results_directory/${word_count_file}_time.txt" | sort -g > "$results_directory/${word_count_file}_time_sorted.txt"
 
 		# Generate Latency Data for csv
-		awk '
-			BEGIN {
-				sum = 0
-				p50_idx = int('"$oks"' * 0.5)
-				p90_idx = int('"$oks"' * 0.9)
-				p99_idx = int('"$oks"' * 0.99)
-				p100_idx = '"$oks"'
-				printf "'"$word_count_file"',"
-			}
-			{sum += $0}
-			NR==1 {min = $0}
-			NR==p50_idx {p50 = $0}
-			NR==p90_idx {p90 = $0}
-			NR==p99_idx {p99 = $0}
-			NR==p100_idx {p100 = $0}
-			END {
-				mean = sum / NR
-				printf "%1.4f,%1.4f,%1.4f,%1.4f,%1.4f,%1.4f\n", min, mean, p50, p90, p99, p100
-			}
-		' < "$results_directory/${word_count_file}_time_sorted.txt" >> "$results_directory/latency.csv"
+		percentiles_table_row "$results_directory/${word_count_file}_time_sorted.txt" "$results_directory/latency.csv" "$word_count_file"
 	done
 
 	# Transform csvs to dat files for gnuplot

diff --git a/runtime/experiments/bash_libraries/percentiles_table.sh b/runtime/experiments/bash_libraries/percentiles_table.sh
new file mode 100644
index 0000000..a7fd333
--- /dev/null
+++ b/runtime/experiments/bash_libraries/percentiles_table.sh
@@ -0,0 +1,77 @@
+# shellcheck shell=bash
+
+source
"type_checks.sh" || exit 1 + +# These utility functions are used to generate percentile tables that summarize distributions of data. +# Each row represents an independent variable, such as a scheduling policy +# The data for each column is provided as a single column of sorted data +# If the data is too course, a percentile might be statistically insignificant. If this is the case, +# The script writes an * to the appropriate cell +# +# Example: +# +# percentiles_table_header "./table.csv" +# for $variant in (fifo_nopreemption fifo_preemption edf_nopreemption edf_preemption); do +# percentiles_table_row "./${variant}.csv" "./table.csv" "$variant" +# done +# +# See Also: +# - csv_to_dat - Can transform a table into a *.dat file suitable for gnuplot +# +# References +# - The AWK Programming Language - https://ia802309.us.archive.org/25/items/pdfy-MgN0H1joIoDVoIC7/The_AWK_Programming_Language.pdf +# - GAWK: Effective AWK Programming - https://www.gnu.org/software/gawk/manual/gawk.pdf + +percentiles_table_header() { + local table_file="${1:?table_file not set}" + # Can optionally override "app" in header + local label_header="${2:-app}" + echo "${label_header},cnt,min,mean,p50,p90,p99,max" > "$table_file" +} + +# columnar_data_file is assumed to be a file containing a single column or sorted data +percentiles_table_row() { + local -r columnar_data_file="${1:?columnar_data_file not set}" + check_file columnar_data_file + local -r table_file="${2:?table_file not set}" + check_file table_file + local -r row_label="${3:?row_label not set}" + local -r format_string="${4:-%1.4f}" + + # Count the number of results + local -i sample_size + sample_size=$(wc -l < "$columnar_data_file") + + if ((sample_size == 0)); then + # We might not have actually run every variant depending on iterations and workload mix + # Insert a degenerate row if this is the case + echo "$row_label,0,*,*,*,*,*,*" >> "$table_file" + else + awk ' + BEGIN { + sample_size='"$sample_size"' + row_label="'"$row_label"'" + format_string="'"$format_string"'" + invalid_number_symbol="*" + sum = 0 + p50_idx = int(sample_size * 0.5) + p90_idx = int(sample_size * 0.9) + p99_idx = int(sample_size * 0.99) + p100_idx = sample_size + } + + # Empty pattern matches all rows + { sum += $0 } + NR==1 { min = sample_size > 0 ? sprintf(format_string, $0) : invalid_number_symbol } + NR==p50_idx { p50 = sample_size >= 3 ? sprintf(format_string, $0) : invalid_number_symbol } + NR==p90_idx { p90 = sample_size >= 10 ? sprintf(format_string, $0) : invalid_number_symbol } + NR==p99_idx { p99 = sample_size >= 100 ? sprintf(format_string, $0) : invalid_number_symbol } + NR==p100_idx { p100 = sample_size > 0 ? sprintf(format_string, $0) : invalid_number_symbol } + + END { + mean = sample_size > 0 ? sprintf(format_string, sum / NR) : invalid_number_symbol + printf "%s,%d,%s,%s,%s,%s,%s,%s\n", row_label, sample_size, min, mean, p50, p90, p99, p100 + } + ' < "$columnar_data_file" >> "$table_file" + fi +} diff --git a/runtime/experiments/bimodal/run.sh b/runtime/experiments/bimodal/run.sh index d8175e2..76e50e5 100755 --- a/runtime/experiments/bimodal/run.sh +++ b/runtime/experiments/bimodal/run.sh @@ -18,6 +18,7 @@ source framework.sh || exit 1 source get_result_count.sh || exit 1 source panic.sh || exit 1 source path_join.sh || exit 1 +source percentiles_table.sh || exit 1 if ! command -v hey > /dev/null; then echo "hey is not present." 
diff --git a/runtime/experiments/bimodal/run.sh b/runtime/experiments/bimodal/run.sh
index d8175e2..76e50e5 100755
--- a/runtime/experiments/bimodal/run.sh
+++ b/runtime/experiments/bimodal/run.sh
@@ -18,6 +18,7 @@ source framework.sh || exit 1
 source get_result_count.sh || exit 1
 source panic.sh || exit 1
 source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
 
 if ! command -v hey > /dev/null; then
 	echo "hey is not present."
@@ -176,7 +177,7 @@ process_results() {
 	# Write headers to CSVs
 	printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
 	printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
-	printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
+	percentiles_table_header "$results_directory/latency.csv"
 
 	# The four types of results that we are capturing.
 	# fib10 and fib 40 are run sequentially.
@@ -217,23 +218,10 @@ process_results() {
 		printf "%s,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"
 
 		# Generate Latency Data for csv
-		awk '
-			BEGIN {
-				sum = 0
-				p50 = int('"$oks"' * 0.5)
-				p90 = int('"$oks"' * 0.9)
-				p99 = int('"$oks"' * 0.99)
-				p100 = '"$oks"'
-				printf "'"$payload"',"
-			}
-			NR==p50 {printf "%1.4f,", $0}
-			NR==p90 {printf "%1.4f,", $0}
-			NR==p99 {printf "%1.4f,", $0}
-			NR==p100 {printf "%1.4f\n", $0}
-		' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"
+		percentiles_table_row "$results_directory/$payload-response.csv" "$results_directory/latency.csv" "$payload"
 
 		# Delete scratch file used for sorting/counting
-		# rm -rf "$results_directory/$payload-response.csv"
+		rm -rf "$results_directory/$payload-response.csv"
 	done
 
 	# Transform csvs to dat files for gnuplot

diff --git a/runtime/experiments/concurrency/latency.gnuplot b/runtime/experiments/concurrency/latency.gnuplot
index af8cffe..c13db68 100644
--- a/runtime/experiments/concurrency/latency.gnuplot
+++ b/runtime/experiments/concurrency/latency.gnuplot
@@ -13,7 +13,9 @@ set yrange [0:]
 
 set style histogram columnstacked
 
-plot 'latency.dat' using 1:2 title 'p50', \
-     'latency.dat' using 1:3 title 'p90', \
-     'latency.dat' using 1:4 title 'p99', \
-     'latency.dat' using 1:5 title 'p100', \
+plot 'latency.dat' using 1:8 title 'p100', \
+     'latency.dat' using 1:7 title 'p99', \
+     'latency.dat' using 1:6 title 'p90', \
+     'latency.dat' using 1:5 title 'p50', \
+     'latency.dat' using 1:4 title 'mean', \
+     'latency.dat' using 1:3 title 'min', \
diff --git a/runtime/experiments/concurrency/run.sh b/runtime/experiments/concurrency/run.sh
index 0571116..ac3a5f5 100755
--- a/runtime/experiments/concurrency/run.sh
+++ b/runtime/experiments/concurrency/run.sh
@@ -13,6 +13,7 @@ source framework.sh || exit 1
 source generate_gnuplots.sh || exit 1
 source get_result_count.sh || exit 1
 source panic.sh || exit 1
+source percentiles_table.sh || exit 1
 source path_join.sh || exit 1
 
 if ! command -v hey > /dev/null; then
@@ -110,7 +111,7 @@ process_results() {
 	# Write headers to CSVs
 	printf "Concurrency,Success_Rate\n" >> "$results_directory/success.csv"
 	printf "Concurrency,Throughput\n" >> "$results_directory/throughput.csv"
-	printf "Con,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
+	percentiles_table_header "$results_directory/latency.csv" "Con"
 
 	for conn in ${concurrency[*]}; do
 
@@ -143,20 +144,7 @@ process_results() {
 		printf "%d,%f\n" "$conn" "$throughput" >> "$results_directory/throughput.csv"
 
 		# Generate Latency Data for csv
-		awk '
-			BEGIN {
-				sum = 0
-				p50 = int('"$oks"' * 0.5)
-				p90 = int('"$oks"' * 0.9)
-				p99 = int('"$oks"' * 0.99)
-				p100 = '"$oks"'
-				printf "'"$conn"',"
-			}
-			NR==p50 {printf "%1.4f,", $0}
-			NR==p90 {printf "%1.4f,", $0}
-			NR==p99 {printf "%1.4f,", $0}
-			NR==p100 {printf "%1.4f\n", $0}
-		' < "$results_directory/con$conn-response.csv" >> "$results_directory/latency.csv"
+		percentiles_table_row "$results_directory/con$conn-response.csv" "$results_directory/latency.csv" "$conn"
 
 		# Delete scratch file used for sorting/counting
 		rm -rf "$results_directory/con$conn-response.csv"

diff --git a/runtime/experiments/deadline_description/README.md b/runtime/experiments/deadline_description/README.md
index 4c0e5b9..664fc28 100644
--- a/runtime/experiments/deadline_description/README.md
+++ b/runtime/experiments/deadline_description/README.md
@@ -22,7 +22,7 @@ cd ../workload_mix_realworld/
 
 ```sh
 cd ~/projects/sledge-serverless-framework/runtime/experiments/workload_mix_realworld/
-scp sean@192.168.7.26:~/projects/sledge-serverless-framework/runtime/experiments/workload_mix_realworld/spec.json spec.json
+scp sean@192.168.7.26:~/projects/sledge-serverless-framework/runtime/experiments/workload_mix_realworld/spec.json spec.json
 ```
@@ -30,23 +30,31 @@ cd ~/projects/sledge-serverless-framework/runtime/experiments/workload_mix_realw
 
 4. If the deadline interval was modified, you may need to manually modify the `mix.csv` to properly map to the module names in `spec.json`. Once complete, the experiment is ready to run.
 
 5. On the server, start the runtime using one of the configurations expressed as .env files.
 
 ```sh
-rm -rf ./res/myrun/fifo_nopreemption
 ./run.sh -s -e=fifo_nopreemption.env --name=myrun
 ```
 
 6. On the client, run the client driver script targeting the server.
 
+```sh
+./run.sh -t=192.168.7.26
 ```
-./run.sh -t=192.168.7.26 --name=myrun
+
+The client does not have the concept of an "env file" given that this is a server config, but if you want to namespace output from the client, you can define a directory as the name. This would write the client results to `./res/myrun/client/fifo_nopreemption/`.
+
+```sh
+./run.sh -t=192.168.7.26 --name=myrun/client/fifo_nopreemption/
 ```
 
 7. Repeat steps 5 and 6 for each desired scheduling policy.
 
 8. The results are on the server. You may want to copy them to your client to inspect them more easily.
 
 ```sh
-scp -r sean@192.168.7.26:~/projects/sledge-serverless-framework/runtime/experiments/workload_mix_realworld/res/myrun ./res/myrun
+scp -r sean@192.168.7.26:~/projects/sledge-serverless-framework/runtime/experiments/workload_mix_realworld/res/myrun/* ./res/myrun
 ```
+
+9. If you are assembling a spreadsheet, you might also want to copy the results from the `deadline_description` experiment. The `deadlines.csv` file and the execution_time files are useful for understanding the performance characteristics of a single workload.
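+
+For example (the `deadline_description` results path below is hypothetical; adjust the host and run name to your invocation):
+
+```sh
+scp -r sean@192.168.7.26:~/projects/sledge-serverless-framework/runtime/experiments/deadline_description/res/myrun ./res/deadline_description
+```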
diff --git a/runtime/experiments/deadline_description/run.sh b/runtime/experiments/deadline_description/run.sh
index e006c26..8d2d6f2 100755
--- a/runtime/experiments/deadline_description/run.sh
+++ b/runtime/experiments/deadline_description/run.sh
@@ -11,21 +11,24 @@ source framework.sh || exit 1
 source get_result_count.sh || exit 1
 source panic.sh || exit 1
 source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
 source validate_dependencies.sh || exit 1
 
 validate_dependencies awk hey jq
 
 # Please keep the elements ordered alphabetically!
-declare -a workloads=(cifar10 ekf gocr lpd resize)
-declare -a multiples=(1.5 1.6 1.7 1.8 1.9 2.0)
+# declare -a workloads=(cifar10 ekf gocr lpd resize)
+declare -a workloads=(cifar10 gocr lpd resize)
+declare -a multiples=(1.5 2.0 3.0 4.0)
+declare -ri percentile=50
 
 profile() {
 	local hostname="$1"
 	local -r results_directory="$2"
 
 	# ekf
-	hey -disable-compression -disable-keepalive -disable-redirects -n 256 -c 1 -cpus 1 -t 0 -o csv -m GET -D "./ekf/initial_state.dat" "http://${hostname}:10000" > /dev/null
-	printf "[ekf: OK]\n"
+	# hey -disable-compression -disable-keepalive -disable-redirects -n 256 -c 1 -cpus 1 -t 0 -o csv -m GET -D "./ekf/initial_state.dat" "http://${hostname}:10000" > /dev/null
+	# printf "[ekf: OK]\n"
 
 	# Resize
 	hey -disable-compression -disable-keepalive -disable-redirects -n 256 -c 1 -cpus 1 -t 0 -o csv -m GET -D "./resize/shrinking_man_large.jpg" "http://${hostname}:10001" > /dev/null
@@ -47,9 +50,8 @@ get_baseline_execution() {
 	local -r results_directory="$1"
 	local -r module="$2"
-	local -ir percentile="$3"
 
-	local response_times_file="$results_directory/$module/response_times_sorted.csv"
+	local response_times_file="$results_directory/$module/execution_times_sorted.csv"
 
 	# Skip empty results
 	local -i oks
@@ -58,7 +60,7 @@ get_baseline_execution() {
 
 	# Generate Latency Data for csv
 	awk '
-		BEGIN {idx = int('"$oks"' * ('"$percentile"' / 100))}
+		BEGIN {idx = int('"$oks"' * ('"$percentile"' / 100)) + 1}
 		NR==idx {printf "%1.4f\n", $0}
 	' < "$response_times_file"
 }
@@ -73,7 +75,6 @@ generate_spec() {
 	local results_directory="$1"
 
 	# Multiplier Interval and Expected Execution Percentile is currently the same for all workloads
-	local -ri percentile=90
 	((percentile < 50 || percentile > 99)) && panic "Percentile should be between 50 and 99 inclusive, was $percentile"
 
 	local -A baseline_execution=()
@@ -81,7 +82,7 @@ generate_spec() {
 	local relative_deadline
 
 	for workload in "${workloads[@]}"; do
-		baseline_execution["$workload"]="$(get_baseline_execution "$results_directory" "$workload" $percentile)"
+		baseline_execution["$workload"]="$(get_baseline_execution "$results_directory" "$workload" "$percentile")"
 		[[ -z "${baseline_execution[$workload]}" ]] && {
 			panic "Failed to get baseline execution for $workload"
 			exit 1
@@ -90,6 +91,7 @@ generate_spec() {
 		# Generates unique module specs on different ports using the different multiples
 		for multiple in "${multiples[@]}"; do
 			relative_deadline=$(calculate_relative_deadline "${baseline_execution[$workload]}" "${multiple}")
+			echo "${workload}_${multiple},${relative_deadline}" >> "$results_directory/deadlines.csv"
 			jq ". + { \
 			\"admissions-percentile\": $percentile,\
 			\"expected-execution-us\": ${baseline_execution[${workload}]},\
@@ -118,11 +120,21 @@ process_results() {
 	local results_directory="$1"
 
+	percentiles_table_header "$results_directory/execution_time.csv" "Payload"
+
 	for workload in "${workloads[@]}"; do
 		mkdir "$results_directory/$workload"
 
-		awk -F, '$2 == "'"$workload"'" {printf("%.0f\n", $6 / $13)}' < "$results_directory/perf.log" | sort -g > "$results_directory/$workload/response_times_sorted.csv"
+		awk -F, '$2 == "'"$workload"'" {printf("%.4f\n", $6 / $13)}' < "$results_directory/perf.log" | sort -g > "$results_directory/$workload/execution_times_sorted.csv"
+
+		oks=$(wc -l < "$results_directory/$workload/execution_times_sorted.csv")
+		((oks == 0)) && continue # If all errors, skip line
+
+		# Generate Latency Data for csv
+		percentiles_table_row "$results_directory/$workload/execution_times_sorted.csv" "$results_directory/execution_time.csv" "$workload"
 	done
 
+	csv_to_dat "$results_directory/execution_time.csv"
+
 	generate_spec "$results_directory"
 
 	return 0
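The `+ 1` added to the index computation in `get_baseline_execution` (the "awk off-by-one error" fix from the changelog) is easiest to sanity-check against synthetic data: `int()` truncates, which can select a rank one too low — or rank 0, which `NR` never matches. An illustrative check using `seq` as stand-in execution times:

```sh
# 100 sorted "samples": 1..100, with percentile=50 as in the experiment's global.
# int(100 * 0.50) = 50 would pick the 50th sample; the +1 picks the 51st,
# so at least percentile% of samples fall at or below the reported value.
seq 1 100 | awk 'BEGIN { idx = int(100 * (50 / 100)) + 1 } NR == idx { printf "%1.4f\n", $0 }'
# prints 51.0000
```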
diff --git a/runtime/experiments/payload/run.sh b/runtime/experiments/payload/run.sh
index a9fb115..f52d6db 100755
--- a/runtime/experiments/payload/run.sh
+++ b/runtime/experiments/payload/run.sh
@@ -11,10 +11,11 @@ __run_sh__bash_libraries_absolute_path=$(cd "$__run_sh__base_path" && cd "$__run
 export PATH="$__run_sh__bash_libraries_absolute_path:$PATH"
 
 # Source libraries from bash_libraries directory
-source "path_join.sh" || exit 1
-source "framework.sh" || exit 1
-source "get_result_count.sh" || exit 1
-source "generate_gnuplots.sh" || exit 1
+source path_join.sh || exit 1
+source framework.sh || exit 1
+source get_result_count.sh || exit 1
+source generate_gnuplots.sh || exit 1
+source percentiles_table.sh || exit 1
 
 if ! command -v hey > /dev/null; then
 	echo "hey is not present."
@@ -111,7 +112,7 @@ process_results() {
 
 	printf "Payload,Success_Rate\n" >> "$results_directory/success.csv"
 	printf "Payload,Throughput\n" >> "$results_directory/throughput.csv"
-	printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
+	percentiles_table_header "$results_directory/latency.csv" "Payload"
 
 	for payload in ${payloads[*]}; do
 		# Calculate Success Rate for csv
@@ -137,20 +138,7 @@ process_results() {
 		printf "%d,%f\n" "$payload" "$throughput" >> "$results_directory/throughput.csv"
 
 		# Generate Latency Data for csv
-		awk '
-			BEGIN {
-				sum = 0
-				p50 = int('"$oks"' * 0.5)
-				p90 = int('"$oks"' * 0.9)
-				p99 = int('"$oks"' * 0.99)
-				p100 = '"$oks"'
-				printf "'"$payload"',"
-			}
-			NR==p50 {printf "%1.4f,", $0}
-			NR==p90 {printf "%1.4f,", $0}
-			NR==p99 {printf "%1.4f,", $0}
-			NR==p100 {printf "%1.4f\n", $0}
-		' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"
+		percentiles_table_row "$results_directory/$payload-response.csv" "$results_directory/latency.csv" "$payload"
 
 		# Delete scratch file used for sorting/counting
 		rm -rf "$results_directory/$payload-response.csv"

diff --git a/runtime/experiments/workload_mix/run.sh b/runtime/experiments/workload_mix/run.sh
index 7105598..7b616a5 100755
--- a/runtime/experiments/workload_mix/run.sh
+++ b/runtime/experiments/workload_mix/run.sh
@@ -18,11 +18,9 @@ source framework.sh || exit 1
 source get_result_count.sh || exit 1
 source panic.sh || exit 1
 source path_join.sh || exit 1
+source percentiles_table.sh || exit 1
 
-if ! command -v hey > /dev/null; then
-	echo "hey is not present."
-	exit 1
-fi
+validate_dependencies hey
 
 # Sends requests until the per-module perf window buffers are full
 # This ensures that Sledge has accurate estimates of execution time
@@ -174,7 +172,7 @@ process_results() {
 	printf "Processing Results: "
 
 	# Write headers to CSVs
-	printf "Payload,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
+	percentiles_table_header "$results_directory/latency.csv"
 
 	local -ar payloads=(fibonacci_10 fibonacci_40)
 	for payload in "${payloads[@]}"; do
@@ -187,20 +185,7 @@ process_results() {
 		oks=$(wc -l < "$results_directory/$payload-response.csv")
 		((oks == 0)) && continue # If all errors, skip line
 		# Generate Latency Data for csv
-		awk '
-			BEGIN {
-				sum = 0
-				p50 = int('"$oks"' * 0.5)
-				p90 = int('"$oks"' * 0.9)
-				p99 = int('"$oks"' * 0.99)
-				p100 = '"$oks"'
-				printf "'"$payload"',"
-			}
-			NR==p50 {printf "%1.4f,", $0}
-			NR==p90 {printf "%1.4f,", $0}
-			NR==p99 {printf "%1.4f,", $0}
-			NR==p100 {printf "%1.4f\n", $0}
-		' < "$results_directory/$payload-response.csv" >> "$results_directory/latency.csv"
+		percentiles_table_row "$results_directory/$payload-response.csv" "$results_directory/latency.csv" "$payload"
 
 		# Delete scratch file used for sorting/counting
 		rm -rf "$results_directory/$payload-response.csv"
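The hand-rolled `command -v hey` check is replaced by the `validate_dependencies` helper already used by `deadline_description` and `workload_mix_realworld`. Its actual implementation lives in `validate_dependencies.sh`, which is not shown in this patch; behaviorally it amounts to something like the following sketch. (Note that the hunk does not add a `source validate_dependencies.sh || exit 1` line, so the call presumably relies on the helper being loaded transitively.)

```sh
# Hedged approximation of the helper's behavior, not the library's real code
validate_dependencies() {
	local dependency
	for dependency in "$@"; do
		if ! command -v "$dependency" > /dev/null; then
			echo "$dependency is not present."
			exit 1
		fi
	done
}

validate_dependencies hey
```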
diff --git a/runtime/experiments/workload_mix_realworld/mix.csv b/runtime/experiments/workload_mix_realworld/mix.csv
index d355d4a..c8bf7a4 100644
--- a/runtime/experiments/workload_mix_realworld/mix.csv
+++ b/runtime/experiments/workload_mix_realworld/mix.csv
@@ -1,30 +1,16 @@
 100,cifar10_1.5
-100,cifar10_1.6
-100,cifar10_1.7
-100,cifar10_1.8
-100,cifar10_1.9
 100,cifar10_2.0
-1000,ekf_1.5
-1000,ekf_1.6
-1000,ekf_1.7
-1000,ekf_1.8
-1000,ekf_1.9
-1000,ekf_2.0
+100,cifar10_3.0
+100,cifar10_4.0
 1,gocr_1.5
-1,gocr_1.6
-1,gocr_1.7
-1,gocr_1.8
-1,gocr_1.9
 1,gocr_2.0
+1,gocr_3.0
+1,gocr_4.0
 24,lpd_1.5
-24,lpd_1.6
-24,lpd_1.7
-24,lpd_1.8
-24,lpd_1.9
 24,lpd_2.0
+24,lpd_3.0
+24,lpd_4.0
 3,resize_1.5
-3,resize_1.6
-3,resize_1.7
-3,resize_1.8
-3,resize_1.9
 3,resize_2.0
+3,resize_3.0
+3,resize_4.0
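As step 4 of the README above notes, each `mix.csv` row pairs a sampling weight with a module name that must match a module in `spec.json`. A hypothetical consistency check follows; it assumes `spec.json` is a stream of module objects each carrying a `name` field, which is this note's assumption rather than something the patch specifies:

```sh
# Illustrative only: report mix.csv modules that spec.json does not define
while IFS=, read -r weight module; do
	jq -e --arg m "$module" 'select(.name == $m) | .name' < spec.json > /dev/null \
		|| echo "mix.csv references missing module: $module (weight $weight)"
done < mix.csv
```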
diff --git a/runtime/experiments/workload_mix_realworld/run.sh b/runtime/experiments/workload_mix_realworld/run.sh
index 41ba8e5..f06a46a 100755
--- a/runtime/experiments/workload_mix_realworld/run.sh
+++ b/runtime/experiments/workload_mix_realworld/run.sh
@@ -21,6 +21,7 @@ source get_result_count.sh || exit 1
 source panic.sh || exit 1
 source path_join.sh || exit 1
 source validate_dependencies.sh || exit 1
+source percentiles_table.sh || exit 1
 
 validate_dependencies hey jq
 
@@ -105,7 +106,7 @@ run_experiments() {
 	local -i batch_id=0
 	local -i roll=0
 	local -ir total_iterations=10000
-	local -ir worker_max=30
+	local -ir worker_max=50
 	local pids
 
 	printf "Running Experiments: "
@@ -160,33 +161,18 @@ process_results() {
 
 	# Write headers to CSVs
 	for metric in "${metrics[@]}"; do
-		printf "module,p50,p90,p99,p100\n" >> "$results_directory/$metric.csv"
+		percentiles_table_header "$results_directory/$metric.csv" "module"
 	done
-	printf "module,p50,p90,p99,p100\n" >> "$results_directory/memalloc.csv"
+	percentiles_table_header "$results_directory/memalloc.csv" "module"
 
 	for workload in "${workloads[@]}"; do
 		mkdir "$results_directory/$workload"
 
 		# TODO: Only include Complete
 		for metric in "${metrics[@]}"; do
-			awk -F, '$2 == "'"$workload"'" {printf("%.0f\n", $'"${fields[$metric]}"' / $13)}' < "$results_directory/perf.log" | sort -g > "$results_directory/$workload/${metric}_sorted.csv"
-			oks=$(wc -l < "$results_directory/$workload/${metric}_sorted.csv")
-			((oks == 0)) && continue # If all errors, skip line
-			awk '
-				BEGIN {
-					sum = 0
-					p50 = int('"$oks"' * 0.5)
-					p90 = int('"$oks"' * 0.9)
-					p99 = int('"$oks"' * 0.99)
-					p100 = '"$oks"'
-					printf "'"$workload"',"
-				}
-				NR==p50 {printf "%1.0f,", $0}
-				NR==p90 {printf "%1.0f,", $0}
-				NR==p99 {printf "%1.0f,", $0}
-				NR==p100 {printf "%1.0f\n", $0}
-			' < "$results_directory/$workload/${metric}_sorted.csv" >> "$results_directory/${metric}.csv"
+			awk -F, '$2 == "'"$workload"'" {printf("%.4f\n", $'"${fields[$metric]}"' / $13)}' < "$results_directory/perf.log" | sort -g > "$results_directory/$workload/${metric}_sorted.csv"
+
+			percentiles_table_row "$results_directory/$workload/${metric}_sorted.csv" "$results_directory/${metric}.csv" "$workload"
 
 			# Delete scratch file used for sorting/counting
 			# rm -rf "$results_directory/$workload/${metric}_sorted.csv"
@@ -194,22 +180,8 @@ process_results() {
 
 		# Memory Allocation
 		awk -F, '$2 == "'"$workload"'" {printf("%.0f\n", $14)}' < "$results_directory/perf.log" | sort -g > "$results_directory/$workload/memalloc_sorted.csv"
-		oks=$(wc -l < "$results_directory/$workload/memalloc_sorted.csv")
-		((oks == 0)) && continue # If all errors, skip line
-		awk '
-			BEGIN {
-				sum = 0
-				p50 = int('"$oks"' * 0.5)
-				p90 = int('"$oks"' * 0.9)
-				p99 = int('"$oks"' * 0.99)
-				p100 = '"$oks"'
-				printf "'"$workload"',"
-			}
-			NR==p50 {printf "%1.0f,", $0}
-			NR==p90 {printf "%1.0f,", $0}
-			NR==p99 {printf "%1.0f,", $0}
-			NR==p100 {printf "%1.0f\n", $0}
-		' < "$results_directory/$workload/memalloc_sorted.csv" >> "$results_directory/memalloc.csv"
+
+		percentiles_table_row "$results_directory/$workload/memalloc_sorted.csv" "$results_directory/memalloc.csv" "$workload" "%1.0f"
 
 		# Delete scratch file used for sorting/counting
 		# rm -rf "$results_directory/$workload/memalloc_sorted.csv"
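One detail worth noting in the memalloc row above: `percentiles_table_row`'s optional fourth argument overrides the default `%1.4f` cell format, so memory allocations are reported as whole numbers while the latency-style metrics keep four decimal places. In isolation (file and module names hypothetical):

```sh
percentiles_table_header "memalloc.csv" "module"
# The trailing "%1.0f" formats every percentile cell as an integer
percentiles_table_row "memalloc_sorted.csv" "memalloc.csv" "lpd_2.0" "%1.0f"
```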