From cb26514ad322ab7a9002b6fbe15bbf82c78534c8 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Thu, 6 May 2021 16:13:08 -0400 Subject: [PATCH] feat: add new fields and logs to OCR experiments --- .../applications/ocr/by_dpi/run.sh | 35 ++++++++++++------- .../applications/ocr/by_font/run.sh | 35 ++++++++++++------- .../applications/ocr/by_word/run.sh | 33 +++++++++++------ 3 files changed, 66 insertions(+), 37 deletions(-) diff --git a/runtime/experiments/applications/ocr/by_dpi/run.sh b/runtime/experiments/applications/ocr/by_dpi/run.sh index e665084..d86e902 100755 --- a/runtime/experiments/applications/ocr/by_dpi/run.sh +++ b/runtime/experiments/applications/ocr/by_dpi/run.sh @@ -44,10 +44,6 @@ experiment_main() { local -r hostname="$1" local -r results_directory="$2" - # Write Headers to CSV files - printf "DPI,Success_Rate\n" >> "$results_directory/success.csv" - printf "DPI,p50,p90,p99,p100\n" >> "$results_directory/latency.csv" - # Perform Experiments printf "Running Experiments\n" local -ar dpis=(72 108 144) @@ -69,11 +65,18 @@ experiment_main() { rm "${dpi}"_dpi.png "${dpi}"_dpi.pnm # Logs the number of words that don't match - diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") | wc -l >> "$results_directory/${dpi}_results.txt" + echo "word count: $word_count" >> "$results_directory/${dpi}_full_results.txt" + diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") \ + | tee -a "$results_directory/${dpi}_full_results.txt" \ + | wc -l >> "$results_directory/${dpi}_results.txt" + echo "==========================================" >> "$results_directory/${dpi}_full_results.txt" done done # Process Results + # Write Headers to CSV files + printf "DPI,Success_Rate\n" >> "$results_directory/success.csv" + printf "DPI,min,mean,p50,p90,p99,p100\n" >> "$results_directory/latency.csv" for dpi in "${dpis[@]}"; do # Skip empty results oks=$(wc -l < "$results_directory/${dpi}_time.txt") @@ -97,16 +100,22 @@ experiment_main() { awk ' BEGIN { sum = 0 - p50 = int('"$oks"' * 0.5) - p90 = int('"$oks"' * 0.9) - p99 = int('"$oks"' * 0.99) - p100 = '"$oks"' + p50_idx = int('"$oks"' * 0.5) + p90_idx = int('"$oks"' * 0.9) + p99_idx = int('"$oks"' * 0.99) + p100_idx = '"$oks"' printf "'"$dpi"'," } - NR==p50 {printf "%1.4f,", $0} - NR==p90 {printf "%1.4f,", $0} - NR==p99 {printf "%1.4f,", $0} - NR==p100 {printf "%1.4f\n", $0} + {sum += $0} + NR==1 {min = $0} + NR==p50_idx {p50 = $0} + NR==p90_idx {p90 = $0} + NR==p99_idx {p99 = $0} + NR==p100_idx {p100 = $0} + END { + mean = sum / NR + printf "%1.4f,%1.4f,%1.4f,%1.4f,%1.4f,%1.4f\n", min, mean, p50, p90, p99, p100 + } ' < "$results_directory/${dpi}_time_sorted.txt" >> "$results_directory/latency.csv" done diff --git a/runtime/experiments/applications/ocr/by_font/run.sh b/runtime/experiments/applications/ocr/by_font/run.sh index 0948166..726c5fb 100755 --- a/runtime/experiments/applications/ocr/by_font/run.sh +++ b/runtime/experiments/applications/ocr/by_font/run.sh @@ -44,10 +44,6 @@ experiment_main() { local -r hostname="$1" local -r results_directory="$2" - # Write Headers to CSV files - printf "font,Success_Rate\n" >> "$results_directory/success.csv" - printf "font,p50,p90,p99,p100\n" >> "$results_directory/latency.csv" - # Perform Experiments printf "Running Experiments\n" local -ra fonts=("DejaVu Sans Mono" "Roboto" "Cascadia Code") @@ -71,11 +67,18 @@ experiment_main() { rm "${font_file}"_words.png "${font_file}"_words.pnm # Logs the number of words that don't match - diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") | wc -l >> "$results_directory/${font_file}_results.txt" + echo "font: $font_file" >> "$results_directory/${font_file}_full_results.txt" + diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") \ + | tee -a "$results_directory/${font_file}_full_results.txt" \ + | wc -l >> "$results_directory/${font_file}_results.txt" + echo "==========================================" >> "$results_directory/${font_file}_full_results.txt" done done # Process Results + # Write Headers to CSV files + printf "font,Success_Rate\n" >> "$results_directory/success.csv" + printf "font,min,mean,p50,p90,p99,p100\n" >> "$results_directory/latency.csv" for font in "${fonts[@]}"; do font_file="${font// /_}" @@ -101,16 +104,22 @@ experiment_main() { awk ' BEGIN { sum = 0 - p50 = int('"$oks"' * 0.5) - p90 = int('"$oks"' * 0.9) - p99 = int('"$oks"' * 0.99) - p100 = '"$oks"' + p50_idx = int('"$oks"' * 0.5) + p90_idx = int('"$oks"' * 0.9) + p99_idx = int('"$oks"' * 0.99) + p100_idx = '"$oks"' printf "'"$font_file"'," } - NR==p50 {printf "%1.4f,", $0} - NR==p90 {printf "%1.4f,", $0} - NR==p99 {printf "%1.4f,", $0} - NR==p100 {printf "%1.4f\n", $0} + {sum += $0} + NR==1 {min = $0} + NR==p50_idx {p50 = $0} + NR==p90_idx {p90 = $0} + NR==p99_idx {p99 = $0} + NR==p100_idx {p100 = $0} + END { + mean = sum / NR + printf "%1.4f,%1.4f,%1.4f,%1.4f,%1.4f,%1.4f\n", min, mean, p50, p90, p99, p100 + } ' < "$results_directory/${font_file}_time_sorted.txt" >> "$results_directory/latency.csv" done diff --git a/runtime/experiments/applications/ocr/by_word/run.sh b/runtime/experiments/applications/ocr/by_word/run.sh index a53d015..8cb98b9 100755 --- a/runtime/experiments/applications/ocr/by_word/run.sh +++ b/runtime/experiments/applications/ocr/by_word/run.sh @@ -45,8 +45,6 @@ experiment_main() { local -r results_directory="$2" # Write Headers to CSV files - printf "words,Success_Rate\n" >> "$results_directory/success.csv" - printf "words,p50,p90,p99,p100\n" >> "$results_directory/latency.csv" local -Ar word_count_to_port=( ["1_words"]=10000 @@ -72,11 +70,18 @@ experiment_main() { rm "$word_count_file.png" "$word_count_file.pnm" # Logs the number of words that don't match - diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") | wc -l >> "$results_directory/${word_count_file}_results.txt" + # Also tees the full diff into a separate file + echo "word count: $word_count" >> "$results_directory/${word_count_file}_full_results.txt" + diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") \ + | tee -a "$results_directory/${word_count_file}_full_results.txt" \ + | wc -l >> "$results_directory/${word_count_file}_results.txt" + echo "==========================================" >> "$results_directory/${word_count_file}_full_results.txt" done done # Process Results + printf "words,Success_Rate\n" >> "$results_directory/success.csv" + printf "words,min,mean,p50,p90,p99,p100\n" >> "$results_directory/latency.csv" for word_count in "${word_counts[@]}"; do word_count_file="${word_count}_words" @@ -102,16 +107,22 @@ experiment_main() { awk ' BEGIN { sum = 0 - p50 = int('"$oks"' * 0.5) - p90 = int('"$oks"' * 0.9) - p99 = int('"$oks"' * 0.99) - p100 = '"$oks"' + p50_idx = int('"$oks"' * 0.5) + p90_idx = int('"$oks"' * 0.9) + p99_idx = int('"$oks"' * 0.99) + p100_idx = '"$oks"' printf "'"$word_count_file"'," } - NR==p50 {printf "%1.4f,", $0} - NR==p90 {printf "%1.4f,", $0} - NR==p99 {printf "%1.4f,", $0} - NR==p100 {printf "%1.4f\n", $0} + {sum += $0} + NR==1 {min = $0} + NR==p50_idx {p50 = $0} + NR==p90_idx {p90 = $0} + NR==p99_idx {p99 = $0} + NR==p100_idx {p100 = $0} + END { + mean = sum / NR + printf "%1.4f,%1.4f,%1.4f,%1.4f,%1.4f,%1.4f\n", min, mean, p50, p90, p99, p100 + } ' < "$results_directory/${word_count_file}_time_sorted.txt" >> "$results_directory/latency.csv" done