feat: port OCR experiments to framework

master
Sean McBride 4 years ago
parent f435cea9c8
commit a80418b779

@ -3,3 +3,4 @@
*.csv *.csv
*.txt *.txt
*.log *.log
*.dat

@ -0,0 +1,2 @@
SLEDGE_SCHEDULER=FIFO
SLEDGE_DISABLE_PREEMPTION=true

@ -1,54 +1,117 @@
#!/bin/bash #!/bin/bash
# Executes the runtime in GDB
# Substitutes the absolute path from the container with a path relatively derived from the location of this script
# This allows debugging outside of the Docker container
# Also disables pagination and stopping on SIGUSR1
experiment_directory=$(pwd) __run_sh__base_path="$(dirname "$(realpath --logical "${BASH_SOURCE[0]}")")"
echo "$experiment_directory" __run_sh__bash_libraries_relative_path="../../../bash_libraries"
project_directory=$(cd ../../../.. && pwd) __run_sh__bash_libraries_absolute_path=$(cd "$__run_sh__base_path" && cd "$__run_sh__bash_libraries_relative_path" && pwd)
binary_directory=$(cd "$project_directory"/bin && pwd) export PATH="$__run_sh__bash_libraries_absolute_path:$PATH"
log="$experiment_directory/log.csv"
if [ "$1" != "-d" ]; then source csv_to_dat.sh || exit 1
SLEDGE_SANDBOX_PERF_LOG=$log PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" > rt.log 2>&1 & source framework.sh || exit 1
sleep 2 # source generate_gnuplots.sh || exit 1
else source get_result_count.sh || exit 1
echo "Running under gdb" source panic.sh || exit 1
fi source path_join.sh || exit 1
word_count=100 # Validate that required tools are in path
dpis=(72 108 144) declare -a required_binaries=(curl shuf pango-view pngtopnm diff)
declare -A dpi_to_port validate_dependencies() {
dpi_to_port[72]=10000 for required_binary in "${required_binaries[@]}"; do
dpi_to_port[108]=10001 if ! command -v "$required_binary" > /dev/null; then
dpi_to_port[144]=10002 echo "$required_binary is not present."
exit 1
fi
done
}
experiment_main() {
local -ri iteration_count=100
local -ri word_count=100
if (($# != 2)); then
panic "invalid number of arguments \"$1\""
return 1
elif [[ -z "$1" ]]; then
panic "hostname \"$1\" was empty"
return 1
elif [[ ! -d "$2" ]]; then
panic "directory \"$2\" does not exist"
return 1
fi
validate_dependencies
local -r hostname="$1"
local -r results_directory="$2"
# Write Headers to CSV files
printf "DPI,Success_Rate\n" >> "$results_directory/success.csv"
printf "DPI,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
total_count=100 # Perform Experiments
printf "Running Experiments\n"
local -ar dpis=(72 108 144)
local -Ar dpi_to_port=(
[72]=10000
[108]=10001
[144]=10002
)
local words
for ((i = 0; i < iteration_count; i++)); do
words="$(shuf -n"$word_count" /usr/share/dict/american-english)"
for ((i = 0; i < total_count; i++)); do for dpi in "${dpis[@]}"; do
words="$(shuf -n"$word_count" /usr/share/dict/american-english)" pango-view --dpi="$dpi" --font=mono -qo "${dpi}"_dpi.png -t "$words"
pngtopnm "${dpi}"_dpi.png > "${dpi}"_dpi.pnm
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"${dpi}"_dpi.pnm "$hostname:${dpi_to_port[$dpi]}" --silent -w "%{stderr}%{time_total}\n" 2>> "$results_directory/${dpi}_time.txt")
rm "${dpi}"_dpi.png "${dpi}"_dpi.pnm
# Logs the number of words that don't match
diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") | wc -l >> "$results_directory/${dpi}_results.txt"
done
done
# Process Results
for dpi in "${dpis[@]}"; do for dpi in "${dpis[@]}"; do
echo "${dpi}"_dpi.pnm # Skip empty results
pango-view --dpi="$dpi" --font=mono -qo "${dpi}"_dpi.png -t "$words" oks=$(wc -l < "$results_directory/${dpi}_time.txt")
pngtopnm "${dpi}"_dpi.png > "${dpi}"_dpi.pnm ((oks == 0)) && continue
# Calculate success rate
awk '
BEGIN {total_mistakes=0}
{total_mistakes += $1}
END {
average_mistakes = (total_mistakes / NR)
success_rate = ('$word_count' - average_mistakes) / '$word_count' * 100
printf "'"$dpi"',%f\n", success_rate
}
' < "$results_directory/${dpi}_results.txt" >> "$results_directory/success.csv"
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"${dpi}"_dpi.pnm localhost:${dpi_to_port[$dpi]} 2> /dev/null) # Convert latency from s to ms, and sort
awk -F, '{print ($0 * 1000)}' < "$results_directory/${dpi}_time.txt" | sort -g > "$results_directory/${dpi}_time_sorted.txt"
diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") # Generate Latency Data for csv
echo "===============================================" awk '
BEGIN {
sum = 0
p50 = int('"$oks"' * 0.5)
p90 = int('"$oks"' * 0.9)
p99 = int('"$oks"' * 0.99)
p100 = '"$oks"'
printf "'"$dpi"',"
}
NR==p50 {printf "%1.4f,", $0}
NR==p90 {printf "%1.4f,", $0}
NR==p99 {printf "%1.4f,", $0}
NR==p100 {printf "%1.4f\n", $0}
' < "$results_directory/${dpi}_time_sorted.txt" >> "$results_directory/latency.csv"
done done
done # Transform csvs to dat files for gnuplot
csv_to_dat "$results_directory/success.csv" "$results_directory/latency.csv"
if [ "$1" != "-d" ]; then }
sleep 2
echo -n "Running Cleanup: " main "$@"
rm ./*.png ./*.pnm
pkill --signal sigterm sledgert > /dev/null 2> /dev/null
sleep 2
pkill sledgert -9 > /dev/null 2> /dev/null
echo "[DONE]"
fi

@ -3,3 +3,4 @@
*.csv *.csv
*.txt *.txt
*.log *.log
*.dat

@ -0,0 +1,2 @@
SLEDGE_SCHEDULER=FIFO
SLEDGE_DISABLE_PREEMPTION=true

@ -1,65 +1,121 @@
#!/bin/bash #!/bin/bash
# Executes the runtime in GDB
# Substitutes the absolute path from the container with a path relatively derived from the location of this script
# This allows debugging outside of the Docker container
# Also disables pagination and stopping on SIGUSR1
experiment_directory=$(pwd)
echo "$experiment_directory"
project_directory=$(cd ../../../.. && pwd)
binary_directory=$(cd "$project_directory"/bin && pwd)
log="$experiment_directory/log.csv"
if [ "$1" != "-d" ]; then
SLEDGE_SANDBOX_PERF_LOG=$log PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" > rt.log 2>&1 &
sleep 2
else
echo "Running under gdb"
fi
word_count=100
fonts=("DejaVu Sans Mono" "Roboto" "Cascadia Code")
total_count=10
for ((i = 1; i <= total_count; i++)); do
words="$(shuf -n"$word_count" /usr/share/dict/american-english)"
# Locate the directory holding this script, then resolve the shared bash_libraries directory relative to it
__run_sh__base_path="$(dirname "$(realpath --logical "${BASH_SOURCE[0]}")")"
__run_sh__bash_libraries_relative_path="../../../bash_libraries"
__run_sh__bash_libraries_absolute_path=$(cd "$__run_sh__base_path" && cd "$__run_sh__bash_libraries_relative_path" && pwd)
# Prepend the library directory to PATH; the `source` calls below name the libraries without a path
export PATH="$__run_sh__bash_libraries_absolute_path:$PATH"
# Load the shared libraries, aborting the script if any of them fails to load
source csv_to_dat.sh || exit 1
source framework.sh || exit 1
# source generate_gnuplots.sh || exit 1
source get_result_count.sh || exit 1
source panic.sh || exit 1
source path_join.sh || exit 1
# Validate that required tools are in path
# External commands that validate_dependencies checks for before the experiment runs
declare -a required_binaries=(curl shuf pango-view pngtopnm diff)
# Verify that every tool listed in the required_binaries array can be resolved by `command -v`.
# Globals:  required_binaries (read)
# Outputs:  on the first missing tool, writes "<tool> is not present." to stderr
# Exits:    terminates the calling shell with status 1 on the first missing tool;
#           returns 0 when every tool is found
validate_dependencies() {
	# Keep the loop variable out of the global scope of the sourcing script
	local required_binary
	for required_binary in "${required_binaries[@]}"; do
		if ! command -v "$required_binary" > /dev/null; then
			# Diagnostics belong on stderr so they do not pollute captured stdout
			printf '%s is not present.\n' "$required_binary" >&2
			exit 1
		fi
	done
}
experiment_main() {
local -ri iteration_count=100
local -ri word_count=100
if (($# != 2)); then
panic "invalid number of arguments \"$1\""
return 1
elif [[ -z "$1" ]]; then
panic "hostname \"$1\" was empty"
return 1
elif [[ ! -d "$2" ]]; then
panic "directory \"$2\" does not exist"
return 1
fi
validate_dependencies
local -r hostname="$1"
local -r results_directory="$2"
# Write Headers to CSV files
printf "font,Success_Rate\n" >> "$results_directory/success.csv"
printf "font,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
# Perform Experiments
printf "Running Experiments\n"
local -ra fonts=("DejaVu Sans Mono" "Roboto" "Cascadia Code")
local -Ar font_to_port=(
["DejaVu Sans Mono"]=10000
["Roboto"]=10001
["Cascadia Code"]=10002
)
local words
for ((i = 1; i <= iteration_count; i++)); do
words="$(shuf -n"$word_count" /usr/share/dict/american-english)"
for font in "${fonts[@]}"; do
font_file="${font// /_}"
pango-view --font="$font" -qo "${font_file}_words.png" -t "$words" || exit 1
pngtopnm "${font_file}_words.png" > "${font_file}_words.pnm"
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"${font_file}_words.pnm" "$hostname:${font_to_port[$font]}" --silent -w "%{stderr}%{time_total}\n" 2>> "$results_directory/${font_file}_time.txt")
rm "${font_file}"_words.png "${font_file}"_words.pnm
# Logs the number of words that don't match
diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") | wc -l >> "$results_directory/${font_file}_results.txt"
done
done
# Process Results
for font in "${fonts[@]}"; do for font in "${fonts[@]}"; do
# For whatever reason, templating in multiple word strips was a pain, so brute forcing font_file="${font// /_}"
case "$font" in
"DejaVu Sans Mono") # Skip empty results
echo "DejaVu Sans Mono" oks=$(wc -l < "$results_directory/${font_file}_time.txt")
pango-view --font="DejaVu Sans Mono" -qo mono_words.png -t "$words" || exit 1 ((oks == 0)) && continue
pngtopnm mono_words.png > mono_words.pnm || exit 1
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @mono_words.pnm localhost:10000 2> /dev/null) # Calculate success rate
diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") awk '
;; BEGIN {total_mistakes=0}
"Roboto") {total_mistakes += $1}
echo "Roboto" END {
pango-view --font="Roboto" -qo Roboto_words.png -t "$words" || exit 1 average_mistakes = (total_mistakes / NR)
pngtopnm Roboto_words.png > Roboto_words.pnm || exit 1 success_rate = ('$word_count' - average_mistakes) / '$word_count' * 100
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @Roboto_words.pnm localhost:10002 2> /dev/null) printf "'"$font_file"',%f\n", success_rate
diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") }
;; ' < "$results_directory/${font_file}_results.txt" >> "$results_directory/success.csv"
"Cascadia Code")
echo "Cascadia Code" # Convert latency from s to ms, and sort
pango-view --font="Cascadia Code" -qo Cascadia_Code_words.png -t "$words" || exit 1 awk -F, '{print ($0 * 1000)}' < "$results_directory/${font_file}_time.txt" | sort -g > "$results_directory/${font_file}_time_sorted.txt"
pngtopnm Cascadia_Code_words.png > Cascadia_Code_words.pnm || exit 1
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @Cascadia_Code_words.pnm localhost:10001 2> /dev/null) # Generate Latency Data for csv
diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") awk '
;; BEGIN {
esac sum = 0
echo "===============================================" p50 = int('"$oks"' * 0.5)
p90 = int('"$oks"' * 0.9)
p99 = int('"$oks"' * 0.99)
p100 = '"$oks"'
printf "'"$font_file"',"
}
NR==p50 {printf "%1.4f,", $0}
NR==p90 {printf "%1.4f,", $0}
NR==p99 {printf "%1.4f,", $0}
NR==p100 {printf "%1.4f\n", $0}
' < "$results_directory/${font_file}_time_sorted.txt" >> "$results_directory/latency.csv"
done done
done # Transform csvs to dat files for gnuplot
csv_to_dat "$results_directory/success.csv" "$results_directory/latency.csv"
if [ "$1" != "-d" ]; then }
sleep 2
echo -n "Running Cleanup: " main "$@"
rm ./*.png ./*.pnm
pkill --signal sigterm sledgert > /dev/null 2> /dev/null
sleep 2
pkill sledgert -9 > /dev/null 2> /dev/null
echo "[DONE]"
fi

@ -3,3 +3,4 @@
*.csv *.csv
*.txt *.txt
*.log *.log
*.dat

@ -0,0 +1,2 @@
SLEDGE_SCHEDULER=FIFO
SLEDGE_DISABLE_PREEMPTION=true

@ -1,54 +1,122 @@
#!/bin/bash #!/bin/bash
# Executes the runtime in GDB
# Substitutes the absolute path from the container with a path relatively derived from the location of this script __run_sh__base_path="$(dirname "$(realpath --logical "${BASH_SOURCE[0]}")")"
# This allows debugging outside of the Docker container __run_sh__bash_libraries_relative_path="../../../bash_libraries"
# Also disables pagination and stopping on SIGUSR1 __run_sh__bash_libraries_absolute_path=$(cd "$__run_sh__base_path" && cd "$__run_sh__bash_libraries_relative_path" && pwd)
export PATH="$__run_sh__bash_libraries_absolute_path:$PATH"
experiment_directory=$(pwd)
echo "$experiment_directory" source csv_to_dat.sh || exit 1
project_directory=$(cd ../../../.. && pwd) source framework.sh || exit 1
binary_directory=$(cd "$project_directory"/bin && pwd) # source generate_gnuplots.sh || exit 1
log="$experiment_directory/log.csv" source get_result_count.sh || exit 1
source panic.sh || exit 1
if [ "$1" != "-d" ]; then source path_join.sh || exit 1
SLEDGE_SANDBOX_PERF_LOG=$log PATH="$binary_directory:$PATH" LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" sledgert "$experiment_directory/spec.json" > rt.log 2>&1 &
sleep 2 # Validate that required tools are in path
else declare -a required_binaries=(curl shuf pango-view pngtopnm diff)
echo "Running under gdb"
fi validate_dependencies() {
for required_binary in "${required_binaries[@]}"; do
word_counts=(1 10 100) if ! command -v "$required_binary" > /dev/null; then
echo "$required_binary is not present."
declare -A word_count_to_port exit 1
word_count_to_port["1_words.pnm"]=10000 fi
word_count_to_port["10_words.pnm"]=10001 done
word_count_to_port["100_words.pnm"]=10002 }
total_count=100 experiment_main() {
local -ir iteration_count=100
for ((i = 0; i < total_count; i++)); do local -ra word_counts=(1 10 100)
if (($# != 2)); then
panic "invalid number of arguments \"$1\""
return 1
elif [[ -z "$1" ]]; then
panic "hostname \"$1\" was empty"
return 1
elif [[ ! -d "$2" ]]; then
panic "directory \"$2\" does not exist"
return 1
fi
validate_dependencies
local -r hostname="$1"
local -r results_directory="$2"
# Write Headers to CSV files
printf "words,Success_Rate\n" >> "$results_directory/success.csv"
printf "words,p50,p90,p99,p100\n" >> "$results_directory/latency.csv"
local -Ar word_count_to_port=(
["1_words"]=10000
["10_words"]=10001
["100_words"]=10002
)
local words
for ((i = 0; i < iteration_count; i++)); do
for word_count in "${word_counts[@]}"; do
words="$(shuf -n"$word_count" /usr/share/dict/american-english)"
word_count_file="${word_count}_words"
pango-view --font=mono -qo "$word_count_file.png" -t "$words" || exit 1
pngtopnm "$word_count_file.png" > "$word_count_file.pnm" || exit 1
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"$word_count_file.pnm" "$hostname:${word_count_to_port[$word_count_file]}" --silent -w "%{stderr}%{time_total}\n" 2>> "$results_directory/${word_count_file}_time.txt")
# If the OCR does not produce a guess, fail
[[ -z "$result" ]] && exit 1
rm "$word_count_file.png" "$word_count_file.pnm"
# Logs the number of words that don't match
diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") | wc -l >> "$results_directory/${word_count_file}_results.txt"
done
done
# Process Results
for word_count in "${word_counts[@]}"; do for word_count in "${word_counts[@]}"; do
echo "${word_count}"_words.pnm word_count_file="${word_count}_words"
words="$(shuf -n"$word_count" /usr/share/dict/american-english)"
pango-view --font=mono -qo "$word_count"_words.png -t "$words" || exit 1 # Skip empty results
pngtopnm "$word_count"_words.png > "$word_count"_words.pnm || exit 1 oks=$(wc -l < "$results_directory/${word_count_file}_time.txt")
((oks == 0)) && continue
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"${word_count}"_words.pnm localhost:${word_count_to_port["$word_count"_words.pnm]} 2> /dev/null) # Calculate success rate
awk '
BEGIN {total_mistakes=0}
{total_mistakes += $1}
END {
average_mistakes = (total_mistakes / NR)
success_rate = ('"$word_count"' - average_mistakes) / '"$word_count"' * 100
printf "'"$word_count_file"',%f\n", success_rate
}
' < "$results_directory/${word_count_file}_results.txt" >> "$results_directory/success.csv"
# If the OCR does not produce a guess, fail # Convert latency from s to ms, and sort
[[ -z "$result" ]] && exit 1 awk -F, '{print ($0 * 1000)}' < "$results_directory/${word_count_file}_time.txt" | sort -g > "$results_directory/${word_count_file}_time_sorted.txt"
diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result") # Generate Latency Data for csv
echo "===============================================" awk '
BEGIN {
sum = 0
p50 = int('"$oks"' * 0.5)
p90 = int('"$oks"' * 0.9)
p99 = int('"$oks"' * 0.99)
p100 = '"$oks"'
printf "'"$word_count_file"',"
}
NR==p50 {printf "%1.4f,", $0}
NR==p90 {printf "%1.4f,", $0}
NR==p99 {printf "%1.4f,", $0}
NR==p100 {printf "%1.4f\n", $0}
' < "$results_directory/${word_count_file}_time_sorted.txt" >> "$results_directory/latency.csv"
done done
done
# Transform csvs to dat files for gnuplot
if [ "$1" != "-d" ]; then csv_to_dat "$results_directory/success.csv" "$results_directory/latency.csv"
sleep 2 }
echo -n "Running Cleanup: "
rm ./*.png ./*.pnm main "$@"
pkill --signal sigterm sledgert > /dev/null 2> /dev/null
sleep 2
pkill sledgert -9 > /dev/null 2> /dev/null
echo "[DONE]"
fi

@ -45,7 +45,9 @@ __framework_sh__initialize_globals() {
# Configure environment variables # Configure environment variables
# shellcheck disable=SC2155 # shellcheck disable=SC2155
declare -gr __framework_sh__application_directory="$(dirname "$(realpath "$0"))")" declare -gr __framework_sh__application_directory="$(dirname "$(realpath "$0"))")"
local -r binary_directory="$(cd "$__framework_sh__application_directory" && cd ../../bin && pwd)" # shellcheck disable=SC2155
declare -gr __framework_sh__path=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
local -r binary_directory="$(cd "$__framework_sh__path" && cd ../../bin && pwd)"
export PATH=$binary_directory:$PATH export PATH=$binary_directory:$PATH
export LD_LIBRARY_PATH=$binary_directory:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=$binary_directory:$LD_LIBRARY_PATH
} }
@ -148,7 +150,7 @@ __framework_sh__log_environment() {
echo "************" echo "************"
echo "* Makefile *" echo "* Makefile *"
echo "************" echo "************"
cat "$(path_join "$__framework_sh__application_directory" ../../Makefile)" cat "$(path_join "$__framework_sh__path" ../../Makefile)"
echo "" echo ""
echo "**********" echo "**********"
@ -309,6 +311,7 @@ __framework_sh__unset_env_file() {
__framework_sh__run_both() { __framework_sh__run_both() {
local short_name local short_name
shopt -s nullglob
for envfile in "$__framework_sh__application_directory"/*.env; do for envfile in "$__framework_sh__application_directory"/*.env; do
short_name="$(basename "${envfile/.env/}")" short_name="$(basename "${envfile/.env/}")"
printf "Running %s\n" "$short_name" printf "Running %s\n" "$short_name"

Loading…
Cancel
Save