fix: Update gocr tests to send PNGs instead of PNMs

master
Emil Abbasov 7 months ago
parent 8f1ea67df9
commit 7a534581c5

@ -9,7 +9,6 @@
PHONY: gocr__by_dpi
gocr__by_dpi: ./runtime/bin/gocr.wasm.so
# cd ./tests/gocr/by_dpi && ./install.sh
cd ./tests/gocr/by_dpi && ./run.sh
PHONY: gocr__by_font

@ -1,4 +1,3 @@
*.pnm
*.png
*.csv
*.txt

@ -16,22 +16,19 @@ client-72dpi:
mkdir -p tmp
shuf -n10 /usr/share/dict/american-english > tmp/10_words.txt
pango-view --dpi=72 --font=mono -qo tmp/72_dpi.png tmp/10_words.txt
pngtopnm tmp/72_dpi.png > tmp/72_dpi.pnm
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @tmp/72_dpi.pnm "${HOSTNAME}:10000/gocr_72_dpi"
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @tmp/72_dpi.png "${HOSTNAME}:10000/gocr_72_dpi"
rm -rf tmp
client-108dpi:
mkdir -p tmp
shuf -n10 /usr/share/dict/american-english > tmp/10_words.txt
pango-view --dpi=108 --font=mono -qo tmp/108_dpi.png tmp/10_words.txt
pngtopnm tmp/108_dpi.png > tmp/108_dpi.pnm
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @tmp/108_dpi.pnm "${HOSTNAME}:10000/gocr_108_dpi"
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @tmp/108_dpi.png "${HOSTNAME}:10000/gocr_108_dpi"
rm -rf tmp
client-144dpi:
mkdir -p tmp
shuf -n10 /usr/share/dict/american-english > tmp/10_words.txt
pango-view --dpi=144 --font=mono -qo tmp/144_dpi.png tmp/10_words.txt
pngtopnm tmp/144_dpi.png > tmp/144_dpi.pnm
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @tmp/144_dpi.pnm "${HOSTNAME}:10000/gocr_144_dpi"
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @tmp/144_dpi.png "${HOSTNAME}:10000/gocr_144_dpi"
rm -rf tmp

@ -1,6 +1,6 @@
# OCR varied by DPI
Generates pnm image files containing random words from the wamerican dictionary.
Generates png image files containing random words from the wamerican dictionary.
The number of words is held constant at 100, and the `mono` font is used for all runs. The DPI of the image is varied to judge how additional visual detail affects OCR accuracy and performance.

@ -14,23 +14,12 @@ source percentiles_table.sh || exit 1
source validate_dependencies.sh || exit 1
experiment_client() {
local -ri iteration_count=100
local -ri word_count=100
if (($# != 2)); then
panic "invalid number of arguments \"$1\""
return 1
elif [[ -z "$1" ]]; then
panic "hostname \"$1\" was empty"
return 1
elif [[ ! -d "$2" ]]; then
panic "directory \"$2\" does not exist"
return 1
fi
local -r hostname="$1"
local -r results_directory="$2"
local -ri iteration_count=100
local -ri word_count=100
# Perform Experiments
printf "Running Experiments\n"
local -ar dpis=(72 108 144)
@ -39,17 +28,17 @@ experiment_client() {
[108]=/gocr_108_dpi
[144]=/gocr_144_dpi
)
local words
for ((i = 0; i < iteration_count; i++)); do
words="$(shuf -n"$word_count" /usr/share/dict/american-english)"
for dpi in "${dpis[@]}"; do
pango-view --dpi="$dpi" --font=mono -qo "${dpi}"_dpi.png -t "$words"
pngtopnm "${dpi}"_dpi.png > "${dpi}"_dpi.pnm
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"${dpi}"_dpi.pnm "$hostname:10000${dpi_to_path[$dpi]}" --silent -w "%{stderr}%{time_total}\n" 2>> "$results_directory/${dpi}_time.txt")
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"${dpi}"_dpi.png "$hostname:10000${dpi_to_path[$dpi]}" --silent -w "%{stderr}%{time_total}\n" 2>> "$results_directory/${dpi}_time.txt")
rm "${dpi}"_dpi.png "${dpi}"_dpi.pnm
rm "${dpi}"_dpi.png
# Logs the number of words that don't match
echo "word count: $word_count" >> "$results_directory/${dpi}_full_results.txt"
@ -94,6 +83,6 @@ experiment_client() {
}
# Validate that required tools are in path
validate_dependencies curl shuf pango-view pngtopnm diff
validate_dependencies curl shuf pango-view diff
framework_init "$@"

@ -6,21 +6,18 @@
{
"route": "/gocr_72_dpi",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 36000,
"http-resp-content-type": "text/plain"
},
{
"route": "/gocr_108_dpi",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 36000,
"http-resp-content-type": "text/plain"
},
{
"route": "/gocr_144_dpi",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 36000,
"http-resp-content-type": "text/plain"
}

@ -1,4 +1,3 @@
*.pnm
*.png
*.csv
*.txt

@ -16,22 +16,19 @@ client-mono:
mkdir -p tmp
shuf -n10 /usr/share/dict/american-english > tmp/10_words.txt
pango-view --font="DejaVu Sans Mono" -qo tmp/10_words.png tmp/10_words.txt
pngtopnm tmp/10_words.png > tmp/10_words.pnm
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@tmp/10_words.pnm" "${HOSTNAME}:10000/gocr_mono"
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@tmp/10_words.png" "${HOSTNAME}:10000/gocr_mono"
rm -rf tmp
client-roboto:
mkdir -p tmp
shuf -n10 /usr/share/dict/american-english > tmp/10_words.txt
pango-view --font="Roboto" -qo tmp/10_words.png tmp/10_words.txt
pngtopnm tmp/10_words.png > tmp/10_words.pnm
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@tmp/10_words.pnm" "${HOSTNAME}:10000/gocr_roboto"
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@tmp/10_words.png" "${HOSTNAME}:10000/gocr_roboto"
rm -rf tmp
client-cascadia:
mkdir -p tmp
shuf -n10 /usr/share/dict/american-english > tmp/10_words.txt
pango-view --font="Cascadia Code" -qo tmp/10_words.png tmp/10_words.txt
pngtopnm tmp/10_words.png > tmp/10_words.pnm
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@tmp/10_words.pnm" "${HOSTNAME}:10000/gocr_cascadia"
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@tmp/10_words.png" "${HOSTNAME}:10000/gocr_cascadia"
rm -rf tmp

@ -1,6 +1,6 @@
# OCR varied by font
Generates pnm image files containing random words from the wamerican dictionary.
Generates png image files containing random words from the wamerican dictionary.
The number of words is held constant at 100, but a different font is used to render the text. Generally, `mono` performs the best, followed by `URW Gothic`, followed by `Lobster Two`, which is a stylistic font that performs quite poorly.

@ -14,23 +14,12 @@ source percentiles_table.sh || exit 1
source validate_dependencies.sh || exit 1
experiment_client() {
local -ri iteration_count=100
local -ri word_count=100
if (($# != 2)); then
panic "invalid number of arguments \"$1\""
return 1
elif [[ -z "$1" ]]; then
panic "hostname \"$1\" was empty"
return 1
elif [[ ! -d "$2" ]]; then
panic "directory \"$2\" does not exist"
return 1
fi
local -r hostname="$1"
local -r results_directory="$2"
local -ri iteration_count=100
local -ri word_count=100
# Perform Experiments
printf "Running Experiments\n"
local -ra fonts=("DejaVu Sans Mono" "Roboto" "Cascadia Code")
@ -39,6 +28,7 @@ experiment_client() {
["Roboto"]=/gocr_roboto
["Cascadia Code"]=/gocr_cascadia
)
local words
for ((i = 1; i <= iteration_count; i++)); do
words="$(shuf -n"$word_count" /usr/share/dict/american-english)"
@ -47,11 +37,10 @@ experiment_client() {
font_file="${font// /_}"
pango-view --font="$font" -qo "${font_file}_words.png" -t "$words" || exit 1
pngtopnm "${font_file}_words.png" > "${font_file}_words.pnm"
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"${font_file}_words.pnm" "$hostname:10000${font_to_path[$font]}" --silent -w "%{stderr}%{time_total}\n" 2>> "$results_directory/${font_file}_time.txt")
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"${font_file}_words.png" "$hostname:10000${font_to_path[$font]}" --silent -w "%{stderr}%{time_total}\n" 2>> "$results_directory/${font_file}_time.txt")
rm "${font_file}"_words.png "${font_file}"_words.pnm
rm "${font_file}"_words.png
# Logs the number of words that don't match
echo "font: $font_file" >> "$results_directory/${font_file}_full_results.txt"
@ -95,6 +84,6 @@ experiment_client() {
csv_to_dat "$results_directory/success.csv" "$results_directory/latency.csv"
}
validate_dependencies curl shuf pango-view pngtopnm diff
validate_dependencies curl shuf pango-view diff
framework_init "$@"

@ -6,21 +6,18 @@
{
"route": "/gocr_mono",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 36000,
"http-resp-content-type": "text/plain"
},
{
"route": "/gocr_roboto",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 36000,
"http-resp-content-type": "text/plain"
},
{
"route": "/gocr_cascadia",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 36000,
"http-resp-content-type": "text/plain"
}

@ -16,14 +16,12 @@ client-10:
mkdir -p tmp
shuf -n10 /usr/share/dict/american-english > tmp/10_words.txt
pango-view --font=mono -qo tmp/10_words.png tmp/10_words.txt
pngtopnm tmp/10_words.png > tmp/10_words.pnm
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@tmp/10_words.pnm" "${HOSTNAME}:10000/gocr_10_words"
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@tmp/10_words.png" "${HOSTNAME}:10000/gocr_10_words"
rm -rf tmp
client-100:
mkdir -p tmp
shuf -n100 /usr/share/dict/american-english > tmp/100_words.txt
pango-view --font=mono -qo tmp/100_words.png tmp/100_words.txt
pngtopnm tmp/100_words.png > tmp/100_words.pnm
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@tmp/100_words.pnm" "${HOSTNAME}:10000/gocr_100_words"
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@tmp/100_words.png" "${HOSTNAME}:10000/gocr_100_words"
rm -rf tmp

@ -1,6 +1,6 @@
# OCR varied by word
Generates pnm image files containing random words from the wamerican dictionary.
Generates png image files containing random words from the wamerican dictionary.
The number of words is varied among 1, 10, and 100 in order to assess how the amount of text being OCRed affects the execution profile of the sandbox serving the request.

@ -7,7 +7,6 @@ export PATH="$__run_sh__bash_libraries_absolute_path:$PATH"
source csv_to_dat.sh || exit 1
source framework.sh || exit 1
# source generate_gnuplots.sh || exit 1
source get_result_count.sh || exit 1
source panic.sh || exit 1
source path_join.sh || exit 1
@ -15,24 +14,11 @@ source percentiles_table.sh || exit 1
source validate_dependencies.sh || exit 1
experiment_client() {
local -ir iteration_count=100
local -ra word_counts=(1 10 100)
if (($# != 2)); then
panic "invalid number of arguments \"$1\""
return 1
elif [[ -z "$1" ]]; then
panic "hostname \"$1\" was empty"
return 1
elif [[ ! -d "$2" ]]; then
panic "directory \"$2\" does not exist"
return 1
fi
local -r hostname="$1"
local -r results_directory="$2"
# Write Headers to CSV files
local -ir iteration_count=100
local -ra word_counts=(1 10 100)
local -Ar word_count_to_path=(
["1_words"]=/gocr_1_word
@ -48,14 +34,13 @@ experiment_client() {
word_count_file="${word_count}_words"
pango-view --font=mono -qo "$word_count_file.png" -t "$words" || exit 1
pngtopnm "$word_count_file.png" > "$word_count_file.pnm" || exit 1
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"$word_count_file.pnm" "$hostname:10000${word_count_to_path[$word_count_file]}" --silent -w "%{stderr}%{time_total}\n" 2>> "$results_directory/${word_count_file}_time.txt")
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"$word_count_file.png" "$hostname:10000${word_count_to_path[$word_count_file]}" --silent -w "%{stderr}%{time_total}\n" 2>> "$results_directory/${word_count_file}_time.txt")
# If the OCR does not produce a guess, fail
[[ -z "$result" ]] && exit 1
rm "$word_count_file.png" "$word_count_file.pnm"
rm "$word_count_file.png"
# Logs the number of words that don't match
# Also tees the full diff into a separate file
@ -68,6 +53,7 @@ experiment_client() {
done
# Process Results
# Write Headers to CSV files
printf "words,Success_Rate\n" >> "$results_directory/success.csv"
percentiles_table_header "$results_directory/latency.csv" "words"
@ -100,6 +86,6 @@ experiment_client() {
csv_to_dat "$results_directory/success.csv" "$results_directory/latency.csv"
}
validate_dependencies curl shuf pango-view pngtopnm diff
validate_dependencies curl shuf pango-view diff
framework_init "$@"

@ -6,21 +6,18 @@
{
"route": "/gocr_1_word",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 36000,
"http-resp-content-type": "text/plain"
},
{
"route": "/gocr_10_words",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 36000,
"http-resp-content-type": "text/plain"
},
{
"route": "/gocr_100_words",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 36000,
"http-resp-content-type": "text/plain"
}

File diff suppressed because one or more lines are too long

@ -13,4 +13,4 @@ debug:
SLEDGE_DISABLE_PREEMPTION=true SLEDGE_NWORKERS=1 LD_LIBRARY_PATH=${SLEDGE_BINARY_DIR} gdb ${SLEDGE_BINARY_DIR}/sledgert --eval-command="run spec.json"
client:
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@5x8.pnm" "${HOSTNAME}:10000/gocr"
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@5x8.png" "${HOSTNAME}:10000/gocr"

@ -1,2 +0,0 @@
pnm file was generated as follows
`pngtopnm 5x8.png >5x8.pnm`

@ -23,7 +23,7 @@ experiment_client() {
local result
for ((i = 0; i < total_count; i++)); do
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@5x8.pnm" "$hostname:10000/gocr" 2> /dev/null)
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@5x8.png" "$hostname:10000/gocr" 2> /dev/null)
if [[ "$result" == "$expected_result" ]]; then
((success_count++))
else

@ -6,7 +6,6 @@
{
"route": "/gocr",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 36000,
"http-resp-content-type": "text/plain"
}

@ -13,4 +13,4 @@ debug:
LD_LIBRARY_PATH=${SLEDGE_BINARY_DIR} gdb ${SLEDGE_BINARY_DIR}/sledgert --eval-command="run spec.json"
client:
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@handwrt1.pnm" "${HOSTNAME}:10000/gocr"
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@handwrt.png" "${HOSTNAME}:10000/gocr"

Binary file not shown.

After

Width:  |  Height:  |  Size: 150 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 42 KiB

File diff suppressed because one or more lines are too long

@ -22,7 +22,7 @@ experiment_client() {
local -ir total_count=10
for ((i = 0; i < total_count; i++)); do
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@handwrt1.pnm" "$hostname:10000/gocr" 2> /dev/null)
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@handwrt.png" "$hostname:10000/gocr" 2> /dev/null)
if [[ "$result" == "$expected_result" ]]; then
((success_count++))
else

@ -6,7 +6,6 @@
{
"route": "/gocr",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 36000,
"http-resp-content-type": "text/plain"
}

@ -16,10 +16,4 @@ valgrind:
SLEDGE_DISABLE_PREEMPTION=true SLEDGE_NWORKERS=1 LD_LIBRARY_PATH=${SLEDGE_BINARY_DIR} valgrind --leak-check=full --max-stackframe=11150456 --run-libc-freeres=no --run-cxx-freeres=no ${SLEDGE_BINARY_DIR}/sledgert spec.json
client:
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@hyde.pnm" "${HOSTNAME}:10000/gocr"
client2:
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@../handwriting/handwrt1.pnm" "${HOSTNAME}:10000/gocr"
client3:
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@../fivebyeight/5x8.pnm" "${HOSTNAME}:10000/gocr"
curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@hyde.png" "${HOSTNAME}:10000/gocr"

@ -1,5 +1,3 @@
# A Page from Dr. Jekyll and Mister Hyde
This is a larger OCR example. The \*.pnm file is 5mb or so.
In the future, this OCR example might be a good use case for a "step-wise" function, as the file is substantially smaller before conversion to the \*.pnm format.
This is a larger OCR example.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 823 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 567 KiB

File diff suppressed because one or more lines are too long

@ -21,7 +21,7 @@ experiment_client() {
local -ir total_count=10
for ((i = 0; i < total_count; i++)); do
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@hyde.pnm" "$hostname:10000/gocr" 2> /dev/null)
result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary "@hyde.png" "$hostname:10000/gocr" 2> /dev/null)
if [[ "$result" == "$expected_result" ]]; then
((success_count++))
else

@ -6,7 +6,6 @@
{
"route": "/gocr",
"path": "gocr.wasm.so",
"expected-execution-us": 5000,
"relative-deadline-us": 360000,
"http-resp-content-type": "text/plain"
}

Loading…
Cancel
Save