Merge pull request #158 from gwsystems/ocr-inputs

Ocr inputs
master
Sean McBride 4 years ago committed by GitHub
commit ca7306103d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,5 @@
*.pnm
*.png
*.csv
*.txt
*.log

@ -0,0 +1,7 @@
# OCR varied by DPI
Generates pnm image files containing random words from the wamerican dictionary.
The number of words is held constant at 100, and the `mono` font is used for all runs. The DPI of the image is varied to judge how additional visual detail affects OCR accuracy and performance.
Run `install.sh` to install required tools and `run.sh` to run the tests.

@ -0,0 +1,19 @@
#!/bin/bash
# Launches the sledgert runtime under GDB with this experiment's spec.json.
#
# All paths are derived from the location of this script, so it may be invoked
# from any working directory. GDB is configured to:
#   - not stop on SIGUSR1 or SIGPIPE
#   - disable pagination
#   - substitute the container source path /sledge/runtime with the locally
#     resolved project directory, which allows debugging outside of the
#     Docker container

experiment_directory=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || {
  echo "Unable to resolve the experiment directory" >&2
  exit 1
}
project_directory=$(cd "$experiment_directory/../../.." && pwd) || {
  echo "Unable to resolve the project directory" >&2
  exit 1
}
binary_directory=$(cd "$project_directory/bin" && pwd) || {
  echo "Unable to find $project_directory/bin" >&2
  exit 1
}

# Make sledgert and the shared objects next to it discoverable
export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH"
export PATH="$binary_directory:$PATH"

gdb --eval-command="handle SIGUSR1 nostop" \
  --eval-command="handle SIGPIPE nostop" \
  --eval-command="set pagination off" \
  --eval-command="set substitute-path /sledge/runtime $project_directory" \
  --eval-command="run $experiment_directory/spec.json" \
  sledgert

@ -0,0 +1,4 @@
#!/bin/bash
# Installs the packages that run.sh relies on for image generation
# (pango-view, pngtopnm) and for the american-english word list.
required_packages=(netpbm pango1.0-tools wamerican)
sudo apt-get install "${required_packages[@]}"

@ -0,0 +1,54 @@
#!/bin/bash
# Runs the "OCR varied by DPI" experiment.
#
# Usage: run.sh [-d]
#
# Without -d, sledgert is started in the background with this directory's
# spec.json. Its output is written to rt.log and SLEDGE_SANDBOX_PERF_LOG is
# pointed at log.csv. With -d, no runtime is started; it is presumably already
# running under gdb (see debug.sh).
#
# Each of the total_count iterations draws word_count random dictionary words,
# renders them with the mono font at every DPI, converts the PNG to PNM, POSTs
# the image to the port associated with that DPI, and prints a diff between
# the source words and the OCR result.
#
# Paths are resolved from the location of this script, and the script changes
# into that directory so generated files and logs always land next to it.

experiment_directory=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || exit 1
cd "$experiment_directory" || exit 1
project_directory=$(cd "$experiment_directory/../../.." && pwd)
binary_directory=$(cd "$project_directory"/bin && pwd)
log="$experiment_directory/log.csv"

if [ "$1" != "-d" ]; then
  SLEDGE_SANDBOX_PERF_LOG="$log" \
    PATH="$binary_directory:$PATH" \
    LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" \
    sledgert "$experiment_directory/spec.json" >rt.log 2>&1 &
  # Give the runtime a moment to start listening
  sleep 2
else
  echo "Running under gdb"
fi

word_count=100
dpis=(72 108 144)

# Port that receives the requests for each DPI
declare -A dpi_to_port
dpi_to_port[72]=10000
dpi_to_port[108]=10001
dpi_to_port[144]=10002

total_count=100

for ((i = 0; i < total_count; i++)); do
  echo "$i"
  # The same words are rendered at every DPI within one iteration
  words="$(shuf -n"$word_count" /usr/share/dict/american-english)"

  for dpi in "${dpis[@]}"; do
    image="${dpi}_dpi"
    echo "${image}.pnm"

    pango-view --dpi="$dpi" --font=mono -qo "${image}.png" -t "$words"
    pngtopnm "${image}.png" >"${image}.pnm"

    # An empty Expect header suppresses curl's "Expect: 100-continue"
    result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"${image}.pnm" "localhost:${dpi_to_port[$dpi]}" 2>/dev/null)

    diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result")
    echo "==============================================="
  done
done

if [ "$1" != "-d" ]; then
  sleep 2
  echo -n "Running Cleanup: "
  # -f: do not complain if no images were generated
  rm -f ./*.png ./*.pnm
  # Ask politely first, then force-kill anything still alive
  pkill --signal sigterm sledgert >/dev/null 2>&1
  sleep 2
  pkill -9 sledgert >/dev/null 2>&1
  echo "[DONE]"
fi

@ -0,0 +1,42 @@
{
"active": "yes",
"name": "gocr_72_dpi",
"path": "gocr_wasm.so",
"port": 10000,
"relative-deadline-us": 50000000000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 5335057,
"http-resp-headers": [],
"http-resp-size": 5335057,
"http-resp-content-type": "text/plain"
},
{
"active": "yes",
"name": "gocr_108_dpi",
"path": "gocr_wasm.so",
"port": 10001,
"relative-deadline-us": 50000000000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 5335057,
"http-resp-headers": [],
"http-resp-size": 5335057,
"http-resp-content-type": "text/plain"
},
{
"active": "yes",
"name": "gocr_144_dpi",
"path": "gocr_wasm.so",
"port": 10002,
"relative-deadline-us": 50000000000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 5335057,
"http-resp-headers": [],
"http-resp-size": 5335057,
"http-resp-content-type": "text/plain"
}

@ -0,0 +1,5 @@
*.pnm
*.png
*.csv
*.txt
*.log

@ -0,0 +1,7 @@
# OCR varied by font
Generates pnm image files containing random words from the wamerican dictionary.
The number of words is held constant at 100, but a different font is used to render the text. Generally, `mono` performs the best, followed by `URW Gothic`, followed by `Lobster Two`, which is a stylistic font that performs quite poorly.
Run `install.sh` to install required tools and `run.sh` to run the tests.

@ -0,0 +1,19 @@
#!/bin/bash
# Launches the sledgert runtime under GDB with this experiment's spec.json.
#
# All paths are derived from the location of this script, so it may be invoked
# from any working directory. GDB is configured to:
#   - not stop on SIGUSR1 or SIGPIPE
#   - disable pagination
#   - substitute the container source path /sledge/runtime with the locally
#     resolved project directory, which allows debugging outside of the
#     Docker container

experiment_directory=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || {
  echo "Unable to resolve the experiment directory" >&2
  exit 1
}
project_directory=$(cd "$experiment_directory/../../.." && pwd) || {
  echo "Unable to resolve the project directory" >&2
  exit 1
}
binary_directory=$(cd "$project_directory/bin" && pwd) || {
  echo "Unable to find $project_directory/bin" >&2
  exit 1
}

# Make sledgert and the shared objects next to it discoverable
export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH"
export PATH="$binary_directory:$PATH"

gdb --eval-command="handle SIGUSR1 nostop" \
  --eval-command="handle SIGPIPE nostop" \
  --eval-command="set pagination off" \
  --eval-command="set substitute-path /sledge/runtime $project_directory" \
  --eval-command="run $experiment_directory/spec.json" \
  sledgert

@ -0,0 +1,4 @@
#!/bin/bash
# Installs the packages that run.sh relies on for image generation
# (pango-view, pngtopnm) and for the american-english word list.
required_packages=(netpbm pango1.0-tools wamerican)
sudo apt-get install "${required_packages[@]}"

@ -0,0 +1,65 @@
#!/bin/bash
# Runs the "OCR varied by font" experiment.
#
# Usage: run.sh [-d]
#
# Without -d, sledgert is started in the background with this directory's
# spec.json. Its output is written to rt.log and SLEDGE_SANDBOX_PERF_LOG is
# pointed at log.csv. With -d, no runtime is started; it is presumably already
# running under gdb (see debug.sh).
#
# Each of the total_count iterations draws word_count random dictionary words,
# renders them once per font, converts the PNG to PNM, POSTs the image to the
# port associated with that font, and prints a diff between the source words
# and the OCR result.
#
# Paths are resolved from the location of this script, and the script changes
# into that directory so generated files and logs always land next to it.

experiment_directory=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || exit 1
cd "$experiment_directory" || exit 1
project_directory=$(cd "$experiment_directory/../../.." && pwd)
binary_directory=$(cd "$project_directory"/bin && pwd)
log="$experiment_directory/log.csv"

if [ "$1" != "-d" ]; then
  SLEDGE_SANDBOX_PERF_LOG="$log" \
    PATH="$binary_directory:$PATH" \
    LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" \
    sledgert "$experiment_directory/spec.json" >rt.log 2>&1 &
  # Give the runtime a moment to start listening
  sleep 2
else
  echo "Running under gdb"
fi

word_count=100
fonts=("mono" "URW Gothic" "Lobster Two")

# Port per font. These follow the module order in spec.json:
# gocr_mono=10000, gocr_urw_gothic=10001, gocr_lobster_2=10002
declare -A font_to_port
font_to_port["mono"]=10000
font_to_port["URW Gothic"]=10001
font_to_port["Lobster Two"]=10002

total_count=100

for ((i = 1; i <= total_count; i++)); do
  echo "Test $i"
  # The same words are rendered with every font within one iteration
  words="$(shuf -n"$word_count" /usr/share/dict/american-english)"

  for font in "${fonts[@]}"; do
    # Capitalize the first letter so "mono" is reported as "Mono"
    echo "${font^}"

    # File names use underscores in place of the spaces in the font name
    image="${font// /_}_words"

    pango-view --font="$font" -qo "${image}.png" -t "$words"
    pngtopnm "${image}.png" >"${image}.pnm"

    # An empty Expect header suppresses curl's "Expect: 100-continue"
    result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"${image}.pnm" "localhost:${font_to_port[$font]}" 2>/dev/null)

    diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result")
    echo "==============================================="
  done
done

if [ "$1" != "-d" ]; then
  sleep 2
  echo -n "Running Cleanup: "
  # -f: do not complain if no images were generated
  rm -f ./*.png ./*.pnm
  # Ask politely first, then force-kill anything still alive
  pkill --signal sigterm sledgert >/dev/null 2>&1
  sleep 2
  pkill -9 sledgert >/dev/null 2>&1
  echo "[DONE]"
fi

@ -0,0 +1,42 @@
{
"active": "yes",
"name": "gocr_mono",
"path": "gocr_wasm.so",
"port": 10000,
"relative-deadline-us": 50000000000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 5335057,
"http-resp-headers": [],
"http-resp-size": 5335057,
"http-resp-content-type": "text/plain"
},
{
"active": "yes",
"name": "gocr_urw_gothic",
"path": "gocr_wasm.so",
"port": 10001,
"relative-deadline-us": 50000000000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 5335057,
"http-resp-headers": [],
"http-resp-size": 5335057,
"http-resp-content-type": "text/plain"
},
{
"active": "yes",
"name": "gocr_lobster_2",
"path": "gocr_wasm.so",
"port": 10002,
"relative-deadline-us": 50000000000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 5335057,
"http-resp-headers": [],
"http-resp-size": 5335057,
"http-resp-content-type": "text/plain"
}

@ -0,0 +1,5 @@
*.pnm
*.png
*.csv
*.txt
*.log

@ -0,0 +1,7 @@
# OCR varied by word
Generates pnm image files containing random words from the wamerican dictionary.
The number of words is varied among 1, 10, and 100 in order to assess how the amount of text being OCRed affects the execution profile of the sandbox serving the request.
Run `install.sh` to install required tools and `run.sh` to run the tests.

@ -0,0 +1,19 @@
#!/bin/bash
# Launches the sledgert runtime under GDB with this experiment's spec.json.
#
# All paths are derived from the location of this script, so it may be invoked
# from any working directory. GDB is configured to:
#   - not stop on SIGUSR1 or SIGPIPE
#   - disable pagination
#   - substitute the container source path /sledge/runtime with the locally
#     resolved project directory, which allows debugging outside of the
#     Docker container

experiment_directory=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || {
  echo "Unable to resolve the experiment directory" >&2
  exit 1
}
project_directory=$(cd "$experiment_directory/../../.." && pwd) || {
  echo "Unable to resolve the project directory" >&2
  exit 1
}
binary_directory=$(cd "$project_directory/bin" && pwd) || {
  echo "Unable to find $project_directory/bin" >&2
  exit 1
}

# Make sledgert and the shared objects next to it discoverable
export LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH"
export PATH="$binary_directory:$PATH"

gdb --eval-command="handle SIGUSR1 nostop" \
  --eval-command="handle SIGPIPE nostop" \
  --eval-command="set pagination off" \
  --eval-command="set substitute-path /sledge/runtime $project_directory" \
  --eval-command="run $experiment_directory/spec.json" \
  sledgert

@ -0,0 +1,4 @@
#!/bin/bash
# Installs the packages that run.sh relies on for image generation
# (pango-view, pngtopnm) and for the american-english word list.
required_packages=(netpbm pango1.0-tools wamerican)
sudo apt-get install "${required_packages[@]}"

@ -0,0 +1,53 @@
#!/bin/bash
# Runs the "OCR varied by word count" experiment.
#
# Usage: run.sh [-d]
#
# Without -d, sledgert is started in the background with this directory's
# spec.json. Its output is written to rt.log and SLEDGE_SANDBOX_PERF_LOG is
# pointed at log.csv. With -d, no runtime is started; it is presumably already
# running under gdb (see debug.sh).
#
# Each of the total_count iterations, for every word count, draws that many
# random dictionary words, renders them with the mono font, converts the PNG
# to PNM, POSTs the image to the port associated with that word count, and
# prints a diff between the source words and the OCR result.
#
# Paths are resolved from the location of this script, and the script changes
# into that directory so generated files and logs always land next to it.

experiment_directory=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) || exit 1
cd "$experiment_directory" || exit 1
project_directory=$(cd "$experiment_directory/../../.." && pwd)
binary_directory=$(cd "$project_directory"/bin && pwd)
log="$experiment_directory/log.csv"

if [ "$1" != "-d" ]; then
  SLEDGE_SANDBOX_PERF_LOG="$log" \
    PATH="$binary_directory:$PATH" \
    LD_LIBRARY_PATH="$binary_directory:$LD_LIBRARY_PATH" \
    sledgert "$experiment_directory/spec.json" >rt.log 2>&1 &
  # Give the runtime a moment to start listening
  sleep 2
else
  echo "Running under gdb"
fi

word_counts=(1 10 100)

# Port that receives the requests for each word count
declare -A word_count_to_port
word_count_to_port[1]=10000
word_count_to_port[10]=10001
word_count_to_port[100]=10002

total_count=100

for ((i = 0; i < total_count; i++)); do
  echo "$i"

  for word_count in "${word_counts[@]}"; do
    image="${word_count}_words"
    echo "${image}.pnm"

    # A fresh sample is drawn for every word count
    words="$(shuf -n"$word_count" /usr/share/dict/american-english)"

    pango-view --font=mono -qo "${image}.png" -t "$words"
    pngtopnm "${image}.png" >"${image}.pnm"

    # An empty Expect header suppresses curl's "Expect: 100-continue"
    result=$(curl -H 'Expect:' -H "Content-Type: text/plain" --data-binary @"${image}.pnm" "localhost:${word_count_to_port[$word_count]}" 2>/dev/null)

    diff -ywBZE --suppress-common-lines <(echo "$words") <(echo "$result")
    echo "==============================================="
  done
done

if [ "$1" != "-d" ]; then
  sleep 2
  echo -n "Running Cleanup: "
  # -f: do not complain if no images were generated
  rm -f ./*.png ./*.pnm
  # Ask politely first, then force-kill anything still alive
  pkill --signal sigterm sledgert >/dev/null 2>&1
  sleep 2
  pkill -9 sledgert >/dev/null 2>&1
  echo "[DONE]"
fi

@ -0,0 +1,42 @@
{
"active": "yes",
"name": "gocr_1_word",
"path": "gocr_wasm.so",
"port": 10000,
"relative-deadline-us": 50000000000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 5335057,
"http-resp-headers": [],
"http-resp-size": 5335057,
"http-resp-content-type": "text/plain"
},
{
"active": "yes",
"name": "gocr_10_words",
"path": "gocr_wasm.so",
"port": 10001,
"relative-deadline-us": 50000000000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 5335057,
"http-resp-headers": [],
"http-resp-size": 5335057,
"http-resp-content-type": "text/plain"
},
{
"active": "yes",
"name": "gocr_100_words",
"path": "gocr_wasm.so",
"port": 10002,
"relative-deadline-us": 50000000000,
"argsize": 1,
"http-req-headers": [],
"http-req-content-type": "text/plain",
"http-req-size": 5335057,
"http-resp-headers": [],
"http-resp-size": 5335057,
"http-resp-content-type": "text/plain"
}

@ -182,9 +182,18 @@ runtime_start_runtime_worker_threads()
debuglog("Sandboxing environment ready!\n");
}
/**
 * Flushes the sandbox performance log, if one is open, and then terminates
 * the process with a success status.
 * Registered as the SIGTERM handler in runtime_configure().
 * NOTE(review): fflush() and exit() are not on the POSIX async-signal-safe
 * list; confirm this is acceptable for a handler.
 */
void
runtime_cleanup()
{
	if (runtime_sandbox_perf_log) {
		fflush(runtime_sandbox_perf_log);
	}

	exit(EXIT_SUCCESS);
}
void
runtime_configure()
{
signal(SIGTERM, runtime_cleanup);
/* Scheduler Policy */
char *scheduler_policy = getenv("SLEDGE_SCHEDULER");
if (scheduler_policy == NULL) scheduler_policy = "FIFO";

Loading…
Cancel
Save