You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

78 lines
3.0 KiB

# shellcheck shell=bash
source "type_checks.sh" || exit 1
# These utility functions are used to generate percentile tables that summarize distributions of data.
# Each row represents an independent variable, such as a scheduling policy
# The data for each column is provided as a single column of sorted data
# If the data is too course, a percentile might be statistically insignificant. If this is the case,
# The script writes an * to the appropriate cell
#
# Example:
#
# percentiles_table_header "./table.csv"
# for $variant in (fifo_nopreemption fifo_preemption edf_nopreemption edf_preemption); do
# percentiles_table_row "./${variant}.csv" "./table.csv" "$variant"
# done
#
# See Also:
# - csv_to_dat - Can transform a table into a *.dat file suitable for gnuplot
#
# References
# - The AWK Programming Language - https://ia802309.us.archive.org/25/items/pdfy-MgN0H1joIoDVoIC7/The_AWK_Programming_Language.pdf
# - GAWK: Effective AWK Programming - https://www.gnu.org/software/gawk/manual/gawk.pdf
percentiles_table_header() {
local table_file="${1:?table_file not set}"
# Can optionally override "app" in header
local label_header="${2:-app}"
echo "${label_header},cnt,min,mean,p50,p90,p99,max" > "$table_file"
}
# columnar_data_file is assumed to be a file containing a single column or sorted data
percentiles_table_row() {
local -r columnar_data_file="${1:?columnar_data_file not set}"
check_file columnar_data_file
local -r table_file="${2:?table_file not set}"
check_file table_file
local -r row_label="${3:?row_label not set}"
local -r format_string="${4:-%1.0f}"
# Count the number of results
local -i sample_size
sample_size=$(wc -l < "$columnar_data_file")
if ((sample_size == 0)); then
# We might not have actually run every variant depending on iterations and workload mix
# Insert a degenerate row if this is the case
echo "$row_label,0,*,*,*,*,*,*" >> "$table_file"
else
awk '
BEGIN {
sample_size='"$sample_size"'
row_label="'"$row_label"'"
format_string="'"$format_string"'"
invalid_number_symbol="*"
sum = 0
p50_idx = int(sample_size * 0.5)
p90_idx = int(sample_size * 0.9)
p99_idx = int(sample_size * 0.99)
p100_idx = sample_size
}
# Empty pattern matches all rows
{ sum += $0 }
NR==1 { min = sample_size > 0 ? sprintf(format_string, $0) : invalid_number_symbol }
NR==p50_idx { p50 = sample_size >= 3 ? sprintf(format_string, $0) : invalid_number_symbol }
NR==p90_idx { p90 = sample_size >= 10 ? sprintf(format_string, $0) : invalid_number_symbol }
NR==p99_idx { p99 = sample_size >= 100 ? sprintf(format_string, $0) : invalid_number_symbol }
NR==p100_idx { p100 = sample_size > 0 ? sprintf(format_string, $0) : invalid_number_symbol }
END {
mean = sample_size > 0 ? sprintf(format_string, sum / NR) : invalid_number_symbol
printf "%s,%d,%s,%s,%s,%s,%s,%s\n", row_label, sample_size, min, mean, p50, p90, p99, p100
}
' < "$columnar_data_file" >> "$table_file"
fi
}