# shellcheck shell=bash

source "type_checks.sh" || exit 1

# These utility functions generate percentile tables that summarize distributions of data.
# Each row represents an independent variable, such as a scheduling policy.
# The data for each row is provided as a single column of sorted numeric values.
# If the data is too coarse, a percentile might be statistically insignificant. In that case,
# the script writes an * to the appropriate cell.
#
# Example:
#
# percentiles_table_header "./table.csv"
# for variant in fifo_nopreemption fifo_preemption edf_nopreemption edf_preemption; do
# 	percentiles_table_row "./${variant}.csv" "./table.csv" "$variant"
# done
#
# See Also:
# - csv_to_dat - Can transform a table into a *.dat file suitable for gnuplot
#
# References:
# - The AWK Programming Language - https://ia802309.us.archive.org/25/items/pdfy-MgN0H1joIoDVoIC7/The_AWK_Programming_Language.pdf
# - GAWK: Effective AWK Programming - https://www.gnu.org/software/gawk/manual/gawk.pdf

# Writes the CSV header row to table_file, truncating any existing contents
percentiles_table_header() {
	local table_file="${1:?table_file not set}"
	# Can optionally override "app" in header
	local label_header="${2:-app}"

	echo "${label_header},cnt,min,mean,p50,p90,p99,max" > "$table_file"
}

# Appends one row of summary statistics to table_file.
# columnar_data_file is assumed to be a file containing a single column of sorted numeric data.
percentiles_table_row() {
	local -r columnar_data_file="${1:?columnar_data_file not set}"
	check_file columnar_data_file
	local -r table_file="${2:?table_file not set}"
	check_file table_file
	local -r row_label="${3:?row_label not set}"
	local -r format_string="${4:-%1.2f}"

	# Count the number of results
	local -i sample_size
	sample_size=$(wc -l < "$columnar_data_file")

	if ((sample_size == 0)); then
		# We might not have actually run every variant depending on iterations and workload mix.
		# Insert a degenerate row if this is the case.
		echo "$row_label,0,*,*,*,*,*,*" >> "$table_file"
	else
		awk -v sample_size="$sample_size" \
			-v row_label="$row_label" \
			-v format_string="$format_string" '
			BEGIN {
				invalid_number_symbol = "*"
				sum = 0
				p50_idx = int(sample_size * 0.5)
				p90_idx = int(sample_size * 0.9)
				p99_idx = int(sample_size * 0.99)
				p100_idx = sample_size

				# Default every cell to * so that percentiles whose index rounds
				# down to 0 (tiny samples) do not print as empty fields
				min = mean = p50 = p90 = p99 = p100 = invalid_number_symbol
			}

			# The empty pattern matches all rows
			{ sum += $0 }

			NR == 1        { min  = sprintf(format_string, $0) }
			NR == p50_idx  { p50  = sample_size >= 3   ? sprintf(format_string, $0) : invalid_number_symbol }
			NR == p90_idx  { p90  = sample_size >= 10  ? sprintf(format_string, $0) : invalid_number_symbol }
			NR == p99_idx  { p99  = sample_size >= 100 ? sprintf(format_string, $0) : invalid_number_symbol }
			NR == p100_idx { p100 = sprintf(format_string, $0) }

			END {
				mean = sprintf(format_string, sum / NR)
				printf "%s,%d,%s,%s,%s,%s,%s,%s\n", row_label, sample_size, min, mean, p50, p90, p99, p100
			}
		' < "$columnar_data_file" >> "$table_file"
	fi
}
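
# The function below is a hypothetical end-to-end sketch, not part of the original API.
# It assumes raw per-variant measurements live in ./results/<variant>.txt as one unsorted
# number per line, sorts them numerically (percentiles_table_row expects sorted input),
# and builds ./table.csv. Defining the function has no side effects when this file is
# sourced; call it explicitly to run the demo.
percentiles_table_example() {
	local -r table_file="./table.csv"
	local variant

	# First column will be labeled "policy" instead of the default "app"
	percentiles_table_header "$table_file" "policy"

	for variant in fifo_nopreemption fifo_preemption edf_nopreemption edf_preemption; do
		# Sort the raw samples into the columnar format the row helper expects
		sort -n "./results/${variant}.txt" > "./results/${variant}.csv"
		percentiles_table_row "./results/${variant}.csv" "$table_file" "$variant"
	done

	# The resulting table.csv can then be passed to csv_to_dat for gnuplot
}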