sledge-serverles-framwork/tests/bash_libraries/percentiles_table.sh

# shellcheck shell=bash

# Load the shared type-checking helpers; abort if the library cannot be sourced.
# NOTE(review): check_file, called by percentiles_table_row, is presumably defined there — confirm.
source "type_checks.sh" || exit 1

# These utility functions are used to generate percentile tables that summarize distributions of data.
# Each row represents an independent variable, such as a scheduling policy
# The data for each column is provided as a single column of sorted data
# If the data is too coarse, a percentile might be statistically insignificant. If this is the case,
# the script writes an * to the appropriate cell
#
# Example:
#
# percentiles_table_header "./table.csv"
# for variant in fifo_nopreemption fifo_preemption edf_nopreemption edf_preemption; do
#     percentiles_table_row "./${variant}.csv" "./table.csv" "$variant"
# done
#
# See Also:
#   - csv_to_dat - Can transform a table into a *.dat file suitable for gnuplot
#
# References
#   - The AWK Programming Language - https://ia802309.us.archive.org/25/items/pdfy-MgN0H1joIoDVoIC7/The_AWK_Programming_Language.pdf
#   - GAWK: Effective AWK Programming - https://www.gnu.org/software/gawk/manual/gawk.pdf

# Starts a new percentile table by writing the CSV header line.
# Any existing content of the target file is overwritten.
# Arguments:
#   $1 - path of the table file to create or truncate (required)
#   $2 - title of the first (label) column; defaults to "app"
percentiles_table_header() {
	local out_path="${1:?table_file not set}"
	local first_column="${2:-app}"
	printf '%s\n' "${first_column},cnt,min,mean,p50,p90,p99,max" > "$out_path"
}

# Appends one summary row (label,cnt,min,mean,p50,p90,p99,max) to a percentile table.
# columnar_data_file is assumed to be a file containing a single column of sorted data
# Arguments:
#   $1 - columnar_data_file: path to the sorted, single-column data (required)
#   $2 - table_file: path of the CSV table the row is appended to (required)
#   $3 - row_label: value written in the first column of the row (required)
#   $4 - format_string: printf-style format applied to each statistic; defaults to %1.0f
# Cells that cannot be computed from the available samples are written as *
percentiles_table_row() {
	local -r columnar_data_file="${1:?columnar_data_file not set}"
	check_file columnar_data_file
	local -r table_file="${2:?table_file not set}"
	check_file table_file
	local -r row_label="${3:?row_label not set}"
	local -r format_string="${4:-%1.0f}"

	# Count records with awk instead of wc -l: wc counts newline characters, so a
	# final line without a trailing newline would be missed and the count would
	# disagree with NR in the pass below
	local -i sample_size
	sample_size=$(awk 'END { print NR }' < "$columnar_data_file")

	if ((sample_size == 0)); then
		# We might not have actually run every variant depending on iterations and workload mix
		# Insert a degenerate row if this is the case
		printf '%s\n' "$row_label,0,*,*,*,*,*,*" >> "$table_file"
	else
		# Shell values are handed over with -v rather than spliced into the program
		# text, so a label containing quotes cannot break or alter the awk script
		awk \
			-v sample_size="$sample_size" \
			-v row_label="$row_label" \
			-v format_string="$format_string" \
			'
			BEGIN {
				invalid_number_symbol = "*"
				sum = 0

				# Floor-based record numbers of each percentile in the sorted input
				p50_idx = int(sample_size * 0.5)
				p90_idx = int(sample_size * 0.9)
				p99_idx = int(sample_size * 0.99)
				p100_idx = sample_size

				# Default every cell to the invalid symbol. With a single sample the
				# percentile indices are 0 and never match a record, which would
				# otherwise leave those cells empty
				min = p50 = p90 = p99 = p100 = invalid_number_symbol
			}

			# Empty pattern matches all rows
			             { sum += $0 }
			NR==1        { min = sample_size  > 0   ? sprintf(format_string, $0) : invalid_number_symbol }
			NR==p50_idx  { p50 = sample_size >= 3   ? sprintf(format_string, $0) : invalid_number_symbol }
			NR==p90_idx  { p90 = sample_size >= 10  ? sprintf(format_string, $0) : invalid_number_symbol }
			NR==p99_idx  { p99 = sample_size >= 100 ? sprintf(format_string, $0) : invalid_number_symbol }
			NR==p100_idx { p100 = sample_size > 0   ? sprintf(format_string, $0) : invalid_number_symbol }

			END {
				mean = sample_size > 0 ? sprintf(format_string, sum / NR) : invalid_number_symbol
				printf "%s,%d,%s,%s,%s,%s,%s,%s\n", row_label, sample_size, min, mean, p50, p90, p99, p100
			}
		' < "$columnar_data_file" >> "$table_file"
	fi
}
Percentile Cleanup, Processor Speed Hack, and Experimental Config (#257) * chore: Hackily hardcode CPU speed * chore: dump execution profile in spec generation * chore: also generate dat * fix: awk off by one error * chore: write dummy row * chore: Remove ekf, up concur, up iter * chore: awk statistical significance * fix: wrap awk string in quotes * fix: add awk trailing delimiter * chore: disable ekf in deadline description * chore: Increase iters * chore: generate deadline key * chore: make percentile global * chore: update mix for new multiples * chore: deadline csv key * docs: Better explain how to run run experiments * refactor: Inital percentile table cleanup * refactor: additional percentile table cleanup * refactor: payload percentiles table * refactor: workload_mix percentile table * feat: final percentiles row stuff * chore: revert processor hack 4 years ago			`# shellcheck shell=bash`

			`source "type_checks.sh" \|\| exit 1`

			`# These utility functions are used to generate percentile tables that summarize distributions of data.`
			`# Each row represents an independent variable, such as a scheduling policy`
			`# The data for each column is provided as a single column of sorted data`
			`# If the data is too course, a percentile might be statistically insignificant. If this is the case,`
			`# The script writes an * to the appropriate cell`
			`#`
			`# Example:`
			`#`
			`# percentiles_table_header "./table.csv"`
			`# for $variant in (fifo_nopreemption fifo_preemption edf_nopreemption edf_preemption); do`
			`# percentiles_table_row "./${variant}.csv" "./table.csv" "$variant"`
			`# done`
			`#`
			`# See Also:`
			`# - csv_to_dat - Can transform a table into a *.dat file suitable for gnuplot`
			`#`
			`# References`
			`# - The AWK Programming Language - https://ia802309.us.archive.org/25/items/pdfy-MgN0H1joIoDVoIC7/The_AWK_Programming_Language.pdf`
			`# - GAWK: Effective AWK Programming - https://www.gnu.org/software/gawk/manual/gawk.pdf`

			`percentiles_table_header() {`
			`local table_file="${1:?table_file not set}"`
			`# Can optionally override "app" in header`
			`local label_header="${2:-app}"`
			`echo "${label_header},cnt,min,mean,p50,p90,p99,max" > "$table_file"`
			`}`

			`# columnar_data_file is assumed to be a file containing a single column or sorted data`
			`percentiles_table_row() {`
			`local -r columnar_data_file="${1:?columnar_data_file not set}"`
			`check_file columnar_data_file`
			`local -r table_file="${2:?table_file not set}"`
			`check_file table_file`
			`local -r row_label="${3:?row_label not set}"`
General Cleanup (#351) * refactor: use var accross the Makefile instead of sledgert string * refactor bash libraries - remove hash symbol for scv_to_dat - add set_print_pretty for gdb mode - add logging for client - change printf format for perf log in table from float to int since usec * cleanup: generalize redundant files in the experiments: - remove individual gitignore, have single gitignore in parent (tests) - remove individual env files, have single copies in 'common' dir - remove individual install.sh, have install_tools.sh (in bash libs) * - add comment into install_tools.sh - rename mts to mtds * update the env files dir to common * move multi-tenancy env files to another directory (temp) 3 years ago			`local -r format_string="${4:-%1.0f}"`
Percentile Cleanup, Processor Speed Hack, and Experimental Config (#257) * chore: Hackily hardcode CPU speed * chore: dump execution profile in spec generation * chore: also generate dat * fix: awk off by one error * chore: write dummy row * chore: Remove ekf, up concur, up iter * chore: awk statistical significance * fix: wrap awk string in quotes * fix: add awk trailing delimiter * chore: disable ekf in deadline description * chore: Increase iters * chore: generate deadline key * chore: make percentile global * chore: update mix for new multiples * chore: deadline csv key * docs: Better explain how to run run experiments * refactor: Inital percentile table cleanup * refactor: additional percentile table cleanup * refactor: payload percentiles table * refactor: workload_mix percentile table * feat: final percentiles row stuff * chore: revert processor hack 4 years ago
			`# Count the number of results`
			`local -i sample_size`
			`sample_size=$(wc -l < "$columnar_data_file")`

			`if ((sample_size == 0)); then`
			`# We might not have actually run every variant depending on iterations and workload mix`
			`# Insert a degenerate row if this is the case`
			`echo "$row_label,0,,,,,," >> "$table_file"`
			`else`
			`awk '`
			`BEGIN {`
			`sample_size='"$sample_size"'`
			`row_label="'"$row_label"'"`
			`format_string="'"$format_string"'"`
			`invalid_number_symbol="*"`
			`sum = 0`
			`p50_idx = int(sample_size * 0.5)`
			`p90_idx = int(sample_size * 0.9)`
			`p99_idx = int(sample_size * 0.99)`
			`p100_idx = sample_size`
			`}`

			`# Empty pattern matches all rows`
			`{ sum += $0 }`
			`NR==1 { min = sample_size > 0 ? sprintf(format_string, $0) : invalid_number_symbol }`
			`NR==p50_idx { p50 = sample_size >= 3 ? sprintf(format_string, $0) : invalid_number_symbol }`
			`NR==p90_idx { p90 = sample_size >= 10 ? sprintf(format_string, $0) : invalid_number_symbol }`
			`NR==p99_idx { p99 = sample_size >= 100 ? sprintf(format_string, $0) : invalid_number_symbol }`
			`NR==p100_idx { p100 = sample_size > 0 ? sprintf(format_string, $0) : invalid_number_symbol }`

			`END {`
			`mean = sample_size > 0 ? sprintf(format_string, sum / NR) : invalid_number_symbol`
			`printf "%s,%d,%s,%s,%s,%s,%s,%s\n", row_label, sample_size, min, mean, p50, p90, p99, p100`
			`}`
			`' < "$columnar_data_file" >> "$table_file"`
			`fi`
			`}`