# shellcheck shell=bash

source "type_checks.sh" || exit 1

# These utility functions generate percentile tables that summarize distributions of data.
# Each row represents an independent variable, such as a scheduling policy.
# The data for each row is provided as a single column of sorted numeric values.
# If the data is too coarse, a percentile might be statistically insignificant. In that case,
# the script writes an * to the appropriate cell.
#
# Example:
#
# percentiles_table_header "./table.csv"
# for variant in fifo_nopreemption fifo_preemption edf_nopreemption edf_preemption; do
# 	percentiles_table_row "./${variant}.csv" "./table.csv" "$variant"
# done
#
# See Also:
# - csv_to_dat - Can transform a table into a *.dat file suitable for gnuplot
#
# References:
# - The AWK Programming Language - https://ia802309.us.archive.org/25/items/pdfy-MgN0H1joIoDVoIC7/The_AWK_Programming_Language.pdf
# - GAWK: Effective AWK Programming - https://www.gnu.org/software/gawk/manual/gawk.pdf

# Writes the CSV header row to table_file, truncating any existing contents
percentiles_table_header() {
	local table_file="${1:?table_file not set}"
	# Can optionally override "app" in header
	local label_header="${2:-app}"

	echo "${label_header},cnt,min,mean,p50,p90,p99,max" > "$table_file"
}

# Appends one row of summary statistics to table_file.
# columnar_data_file is assumed to be a file containing a single column of sorted numeric data.
percentiles_table_row() {
	local -r columnar_data_file="${1:?columnar_data_file not set}"
	check_file columnar_data_file
	local -r table_file="${2:?table_file not set}"
	check_file table_file
	local -r row_label="${3:?row_label not set}"
	local -r format_string="${4:-%1.2f}"

	# Count the number of results
	local -i sample_size
	sample_size=$(wc -l < "$columnar_data_file")

	if ((sample_size == 0)); then
		# We might not have actually run every variant depending on iterations and workload mix.
		# Insert a degenerate row if this is the case.
		echo "$row_label,0,*,*,*,*,*,*" >> "$table_file"
	else
		awk -v sample_size="$sample_size" \
			-v row_label="$row_label" \
			-v format_string="$format_string" '
			BEGIN {
				invalid_number_symbol = "*"
				sum = 0
				p50_idx = int(sample_size * 0.5)
				p90_idx = int(sample_size * 0.9)
				p99_idx = int(sample_size * 0.99)
				p100_idx = sample_size

				# Default every cell to * so that percentiles whose index rounds
				# down to 0 (tiny samples) do not print as empty fields
				min = mean = p50 = p90 = p99 = p100 = invalid_number_symbol
			}

			# The empty pattern matches all rows
			{ sum += $0 }

			NR == 1        { min  = sprintf(format_string, $0) }
			NR == p50_idx  { p50  = sample_size >= 3   ? sprintf(format_string, $0) : invalid_number_symbol }
			NR == p90_idx  { p90  = sample_size >= 10  ? sprintf(format_string, $0) : invalid_number_symbol }
			NR == p99_idx  { p99  = sample_size >= 100 ? sprintf(format_string, $0) : invalid_number_symbol }
			NR == p100_idx { p100 = sprintf(format_string, $0) }

			END {
				mean = sprintf(format_string, sum / NR)
				printf "%s,%d,%s,%s,%s,%s,%s,%s\n", row_label, sample_size, min, mean, p50, p90, p99, p100
			}
		' < "$columnar_data_file" >> "$table_file"
	fi
}
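
# The function below is a hypothetical end-to-end sketch, not part of the original API.
# It assumes raw per-variant measurements live in ./results/<variant>.txt as one unsorted
# number per line, sorts them numerically (percentiles_table_row expects sorted input),
# and builds ./table.csv. Defining the function has no side effects when this file is
# sourced; call it explicitly to run the demo.
percentiles_table_example() {
	local -r table_file="./table.csv"
	local variant

	# First column will be labeled "policy" instead of the default "app"
	percentiles_table_header "$table_file" "policy"

	for variant in fifo_nopreemption fifo_preemption edf_nopreemption edf_preemption; do
		# Sort the raw samples into the columnar format the row helper expects
		sort -n "./results/${variant}.txt" > "./results/${variant}.csv"
		percentiles_table_row "./results/${variant}.csv" "$table_file" "$variant"
	done

	# The resulting table.csv can then be passed to csv_to_dat for gnuplot
}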