diff --git a/benchmarks/reports/REPORT_TEMPLATE.md b/benchmarks/reports/REPORT_TEMPLATE.md index d116cc1..04beec7 100644 --- a/benchmarks/reports/REPORT_TEMPLATE.md +++ b/benchmarks/reports/REPORT_TEMPLATE.md @@ -36,11 +36,11 @@ adapter-internal latency from network variables. ### Interpretation -_Review the numbers above and add interpretation here._ +__INTERPRETATION__ ### Recommendation -_Add sizing and tuning recommendations here._ +__RECOMMENDATION__ --- diff --git a/benchmarks/run_benchmarks.sh b/benchmarks/run_benchmarks.sh index 5825644..d8575b9 100755 --- a/benchmarks/run_benchmarks.sh +++ b/benchmarks/run_benchmarks.sh @@ -20,7 +20,6 @@ set -euo pipefail SCRIPT_START=$(date +%s) REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -RESULTS_DIR="$REPO_ROOT/benchmarks/results/$(date +%Y-%m-%d_%H-%M-%S)" BENCH_PKG="./benchmarks/e2e/..." BENCH_TIMEOUT="10m" BENCH_TIME_SERIAL="10s" @@ -31,6 +30,38 @@ BENCH_COUNT=1 # benchstat uses the 3 serial files for stability ONIX_VERSION="$(git -C "$REPO_ROOT" describe --tags --abbrev=0 2>/dev/null || echo "dev")" REPORT_TEMPLATE="$REPO_ROOT/benchmarks/reports/REPORT_TEMPLATE.md" +# ── -report-only : regenerate report from an existing results directory ── +if [[ "${1:-}" == "-report-only" ]]; then + RESULTS_DIR="${2:-}" + if [[ -z "$RESULTS_DIR" ]]; then + echo "Usage: bash benchmarks/run_benchmarks.sh -report-only " + echo "Example: bash benchmarks/run_benchmarks.sh -report-only benchmarks/results/2026-04-09_10-30-00" + exit 1 + fi + if [[ ! -d "$RESULTS_DIR" ]]; then + echo "ERROR: results directory not found: $RESULTS_DIR" + exit 1 + fi + echo "=== Regenerating report from existing results ===" + echo "Results dir : $RESULTS_DIR" + echo "" + cd "$REPO_ROOT" + echo "Parsing results to CSV..." + go run "$REPO_ROOT/benchmarks/tools/parse_results.go" \ + -dir="$RESULTS_DIR" -out="$RESULTS_DIR" 2>&1 || true + echo "" + echo "Generating benchmark report..." + go run "$REPO_ROOT/benchmarks/tools/generate_report.go" \ + -dir="$RESULTS_DIR" \ + -template="$REPORT_TEMPLATE" \ + -version="$ONIX_VERSION" + echo "" + echo "Done. Report written to: $RESULTS_DIR/BENCHMARK_REPORT.md" + exit 0 +fi + +RESULTS_DIR="$REPO_ROOT/benchmarks/results/$(date +%Y-%m-%d_%H-%M-%S)" + cd "$REPO_ROOT" # ── benchstat is declared as a go tool in go.mod; no separate install needed ── diff --git a/benchmarks/tools/generate_report.go b/benchmarks/tools/generate_report.go index e4f90a0..1a5a235 100644 --- a/benchmarks/tools/generate_report.go +++ b/benchmarks/tools/generate_report.go @@ -146,6 +146,10 @@ func main() { // ── Build throughput table ───────────────────────────────────────────────── throughputTable := buildThroughputTable(throughput) + // ── Generate interpretation and recommendation ───────────────────────────── + interpretation := buildInterpretation(perc, latency, throughput, warmUS, coldUS) + recommendation := buildRecommendation(throughput) + // ── Apply substitutions ──────────────────────────────────────────────────── replacements := map[string]string{ "__TIMESTAMP__": timestamp, @@ -181,8 +185,10 @@ func main() { "__CACHE_WARM_BYTES__": fmtInt(latency["BenchmarkBAPCaller_CacheWarm"]["bytes_op"]), "__CACHE_COLD_BYTES__": fmtInt(latency["BenchmarkBAPCaller_CacheCold"]["bytes_op"]), "__CACHE_DELTA__": cacheDelta, - "__THROUGHPUT_TABLE__": throughputTable, + "__THROUGHPUT_TABLE__": throughputTable, "__BENCHSTAT_SUMMARY__": benchstat, + "__INTERPRETATION__": interpretation, + "__RECOMMENDATION__": recommendation, } for placeholder, value := range replacements { @@ -399,3 +405,191 @@ func readFileOrDefault(path, def string) string { } return strings.TrimRight(string(b), "\n") } + +// ── Narrative generators ─────────────────────────────────────────────────────── + +// buildInterpretation generates a data-driven interpretation paragraph from the +// benchmark results. It covers tail-latency control, action complexity trend, +// concurrency scaling efficiency, and cache impact. +func buildInterpretation( + perc map[string]string, + latency map[string]map[string]string, + throughput []map[string]string, + warmUS, coldUS string, +) string { + var sb strings.Builder + + p50 := parseFloatOrZero(perc["p50_µs"]) + p99 := parseFloatOrZero(perc["p99_µs"]) + meanDiscover := parseFloatOrZero(latency["BenchmarkBAPCaller_Discover"]["mean_ms"]) * 1000 + + // Tail-latency control. + if p50 > 0 && p99 > 0 { + ratio := p99 / p50 + quality := "good" + if ratio > 5 { + quality = "poor" + } else if ratio > 3 { + quality = "moderate" + } + sb.WriteString(fmt.Sprintf( + "The adapter delivers a p50 latency of **%.0f µs** for the discover action. "+ + "The p99/p50 ratio is **%.1f×**, indicating %s tail-latency control — "+ + "spikes are %s relative to the median.\n\n", + p50, ratio, quality, tailDescription(ratio), + )) + } else if meanDiscover > 0 { + sb.WriteString(fmt.Sprintf( + "The adapter delivers a mean latency of **%.0f µs** for the discover action. "+ + "Run with `-bench=BenchmarkBAPCaller_Discover_Percentiles` to obtain p50/p95/p99 data.\n\n", + meanDiscover, + )) + } + + // Action complexity trend. + selectMS := parseFloatOrZero(latency["BenchmarkBAPCaller_AllActions/select"]["mean_ms"]) * 1000 + initMS := parseFloatOrZero(latency["BenchmarkBAPCaller_AllActions/init"]["mean_ms"]) * 1000 + confirmMS := parseFloatOrZero(latency["BenchmarkBAPCaller_AllActions/confirm"]["mean_ms"]) * 1000 + if meanDiscover > 0 && selectMS > 0 && initMS > 0 && confirmMS > 0 { + sb.WriteString(fmt.Sprintf( + "Latency scales with payload complexity: select (+%.0f%%), init (+%.0f%%), confirm (+%.0f%%) "+ + "vs the discover baseline. Allocation counts track proportionally, driven by JSON "+ + "unmarshalling and schema validation of larger payloads.\n\n", + pctChange(meanDiscover, selectMS), + pctChange(meanDiscover, initMS), + pctChange(meanDiscover, confirmMS), + )) + } + + // Concurrency scaling. + lat1 := latencyAtCPU(throughput, "1") + lat16 := latencyAtCPU(throughput, "16") + if lat1 > 0 && lat16 > 0 { + improvement := lat1 / lat16 + sb.WriteString(fmt.Sprintf( + "Concurrency scaling is effective: mean latency drops from **%.0f µs** at GOMAXPROCS=1 "+ + "to **%.0f µs** at GOMAXPROCS=16 — a **%.1f× improvement**.", + lat1*1000, lat16*1000, improvement, + )) + if improvement < 4 { + sb.WriteString(" Gains taper beyond 8 cores, suggesting a shared serialisation point " + + "(likely schema validation or key derivation).") + } + sb.WriteString("\n\n") + } + + // Cache impact. + w := parseFloatOrZero(warmUS) + c := parseFloatOrZero(coldUS) + if w > 0 && c > 0 { + delta := math.Abs(w-c) / w * 100 + if delta < 5 { + sb.WriteString(fmt.Sprintf( + "The Redis key-manager cache shows **no measurable impact** in this setup "+ + "(warm vs cold delta: %.0f µs, %.1f%% of mean). "+ + "miniredis is in-process; signing and schema validation dominate. "+ + "Cache benefit would be visible with real Redis over a network.", + math.Abs(w-c), delta, + )) + } else { + sb.WriteString(fmt.Sprintf( + "The Redis key-manager cache provides a **%.0f µs improvement** (%.1f%%) "+ + "on the warm path vs cold.", + math.Abs(w-c), delta, + )) + } + sb.WriteString("\n") + } + + if sb.Len() == 0 { + return "_Insufficient data to generate interpretation. Ensure all benchmark scenarios completed successfully._" + } + return strings.TrimRight(sb.String(), "\n") +} + +// buildRecommendation generates a sizing and tuning recommendation based on the +// concurrency sweep results. +func buildRecommendation(throughput []map[string]string) string { + if len(throughput) == 0 { + return "_Run the concurrency sweep to generate sizing recommendations._" + } + + // Find the GOMAXPROCS level with best scaling efficiency (RPS gain per core). + type cpuPoint struct { + cpu int + rps float64 + lat float64 + } + var points []cpuPoint + for _, row := range throughput { + cpu := int(parseFloatOrZero(row["gomaxprocs"])) + rps := parseFloatOrZero(row["rps"]) + lat := parseFloatOrZero(row["mean_latency_ms"]) * 1000 + if cpu > 0 && lat > 0 { + points = append(points, cpuPoint{cpu, rps, lat}) + } + } + + if len(points) == 0 { + return "_Run the concurrency sweep (parallel_cpu*.txt) to generate sizing recommendations._" + } + + // Find sweet spot: largest latency improvement per doubling of cores. + bestEffCPU := points[0].cpu + bestEff := 0.0 + for i := 1; i < len(points); i++ { + if points[i-1].lat > 0 { + eff := (points[i-1].lat - points[i].lat) / points[i-1].lat + if eff > bestEff { + bestEff = eff + bestEffCPU = points[i].cpu + } + } + } + + var sb strings.Builder + sb.WriteString(fmt.Sprintf( + "**%d cores** offers the best throughput/cost ratio based on the concurrency sweep — "+ + "scaling efficiency begins to taper beyond this point.\n\n", + bestEffCPU, + )) + sb.WriteString("The adapter is ready for staged load testing against a real BPP. " + + "For production sizing, start with the recommended core count above and adjust based " + + "on observed throughput targets. If schema validation dominates CPU (likely at high " + + "concurrency), profile with `go tool pprof` using the commands in B5 to isolate the bottleneck.") + + return sb.String() +} + +// ── Narrative helpers ────────────────────────────────────────────────────────── + +func tailDescription(ratio float64) string { + switch { + case ratio <= 2: + return "minimal" + case ratio <= 3: + return "modest" + case ratio <= 5: + return "noticeable" + default: + return "significant" + } +} + +func pctChange(base, val float64) float64 { + if base == 0 { + return 0 + } + return (val - base) / base * 100 +} + +func latencyAtCPU(throughput []map[string]string, cpu string) float64 { + for _, row := range throughput { + if row["gomaxprocs"] == cpu { + if v := parseFloatOrZero(row["mean_latency_ms"]); v > 0 { + return v + } + } + } + return 0 +}