feat(benchmarks): auto-generate Interpretation/Recommendation and add -report-only flag
generate_report.go:
- buildInterpretation: derives narrative from p99/p50 tail-latency ratio, per-action complexity trend (% increase vs discover baseline), concurrency scaling efficiency (GOMAXPROCS=1 vs 16), and cache warm/cold delta
- buildRecommendation: identifies the best throughput/cost GOMAXPROCS level from scaling efficiency and adds production sizing guidance

run_benchmarks.sh:
- Add -report-only <dir> flag: re-runs parse_results.go + generate_report.go against an existing results directory without rerunning benchmarks

REPORT_TEMPLATE.md:
- Replace manual placeholders with __INTERPRETATION__ and __RECOMMENDATION__ markers filled by the generator
This commit is contained in:
@@ -36,11 +36,11 @@ adapter-internal latency from network variables.
|
|||||||
|
|
||||||
### Interpretation
|
### Interpretation
|
||||||
|
|
||||||
_Review the numbers above and add interpretation here._
|
__INTERPRETATION__
|
||||||
|
|
||||||
### Recommendation
|
### Recommendation
|
||||||
|
|
||||||
_Add sizing and tuning recommendations here._
|
__RECOMMENDATION__
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ set -euo pipefail
|
|||||||
|
|
||||||
SCRIPT_START=$(date +%s)
|
SCRIPT_START=$(date +%s)
|
||||||
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||||
RESULTS_DIR="$REPO_ROOT/benchmarks/results/$(date +%Y-%m-%d_%H-%M-%S)"
|
|
||||||
BENCH_PKG="./benchmarks/e2e/..."
|
BENCH_PKG="./benchmarks/e2e/..."
|
||||||
BENCH_TIMEOUT="10m"
|
BENCH_TIMEOUT="10m"
|
||||||
BENCH_TIME_SERIAL="10s"
|
BENCH_TIME_SERIAL="10s"
|
||||||
@@ -31,6 +30,38 @@ BENCH_COUNT=1 # benchstat uses the 3 serial files for stability
|
|||||||
ONIX_VERSION="$(git -C "$REPO_ROOT" describe --tags --abbrev=0 2>/dev/null || echo "dev")"
|
ONIX_VERSION="$(git -C "$REPO_ROOT" describe --tags --abbrev=0 2>/dev/null || echo "dev")"
|
||||||
REPORT_TEMPLATE="$REPO_ROOT/benchmarks/reports/REPORT_TEMPLATE.md"
|
REPORT_TEMPLATE="$REPO_ROOT/benchmarks/reports/REPORT_TEMPLATE.md"
|
||||||
|
|
||||||
|
# ── -report-only <dir>: regenerate report from an existing results directory ──
# Re-runs parse_results.go and generate_report.go against a prior run's output
# without re-executing the benchmarks themselves.
if [[ "${1:-}" == "-report-only" ]]; then
  RESULTS_DIR="${2:-}"
  if [[ -z "$RESULTS_DIR" ]]; then
    echo "Usage: bash benchmarks/run_benchmarks.sh -report-only <results-dir>"
    echo "Example: bash benchmarks/run_benchmarks.sh -report-only benchmarks/results/2026-04-09_10-30-00"
    exit 1
  fi
  if [[ ! -d "$RESULTS_DIR" ]]; then
    echo "ERROR: results directory not found: $RESULTS_DIR"
    exit 1
  fi
  echo "=== Regenerating report from existing results ==="
  echo "Results dir : $RESULTS_DIR"
  echo ""
  cd "$REPO_ROOT"
  echo "Parsing results to CSV..."
  # Best-effort: a parse failure should not block report generation, but make
  # the failure visible instead of silently swallowing it with `|| true`
  # (the `if !` form also keeps `set -e` from aborting the script here).
  if ! go run "$REPO_ROOT/benchmarks/tools/parse_results.go" \
    -dir="$RESULTS_DIR" -out="$RESULTS_DIR" 2>&1; then
    echo "WARN: parse_results.go failed; continuing with report generation"
  fi
  echo ""
  echo "Generating benchmark report..."
  go run "$REPO_ROOT/benchmarks/tools/generate_report.go" \
    -dir="$RESULTS_DIR" \
    -template="$REPORT_TEMPLATE" \
    -version="$ONIX_VERSION"
  echo ""
  echo "Done. Report written to: $RESULTS_DIR/BENCHMARK_REPORT.md"
  exit 0
fi
|
||||||
|
|
||||||
|
RESULTS_DIR="$REPO_ROOT/benchmarks/results/$(date +%Y-%m-%d_%H-%M-%S)"
|
||||||
|
|
||||||
cd "$REPO_ROOT"
|
cd "$REPO_ROOT"
|
||||||
|
|
||||||
# ── benchstat is declared as a go tool in go.mod; no separate install needed ──
|
# ── benchstat is declared as a go tool in go.mod; no separate install needed ──
|
||||||
|
|||||||
@@ -146,6 +146,10 @@ func main() {
|
|||||||
// ── Build throughput table ─────────────────────────────────────────────────
|
// ── Build throughput table ─────────────────────────────────────────────────
|
||||||
throughputTable := buildThroughputTable(throughput)
|
throughputTable := buildThroughputTable(throughput)
|
||||||
|
|
||||||
|
// ── Generate interpretation and recommendation ─────────────────────────────
|
||||||
|
interpretation := buildInterpretation(perc, latency, throughput, warmUS, coldUS)
|
||||||
|
recommendation := buildRecommendation(throughput)
|
||||||
|
|
||||||
// ── Apply substitutions ────────────────────────────────────────────────────
|
// ── Apply substitutions ────────────────────────────────────────────────────
|
||||||
replacements := map[string]string{
|
replacements := map[string]string{
|
||||||
"__TIMESTAMP__": timestamp,
|
"__TIMESTAMP__": timestamp,
|
||||||
@@ -183,6 +187,8 @@ func main() {
|
|||||||
"__CACHE_DELTA__": cacheDelta,
|
"__CACHE_DELTA__": cacheDelta,
|
||||||
"__THROUGHPUT_TABLE__": throughputTable,
|
"__THROUGHPUT_TABLE__": throughputTable,
|
||||||
"__BENCHSTAT_SUMMARY__": benchstat,
|
"__BENCHSTAT_SUMMARY__": benchstat,
|
||||||
|
"__INTERPRETATION__": interpretation,
|
||||||
|
"__RECOMMENDATION__": recommendation,
|
||||||
}
|
}
|
||||||
|
|
||||||
for placeholder, value := range replacements {
|
for placeholder, value := range replacements {
|
||||||
@@ -399,3 +405,191 @@ func readFileOrDefault(path, def string) string {
|
|||||||
}
|
}
|
||||||
return strings.TrimRight(string(b), "\n")
|
return strings.TrimRight(string(b), "\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Narrative generators ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// buildInterpretation generates a data-driven interpretation paragraph from the
|
||||||
|
// benchmark results. It covers tail-latency control, action complexity trend,
|
||||||
|
// concurrency scaling efficiency, and cache impact.
|
||||||
|
func buildInterpretation(
|
||||||
|
perc map[string]string,
|
||||||
|
latency map[string]map[string]string,
|
||||||
|
throughput []map[string]string,
|
||||||
|
warmUS, coldUS string,
|
||||||
|
) string {
|
||||||
|
var sb strings.Builder
|
||||||
|
|
||||||
|
p50 := parseFloatOrZero(perc["p50_µs"])
|
||||||
|
p99 := parseFloatOrZero(perc["p99_µs"])
|
||||||
|
meanDiscover := parseFloatOrZero(latency["BenchmarkBAPCaller_Discover"]["mean_ms"]) * 1000
|
||||||
|
|
||||||
|
// Tail-latency control.
|
||||||
|
if p50 > 0 && p99 > 0 {
|
||||||
|
ratio := p99 / p50
|
||||||
|
quality := "good"
|
||||||
|
if ratio > 5 {
|
||||||
|
quality = "poor"
|
||||||
|
} else if ratio > 3 {
|
||||||
|
quality = "moderate"
|
||||||
|
}
|
||||||
|
sb.WriteString(fmt.Sprintf(
|
||||||
|
"The adapter delivers a p50 latency of **%.0f µs** for the discover action. "+
|
||||||
|
"The p99/p50 ratio is **%.1f×**, indicating %s tail-latency control — "+
|
||||||
|
"spikes are %s relative to the median.\n\n",
|
||||||
|
p50, ratio, quality, tailDescription(ratio),
|
||||||
|
))
|
||||||
|
} else if meanDiscover > 0 {
|
||||||
|
sb.WriteString(fmt.Sprintf(
|
||||||
|
"The adapter delivers a mean latency of **%.0f µs** for the discover action. "+
|
||||||
|
"Run with `-bench=BenchmarkBAPCaller_Discover_Percentiles` to obtain p50/p95/p99 data.\n\n",
|
||||||
|
meanDiscover,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Action complexity trend.
|
||||||
|
selectMS := parseFloatOrZero(latency["BenchmarkBAPCaller_AllActions/select"]["mean_ms"]) * 1000
|
||||||
|
initMS := parseFloatOrZero(latency["BenchmarkBAPCaller_AllActions/init"]["mean_ms"]) * 1000
|
||||||
|
confirmMS := parseFloatOrZero(latency["BenchmarkBAPCaller_AllActions/confirm"]["mean_ms"]) * 1000
|
||||||
|
if meanDiscover > 0 && selectMS > 0 && initMS > 0 && confirmMS > 0 {
|
||||||
|
sb.WriteString(fmt.Sprintf(
|
||||||
|
"Latency scales with payload complexity: select (+%.0f%%), init (+%.0f%%), confirm (+%.0f%%) "+
|
||||||
|
"vs the discover baseline. Allocation counts track proportionally, driven by JSON "+
|
||||||
|
"unmarshalling and schema validation of larger payloads.\n\n",
|
||||||
|
pctChange(meanDiscover, selectMS),
|
||||||
|
pctChange(meanDiscover, initMS),
|
||||||
|
pctChange(meanDiscover, confirmMS),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Concurrency scaling.
|
||||||
|
lat1 := latencyAtCPU(throughput, "1")
|
||||||
|
lat16 := latencyAtCPU(throughput, "16")
|
||||||
|
if lat1 > 0 && lat16 > 0 {
|
||||||
|
improvement := lat1 / lat16
|
||||||
|
sb.WriteString(fmt.Sprintf(
|
||||||
|
"Concurrency scaling is effective: mean latency drops from **%.0f µs** at GOMAXPROCS=1 "+
|
||||||
|
"to **%.0f µs** at GOMAXPROCS=16 — a **%.1f× improvement**.",
|
||||||
|
lat1*1000, lat16*1000, improvement,
|
||||||
|
))
|
||||||
|
if improvement < 4 {
|
||||||
|
sb.WriteString(" Gains taper beyond 8 cores, suggesting a shared serialisation point " +
|
||||||
|
"(likely schema validation or key derivation).")
|
||||||
|
}
|
||||||
|
sb.WriteString("\n\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache impact.
|
||||||
|
w := parseFloatOrZero(warmUS)
|
||||||
|
c := parseFloatOrZero(coldUS)
|
||||||
|
if w > 0 && c > 0 {
|
||||||
|
delta := math.Abs(w-c) / w * 100
|
||||||
|
if delta < 5 {
|
||||||
|
sb.WriteString(fmt.Sprintf(
|
||||||
|
"The Redis key-manager cache shows **no measurable impact** in this setup "+
|
||||||
|
"(warm vs cold delta: %.0f µs, %.1f%% of mean). "+
|
||||||
|
"miniredis is in-process; signing and schema validation dominate. "+
|
||||||
|
"Cache benefit would be visible with real Redis over a network.",
|
||||||
|
math.Abs(w-c), delta,
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
sb.WriteString(fmt.Sprintf(
|
||||||
|
"The Redis key-manager cache provides a **%.0f µs improvement** (%.1f%%) "+
|
||||||
|
"on the warm path vs cold.",
|
||||||
|
math.Abs(w-c), delta,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
sb.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
if sb.Len() == 0 {
|
||||||
|
return "_Insufficient data to generate interpretation. Ensure all benchmark scenarios completed successfully._"
|
||||||
|
}
|
||||||
|
return strings.TrimRight(sb.String(), "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildRecommendation generates a sizing and tuning recommendation based on the
|
||||||
|
// concurrency sweep results.
|
||||||
|
func buildRecommendation(throughput []map[string]string) string {
|
||||||
|
if len(throughput) == 0 {
|
||||||
|
return "_Run the concurrency sweep to generate sizing recommendations._"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the GOMAXPROCS level with best scaling efficiency (RPS gain per core).
|
||||||
|
type cpuPoint struct {
|
||||||
|
cpu int
|
||||||
|
rps float64
|
||||||
|
lat float64
|
||||||
|
}
|
||||||
|
var points []cpuPoint
|
||||||
|
for _, row := range throughput {
|
||||||
|
cpu := int(parseFloatOrZero(row["gomaxprocs"]))
|
||||||
|
rps := parseFloatOrZero(row["rps"])
|
||||||
|
lat := parseFloatOrZero(row["mean_latency_ms"]) * 1000
|
||||||
|
if cpu > 0 && lat > 0 {
|
||||||
|
points = append(points, cpuPoint{cpu, rps, lat})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(points) == 0 {
|
||||||
|
return "_Run the concurrency sweep (parallel_cpu*.txt) to generate sizing recommendations._"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find sweet spot: largest latency improvement per doubling of cores.
|
||||||
|
bestEffCPU := points[0].cpu
|
||||||
|
bestEff := 0.0
|
||||||
|
for i := 1; i < len(points); i++ {
|
||||||
|
if points[i-1].lat > 0 {
|
||||||
|
eff := (points[i-1].lat - points[i].lat) / points[i-1].lat
|
||||||
|
if eff > bestEff {
|
||||||
|
bestEff = eff
|
||||||
|
bestEffCPU = points[i].cpu
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString(fmt.Sprintf(
|
||||||
|
"**%d cores** offers the best throughput/cost ratio based on the concurrency sweep — "+
|
||||||
|
"scaling efficiency begins to taper beyond this point.\n\n",
|
||||||
|
bestEffCPU,
|
||||||
|
))
|
||||||
|
sb.WriteString("The adapter is ready for staged load testing against a real BPP. " +
|
||||||
|
"For production sizing, start with the recommended core count above and adjust based " +
|
||||||
|
"on observed throughput targets. If schema validation dominates CPU (likely at high " +
|
||||||
|
"concurrency), profile with `go tool pprof` using the commands in B5 to isolate the bottleneck.")
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Narrative helpers ──────────────────────────────────────────────────────────

// tailDescription maps a p99/p50 latency ratio to the qualitative adjective
// used in the interpretation narrative: "minimal" (≤2), "modest" (≤3),
// "noticeable" (≤5), otherwise "significant".
func tailDescription(ratio float64) string {
	if ratio <= 2 {
		return "minimal"
	}
	if ratio <= 3 {
		return "modest"
	}
	if ratio <= 5 {
		return "noticeable"
	}
	return "significant"
}
|
||||||
|
|
||||||
|
// pctChange reports the percentage change from base to val
// (positive = increase, negative = decrease). A zero baseline
// yields 0 rather than a division by zero.
func pctChange(base, val float64) float64 {
	if base == 0 {
		return 0
	}
	delta := val - base
	return delta / base * 100
}
|
||||||
|
|
||||||
|
func latencyAtCPU(throughput []map[string]string, cpu string) float64 {
|
||||||
|
for _, row := range throughput {
|
||||||
|
if row["gomaxprocs"] == cpu {
|
||||||
|
if v := parseFloatOrZero(row["mean_latency_ms"]); v > 0 {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user