From ff4d909b7ee83fbdfe83fa0c1aa19dee168f352a Mon Sep 17 00:00:00 2001 From: Ayush Rawat Date: Thu, 5 Mar 2026 15:31:34 +0530 Subject: [PATCH] Enhance Policy Enforcer Configuration and Add Benchmark Tests - Added detailed comments and examples for the `policyPaths` configuration in both BAP and BPP YAML files to improve clarity on usage. - Introduced a new benchmark test suite for the Policy Enforcer to evaluate performance under varying rule counts, measuring both evaluation and compilation times. --- config/local-beckn-one-bap.yaml | 15 + config/local-beckn-one-bpp.yaml | 15 + .../policyenforcer/benchmark_test.go | 308 ++++++++++++++++++ 3 files changed, 338 insertions(+) create mode 100644 pkg/plugin/implementation/policyenforcer/benchmark_test.go diff --git a/config/local-beckn-one-bap.yaml b/config/local-beckn-one-bap.yaml index f9b12b9..1a745f5 100644 --- a/config/local-beckn-one-bap.yaml +++ b/config/local-beckn-one-bap.yaml @@ -89,6 +89,16 @@ modules: policyEnforcer: id: policyenforcer config: + # policyPaths: polymorphic, auto-detects each entry as URL, directory, or file + # Examples: + # policyPaths: "./policies" # local directory + # policyPaths: "https://example.com/compliance.rego" # remote URL + # policyPaths: "./policies/compliance.rego" # local file + # For multiple sources, use YAML folded scalar (>-): + # policyPaths: >- + # https://example.com/compliance.rego, + # https://example.com/safety.rego, + # ./policies policyPaths: "./policies" middleware: - id: reqpreprocessor @@ -156,7 +166,12 @@ modules: config: contextKeys: transaction_id,message_id role: bap + policyEnforcer: + id: policyenforcer + config: + policyPaths: "./policies" steps: + - policyEnforcer - addRoute - sign - validateSchema diff --git a/config/local-beckn-one-bpp.yaml b/config/local-beckn-one-bpp.yaml index 956b893..dece530 100644 --- a/config/local-beckn-one-bpp.yaml +++ b/config/local-beckn-one-bpp.yaml @@ -87,6 +87,16 @@ modules: policyEnforcer: id: policyenforcer config: + # 
policyPaths: polymorphic, auto-detects each entry as URL, directory, or file + # Examples: + # policyPaths: "./policies" # local directory + # policyPaths: "https://example.com/compliance.rego" # remote URL + # policyPaths: "./policies/compliance.rego" # local file + # For multiple sources, use YAML folded scalar (>-): + # policyPaths: >- + # https://example.com/compliance.rego, + # https://example.com/safety.rego, + # ./policies policyPaths: "./policies" steps: - validateSign @@ -149,7 +159,12 @@ modules: config: contextKeys: transaction_id,message_id role: bpp + policyEnforcer: + id: policyenforcer + config: + policyPaths: "./policies" steps: + - policyEnforcer - addRoute - sign - validateSchema diff --git a/pkg/plugin/implementation/policyenforcer/benchmark_test.go b/pkg/plugin/implementation/policyenforcer/benchmark_test.go new file mode 100644 index 0000000..f7cb1dd --- /dev/null +++ b/pkg/plugin/implementation/policyenforcer/benchmark_test.go @@ -0,0 +1,308 @@ +// Benchmarks for policy enforcer evaluation scaling. +// Measures how OPA evaluation time changes with rule count (1 to 500 rules), +// covering both realistic (mostly inactive) and worst-case (all active) scenarios. +// Also benchmarks compilation time (one-time startup cost). +// +// Run human-readable report: go test -run TestBenchmarkReport -v -count=1 +// Run Go benchmarks: go test -bench=. -benchmem -count=1 +package policyenforcer + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +// generateDummyRules creates a .rego policy file with N rules. +// Only one rule matches the input (action == "confirm"), the rest have impossible +// conditions (action == "foobar1", "foobar2", ...) to simulate realistic rule bloat +// where most rules don't fire. 
+func generateDummyRules(n int) string { + var sb strings.Builder + sb.WriteString("package policy\nimport rego.v1\n\n") + + // One real rule that actually fires + sb.WriteString("violations contains \"real_violation\" if {\n") + sb.WriteString(" input.context.action == \"confirm\"\n") + sb.WriteString(" input.message.order.value > 10000\n") + sb.WriteString("}\n\n") + + // N-1 dummy rules with impossible conditions (simulate rule bloat) + for i := 1; i < n; i++ { + sb.WriteString(fmt.Sprintf("violations contains \"dummy_violation_%d\" if {\n", i)) + sb.WriteString(fmt.Sprintf(" input.context.action == \"foobar%d\"\n", i)) + sb.WriteString(fmt.Sprintf(" input.message.order.value > %d\n", i*100)) + sb.WriteString("}\n\n") + } + + return sb.String() +} + +// generateActiveRules creates N rules that ALL fire on the test input. +// This is the worst case: every rule matches. +func generateActiveRules(n int) string { + var sb strings.Builder + sb.WriteString("package policy\nimport rego.v1\n\n") + + for i := 0; i < n; i++ { + sb.WriteString(fmt.Sprintf("violations contains \"active_violation_%d\" if {\n", i)) + sb.WriteString(" input.context.action == \"confirm\"\n") + sb.WriteString("}\n\n") + } + + return sb.String() +} + +// sampleBecknInput is a realistic beckn confirm message for benchmarking. 
+var sampleBecknInput = []byte(`{ + "context": { + "domain": "energy", + "action": "confirm", + "version": "1.1.0", + "bap_id": "buyer-bap.example.com", + "bap_uri": "https://buyer-bap.example.com", + "bpp_id": "seller-bpp.example.com", + "bpp_uri": "https://seller-bpp.example.com", + "transaction_id": "txn-12345", + "message_id": "msg-67890", + "timestamp": "2026-03-04T10:00:00Z" + }, + "message": { + "order": { + "id": "order-001", + "provider": {"id": "seller-1"}, + "items": [ + {"id": "item-1", "quantity": {"selected": {"count": 100}}}, + {"id": "item-2", "quantity": {"selected": {"count": 50}}} + ], + "value": 15000, + "fulfillment": { + "type": "DELIVERY", + "start": {"time": {"timestamp": "2026-03-05T08:00:00Z"}}, + "end": {"time": {"timestamp": "2026-03-05T18:00:00Z"}} + } + } + } +}`) + +// --- Go Benchmarks (run with: go test -bench=. -benchmem) --- + +// BenchmarkEvaluate_MostlyInactive benchmarks evaluation with N rules where +// only 1 rule fires. This simulates a realistic governance ruleset where +// most rules are for different actions/conditions. +func BenchmarkEvaluate_MostlyInactive(b *testing.B) { + sizes := []int{1, 10, 50, 100, 250, 500} + for _, n := range sizes { + b.Run(fmt.Sprintf("rules=%d", n), func(b *testing.B) { + dir := b.TempDir() + os.WriteFile(filepath.Join(dir, "policy.rego"), []byte(generateDummyRules(n)), 0644) + + eval, err := NewEvaluator([]string{dir}, "data.policy.violations", nil) + if err != nil { + b.Fatalf("NewEvaluator failed: %v", err) + } + + ctx := context.Background() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := eval.Evaluate(ctx, sampleBecknInput) + if err != nil { + b.Fatalf("Evaluate failed: %v", err) + } + } + }) + } +} + +// BenchmarkEvaluate_AllActive benchmarks the worst case where ALL N rules fire. 
+func BenchmarkEvaluate_AllActive(b *testing.B) { + sizes := []int{1, 10, 50, 100, 250, 500} + for _, n := range sizes { + b.Run(fmt.Sprintf("rules=%d", n), func(b *testing.B) { + dir := b.TempDir() + os.WriteFile(filepath.Join(dir, "policy.rego"), []byte(generateActiveRules(n)), 0644) + + eval, err := NewEvaluator([]string{dir}, "data.policy.violations", nil) + if err != nil { + b.Fatalf("NewEvaluator failed: %v", err) + } + + ctx := context.Background() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := eval.Evaluate(ctx, sampleBecknInput) + if err != nil { + b.Fatalf("Evaluate failed: %v", err) + } + } + }) + } +} + +// BenchmarkCompilation measures how long it takes to compile policies of various sizes. +// This runs once at startup, so it's less critical but good to know. +func BenchmarkCompilation(b *testing.B) { + sizes := []int{10, 50, 100, 250, 500} + for _, n := range sizes { + b.Run(fmt.Sprintf("rules=%d", n), func(b *testing.B) { + dir := b.TempDir() + os.WriteFile(filepath.Join(dir, "policy.rego"), []byte(generateDummyRules(n)), 0644) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := NewEvaluator([]string{dir}, "data.policy.violations", nil) + if err != nil { + b.Fatalf("NewEvaluator failed: %v", err) + } + } + }) + } +} + +// --- Human-Readable Report (run with: go test -run TestBenchmarkReport -v) --- + +// TestBenchmarkReport generates a readable table showing how evaluation time +// scales with rule count. This is the report to share with the team. 
+func TestBenchmarkReport(t *testing.T) { + sizes := []int{1, 10, 50, 100, 250, 500} + iterations := 1000 + + fmt.Println() + fmt.Println("╔══════════════════════════════════════════════════════════════════════╗") + fmt.Println("║ Policy Enforcer — Performance Benchmark Report ║") + fmt.Println("╠══════════════════════════════════════════════════════════════════════╣") + fmt.Println() + + // --- Compilation time --- + fmt.Println("┌─────────────────────────────────────────────────┐") + fmt.Println("│ Compilation Time (one-time startup cost) │") + fmt.Println("├──────────┬──────────────────────────────────────┤") + fmt.Println("│ Rules │ Compilation Time │") + fmt.Println("├──────────┼──────────────────────────────────────┤") + for _, n := range sizes { + dir := t.TempDir() + os.WriteFile(filepath.Join(dir, "policy.rego"), []byte(generateDummyRules(n)), 0644) + + start := time.Now() + _, err := NewEvaluator([]string{dir}, "data.policy.violations", nil) + elapsed := time.Since(start) + if err != nil { + t.Fatalf("NewEvaluator(%d rules) failed: %v", n, err) + } + fmt.Printf("│ %-8d │ %-36s │\n", n, elapsed.Round(time.Microsecond)) + } + fmt.Println("└──────────┴──────────────────────────────────────┘") + fmt.Println() + + // --- Evaluation time (mostly inactive rules) --- + fmt.Println("┌─────────────────────────────────────────────────────────────────┐") + fmt.Printf("│ Evaluation Time — Mostly Inactive Rules (%d iterations) │\n", iterations) + fmt.Println("│ (1 rule fires, rest have non-matching conditions) │") + fmt.Println("├──────────┬──────────────┬──────────────┬────────────────────────┤") + fmt.Println("│ Rules │ Avg/eval │ p99 │ Violations │") + fmt.Println("├──────────┼──────────────┼──────────────┼────────────────────────┤") + for _, n := range sizes { + dir := t.TempDir() + os.WriteFile(filepath.Join(dir, "policy.rego"), []byte(generateDummyRules(n)), 0644) + + eval, err := NewEvaluator([]string{dir}, "data.policy.violations", nil) + if err != nil { + 
t.Fatalf("NewEvaluator(%d rules) failed: %v", n, err) + } + + ctx := context.Background() + durations := make([]time.Duration, iterations) + var lastViolations []string + + for i := 0; i < iterations; i++ { + start := time.Now() + v, err := eval.Evaluate(ctx, sampleBecknInput) + durations[i] = time.Since(start) + if err != nil { + t.Fatalf("Evaluate failed: %v", err) + } + lastViolations = v + } + + avg, p99 := calcStats(durations) + fmt.Printf("│ %-8d │ %-12s │ %-12s │ %-22d │\n", n, avg.Round(time.Microsecond), p99.Round(time.Microsecond), len(lastViolations)) + } + fmt.Println("└──────────┴──────────────┴──────────────┴────────────────────────┘") + fmt.Println() + + // --- Evaluation time (all rules active) --- + fmt.Println("┌─────────────────────────────────────────────────────────────────┐") + fmt.Printf("│ Evaluation Time — All Rules Active (%d iterations) │\n", iterations) + fmt.Println("│ (every rule fires — worst case scenario) │") + fmt.Println("├──────────┬──────────────┬──────────────┬────────────────────────┤") + fmt.Println("│ Rules │ Avg/eval │ p99 │ Violations │") + fmt.Println("├──────────┼──────────────┼──────────────┼────────────────────────┤") + for _, n := range sizes { + dir := t.TempDir() + os.WriteFile(filepath.Join(dir, "policy.rego"), []byte(generateActiveRules(n)), 0644) + + eval, err := NewEvaluator([]string{dir}, "data.policy.violations", nil) + if err != nil { + t.Fatalf("NewEvaluator(%d rules) failed: %v", n, err) + } + + ctx := context.Background() + durations := make([]time.Duration, iterations) + var lastViolations []string + + for i := 0; i < iterations; i++ { + start := time.Now() + v, err := eval.Evaluate(ctx, sampleBecknInput) + durations[i] = time.Since(start) + if err != nil { + t.Fatalf("Evaluate failed: %v", err) + } + lastViolations = v + } + + avg, p99 := calcStats(durations) + fmt.Printf("│ %-8d │ %-12s │ %-12s │ %-22d │\n", n, avg.Round(time.Microsecond), p99.Round(time.Microsecond), len(lastViolations)) + } + 
fmt.Println("└──────────┴──────────────┴──────────────┴────────────────────────┘")
+	fmt.Println()
+	fmt.Println("╚══════════════════════════════════════════════════════════════════════╝")
+}
+
+// calcStats returns the average and p99 of the given durations; the input need
+// not be pre-sorted (a copy is sorted internally to find the p99).
+func calcStats(durations []time.Duration) (avg, p99 time.Duration) {
+	n := len(durations)
+	if n == 0 {
+		return 0, 0
+	}
+
+	var total time.Duration
+	for _, d := range durations {
+		total += d
+	}
+	avg = total / time.Duration(n)
+
+	// Sort a copy for p99
+	sorted := make([]time.Duration, n)
+	copy(sorted, durations)
+	sortDurations(sorted)
+	p99 = sorted[int(float64(n)*0.99)]
+
+	return avg, p99
+}
+
+// sortDurations sorts a slice of durations in ascending order (insertion sort, fine for 1000 items).
+func sortDurations(d []time.Duration) {
+	for i := 1; i < len(d); i++ {
+		key := d[i]
+		j := i - 1
+		for j >= 0 && d[j] > key {
+			d[j+1] = d[j]
+			j--
+		}
+		d[j+1] = key
+	}
+}