package opapolicychecker import ( "bytes" "context" "encoding/json" "fmt" "io" "net/http" "net/url" "os" "path" "path/filepath" "strings" "time" "github.com/open-policy-agent/opa/v1/ast" "github.com/open-policy-agent/opa/v1/bundle" "github.com/open-policy-agent/opa/v1/rego" "github.com/open-policy-agent/opa/v1/storage/inmem" ) // Evaluator wraps the OPA engine: loads and compiles .rego files at startup, // then evaluates messages against the compiled policy set. type Evaluator struct { preparedQuery rego.PreparedEvalQuery query string runtimeConfig map[string]string moduleNames []string // names of loaded .rego modules failOnUndefined bool // if true, empty/undefined results are treated as violations } // ModuleNames returns the names of the loaded .rego policy modules. func (e *Evaluator) ModuleNames() []string { return e.moduleNames } // defaultPolicyFetchTimeout bounds remote policy and bundle fetches during startup // and refresh. This can be overridden via config.fetchTimeoutSeconds. const defaultPolicyFetchTimeout = 30 * time.Second // maxPolicySize is the maximum size of a single .rego file fetched from a URL (1 MB). const maxPolicySize = 1 << 20 // maxBundleSize is the maximum size of a bundle archive (10 MB). const maxBundleSize = 10 << 20 // NewEvaluator creates an Evaluator by loading .rego files from local paths // and/or URLs, then compiling them. runtimeConfig is passed to Rego as data.config. // When isBundle is true, the first policyPath is treated as a URL to an OPA bundle (.tar.gz). func NewEvaluator(policyPaths []string, query string, runtimeConfig map[string]string, isBundle bool, fetchTimeout time.Duration) (*Evaluator, error) { if fetchTimeout <= 0 { fetchTimeout = defaultPolicyFetchTimeout } if isBundle { return newBundleEvaluator(policyPaths, query, runtimeConfig, fetchTimeout) } return newRegoEvaluator(policyPaths, query, runtimeConfig, fetchTimeout) } // newRegoEvaluator loads raw .rego files from local paths and/or URLs. 
func newRegoEvaluator(policyPaths []string, query string, runtimeConfig map[string]string, fetchTimeout time.Duration) (*Evaluator, error) { modules := make(map[string]string) // Load from policyPaths (resolved locations based on config Type) for _, source := range policyPaths { if isURL(source) { name, content, err := fetchPolicy(source, fetchTimeout) if err != nil { return nil, fmt.Errorf("failed to fetch policy from %s: %w", source, err) } modules[name] = content } else if info, err := os.Stat(source); err == nil && info.IsDir() { // Directory — load all .rego files inside entries, err := os.ReadDir(source) if err != nil { return nil, fmt.Errorf("failed to read policy directory %s: %w", source, err) } for _, entry := range entries { if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".rego") || strings.HasSuffix(entry.Name(), "_test.rego") { continue } fpath := filepath.Join(source, entry.Name()) data, err := os.ReadFile(fpath) if err != nil { return nil, fmt.Errorf("failed to read policy file %s: %w", fpath, err) } modules[entry.Name()] = string(data) } } else { // Local file path data, err := os.ReadFile(source) if err != nil { return nil, fmt.Errorf("failed to read policy file %s: %w", source, err) } modules[filepath.Base(source)] = string(data) } } if len(modules) == 0 { return nil, fmt.Errorf("no .rego policy files found from any configured source") } return compileAndPrepare(modules, nil, query, runtimeConfig, true) } // newBundleEvaluator loads an OPA bundle (.tar.gz) from a URL and compiles it. 
func newBundleEvaluator(policyPaths []string, query string, runtimeConfig map[string]string, fetchTimeout time.Duration) (*Evaluator, error) { if len(policyPaths) == 0 { return nil, fmt.Errorf("bundle source URL is required") } bundleURL := policyPaths[0] modules, bundleData, err := loadBundle(bundleURL, fetchTimeout) if err != nil { return nil, fmt.Errorf("failed to load bundle from %s: %w", bundleURL, err) } if len(modules) == 0 { return nil, fmt.Errorf("no .rego policy modules found in bundle from %s", bundleURL) } return compileAndPrepare(modules, bundleData, query, runtimeConfig, true) } // loadBundle downloads a .tar.gz OPA bundle from a URL, parses it using OPA's // bundle reader, and returns the modules and data from the bundle. func loadBundle(bundleURL string, fetchTimeout time.Duration) (map[string]string, map[string]interface{}, error) { data, err := fetchBundleArchive(bundleURL, fetchTimeout) if err != nil { return nil, nil, err } return parseBundleArchive(data) } // fetchBundleArchive downloads a bundle .tar.gz from a URL. 
func fetchBundleArchive(rawURL string, fetchTimeout time.Duration) ([]byte, error) { parsed, err := url.Parse(rawURL) if err != nil { return nil, fmt.Errorf("invalid URL: %w", err) } if parsed.Scheme != "http" && parsed.Scheme != "https" { return nil, fmt.Errorf("unsupported URL scheme %q (only http and https are supported)", parsed.Scheme) } client := &http.Client{Timeout: fetchTimeout} resp, err := client.Get(rawURL) if err != nil { return nil, fmt.Errorf("HTTP request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("HTTP %d from %s", resp.StatusCode, rawURL) } limited := io.LimitReader(resp.Body, int64(maxBundleSize)+1) body, err := io.ReadAll(limited) if err != nil { return nil, fmt.Errorf("failed to read response body: %w", err) } if len(body) > maxBundleSize { return nil, fmt.Errorf("bundle exceeds maximum size of %d bytes", maxBundleSize) } return body, nil } // parseBundleArchive parses a .tar.gz OPA bundle archive and extracts // rego modules and data. Signature verification is skipped. func parseBundleArchive(data []byte) (map[string]string, map[string]interface{}, error) { loader := bundle.NewTarballLoaderWithBaseURL(bytes.NewReader(data), "") reader := bundle.NewCustomReader(loader). WithSkipBundleVerification(true). WithRegoVersion(ast.RegoV1) b, err := reader.Read() if err != nil { return nil, nil, fmt.Errorf("failed to read bundle: %w", err) } modules := make(map[string]string, len(b.Modules)) for _, m := range b.Modules { modules[m.Path] = string(m.Raw) } return modules, b.Data, nil } // compileAndPrepare compiles rego modules and prepares the OPA query for evaluation. 
func compileAndPrepare(modules map[string]string, bundleData map[string]interface{}, query string, runtimeConfig map[string]string, failOnUndefined bool) (*Evaluator, error) { // Compile modules to catch syntax errors early compiler, err := ast.CompileModulesWithOpt(modules, ast.CompileOpts{ParserOptions: ast.ParserOptions{RegoVersion: ast.RegoV1}}) if err != nil { return nil, fmt.Errorf("failed to compile rego modules: %w", err) } // Build store data: merge bundle data with runtime config store := make(map[string]interface{}) for k, v := range bundleData { store[k] = v } store["config"] = toInterfaceMap(runtimeConfig) pq, err := rego.New( rego.Query(query), rego.Compiler(compiler), rego.Store(inmem.NewFromObject(store)), ).PrepareForEval(context.Background()) if err != nil { return nil, fmt.Errorf("failed to prepare rego query %q: %w", query, err) } names := make([]string, 0, len(modules)) for name := range modules { names = append(names, name) } return &Evaluator{ preparedQuery: pq, query: query, runtimeConfig: runtimeConfig, moduleNames: names, failOnUndefined: failOnUndefined, }, nil } // isURL checks if a source string looks like a remote URL. func isURL(source string) bool { return strings.HasPrefix(source, "http://") || strings.HasPrefix(source, "https://") } // fetchPolicy downloads a .rego file from a URL and returns (filename, content, error). 
func fetchPolicy(rawURL string, fetchTimeout time.Duration) (string, string, error) { parsed, err := url.Parse(rawURL) if err != nil { return "", "", fmt.Errorf("invalid URL: %w", err) } if parsed.Scheme != "http" && parsed.Scheme != "https" { return "", "", fmt.Errorf("unsupported URL scheme %q (only http and https are supported)", parsed.Scheme) } client := &http.Client{Timeout: fetchTimeout} resp, err := client.Get(rawURL) if err != nil { return "", "", fmt.Errorf("HTTP request failed: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return "", "", fmt.Errorf("HTTP %d from %s", resp.StatusCode, rawURL) } // Read with size limit limited := io.LimitReader(resp.Body, maxPolicySize+1) body, err := io.ReadAll(limited) if err != nil { return "", "", fmt.Errorf("failed to read response body: %w", err) } if len(body) > maxPolicySize { return "", "", fmt.Errorf("policy file exceeds maximum size of %d bytes", maxPolicySize) } // Derive filename from URL path name := path.Base(parsed.Path) if name == "" || name == "." || name == "/" { name = "policy.rego" } if !strings.HasSuffix(name, ".rego") { name += ".rego" } return name, string(body), nil } // Evaluate runs the compiled policy against a JSON message body. // Returns a list of violation strings (empty = compliant). func (e *Evaluator) Evaluate(ctx context.Context, body []byte) ([]string, error) { var input interface{} if err := json.Unmarshal(body, &input); err != nil { return nil, fmt.Errorf("failed to parse message body as JSON: %w", err) } rs, err := e.preparedQuery.Eval(ctx, rego.EvalInput(input)) if err != nil { return nil, fmt.Errorf("rego evaluation failed: %w", err) } // Fail-closed for bundles: if the query returned no result, the policy_query_path // is likely misconfigured or the rule doesn't exist in the bundle. 
if e.failOnUndefined && len(rs) == 0 { return []string{fmt.Sprintf("policy query %q returned no result (undefined)", e.query)}, nil } return extractViolations(rs) } // extractViolations pulls violations from the OPA result set. // Supported query output formats: // - map with {"valid": bool, "violations": []string}: structured policy_query_path result // - []string / set of strings: each string is a violation message // - bool: false = denied ("policy denied the request"), true = allowed // - string: non-empty = violation message // - empty/undefined: allowed (no violations) func extractViolations(rs rego.ResultSet) ([]string, error) { if len(rs) == 0 { return nil, nil } var violations []string for _, result := range rs { for _, expr := range result.Expressions { switch v := expr.Value.(type) { case bool: // allow/deny pattern: false = denied if !v { violations = append(violations, "policy denied the request") } case string: // single violation string if v != "" { violations = append(violations, v) } case []interface{}: // Result is a list (from set) for _, item := range v { if s, ok := item.(string); ok { violations = append(violations, s) } } case map[string]interface{}: if vs := extractStructuredViolations(v); vs != nil { violations = append(violations, vs...) } } } } return violations, nil } // extractStructuredViolations handles the policy_query_path result format: // {"valid": bool, "violations": []string} // Returns the violation strings if the map matches this format, or nil if it doesn't. 
//
// A map matches when it has both a "valid" key holding a bool and a
// "violations" key holding a list. Non-string list entries are skipped. A
// valid result with an empty list yields an empty, non-nil slice; an invalid
// result that names no violation yields the generic "policy denied the
// request" message.
func extractStructuredViolations(m map[string]interface{}) []string {
	rawValid, okValid := m["valid"]
	rawList, okList := m["violations"]
	if !(okValid && okList) {
		return nil
	}

	isValid, isBool := rawValid.(bool)
	if !isBool {
		return nil
	}
	items, isList := rawList.([]interface{})
	if !isList {
		return nil
	}

	// Explicitly valid with nothing reported: matched, and no violations.
	if isValid && len(items) == 0 {
		return []string{}
	}

	var msgs []string
	for i := range items {
		if msg, isString := items[i].(string); isString {
			msgs = append(msgs, msg)
		}
	}

	// Marked invalid without a usable message: report a generic denial.
	if len(msgs) == 0 && !isValid {
		return []string{"policy denied the request"}
	}
	return msgs
}

// toInterfaceMap copies a map[string]string into a new map[string]interface{}
// so it can be placed in the OPA store. A nil input yields an empty, non-nil map.
func toInterfaceMap(m map[string]string) map[string]interface{} {
	converted := make(map[string]interface{}, len(m))
	for key := range m {
		converted[key] = m[key]
	}
	return converted
}