Files
onix/pkg/plugin/implementation/opapolicychecker/evaluator.go

394 lines
12 KiB
Go

package opapolicychecker
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path"
"path/filepath"
"strings"
"time"
"github.com/open-policy-agent/opa/v1/ast"
"github.com/open-policy-agent/opa/v1/bundle"
"github.com/open-policy-agent/opa/v1/rego"
"github.com/open-policy-agent/opa/v1/storage/inmem"
)
// Evaluator wraps the OPA engine: it holds a query that was compiled and
// prepared from a set of .rego modules, and evaluates JSON messages against it.
// Instances are built by NewEvaluator (via compileAndPrepare).
type Evaluator struct {
// preparedQuery is the result of rego.New(...).PrepareForEval and is what
// Evaluate runs for every message.
preparedQuery rego.PreparedEvalQuery
// query is the original Rego query string; it is echoed in the violation
// message produced when the query yields no result.
query string
// runtimeConfig is the configuration that was placed in the OPA store under
// the "config" key when the query was prepared.
runtimeConfig map[string]string
// moduleNames holds the keys of the module map that was compiled. They are
// collected by ranging over a map, so their order is unspecified.
moduleNames []string // names of loaded .rego modules
// failOnUndefined makes Evaluate report a violation when the query returns
// an empty result set instead of treating it as "no violations".
failOnUndefined bool // if true, empty/undefined results are treated as violations
}
// ModuleNames returns the names of the loaded .rego policy modules.
//
// The returned slice is a copy: callers may sort or modify it without
// affecting the Evaluator. The order of the names is unspecified because
// they are collected from a map. A nil internal slice yields a nil result.
func (e *Evaluator) ModuleNames() []string {
	if e.moduleNames == nil {
		return nil
	}
	names := make([]string, len(e.moduleNames))
	copy(names, e.moduleNames)
	return names
}
// Limits applied when policies or bundles are fetched from remote sources.
const (
	// defaultPolicyFetchTimeout bounds remote policy and bundle fetches during
	// startup and refresh. NewEvaluator falls back to it when the supplied
	// timeout is not positive; it can be overridden via config.fetchTimeoutSeconds.
	defaultPolicyFetchTimeout = 30 * time.Second

	// maxPolicySize is the largest single .rego file accepted from a URL (1 MB).
	maxPolicySize = 1024 * 1024

	// maxBundleSize is the largest bundle archive accepted (10 MB).
	maxBundleSize = 10 * 1024 * 1024
)
// NewEvaluator builds an Evaluator from the given policy sources and query.
//
// policyPaths may contain local paths and/or URLs; runtimeConfig is exposed to
// Rego as data.config. When isBundle is true the first entry of policyPaths is
// treated as the URL of an OPA bundle (.tar.gz); otherwise the entries are
// loaded as raw .rego sources. A non-positive fetchTimeout is replaced by
// defaultPolicyFetchTimeout.
func NewEvaluator(policyPaths []string, query string, runtimeConfig map[string]string, isBundle bool, fetchTimeout time.Duration) (*Evaluator, error) {
	timeout := fetchTimeout
	if timeout <= 0 {
		timeout = defaultPolicyFetchTimeout
	}

	if !isBundle {
		return newRegoEvaluator(policyPaths, query, runtimeConfig, timeout)
	}
	return newBundleEvaluator(policyPaths, query, runtimeConfig, timeout)
}
// newRegoEvaluator loads raw .rego files from local paths and/or URLs and
// compiles them into an Evaluator.
//
// Each entry of policyPaths is handled as one of:
//   - a URL (per isURL): fetched with fetchPolicy using fetchTimeout;
//   - a directory: every non-directory entry ending in ".rego" but not in
//     "_test.rego" is read (sub-directories are not descended into);
//   - anything else: read as a single local file.
//
// Modules are registered under their short file name. When two different
// sources share a short name but differ in content, the later one is stored
// under its full location so that neither policy is silently dropped.
//
// An error is returned if any source cannot be read or fetched, if no module
// was found at all, or if compilation fails.
func newRegoEvaluator(policyPaths []string, query string, runtimeConfig map[string]string, fetchTimeout time.Duration) (*Evaluator, error) {
	modules := make(map[string]string)

	for _, source := range policyPaths {
		if isURL(source) {
			name, content, err := fetchPolicy(source, fetchTimeout)
			if err != nil {
				return nil, fmt.Errorf("failed to fetch policy from %s: %w", source, err)
			}
			addPolicyModule(modules, name, source, content)
			continue
		}

		// A failed Stat is not reported here: the source then falls through to
		// the single-file branch, whose ReadFile surfaces the error.
		if info, err := os.Stat(source); err == nil && info.IsDir() {
			entries, err := os.ReadDir(source)
			if err != nil {
				return nil, fmt.Errorf("failed to read policy directory %s: %w", source, err)
			}
			for _, entry := range entries {
				if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".rego") || strings.HasSuffix(entry.Name(), "_test.rego") {
					continue
				}
				fpath := filepath.Join(source, entry.Name())
				data, err := os.ReadFile(fpath)
				if err != nil {
					return nil, fmt.Errorf("failed to read policy file %s: %w", fpath, err)
				}
				addPolicyModule(modules, entry.Name(), fpath, string(data))
			}
			continue
		}

		data, err := os.ReadFile(source)
		if err != nil {
			return nil, fmt.Errorf("failed to read policy file %s: %w", source, err)
		}
		addPolicyModule(modules, filepath.Base(source), source, string(data))
	}

	if len(modules) == 0 {
		return nil, fmt.Errorf("no .rego policy files found from any configured source")
	}
	return compileAndPrepare(modules, nil, query, runtimeConfig, true)
}

// addPolicyModule stores content in modules under name, falling back to the
// full location (and then to a numbered variant of it) when the key is already
// taken by different content. Identical content under an existing key is
// treated as the same module and stored only once.
func addPolicyModule(modules map[string]string, name, location, content string) {
	key := name
	for attempt := 1; ; attempt++ {
		existing, taken := modules[key]
		if !taken || existing == content {
			break
		}
		if attempt == 1 {
			key = location
		} else {
			key = fmt.Sprintf("%s#%d", location, attempt)
		}
	}
	modules[key] = content
}
// newBundleEvaluator downloads an OPA bundle (.tar.gz) and compiles its
// modules into an Evaluator.
//
// Only the first element of policyPaths is used; it must be the bundle URL.
// The bundle's data document is passed on to compileAndPrepare together with
// runtimeConfig. An error is returned when no URL is given, when the bundle
// cannot be loaded, or when it contains no rego modules.
func newBundleEvaluator(policyPaths []string, query string, runtimeConfig map[string]string, fetchTimeout time.Duration) (*Evaluator, error) {
	if len(policyPaths) < 1 {
		return nil, fmt.Errorf("bundle source URL is required")
	}
	source := policyPaths[0]

	modules, data, loadErr := loadBundle(source, fetchTimeout)
	switch {
	case loadErr != nil:
		return nil, fmt.Errorf("failed to load bundle from %s: %w", source, loadErr)
	case len(modules) == 0:
		return nil, fmt.Errorf("no .rego policy modules found in bundle from %s", source)
	}

	return compileAndPrepare(modules, data, query, runtimeConfig, true)
}
// loadBundle fetches the bundle archive at bundleURL and hands the bytes to
// parseBundleArchive. It returns the bundle's rego modules (path -> source)
// and its data document, or the first error encountered while fetching or
// parsing.
func loadBundle(bundleURL string, fetchTimeout time.Duration) (map[string]string, map[string]interface{}, error) {
	archive, err := fetchBundleArchive(bundleURL, fetchTimeout)
	if err == nil {
		return parseBundleArchive(archive)
	}
	return nil, nil, err
}
// fetchBundleArchive downloads a bundle .tar.gz from rawURL and returns its
// raw bytes.
//
// Only http and https URLs are accepted. The request uses an HTTP client whose
// Timeout is fetchTimeout. Any status other than 200 OK is an error. The body
// is read through a limit of maxBundleSize+1 bytes so that an oversized
// archive can be detected and rejected without reading it in full.
func fetchBundleArchive(rawURL string, fetchTimeout time.Duration) ([]byte, error) {
	target, parseErr := url.Parse(rawURL)
	if parseErr != nil {
		return nil, fmt.Errorf("invalid URL: %w", parseErr)
	}
	switch target.Scheme {
	case "http", "https":
		// supported
	default:
		return nil, fmt.Errorf("unsupported URL scheme %q (only http and https are supported)", target.Scheme)
	}

	httpClient := &http.Client{Timeout: fetchTimeout}
	resp, getErr := httpClient.Get(rawURL)
	if getErr != nil {
		return nil, fmt.Errorf("HTTP request failed: %w", getErr)
	}
	defer resp.Body.Close()

	if status := resp.StatusCode; status != http.StatusOK {
		return nil, fmt.Errorf("HTTP %d from %s", status, rawURL)
	}

	// Read one byte past the limit: a longer result means the archive is too big.
	archive, readErr := io.ReadAll(io.LimitReader(resp.Body, int64(maxBundleSize)+1))
	if readErr != nil {
		return nil, fmt.Errorf("failed to read response body: %w", readErr)
	}
	if len(archive) > maxBundleSize {
		return nil, fmt.Errorf("bundle exceeds maximum size of %d bytes", maxBundleSize)
	}
	return archive, nil
}
// parseBundleArchive reads an in-memory .tar.gz OPA bundle and returns its
// rego modules, keyed by each module's path within the bundle, together with
// the bundle's data document.
//
// The bundle is read as Rego v1 and bundle signature verification is skipped.
func parseBundleArchive(data []byte) (map[string]string, map[string]interface{}, error) {
	tarball := bundle.NewTarballLoaderWithBaseURL(bytes.NewReader(data), "")

	bundleReader := bundle.NewCustomReader(tarball)
	bundleReader = bundleReader.WithSkipBundleVerification(true)
	bundleReader = bundleReader.WithRegoVersion(ast.RegoV1)

	parsed, readErr := bundleReader.Read()
	if readErr != nil {
		return nil, nil, fmt.Errorf("failed to read bundle: %w", readErr)
	}

	sources := make(map[string]string, len(parsed.Modules))
	for i := range parsed.Modules {
		file := &parsed.Modules[i]
		sources[file.Path] = string(file.Raw)
	}
	return sources, parsed.Data, nil
}
// compileAndPrepare compiles the given rego modules (as Rego v1) and prepares
// query for repeated evaluation.
//
// The OPA store is seeded with a shallow copy of bundleData; the "config" key
// is then set from runtimeConfig, replacing any "config" entry the bundle
// supplied. failOnUndefined is recorded on the returned Evaluator unchanged.
// The Evaluator's module names are the keys of modules, in unspecified order.
//
// An error is returned if the modules do not compile or the query cannot be
// prepared.
func compileAndPrepare(modules map[string]string, bundleData map[string]interface{}, query string, runtimeConfig map[string]string, failOnUndefined bool) (*Evaluator, error) {
	// Compiling up front surfaces syntax errors before any message is evaluated.
	parserOpts := ast.ParserOptions{RegoVersion: ast.RegoV1}
	compiler, err := ast.CompileModulesWithOpt(modules, ast.CompileOpts{ParserOptions: parserOpts})
	if err != nil {
		return nil, fmt.Errorf("failed to compile rego modules: %w", err)
	}

	// Store contents: bundle data first, then runtime config under "config".
	storeData := make(map[string]interface{}, len(bundleData)+1)
	for key, value := range bundleData {
		storeData[key] = value
	}
	storeData["config"] = toInterfaceMap(runtimeConfig)

	prepared, err := rego.New(
		rego.Query(query),
		rego.Compiler(compiler),
		rego.Store(inmem.NewFromObject(storeData)),
	).PrepareForEval(context.Background())
	if err != nil {
		return nil, fmt.Errorf("failed to prepare rego query %q: %w", query, err)
	}

	loaded := make([]string, 0, len(modules))
	for moduleName := range modules {
		loaded = append(loaded, moduleName)
	}

	evaluator := &Evaluator{
		preparedQuery:   prepared,
		query:           query,
		runtimeConfig:   runtimeConfig,
		moduleNames:     loaded,
		failOnUndefined: failOnUndefined,
	}
	return evaluator, nil
}
// isURL reports whether source looks like a remote URL, i.e. whether it starts
// with "http://" or "https://".
//
// The scheme is matched case-insensitively (URI schemes are case-insensitive,
// and url.Parse lower-cases them), so "HTTPS://host/p.rego" is recognised as a
// URL rather than being treated as a local file path.
func isURL(source string) bool {
	for _, prefix := range [...]string{"http://", "https://"} {
		if len(source) >= len(prefix) && strings.EqualFold(source[:len(prefix)], prefix) {
			return true
		}
	}
	return false
}
// fetchPolicy downloads a single .rego file from rawURL and returns
// (filename, content, error).
//
// Only http and https URLs are accepted and the request is bounded by
// fetchTimeout. Any status other than 200 OK is an error, as is a body larger
// than maxPolicySize. The filename is the last element of the URL path, with
// "policy.rego" as the fallback for an empty path and ".rego" appended when
// the name lacks that suffix.
func fetchPolicy(rawURL string, fetchTimeout time.Duration) (string, string, error) {
	target, parseErr := url.Parse(rawURL)
	if parseErr != nil {
		return "", "", fmt.Errorf("invalid URL: %w", parseErr)
	}
	switch target.Scheme {
	case "http", "https":
		// supported
	default:
		return "", "", fmt.Errorf("unsupported URL scheme %q (only http and https are supported)", target.Scheme)
	}

	httpClient := &http.Client{Timeout: fetchTimeout}
	resp, getErr := httpClient.Get(rawURL)
	if getErr != nil {
		return "", "", fmt.Errorf("HTTP request failed: %w", getErr)
	}
	defer resp.Body.Close()

	if status := resp.StatusCode; status != http.StatusOK {
		return "", "", fmt.Errorf("HTTP %d from %s", status, rawURL)
	}

	// Read one byte past the limit: a longer result means the file is too big.
	content, readErr := io.ReadAll(io.LimitReader(resp.Body, maxPolicySize+1))
	if readErr != nil {
		return "", "", fmt.Errorf("failed to read response body: %w", readErr)
	}
	if len(content) > maxPolicySize {
		return "", "", fmt.Errorf("policy file exceeds maximum size of %d bytes", maxPolicySize)
	}

	// The module name comes from the final element of the URL path.
	filename := path.Base(target.Path)
	switch filename {
	case "", ".", "/":
		filename = "policy.rego"
	}
	if !strings.HasSuffix(filename, ".rego") {
		filename += ".rego"
	}
	return filename, string(content), nil
}
// Evaluate runs the prepared policy query with the JSON document in body as
// input and returns the resulting violation messages (empty = compliant).
//
// An error is returned when body is not valid JSON or when evaluation itself
// fails. If the query yields no result and failOnUndefined is set, a single
// violation naming the query is returned instead (fail-closed), since an
// undefined result usually means the query path is misconfigured or the rule
// does not exist.
func (e *Evaluator) Evaluate(ctx context.Context, body []byte) ([]string, error) {
	var message interface{}
	if decodeErr := json.Unmarshal(body, &message); decodeErr != nil {
		return nil, fmt.Errorf("failed to parse message body as JSON: %w", decodeErr)
	}

	results, evalErr := e.preparedQuery.Eval(ctx, rego.EvalInput(message))
	if evalErr != nil {
		return nil, fmt.Errorf("rego evaluation failed: %w", evalErr)
	}

	// Anything other than "undefined result while failing closed" goes through
	// the normal violation extraction.
	if len(results) > 0 || !e.failOnUndefined {
		return extractViolations(results)
	}
	undefined := fmt.Sprintf("policy query %q returned no result (undefined)", e.query)
	return []string{undefined}, nil
}
// extractViolations collects violation messages from every expression value in
// the OPA result set. The returned error is always nil.
//
// Supported expression value shapes:
//   - map with {"valid": bool, "violations": []string}: structured result
//   - list (e.g. from a Rego set): each string element is a violation message
//   - bool: false = denied ("policy denied the request"), true = allowed
//   - string: a non-empty string is a violation message
//   - anything else, or an empty result set: no violations
func extractViolations(rs rego.ResultSet) ([]string, error) {
	var collected []string
	for i := range rs {
		for _, expression := range rs[i].Expressions {
			collected = append(collected, violationsFromValue(expression.Value)...)
		}
	}
	return collected, nil
}

// violationsFromValue turns a single query result value into the violation
// messages it represents; unsupported shapes yield none.
func violationsFromValue(value interface{}) []string {
	switch typed := value.(type) {
	case bool:
		// allow/deny pattern: only false is a denial.
		if typed {
			return nil
		}
		return []string{"policy denied the request"}
	case string:
		if typed == "" {
			return nil
		}
		return []string{typed}
	case []interface{}:
		// Non-string elements are skipped.
		var messages []string
		for _, element := range typed {
			if message, isString := element.(string); isString {
				messages = append(messages, message)
			}
		}
		return messages
	case map[string]interface{}:
		return extractStructuredViolations(typed)
	}
	return nil
}
// extractStructuredViolations interprets the structured result format
//
//	{"valid": bool, "violations": [string, ...]}
//
// It returns nil when m does not have that shape (a key is missing, "valid" is
// not a bool, or "violations" is not a list). Otherwise it returns the string
// elements of "violations"; non-string elements are ignored. A valid result
// with an empty list yields an empty, non-nil slice, and an invalid result
// that carries no string messages yields the generic
// "policy denied the request".
func extractStructuredViolations(m map[string]interface{}) []string {
	rawValid, validPresent := m["valid"]
	rawList, listPresent := m["violations"]
	if !(validPresent && listPresent) {
		return nil
	}

	isValid, validIsBool := rawValid.(bool)
	items, listIsSlice := rawList.([]interface{})
	if !validIsBool || !listIsSlice {
		return nil
	}

	if len(items) == 0 {
		if isValid {
			// Recognised format with nothing to report.
			return []string{}
		}
		return []string{"policy denied the request"}
	}

	var messages []string
	for _, item := range items {
		if text, isString := item.(string); isString {
			messages = append(messages, text)
		}
	}
	// A denial must never come back empty, even if no element was a string.
	if len(messages) == 0 && !isValid {
		messages = append(messages, "policy denied the request")
	}
	return messages
}
// toInterfaceMap copies a map[string]string into a freshly allocated
// map[string]interface{} so it can be placed in the OPA store. A nil input
// yields an empty, non-nil map.
func toInterfaceMap(m map[string]string) map[string]interface{} {
	converted := make(map[string]interface{}, len(m))
	for key := range m {
		converted[key] = m[key]
	}
	return converted
}