test: add tests and update docs for extended schema validation

This commit is contained in:
ameersohel45
2025-12-14 23:55:04 +05:30
parent 3b59507f15
commit 706030ccec
3 changed files with 792 additions and 9 deletions

View File

@@ -10,6 +10,7 @@ Validates Beckn protocol requests against OpenAPI 3.1 specifications using kin-o
- TTL-based caching with automatic refresh
- Generic path matching (no hardcoded paths)
- Direct schema validation without router overhead
- Extended schema validation for domain-specific objects with `@context` references
## Configuration
@@ -20,6 +21,11 @@ schemaValidator:
type: url
location: https://example.com/openapi-spec.yaml
cacheTTL: "3600"
extendedSchema_enabled: "true"
extendedSchema_cacheTTL: "86400"
extendedSchema_maxCacheSize: "100"
extendedSchema_downloadTimeout: "30"
extendedSchema_allowedDomains: "beckn.org,example.com"
```
### Configuration Parameters
@@ -29,24 +35,55 @@ schemaValidator:
| `type` | string | Yes | - | Type of spec source: "url" or "file" ("dir" reserved for future) |
| `location` | string | Yes | - | URL or file path to OpenAPI 3.1 spec |
| `cacheTTL` | string | No | "3600" | Cache TTL in seconds before reloading spec |
| `extendedSchema_enabled` | string | No | "false" | Enable extended schema validation for `@context` objects |
| `extendedSchema_cacheTTL` | string | No | "86400" | Domain schema cache TTL in seconds |
| `extendedSchema_maxCacheSize` | string | No | "100" | Maximum number of cached domain schemas |
| `extendedSchema_downloadTimeout` | string | No | "30" | Timeout in seconds for downloading domain schemas |
| `extendedSchema_allowedDomains` | string | No | "" | Comma-separated domain whitelist (empty = all allowed) |
## How It Works
1. **Load Spec**: Loads OpenAPI spec from configured URL at startup
2. **Extract Action**: Extracts `action` from request `context.action` field
3. **Find Schema**: Searches all paths and HTTP methods in spec for schema with matching action:
- Checks `properties.context.action.enum` for the action value
- Also checks `properties.context.allOf[].properties.action.enum`
- Stops at first match
4. **Validate**: Validates request body against matched schema using `Schema.VisitJSON()` with:
### Initialization (Load Time)
**Core Protocol Validation Setup**:
1. **Load OpenAPI Spec**: Loads main spec from `location` (URL or file) with external `$ref` resolution
2. **Build Action Index**: Creates action→schema map for O(1) lookup by scanning all paths/methods
3. **Validate Spec**: Validates OpenAPI spec structure (warnings logged, non-fatal)
4. **Cache Spec**: Stores loaded spec with `loadedAt` timestamp
**Extended Schema Setup** (if `extendedSchema_enabled: "true"`):
5. **Initialize Schema Cache**: Creates LRU cache with `maxCacheSize` (default: 100)
6. **Start Background Refresh**: Launches goroutine with two tickers:
- Core spec refresh every `cacheTTL` seconds (default: 3600)
- Extended schema cleanup every `extendedSchema_cacheTTL` seconds (default: 86400)
### Request Validation (Runtime)
**Core Protocol Validation** (always runs):
1. **Parse Request**: Unmarshal JSON and extract `context.action`
2. **Lookup Schema**: O(1) lookup in action index (built at load time)
3. **Validate**: Call `schema.Value.VisitJSON()` with:
- Required fields validation
- Data type validation (string, number, boolean, object, array)
- Format validation (email, uri, date-time, uuid, etc.)
- Constraint validation (min/max, pattern, enum, const)
- Nested object and array validation
5. **Return Errors**: Returns validation errors in ONIX format
4. **Return Errors**: If validation fails, format and return errors
**Extended Schema Validation** (if `extendedSchema_enabled: "true"` AND core validation passed):
5. **Scan for @context**: Recursively traverse `message` field for objects with `@context` and `@type`
6. **Filter Core Schemas**: Skip objects with `/schema/core/` in `@context` URL
7. **Validate Each Domain Object**:
- Check domain whitelist (if `allowedDomains` configured)
- Transform `@context` URL: `context.jsonld` → `attributes.yaml`
- Load schema from URL/file (check cache first, download if miss)
- Find schema by `@type` (direct match or `x-jsonld.@type` fallback)
- Strip `@context` and `@type` metadata from object
- Validate remaining data against domain schema
- Prefix error paths with object location (e.g., `message.order.field`)
8. **Return Errors**: Returns first validation error (fail-fast)
## Action-Based Matching
@@ -120,7 +157,33 @@ schemaValidator:
cacheTTL: "3600"
```
### With Extended Schema Validation
```yaml
schemaValidator:
id: schemav2validator
config:
type: url
location: https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/draft/api-specs/beckn-protocol-api.yaml
cacheTTL: "3600"
extendedSchema_enabled: "true"
extendedSchema_cacheTTL: "86400"
extendedSchema_maxCacheSize: "100"
extendedSchema_downloadTimeout: "30"
extendedSchema_allowedDomains: "raw.githubusercontent.com,schemas.beckn.org"
```
**At Load Time**:
- Creates LRU cache for domain schemas (max 100 entries)
- Starts background goroutine for cache cleanup every 24 hours
**At Runtime** (after core validation passes):
- Scans `message` field for objects with `@context` and `@type`
- Skips core Beckn schemas (containing `/schema/core/`)
- Downloads domain schemas from `@context` URLs (cached for 24 hours)
- Validates domain-specific data against schemas
- Returns errors with full JSON paths (e.g., `message.order.chargingRate`)
- Fail-fast: returns on first validation error
## Dependencies

View File

@@ -0,0 +1,709 @@
package schemav2validator
import (
"context"
"os"
"testing"
"time"
"github.com/getkin/kin-openapi/openapi3"
"github.com/stretchr/testify/assert"
)
func TestIsCoreSchema(t *testing.T) {
tests := []struct {
name string
contextURL string
want bool
}{
{
name: "core schema URL",
contextURL: "https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/draft/schema/core/v2/context.jsonld",
want: true,
},
{
name: "domain schema URL",
contextURL: "https://raw.githubusercontent.com/beckn/protocol-specifications-new/refs/heads/draft/schema/EvChargingOffer/v1/context.jsonld",
want: false,
},
{
name: "empty URL",
contextURL: "",
want: false,
},
{
name: "URL without schema/core",
contextURL: "https://example.com/some/path/context.jsonld",
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := isCoreSchema(tt.contextURL)
assert.Equal(t, tt.want, got)
})
}
}
// TestFindReferencedObjects checks how many objects findReferencedObjects
// reports for various payload shapes: flat, nested, inside arrays, and objects
// that lack @context or @type.
func TestFindReferencedObjects(t *testing.T) {
	// Payloads are declared up front so the case table below stays compact.
	singleDomain := map[string]interface{}{
		"@context": "https://example.com/schema/DomainType/v1/context.jsonld",
		"@type":    "DomainType",
		"field":    "value",
	}
	coreOnly := map[string]interface{}{
		"@context": "https://example.com/schema/core/v2/context.jsonld",
		"@type":    "beckn:Order",
		"field":    "value",
	}
	nested := map[string]interface{}{
		"order": map[string]interface{}{
			"@context": "https://example.com/schema/core/v2/context.jsonld",
			"@type":    "beckn:Order",
			"orderAttributes": map[string]interface{}{
				"@context": "https://example.com/schema/ChargingSession/v1/context.jsonld",
				"@type":    "ChargingSession",
				"field":    "value",
			},
		},
	}
	inArray := map[string]interface{}{
		"items": []interface{}{
			map[string]interface{}{
				"@context": "https://example.com/schema/DomainType/v1/context.jsonld",
				"@type":    "DomainType",
			},
			map[string]interface{}{
				"@context": "https://example.com/schema/AnotherType/v1/context.jsonld",
				"@type":    "AnotherType",
			},
		},
	}
	noContext := map[string]interface{}{
		"field": "value",
	}
	noType := map[string]interface{}{
		"@context": "https://example.com/schema/DomainType/v1/context.jsonld",
		"field":    "value",
	}

	cases := []struct {
		name      string
		data      interface{}
		path      string
		wantCount int // expected number of objects returned
	}{
		{name: "single domain object", data: singleDomain, path: "message", wantCount: 1},
		{name: "core schema object - should be skipped", data: coreOnly, path: "message", wantCount: 0},
		// Only the inner domain object counts; the enclosing core object is skipped.
		{name: "nested domain objects", data: nested, path: "message", wantCount: 1},
		{name: "array with domain objects", data: inArray, path: "message", wantCount: 2},
		{name: "object without @context", data: noContext, path: "message", wantCount: 0},
		{name: "object with @context but no @type", data: noType, path: "message", wantCount: 0},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			found := findReferencedObjects(tc.data, tc.path)
			assert.Equal(t, tc.wantCount, len(found))
		})
	}
}
func TestTransformContextToSchemaURL(t *testing.T) {
tests := []struct {
name string
contextURL string
want string
}{
{
name: "standard transformation",
contextURL: "https://example.com/schema/EvChargingOffer/v1/context.jsonld",
want: "https://example.com/schema/EvChargingOffer/v1/attributes.yaml",
},
{
name: "already attributes.yaml",
contextURL: "https://example.com/schema/EvChargingOffer/v1/attributes.yaml",
want: "https://example.com/schema/EvChargingOffer/v1/attributes.yaml",
},
{
name: "no context.jsonld in URL",
contextURL: "https://example.com/schema/EvChargingOffer/v1/",
want: "https://example.com/schema/EvChargingOffer/v1/",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := transformContextToSchemaURL(tt.contextURL)
assert.Equal(t, tt.want, got)
})
}
}
// TestHashURL checks three properties of hashURL: the same input always yields
// the same hash, the hash is 64 characters long (the length of a hex-encoded
// SHA-256 digest), and different inputs yield different hashes.
func TestHashURL(t *testing.T) {
	tests := []struct {
		name string
		url  string
	}{
		{
			name: "consistent hashing",
			url:  "https://example.com/schema.yaml",
		},
		{
			name: "empty string",
			url:  "",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			hash1 := hashURL(tt.url)
			hash2 := hashURL(tt.url)

			// Same URL should produce same hash.
			assert.Equal(t, hash1, hash2)

			// Hash should be 64 characters (SHA256 hex).
			assert.Equal(t, 64, len(hash1))
		})
	}

	// Without this check, a hashURL that returned a constant 64-character
	// string would pass every assertion above while making all cache keys
	// collide.
	t.Run("different URLs produce different hashes", func(t *testing.T) {
		first := hashURL("https://example.com/a.yaml")
		second := hashURL("https://example.com/b.yaml")
		assert.NotEqual(t, first, second)
	})
}
func TestIsValidSchemaPath(t *testing.T) {
tests := []struct {
name string
schemaPath string
want bool
}{
{
name: "http URL",
schemaPath: "http://example.com/schema.yaml",
want: true,
},
{
name: "https URL",
schemaPath: "https://example.com/schema.yaml",
want: true,
},
{
name: "file URL",
schemaPath: "file:///path/to/schema.yaml",
want: true,
},
{
name: "local path",
schemaPath: "/path/to/schema.yaml",
want: true,
},
{
name: "relative path",
schemaPath: "./schema.yaml",
want: true,
},
{
name: "empty path",
schemaPath: "",
want: true, // url.Parse("") succeeds, returns empty scheme
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := isValidSchemaPath(tt.schemaPath)
assert.Equal(t, tt.want, got)
})
}
}
// TestNewSchemaCache checks that newSchemaCache returns a non-nil cache that
// records the requested maxSize and starts with an initialised, empty schemas
// map.
func TestNewSchemaCache(t *testing.T) {
	cases := []struct {
		name    string
		maxSize int
	}{
		{name: "default size", maxSize: 100},
		{name: "custom size", maxSize: 50},
		{name: "zero size", maxSize: 0},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			c := newSchemaCache(tc.maxSize)

			assert.NotNil(t, c)
			assert.Equal(t, tc.maxSize, c.maxSize)
			assert.NotNil(t, c.schemas)
			assert.Equal(t, 0, len(c.schemas))
		})
	}
}
// TestSchemaCache_GetSet stores one document in the cache, reads it back under
// the same key, and confirms that an unknown key is reported as missing.
func TestSchemaCache_GetSet(t *testing.T) {
	c := newSchemaCache(10)

	// A minimal document is enough; only its identity matters here.
	stored := &openapi3.T{OpenAPI: "3.1.0"}
	key := hashURL("https://example.com/schema.yaml")

	c.set(key, stored, 1*time.Hour)

	// The key that was just stored must be found.
	got, ok := c.get(key)
	assert.True(t, ok)
	assert.Equal(t, stored, got)

	// A key that was never stored must be reported as missing.
	_, ok = c.get("non-existent-hash")
	assert.False(t, ok)
}
// TestSchemaCache_LRUEviction fills a two-entry cache, reads the first entry,
// then inserts a third entry and expects the unread second entry to be the one
// evicted.
func TestSchemaCache_LRUEviction(t *testing.T) {
	const ttl = 1 * time.Hour

	// Capacity of two so the third insert forces an eviction.
	c := newSchemaCache(2)

	first := &openapi3.T{OpenAPI: "3.1.0"}
	second := &openapi3.T{OpenAPI: "3.1.1"}
	third := &openapi3.T{OpenAPI: "3.1.2"}

	c.set("hash1", first, ttl)
	c.set("hash2", second, ttl)

	// Reading hash1 makes hash2 the least recently used entry.
	c.get("hash1")

	// The cache is full, so this insert must evict hash2.
	c.set("hash3", third, ttl)

	_, has1 := c.get("hash1")
	_, has2 := c.get("hash2")
	_, has3 := c.get("hash3")

	assert.True(t, has1, "hash1 should exist (recently accessed)")
	assert.False(t, has2, "hash2 should be evicted (LRU)")
	assert.True(t, has3, "hash3 should exist (just added)")
}
// TestSchemaCache_TTLExpiry verifies that an entry is readable before its TTL
// elapses and is reported as missing afterwards.
func TestSchemaCache_TTLExpiry(t *testing.T) {
	// The TTL is long enough that the first get cannot plausibly run after
	// expiry, even on a loaded CI machine. A 1ms TTL made that check depend on
	// scheduling.
	const ttl = 100 * time.Millisecond

	cache := newSchemaCache(10)
	doc := &openapi3.T{OpenAPI: "3.1.0"}
	urlHash := "test-hash"

	cache.set(urlHash, doc, ttl)

	// Should be found while the TTL has not elapsed.
	_, found := cache.get(urlHash)
	assert.True(t, found, "entry should be readable before its TTL elapses")

	// time.Sleep pauses for at least the given duration, so the entry is
	// guaranteed to be past its TTL afterwards.
	time.Sleep(ttl + 20*time.Millisecond)

	// Should not be found after expiry.
	_, found = cache.get(urlHash)
	assert.False(t, found, "entry should be missing after its TTL elapses")
}
// TestSchemaCache_CleanupExpired stores two short-lived entries and one
// long-lived entry, waits for the short ones to expire, and checks that
// cleanupExpired removes exactly those two.
func TestSchemaCache_CleanupExpired(t *testing.T) {
	c := newSchemaCache(10)
	shared := &openapi3.T{OpenAPI: "3.1.0"}

	// hash1 and hash2 expire almost immediately; hash3 outlives the test.
	c.set("hash1", shared, 1*time.Millisecond)
	c.set("hash2", shared, 1*time.Millisecond)
	c.set("hash3", shared, 1*time.Hour)

	// Give the short-lived entries time to pass their TTL.
	time.Sleep(10 * time.Millisecond)

	removed := c.cleanupExpired()
	assert.Equal(t, 2, removed)

	// Inspect the map directly, under the read lock, to confirm that only
	// hash3 is left.
	c.mu.RLock()
	defer c.mu.RUnlock()

	assert.Equal(t, 1, len(c.schemas))
	_, stillThere := c.schemas["hash3"]
	assert.True(t, stillThere)
}
func TestIsAllowedDomain(t *testing.T) {
tests := []struct {
name string
schemaURL string
allowedDomains []string
want bool
}{
{
name: "empty whitelist - all allowed",
schemaURL: "https://example.com/schema.yaml",
allowedDomains: []string{},
want: true,
},
{
name: "nil whitelist - all allowed",
schemaURL: "https://example.com/schema.yaml",
allowedDomains: nil,
want: true,
},
{
name: "domain in whitelist",
schemaURL: "https://raw.githubusercontent.com/beckn/schema.yaml",
allowedDomains: []string{"raw.githubusercontent.com", "schemas.beckn.org"},
want: true,
},
{
name: "domain not in whitelist",
schemaURL: "https://malicious.com/schema.yaml",
allowedDomains: []string{"raw.githubusercontent.com", "schemas.beckn.org"},
want: false,
},
{
name: "partial domain match",
schemaURL: "https://raw.githubusercontent.com/beckn/schema.yaml",
allowedDomains: []string{"githubusercontent.com"},
want: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := isAllowedDomain(tt.schemaURL, tt.allowedDomains)
assert.Equal(t, tt.want, got)
})
}
}
// TestFindReferencedObjects_PathBuilding verifies that the Path reported for a
// deeply nested domain object joins map keys with dots and marks array
// elements with an [index] suffix.
func TestFindReferencedObjects_PathBuilding(t *testing.T) {
	data := map[string]interface{}{
		"order": map[string]interface{}{
			"beckn:orderItems": []interface{}{
				map[string]interface{}{
					"beckn:acceptedOffer": map[string]interface{}{
						"beckn:offerAttributes": map[string]interface{}{
							"@context": "https://example.com/schema/ChargingOffer/v1/context.jsonld",
							"@type":    "ChargingOffer",
						},
					},
				},
			},
		},
	}

	objects := findReferencedObjects(data, "message")

	// assert does not stop the test on failure, so return here rather than
	// letting objects[0] panic with an index-out-of-range error.
	if !assert.Equal(t, 1, len(objects)) {
		return
	}

	assert.Equal(t, "message.order.beckn:orderItems[0].beckn:acceptedOffer.beckn:offerAttributes", objects[0].Path)
	assert.Equal(t, "ChargingOffer", objects[0].Type)
}
// Integration tests for the 4 remaining functions
// TestLoadSchemaFromPath_LocalFile verifies that loadSchemaFromPath can load an
// OpenAPI document from a plain filesystem path and exposes its version.
func TestLoadSchemaFromPath_LocalFile(t *testing.T) {
	const schemaContent = `openapi: 3.1.0
info:
  title: Test Schema
  version: 1.0.0
components:
  schemas:
    TestType:
      type: object
      properties:
        field1:
          type: string`

	// The file lives in t.TempDir, which is removed when the test ends, so no
	// explicit cleanup is needed. Fixture failures abort the test instead of
	// being ignored or dereferencing a nil file handle.
	tmpFile, err := os.CreateTemp(t.TempDir(), "test-schema-*.yaml")
	if err != nil {
		t.Fatalf("creating temp schema file: %v", err)
	}
	if _, err := tmpFile.WriteString(schemaContent); err != nil {
		t.Fatalf("writing temp schema file: %v", err)
	}
	if err := tmpFile.Close(); err != nil {
		t.Fatalf("closing temp schema file: %v", err)
	}

	cache := newSchemaCache(10)
	ctx := context.Background()

	doc, err := cache.loadSchemaFromPath(ctx, tmpFile.Name(), 1*time.Hour, 30*time.Second)
	assert.NoError(t, err)

	// Only read the field when doc is non-nil; a failed load must fail the
	// test, not panic.
	if assert.NotNil(t, doc) {
		assert.Equal(t, "3.1.0", doc.OpenAPI)
	}
}
// TestLoadSchemaFromPath_CacheHit loads the same path twice and expects the
// second call to return the very document instance produced by the first.
func TestLoadSchemaFromPath_CacheHit(t *testing.T) {
	const schemaContent = `openapi: 3.1.0
info:
  title: Test Schema
  version: 1.0.0`

	// The file lives in t.TempDir, which is removed when the test ends.
	// Fixture failures abort the test instead of being silently ignored.
	tmpFile, err := os.CreateTemp(t.TempDir(), "test-schema-*.yaml")
	if err != nil {
		t.Fatalf("creating temp schema file: %v", err)
	}
	if _, err := tmpFile.WriteString(schemaContent); err != nil {
		t.Fatalf("writing temp schema file: %v", err)
	}
	if err := tmpFile.Close(); err != nil {
		t.Fatalf("closing temp schema file: %v", err)
	}

	cache := newSchemaCache(10)
	ctx := context.Background()

	doc1, err := cache.loadSchemaFromPath(ctx, tmpFile.Name(), 1*time.Hour, 30*time.Second)
	assert.NoError(t, err)

	doc2, err := cache.loadSchemaFromPath(ctx, tmpFile.Name(), 1*time.Hour, 30*time.Second)
	assert.NoError(t, err)

	// Pointer identity, not deep equality: parsing the same file twice would
	// give deep-equal documents even if nothing was cached.
	// NOTE(review): assumes the cache hands back the stored pointer — confirm
	// against loadSchemaFromPath.
	if assert.NotNil(t, doc1) {
		assert.Same(t, doc1, doc2)
	}
}
// TestLoadSchemaFromPath_InvalidPath expects loadSchemaFromPath to return an
// error when the given path does not exist.
func TestLoadSchemaFromPath_InvalidPath(t *testing.T) {
	const (
		cacheTTL        = 1 * time.Hour
		downloadTimeout = 30 * time.Second
	)

	c := newSchemaCache(10)

	_, loadErr := c.loadSchemaFromPath(context.Background(), "/nonexistent/schema.yaml", cacheTTL, downloadTimeout)
	assert.Error(t, loadErr)
}
// TestFindSchemaByType_DirectMatch verifies that findSchemaByType returns a
// schema when the requested type name is a key under components.schemas.
func TestFindSchemaByType_DirectMatch(t *testing.T) {
	const schemaContent = `openapi: 3.1.0
info:
  title: Test Schema
  version: 1.0.0
components:
  schemas:
    TestType:
      type: object
      properties:
        field1:
          type: string`

	// The file lives in t.TempDir, which is removed when the test ends.
	// Fixture failures abort the test instead of being silently ignored.
	tmpFile, err := os.CreateTemp(t.TempDir(), "test-schema-*.yaml")
	if err != nil {
		t.Fatalf("creating temp schema file: %v", err)
	}
	if _, err := tmpFile.WriteString(schemaContent); err != nil {
		t.Fatalf("writing temp schema file: %v", err)
	}
	if err := tmpFile.Close(); err != nil {
		t.Fatalf("closing temp schema file: %v", err)
	}

	cache := newSchemaCache(10)
	ctx := context.Background()

	doc, err := cache.loadSchemaFromPath(ctx, tmpFile.Name(), 1*time.Hour, 30*time.Second)
	// Stop here if the load failed so a possibly nil document is never passed
	// on to findSchemaByType.
	if !assert.NoError(t, err) {
		return
	}

	schema, err := findSchemaByType(ctx, doc, "TestType")
	assert.NoError(t, err)
	assert.NotNil(t, schema)
}
// TestFindSchemaByType_NotFound verifies that findSchemaByType returns a
// "no schema found" error when the document has no schema for the requested
// type.
func TestFindSchemaByType_NotFound(t *testing.T) {
	const schemaContent = `openapi: 3.1.0
info:
  title: Test Schema
  version: 1.0.0
components:
  schemas:
    TestType:
      type: object`

	// The file lives in t.TempDir, which is removed when the test ends.
	// Fixture failures abort the test instead of being silently ignored.
	tmpFile, err := os.CreateTemp(t.TempDir(), "test-schema-*.yaml")
	if err != nil {
		t.Fatalf("creating temp schema file: %v", err)
	}
	if _, err := tmpFile.WriteString(schemaContent); err != nil {
		t.Fatalf("writing temp schema file: %v", err)
	}
	if err := tmpFile.Close(); err != nil {
		t.Fatalf("closing temp schema file: %v", err)
	}

	cache := newSchemaCache(10)
	ctx := context.Background()

	doc, err := cache.loadSchemaFromPath(ctx, tmpFile.Name(), 1*time.Hour, 30*time.Second)
	// Stop here if the load failed so a possibly nil document is never passed
	// on to findSchemaByType.
	if !assert.NoError(t, err) {
		return
	}

	_, err = findSchemaByType(ctx, doc, "NonExistentType")
	// Only inspect the message when an error was returned; calling Error()
	// on a nil error would panic.
	if assert.Error(t, err) {
		assert.Contains(t, err.Error(), "no schema found")
	}
}
// TestValidateReferencedObject_Valid verifies that an object carrying the
// required field1 passes validation against a local schema file that sets
// additionalProperties to false.
func TestValidateReferencedObject_Valid(t *testing.T) {
	const schemaContent = `openapi: 3.1.0
info:
  title: Test Schema
  version: 1.0.0
components:
  schemas:
    TestType:
      type: object
      additionalProperties: false
      x-jsonld:
        "@context": ./context.jsonld
        "@type": TestType
      properties:
        field1:
          type: string
      required:
        - field1`

	// The file lives in t.TempDir, which is removed when the test ends.
	// Fixture failures abort the test instead of being silently ignored.
	tmpFile, err := os.CreateTemp(t.TempDir(), "test-schema-*.yaml")
	if err != nil {
		t.Fatalf("creating temp schema file: %v", err)
	}
	if _, err := tmpFile.WriteString(schemaContent); err != nil {
		t.Fatalf("writing temp schema file: %v", err)
	}
	if err := tmpFile.Close(); err != nil {
		t.Fatalf("closing temp schema file: %v", err)
	}

	cache := newSchemaCache(10)
	ctx := context.Background()

	// The object's context is the schema file itself, so validation reads the
	// local file and needs no network access.
	obj := referencedObject{
		Path:    "message.test",
		Context: tmpFile.Name(),
		Type:    "TestType",
		Data: map[string]interface{}{
			"@context": tmpFile.Name(),
			"@type":    "TestType",
			"field1":   "value1",
		},
	}

	err = cache.validateReferencedObject(ctx, obj, 1*time.Hour, 30*time.Second, nil)
	assert.NoError(t, err)
}
// TestValidateReferencedObject_Invalid verifies that validation fails when the
// object omits field1, which the schema lists as required.
func TestValidateReferencedObject_Invalid(t *testing.T) {
	const schemaContent = `openapi: 3.1.0
info:
  title: Test Schema
  version: 1.0.0
components:
  schemas:
    TestType:
      type: object
      additionalProperties: false
      x-jsonld:
        "@context": ./context.jsonld
        "@type": TestType
      properties:
        field1:
          type: string
      required:
        - field1`

	// The file lives in t.TempDir, which is removed when the test ends.
	// Fixture failures abort the test; otherwise an empty or missing schema
	// file could produce the expected error for the wrong reason.
	tmpFile, err := os.CreateTemp(t.TempDir(), "test-schema-*.yaml")
	if err != nil {
		t.Fatalf("creating temp schema file: %v", err)
	}
	if _, err := tmpFile.WriteString(schemaContent); err != nil {
		t.Fatalf("writing temp schema file: %v", err)
	}
	if err := tmpFile.Close(); err != nil {
		t.Fatalf("closing temp schema file: %v", err)
	}

	cache := newSchemaCache(10)
	ctx := context.Background()

	// The data holds only the JSON-LD metadata keys; required field1 is
	// deliberately absent.
	obj := referencedObject{
		Path:    "message.test",
		Context: tmpFile.Name(),
		Type:    "TestType",
		Data: map[string]interface{}{
			"@context": tmpFile.Name(),
			"@type":    "TestType",
		},
	}

	err = cache.validateReferencedObject(ctx, obj, 1*time.Hour, 30*time.Second, nil)
	assert.Error(t, err)
}
// TestValidateReferencedObject_DomainNotAllowed verifies that an object whose
// context host is not in the allowed-domains list is rejected with a
// "domain not allowed" error.
func TestValidateReferencedObject_DomainNotAllowed(t *testing.T) {
	cache := newSchemaCache(10)
	ctx := context.Background()

	obj := referencedObject{
		Path:    "message.test",
		Context: "https://malicious.com/schema.yaml",
		Type:    "TestType",
		Data:    map[string]interface{}{},
	}

	allowedDomains := []string{"trusted.com"}

	err := cache.validateReferencedObject(ctx, obj, 1*time.Hour, 30*time.Second, allowedDomains)
	// Only inspect the message when an error was returned; calling Error()
	// on a nil error would panic.
	if assert.Error(t, err) {
		assert.Contains(t, err.Error(), "domain not allowed")
	}
}
func TestValidateExtendedSchemas_NoObjects(t *testing.T) {
v := &schemav2Validator{
config: &Config{
EnableExtendedSchema: true,
ExtendedSchemaConfig: ExtendedSchemaConfig{},
},
schemaCache: newSchemaCache(10),
}
ctx := context.Background()
body := map[string]interface{}{
"message": map[string]interface{}{
"field": "value",
},
}
err := v.validateExtendedSchemas(ctx, body)
assert.NoError(t, err)
}
// TestValidateExtendedSchemas_MissingMessage verifies that
// validateExtendedSchemas rejects a body that has no "message" key and that
// the error names the missing field.
func TestValidateExtendedSchemas_MissingMessage(t *testing.T) {
	v := &schemav2Validator{
		config: &Config{
			EnableExtendedSchema: true,
		},
		schemaCache: newSchemaCache(10),
	}

	ctx := context.Background()
	body := map[string]interface{}{
		"context": map[string]interface{}{},
	}

	err := v.validateExtendedSchemas(ctx, body)
	// Only inspect the message when an error was returned; calling Error()
	// on a nil error would panic.
	if assert.Error(t, err) {
		assert.Contains(t, err.Error(), "missing 'message' field")
	}
}