```json
{
"tests": [
{
"name": "health_check",
"endpoint": "/health",
"method": "GET",
"headers": {},
"payload": {},
"expected_status": 200,
"expected_behavior": "Returns service health status with version, receipt count, MongoDB status, and peer oracle count",
"metrics": ["latency", "status_code", "response_time"],
"validation": {
"field": "version",
"type": "string",
"required": true
}
},
{
"name": "oracle_stats",
"endpoint": "/stats",
"method": "GET",
"headers": {},
"payload": {},
"expected_status": 200,
"expected_behavior": "Returns oracle statistics with pass/partial/fail breakdown, divergence rate, and confidence metrics",
"metrics": ["latency", "status_code", "response_time"],
"validation": {
"field": "pass_rate",
"type": "number",
"min_value": 0
}
},
{
"name": "judge_methodology",
"endpoint": "/judge-guide-v3",
"method": "GET",
"headers": {},
"payload": {},
"expected_status": 200,
"expected_behavior": "Returns evaluation methodology for SYNTHESIS judges",
"metrics": ["latency", "status_code", "response_time"],
"validation": {
"field": "methodology",
"type": "object",
"required": true
}
},
{
"name": "audit_receipts",
"endpoint": "/real-receipts",
"method": "GET",
"headers": {},
"payload": {},
"expected_status": 200,
"expected_behavior": "Returns up to the last 30 production receipts with HMAC signatures",
"metrics": ["latency", "status_code", "response_time"],
"validation": {
"field": "receipts",
"type": "array",
"max_length": 30
}
},
{
"name": "verify_simple_task",
"endpoint": "/verify-task",
"method": "POST",
"headers": {
"Content-Type": "application/json"
},
"payload": {
"task_spec": {
"task_id": "test_001",
"description": "Generate a simple greeting message",
"expected_output": "Hello, World!",
"criteria": ["contains greeting", "proper format"]
},
"agent_output": {
"result": "Hello, World!",
"confidence": 0.95,
"completion_time": 1.2
}
},
"expected_status": 200,
"expected_behavior": "Returns HMAC-signed receipt with PASS/PARTIAL/FAIL verdict",
"metrics": ["latency", "accuracy", "status_code"],
"validation": {
"field": "verdict",
"type": "string",
"enum": ["PASS", "PARTIAL", "FAIL"]
}
},
{
"name": "verify_complex_task",
"endpoint": "/verify-task",
"method": "POST",
"headers": {
"Content-Type": "application/json"
},
"payload": {
"task_spec": {
"task_id": "test_002",
"description": "Analyze sentiment of customer reviews",
"expected_output": "positive",
"criteria": ["accurate sentiment classification", "confidence score provided"],
"input_data": "This product is amazing! I love it."
},
"agent_output": {
"result": "positive",
"confidence": 0.87,
"reasoning": "Contains positive words like 'amazing' and 'love'"
}
},
"expected_status": 200,
"expected_behavior": "Returns HMAC-signed verification receipt with oracle consensus",
"metrics": ["latency", "accuracy", "status_code"],
"validation": {
"field": "hmac_signature",
"type": "string",
"required": true
}
},
{
"name": "verify_failed_task",
"endpoint": "/verify-task",
"method": "POST",
"headers": {
"Content-Type": "application/json"
},
"payload": {
"task_spec": {
"task_id": "test_003",
"description": "Calculate 2 + 2",
"expected_output": "4",
"criteria": ["correct mathematical answer"]
},
"agent_output": {
"result": "5",
"confidence": 0.9
}
},
"expected_status": 200,
"expected_behavior": "Returns FAIL verdict for incorrect answer",
"metrics": ["latency", "accuracy", "status_code"],
"validation": {
"field": "verdict",
"type": "string",
"expected_value": "FAIL"
}
},
{
"name": "missing_task_spec",
"endpoint": "/verify-task",
"method": "POST",
"headers": {
"Content-Type": "application/json"
},
"payload": {
"agent_output": {
"result": "test output"
}
},
"expected_status": 400,
"expected_behavior": "Returns error for missing required task_spec parameter",
"metrics": ["latency", "error_handling", "status_code"],
"validation": {
"field": "error",
"type": "string",
"required"