{"id":"628e0dcc-17a9-45e2-ba33-de3c8b604c7e","name":"OpenAI Evals","slug":"openai-evals","description":"OpenAI's framework for evaluating LLMs and LLM systems. Open-source registry of benchmarks. Write custom evals to test different dimensions of model quality.","website_url":"https://github.com/openai/evals","category":["ai","testing","benchmark"],"score":{"overall":75,"raw":81,"capped":true,"verified":false,"breakdown":{"latency":9,"consistency":8,"doc_quality":8,"error_clarity":8,"auth_simplicity":9,"token_efficiency":8,"first_try_success":7,"response_parseability":9},"source":"prowl_capped","credential_tested":false,"probe_health":{"status":"healthy","up_pct":1.0,"samples":26,"target":"website","p50_latency_ms":346,"last_probed":"2026-05-14T13:28:58.164364+00:00","lookback_hours":24}},"latency":null,"uptime_30d":null,"protocols":["rest"],"auth_type":null,"mcp_manifest_url":null,"openapi_spec_url":null,"verified":false,"claimed":false,"vendor_id":null,"status":"active","profile":null,"pricing":{"free_tier":true,"open_source":true,"cost_per_evaluation":0},"last_crawled":"2026-05-11T16:19:51.801912+00:00","last_benchmarked":"2026-04-06T06:47:58.582505+00:00","supports_x402":false,"agent_auth_methods":null,"supports_streaming":false,"has_sandbox":false,"sdks":null,"llms_txt_url":null,"similarity_score":null}