{
  "evaluation_id": "EVAL-20260402-145527",
  "question_id": "CODE-027",
  "question_text": "Implement a JSON Schema validator from scratch that supports: type validation (string, number, integer, boolean, array, object, null), required fields, minimum/maximum for numbers, minLength/maxLength for strings, pattern (regex), enum, nested objects, and array items validation. No external libraries.",
  "category": "code",
  "timestamp": "2026-04-02T14:55:27.892Z",
  "display_date": "Apr 02, 2026",
  "winner": {
    "name": "GPT-5.4",
    "provider": "openrouter",
    "score": 8.96
  },
  "avg_score": 6.768,
  "matrix_size": 90,
  "models_used": [
    {
      "id": "deepseek_v4",
      "name": "DeepSeek V4",
      "provider": "openrouter"
    },
    {
      "id": "claude_opus_46",
      "name": "Claude Opus 4.6",
      "provider": "openrouter"
    },
    {
      "id": "gemini_31_pro",
      "name": "Gemini 3.1 Pro",
      "provider": "openrouter"
    },
    {
      "id": "claude_sonnet_46",
      "name": "Claude Sonnet 4.6",
      "provider": "openrouter"
    },
    {
      "id": "grok_420",
      "name": "Grok 4.20",
      "provider": "openrouter"
    },
    {
      "id": "gpt_oss_120b",
      "name": "GPT-OSS-120B",
      "provider": "OpenAI"
    },
    {
      "id": "gemini_3_flash",
      "name": "Gemini 3 Flash Preview",
      "provider": "Google"
    },
    {
      "id": "minimax_m25",
      "name": "MiniMax M2.5",
      "provider": "openrouter"
    },
    {
      "id": "mimo_v2_flash",
      "name": "MiMo-V2-Flash",
      "provider": "Xiaomi"
    },
    {
      "id": "gpt_5_4",
      "name": "GPT-5.4",
      "provider": "openrouter"
    }
  ],
  "rankings": {
    "gpt_5_4": {
      "display_name": "GPT-5.4",
      "provider": "openrouter",
      "average_score": 8.96,
      "score_count": 9,
      "min_score": 8.6,
      "max_score": 9.8,
      "rank": 1
    },
    "deepseek_v4": {
      "display_name": "DeepSeek V4",
      "provider": "openrouter",
      "average_score": 8.66,
      "score_count": 9,
      "min_score": 7.35,
      "max_score": 9.8,
      "rank": 2
    },
    "grok_420": {
      "display_name": "Grok 4.20",
      "provider": "openrouter",
      "average_score": 8.61,
      "score_count": 9,
      "min_score": 7.75,
      "max_score": 9.8,
      "rank": 3
    },
    "gemini_3_flash": {
      "display_name": "Gemini 3 Flash Preview",
      "provider": "Google",
      "average_score": 8.2,
      "score_count": 9,
      "min_score": 7,
      "max_score": 8.8,
      "rank": 4
    },
    "gpt_oss_120b": {
      "display_name": "GPT-OSS-120B",
      "provider": "OpenAI",
      "average_score": 7.55,
      "score_count": 9,
      "min_score": 4.4,
      "max_score": 9.45,
      "rank": 5
    },
    "mimo_v2_flash": {
      "display_name": "MiMo-V2-Flash",
      "provider": "Xiaomi",
      "average_score": 7.35,
      "score_count": 9,
      "min_score": 5.2,
      "max_score": 9.6,
      "rank": 6
    },
    "claude_sonnet_46": {
      "display_name": "Claude Sonnet 4.6",
      "provider": "openrouter",
      "average_score": 6.22,
      "score_count": 9,
      "min_score": 3.45,
      "max_score": 8.45,
      "rank": 7
    },
    "claude_opus_46": {
      "display_name": "Claude Opus 4.6",
      "provider": "openrouter",
      "average_score": 5.62,
      "score_count": 9,
      "min_score": 2.45,
      "max_score": 8.8,
      "rank": 8
    },
    "minimax_m25": {
      "display_name": "MiniMax M2.5",
      "provider": "openrouter",
      "average_score": 3.59,
      "score_count": 9,
      "min_score": 0.2,
      "max_score": 7,
      "rank": 9
    },
    "gemini_31_pro": {
      "display_name": "Gemini 3.1 Pro",
      "provider": "openrouter",
      "average_score": 2.92,
      "score_count": 9,
      "min_score": 0.4,
      "max_score": 8.35,
      "rank": 10
    }
  },
  "judgments": [
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 9,
      "brief_justification": "The implementation correctly covers all required validation features, adds many useful extras, is well‑structured and documented, and provides ready‑to‑use code without external dependencies."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 2.45,
      "brief_justification": "The response is truncated and does not provide a complete implementation. It starts with a well-structured design and includes many extra features, but the actual validator code is incomplete, so it cannot satisfy the requested implementation or be used as-is."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 0.65,
      "brief_justification": "The response claims a complete implementation but provides no actual code or substantive content beyond an introduction, so it does not answer the request."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 3.45,
      "brief_justification": "The response is partially clear and shows a reasonable design, but the implementation is cut off mid-code and does not provide a complete working validator. It includes some requested features and even extras, but because array validation and likely the rest of the main validator are missing, it fails to fully implement the asked solution."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 7.75,
      "brief_justification": "Well-structured and mostly implements the requested features, but it has a notable correctness issue: 'integer' fails type validation because getType returns 'number', causing valid integers to be flagged. Otherwise it covers required fields, numeric/string constraints, enum, nested objects, and array items with practical examples."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 7.95,
      "brief_justification": "Clear, well-structured, and implements all requested core features with recursive nested object and array validation. However, it adds extra unsupported features and has some JSON Schema correctness issues, such as enum using includes (reference equality for objects/arrays), number accepting NaN/Infinity, and some behavior differing from standard JSON Schema semantics."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 4.4,
      "brief_justification": "Well-structured and clear, but the response is incomplete: the implementation is truncated before number validation and likely missing the rest of the class/example. It correctly covers several requested features conceptually, but as provided it does not fully implement the required validator end-to-end."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 8.35,
      "brief_justification": "Covers all requested features with a clear recursive implementation and practical example. Main correctness issue: recursive validate() resets errors whenever path is empty, so nested validation under a root-level array/object item with empty path could behave inconsistently; also enum uses shallow includes semantics rather than full JSON Schema deep equality. Otherwise solid and useful."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 1.4,
      "brief_justification": "The response is incomplete and cuts off mid-implementation, so it does not actually provide a working validator. It starts clearly, but lacks the required features in executable form and is not practically usable as given."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 5.2,
      "brief_justification": "The response is fairly clear and shows a recursive validator design, but the provided implementation is incomplete/truncated and includes issues such as incorrect handling of JSON Schema 'number' vs 'integer' typing. It covers many requested features conceptually, yet as given it is not fully runnable or fully correct, which limits practical usefulness."
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 9,
      "brief_justification": "The implementation is correct, covering all requested features: type validation for all specified types, required fields, min/max for numbers, minLength/maxLength for strings, pattern regex, enum with deep equality, nested object validation, and array items validation. The code is clean, well-structured, and includes proper error reporting with paths. The example usage effectively demonstrates all features with intentional validation failures. The notes section honestly acknowledges limitations."
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 1.65,
      "brief_justification": "The response promises a complete implementation but only provides the introduction and an opening code fence with no actual code. There is no validator implementation, no working code, and none of the required features are demonstrated. The response is essentially empty in terms of deliverable content."
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 5.25,
      "brief_justification": "The implementation is cut off mid-code, leaving the ArrayValidator incomplete (missing tuple validation, item iteration logic), the ObjectValidator entirely missing (no nested object validation, no required fields validation), and no main JSONSchemaValidator class that ties everything together. The enum validator is also absent. While the architecture diagram and the parts that are present (TypeValidator, StringValidator, NumberValidator, partial ArrayValidator) are well-structured and mostly co"
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 7.75,
      "brief_justification": "The implementation covers all requested features and includes a bonus (additionalProperties). However, there's a notable bug: the `validateType` method uses `typeof` for type detection, which means `integer` type validation partially fails - `getType` returns 'number' for integers, so `validateType` would report an error for integers when type is 'integer'. The integer check in `validateNumber` is a secondary check but the type validation would already have failed. The enum comparison using JSON"
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 8.55,
      "brief_justification": "The implementation is well-structured and covers all requested features plus several bonus ones (additionalProperties, uniqueItems, contains, multipleOf, const, format). The code is correct for the most part, though there are minor issues: the exclusiveMinimum/exclusiveMaximum handling uses the draft-04 boolean style rather than the more modern draft-06+ numeric style, and the early return pattern in validateAgainstSchema means that for objects/arrays, type-independent checks like enum are valid"
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 8.8,
      "brief_justification": "Comprehensive, correct implementation with clear code and useful explanations."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 3.4,
      "brief_justification": "The response starts a plausible implementation but is incomplete and missing many required features, limiting its correctness and usefulness."
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 7.4,
      "brief_justification": "The implementation is well-structured, clearly documented, and covers most requested features correctly. The type checking properly handles the bool/int edge case in Python. However, the response appears truncated - the _validate_number method is missing, which means minimum/maximum validation for numbers isn't shown. Also, using re.fullmatch instead of re.search differs from JSON Schema spec (which uses re.search-like behavior). The pattern of returning all errors with paths is a nice touch. Th"
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 8,
      "brief_justification": "The implementation correctly handles all requested features: type validation (including the integer vs number distinction and null handling), required fields, min/max for numbers, minLength/maxLength for strings, pattern regex, enum, nested objects, and array items validation. There's a subtle bug: the recursive validate() calls from _validateObject and _validateArray will return result objects but also reset errors if path is empty (though path won't be empty in recursive calls, so this is fine"
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 1.85,
      "brief_justification": "The response is severely incomplete - the code is cut off mid-implementation, only showing type validation and the very beginning of further logic. It doesn't actually implement required fields, min/max for numbers, minLength/maxLength for strings, pattern, enum, nested objects, or array items validation. While the initial structure and type validation logic shown is reasonable, the response fails to deliver on virtually all requested features."
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 7.05,
      "brief_justification": "The implementation covers most required features: type validation (including the bool/int subtlety), required fields, min/max for numbers, minLength/maxLength/pattern for strings, enum, nested objects, and array items validation. It also adds extras like exclusiveMinimum/Maximum, additionalProperties, tuple validation, and minItems/maxItems. However, the code is truncated - the example usage section is cut off mid-schema, which hurts completeness and usefulness. The type detection logic has a mi"
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 9.8,
      "brief_justification": ""
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 4.5,
      "brief_justification": "The code is well-structured and shows a good architectural approach, but the response is cut off mid-implementation, making it incomplete and unusable."
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 6.4,
      "brief_justification": "The response provides a well-structured and accurate partial implementation with good handling of edge cases, but it is severely truncated, missing object validation, enums, required fields, and the main class, rendering the code unrunnable."
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 8.15,
      "brief_justification": ""
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 9.4,
      "brief_justification": ""
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 5.15,
      "brief_justification": ""
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 7.85,
      "brief_justification": ""
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 3.2,
      "brief_justification": "The response is severely truncated, cutting off mid-code. While the initial type validation logic is correct, it fails to implement almost all requested features and is completely unusable in its current state."
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 6.25,
      "brief_justification": ""
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 8.6,
      "brief_justification": "The implementation is correct and covers all requested features. Code is well-structured with clear separation of concerns. The example with expected output is helpful. Minor gaps: no minItems/maxItems for arrays, no exclusiveMinimum/Maximum, but these weren't required. The notes section honestly acknowledges limitations. Deep equal for enum comparison is a nice touch. Overall a high-quality, practical implementation."
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 6.85,
      "brief_justification": "The implementation is well-structured with good class design, clear documentation, and solid architecture. It correctly handles the core requirements with proper type mapping, error collection with path context, and support for advanced features like composition keywords and $ref. However, the response is incomplete - it cuts off mid-method at `_validate_type`, leaving the actual validation logic unimplemented. The design and scaffolding are excellent, but without the complete implementation, it"
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 1.85,
      "brief_justification": "The response is severely incomplete - it only contains an introduction and a code block header but no actual implementation. The validator was never provided, making this response essentially useless despite the promising description."
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 8.8,
      "brief_justification": "The implementation is correct and covers all requested features plus extras like additionalProperties. The code is well-structured with clear separation of concerns, proper TypeScript types, and good error path tracking. Minor issue: integer type validation in validateNumber only checks when schema.type === 'integer' (string comparison), but misses when type is an array containing 'integer'. The usage example is comprehensive and demonstrates all features. Overall an excellent, production-qualit"
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 8.55,
      "brief_justification": "The implementation is well-structured and covers all requested features plus extras (additionalProperties, uniqueItems, contains, multipleOf, etc.). The code is clean and readable with good path tracking for errors. Minor issues: the null type check is redundant (validateSingleType already handles it), and the return value inconsistency (true vs object) could cause confusion, but the validateJson wrapper handles it. The enum check using Array.includes won't work correctly for object values. Over"
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 7.8,
      "brief_justification": "The implementation is largely correct with proper type checking (including the Python bool/int distinction), good path tracking, and collects all errors rather than stopping at first. The code is well-structured and clearly explained. However, the response appears truncated (missing _validate_number method and test examples), and pattern uses fullmatch instead of search (which differs from JSON Schema spec behavior). Missing minItems/maxItems for arrays, and the enum check continues after failur"
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 7.8,
      "brief_justification": "The implementation is largely correct and handles most required features well. Type validation correctly distinguishes null/array/integer edge cases. Path tracking is well-implemented. Minor issues: the validate() method returns an object only at the root level but is also called recursively where the return value is ignored (works but slightly inconsistent); the errors array reset only happens when path==='', which could cause issues if validate() is called multiple times. Missing: additionalPr"
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 8.25,
      "brief_justification": "The implementation covers all required validation features with solid logic and clear structure, though the example snippet is truncated, slightly reducing completeness and overall clarity."
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 4.3,
      "brief_justification": "The response starts a reasonable implementation but is clearly truncated/incomplete - it cuts off mid-sentence during the implementation. While the beginning shows correct approach (type validation, error handling structure), the response fails to deliver the complete validator covering all requested features: required fields, min/max, minLength/maxLength, pattern, enum, nested objects, and array items validation are all missing or incomplete."
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 7.4,
      "brief_justification": "The implementation is largely correct and covers all required features: type validation, required fields, numeric constraints, string constraints, pattern, enum, nested objects, and array items validation. The type detection logic has a minor bug where the integer branch sets json_type incorrectly (the ternary always evaluates to 'integer' for non-bool ints). The response is cut off mid-example, which reduces completeness and usefulness scores. The code structure is clean and well-commented, wit"
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 8.6,
      "brief_justification": "The implementation correctly supports all requested features with clean, well-structured code. It includes proper type checking, nested validation, error collection, and a working example. Minor limitations (e.g., early return on type mismatch, no additionalProperties) are acknowledged in notes."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 8.5,
      "brief_justification": "The code is a robust, well-structured implementation covering all requested features plus many extras (formats, composition, refs). It is logically sound and clearly organized, though the provided snippet is incomplete (cuts off mid-method)."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 0.75,
      "brief_justification": "The response claims to provide a full implementation but contains no actual code, only a placeholder comment. It fails to implement any of the requested features."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 5.85,
      "brief_justification": "The implementation is a solid partial validator covering most requested features with good structure and error handling. However, it is incomplete: the ArrayValidator is truncated mid-function, ObjectValidator and CommonValidator (required, enum, nested objects) are entirely missing from the code, and the main validator class is not shown. This prevents it from fully satisfying the requirements."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 7.35,
      "brief_justification": "The code implements most requested features (type, required, min/max, pattern, enum, nested, array items) but includes extras like const, uniqueItems, contains, formats not asked for, while the validate method returns inconsistent types (boolean vs error object) which breaks the expected contract. The structure is clear, well-commented, and mostly functional from scratch."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 8.6,
      "brief_justification": "The implementation correctly covers all requested JSON Schema features with a clean, recursive design and good error reporting. It is mostly complete (minor gaps like boolean/integer edge cases or schema composition are expected in a minimal version). Code is well-structured, documented, and highly usable as a standalone validator."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 7,
      "brief_justification": "The implementation covers most requested features (types, required, min/max, pattern, enum, nested objects, array items) with recursive validation and error paths. However, it has bugs: required errors use incorrect path (should be `${path}.${key}`), missing required fields don't trigger type checks on other properties, integer type check is flawed for non-numbers, and it doesn't handle schema without explicit type or additionalProperties. The code is well-structured and clear but not fully robu"
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 5.45,
      "brief_justification": "The code is a partial, incomplete implementation that only covers type validation and stops abruptly mid-function. It lacks required fields, min/max, pattern, enum, nested objects, and array items support despite claiming to handle them. The structure is clear but the validator is non-functional for most requested features."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 6.75,
      "brief_justification": "Implements most requested features with a recursive validator and good structure, but code is incomplete (truncated at the end), has some type detection bugs (e.g. integer handling), and does not fully support all edge cases like null type or combined schemas."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 8.6,
      "brief_justification": "Accurate implementation with clear structure and good depth, though not fully spec-compliant."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 8.8,
      "brief_justification": "Comprehensive implementation with detailed validation features and clear structure."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 8.2,
      "brief_justification": "The response promises a complete implementation but only provides a placeholder. However, the described features are comprehensive and would be highly useful if fully implemented."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 8.45,
      "brief_justification": "Well-structured implementation with good coverage of requirements."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 8.6,
      "brief_justification": "Comprehensive implementation with clear examples and thorough feature coverage."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 9.45,
      "brief_justification": "Comprehensive implementation with detailed feature coverage and clear documentation."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 8.8,
      "brief_justification": "Comprehensive implementation with clear examples and thorough coverage of requirements."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 7,
      "brief_justification": "Correct implementation but incomplete, missing some validation logic."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 8.6,
      "brief_justification": "Comprehensive and correct implementation with clear structure and practical utility."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 8.8,
      "brief_justification": ""
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 3.8,
      "brief_justification": "The response includes a partially implemented validator with many advanced features, but it is incomplete (truncated) and lacks required field handling and core numeric/string constraints, reducing correctness and completeness. The code is fairly readable, showing some depth, but limited practical usefulness due to missing functionality."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 2.15,
      "brief_justification": "The answer promises a full validator but provides no actual code or implementation details, failing to meet the request; only a brief description is given, making it largely incorrect and incomplete."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 4.4,
      "brief_justification": "The response outlines a reasonable architecture and implements several validators, but the code is incomplete and omits required fields, enum, nested object handling, and full array item validation, limiting correctness and usefulness."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 8.8,
      "brief_justification": "The TypeScript code correctly implements all requested validation features, is well‑structured and documented, includes useful error handling and extra capabilities, making it practical and insightful."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 9.8,
      "brief_justification": "The implementation is robust, accurate, and covers all requested features including nested objects and array items. The code is well-structured, includes a deep equality check for enums, and provides a clear example."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 6.25,
      "brief_justification": "The code provided is incomplete and cuts off mid-method ('_validate_'). While the architecture and implemented parts are high quality and cover more than the requested features (like $ref and composition), the core requirement to provide a working validator from scratch is not met because the implementation of specific validation logic for types, objects, and arrays is missing."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 2.25,
      "brief_justification": "The response is incomplete; it provides an introductory description but the actual code implementation is missing entirely, making it non-functional for the user's request."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 7.85,
      "brief_justification": "The implementation is high-quality, modular, and handles complex edge cases like floating-point modulo and deep equality. However, the code is truncated at the end, missing the completion of the ArrayValidator and the main ObjectValidator/orchestration logic required by the prompt."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 9.8,
      "brief_justification": "The implementation is excellent, covering all requested features including the tricky 'integer' type and nested object paths. It uses clean TypeScript and provides a helpful usage example."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 9.8,
      "brief_justification": "The implementation is excellent, covering all requested features and even including extra standard JSON Schema keywords like additionalProperties and uniqueItems. The code is clean, well-structured, and handles edge cases like the difference between 'number' and 'integer' correctly."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 9.15,
      "brief_justification": "The implementation is highly accurate and covers all requested features including type validation, numeric/string constraints, enums, and recursion for nested objects and arrays. It correctly handles the edge case where booleans are treated as integers in Python. The code is well-structured and includes a helpful error reporting mechanism with paths."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 5.5,
      "brief_justification": "The response is incomplete; the code implementation cuts off mid-sentence, failing to provide the logic for numeric constraints, string constraints, enums, or nested object/array recursion as requested."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 9.6,
      "brief_justification": "The implementation is robust and correctly handles the nuances of Python type checking (e.g., distinguishing booleans from integers). It covers all requested features including nested objects, array items, and regex patterns. The code is well-structured and includes helpful error reporting with JSON paths."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 8.8,
      "brief_justification": "Well-implemented validator covering all requested features with clear code structure, comprehensive example, and helpful notes about limitations. Minor deduction on depth for not covering edge cases like NaN handling or exclusive min/max."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 5,
      "brief_justification": "Well-structured skeleton with good class design and comprehensive docstrings, but incomplete - the actual validation method implementations (_validate_type, _validate_enum, _validate_numeric, etc.) are missing, leaving only infrastructure without functional code."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 0.4,
      "brief_justification": "Response is incomplete - only contains an introduction and description but no actual implementation code. The code block is empty."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 6.7,
      "brief_justification": "Well-structured implementation with good depth (exclusiveMinimum, multipleOf, uniqueItems, deepEqual). However, the response is truncated mid-code - ArrayValidator validate method is incomplete and main validator orchestration is missing. Required fields and enum validation not clearly shown in the provided code."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 8.8,
      "brief_justification": "Comprehensive implementation covering all required features with correct logic, well-structured code, clear error paths, and practical API with usage examples."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 8.55,
      "brief_justification": "Comprehensive implementation covering all requested features with clear code structure, usage examples, and additional bonus features like uniqueItems, contains, and format validation. Well-organized and immediately usable."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 7.4,
      "brief_justification": "Well-structured implementation with good code quality and clear documentation. However, the table lists minimum/maximum support but the _validate_number method is empty - these constraints are not actually implemented, which is a significant omission."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 8.6,
      "brief_justification": "Comprehensive implementation covering all requested features with correct logic. Well-structured class with clear method separation, proper recursion for nested structures, and useful path tracking for error reporting. The usage example demonstrates all key features effectively."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 7.05,
      "brief_justification": "Well-structured implementation covering all required features with good depth (exclusive min/max, additionalProperties, tuple validation). However, the response is truncated mid-code (ends at \"zip\":), making it incomplete and unusable as-is."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 8.6,
      "brief_justification": "Accurate implementation of requested features, clear structure, good depth with edge cases, and practical example."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 4.4,
      "brief_justification": "The response is incomplete and cut off mid-implementation, missing many required features like nested objects, array items validation, and the main validation logic. While the structure is clear and the partial code is correct, it fails to deliver a working validator as requested."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 8.35,
      "brief_justification": "The response provides a complete, functional implementation with recursive validation and error collection, covering all required features. It is clear and practical, though the code snippet is not fully shown in the provided text."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 7.65,
      "brief_justification": "Implementation is well-structured and covers most required features, but incomplete (array validation cut off) and missing some edge cases like exclusiveMinimum/Maximum handling."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 9,
      "brief_justification": "Comprehensive implementation covering all requested features with clear structure and practical examples; minor oversight in integer type validation for arrays."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 8.8,
      "brief_justification": "Comprehensive and accurate implementation covering all requested features with clear structure and practical examples."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 8.6,
      "brief_justification": "The solution is highly accurate, well-structured, and implements all requested features using standard Python. It lacks support for 'const' and 'not' keywords mentioned in the prompt, and does not validate array 'minItems'/'maxItems', which prevents a perfect completeness score."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 8.6,
      "brief_justification": "The implementation is highly accurate, well-structured, and covers all requested features including recursion for nested objects and arrays. It correctly handles type distinctions (integer vs number, null) and path tracking. Minor deduction in completeness for not explicitly handling 'additionalProperties' or 'enum' on non-string/number types (though the code handles it, it's not explicitly tested in the example), but it fulfills the core requirements robustly."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 0.2,
      "brief_justification": "The response is incomplete and cuts off abruptly in the middle of the code implementation (inside the 'isArray' variable declaration). It fails to provide a working solution for any of the requested features."
    }
  ],
  "meta": {
    "source": "The Multivac (app.themultivac.com)",
    "methodology": "10x10 blind peer matrix evaluation",
    "criteria": "correctness, completeness, clarity, depth, usefulness",
    "self_judgments": "excluded from rankings",
    "license": "Open data — cite as: The Multivac (2026)"
  }
}