{
  "evaluation_id": "EVAL-20260402-143013",
  "question_id": "CODE-024",
  "question_text": "Implement an HTTP/1.1 server from raw TCP sockets in Python (no http.server, no frameworks). Support: GET and POST methods, proper header parsing, chunked transfer encoding, keep-alive connections, static file serving, and a 404 handler. Handle malformed requests gracefully.",
  "category": "code",
  "timestamp": "2026-04-02T14:30:13.046Z",
  "display_date": "Apr 02, 2026",
  "winner": {
    "name": "MiniMax M2.5",
    "provider": "openrouter",
    "score": 8.29
  },
  "avg_score": 6.605,
  "matrix_size": 86,
  "models_used": [
    {
      "id": "gpt_5_4",
      "name": "GPT-5.4",
      "provider": "openrouter"
    },
    {
      "id": "claude_opus_46",
      "name": "Claude Opus 4.6",
      "provider": "openrouter"
    },
    {
      "id": "claude_sonnet_46",
      "name": "Claude Sonnet 4.6",
      "provider": "openrouter"
    },
    {
      "id": "grok_420",
      "name": "Grok 4.20",
      "provider": "openrouter"
    },
    {
      "id": "deepseek_v4",
      "name": "DeepSeek V4",
      "provider": "openrouter"
    },
    {
      "id": "gpt_oss_120b",
      "name": "GPT-OSS-120B",
      "provider": "OpenAI"
    },
    {
      "id": "gemini_3_flash",
      "name": "Gemini 3 Flash Preview",
      "provider": "Google"
    },
    {
      "id": "minimax_m25",
      "name": "MiniMax M2.5",
      "provider": "openrouter"
    },
    {
      "id": "mimo_v2_flash",
      "name": "MiMo-V2-Flash",
      "provider": "Xiaomi"
    },
    {
      "id": "gemini_31_pro",
      "name": "Gemini 3.1 Pro",
      "provider": "openrouter"
    }
  ],
  "rankings": {
    "minimax_m25": {
      "display_name": "MiniMax M2.5",
      "provider": "openrouter",
      "average_score": 8.29,
      "score_count": 9,
      "min_score": 6.75,
      "max_score": 9.55,
      "rank": 1
    },
    "gemini_3_flash": {
      "display_name": "Gemini 3 Flash Preview",
      "provider": "Google",
      "average_score": 7.69,
      "score_count": 9,
      "min_score": 6.75,
      "max_score": 8.6,
      "rank": 2
    },
    "gpt_5_4": {
      "display_name": "GPT-5.4",
      "provider": "openrouter",
      "average_score": 7.41,
      "score_count": 8,
      "min_score": 4.95,
      "max_score": 9.15,
      "rank": 3
    },
    "grok_420": {
      "display_name": "Grok 4.20",
      "provider": "openrouter",
      "average_score": 7.3,
      "score_count": 9,
      "min_score": 4.95,
      "max_score": 9.15,
      "rank": 4
    },
    "deepseek_v4": {
      "display_name": "DeepSeek V4",
      "provider": "openrouter",
      "average_score": 7.15,
      "score_count": 8,
      "min_score": 5.4,
      "max_score": 9.15,
      "rank": 5
    },
    "claude_sonnet_46": {
      "display_name": "Claude Sonnet 4.6",
      "provider": "openrouter",
      "average_score": 6.61,
      "score_count": 9,
      "min_score": 2.65,
      "max_score": 8.8,
      "rank": 6
    },
    "gpt_oss_120b": {
      "display_name": "GPT-OSS-120B",
      "provider": "OpenAI",
      "average_score": 6.48,
      "score_count": 9,
      "min_score": 3.25,
      "max_score": 8.8,
      "rank": 7
    },
    "mimo_v2_flash": {
      "display_name": "MiMo-V2-Flash",
      "provider": "Xiaomi",
      "average_score": 5.96,
      "score_count": 8,
      "min_score": 3.25,
      "max_score": 8.8,
      "rank": 8
    },
    "claude_opus_46": {
      "display_name": "Claude Opus 4.6",
      "provider": "openrouter",
      "average_score": 5.63,
      "score_count": 9,
      "min_score": 2.25,
      "max_score": 8,
      "rank": 9
    },
    "gemini_31_pro": {
      "display_name": "Gemini 3.1 Pro",
      "provider": "openrouter",
      "average_score": 3.53,
      "score_count": 8,
      "min_score": 1.15,
      "max_score": 8.2,
      "rank": 10
    }
  },
  "judgments": [
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 5.95,
      "brief_justification": "The provided code demonstrates a deep understanding of HTTP parsing and raw socket handling, but the response cuts off abruptly before implementing the server loop and socket binding, rendering it incomplete and unrunnable."
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 4.9,
      "brief_justification": "The code demonstrates a solid architecture and handles raw socket reading well, but the response cuts off mid-implementation, missing the server loop, handlers, and static file serving."
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 4.1,
      "brief_justification": "The response is severely truncated, missing the core server implementation, request parsing, and routing logic, though the provided utility functions are well-written."
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 6.95,
      "brief_justification": ""
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 5.4,
      "brief_justification": ""
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 4.1,
      "brief_justification": "The response is incomplete and cuts off before implementing the core server logic, missing the requested features such as request parsing, chunked encoding, and routing."
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 7.6,
      "brief_justification": "The code successfully implements the requested features including chunked encoding and keep-alive. However, the response cuts off abruptly at the end, and the server hardcodes the Content-Type to text/plain for all files."
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 9.55,
      "brief_justification": ""
    },
    {
      "judge_key": "gemini_31_pro",
      "judge_name": "Gemini 3.1 Pro",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 4.25,
      "brief_justification": "The response is incomplete, cutting off before implementing POST, the 404 handler, and the send_response method. It also contains a bug where handle_logic returns 4 values but only 3 are unpacked."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 2.25,
      "brief_justification": "The response is truncated and incomplete, ending mid-function, so it does not actually implement the requested server. What is shown is reasonably organized and partially correct, but key required features cannot be verified or used."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 1.65,
      "brief_justification": "The response is only an introductory claim with no actual implementation or substantive explanation, so it does not fulfill the request despite being clearly phrased."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 6.75,
      "brief_justification": "The implementation covers most requirements but has significant bugs in request parsing, body reading for Content-Length, and chunked decoding that can lead to incorrect behavior or hangs. It is mostly clear and useful as a starting point but not fully robust."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 2.65,
      "brief_justification": "The response is only a partial implementation and stops at the request parser heading, so it does not actually provide a working server. Existing code sections are mostly reasonable, but key requirements like socket accept loop, request parsing, GET/POST handling, keep-alive behavior, static file serving flow, 404 handling, and malformed request handling are missing or incomplete."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 4.95,
      "brief_justification": "Readable and mostly on-topic, but the implementation is incomplete/truncated and has important HTTP/1.1 flaws: malformed requests are often dropped without a proper error response, chunked request parsing is not robust or fully read from the socket, keep-alive/pipelining buffering is incorrect, and header/body handling is only partial. Static serving and 404 support are present, but overall correctness is limited."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 6.15,
      "brief_justification": "Readable and mostly aligned with the prompt, but several HTTP/1.1 details are flawed: request reading is not robust for keep-alive, only POST bodies are handled, chunked parsing is incomplete, malformed handling can reference undefined headers, and static serving lacks path traversal protection. Still, it covers the requested feature areas at a basic level."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 3.25,
      "brief_justification": "The response is clearly organized and starts with a reasonable design, but the implementation is truncated mid-function and does not actually provide a complete working server. As given, it fails the user's request for a full implementation with all required features."
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 6.75,
      "brief_justification": "Clear and mostly workable raw-socket HTTP server with GET/POST, basic header parsing, chunked request-body handling, keep-alive loop, static files, and 404/405 responses. However, it has notable HTTP/1.1 issues: malformed requests are usually dropped instead of receiving 400, keep-alive behavior and Connection headers are not fully correct, chunked parsing ignores extensions/trailers and some edge cases, header parsing can crash on malformed lines, content types are fixed, and the example/testin"
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 6.75,
      "brief_justification": "Clear and mostly functional, but not fully correct for HTTP/1.1. It parses basic requests and supports GET/POST, static files, 404s, keep-alive, and chunked request bodies, yet it has important protocol flaws: no handling of pipelined/leftover bytes across keep-alive requests, incorrect default keep-alive semantics for HTTP/1.1 requests without Connection header, always responds with Connection: keep-alive, simplistic header parsing, incomplete chunked decoding (no trailers/extensions validation"
    },
    {
      "judge_key": "gpt_5_4",
      "judge_name": "GPT-5.4",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 3.25,
      "brief_justification": "The response is partially clear and structured, but the implementation is incomplete/truncated and contains correctness issues. It does not fully implement the requested server features end-to-end, omits key parts like full response handling and complete routing, and has flaws around static path safety and chunked/keep-alive robustness."
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 7.4,
      "brief_justification": "The implementation is well-structured and covers most of the required features: GET/POST methods, header parsing with continuation lines, chunked transfer encoding, keep-alive connections, static file serving, 404 handling, and malformed request handling. The code demonstrates good understanding of HTTP/1.1 protocol details (iso-8859-1 encoding, chunk extensions, trailer headers, path traversal protection). However, the response is truncated - the process_request function is cut off mid-string, "
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 1.2,
      "brief_justification": "The response is essentially empty - it describes what it will implement but provides no actual code. It mentions a 'Directory Setup' section header but cuts off immediately. There is no implementation, no code, no working server. The response fails to deliver on any of the stated requirements."
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 6.5,
      "brief_justification": "The response is well-structured and demonstrates good understanding of HTTP/1.1 concepts including proper header parsing, chunked encoding, keep-alive, path traversal protection, and graceful error handling. However, the code is clearly truncated - the Request Parser section is cut off mid-comment, meaning the actual connection handling loop, request parsing logic, keep-alive implementation, static file serving, POST handling, and 404 handler are all missing. While the portions shown (data class"
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 6.4,
      "brief_justification": "The implementation covers the basic requirements but has several issues. Correctness: The chunked transfer encoding parsing only works if all chunked data arrives in the initial body_part buffer - it doesn't read more data from the socket for incomplete chunks. The Connection header may be written twice (once in headers dict, once hardcoded). The keep-alive logic doesn't handle HTTP/1.1 default (keep-alive unless 'close'). The server is single-threaded with no concurrency, meaning keep-alive blo"
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 6.4,
      "brief_justification": "The implementation covers the basic requirements but has several correctness issues. The receive_all function has a problematic approach: it uses a 1-second timeout which is fragile, and only checks for POST when determining if body reading is needed (other methods can have bodies too). The keep-alive implementation has a bug - the 'headers' variable referenced in the Connection: close check may be uninitialized if parse_request fails. The chunked transfer encoding decoding is implemented but on"
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 5.25,
      "brief_justification": "The response provides a good high-level overview and starts with well-structured code, but the implementation is critically incomplete - it cuts off mid-function at handle_client(), missing the core parsing logic (request line parsing, header parsing, chunked transfer encoding reading, body reading), the routing/dispatch logic, static file serving, the 404 handler, and malformed request handling. While the architectural design shown is sound and the data structures (HTTPRequest, HTTPResponse) ar"
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 7.4,
      "brief_justification": "The implementation covers the core requirements: GET/POST handling, header parsing, chunked transfer encoding, keep-alive connections, static file serving, 404 handler, and basic malformed request handling. However, there are notable issues: (1) read_until reads one byte at a time which is very inefficient, (2) Content-Type is always text/plain even for HTML files - no MIME type detection, (3) the keep-alive logic doesn't properly respect HTTP/1.1 defaults (keep-alive is default in 1.1, but the "
    },
    {
      "judge_key": "claude_opus_46",
      "judge_name": "Claude Opus 4.6",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 7.55,
      "brief_justification": "The implementation covers the core requirements: GET/POST, header parsing, chunked transfer encoding decoding, keep-alive via a loop with timeout, static file serving with MIME types, 404 handler, and malformed request handling with 400 responses. However, there are correctness issues: (1) The keep-alive logic defaults to 'close' when the Connection header is absent, but HTTP/1.1 defaults to keep-alive. (2) The response always sends 'Connection: keep-alive' regardless of the client's request or "
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 8.2,
      "brief_justification": "The implementation is well-structured and covers most requirements: raw TCP sockets, GET/POST, header parsing, chunked transfer encoding, keep-alive, static file serving, and 404 handling. The code is clean and readable with good error handling via HTTPError exceptions. Minor issues: the response appears cut off at the end (missing the connection handler loop and server startup code), and chunked transfer encoding for responses (not just requests) isn't implemented. The path traversal protection"
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 6.05,
      "brief_justification": "The implementation is well-structured with good design patterns (SocketReader, RequestParser, dataclasses, proper constants), but the response is truncated mid-implementation - it cuts off at `RequestParser._read_chun` without completing the chunked body reading, response serialization, connection handling, static file serving, or the server loop. What exists is correct and well-written, but the missing portions are critical to the stated requirements (chunked encoding, keep-alive, static servin"
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 1.9,
      "brief_justification": "The response is incomplete - it only shows a directory setup header and never provides the actual code implementation. Despite claiming to fulfill all requirements, no working code was delivered. The response is essentially empty of substance."
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 7,
      "brief_justification": "The implementation is largely correct and well-structured, covering GET/POST, header parsing, keep-alive, chunked encoding, static file serving, and 404 handling. However, it has notable issues: the server is single-threaded (no threading/select for concurrent connections), the chunked transfer encoding is only for incoming requests (not outgoing responses), the send_response method adds a duplicate Connection header when should_close is true (it's already in the headers dict), and the code is t"
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 7,
      "brief_justification": "The implementation covers most required features: GET/POST, header parsing, chunked decoding, keep-alive, static file serving, and 404 handling. However, there are notable issues: the receive_all method has a fragile approach (1-second timeout, only checks POST for chunked), the keep-alive loop doesn't properly re-parse headers after a failed parse (headers variable used before assignment in error path), chunked transfer encoding is only decoded for POST bodies but not sent as chunked responses,"
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 7.2,
      "brief_justification": "The response provides a well-structured, clearly explained HTTP/1.1 server implementation with good architecture documentation. However, the code appears to be cut off before completion - the handle_client method is incomplete, and several promised features (chunked transfer encoding reading, keep-alive loop, POST handling, 404 handler) are described but not fully shown in the truncated code. The portions shown are correct and well-written, but the incomplete response significantly reduces compl"
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 7.8,
      "brief_justification": "The implementation is largely correct and functional, covering GET/POST, header parsing, chunked transfer encoding, keep-alive, static file serving, 404 handling, and basic security. The code is clean and well-explained. Minor issues: read_until reads byte-by-byte (inefficient), Content-Type detection is missing (always text/plain), the keep-alive logic doesn't respect HTTP/1.1 defaults (should default to keep-alive unless Connection: close), and the response doesn't honor the client's Connectio"
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 8.15,
      "brief_justification": "The implementation is solid and covers all required features: raw TCP sockets, GET/POST, header parsing, chunked transfer encoding, keep-alive, static file serving, and 404/error handling. The code is well-structured and clearly explained. Minor issues: the keep-alive response header is always 'keep-alive' regardless of client request; HTTP/1.1 defaults to keep-alive so the Connection check should also handle that default; chunked encoding is only decoded for incoming requests but not used for o"
    },
    {
      "judge_key": "claude_sonnet_46",
      "judge_name": "Claude Sonnet 4.6",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 6.85,
      "brief_justification": "The implementation demonstrates solid understanding of HTTP/1.1 concepts with good structure and comments. However, the code is truncated - the handle_logic method is cut off mid-function, missing the POST handler, 404 handler, and send_response method. The chunked encoding parsing and keep-alive logic are correctly implemented. The return tuple inconsistency (sometimes 3 elements, sometimes 4) suggests incomplete refactoring. Despite being incomplete, the shown portions are largely correct and "
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 6.6,
      "brief_justification": "The implementation covers most required features but has critical bugs: incomplete function definition, missing closing quote/parenthesis, unhandled exceptions in main loop, no proper 404 handler integration, and potential socket leaks. Code is readable but not fully functional or robust."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 6.05,
      "brief_justification": "The code is a solid partial implementation with good structure, proper error handling, and useful classes, but it is incomplete (body parsing for chunked/POST is cut off mid-function) and lacks the actual server loop, response sending, static file serving, and 404 handler required by the spec."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 3.65,
      "brief_justification": "The response is incomplete as it only provides setup instructions and a partial code skeleton without implementing the core server logic, header parsing, request handling, chunked encoding, or error handling as requested."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 8.65,
      "brief_justification": "The code is a high-quality, well-structured implementation covering nearly all requested features with robust parsing, security guards, and utilities. Minor incompleteness: the provided snippet cuts off before the actual socket handling, request serving logic, and main server loop, so it doesn't fully implement a runnable server."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 6.75,
      "brief_justification": "Implements most requested features but has significant bugs in request parsing, receive_all logic, keep-alive handling, and header parsing that prevent reliable operation. Code is readable and well-structured with good comments."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 5.85,
      "brief_justification": "The response provides a good high-level overview and partial implementation with solid structure and explanations. However, it is incomplete (code is truncated mid-function), lacks full support for chunked encoding parsing, proper request routing for GET/POST and static files, and 404 handling as claimed. The provided code cannot run as a full server."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 6.75,
      "brief_justification": "The implementation covers most required features but has bugs: read_until can loop forever on partial data, header parsing assumes no folded headers and doesn't handle multiple values, chunked parser doesn't fully follow spec (trailing headers, extensions), content-type is always text/plain, and response for files should use appropriate MIME type. Overall functional but not fully robust."
    },
    {
      "judge_key": "grok_420",
      "judge_name": "Grok 4.20",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 6.05,
      "brief_justification": "The code implements a basic HTTP/1.1 server with good structure and comments, but is incomplete (handle_logic and send_response are truncated, POST not fully handled, chunked response missing, and some logic errors like returning extra tuple values). It covers core parsing but lacks full robustness and all required features."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 9,
      "brief_justification": "Comprehensive implementation with all requested features, well-structured and clear."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 8,
      "brief_justification": "Well-structured and detailed implementation with good error handling, but incomplete (missing POST handling and chunked transfer encoding completion)."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 8.2,
      "brief_justification": "Detailed and functional, but lacks clarity in explanation."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 8.6,
      "brief_justification": "Comprehensive implementation with robust features, clear structure, and good error handling."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 8.6,
      "brief_justification": "Comprehensive implementation with proper handling of HTTP/1.1 features, security considerations, and error handling."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 8.8,
      "brief_justification": "Comprehensive and well-structured with detailed implementation."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 8.6,
      "brief_justification": "Comprehensive implementation with clear explanations and robust features."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 8.8,
      "brief_justification": "Comprehensive implementation with clear explanations and robust features."
    },
    {
      "judge_key": "deepseek_v4",
      "judge_name": "DeepSeek V4",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 8.8,
      "brief_justification": "Comprehensive and correct implementation with detailed features and robustness."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 4.95,
      "brief_justification": "The answer includes many core components (parsing, chunked handling, static files) and shows good depth, but it is missing the server loop, has a syntax error, and is truncated, making it incomplete and partially incorrect."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 4.35,
      "brief_justification": "The answer provides a well-structured, commented code skeleton with thoughtful parsing and error handling, but it is incomplete—key parts like chunked body reading, request handling loop, static file serving, and 404 response are missing, preventing a functional implementation."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 1.15,
      "brief_justification": "Response is truncated and lacks the required implementation, making it incorrect, incomplete, and of limited usefulness despite a brief clear intro."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 5.95,
      "brief_justification": "The code shows a well-structured, clear design with detailed utilities and classes, demonstrating depth. However, it is incomplete (truncated before request parsing and server loop) and thus lacks full correctness and completeness, limiting its immediate usefulness."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 7.35,
      "brief_justification": "The code implements most required features with clear structure, but has gaps in robust malformed request handling and full chunked support, limiting full correctness."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 8.55,
      "brief_justification": "Implements required features with clear code and explanations; minor edge‑case handling gaps keep it from perfect correctness."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 8,
      "brief_justification": "The answer supplies a functional Python socket server covering GET/POST, header parsing, chunked encoding, keep-alive, static files, 404 handling, and basic error resilience. It is well‑structured and explained, though it lacks a proper 400 response for malformed requests and could refine keep-alive handling."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 8.55,
      "brief_justification": "The answer supplies a functional raw-socket HTTP/1.1 server covering all requested features, with clear code and explanations. Minor edge‑case handling (e.g., exact body length management, connection header handling) could be tighter, but overall it is accurate, thorough, well‑structured, and immediately usable."
    },
    {
      "judge_key": "gpt_oss_120b",
      "judge_name": "GPT-OSS-120B",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 4.5,
      "brief_justification": "The answer includes a partially working server with clear structure and some advanced parsing (chunked, keep-alive), but the code is incomplete (missing send_response, POST handling, proper 404 response, and final logic), contains mismatched return values, and cannot run as‑is, limiting its correctness and completeness."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 9.15,
      "brief_justification": "The implementation is robust and covers all requested features, including the complex chunked transfer encoding and keep-alive logic. It includes security considerations like path traversal protection and handles malformed requests with a custom exception system. The code is clean and well-structured."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 7.45,
      "brief_justification": "The implementation is high quality, well-structured, and handles socket buffering and header parsing correctly. However, the response is incomplete as the code cuts off mid-method in the RequestParser class, missing the actual body/chunked reading logic and the main server loop."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 2.85,
      "brief_justification": "The response is incomplete. It provides an introductory description and a directory setup section but fails to provide the actual Python code implementation requested."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 8.8,
      "brief_justification": "The implementation is high-quality, providing robust utility functions for chunked encoding, path traversal protection, and HTTP date formatting. While the provided snippet cuts off before the main loop logic, the architectural components (HTTPRequest/Response classes and helpers) are accurate and follow RFC 7231 standards."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 9.15,
      "brief_justification": "The implementation is robust and covers all requested features including chunked encoding, keep-alive, and security considerations like directory traversal protection. The code is clean and well-structured. The only minor issue is that the usage example at the very end is slightly truncated, but the core logic is fully functional."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 9.15,
      "brief_justification": "The implementation is robust and accurately addresses all requirements, including complex tasks like chunked encoding and keep-alive. It includes thoughtful additions like MIME type detection and 304 caching logic."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 8.45,
      "brief_justification": "The code provided is high quality, implementing complex features like chunked encoding and directory traversal protection correctly. However, the response was truncated, missing the actual implementation of the handle_client and parsing logic, which were core requirements."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 9.55,
      "brief_justification": "The implementation is excellent and covers all requirements including the more complex 'chunked transfer encoding' and 'keep-alive' logic. It includes security considerations like directory traversal protection and handles malformed requests via try-except blocks."
    },
    {
      "judge_key": "gemini_3_flash",
      "judge_name": "Gemini 3 Flash Preview",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 8.6,
      "brief_justification": "The implementation is robust and correctly handles the complexities of HTTP/1.1, including chunked transfer encoding and keep-alive connections. It includes security considerations like directory traversal prevention and handles malformed requests. The code is well-structured and easy to follow."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "gpt_5_4",
      "respondent_name": "GPT-5.4",
      "weighted_score": 8.05,
      "brief_justification": "Comprehensive HTTP/1.1 server implementation covering all required features with good structure and error handling. However, there is a syntax error at the very end (missing closing quote and parenthesis in the HTTPError line) that would cause a parse failure."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 5.75,
      "brief_justification": "Well-structured code with proper HTTP/1.1 implementation details, but response is severely truncated - cuts off mid-implementation during chunked body reading, leaving critical functionality incomplete."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 5.45,
      "brief_justification": "Code structure is excellent with proper classes and utility functions, and shows good HTTP/1.1 knowledge (chunked encoding, keep-alive, path traversal protection). However, the response is severely incomplete - it ends abruptly at '# Request Parser' section header, missing the actual request parser, server loop, connection handling, GET/POST handlers, static file serving, and 404 handler. The code cannot run as provided."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 7.15,
      "brief_justification": "Implements all required features with good depth including security (path traversal), MIME types, and HTTP date formatting. However, has critical bugs: chunked encoding only reads initial data without fetching remaining chunks from socket, and the code is truncated at the end (missing closing parenthesis) making it unrunnable. Keep-alive logic also has subtle issues with incomplete reads."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 6.85,
      "brief_justification": "Well-structured explanation with good depth on HTTP/1.1 concepts, but the code is incomplete - it ends abruptly mid-method and cannot actually run. The architectural overview and class design are clear, but the critical implementation details are missing."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 8.2,
      "brief_justification": "Comprehensive implementation covering all required features (GET, POST, header parsing, chunked encoding, keep-alive, static files, 404, error handling). Well-structured with good explanations. Minor issues: always sets Connection: keep-alive regardless of client request, Content-Type always text/plain, and some mixed string/bytes handling. Overall solid and functional."
    },
    {
      "judge_key": "minimax_m25",
      "judge_name": "MiniMax M2.5",
      "respondent_key": "mimo_v2_flash",
      "respondent_name": "MiMo-V2-Flash",
      "weighted_score": 5.4,
      "brief_justification": "Implementation shows good understanding but is incomplete - the code cuts off mid-method, missing the POST handler and send_response method entirely. Also has a return value mismatch (returns 4 values in GET but caller expects 3) and chunked encoding parser bugs."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "claude_opus_46",
      "respondent_name": "Claude Opus 4.6",
      "weighted_score": 5.85,
      "brief_justification": "Implementation is incomplete (cuts off mid-function), missing chunked encoding parsing and static file serving logic. Code structure is clear but lacks full functionality for the requested features."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "gemini_31_pro",
      "respondent_name": "Gemini 3.1 Pro",
      "weighted_score": 7.65,
      "brief_justification": "The response starts with a strong claim of a complete implementation but only provides a directory setup section, lacking the actual code. This makes it incomplete and less useful, though the structure and clarity are good."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "claude_sonnet_46",
      "respondent_name": "Claude Sonnet 4.6",
      "weighted_score": 8.8,
      "brief_justification": "The provided code implements a robust HTTP/1.1 server with raw sockets, covering all requested features including chunked encoding, keep-alive, and security measures. It uses modern Python practices (type hints, dataclasses) and handles edge cases well. The implementation is slightly incomplete as it lacks the main server loop and request handling dispatch logic shown in the prompt's cut-off source, but the core components are fully functional."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "grok_420",
      "respondent_name": "Grok 4.20",
      "weighted_score": 8.15,
      "brief_justification": "Solid implementation covering most requirements. Missing explicit handling of malformed requests (e.g., invalid headers) and has a minor bug in chunked parsing (missing trailer handling). Overall clear and useful."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "deepseek_v4",
      "respondent_name": "DeepSeek V4",
      "weighted_score": 7.8,
      "brief_justification": "Implementation covers core requirements with good structure, but has minor issues like incomplete chunked encoding handling and lack of request pipelining."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "gpt_oss_120b",
      "respondent_name": "GPT-OSS-120B",
      "weighted_score": 8.6,
      "brief_justification": "The response provides a robust, single-file implementation covering all requested features (GET/POST, chunked encoding, keep-alive, static files). It includes good error handling and a clear architectural overview. The code is cut off mid-function, but the provided logic is sound and sufficient to demonstrate the core requirements."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "gemini_3_flash",
      "respondent_name": "Gemini 3 Flash Preview",
      "weighted_score": 8.15,
      "brief_justification": "Implementation is largely correct and covers most requirements, with clear structure and good depth. Minor issues: chunked parsing may not handle trailers, and keep-alive logic is simplistic."
    },
    {
      "judge_key": "mimo_v2_flash",
      "judge_name": "MiMo-V2-Flash",
      "respondent_key": "minimax_m25",
      "respondent_name": "MiniMax M2.5",
      "weighted_score": 8.95,
      "brief_justification": "The code is highly accurate and implements all requested features (GET/POST, chunked, keep-alive, static files, 404). It includes a robust testing guide and handles edge cases like directory traversal and malformed headers. Minor deduction in correctness for a potential race condition in the chunked decoder buffer management, but overall it is a very high-quality implementation."
    }
  ],
  "meta": {
    "source": "The Multivac (app.themultivac.com)",
    "methodology": "10x10 blind peer matrix evaluation",
    "criteria": "correctness, completeness, clarity, depth, usefulness",
    "self_judgments": "excluded from rankings",
    "license": "Open data — cite as: The Multivac (2026)"
  }
}