{
  "evaluation_id": "EVAL-20260315-043801",
  "question_id": "EVAL-20260315-043801",
  "question_text": "Implement an LRU cache with per-key TTL...",
  "category": "code",
  "timestamp": "2026-03-15T04:38:01.133Z",
  "display_date": "Mar 15, 2026",
  "winner": {
    "name": "Gemma 3 27B",
    "provider": "openrouter",
    "score": 9.06
  },
  "avg_score": 8.654445,
  "matrix_size": 69,
  "models_used": [
    {
      "id": "devstral",
      "name": "Devstral Small",
      "provider": "openrouter"
    },
    {
      "id": "gemma3_27b",
      "name": "Gemma 3 27B",
      "provider": "openrouter"
    },
    {
      "id": "llama4_scout",
      "name": "Llama 4 Scout",
      "provider": "openrouter"
    },
    {
      "id": "phi4",
      "name": "Phi-4 14B",
      "provider": "openrouter"
    },
    {
      "id": "granite_40",
      "name": "Granite 4.0 Micro",
      "provider": "openrouter"
    },
    {
      "id": "qwen3_8b",
      "name": "Qwen 3 8B",
      "provider": "openrouter"
    },
    {
      "id": "mistral_nemo",
      "name": "Mistral Nemo 12B",
      "provider": "openrouter"
    },
    {
      "id": "llama31_8b",
      "name": "Llama 3.1 8B",
      "provider": "openrouter"
    },
    {
      "id": "qwen3_32b",
      "name": "Qwen 3 32B",
      "provider": "openrouter"
    }
  ],
  "rankings": {
    "gemma3_27b": {
      "display_name": "Gemma 3 27B",
      "provider": "openrouter",
      "average_score": 9.06,
      "score_count": 8,
      "min_score": 8.2,
      "max_score": 9.6,
      "rank": 1
    },
    "qwen3_8b": {
      "display_name": "Qwen 3 8B",
      "provider": "openrouter",
      "average_score": 9.05,
      "score_count": 8,
      "min_score": 8.25,
      "max_score": 9.6,
      "rank": 2
    },
    "qwen3_32b": {
      "display_name": "Qwen 3 32B",
      "provider": "openrouter",
      "average_score": 8.99,
      "score_count": 8,
      "min_score": 8.25,
      "max_score": 9.6,
      "rank": 3
    },
    "phi4": {
      "display_name": "Phi-4 14B",
      "provider": "openrouter",
      "average_score": 8.91,
      "score_count": 8,
      "min_score": 7.6,
      "max_score": 9.6,
      "rank": 4
    },
    "devstral": {
      "display_name": "Devstral Small",
      "provider": "openrouter",
      "average_score": 8.66,
      "score_count": 6,
      "min_score": 8.25,
      "max_score": 9.05,
      "rank": 5
    },
    "granite_40": {
      "display_name": "Granite 4.0 Micro",
      "provider": "openrouter",
      "average_score": 8.65,
      "score_count": 8,
      "min_score": 8,
      "max_score": 9.05,
      "rank": 6
    },
    "llama4_scout": {
      "display_name": "Llama 4 Scout",
      "provider": "openrouter",
      "average_score": 8.59,
      "score_count": 7,
      "min_score": 8.25,
      "max_score": 9.05,
      "rank": 7
    },
    "mistral_nemo": {
      "display_name": "Mistral Nemo 12B",
      "provider": "openrouter",
      "average_score": 8.16,
      "score_count": 8,
      "min_score": 7.2,
      "max_score": 8.85,
      "rank": 8
    },
    "llama31_8b": {
      "display_name": "Llama 3.1 8B",
      "provider": "openrouter",
      "average_score": 7.82,
      "score_count": 8,
      "min_score": 5.2,
      "max_score": 8.8,
      "rank": 9
    }
  },
  "judgments": [
    {
      "judge_key": "qwen3_32b",
      "judge_name": "Qwen 3 32B",
      "respondent_key": "devstral",
      "respondent_name": "Devstral Small",
      "weighted_score": 8.25,
      "brief_justification": "The implementation correctly combines LRU eviction with per-key TTL using OrderedDict and a TTL dictionary. It handles basic scenarios but lacks automatic background expiration checks for keys not accessed after expiration, which limits completeness and depth."
    },
    {
      "judge_key": "qwen3_32b",
      "judge_name": "Qwen 3 32B",
      "respondent_key": "gemma3_27b",
      "respondent_name": "Gemma 3 27B",
      "weighted_score": 8.2,
      "brief_justification": "The code correctly implements LRU eviction and per-key TTL with thread safety. However, the LRU list uses O(n) operations for removals, which is inefficient for large caches. Completeness is high as all required features are present, but could be improved with a more optimal data structure like OrderedDict or linked list for LRU tracking."
    },
    {
      "judge_key": "qwen3_32b",
      "judge_name": "Qwen 3 32B",
      "respondent_key": "llama4_scout",
      "respondent_name": "Llama 4 Scout",
      "weighted_score": 8.8,
      "brief_justification": "The implementation correctly combines LRU eviction and per-key TTL using an OrderedDict. It's well-documented with thread safety and example usage, though manual/explicit cleanup is required. The advice section provides practical guidance for real-world use."
    },
    {
      "judge_key": "qwen3_32b",
      "judge_name": "Qwen 3 32B",
      "respondent_key": "phi4",
      "respondent_name": "Phi-4 14B",
      "weighted_score": 7.6,
      "brief_justification": "The implementation correctly combines LRU eviction with per-key TTL using a doubly linked list and dictionary. However, it only checks for expired keys during get/put operations, leaving stale entries in the cache until accessed or evicted. The code is clear and structured but lacks proactive cleanup of expired keys, which could be improved for completeness and real-world utility."
    },
    {
      "judge_key": "qwen3_32b",
      "judge_name": "Qwen 3 32B",
      "respondent_key": "granite_40",
      "respondent_name": "Granite 4.0 Micro",
      "weighted_score": 8,
      "brief_justification": "The code correctly implements an LRU cache with per-key TTL using OrderedDict and threading.Lock, but stores TTL in two separate dictionaries which is redundant. It provides necessary methods and is thread-safe, though manual cleanup of expired items is required."
    },
    {
      "judge_key": "qwen3_32b",
      "judge_name": "Qwen 3 32B",
      "respondent_key": "qwen3_8b",
      "respondent_name": "Qwen 3 8B",
      "weighted_score": 8.8,
      "brief_justification": "The code correctly implements LRU eviction and per-key TTL using OrderedDict, but the design explanation mistakenly references a separate dict for expiration. It lacks active cleanup for expired keys, though this limitation is acknowledged. The solution is practical and well-documented with clear examples and trade-off considerations."
    },
    {
      "judge_key": "qwen3_32b",
      "judge_name": "Qwen 3 32B",
      "respondent_key": "mistral_nemo",
      "respondent_name": "Mistral Nemo 12B",
      "weighted_score": 7.2,
      "brief_justification": "The implementation correctly handles LRU eviction and TTL storage, but the `get` method does not check for key expiration before returning values. Expired keys are only removed when `evict_expired()` is explicitly called, which may leave stale entries in the cache. The code is clear and well-structured, but automatic TTL enforcement during access is missing."
    },
    {
      "judge_key": "qwen3_32b",
      "judge_name": "Qwen 3 32B",
      "respondent_key": "llama31_8b",
      "respondent_name": "Llama 3.1 8B",
      "weighted_score": 5.2,
      "brief_justification": "The implementation incorrectly uses a global TTL instead of per-key TTL. The _remove_expired method is not triggered automatically, and the update method has flawed logic for modifying TTLs. While the code structure is clear and follows LRU principles, it fails to meet the core per-key TTL requirement."
    },
    {
      "judge_key": "devstral",
      "judge_name": "Devstral Small",
      "respondent_key": "qwen3_32b",
      "respondent_name": "Qwen 3 32B",
      "weighted_score": 9.6,
      "brief_justification": "The response provides a thorough, accurate, and well-structured implementation of an LRU cache with per-key TTL, including detailed explanations and example usage."
    },
    {
      "judge_key": "devstral",
      "judge_name": "Devstral Small",
      "respondent_key": "gemma3_27b",
      "respondent_name": "Gemma 3 27B",
      "weighted_score": 9.6,
      "brief_justification": "The response provides a comprehensive and correct implementation of an LRU cache with per-key TTL, including thread safety, clear documentation, and practical usage examples."
    },
    {
      "judge_key": "devstral",
      "judge_name": "Devstral Small",
      "respondent_key": "llama4_scout",
      "respondent_name": "Llama 4 Scout",
      "weighted_score": 8.6,
      "brief_justification": "The response provides a comprehensive and functional implementation of an LRU cache with per-key TTL, including thread safety and example usage. It is mostly accurate, clear, and practical, with minor room for improvement in clarity and depth."
    },
    {
      "judge_key": "devstral",
      "judge_name": "Devstral Small",
      "respondent_key": "phi4",
      "respondent_name": "Phi-4 14B",
      "weighted_score": 9.6,
      "brief_justification": "The response provides a correct, complete, and clear implementation of an LRU cache with per-key TTL. It includes a detailed explanation and example usage, demonstrating both correctness and practical value."
    },
    {
      "judge_key": "devstral",
      "judge_name": "Devstral Small",
      "respondent_key": "granite_40",
      "respondent_name": "Granite 4.0 Micro",
      "weighted_score": 9.05,
      "brief_justification": "The response provides a correct and complete implementation of an LRU cache with per-key TTL. It is well-structured and clear, with a good explanation of the code and its usage. The implementation is practical and actionable, making it useful for real-world applications."
    },
    {
      "judge_key": "devstral",
      "judge_name": "Devstral Small",
      "respondent_key": "qwen3_8b",
      "respondent_name": "Qwen 3 8B",
      "weighted_score": 9.6,
      "brief_justification": "The response provides a thorough, accurate, and clear implementation of an LRU cache with per-key TTL, including example usage and key design decisions."
    },
    {
      "judge_key": "devstral",
      "judge_name": "Devstral Small",
      "respondent_key": "mistral_nemo",
      "respondent_name": "Mistral Nemo 12B",
      "weighted_score": 8.2,
      "brief_justification": "The implementation is mostly correct and functional, but it lacks a mechanism to automatically evict expired keys without calling evict_expired()."
    },
    {
      "judge_key": "devstral",
      "judge_name": "Devstral Small",
      "respondent_key": "llama31_8b",
      "respondent_name": "Llama 3.1 8B",
      "weighted_score": 7.4,
      "brief_justification": "The implementation is mostly correct and clear, but it has some issues with TTL handling and lacks a method to periodically check for expired items."
    },
    {
      "judge_key": "gemma3_27b",
      "judge_name": "Gemma 3 27B",
      "respondent_key": "qwen3_32b",
      "respondent_name": "Qwen 3 32B",
      "weighted_score": 9.2,
      "brief_justification": "The code is a correct and well-documented implementation of an LRU cache with per-key TTL. It covers important aspects like thread safety and cleanup, and provides a clear example. The notes section offers valuable considerations for production environments."
    },
    {
      "judge_key": "gemma3_27b",
      "judge_name": "Gemma 3 27B",
      "respondent_key": "devstral",
      "respondent_name": "Devstral Small",
      "weighted_score": 8.85,
      "brief_justification": "The code is functionally correct and implements the LRU cache with TTL as requested. It could be improved with more robust error handling or consideration for thread safety, but it provides a solid and understandable implementation."
    },
    {
      "judge_key": "gemma3_27b",
      "judge_name": "Gemma 3 27B",
      "respondent_key": "llama4_scout",
      "respondent_name": "Llama 4 Scout",
      "weighted_score": 8.25,
      "brief_justification": "The code is functionally correct and implements the requested LRU cache with TTL. It's well-documented and clear, but could benefit from more discussion of potential race conditions even with the lock, and alternative eviction strategies."
    },
    {
      "judge_key": "gemma3_27b",
      "judge_name": "Gemma 3 27B",
      "respondent_key": "phi4",
      "respondent_name": "Phi-4 14B",
      "weighted_score": 9.4,
      "brief_justification": "The code is a correct and complete implementation of an LRU cache with TTL. The explanation is clear and well-structured, and the example usage demonstrates the functionality effectively. Could benefit from more discussion of potential edge cases or optimizations."
    },
    {
      "judge_key": "gemma3_27b",
      "judge_name": "Gemma 3 27B",
      "respondent_key": "granite_40",
      "respondent_name": "Granite 4.0 Micro",
      "weighted_score": 8.85,
      "brief_justification": "The implementation is correct and provides a functional LRU cache with TTL. It's well-explained and includes thread safety. Could benefit from discussing potential performance implications of `clear_expired` and alternative approaches like using a separate thread for cleanup."
    },
    {
      "judge_key": "gemma3_27b",
      "judge_name": "Gemma 3 27B",
      "respondent_key": "qwen3_8b",
      "respondent_name": "Qwen 3 8B",
      "weighted_score": 9.4,
      "brief_justification": "The response provides a correct and well-explained implementation of an LRU cache with per-key TTL. It covers all essential aspects, including data structures, TTL handling, and LRU behavior, with clear code and example usage. The notes section also highlights potential improvements."
    },
    {
      "judge_key": "gemma3_27b",
      "judge_name": "Gemma 3 27B",
      "respondent_key": "mistral_nemo",
      "respondent_name": "Mistral Nemo 12B",
      "weighted_score": 8.85,
      "brief_justification": "The code provides a correct and well-explained implementation of an LRU cache with TTL. It could be improved with more robust error handling or a more sophisticated eviction strategy, but it fulfills the prompt's requirements effectively."
    },
    {
      "judge_key": "gemma3_27b",
      "judge_name": "Gemma 3 27B",
      "respondent_key": "llama31_8b",
      "respondent_name": "Llama 3.1 8B",
      "weighted_score": 8.4,
      "brief_justification": "The implementation is mostly correct and provides a functional LRU cache with TTL. It could be improved by handling TTL expiration more efficiently (e.g., using a separate thread or scheduler) instead of iterating through the entire cache in `_remove_expired`. The explanation is clear and well-structured."
    },
    {
      "judge_key": "llama4_scout",
      "judge_name": "Llama 4 Scout",
      "respondent_key": "qwen3_32b",
      "respondent_name": "Qwen 3 32B",
      "weighted_score": 8.8,
      "brief_justification": "The response provides a correct and complete implementation of an LRU cache with per-key TTL in Python. The code is well-structured, clear, and includes example usage. The notes section offers additional insights and suggestions for improvement."
    },
    {
      "judge_key": "llama4_scout",
      "judge_name": "Llama 4 Scout",
      "respondent_key": "gemma3_27b",
      "respondent_name": "Gemma 3 27B",
      "weighted_score": 8.8,
      "brief_justification": "The implementation is mostly correct and complete, with clear documentation and thread safety. However, it could be improved by discussing potential edge cases and providing more context on using the cleanup function."
    },
    {
      "judge_key": "llama4_scout",
      "judge_name": "Llama 4 Scout",
      "respondent_key": "phi4",
      "respondent_name": "Phi-4 14B",
      "weighted_score": 9.6,
      "brief_justification": "The response provides a correct and complete implementation of an LRU cache with per-key TTL, along with clear explanations and example usage. The code is well-structured and readable."
    },
    {
      "judge_key": "llama4_scout",
      "judge_name": "Llama 4 Scout",
      "respondent_key": "granite_40",
      "respondent_name": "Granite 4.0 Micro",
      "weighted_score": 8.8,
      "brief_justification": "The response provides a correct and complete implementation of an LRU cache with per-key TTL, along with clear explanations and example usage. The code is well-structured, and the documentation is thorough."
    },
    {
      "judge_key": "llama4_scout",
      "judge_name": "Llama 4 Scout",
      "respondent_key": "qwen3_8b",
      "respondent_name": "Qwen 3 8B",
      "weighted_score": 9.45,
      "brief_justification": "The response provides a correct and complete implementation of an LRU cache with per-key TTL in Python. The code is well-structured, clear, and includes example usage. The implementation covers key design decisions, TTL handling, LRU behavior, and efficient get and put operations."
    },
    {
      "judge_key": "llama4_scout",
      "judge_name": "Llama 4 Scout",
      "respondent_key": "mistral_nemo",
      "respondent_name": "Mistral Nemo 12B",
      "weighted_score": 8.4,
      "brief_justification": "The implementation is mostly correct but lacks automatic expiration. The example usage effectively demonstrates the functionality."
    },
    {
      "judge_key": "llama4_scout",
      "judge_name": "Llama 4 Scout",
      "respondent_key": "llama31_8b",
      "respondent_name": "Llama 3.1 8B",
      "weighted_score": 8.2,
      "brief_justification": "The implementation is mostly correct and complete, with clear documentation and example use cases. However, there are some minor issues with the update method and the handling of expired items."
    },
    {
      "judge_key": "phi4",
      "judge_name": "Phi-4 14B",
      "respondent_key": "qwen3_32b",
      "respondent_name": "Qwen 3 32B",
      "weighted_score": 8.25,
      "brief_justification": "The response provides a correct and functional implementation of an LRU cache with per-key TTL, using Python's threading and OrderedDict. It covers key features and includes example usage, but lacks details on handling distributed environments or async/await."
    },
    {
      "judge_key": "phi4",
      "judge_name": "Phi-4 14B",
      "respondent_key": "devstral",
      "respondent_name": "Devstral Small",
      "weighted_score": 8.6,
      "brief_justification": "The response correctly implements an LRU cache with per-key TTL using Python's OrderedDict and provides a clear example. The implementation covers essential aspects but could include more details on edge cases and potential optimizations."
    },
    {
      "judge_key": "phi4",
      "judge_name": "Phi-4 14B",
      "respondent_key": "gemma3_27b",
      "respondent_name": "Gemma 3 27B",
      "weighted_score": 9.4,
      "brief_justification": "The implementation correctly addresses the requirements of an LRU cache with per-key TTL and thread safety. It includes all necessary methods and attributes, with clear comments and docstrings explaining functionality. The periodic cleanup function is crucial for maintaining cache performance and preventing indefinite growth. The use of locks ensures thread safety, making the implementation practical for real-world applications."
    },
    {
      "judge_key": "phi4",
      "judge_name": "Phi-4 14B",
      "respondent_key": "llama4_scout",
      "respondent_name": "Llama 4 Scout",
      "weighted_score": 8.25,
      "brief_justification": "The implementation accurately describes an LRU cache with per-key TTL, ensuring thread safety with locks. It covers the basic functionality and provides example usage. However, it lacks detailed discussion on potential edge cases or performance considerations for high-concurrency scenarios."
    },
    {
      "judge_key": "phi4",
      "judge_name": "Phi-4 14B",
      "respondent_key": "granite_40",
      "respondent_name": "Granite 4.0 Micro",
      "weighted_score": 9.05,
      "brief_justification": "The response provides a correct and complete implementation of an LRU cache with per-key TTL, using Python's `OrderedDict` and `threading` modules. It covers all necessary operations and explains thread safety and TTL management. The clarity and depth are high, with practical insights into usage and periodic cleanup."
    },
    {
      "judge_key": "phi4",
      "judge_name": "Phi-4 14B",
      "respondent_key": "qwen3_8b",
      "respondent_name": "Qwen 3 8B",
      "weighted_score": 8.25,
      "brief_justification": "The response correctly implements an LRU cache with per-key TTL using Python's OrderedDict and time tracking. The design decisions and implementation details are clear and accurate. The response could be improved by discussing potential enhancements for TTL expiration tracking during inactivity or custom cleanup intervals."
    },
    {
      "judge_key": "phi4",
      "judge_name": "Phi-4 14B",
      "respondent_key": "mistral_nemo",
      "respondent_name": "Mistral Nemo 12B",
      "weighted_score": 7.8,
      "brief_justification": "The implementation correctly uses an OrderedDict to manage LRU eviction and handles per-key TTL. However, it lacks handling for the 'ttl' key in the 'get' method and does not account for updating TTLs when keys are accessed. The code is clear and structured, but could be more complete and useful by addressing these issues."
    },
    {
      "judge_key": "phi4",
      "judge_name": "Phi-4 14B",
      "respondent_key": "llama31_8b",
      "respondent_name": "Llama 3.1 8B",
      "weighted_score": 8.25,
      "brief_justification": "The implementation correctly addresses the LRU cache with per-key TTL, providing clear methods for managing cache entries. However, it lacks handling for edge cases, such as checking if the cache is empty before updating or deleting entries, and does not include a method to automatically remove expired entries on access."
    },
    {
      "judge_key": "granite_40",
      "judge_name": "Granite 4.0 Micro",
      "respondent_key": "qwen3_32b",
      "respondent_name": "Qwen 3 32B",
      "weighted_score": 8.8,
      "brief_justification": "The response accurately describes an LRU cache with per-key TTL, provides a clear Python implementation, covers key design aspects, and includes example usage. It is mostly complete but could mention thread-safety more explicitly and discuss trade-offs of the background cleanup approach."
    },
    {
      "judge_key": "granite_40",
      "judge_name": "Granite 4.0 Micro",
      "respondent_key": "devstral",
      "respondent_name": "Devstral Small",
      "weighted_score": 8.6,
      "brief_justification": "The response accurately implements an LRU cache with TTL using OrderedDict and a TTL dictionary, covers initialization, get, put, remove, and clear methods, and provides clear code with comments. It could be more in-depth by discussing edge cases or performance considerations."
    },
    {
      "judge_key": "granite_40",
      "judge_name": "Granite 4.0 Micro",
      "respondent_key": "gemma3_27b",
      "respondent_name": "Gemma 3 27B",
      "weighted_score": 8.8,
      "brief_justification": "The response accurately implements an LRU cache with per-key TTL, includes thread safety, and provides useful methods. Minor depth could be improved by discussing edge cases or performance considerations."
    },
    {
      "judge_key": "granite_40",
      "judge_name": "Granite 4.0 Micro",
      "respondent_key": "llama4_scout",
      "respondent_name": "Llama 4 Scout",
      "weighted_score": 8.8,
      "brief_justification": "The response accurately implements an LRU cache with per-key TTL, covers the necessary components, is well-structured, provides a reasonable depth of explanation, and offers practical advice for usage and optimization."
    },
    {
      "judge_key": "granite_40",
      "judge_name": "Granite 4.0 Micro",
      "respondent_key": "phi4",
      "respondent_name": "Phi-4 14B",
      "weighted_score": 8.8,
      "brief_justification": "The response accurately implements an LRU cache with TTL using appropriate data structures, is well-structured, and provides a practical Python example, though it could delve deeper into edge cases and performance considerations."
    },
    {
      "judge_key": "granite_40",
      "judge_name": "Granite 4.0 Micro",
      "respondent_key": "qwen3_8b",
      "respondent_name": "Qwen 3 8B",
      "weighted_score": 8.8,
      "brief_justification": "The response accurately describes an LRU cache with TTL using OrderedDict and time checks, covers implementation details, provides clear code, and includes example usage, though it could delve deeper into edge cases and cleanup strategies."
    },
    {
      "judge_key": "granite_40",
      "judge_name": "Granite 4.0 Micro",
      "respondent_key": "mistral_nemo",
      "respondent_name": "Mistral Nemo 12B",
      "weighted_score": 8.6,
      "brief_justification": "The response accurately implements an LRU cache with TTL using OrderedDict, covers initialization, get, put, and eviction logic, and provides a clear example. Depth could be higher with more advanced optimizations or edge case handling."
    },
    {
      "judge_key": "granite_40",
      "judge_name": "Granite 4.0 Micro",
      "respondent_key": "llama31_8b",
      "respondent_name": "Llama 3.1 8B",
      "weighted_score": 8.8,
      "brief_justification": "The response provides a clear, well-structured Python implementation of an LRU cache with per-key TTL, covering key aspects like initialization, retrieval, insertion, expiration handling, and deletion. It includes example usage and explanations, though it could delve deeper into edge cases and performance considerations."
    },
    {
      "judge_key": "qwen3_8b",
      "judge_name": "Qwen 3 8B",
      "respondent_key": "qwen3_32b",
      "respondent_name": "Qwen 3 32B",
      "weighted_score": 9.4,
      "brief_justification": "The response provides a correct and complete LRU TTL cache implementation with clear explanations, but lacks deeper optimization discussion."
    },
    {
      "judge_key": "qwen3_8b",
      "judge_name": "Qwen 3 8B",
      "respondent_key": "devstral",
      "respondent_name": "Devstral Small",
      "weighted_score": 8.6,
      "brief_justification": "The implementation correctly handles LRU eviction and per-key TTL with clear code structure, but lacks advanced features like thread safety or more efficient data structures."
    },
    {
      "judge_key": "qwen3_8b",
      "judge_name": "Qwen 3 8B",
      "respondent_key": "gemma3_27b",
      "respondent_name": "Gemma 3 27B",
      "weighted_score": 9.25,
      "brief_justification": "The implementation correctly handles LRU with per-key TTL, thread safety, and cleanup. It's well-structured and practical, though lacks deeper optimizations like linked lists for LRU tracking."
    },
    {
      "judge_key": "qwen3_8b",
      "judge_name": "Qwen 3 8B",
      "respondent_key": "llama4_scout",
      "respondent_name": "Llama 4 Scout",
      "weighted_score": 9.05,
      "brief_justification": "The implementation correctly handles LRU eviction and per-key TTL with thread safety. It is well-structured and includes examples, but lacks deeper analysis of performance optimizations or edge cases."
    },
    {
      "judge_key": "qwen3_8b",
      "judge_name": "Qwen 3 8B",
      "respondent_key": "phi4",
      "respondent_name": "Phi-4 14B",
      "weighted_score": 9.4,
      "brief_justification": "The implementation correctly combines LRU eviction with per-key TTL using a doubly linked list and dictionary. It is well-structured and practical, though lacks deeper analysis of trade-offs or optimizations."
    },
    {
      "judge_key": "qwen3_8b",
      "judge_name": "Qwen 3 8B",
      "respondent_key": "granite_40",
      "respondent_name": "Granite 4.0 Micro",
      "weighted_score": 8.6,
      "brief_justification": "The implementation correctly handles LRU and per-key TTL with thread safety, but stores TTL in two locations redundantly. The explanation is clear and covers all core operations, though lacks advanced optimizations."
    },
    {
      "judge_key": "qwen3_8b",
      "judge_name": "Qwen 3 8B",
      "respondent_key": "mistral_nemo",
      "respondent_name": "Mistral Nemo 12B",
      "weighted_score": 8.05,
      "brief_justification": "The implementation correctly handles LRU eviction and per-key TTL with clear structure, but lacks automatic TTL checking during get/put operations and could benefit from more detailed edge case handling."
    },
    {
      "judge_key": "qwen3_8b",
      "judge_name": "Qwen 3 8B",
      "respondent_key": "llama31_8b",
      "respondent_name": "Llama 3.1 8B",
      "weighted_score": 8.05,
      "brief_justification": "The implementation correctly handles LRU eviction and per-key TTL with ordered dictionary. However, the update method has logical issues with value storage and the TTL cleanup requires manual invocation, limiting completeness and practicality."
    },
    {
      "judge_key": "mistral_nemo",
      "judge_name": "Mistral Nemo 12B",
      "respondent_key": "qwen3_32b",
      "respondent_name": "Qwen 3 32B",
      "weighted_score": 9.05,
      "brief_justification": "The response provides a comprehensive and accurate implementation of an LRU cache with per-key TTL in Python. It covers all aspects of the problem, including design, implementation, and usage examples. The only minor deduction is that it doesn't discuss potential edge cases or error handling in detail."
    },
    {
      "judge_key": "mistral_nemo",
      "judge_name": "Mistral Nemo 12B",
      "respondent_key": "gemma3_27b",
      "respondent_name": "Gemma 3 27B",
      "weighted_score": 9.05,
      "brief_justification": "The response provides a complete and accurate implementation of an LRU cache with per-key TTL, with clear comments and docstrings. It also includes thread safety, cleanup function, and special methods for easier use. However, it lacks a brief explanation of the LRU algorithm itself."
    },
    {
      "judge_key": "mistral_nemo",
      "judge_name": "Mistral Nemo 12B",
      "respondent_key": "phi4",
      "respondent_name": "Phi-4 14B",
      "weighted_score": 8.25,
      "brief_justification": "The response provides a detailed and correct implementation of an LRU cache with per-key TTL. It covers the topic comprehensively, is well-structured, and offers a practical example. However, it could delve deeper into edge cases and error handling for a higher depth score."
    },
    {
      "judge_key": "mistral_nemo",
      "judge_name": "Mistral Nemo 12B",
      "respondent_key": "granite_40",
      "respondent_name": "Granite 4.0 Micro",
      "weighted_score": 8.25,
      "brief_justification": "The response provides a detailed and accurate implementation of an LRU cache with per-key TTL in Python. It covers the necessary methods for getting, putting, deleting, and clearing expired items. However, it lacks a bit in depth by not discussing potential optimizations or edge cases. The code is clear and well-structured, making it useful for practical implementation."
    },
    {
      "judge_key": "mistral_nemo",
      "judge_name": "Mistral Nemo 12B",
      "respondent_key": "qwen3_8b",
      "respondent_name": "Qwen 3 8B",
      "weighted_score": 9.05,
      "brief_justification": "The response provides a comprehensive and accurate implementation of an LRU cache with per-key TTL in Python. It covers the key design decisions, data structures used, and provides a clear and well-structured implementation. It also includes an example usage and notes on potential enhancements. However, it doesn't go into the deepest level of optimization or discuss potential edge cases."
    },
    {
      "judge_key": "mistral_nemo",
      "judge_name": "Mistral Nemo 12B",
      "respondent_key": "llama31_8b",
      "respondent_name": "Llama 3.1 8B",
      "weighted_score": 8.25,
      "brief_justification": "The response provides a clear and correct implementation of an LRU cache with per-key TTL. It covers most aspects of the problem, but lacks a bit in depth, as it doesn't discuss potential edge cases or performance optimizations. The code is well-structured and easy to understand, making it useful for practical use."
    },
    {
      "judge_key": "llama31_8b",
      "judge_name": "Llama 3.1 8B",
      "respondent_key": "qwen3_32b",
      "respondent_name": "Qwen 3 32B",
      "weighted_score": 8.8,
      "brief_justification": "The implementation is accurate, thorough, and well-structured, with good use of Python's built-in data structures and synchronization primitives. It provides a useful and practical solution for caching with per-key TTL, although it may require adjustments for high-performance or distributed environments."
    },
    {
      "judge_key": "llama31_8b",
      "judge_name": "Llama 3.1 8B",
      "respondent_key": "devstral",
      "respondent_name": "Devstral Small",
      "weighted_score": 9.05,
      "brief_justification": "The implementation is correct and comprehensive, but lacks a detailed explanation of the data structures and algorithms used."
    },
    {
      "judge_key": "llama31_8b",
      "judge_name": "Llama 3.1 8B",
      "respondent_key": "gemma3_27b",
      "respondent_name": "Gemma 3 27B",
      "weighted_score": 9.4,
      "brief_justification": "The implementation is accurate, complete, and well-structured, with clear comments and docstrings. It also provides a useful and practical solution for implementing an LRU cache with per-key TTL."
    },
    {
      "judge_key": "llama31_8b",
      "judge_name": "Llama 3.1 8B",
      "respondent_key": "llama4_scout",
      "respondent_name": "Llama 4 Scout",
      "weighted_score": 8.4,
      "brief_justification": "The implementation is mostly correct, but it would benefit from more detailed explanations and examples in the code comments."
    },
    {
      "judge_key": "llama31_8b",
      "judge_name": "Llama 3.1 8B",
      "respondent_key": "phi4",
      "respondent_name": "Phi-4 14B",
      "weighted_score": 8.6,
      "brief_justification": "The implementation correctly combines LRU eviction with time-based expiration, but could benefit from additional error handling and documentation."
    },
    {
      "judge_key": "llama31_8b",
      "judge_name": "Llama 3.1 8B",
      "respondent_key": "granite_40",
      "respondent_name": "Granite 4.0 Micro",
      "weighted_score": 8.6,
      "brief_justification": "The implementation accurately describes an LRU cache with per-key TTL and provides a clear explanation of its usage and functionality, but could benefit from more detailed explanations of certain methods and edge cases."
    },
    {
      "judge_key": "llama31_8b",
      "judge_name": "Llama 3.1 8B",
      "respondent_key": "qwen3_8b",
      "respondent_name": "Qwen 3 8B",
      "weighted_score": 9.05,
      "brief_justification": "The response provides a clear and well-structured implementation of an LRU cache with per-key TTL, including key design decisions, implementation, and example usage. However, it lacks some advanced features like TTL expiration tracking during inactivity or custom cleanup intervals."
    },
    {
      "judge_key": "llama31_8b",
      "judge_name": "Llama 3.1 8B",
      "respondent_key": "mistral_nemo",
      "respondent_name": "Mistral Nemo 12B",
      "weighted_score": 8.2,
      "brief_justification": "The implementation is mostly correct and complete, but lacks some edge cases and error handling. The code is clear and well-structured, but the depth of analysis is moderate, and the usefulness is high due to the practical application of the LRU cache with TTL."
    }
  ],
  "meta": {
    "source": "The Multivac (app.themultivac.com)",
    "methodology": "10x10 blind peer matrix evaluation",
    "criteria": "correctness, completeness, clarity, depth, usefulness",
    "self_judgments": "excluded from rankings",
    "license": "Open data — cite as: The Multivac (2026)"
  }
}