{
  "mistral-7b-instruct": {
    "release": "Septembre 2023",
    "arch": "Transformer dense · 32 couches · Grouped-query attention",
    "training": "Corpus web multilingue, fort en FR. Données 2023.",
    "strengths": [
      "Excellent quality-to-speed ratio for a 7B",
      "Fully permissive Apache 2.0 license",
      "Mature ecosystem of fine-tunes, GGUFs, and quants",
      "Solid multilingual coverage, including strong French"
    ],
    "weaknesses": [
      "Outclassed on reasoning by 2024+ models like Qwen 2.5 and Llama 3.1",
      "32k context is no longer competitive",
      "Training data cutoff in 2023 shows on recent topics"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 60.1
      },
      {
        "name": "HellaSwag",
        "score": 81.3
      },
      {
        "name": "HumanEval",
        "score": 30.5
      }
    ],
    "alternatives": [
      "llama3-8b",
      "qwen25-7b",
      "lucie-7b"
    ],
    "install": "ollama run mistral:7b-instruct",
    "desc": "Mistral AI's breakout 7B instruct model. Still a go-to baseline for fast, low-cost inference and the most fine-tuned open-weight model in the wild.",
    "verdict": "A reliable, freely licensed workhorse — fine as a baseline, but newer 7Bs win on quality."
  },
  "mistral-small-24b": {
    "release": "Janvier 2025",
    "arch": "Transformer dense · 40 couches · GQA + sliding window",
    "training": "Corpus multilingue enrichi, fort focus FR + anglais scientifique.",
    "strengths": [
      "Quality approaching Llama 3 70B at a third the size",
      "Low latency relative to peers",
      "128k context window",
      "Strong tool use and agent behavior",
      "Apache 2.0 license"
    ],
    "weaknesses": [
      "Needs ~16GB VRAM at Q4, more for higher precision",
      "Trails Qwen 2.5 Coder on dedicated coding tasks",
      "No native vision (see Small 3.1 for that)"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 81
      },
      {
        "name": "GPQA",
        "score": 42.2
      },
      {
        "name": "HumanEval",
        "score": 84.8
      }
    ],
    "alternatives": [
      "qwen25-32b",
      "gemma2-27b",
      "llama3-70b"
    ],
    "install": "ollama run mistral-small:24b",
    "desc": "Mistral AI's 24B dense model that closes most of the gap with 70B-class models. Best quality-per-parameter we've measured at this size in 2025.",
    "verdict": "The 2025 sweet spot for open-weight chat — frontier-adjacent quality at a tractable size."
  },
  "lucie-7b": {
    "release": "Janvier 2025",
    "arch": "Llama-like · 32 couches · entraîné sur Jean Zay (CNRS)",
    "training": "Projet OpenLLM-France · corpus 100% transparent, forte proportion FR.",
    "strengths": [
      "Full European data sovereignty story",
      "Publicly available training corpus",
      "Strong formal French output",
      "Backed by CNRS and LINAGORA"
    ],
    "weaknesses": [
      "4k context is too short for modern RAG or long docs",
      "Weaker English than Mistral or Llama at the same size",
      "Smaller ecosystem of fine-tunes and tools"
    ],
    "benchmarks": [
      {
        "name": "MMLU (fr)",
        "score": 54.2
      },
      {
        "name": "FrenchBench",
        "score": 68
      }
    ],
    "alternatives": [
      "mistral-7b-instruct",
      "croissant-llm"
    ],
    "install": "ollama run lucie:7b",
    "desc": "A French-sovereign 7B model from OpenLLM-France, backed by CNRS and LINAGORA, with a fully transparent and auditable training corpus.",
    "verdict": "Pick it for sovereignty and provenance, not raw capability — the 4k context is the dealbreaker for most workloads."
  },
  "llama3-70b": {
    "release": "Juillet 2024",
    "arch": "Transformer dense · 80 couches · GQA",
    "training": "15T tokens, corpus multilingue Meta.",
    "strengths": [
      "Benchmark-leading quality for open-weight 70B",
      "128k context",
      "Strong reasoning and code generation",
      "Mature serving stack in vLLM, TGI, llama.cpp"
    ],
    "weaknesses": [
      "~40GB VRAM at Q4 — minimum two 24GB GPUs",
      "Llama Community license restricts use above 700M MAU",
      "Slower and pricier to serve than Llama 3.3 70B at similar quality"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 86
      },
      {
        "name": "GPQA",
        "score": 48
      },
      {
        "name": "HumanEval",
        "score": 80.5
      }
    ],
    "alternatives": [
      "qwen25-32b",
      "mistral-small-24b",
      "deepseek-r1-32b"
    ],
    "install": "ollama run llama3.1:70b",
    "desc": "Meta's Llama 3.1 70B, the open-weight model that first felt like a credible GPT-4 alternative. Needs serious hardware — think dual 3090s or an A100.",
    "verdict": "A milestone model, but Llama 3.3 70B delivers the same quality with better post-training — use 3.3 unless you have a reason."
  },
  "qwen25-coder-32b": {
    "release": "Novembre 2024",
    "arch": "Transformer dense spécialisé code · 64 couches",
    "training": "Pré-entraînement général + 5.5T tokens code, 92 langages.",
    "strengths": [
      "Best-in-class open-weight code generation",
      "Claude 3.5 Sonnet-level HumanEval scores",
      "128k context for repo-wide tasks",
      "Apache 2.0 license"
    ],
    "weaknesses": [
      "Requires 20+ GB VRAM at Q4",
      "Weaker than Qwen 2.5 32B for general chat",
      "Slower than 7B-class models for autocomplete loops"
    ],
    "benchmarks": [
      {
        "name": "HumanEval",
        "score": 92.7
      },
      {
        "name": "MBPP",
        "score": 86
      },
      {
        "name": "LiveCodeBench",
        "score": 31.4
      }
    ],
    "alternatives": [
      "qwen25-coder-7b",
      "deepseek-coder-v2-16b"
    ],
    "install": "ollama run qwen2.5-coder:32b",
    "desc": "Alibaba's Qwen 2.5 Coder 32B — the strongest open-weight code model we've benchmarked, trading punches with Claude 3.5 Sonnet on HumanEval.",
    "verdict": "The default open-weight choice for serious code work — frontier-grade quality without an API bill."
  },
  "deepseek-r1-32b": {
    "release": "Janvier 2025",
    "arch": "Distillation de DeepSeek R1 · chain-of-thought renforcée",
    "training": "Distillé depuis R1 671B · RL sur problèmes raisonnement.",
    "strengths": [
      "Best open-weight reasoner that fits on one consumer GPU",
      "Excellent math and science performance",
      "Explicit step-by-step thinking",
      "MIT license",
      "32k context"
    ],
    "weaknesses": [
      "Heavy thinking-token output inflates latency and cost",
      "Slow time-to-first-useful-answer",
      "32k context is shorter than most 2025 peers",
      "Overkill for simple chat"
    ],
    "benchmarks": [
      {
        "name": "AIME 2024",
        "score": 72.6
      },
      {
        "name": "MATH-500",
        "score": 94.3
      },
      {
        "name": "GPQA",
        "score": 62.1
      }
    ],
    "alternatives": [
      "deepseek-r1-7b",
      "phi4-14b"
    ],
    "install": "ollama run deepseek-r1:32b",
    "desc": "The 32B DeepSeek R1 distill — the best accessible open-weight reasoner we've tested. Explicit chain-of-thought, MIT-licensed, runs on a single 24GB GPU.",
    "verdict": "The go-to local reasoning model for STEM and code — accept the verbosity, get the accuracy."
  },
  "mistral-nemo-12b": {
    "release": "Juillet 2024",
    "arch": "Dense Transformer · GQA · Tekken tokenizer (131k vocab)",
    "training": "Co-entraîné Mistral × NVIDIA. Corpus multilingue européen.",
    "strengths": [
      "128k context window",
      "Strong European multilingual performance",
      "Apache 2.0 license",
      "Efficient Tekken tokenizer reduces token counts"
    ],
    "weaknesses": [
      "Reasoning trails Mistral Small 3.1",
      "No vision",
      "Eclipsed by Small 3 on most general benchmarks"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 68
      },
      {
        "name": "HellaSwag",
        "score": 83.5
      },
      {
        "name": "Winogrande",
        "score": 76.8
      }
    ],
    "alternatives": [
      "mistral-small-31-24b",
      "qwen3-14b",
      "gemma3-12b"
    ],
    "install": "ollama run mistral-nemo:12b",
    "desc": "Mistral AI and NVIDIA's co-developed 12B instruct model with 128k context, the Tekken tokenizer, and strong European multilingual coverage.",
    "verdict": "A clean midsize Mistral with great multilingual chops — Small 3.1 wins overall, but Nemo's tokenizer remains attractive."
  },
  "mistral-small-31-24b": {
    "release": "Mars 2025",
    "arch": "Dense · multimodal texte+vision · Tekken tokenizer",
    "training": "Successeur de Small 3 avec encodage visuel ajouté.",
    "strengths": [
      "Vision and text combined in one 24B model",
      "128k context window",
      "Apache 2.0 license",
      "Around 150 tokens/sec inference"
    ],
    "weaknesses": [
      "Requires Ollama 0.6.5 or newer",
      "Small 3.2 (June 2025) is a marginal improvement worth picking instead",
      "Vision quality trails Qwen2-VL on OCR"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 80.6
      },
      {
        "name": "MMMU",
        "score": 64
      }
    ],
    "alternatives": [
      "gemma3-27b",
      "qwen25-vl-7b",
      "mistral-small-24b"
    ],
    "install": "ollama run mistral-small3.1:24b",
    "desc": "Mistral AI's Small 3.1 — Small 3 plus a vision encoder, a 128k context, and ~150 tok/s inference under Apache 2.0. Small 3.2 (June 2025) is a drop-in upgrade.",
    "verdict": "The best open-weight 24B multimodal model under Apache 2.0 — and Small 3.2 makes it slightly better still."
  },
  "llama33-70b": {
    "release": "Décembre 2024",
    "arch": "Dense · GQA · base Llama 3.1",
    "training": "Post-training amélioré vs Llama 3.1 70B.",
    "strengths": [
      "Quality competitive with Llama 3.1 405B",
      "128k context window",
      "Strong reasoning and code performance",
      "Major efficiency gain vs the 405B model"
    ],
    "weaknesses": [
      "Hugging Face access is gated — must accept Meta's terms",
      "Llama Community license restricts use above 700M MAU",
      "No vision capabilities",
      "Still needs roughly 40GB VRAM at Q4"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 86
      },
      {
        "name": "GPQA Diamond",
        "score": 50.5
      },
      {
        "name": "HumanEval",
        "score": 88.4
      }
    ],
    "alternatives": [
      "deepseek-r1-distill-llama-70b",
      "qwen3-32b",
      "llama3-70b"
    ],
    "install": "ollama run llama3.3:70b",
    "desc": "Meta's Llama 3.3 70B — same quality tier as Llama 3.1 405B at one-sixth the size, thanks to improved post-training. Weights are gated on Hugging Face.",
    "verdict": "The best open-weight 70B available — pick it over Llama 3.1 70B unless you have a hard reason not to."
  },
  "qwen3-8b": {
    "release": "Avril 2025",
    "arch": "Dense · GQA · hybrid thinking/non-thinking mode",
    "training": "36T tokens, 119 langues.",
    "strengths": [
      "Hybrid thinking/fast modes switchable per request",
      "Strong multilingual performance across 119 languages",
      "Up to 131K context via YaRN (32K native)",
      "Apache 2.0 — clean commercial use"
    ],
    "weaknesses": [
      "Thinking traces are verbose and burn tokens fast",
      "Ecosystem tooling still less mature than the Qwen 2.5 line"
    ],
    "benchmarks": [
      {
        "name": "MMLU-Pro",
        "score": 68.7
      },
      {
        "name": "GPQA",
        "score": 60
      },
      {
        "name": "LiveCodeBench",
        "score": 54.4
      }
    ],
    "alternatives": [
      "qwen3-14b",
      "llama3-8b",
      "mistral-7b-instruct"
    ],
    "install": "ollama run qwen3:8b",
    "desc": "Alibaba's 8B dense model with a toggleable thinking mode and broad multilingual coverage. Punches well above its weight for an 8B and runs comfortably on a single consumer GPU.",
    "verdict": "The best general-purpose Apache-licensed 8B for teams that want one model covering chat, reasoning, and 100+ languages."
  },
  "qwen3-14b": {
    "release": "Avril 2025",
    "arch": "Dense · GQA · hybrid thinking",
    "training": "Corpus 36T tokens.",
    "strengths": [
      "Matches Qwen 2.5 32B Base on STEM and code at less than half the size",
      "Hybrid thinking mode for harder reasoning passes",
      "131K context window",
      "Apache 2.0"
    ],
    "weaknesses": [
      "Still trails dedicated reasoners like QwQ-32B on AIME-class problems",
      "Thinking mode output can balloon for simple prompts"
    ],
    "benchmarks": [
      {
        "name": "MMLU (base)",
        "score": 81.05
      },
      {
        "name": "SuperGPQA",
        "score": 34.27
      }
    ],
    "alternatives": [
      "qwen3-8b",
      "qwen3-32b",
      "phi4-14b"
    ],
    "install": "ollama run qwen3:14b",
    "desc": "A 14B dense model from Alibaba that matches Qwen 2.5 32B Base on STEM and code, with the same hybrid thinking system as the rest of the Qwen 3 family. The pragmatic sweet spot for a single 24GB GPU.",
    "verdict": "The smartest dense 14B you can run locally — ideal for a single high-end consumer GPU."
  },
  "qwen3-32b": {
    "release": "Avril 2025",
    "arch": "Dense · GQA · hybrid thinking",
    "training": "Même pré-entraînement 36T que le reste de la famille Qwen 3.",
    "strengths": [
      "Strong reasoning with thinking mode enabled",
      "Solid MMLU-Pro and SuperGPQA scores for its size",
      "131K context window",
      "Apache 2.0 license"
    ],
    "weaknesses": [
      "QwQ-32B is sharper for pure reasoning tasks",
      "Verbose thinking traces inflate latency and cost"
    ],
    "benchmarks": [
      {
        "name": "MMLU-Pro",
        "score": 65.54
      },
      {
        "name": "SuperGPQA",
        "score": 39.78
      }
    ],
    "alternatives": [
      "qwen3-14b",
      "qwen3-235b-a22b",
      "qwq-32b"
    ],
    "install": "ollama run qwen3:32b",
    "desc": "Alibaba's 32B dense flagship with thinking mode, scoring 65.5 on MMLU-Pro and 39.8 on SuperGPQA. The strongest general-purpose Qwen 3 dense model before stepping up to the MoE.",
    "verdict": "The most versatile Apache-licensed 32B available — pick this when you want one model for everything."
  },
  "qwen3-235b-a22b": {
    "release": "Avril 2025",
    "arch": "MoE · 128 experts, 8 actifs · 94 couches · GQA 64Q/4KV",
    "training": "36T tokens. Variantes Instruct-2507 et Thinking-2507 (Juillet 2025).",
    "strengths": [
      "Frontier-open scores on AIME 2024 (85.7) and LiveCodeBench (70.7)",
      "Only 22B active parameters — fast for its total size",
      "Instruct-2507 and Thinking-2507 variants available",
      "Apache 2.0"
    ],
    "weaknesses": [
      "~142GB at Q4 — needs multi-GPU or a 192GB+ Apple Silicon host",
      "Not realistic for laptop or single-GPU deployment"
    ],
    "benchmarks": [
      {
        "name": "AIME 2024",
        "score": 85.7
      },
      {
        "name": "AIME 2025",
        "score": 81.5
      },
      {
        "name": "LiveCodeBench v5",
        "score": 70.7
      }
    ],
    "alternatives": [
      "deepseek-v3-671b",
      "deepseek-r1-671b",
      "qwen3-32b"
    ],
    "install": "ollama run qwen3:235b",
    "desc": "Alibaba's flagship MoE — 235B total, 22B active per token across 128 experts. Hits 85.7 on AIME 2024 and 70.7 on LiveCodeBench, putting it in frontier-open territory.",
    "verdict": "Pick this when you have the hardware for frontier-open performance under an Apache license."
  },
  "qwen25-vl-7b": {
    "release": "Janvier 2025",
    "arch": "ViT + LLM Qwen2.5 · window attention · mRoPE · dynamic resolution",
    "training": "Support vidéo >1h, grounding bbox, sortie JSON structurée.",
    "strengths": [
      "State-of-the-art vision performance at the 7B tier",
      "Excellent multilingual OCR",
      "Long video input (over 1 hour)",
      "Apache 2.0"
    ],
    "weaknesses": [
      "Requires a VLM-capable backend (Ollama 0.5+ or vLLM)",
      "Smaller than 72B sibling for the hardest visual reasoning"
    ],
    "benchmarks": [
      {
        "name": "DocVQA",
        "score": 95.7
      },
      {
        "name": "ChartQA",
        "score": 87.3
      },
      {
        "name": "OCRBench",
        "score": 86.4
      }
    ],
    "alternatives": [
      "qwen25-vl-72b",
      "llama32-vision-11b",
      "qwen2-vl-7b"
    ],
    "install": "ollama run qwen2.5vl:7b",
    "desc": "A 7B vision-language model from Alibaba with state-of-the-art results in its class, scoring 95.7 on DocVQA. Handles hour-long video, bounding-box grounding, and multilingual OCR.",
    "verdict": "The default open VLM at 7B — best-in-class for document and video work on modest hardware."
  },
  "qwen25-vl-72b": {
    "release": "Janvier 2025",
    "arch": "ViT + LLM · GQA · SwiGLU · RMSNorm",
    "training": "Backbone 72B + vision encoder.",
    "strengths": [
      "Frontier vision benchmarks (MMMU 70.2)",
      "128K context window",
      "Strong OCR and grounding capabilities"
    ],
    "weaknesses": [
      "Qwen License — not Apache, has a 100M MAU clause",
      "40GB+ VRAM in Q4 — multi-GPU for full precision",
      "Tooling support varies vs the 7B variant"
    ],
    "benchmarks": [
      {
        "name": "MMMU",
        "score": 70.2
      },
      {
        "name": "MathVista",
        "score": 74.8
      },
      {
        "name": "MMBench-EN",
        "score": 88.6
      }
    ],
    "alternatives": [
      "qwen25-vl-7b",
      "llama32-vision-11b"
    ],
    "install": "ollama run qwen2.5vl:72b",
    "desc": "Frontier-class open vision-language model from Alibaba, scoring 70.2 on MMMU and 88.6 on MMBench. Uses the Qwen License rather than Apache, with a 100M MAU clause.",
    "verdict": "The strongest open VLM available — check the MAU clause before betting your product on it."
  },
  "qwen25-omni-7b": {
    "release": "Mars 2025",
    "arch": "Thinker-Talker end-to-end · TMRoPE · streaming speech in+out",
    "training": "Premier modèle omni ouvert grand public.",
    "strengths": [
      "Text, image, audio, and video input in one model",
      "Speech output without a separate TTS",
      "Apache 2.0",
      "Compact 7B footprint"
    ],
    "weaknesses": [
      "No official Ollama tag — community GGUFs only",
      "32K context is short for video-heavy workloads",
      "Early-generation omni model — quality lags specialized stacks"
    ],
    "benchmarks": [
      {
        "name": "OmniBench (avg)",
        "score": 56.13
      }
    ],
    "alternatives": [
      "phi4-multimodal",
      "moshi-7b",
      "qwen25-vl-7b"
    ],
    "install": "# GGUF : ggml-org/Qwen2.5-Omni-7B-GGUF (pas d'Ollama officiel)",
    "desc": "Alibaba's first true omni-modal open model — text, image, audio, and video in, with text and speech out. A research-grade preview rather than a production-ready release.",
    "verdict": "The first credible open omni model — promising for research, but not a drop-in for production yet."
  },
  "qwq-32b": {
    "release": "Mars 2025",
    "arch": "Dense · 64 couches · GQA (40Q/8KV) · RoPE · SwiGLU · entraîné en RL outcome-based",
    "training": "RL sur raisonnement (pas une simple distillation).",
    "strengths": [
      "Direct competitor to DeepSeek R1 at a fraction of the size",
      "131K context for long thinking traces",
      "Trained with RL, not just distilled",
      "Apache 2.0"
    ],
    "weaknesses": [
      "Very verbose — token costs add up fast",
      "Requires YaRN for context beyond 8K",
      "Overkill for non-reasoning chat workloads"
    ],
    "benchmarks": [
      {
        "name": "AIME 2024",
        "score": 79.5
      },
      {
        "name": "MATH-500",
        "score": 90.6
      }
    ],
    "alternatives": [
      "deepseek-r1-32b",
      "qwen3-32b",
      "phi4-reasoning-14b"
    ],
    "install": "ollama run qwq:32b",
    "desc": "Alibaba's dedicated 32B reasoner, trained with reinforcement learning rather than distillation. Hits 79.5 on AIME24 and 90.6 on MATH-500 — a direct Apache-licensed alternative to DeepSeek R1.",
    "verdict": "The best Apache-licensed reasoner you can run on a single GPU."
  },
  "deepseek-r1-671b": {
    "release": "Janvier 2025",
    "arch": "MoE (hérité de V3) · Multi-head Latent Attention · auxiliary-loss-free · entraîné RL",
    "training": "Distillation + RL multi-étapes. Update R1-0528 (Mai 2025).",
    "strengths": [
      "MIT license — no commercial restrictions",
      "Reference open reasoning model",
      "MATH-500 score of 97.3",
      "R1-0528 update further sharpens reasoning"
    ],
    "weaknesses": [
      "400GB+ in Q4 — server-class hardware required",
      "Out of reach for any single-machine local setup",
      "Very long reasoning traces drive up latency"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 90.8
      },
      {
        "name": "GPQA Diamond",
        "score": 71.5
      },
      {
        "name": "MATH-500",
        "score": 97.3
      }
    ],
    "alternatives": [
      "deepseek-r1-distill-llama-70b",
      "qwq-32b",
      "deepseek-r1-32b"
    ],
    "install": "ollama run deepseek-r1:671b",
    "desc": "The reference open reasoning model — a 671B MoE with 37B active, released under MIT. Scores 97.3 on MATH-500, 79.8 on AIME, and 90.8 on MMLU.",
    "verdict": "The open reasoning gold standard — if you have the hardware to host it."
  },
  "deepseek-r1-distill-llama-70b": {
    "release": "Janvier 2025",
    "arch": "Dense Llama 3.3 · SFT distillé des traces R1",
    "training": "Distillation depuis R1 671B.",
    "strengths": [
      "Frontier-class reasoning on a single workstation-class GPU",
      "128K context window",
      "Outperforms SFT-only 70B models on hard reasoning",
      "Strong drop-in for existing Llama 70B deployments"
    ],
    "weaknesses": [
      "Dual licensing (Llama 3.3 Community + DeepSeek)",
      "Hugging Face gated access via the Llama base",
      "Trails full R1 671B on the hardest problems"
    ],
    "benchmarks": [
      {
        "name": "AIME 2024 (pass@1)",
        "score": 70
      }
    ],
    "alternatives": [
      "deepseek-r1-671b",
      "qwq-32b",
      "llama33-70b"
    ],
    "install": "ollama run deepseek-r1:70b",
    "desc": "DeepSeek's R1 reasoning behavior distilled into Llama 3.3 70B. Brings frontier-class reasoning down to a single high-end GPU, but inherits both Llama and DeepSeek licenses.",
    "verdict": "The most practical way to get R1-class reasoning on a single high-end GPU."
  },
  "deepseek-v3-671b": {
    "release": "Décembre 2024",
    "arch": "MoE 256 experts, 8 actifs · MLA · auxiliary-loss-free · FP8 training",
    "training": "14.8T tokens pré-entraînement. V3.1-Terminus (Sep 2025) re-licenciée MIT.",
    "strengths": [
      "Frontier-open performance in chat, code, and general tasks",
      "MLA cuts KV memory significantly vs standard attention",
      "V3.1-Terminus available under MIT",
      "Pretrained on 14.8T tokens"
    ],
    "weaknesses": [
      "Original V3 uses the restrictive DeepSeek License",
      "400GB+ in Q4 — server-class hardware only",
      "Overkill for most workloads under 10B requests/month"
    ],
    "benchmarks": [],
    "alternatives": [
      "deepseek-r1-671b",
      "qwen3-235b-a22b"
    ],
    "install": "ollama run deepseek-v3:671b",
    "desc": "DeepSeek's frontier-open MoE — 671B total, 37B active — with multi-head latent attention and an auxiliary-loss-free balancing scheme. The V3.1-Terminus update relicenses under MIT.",
    "verdict": "Frontier-open performance for teams with serious inference infrastructure — go straight to V3.1-Terminus for the MIT license."
  },
  "gemma3-4b": {
    "release": "Mars 2025",
    "arch": "Dense · multimodal texte+vision · sliding-window attention (5:1 local:global)",
    "training": "4T tokens, 140+ langues.",
    "strengths": [
      "Multimodal in a 4B footprint",
      "140+ language coverage",
      "128K context",
      "Sliding-window attention keeps memory in check"
    ],
    "weaknesses": [
      "Gemma License — review terms before commercial use",
      "Trails the 12B and 27B on reasoning and code"
    ],
    "benchmarks": [],
    "alternatives": [
      "gemma3-12b",
      "llama3-3b",
      "phi35-mini"
    ],
    "install": "ollama run gemma3:4b",
    "desc": "Google's compact multimodal 4B with 128K context, vision input, and 140+ language coverage. The smallest Gemma 3 with the full feature set intact.",
    "verdict": "The most capable 4B multimodal you can run locally — strong default for resource-constrained deployments."
  },
  "gemma3-12b": {
    "release": "Mars 2025",
    "arch": "Dense VLM · sliding-window attention · multimodal",
    "training": "12T tokens.",
    "strengths": [
      "Sweet spot for multimodal performance vs hardware cost",
      "128K context window",
      "140 language coverage",
      "Strong general-purpose default"
    ],
    "weaknesses": [
      "Gemma License rather than Apache",
      "At least 9GB RAM required for Ollama deployment",
      "No dedicated thinking mode"
    ],
    "benchmarks": [],
    "alternatives": [
      "gemma3-4b",
      "gemma3-27b",
      "mistral-small-31-24b"
    ],
    "install": "ollama run gemma3:12b",
    "desc": "The 12B sweet spot of Google's Gemma 3 line — multimodal, 128K context, and 140 languages. Fits on a single consumer GPU with room for batching.",
    "verdict": "The pragmatic Gemma 3 — most teams should start here before reaching for the 27B."
  },
  "gemma3-27b": {
    "release": "Mars 2025",
    "arch": "Dense VLM · sliding-window attention",
    "training": "14T tokens.",
    "strengths": [
      "LMArena Elo 1338 — beats Llama 3.1 405B at 15x smaller",
      "Multimodal with vision input",
      "128K context window",
      "140 language coverage"
    ],
    "weaknesses": [
      "Gemma License rather than Apache",
      "No thinking mode for hard reasoning",
      "Trails dedicated reasoners on math benchmarks"
    ],
    "benchmarks": [
      {
        "name": "LMArena Elo",
        "score": 73
      },
      {
        "name": "MMLU",
        "score": 78.6
      },
      {
        "name": "MMLU-Pro",
        "score": 67.5
      },
      {
        "name": "MATH",
        "score": 89
      }
    ],
    "alternatives": [
      "gemma3-12b",
      "qwen3-32b",
      "mistral-small-31-24b"
    ],
    "install": "ollama run gemma3:27b",
    "desc": "Google's flagship Gemma 3 at 27B — multimodal, 128K context, and an LMArena Elo of 1338 that beats Llama 3.1 405B at 15x smaller. Sets the bar for open chat under 30B.",
    "verdict": "Punch-above-its-weight open chat that quietly outscores models 15x its size."
  },
  "phi4-multimodal": {
    "release": "Février 2025",
    "arch": "Dense · Mixture-of-LoRAs pour multimodal · LongRoPE",
    "training": "Jusqu'à ~2.8h d'audio en entrée.",
    "strengths": [
      "Text, image, and audio input in a 5.6B footprint",
      "MIT license",
      "128K context window",
      "Long audio handling (up to ~2.8 hours)"
    ],
    "weaknesses": [
      "No official Ollama tag",
      "English-first — weaker on other languages",
      "Limited ecosystem tooling vs Qwen VL"
    ],
    "benchmarks": [],
    "alternatives": [
      "phi4-reasoning-14b",
      "qwen25-omni-7b",
      "phi35-mini"
    ],
    "install": "# Via HuggingFace : microsoft/Phi-4-multimodal-instruct (pas d'Ollama officiel)",
    "desc": "Microsoft's 5.6B multimodal model — text, image, and audio in, text out — using a Mixture-of-LoRAs design. Accepts roughly 2.8 hours of audio per request.",
    "verdict": "The lightest credible audio-capable multimodal under MIT — ideal for transcription-adjacent pipelines on small hardware."
  },
  "phi4-reasoning-14b": {
    "release": "Avril 2025",
    "arch": "Dense · SFT sur traces o3-mini · variante Plus ajoute RL",
    "training": "RoPE base freq. augmentée vs Phi-4 base.",
    "strengths": [
      "Beats R1-Distill-Llama-70B on AIME and GPQA with 50x fewer parameters",
      "MIT license",
      "Increased RoPE base frequency improves long-form reasoning",
      "Practical hardware footprint for a frontier-class reasoner"
    ],
    "weaknesses": [
      "English-first — weak multilingual performance",
      "Weaker on non-Python code generation",
      "32K context vs 128K on most peers"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwq-32b",
      "deepseek-r1-32b",
      "phi4-14b"
    ],
    "install": "ollama run phi4-reasoning:14b",
    "desc": "Microsoft's 14B reasoner that beats R1-Distill-Llama-70B on AIME and GPQA with 50x fewer parameters. MIT-licensed, English-first, with a 32K context.",
    "verdict": "The most efficient open reasoner you can run on a single consumer GPU."
  },
  "command-r-plus-104b": {
    "release": "Août 2024 (refresh)",
    "arch": "Dense · optimisé RAG et tool-use · GQA",
    "training": "23 langues, +50% throughput / -25% latency vs version Avril 2024.",
    "strengths": [
      "Best-in-class open RAG and tool-use at release",
      "128K context window",
      "23 language coverage",
      "Higher throughput and lower latency than the April 2024 release"
    ],
    "weaknesses": [
      "CC-BY-NC 4.0 — no commercial use without a separate license",
      "60GB+ VRAM in Q4",
      "Surpassed by newer 100B-class models on general benchmarks"
    ],
    "benchmarks": [],
    "alternatives": [
      "aya-expanse-32b",
      "qwen3-32b",
      "llama33-70b"
    ],
    "install": "ollama run command-r-plus:104b",
    "desc": "Cohere's 104B RAG and tool-use flagship from August 2024 — 128K context, 23 languages. Licensed CC-BY-NC, so non-commercial only without a Cohere agreement.",
    "verdict": "Strong RAG and tool-use, but the non-commercial license rules it out of most production deployments."
  },
  "aya-expanse-8b": {
    "release": "Octobre 2024",
    "arch": "Dense · 32 couches · 32 heads · SwiGLU · GQA · SentencePiece ~128k vocab",
    "training": "23 langues, focus multilingue.",
    "strengths": [
      "23 language coverage with strong low-resource performance",
      "Beats Gemma 2 9B and Llama 3.1 8B on multilingual benchmarks",
      "Particularly strong on low-resource languages",
      "Compact 8B footprint"
    ],
    "weaknesses": [
      "CC-BY-NC 4.0 — no commercial deployment",
      "Only 8K context",
      "Outclassed by Qwen 3 8B on most general tasks"
    ],
    "benchmarks": [
      {
        "name": "Dolly (vs Llama 3.1 8B)",
        "score": 83.9
      }
    ],
    "alternatives": [
      "aya-expanse-32b",
      "qwen3-8b",
      "eurollm-9b"
    ],
    "install": "ollama run aya-expanse:8b",
    "desc": "Cohere For AI's multilingual 8B covering 23 languages, outperforming Gemma 2 9B and Llama 3.1 8B in its language set. CC-BY-NC — non-commercial only.",
    "verdict": "A strong multilingual research model held back by its non-commercial license."
  },
  "aya-expanse-32b": {
    "release": "Octobre 2024",
    "arch": "Dense (base Command R) · 23 langues",
    "training": "Fine-tune multilingue du backbone Command.",
    "strengths": [
      "25% improvement on low-resource languages vs peers",
      "23 language coverage",
      "89.9% win rate on Dolly vs Mixtral 8x22B",
      "Strong general performance for its size"
    ],
    "weaknesses": [
      "CC-BY-NC 4.0 — no commercial use",
      "Only 8K context window",
      "Newer Qwen 3 models close much of the gap with permissive licenses"
    ],
    "benchmarks": [
      {
        "name": "Dolly (vs Mixtral 8x22B)",
        "score": 89.9
      }
    ],
    "alternatives": [
      "aya-expanse-8b",
      "qwen3-32b",
      "command-r-plus-104b"
    ],
    "install": "ollama run aya-expanse:32b",
    "desc": "The 32B sibling of Aya Expanse from Cohere For AI, delivering a 25% gain on low-resource languages and 89.9% win rate on Dolly vs Mixtral 8x22B. CC-BY-NC.",
    "verdict": "The strongest open multilingual 32B for research — license disqualifies it for production."
  },
  "eurollm-9b": {
    "release": "Décembre 2024",
    "arch": "Dense · 42 couches · GQA (32Q/8KV) · SwiGLU · RoPE · RMSNorm",
    "training": "4T tokens sur MareNostrum5 (BSC). 35 langues (24 UE + extras).",
    "strengths": [
      "EU sovereignty — Horizon Europe funded",
      "Apache 2.0 license",
      "Best European open model at its scale",
      "Strong coverage of all 24 official EU languages"
    ],
    "weaknesses": [
      "Only 4K context — short for modern workloads",
      "No official Ollama tag",
      "Outpaced by Qwen 3 8B on general benchmarks"
    ],
    "benchmarks": [],
    "alternatives": [
      "teuken-7b",
      "gemma3-12b",
      "lucie-7b"
    ],
    "install": "# HuggingFace : utter-project/EuroLLM-9B-Instruct (pas d'Ollama officiel)",
    "desc": "An EU-funded 9B (Horizon Europe) covering 35 languages including all 24 official EU ones. Trained on 4T tokens on the MareNostrum5 supercomputer and released under Apache 2.0.",
    "verdict": "The natural pick when EU sovereignty or procurement requires a European-trained Apache 2.0 model."
  },
  "teuken-7b": {
    "release": "Novembre 2024",
    "arch": "Dense · 32 couches · GQA (2 groupes) · SwiGLU · RoPE · tokenizer multilingue",
    "training": "Corpus 24 langues UE, financé BMWK (Allemagne).",
    "strengths": [
      "Built for EU sovereignty",
      "Covers all 24 official EU languages",
      "Apache 2.0 commercial variant available",
      "German government (BMWK) backing"
    ],
    "weaknesses": [
      "Only 4K context",
      "No official Ollama tag",
      "Research-v0.4 variant carries a restricted license — check which you grabbed"
    ],
    "benchmarks": [],
    "alternatives": [
      "eurollm-9b",
      "lucie-7b",
      "mistral-7b-instruct"
    ],
    "install": "# HuggingFace : openGPT-X/Teuken-7B-instruct-commercial-v0.4",
    "desc": "A German government-funded (BMWK) 7B from OpenGPT-X covering all 24 EU languages. The commercial variant is Apache 2.0; the research-v0.4 variant has restricted terms.",
    "verdict": "An EU sovereignty pick with the commercial variant clearly licensed — confirm which variant you're using."
  },
  "pleias-3b": {
    "release": "Décembre 2024",
    "arch": "Dense type Llama/GPT-NeoX · entraîné from scratch",
    "training": "~1.08T tokens sur Common Corpus (données 100% open licensées).",
    "strengths": [
      "EU AI Act compliant by design",
      "100% traceable training data (Common Corpus)",
      "Apache 2.0",
      "Compact 3B footprint"
    ],
    "weaknesses": [
      "Modest benchmark scores vs general-purpose 3B peers",
      "Context window of only ~2K",
      "Narrower capabilities than mainstream open models"
    ],
    "benchmarks": [],
    "alternatives": [
      "pleias-rag-1b",
      "croissant-llm",
      "lucie-7b"
    ],
    "install": "# HuggingFace : PleIAs/Pleias-3b-Preview",
    "desc": "A French 3B from PleIAs trained 100% on Common Corpus open data, designed for EU AI Act compliance by construction. Fully traceable training data, Apache 2.0.",
    "verdict": "The right pick when training-data provenance and EU compliance matter more than benchmark numbers."
  },
  "pleias-rag-1b": {
    "release": "Avril 2025",
    "arch": "Dense 1.2B · fine-tuné pour RAG avec citations/grounding intégrés",
    "training": "Basé sur Pleias 1.2B.",
    "strengths": [
      "Built-in citation and grounding in RAG responses",
      "Outperforms most small language models under 4B on HotPotQA",
      "Runs on lightweight hardware",
      "Apache 2.0"
    ],
    "weaknesses": [
      "Context window of only ~2K",
      "No official Ollama tag",
      "Specialized for RAG — not a general chat model"
    ],
    "benchmarks": [],
    "alternatives": [
      "pleias-3b",
      "croissant-llm"
    ],
    "install": "# HuggingFace : PleIAs/Pleias-RAG-1B (GGUF : PleIAs/Pleias-RAG-1B-gguf)",
    "desc": "A 1.2B RAG-specialized model from PleIAs with built-in citation and grounding behavior. Beats most sub-4B small language models on HotPotQA.",
    "verdict": "The most efficient small open model for production RAG with citations."
  },
  "moshi-7b": {
    "release": "Septembre 2024",
    "arch": "Full-duplex speech-text · Depth Transformer (codebook) + 7B Temporal Transformer",
    "training": "Codec Mimi à 12.5 Hz / 1.1 kbps sur audio 24 kHz. Latence ~200ms pratique.",
    "strengths": [
      "First open full-duplex speech model",
      "Sub-second latency (~200ms in practice)",
      "Mimi codec at 12.5 Hz / 1.1 kbps on 24 kHz audio",
      "From Kyutai, a respected French AI lab"
    ],
    "weaknesses": [
      "Not a text LLM — different use case entirely",
      "Architecture not supported by Ollama",
      "CC-BY 4.0 license — attribution required"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen25-omni-7b",
      "helium-1-2b",
      "phi4-multimodal"
    ],
    "install": "# GitHub : kyutai-labs/moshi — voix Moshiko (H) / Moshika (F)",
    "desc": "Kyutai's full-duplex speech model — 7.6B parameters with sub-second latency (~200ms) and two voices, Moshiko and Moshika. A speech architecture, not a text LLM.",
    "verdict": "The reference open full-duplex speech model — niche, but the only credible choice in its category."
  },
  "helium-1-2b": {
    "release": "Avril 2025 (stable)",
    "arch": "Dense · GQA · RoPE · distillé de Gemma 2",
    "training": "2.5T tokens, 24 langues UE.",
    "strengths": [
      "Compact multilingual base from a European lab",
      "Covers all 24 EU languages",
      "Beats Qwen 2.5 1.5B, Gemma 2B, and Llama 3.2 3B at its scale",
      "Built by Kyutai"
    ],
    "weaknesses": [
      "CC-BY-SA 4.0 plus Gemma Terms via distillation",
      "Base model — not instruction-tuned",
      "No official Ollama support"
    ],
    "benchmarks": [],
    "alternatives": [
      "smollm2-17b",
      "pleias-3b",
      "gemma3-4b"
    ],
    "install": "# HuggingFace : kyutai/helium-1-2b",
    "desc": "Kyutai's 2B multilingual base covering all 24 EU languages, distilled from Gemma 2 — which means Gemma Terms apply on top of CC-BY-SA. Beats Qwen 2.5 1.5B, Gemma 2B, and Llama 3.2 3B at its scale.",
    "verdict": "A strong European small base for fine-tuning — just budget for the dual-license obligations."
  },
  "smollm2-17b": {
    "release": "Novembre 2024",
    "arch": "Dense type Llama 2 · SFT + DPO (UltraFeedback)",
    "training": "11T tokens.",
    "strengths": [
      "Best-in-class quality for its size on MMLU-Pro",
      "Clean Apache 2.0 license with no commercial strings",
      "Massive 11T-token training corpus for a small model",
      "One of the most downloaded small models on Hugging Face"
    ],
    "weaknesses": [
      "English-centric, weak on non-English languages",
      "8K context window is tight for modern RAG workflows",
      "BFCL function-calling score of 27% trails larger peers"
    ],
    "benchmarks": [
      {
        "name": "BFCL (function calling)",
        "score": 27
      }
    ],
    "alternatives": [
      "smolvlm2-22b",
      "llama3-3b",
      "phi35-mini"
    ],
    "install": "ollama run smollm2:1.7b",
    "desc": "HuggingFace's 1.7B Apache 2.0 instruct model trained on 11T tokens. Beats Qwen2.5-1.5B by roughly 6 points on MMLU-Pro, making it a top pick at the sub-2B tier.",
    "verdict": "If you need an Apache-licensed sub-2B model that punches above its weight, SmolLM2 is the default choice."
  },
  "smolvlm2-22b": {
    "release": "Février 2025",
    "arch": "VLM image+vidéo+texte → texte · backbone SmolLM2-1.7B",
    "training": "~5.2 GB VRAM pour l'inférence vidéo.",
    "strengths": [
      "Runs full video inference in ~5.2GB VRAM",
      "Apache 2.0 license suitable for commercial use",
      "Genuine image + video + text capability at 2.2B scale",
      "Inherits SmolLM2's tight text fundamentals"
    ],
    "weaknesses": [
      "8K context inherited from SmolLM2 limits long video",
      "No official Ollama distribution yet",
      "Video understanding is basic compared to frontier VLMs"
    ],
    "benchmarks": [],
    "alternatives": [
      "smollm2-17b",
      "qwen25-vl-7b",
      "llama32-vision-11b"
    ],
    "install": "# HuggingFace : HuggingFaceTB/SmolVLM2-2.2B-Instruct",
    "desc": "HuggingFace's 2.2B vision-language model built on SmolLM2-1.7B, handling image, video, and text in roughly 5.2GB of VRAM. The smallest serious VLM with video understanding.",
    "verdict": "The go-to small VLM when you need vision plus video in under 3B parameters and an Apache license."
  },
  "glm-51": {
    "release": "Avril 2026",
    "arch": "MoE · 744B/40B actifs · 200k ctx · variante Reasoning",
    "training": "Successeur de GLM-5 (février 2026).",
    "strengths": [
      "#1 open-weight model on Artificial Analysis (April 2026)",
      "58.4 on SWE-Bench Pro, leading all open weights",
      "200K context for whole-repo reasoning",
      "True MIT license with full commercial rights"
    ],
    "weaknesses": [
      "445GB+ in Q4 quantization requires a multi-GPU server",
      "No official Ollama tag at launch",
      "Operational complexity rules out single-workstation use"
    ],
    "benchmarks": [
      {
        "name": "SWE-Bench Pro",
        "score": 58.4
      }
    ],
    "alternatives": [
      "minimax-m27",
      "qwen35-397b-a17b",
      "deepseek-v32"
    ],
    "install": "# HuggingFace (GGUF) : unsloth/GLM-5.1-GGUF",
    "desc": "Z.AI's flagship MoE with 744B total and 40B active parameters under an MIT license. Ranked #1 open-weight model on Artificial Analysis as of April 2026.",
    "verdict": "The strongest open-weight model available today, provided you have the hardware to run a 744B MoE."
  },
  "minimax-m27": {
    "release": "Avril 2026",
    "arch": "MoE 229B/10B actifs · 205k ctx · agent self-evolving",
    "training": "Open-sourcé 12 avril 2026. Successeur M2.5.",
    "strengths": [
      "State-of-the-art open agentic performance",
      "56.22% SWE-Bench Pro and 57% Terminal Bench",
      "Only 10B active parameters keeps inference fast",
      "Clean Apache 2.0 license",
      "#1 trending on Hugging Face at launch"
    ],
    "weaknesses": [
      "138GB+ in Q4 demands serious server hardware",
      "Verbose output in agent mode inflates token costs",
      "Newer release means thinner tooling ecosystem"
    ],
    "benchmarks": [
      {
        "name": "SWE-Pro",
        "score": 56.22
      },
      {
        "name": "Terminal Bench",
        "score": 57
      }
    ],
    "alternatives": [
      "qwen36-35b-a3b",
      "glm-51",
      "qwen3-coder-next"
    ],
    "install": "# HuggingFace (GGUF) : unsloth/MiniMax-M2.7-GGUF",
    "desc": "MiniMax's agentic MoE with 229B total and 10B active parameters under Apache 2.0. Open-sourced April 12, 2026 and currently the top trending model on Hugging Face.",
    "verdict": "The most exciting agentic open-weight model of 2026, if your hardware can host 229B parameters."
  },
  "gemma4-31b": {
    "release": "Avril 2026",
    "arch": "Dense 31B · multimodal texte+image+audio · 256k ctx",
    "training": "140+ langues.",
    "strengths": [
      "#3 on Chatbot Arena's open leaderboard",
      "Native audio understanding, not just text-to-image",
      "256K context window in a dense 31B model",
      "Strong coverage across 140+ languages",
      "Backed by Google's training infrastructure"
    ],
    "weaknesses": [
      "Gemma license is more restrictive than Apache 2.0",
      "31B dense model needs ~20GB VRAM in Q4",
      "Audio quality trails purpose-built ASR models"
    ],
    "benchmarks": [],
    "alternatives": [
      "gemma3-27b",
      "mistral-small-4",
      "qwen35-27b"
    ],
    "install": "ollama run gemma4:31b",
    "desc": "Google's dense 31B multimodal model with native text, image, and audio support across 140+ languages. Ranked #3 on Chatbot Arena's open leaderboard with a 256K context window.",
    "verdict": "The best open multimodal generalist of the Gemma line, assuming you can live with the Gemma license."
  },
  "gemma4-e4b": {
    "release": "Avril 2026",
    "arch": "Dense E4B (4B effectifs) · multimodal texte+image+audio",
    "training": "Édition edge/mobile de Gemma 4.",
    "strengths": [
      "Full text + image + audio in a 4B model",
      "Runs comfortably on laptops and high-end phones",
      "128K context is generous for the size class",
      "140-language coverage in a small footprint"
    ],
    "weaknesses": [
      "Gemma license restricts some commercial uses",
      "Quality clearly trails 12B+ multimodal models",
      "Audio reasoning is functional but not robust"
    ],
    "benchmarks": [],
    "alternatives": [
      "gemma3-4b",
      "phi4-multimodal",
      "smolvlm2-22b"
    ],
    "install": "ollama run gemma4:e4b",
    "desc": "Google's 4B-effective multimodal Gemma variant tuned for laptops and edge devices, handling text, image, and audio across 140 languages with a 128K context.",
    "verdict": "The most capable sub-5B multimodal model for edge deployments, with the usual Gemma license caveats."
  },
  "gemma4-e2b": {
    "release": "Avril 2026",
    "arch": "Dense E2B (2B effectifs) · multimodal texte+image · 128k ctx · thinking configurable",
    "training": "Edition edge ultra-compacte de Gemma 4. Architecture optimisee on-device/mobile. 140+ langues.",
    "strengths": [
      "Full multimodal in ~7 GB at Q4",
      "Runs on CPU or entry-level GPU",
      "128k context",
      "Thinking mode toggle",
      "Open Gemma license"
    ],
    "weaknesses": [
      "Quality trails the E4B and 26B variants",
      "Reasoning benchmarks well below larger models",
      "Gemma license isn't Apache or MIT"
    ],
    "benchmarks": [],
    "alternatives": [
      "gemma4-e4b",
      "phi4-mini",
      "smolvlm2-22b"
    ],
    "install": "ollama run gemma4:e2b",
    "desc": "Google's edge-optimized Gemma 4: 2B effective params, full text + image multimodal, 128k context, and a configurable thinking mode. Built for laptops, mobile, and CPU inference.",
    "verdict": "The Gemma 4 to pick when you're shipping on-device — small, multimodal, and surprisingly long-context."
  },
  "qwen35-397b-a17b": {
    "release": "Février 2026",
    "arch": "MoE 397B/17B actifs · 262k ctx · hybrid thinking",
    "training": "Nouvelle flagship Qwen famille 3.5.",
    "strengths": [
      "#5 on Artificial Analysis's open leaderboard",
      "262K context window",
      "Only 17B active parameters keeps inference efficient",
      "Apache 2.0 license"
    ],
    "weaknesses": [
      "240GB+ in Q4 demands a multi-GPU server",
      "MoE deployment adds operational complexity",
      "Beaten by GLM-5.1 and MiniMax-M2.7 on key benchmarks"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-235b-a22b",
      "glm-51",
      "deepseek-v32"
    ],
    "install": "# HuggingFace : Qwen/Qwen3.5-397B-A17B (alternative locale plus accessible : ollama run qwen3.5:122b)",
    "desc": "Alibaba's flagship MoE with 397B total and 17B active parameters, ranked #5 open-weight on Artificial Analysis. Apache 2.0 with a 262K context.",
    "verdict": "A strong flagship MoE with permissive licensing, though no longer the top of the open leaderboard."
  },
  "qwen36-35b-a3b": {
    "release": "Avril 2026",
    "arch": "MoE 35B/3B actifs · agentic-coding specialist",
    "training": "Sortie 16 avril 2026.",
    "strengths": [
      "73.4% SWE-Bench in an MoE that fits on a 24GB GPU",
      "Only 3B active parameters means fast inference",
      "262K context handles whole repos",
      "Apache 2.0 license"
    ],
    "weaknesses": [
      "No official Ollama tag yet",
      "Brand-new release with limited production track record",
      "Specialized for coding, weaker as a general chat model"
    ],
    "benchmarks": [
      {
        "name": "SWE-Bench",
        "score": 73.4
      }
    ],
    "alternatives": [
      "qwen3-coder-next",
      "devstral-small-2",
      "qwen25-coder-32b"
    ],
    "install": "ollama run qwen3.6:35b-a3b",
    "desc": "Alibaba's agentic coding MoE with 35B total and just 3B active parameters, released April 16, 2026. Scores 73.4% on SWE-Bench while running on a single 24GB GPU.",
    "verdict": "The best local coding agent for a single 24GB GPU as of April 2026."
  },
  "qwen3-coder-next": {
    "release": "Février 2026",
    "arch": "MoE 80B/3B · Gated DeltaNet + Attention hybride · 262k ctx",
    "training": "Spécialiste code agentic.",
    "strengths": [
      "Runs as a local copilot on a 24GB GPU",
      "262K context fits entire codebases",
      "Hybrid architecture keeps memory low",
      "Apache 2.0 license"
    ],
    "weaknesses": [
      "Hybrid architecture means partial llama.cpp support",
      "Less mature than dense coder alternatives",
      "Tooling lags behind standard transformer models"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen36-35b-a3b",
      "devstral-small-2",
      "qwen25-coder-32b"
    ],
    "install": "ollama run qwen3-coder-next",
    "desc": "Alibaba's hybrid Gated DeltaNet + Attention MoE with 80B total and 3B active parameters. Purpose-built as a local coding copilot that fits on a 24GB GPU.",
    "verdict": "Choose this when you want a local Copilot replacement and can tolerate early-stage tooling friction."
  },
  "mistral-small-4": {
    "release": "Mars 2026",
    "arch": "MoE 119B/6.5B actifs · 256k ctx · unifie instruct+reasoning+vision+code",
    "training": "Remplace Small 3.x et Pixtral en un seul modèle.",
    "strengths": [
      "Unifies chat, reasoning, vision, and code in one model",
      "Only 6.5B active parameters for fast inference",
      "256K context window",
      "Apache 2.0 license",
      "European lab with strong French and EU-language support"
    ],
    "weaknesses": [
      "72GB+ in Q4 requires a prosumer multi-GPU setup",
      "Breaks continuity with the Small 3.x line",
      "Newer release means thinner ecosystem"
    ],
    "benchmarks": [],
    "alternatives": [
      "mistral-small-31-24b",
      "qwen35-27b",
      "gemma4-31b"
    ],
    "install": "# HuggingFace : mistralai/Mistral-Small-4 (pas encore de tag Ollama officiel)",
    "desc": "Mistral AI's 2026 flagship MoE with 119B total and 6.5B active parameters, unifying chat, reasoning, vision, and code in a single Apache 2.0 model.",
    "verdict": "Mistral's most ambitious open release yet, ideal if you want one model covering four product lines."
  },
  "devstral-small-2": {
    "release": "Décembre 2025",
    "arch": "Dense 24B · base Mistral · 256k ctx · post-trainé code",
    "training": "Codéveloppé avec All Hands AI.",
    "strengths": [
      "72.2% SWE-Bench in a 24B dense model",
      "Runs comfortably on a single RTX 4090",
      "256K context for whole-repo work",
      "Apache 2.0 license",
      "Co-developed with All Hands AI for agent workloads"
    ],
    "weaknesses": [
      "No vision capability",
      "Specialized for code, weaker as a general assistant"
    ],
    "benchmarks": [
      {
        "name": "SWE-Bench",
        "score": 72.2
      }
    ],
    "alternatives": [
      "qwen36-35b-a3b",
      "qwen25-coder-32b",
      "qwen3-coder-next"
    ],
    "install": "ollama run devstral-small2:24b",
    "desc": "Mistral AI's 24B coding specialist co-developed with All Hands AI, scoring 72.2% on SWE-Bench under Apache 2.0. Fits on a single RTX 4090.",
    "verdict": "The strongest Apache-licensed dense coder that fits on a single consumer GPU."
  },
  "voxtral-4b-tts": {
    "release": "Mars 2026",
    "arch": "TTS frontière open · 4B · 9 langues",
    "training": "Compétiteur direct d'ElevenLabs.",
    "strengths": [
      "Studio-quality TTS in an open model",
      "Native French alongside 8 other languages",
      "Runs on consumer laptop hardware",
      "Competitive with ElevenLabs on quality"
    ],
    "weaknesses": [
      "CC-BY-NC 4.0 license blocks commercial use",
      "Not a text LLM, narrower utility",
      "Short 4K context limits long-form scripts"
    ],
    "benchmarks": [],
    "alternatives": [
      "moshi-7b",
      "qwen25-omni-7b"
    ],
    "install": "# HuggingFace : mistralai/Voxtral-4B-TTS-2603",
    "desc": "Mistral AI's open frontier TTS model covering 9 languages including French, rivaling ElevenLabs on quality. Note: CC-BY-NC 4.0, non-commercial only.",
    "verdict": "An ElevenLabs-class TTS for non-commercial work; commercial users need a different license path."
  },
  "deepseek-r2-32b": {
    "release": "Avril 2026",
    "arch": "Dense 32B · MIT · raisonneur",
    "training": "Successeur de R1 et R1-Distill.",
    "strengths": [
      "92.7% on AIME, frontier-level math reasoning",
      "Runs on a single RTX 4090 in Q4",
      "MIT license with full commercial rights",
      "Best consumer-GPU reasoner of its generation"
    ],
    "weaknesses": [
      "Verbose chain-of-thought inflates token costs",
      "Specialized for reasoning, less polished for chat",
      "Latency can spike on hard problems"
    ],
    "benchmarks": [
      {
        "name": "AIME",
        "score": 92.7
      }
    ],
    "alternatives": [
      "deepseek-r1-distill-llama-70b",
      "qwq-32b",
      "phi4-reasoning-14b"
    ],
    "install": "# HuggingFace : deepseek-ai/DeepSeek-R2 (pas encore de tag Ollama officiel)",
    "desc": "DeepSeek's dense 32B reasoning model under MIT, scoring 92.7% on AIME. Fits on a single RTX 4090 in Q4 and is the best consumer-GPU reasoner available.",
    "verdict": "The best open reasoning model that fits on a single consumer GPU."
  },
  "deepseek-v32": {
    "release": "Décembre 2025",
    "arch": "MoE 685B/37B actifs · DeepSeek Sparse Attention · MIT",
    "training": "Successeur V3 avec DSA pour mémoire réduite.",
    "strengths": [
      "IMO gold-medal reasoning quality",
      "DeepSeek Sparse Attention reduces memory pressure",
      "MIT license",
      "#2 by usage volume on OpenRouter"
    ],
    "weaknesses": [
      "410GB+ in Q4 needs a serious multi-GPU server",
      "Sparse attention adds inference engine complexity",
      "Operational overhead is significant"
    ],
    "benchmarks": [],
    "alternatives": [
      "deepseek-v3-671b",
      "qwen35-397b-a17b",
      "glm-51"
    ],
    "install": "# HuggingFace : deepseek-ai/DeepSeek-V3.2 (alternative locale : ollama run deepseek-v3:671b)",
    "desc": "DeepSeek's 685B MoE featuring DeepSeek Sparse Attention for lower memory use. Holds an IMO gold-medal score and ranks #2 by volume on OpenRouter.",
    "verdict": "A frontier-grade MIT-licensed MoE if you can run a multi-GPU cluster."
  },
  "kimi-k25": {
    "release": "Janvier 2026",
    "arch": "MoE 1T/32B actifs · multimodal · mode 'agent swarm' · 256k ctx",
    "training": "Le plus gros modèle open-weight pratique.",
    "strengths": [
      "Genuine 1-trillion-parameter open-weight model",
      "Built-in agent swarm coordination mode",
      "256K context with multimodal input",
      "Only 32B active parameters per token"
    ],
    "weaknesses": [
      "~600GB in Q4 demands a small cluster",
      "Modified MIT license needs legal review for commercial use",
      "Operational complexity is extreme",
      "Power and cooling budget rules out most home setups"
    ],
    "benchmarks": [],
    "alternatives": [
      "deepseek-v32",
      "glm-51",
      "qwen35-397b-a17b"
    ],
    "install": "# HuggingFace : moonshotai/Kimi-K2.5",
    "desc": "Moonshot AI's 1-trillion-parameter MoE with 32B active parameters and a multimodal agent-swarm mode. Around 595GB on disk, aimed at serious home labs and small clusters.",
    "verdict": "The largest practical open-weight model in 2026, for teams that can host it."
  },
  "nemotron-3-super-120b": {
    "release": "Mars 2026",
    "arch": "MoE 120B/12B actifs · NVIDIA Open Model License",
    "training": "10T tokens d'entraînement aussi libérés.",
    "strengths": [
      "NVIDIA's first true frontier open release",
      "60% on SWE-Bench Verified",
      "Commercially permissive NVIDIA Open Model License",
      "10T-token training corpus released alongside weights"
    ],
    "weaknesses": [
      "72GB+ in Q4 needs serious hardware",
      "Ollama support is still partial",
      "License is permissive but not Apache 2.0"
    ],
    "benchmarks": [
      {
        "name": "SWE-Bench Verified",
        "score": 60
      }
    ],
    "alternatives": [
      "mistral-small-4",
      "qwen35-397b-a17b",
      "llama33-70b"
    ],
    "install": "ollama run nemotron-3-super:120b",
    "desc": "NVIDIA's first frontier-class release, a 120B MoE with 12B active parameters scoring 60% on SWE-Bench Verified. Ships with the 10T-token training corpus.",
    "verdict": "A credible NVIDIA-backed frontier model with the rare bonus of a public training corpus."
  },
  "olmo3-32b": {
    "release": "Fin 2025",
    "arch": "Dense 32B · 100% ouvert (poids + données + code)",
    "training": "Allen AI. Variantes Think et Instruct.",
    "strengths": [
      "Complete training transparency at 32B scale",
      "Apache 2.0 across weights, data, and code",
      "Think and Instruct variants for different workloads",
      "Strongest auditable model for AI Act compliance"
    ],
    "weaknesses": [
      "Benchmarks trail closed-data 32B models",
      "64K context lags top competitors",
      "Less polished than commercial-tuned alternatives"
    ],
    "benchmarks": [],
    "alternatives": [
      "olmo3-7b",
      "qwen3-32b",
      "pleias-3b"
    ],
    "install": "ollama run olmo-3:32b",
    "desc": "Allen AI's fully open dense 32B with Think and Instruct variants, releasing weights, data, and code under Apache 2.0. The transparency benchmark for 32B-class models.",
    "verdict": "The most transparent 32B available; pick it when auditability outweighs raw benchmark scores."
  },
  "olmo3-7b": {
    "release": "Fin 2025",
    "arch": "Dense 7B · 100% ouvert",
    "training": "Allen AI.",
    "strengths": [
      "Weights, data, and code all Apache 2.0",
      "Full traceability from corpus to checkpoint",
      "Backed by Allen AI's research credibility"
    ],
    "weaknesses": [
      "Quality trails the best closed-data 7B models",
      "8K context is restrictive for modern workloads",
      "Not tuned for top leaderboard scores"
    ],
    "benchmarks": [],
    "alternatives": [
      "olmo3-32b",
      "qwen3-8b",
      "pleias-3b"
    ],
    "install": "ollama run olmo-3:7b",
    "desc": "Allen AI's fully open 7B model releasing weights, training data, and code under Apache 2.0. The reference choice for reproducible LLM research.",
    "verdict": "The clearest choice when full training transparency matters more than peak benchmark scores."
  },
  "tiny-aya-3b": {
    "release": "Février 2026",
    "arch": "Dense 3.35B · 5 variantes régionales (Base/Global/Earth/Fire/Water)",
    "training": "70+ langues. Water = Europe + APAC (FR).",
    "strengths": [
      "Best multilingual quality in the tiny tier",
      "Five regional variants targeting specific markets",
      "Native coverage of 70+ languages",
      "Backed by Cohere For AI research"
    ],
    "weaknesses": [
      "CC-BY-NC 4.0 blocks commercial deployment",
      "8K context is limiting for long-form work",
      "Quality varies across regional variants"
    ],
    "benchmarks": [],
    "alternatives": [
      "aya-expanse-8b",
      "smollm2-17b",
      "gemma4-e4b"
    ],
    "install": "# HuggingFace : CohereLabs/tiny-aya-base (pas encore de tag Ollama officiel)",
    "desc": "Cohere For AI's 3.35B model in 5 regional variants covering 70+ languages, with the Water variant tuned for Europe and APAC. CC-BY-NC 4.0, non-commercial only.",
    "verdict": "The strongest tiny multilingual model when commercial use is off the table."
  },
  "granite4-3b-vision": {
    "release": "Mars 2026",
    "arch": "VLM 3B dense · spécialisé documents entreprise",
    "training": "IBM Granite 4.0 family.",
    "strengths": [
      "Fast, accurate enterprise OCR",
      "Strong table and form-field extraction",
      "Apache 2.0 license",
      "Runs comfortably on a laptop"
    ],
    "weaknesses": [
      "16K context limits multi-page documents",
      "English-first, weak on non-Latin scripts",
      "Narrow scope, not a general-purpose VLM"
    ],
    "benchmarks": [],
    "alternatives": [
      "smolvlm2-22b",
      "qwen25-vl-7b",
      "llama32-vision-11b"
    ],
    "install": "# HuggingFace : ibm-granite/granite-4.0-3b-vision",
    "desc": "IBM's 3B vision-language model purpose-built for enterprise document extraction, including OCR, table parsing, and form understanding. Apache 2.0 and laptop-deployable.",
    "verdict": "The best small VLM for enterprise document workflows under an Apache license."
  },
  "step-35-flash": {
    "release": "Février 2026",
    "arch": "MoE 196B/11B actifs · 256k ctx",
    "training": "StepFun. 100 tok/s à 128k ctx.",
    "strengths": [
      "100 tokens/sec sustained at 128K context",
      "256K maximum context window",
      "Only 11B active parameters",
      "Apache 2.0 license"
    ],
    "weaknesses": [
      "118GB+ in Q4 needs a multi-GPU server",
      "Brand awareness still low outside Asia",
      "Trails top open models on hardest benchmarks"
    ],
    "benchmarks": [],
    "alternatives": [
      "mistral-small-4",
      "nemotron-3-super-120b",
      "minimax-m27"
    ],
    "install": "# HuggingFace : stepfun-ai/step-3.5-flash",
    "desc": "StepFun's 196B MoE with 11B active parameters delivers 100 tokens/sec at 128K context. Ranks #3 by free-tier volume on OpenRouter under Apache 2.0.",
    "verdict": "A fast, permissively licensed MoE that punches well above its name recognition."
  },
  "falcon-h1r-7b": {
    "release": "Janvier 2026",
    "arch": "Dense 7B hybride · raisonnement",
    "training": "TII (UAE).",
    "strengths": [
      "Outperforms models 7x its size on reasoning",
      "Compact 7B footprint",
      "Strong energy efficiency",
      "Novel hybrid architecture"
    ],
    "weaknesses": [
      "TII Falcon-LLM License 2.0 needs clause-by-clause review",
      "32K context is modest for 2026",
      "Hybrid architecture means uneven tooling support"
    ],
    "benchmarks": [],
    "alternatives": [
      "phi4-reasoning-14b",
      "deepseek-r2-32b",
      "qwq-32b"
    ],
    "install": "# HuggingFace : tiiuae/Falcon-H1R-7B",
    "desc": "TII's 7B hybrid reasoning architecture that outperforms models seven times its size on key benchmarks. Compact and energy-efficient.",
    "verdict": "An impressive small reasoner if its specific license terms fit your use case."
  },
  "mixtral-8x22b": {
    "release": "Avril 2024",
    "arch": "MoE sparse · 8 experts · 141B/39B actifs · GQA",
    "training": "Apache 2.0, 64k ctx.",
    "strengths": [
      "Battle-tested mature MoE",
      "Strong general-purpose performance",
      "Apache 2.0 license",
      "Solid multilingual coverage"
    ],
    "weaknesses": [
      "80GB in Q4 still demands serious hardware",
      "Coding trails newer specialists",
      "64K context lags 2026 competitors",
      "Outclassed by newer Mistral releases on most benchmarks"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 77.8
      },
      {
        "name": "GSM8K",
        "score": 78.6
      }
    ],
    "alternatives": [
      "mistral-small-4",
      "qwen35-27b",
      "deepseek-v3-671b"
    ],
    "install": "ollama run mixtral:8x22b",
    "desc": "Mistral AI's mature 141B/39B-active MoE under Apache 2.0, scoring 77.8 on MMLU and 45.1 on HumanEval. A proven general-purpose workhorse at roughly 80GB in Q4.",
    "verdict": "Still a dependable Apache-licensed generalist, but newer Mistral models now beat it across the board."
  },
  "mistral-small-32-24b": {
    "release": "Juin 2025",
    "arch": "Dense 24B · vision · Tekken tokenizer",
    "training": "Update mineure de Small 3.1 (2503).",
    "strengths": [
      "Roughly 50% fewer infinite-generation loops than 3.1",
      "Notably improved function calling and structured output reliability",
      "Vision encoder included for multimodal tasks",
      "Apache 2.0 — unrestricted commercial use",
      "Fits comfortably on a single 24GB consumer GPU at Q4"
    ],
    "weaknesses": [
      "Requires a recent Ollama build for full chat-template support",
      "Still trails frontier models on hard reasoning benchmarks",
      "Vision quality lags dedicated VLMs like Qwen2.5-VL"
    ],
    "benchmarks": [],
    "alternatives": [
      "mistral-small-31-24b",
      "mistral-small-4",
      "qwen3-32b"
    ],
    "install": "ollama run mistral-small3.2:24b",
    "desc": "Mistral AI's June 2025 refresh of Small 3.1: a 24B Apache 2.0 dense model with vision input, sharper function calling, and roughly half the rate of runaway generations seen in 3.1.",
    "verdict": "The pragmatic choice for self-hosted multilingual chat and tool-using agents on a single GPU — and a no-brainer upgrade from Small 3.1."
  },
  "codestral-22b": {
    "release": "Mai 2024",
    "arch": "Dense 22B · spécialisé code · 80+ langages",
    "training": "Corpus code multilingue.",
    "strengths": [
      "HumanEval 81.1 and MBPP 78.2 — competitive with much larger models at release",
      "Broad language coverage including niche languages",
      "32k context handles most repo files comfortably",
      "Strong fill-in-the-middle completion"
    ],
    "weaknesses": [
      "MNPL license blocks all production and commercial use",
      "Outclassed by permissively licensed alternatives such as Qwen 2.5 Coder 14B",
      "32k context is tight for large-repo agents"
    ],
    "benchmarks": [
      {
        "name": "HumanEval",
        "score": 81.1
      },
      {
        "name": "MBPP",
        "score": 78.2
      },
      {
        "name": "Spider",
        "score": 63.5
      }
    ],
    "alternatives": [
      "devstral-small-2",
      "qwen25-coder-32b",
      "qwen25-coder-14b"
    ],
    "install": "ollama run codestral:22b",
    "desc": "Mistral AI's 22B code specialist covering 80+ programming languages, with strong HumanEval and MBPP scores. Locked behind the restrictive MNPL license — personal and research use only.",
    "verdict": "Capable code model held back by its non-production license — for anything you'd ship, pick Qwen 2.5 Coder 14B instead."
  },
  "codestral-mamba-7b": {
    "release": "Juillet 2024",
    "arch": "Mamba2 SSM pur · inférence linéaire",
    "training": "Premier Mamba sérieux pour le code.",
    "strengths": [
      "Verified 256k context for whole-repo reasoning",
      "Constant memory footprint regardless of sequence length",
      "Apache 2.0 license",
      "Linear-time inference scales gracefully on long inputs"
    ],
    "weaknesses": [
      "No official Ollama support",
      "Only partial llama.cpp integration",
      "Requires mistral-inference or vLLM for full functionality",
      "Quality trails transformer-based coders of similar size"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen25-coder-14b",
      "codestral-22b",
      "falcon-mamba-7b"
    ],
    "install": "# HuggingFace : mistralai/Mamba-Codestral-7B-v0.1",
    "desc": "Mistral AI's pure Mamba SSM architecture for code, with linear-time inference and a 256k context window. Apache 2.0, but tooling support is still patchy.",
    "verdict": "The first serious Mamba code model — pick it for long-context experiments, not for daily completion work."
  },
  "magistral-small-24b": {
    "release": "Juin 2025",
    "arch": "Dense 24B · raisonnement CoT · base Small 3.1",
    "training": "RL sur raisonnement.",
    "strengths": [
      "First open Mistral reasoner with a real RL training pipeline",
      "AIME24 70.7% — competitive with much larger reasoners",
      "Apache 2.0 license",
      "Runs on a single 24GB GPU at Q4"
    ],
    "weaknesses": [
      "Highly verbose in thinking mode — token costs add up",
      "Recommended effective context capped around 40k",
      "Trails DeepSeek R1 distills on hardest math benchmarks"
    ],
    "benchmarks": [
      {
        "name": "AIME 2024",
        "score": 70.7
      },
      {
        "name": "MATH-500",
        "score": 90
      }
    ],
    "alternatives": [
      "deepseek-r2-32b",
      "qwq-32b",
      "phi4-reasoning-14b"
    ],
    "install": "ollama run magistral:24b",
    "desc": "Mistral AI's first open reasoning model, built on Small 3.1 with RL-trained chain-of-thought. Hits 70.7% on AIME24 under Apache 2.0.",
    "verdict": "Mistral's first credible reasoning model — solid math chops under Apache 2.0, if you can stomach the verbose CoT."
  },
  "mistral-large-3": {
    "release": "Décembre 2025",
    "arch": "MoE granular 675B/41B actifs + encoder vision 2.5B · 256k ctx",
    "training": "From scratch sur 3000 H200.",
    "strengths": [
      "Top-tier open weights — #2 OSS non-reasoning on LMArena",
      "Apache 2.0 — fully unrestricted commercial use",
      "Native multimodal with 2.5B vision encoder",
      "256k context window",
      "Strong multilingual coverage out of the box"
    ],
    "weaknesses": [
      "405GB at Q4 — needs an H200- or B200-class server deployment",
      "Active parameter count (41B) still demands substantial inference compute",
      "Overkill for most single-GPU or developer-laptop use cases"
    ],
    "benchmarks": [],
    "alternatives": [
      "mistral-small-4",
      "qwen35-397b-a17b",
      "deepseek-v32"
    ],
    "install": "# HuggingFace : mistralai/Mistral-Large-3-675B-Instruct-2512",
    "desc": "Mistral AI's flagship 675B MoE (41B active) with a 2.5B vision encoder, trained from scratch on 3,000 H200s and released under Apache 2.0. Currently #2 OSS non-reasoning model on LMArena.",
    "verdict": "Among the most capable open-weight non-reasoning models shipping today — if you have the H200s, this replaces closed frontier APIs."
  },
  "mistral-medium-35": {
    "release": "29 avril 2026",
    "arch": "Dense 128B · vision encoder · 256k ctx · raisonnement configurable · EAGLE draft head intégré",
    "training": "Premier flagship merged Mistral : remplace Medium 3.1, Magistral et Devstral 2 dans Le Chat / Vibe.",
    "strengths": [
      "SWE-Bench Verified 77.6% — best-in-class for open weights",
      "τ³-Telecom 91.4% for tool-using agents",
      "256k context with strong long-context retention",
      "Vision-enabled and multilingual across 24 languages",
      "Modified MIT — permissive for most commercial use"
    ],
    "weaknesses": [
      "~74GB at Q4 — needs a 4-GPU box for comfortable serving",
      "Revenue clause kicks in for large enterprises",
      "Single-model consolidation means no separate specialized variants"
    ],
    "benchmarks": [
      {
        "name": "SWE-Bench Verified",
        "score": 77.6
      },
      {
        "name": "τ³-Telecom",
        "score": 91.4
      }
    ],
    "alternatives": [
      "mistral-large-3",
      "qwen35-122b-a10b",
      "deepseek-v4-flash"
    ],
    "install": "ollama run mistral-medium-3.5:128b",
    "desc": "Mistral AI's first merged flagship — a dense 128B with vision, 256k context, and configurable reasoning. Hits 77.6% on SWE-Bench Verified, consolidating Medium 3.1, Magistral, and Devstral 2 into one model.",
    "verdict": "The first Mistral flagship that bundles coding, reasoning, and vision into one model — and it's competitive on every axis."
  },
  "llama-3-1-405b": {
    "release": "Juillet 2024",
    "arch": "Dense 405B · GQA",
    "training": "15T tokens Meta.",
    "strengths": [
      "The reference dense open model — widely benchmarked and well-understood",
      "MMLU 88.6, HumanEval 89.0",
      "128k context",
      "Mature ecosystem support across all serving frameworks"
    ],
    "weaknesses": [
      "240+ GB at Q4 — needs a serious multi-GPU server",
      "Hugging Face gated access",
      "Llama 3.1 Community License with MAU clause",
      "Largely superseded by MoE alternatives at similar quality"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 88.6
      },
      {
        "name": "HumanEval",
        "score": 89
      },
      {
        "name": "GSM8K",
        "score": 96.8
      }
    ],
    "alternatives": [
      "llama33-70b",
      "deepseek-v32",
      "qwen35-397b-a17b"
    ],
    "install": "ollama run llama3.1:405b",
    "desc": "Meta's reference open dense model at 405B parameters, with MMLU 88.6 and HumanEval 89.0. Gated on Hugging Face and over 240GB even at Q4.",
    "verdict": "Still the canonical dense open model, but MoE alternatives now deliver comparable quality at a fraction of the inference cost."
  },
  "llama-4-scout": {
    "release": "Avril 2025",
    "arch": "MoE 16 experts · 109B/17B actifs · iRoPE · multimodal natif",
    "training": "Meta Llama 4 flagship compact.",
    "strengths": [
      "10M token context — unmatched among open models",
      "Fits on a single H100 with Int4 quantization",
      "Native multimodal input — no separate vision adapter needed",
      "17B active parameters keeps inference fast"
    ],
    "weaknesses": [
      "Hugging Face gated access",
      "Llama 4 Community License with the >700M MAU clause",
      "Long-context quality drops well before the 10M ceiling",
      "Newer than Llama 3.1 — tooling still catching up"
    ],
    "benchmarks": [
      {
        "name": "MMLU-Pro",
        "score": 74
      }
    ],
    "alternatives": [
      "llama-4-maverick",
      "qwen3-235b-a22b",
      "mistral-small-4"
    ],
    "install": "ollama run llama4:scout",
    "desc": "Meta's compact Llama 4 MoE — 109B total, 17B active, natively multimodal, with an unprecedented 10M token context. Fits on a single H100.",
    "verdict": "The long-context champion of open weights — if you actually need 10M tokens, nothing else comes close on a single H100."
  },
  "llama-4-maverick": {
    "release": "Avril 2025",
    "arch": "MoE 128 experts · 400B/17B actifs · multimodal natif · 1M ctx",
    "training": "Gros frère de Scout.",
    "strengths": [
      "LMArena 1417 — top-tier open chat quality",
      "MMLU-Pro 80",
      "1M token context",
      "Native multimodal with strong vision performance",
      "17B active keeps inference cost manageable"
    ],
    "weaknesses": [
      "245GB download — non-trivial storage and bandwidth",
      "Hugging Face gated access",
      "Llama 4 Community License with >700M MAU clause",
      "Outclassed on reasoning by R1-class models"
    ],
    "benchmarks": [
      {
        "name": "LMArena",
        "score": 70.85
      },
      {
        "name": "MMLU-Pro",
        "score": 80
      }
    ],
    "alternatives": [
      "llama-4-scout",
      "qwen35-397b-a17b",
      "deepseek-v3-671b"
    ],
    "install": "ollama run llama4:maverick",
    "desc": "Meta's larger Llama 4 MoE at 400B total with 17B active across 128 experts, natively multimodal. LMArena 1417 and 1M token context, but 245GB to download.",
    "verdict": "Meta's biggest open chat model and a credible GPT-4-class alternative — if you can host 245GB and accept the MAU clause."
  },
  "llama31-nemotron-70b": {
    "release": "Octobre 2024",
    "arch": "Dense Llama 3.1 70B · RLHF NVIDIA intensif",
    "training": "RLHF sur préférences humaines.",
    "strengths": [
      "Arena Hard 85.0 — topped the leaderboard at release",
      "AlpacaEval 2 LC 57.6",
      "MT-Bench 8.98",
      "Strong RLHF on real human preference data"
    ],
    "weaknesses": [
      "Llama 3.1 Community License with MAU clause",
      "Hugging Face gated access",
      "Now overtaken on reasoning by Qwen 2.5 72B and R1 distills",
      "~42GB at Q4 — needs dual 24GB GPUs"
    ],
    "benchmarks": [
      {
        "name": "Arena Hard",
        "score": 85
      },
      {
        "name": "AlpacaEval 2 LC",
        "score": 57.6
      }
    ],
    "alternatives": [
      "tulu3-70b",
      "llama33-70b",
      "nemotron-3-super-120b"
    ],
    "install": "ollama run nemotron:70b",
    "desc": "NVIDIA's RLHF tune of Llama 3.1 70B that topped Arena Hard at 85.0 at release. Strong alignment and instruction-following on familiar Llama foundations.",
    "verdict": "An excellent RLHF tune of Llama 3.1 70B — still strong for alignment-heavy chat, though reasoning specialists have since pulled ahead."
  },
  "qwen25-72b": {
    "release": "Septembre 2024",
    "arch": "Dense 72B · GQA · 131k ctx via YaRN",
    "training": "Flagship dense Qwen 2.5.",
    "strengths": [
      "MMLU 86.1 — close to much larger models",
      "HumanEval 86.6 strong for a general-purpose model",
      "MATH 83.1",
      "131k context with solid long-context behavior"
    ],
    "weaknesses": [
      "Custom Qwen License with the 100M MAU clause",
      "~42GB at Q4 — dual-GPU territory",
      "Slower than MoE alternatives like Qwen 3 30B-A3B for similar quality"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 86.1
      },
      {
        "name": "HumanEval",
        "score": 86.6
      },
      {
        "name": "MATH",
        "score": 83.1
      }
    ],
    "alternatives": [
      "qwen25-32b",
      "llama33-70b",
      "qwen3-32b"
    ],
    "install": "ollama run qwen2.5:72b",
    "desc": "Alibaba's flagship Qwen 2.5 dense at 72B, with MMLU 86.1 and HumanEval 86.6. Strong across the board but under the custom Qwen License with a 100M MAU threshold.",
    "verdict": "The strongest open dense 72B you can self-host — just check the license before scaling past 100M MAU."
  },
  "qwen25-14b": {
    "release": "Septembre 2024",
    "arch": "Dense 14B · GQA · 131k ctx",
    "training": "29+ langues.",
    "strengths": [
      "Apache 2.0 — fully commercial-friendly",
      "MMLU 79.7 and HumanEval 83.5 at 14B scale",
      "Excellent VRAM-to-quality ratio",
      "131k context via YaRN extension"
    ],
    "weaknesses": [
      "Native context is 32k — 131k requires YaRN configuration",
      "Outscored on hard reasoning by 30B+ alternatives",
      "Vision not included — pick Qwen2.5-VL if you need it"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 79.7
      },
      {
        "name": "HumanEval",
        "score": 83.5
      },
      {
        "name": "GSM8K",
        "score": 83.1
      }
    ],
    "alternatives": [
      "qwen25-7b",
      "qwen25-32b",
      "qwen3-14b"
    ],
    "install": "ollama run qwen2.5:14b",
    "desc": "Alibaba's Apache 2.0 dense 14B hitting MMLU 79.7 and HumanEval 83.5 across 29+ languages. The pragmatic sweet spot for self-hosted general-purpose chat.",
    "verdict": "The default Apache 2.0 dense model for self-hosted general chat — solid quality at a size most teams can run."
  },
  "qwen25-coder-14b": {
    "release": "Novembre 2024",
    "arch": "Dense 14B code · FIM",
    "training": "5.5T tokens code.",
    "strengths": [
      "HumanEval 89.6 — competitive with much larger coders",
      "LiveCodeBench 37.1",
      "Apache 2.0 license",
      "131k context for long-file work"
    ],
    "weaknesses": [
      "Weaker than general 14B models on non-code chat",
      "No vision input",
      "Outscored by frontier closed APIs on the hardest benchmarks"
    ],
    "benchmarks": [
      {
        "name": "HumanEval",
        "score": 89.6
      },
      {
        "name": "MBPP",
        "score": 86.2
      },
      {
        "name": "LiveCodeBench",
        "score": 37.1
      }
    ],
    "alternatives": [
      "qwen25-coder-7b",
      "qwen25-coder-32b",
      "devstral-small-2"
    ],
    "install": "ollama run qwen2.5-coder:14b",
    "desc": "Alibaba's Qwen 2.5 Coder 14B under Apache 2.0 with HumanEval 89.6 and LiveCodeBench 37.1. The VRAM sweet spot for serious self-hosted code generation.",
    "verdict": "The pragmatic Apache 2.0 coder — strong benchmarks, 24GB VRAM, and no licensing landmines."
  },
  "qwen3-30b-a3b": {
    "release": "Avril 2025",
    "arch": "MoE 128 experts · 30B/3B actifs · hybrid thinking",
    "training": "Base Qwen 3.",
    "strengths": [
      "3B active parameters keeps inference fast and cheap",
      "MMLU 81.4 and AIME24 80.4 — strong on both general and reasoning",
      "Apache 2.0",
      "Hybrid thinking toggle per request",
      "100+ language coverage"
    ],
    "weaknesses": [
      "~19GB at Q4 — exceeds 16GB cards without CPU offload",
      "Thinking mode adds latency and token cost",
      "MoE routing complicates some fine-tuning workflows"
    ],
    "benchmarks": [
      {
        "name": "MMLU (base)",
        "score": 81.38
      },
      {
        "name": "AIME 2024",
        "score": 80.4
      }
    ],
    "alternatives": [
      "qwen3-14b",
      "qwen3-32b",
      "qwen3-235b-a22b"
    ],
    "install": "ollama run qwen3:30b-a3b",
    "desc": "Alibaba's Qwen 3 MoE with 30B total and just 3B active parameters, supporting hybrid thinking mode. MMLU 81.4, AIME24 80.4, 100+ languages, Apache 2.0.",
    "verdict": "The most pragmatic Apache 2.0 model on the market — MoE speed, reasoning on demand, and one of the strongest 24GB-class options."
  },
  "deepseek-r1-distill-qwen-14b": {
    "release": "Janvier 2025",
    "arch": "Dense Qwen 2.5 14B · SFT sur traces R1",
    "training": "Distillation du R1 671B.",
    "strengths": [
      "AIME24 69.7 and MATH-500 93.9",
      "Beats o1-mini on multiple reasoning benchmarks",
      "MIT license — no usage restrictions",
      "131k context"
    ],
    "weaknesses": [
      "Verbose CoT inflates token costs",
      "Slower than non-reasoning 14B for simple queries",
      "No vision or tool-use specialization"
    ],
    "benchmarks": [
      {
        "name": "AIME 2024",
        "score": 69.7
      },
      {
        "name": "MATH-500",
        "score": 93.9
      },
      {
        "name": "GPQA",
        "score": 59.1
      }
    ],
    "alternatives": [
      "deepseek-r1-distill-qwen-15b",
      "deepseek-r1-7b",
      "deepseek-r2-32b"
    ],
    "install": "ollama run deepseek-r1:14b",
    "desc": "DeepSeek's R1 reasoning distilled into Qwen 14B under MIT. AIME24 69.7 and MATH-500 93.9 — beats o1-mini on multiple reasoning benchmarks.",
    "verdict": "The best 14B reasoner under a permissive license today — a serious local alternative to o1-mini for STEM workloads."
  },
  "phi4-mini": {
    "release": "Février 2025",
    "arch": "Dense 3.8B · GQA · LongRoPE · shared embeddings · 200k vocab",
    "training": "Corpus Phi haute qualité.",
    "strengths": [
      "Native function calling at 3.8B",
      "128k context via LongRoPE",
      "MIT license",
      "200k vocabulary improves tokenization efficiency"
    ],
    "weaknesses": [
      "English-first — multilingual coverage is thin",
      "Outscored on raw quality by Qwen 2.5 3B",
      "Tool-calling reliability still trails larger models"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 67.3
      },
      {
        "name": "HumanEval",
        "score": 74.4
      },
      {
        "name": "MATH",
        "score": 71.5
      }
    ],
    "alternatives": [
      "phi35-mini",
      "gemma3-4b",
      "smollm2-17b"
    ],
    "install": "ollama run phi4-mini:3.8b",
    "desc": "Microsoft's 3.8B Phi-4 Mini under MIT with native function calling, 128k context via LongRoPE, and a 200k vocab. MMLU 67.3 and HumanEval 74.4.",
    "verdict": "The MIT-licensed pick for small tool-using agents — strong function calling and 128k context in a 3.8B footprint."
  },
  "phi4-mini-reasoning": {
    "release": "Avril 2025",
    "arch": "Dense 3.8B · entraîné sur traces R1",
    "training": "Distillation reasoning synthétique.",
    "strengths": [
      "AIME24 57.5 — exceptional for 3.8B",
      "MATH-500 94.6 nearly matches frontier models",
      "Fits comfortably on any laptop",
      "MIT license"
    ],
    "weaknesses": [
      "English-first",
      "Verbose CoT typical of reasoning models",
      "Outside math, quality trails the base Phi-4 Mini"
    ],
    "benchmarks": [
      {
        "name": "AIME 2024",
        "score": 57.5
      },
      {
        "name": "MATH-500",
        "score": 94.6
      }
    ],
    "alternatives": [
      "phi4-reasoning-14b",
      "deepseek-r1-distill-qwen-15b"
    ],
    "install": "ollama run phi4-mini-reasoning:3.8b",
    "desc": "Microsoft's 3.8B Phi-4 Mini variant trained on R1-style reasoning traces under MIT. AIME24 57.5 and MATH-500 94.6 — remarkable math chops for the size.",
    "verdict": "Pound-for-pound the most impressive small reasoner under MIT — pick it for math on the smallest hardware."
  },
  "tulu3-8b": {
    "release": "Novembre 2024",
    "arch": "Dense Llama 3.1 8B · SFT + DPO + RLVR",
    "training": "Données + code + evals publics.",
    "strengths": [
      "Best fully-open RLHF recipe shipped to date",
      "GSM8K 87.6 is class-leading at 8B",
      "IFEval 82.4 shows strong instruction adherence",
      "Training data, code, and evals all publicly available",
      "Stable behavior on standard chat benchmarks"
    ],
    "weaknesses": [
      "Inherits the Llama 3.1 Community License",
      "No native vision or tool-use specialization",
      "Eclipsed at the frontier by larger open models"
    ],
    "benchmarks": [
      {
        "name": "GSM8K",
        "score": 87.6
      },
      {
        "name": "MATH",
        "score": 42
      },
      {
        "name": "IFEval",
        "score": 82.4
      }
    ],
    "alternatives": [
      "llama3-8b",
      "olmo3-7b",
      "qwen3-8b"
    ],
    "install": "ollama run tulu3:8b",
    "desc": "Allen AI's fully open post-training recipe applied to Llama 3.1 8B, hitting 87.6 on GSM8K with all data, code, and evals released publicly.",
    "verdict": "The reference open RLHF recipe at 8B — choose it when reproducibility and post-training transparency matter as much as benchmark scores."
  },
  "tulu3-70b": {
    "release": "Novembre 2024",
    "arch": "Dense Llama 3.1 70B · recette Tülu complète",
    "training": "SFT + DPO + RLVR sur 70B.",
    "strengths": [
      "Beats Claude Haiku, GPT-3.5 Turbo, and GPT-4o-mini on key evals",
      "GSM8K 93.5 and HumanEval+ 92.4 at open weights",
      "Fully open SFT + DPO + RLVR recipe",
      "Strong instruction following and refusal calibration",
      "Stable, well-documented behavior for production deploys"
    ],
    "weaknesses": [
      "~40 GB VRAM at Q4 — needs serious hardware",
      "Bound by Llama 3.1 Community License",
      "No multimodal capabilities"
    ],
    "benchmarks": [
      {
        "name": "GSM8K",
        "score": 93.5
      },
      {
        "name": "HumanEval+",
        "score": 92.4
      },
      {
        "name": "IFEval",
        "score": 83.2
      }
    ],
    "alternatives": [
      "tulu3-8b",
      "llama33-70b",
      "llama31-nemotron-70b"
    ],
    "install": "ollama run tulu3:70b",
    "desc": "Allen AI's fully open RLHF stack on Llama 3.1 70B, beating Claude Haiku, GPT-3.5 Turbo, and GPT-4o-mini on standard reasoning and code benchmarks.",
    "verdict": "The strongest fully open post-trained 70B available — a credible self-hosted replacement for closed mid-tier chat APIs."
  },
  "granite4-small": {
    "release": "Octobre 2025",
    "arch": "Hybride Mamba-2/Transformer (9:1) + MoE granulaire · 32B/9B actifs",
    "training": "Granite 4.0 family.",
    "strengths": [
      "Hybrid Mamba-2 architecture cuts long-context memory by ~70%",
      "MoE design keeps active params at 9B for fast inference",
      "Apache 2.0 with no usage restrictions",
      "Built with enterprise governance and provenance in mind",
      "Strong throughput on commodity multi-GPU setups"
    ],
    "weaknesses": [
      "Requires a recent llama.cpp build for hybrid architecture support",
      "Tooling ecosystem still catching up to dense Llama-class models",
      "Quality trails frontier 30B+ dense models on hard reasoning"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-30b-a3b",
      "granite33-8b",
      "granite4-tiny"
    ],
    "install": "ollama run granite4:small-h",
    "desc": "IBM's hybrid Mamba-2 + MoE model with 32B total and 9B active parameters, engineered to slash long-context memory use by roughly 70% versus comparable transformers under Apache 2.0.",
    "verdict": "The most memory-efficient open MoE for long-context enterprise work — pick it when VRAM, license, and 128k context all matter."
  },
  "granite4-tiny": {
    "release": "Octobre 2025",
    "arch": "Hybride Mamba-2 + MoE granulaire · 7B/1B actifs",
    "training": "Edge variant du 4.0.",
    "strengths": [
      "Extremely low compute cost per token via 1B active params",
      "Apache 2.0 license with no commercial strings attached",
      "128k context handled efficiently thanks to hybrid Mamba-2",
      "Tiny memory footprint suits edge and serverless deploys"
    ],
    "weaknesses": [
      "Quality lags dense 3B models on some single-shot tasks",
      "Smaller active capacity hurts complex reasoning",
      "Needs current llama.cpp support to run efficiently"
    ],
    "benchmarks": [],
    "alternatives": [
      "granite32-8b",
      "smollm2-17b",
      "gemma3-4b"
    ],
    "install": "ollama run granite4:tiny-h",
    "desc": "IBM's edge-class hybrid MoE with 7B total and only 1B active parameters — Apache 2.0 licensed and built for embedded and low-cost serving.",
    "verdict": "The most efficient Apache-licensed MoE for edge inference — the right pick when cost-per-token and license cleanliness trump raw quality."
  },
  "smollm3-3b": {
    "release": "Juillet 2025",
    "arch": "Dense 3B · dual-mode think/no-think · 64k natif + YaRN",
    "training": "Fully open (données + recette).",
    "strengths": [
      "Compact think mode delivers reasoning at 3B scale",
      "Native support for six languages",
      "Apache 2.0 with fully open training data and recipe",
      "128k context unusual at this size",
      "Strong MMLU and GSM8K for the parameter count"
    ],
    "weaknesses": [
      "No official Ollama distribution — needs manual setup",
      "Quality ceiling typical of 3B dense models on hard tasks",
      "Smaller community than competing 3B releases"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 59.7
      },
      {
        "name": "GSM8K",
        "score": 70.9
      }
    ],
    "alternatives": [
      "smollm2-17b",
      "qwen3-8b",
      "phi4-mini"
    ],
    "install": "# HuggingFace : HuggingFaceTB/SmolLM3-3B",
    "desc": "HuggingFace's 3B model with dual think/no-think modes, 128k context, and full open data and recipe — punching at MMLU 59.7 and GSM8K 70.9.",
    "verdict": "The reasoning-capable 3B to beat — ideal for edge deployments that still need think-mode and 128k context."
  },
  "minicpm-v-26": {
    "release": "Août 2024",
    "arch": "VLM 8B · SigLIP-400M + Qwen2-7B",
    "training": "Multi-image, vidéo, aspect ratio libre.",
    "strengths": [
      "Tops OCRBench among sub-25B models, edging out GPT-4o",
      "OpenCompass 65.2 matches much larger VLMs",
      "Handles 1.8MP inputs without aggressive downsampling",
      "Native multi-image and video reasoning",
      "Free aspect-ratio handling avoids letterboxing artifacts"
    ],
    "weaknesses": [
      "MiniCPM Model License requires registration for commercial use",
      "Smaller community than Qwen2-VL or Llama-class VLMs",
      "Tooling support varies across inference backends"
    ],
    "benchmarks": [
      {
        "name": "OpenCompass",
        "score": 65.2
      }
    ],
    "alternatives": [
      "qwen25-vl-7b",
      "minicpm-o-26",
      "llava-onevision-7b"
    ],
    "install": "ollama run minicpm-v:8b",
    "desc": "OpenBMB's 8B vision-language model pairing SigLIP and Qwen2, scoring 65.2 on OpenCompass and beating GPT-4o on OCRBench among sub-25B models.",
    "verdict": "The OCR champion among compact open VLMs — the right call when document fidelity beats pure chat quality."
  },
  "minicpm-o-26": {
    "release": "Janvier 2025",
    "arch": "Omni 8B · SigLIP + Whisper-medium + ChatTTS + Qwen2.5-7B",
    "training": "End-to-end streaming speech.",
    "strengths": [
      "End-to-end full-duplex speech streaming",
      "OpenCompass 70.2 across vision-language tasks",
      "Beats GPT-4o on single-image evaluations",
      "Unified omni-modal architecture in 8B"
    ],
    "weaknesses": [
      "Ollama integration is image-only — audio needs native inference",
      "Speech and audio paths require the official runtime",
      "Same MiniCPM license registration requirements"
    ],
    "benchmarks": [
      {
        "name": "OpenCompass",
        "score": 70.2
      }
    ],
    "alternatives": [
      "qwen25-omni-7b",
      "phi4-multimodal",
      "moshi-7b"
    ],
    "install": "ollama run openbmb/minicpm-o2.6",
    "desc": "OpenBMB's omni-modal 8B model adding audio and full-duplex speech streaming on top of vision, scoring 70.2 on OpenCompass and beating GPT-4o on single-image tasks.",
    "verdict": "The closest open answer to GPT-4o omni — pick it when you need streaming voice and vision in a single self-hosted 8B model."
  },
  "falcon3-7b": {
    "release": "Décembre 2024",
    "arch": "Dense 7B · GQA · 32k ctx",
    "training": "14T tokens.",
    "strengths": [
      "MMLU 70.5 matches Qwen2.5-7B",
      "Trained on 14T tokens for broad knowledge coverage",
      "Five-language native support out of the box",
      "Permissive commercial license under TII Falcon-LLM 2.0",
      "32k context covers most production needs"
    ],
    "weaknesses": [
      "TII license is permissive but not Apache 2.0",
      "Smaller community than Llama or Qwen ecosystems",
      "No official multimodal variants"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 70.5
      },
      {
        "name": "GSM8K",
        "score": 80.8
      }
    ],
    "alternatives": [
      "falcon3-10b",
      "qwen3-8b",
      "llama3-8b"
    ],
    "install": "ollama run falcon3:7b",
    "desc": "TII's 7B trained on 14T tokens, hitting MMLU 70.5 — on par with Qwen2.5-7B — with native support for English, French, Spanish, German, and Portuguese.",
    "verdict": "A credible non-Chinese 7B with Qwen-class quality — pick it for European multilingual work that needs a permissive commercial license."
  },
  "falcon3-10b": {
    "release": "Décembre 2024",
    "arch": "Dense 10B · depth-upscaled de 7B",
    "training": "Successeur du 7B.",
    "strengths": [
      "SOTA among sub-13B models at release",
      "MMLU 73.1 with strong knowledge breadth",
      "Efficient depth-upscaled design from the 7B base",
      "Five-language coverage with permissive licensing",
      "Strong GSM8K performance for the size class"
    ],
    "weaknesses": [
      "TII Falcon-LLM 2.0 license, not Apache 2.0",
      "Limited fine-tune ecosystem versus Llama derivatives",
      "No multimodal version available"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 73.1
      },
      {
        "name": "GSM8K",
        "score": 83.1
      }
    ],
    "alternatives": [
      "falcon3-7b",
      "qwen25-14b",
      "gemma2-9b"
    ],
    "install": "ollama run falcon3:10b",
    "desc": "TII's depth-upscaled 10B successor to Falcon 3 7B, hitting MMLU 73.1 and GSM8K 83.1 — state-of-the-art under 13B at release.",
    "verdict": "The strongest sub-13B Falcon to date — a solid mid-size pick when you need multilingual quality without the Llama license."
  },
  "command-r-35b": {
    "release": "Mars 2024",
    "arch": "Dense 35B · optimisé RAG et tool-use · GQA",
    "training": "10 langues évaluées, 23 entraînées.",
    "strengths": [
      "First open model designed natively for RAG and tool use",
      "128k context for long retrieval pipelines",
      "10 evaluated languages, 23 in pretraining",
      "Strong citation and grounding behavior"
    ],
    "weaknesses": [
      "CC-BY-NC 4.0 license blocks commercial deployment",
      "Superseded by Command R+ 104B for production quality",
      "No multimodal capabilities"
    ],
    "benchmarks": [],
    "alternatives": [
      "command-r-plus-104b",
      "aya-expanse-32b",
      "qwen25-32b"
    ],
    "install": "ollama run command-r:35b",
    "desc": "Cohere's original Command R, a 35B optimized for RAG and tool use across 10 languages with 128k context — but locked under CC-BY-NC for non-commercial use only.",
    "verdict": "Historically important but commercially off-limits — choose it only for research, and reach for Command R+ everywhere else."
  },
  "aya-23-8b": {
    "release": "Mai 2024",
    "arch": "Dense 8B · IFT sur Aya Collection",
    "training": "23 langues.",
    "strengths": [
      "Solid pre-Expanse multilingual coverage",
      "23 languages including French, Arabic, and Chinese",
      "Compact 8B footprint for the breadth"
    ],
    "weaknesses": [
      "CC-BY-NC 4.0 license blocks commercial use",
      "8k context is restrictive by current standards",
      "Superseded by Aya Expanse 8B in every dimension"
    ],
    "benchmarks": [],
    "alternatives": [
      "aya-expanse-8b",
      "qwen25-7b",
      "eurollm-9b"
    ],
    "install": "ollama run aya:8b",
    "desc": "Cohere For AI's pre-Expanse 8B multilingual model covering 23 languages, now superseded by Aya Expanse 8B and restricted to non-commercial use.",
    "verdict": "Skip for new work — Aya Expanse 8B replaces this model with better quality under the same license."
  },
  "yi-coder-9b": {
    "release": "Septembre 2024",
    "arch": "Dense 9B code · base Llama · 128k ctx",
    "training": "52 langages de programmation.",
    "strengths": [
      "LiveCodeBench 23% leads the sub-10B field",
      "Outperforms DeepSeek Coder 33B at a fraction of the size",
      "Coverage across 52 programming languages",
      "Apache 2.0 license",
      "128k context enables repo-scale code understanding"
    ],
    "weaknesses": [
      "Less popular than Qwen Coder, so fewer fine-tunes exist",
      "No instruction-tuned variant beyond chat",
      "Quality gap versus Qwen 2.5 Coder 7B/14B in 2025"
    ],
    "benchmarks": [
      {
        "name": "LiveCodeBench",
        "score": 23
      }
    ],
    "alternatives": [
      "qwen25-coder-7b",
      "qwen25-coder-14b",
      "devstral-small-2"
    ],
    "install": "ollama run yi-coder:9b",
    "desc": "01.AI's 9B code model covering 52 programming languages, hitting 23% on LiveCodeBench — best-in-class under 10B and beating DeepSeek Coder 33B.",
    "verdict": "The strongest sub-10B code model in its release window — still a sharp pick when you need 128k context on modest hardware."
  },
  "jamba-15-mini": {
    "release": "Août 2024",
    "arch": "Hybride SSM-Transformer (Mamba+Attention) + MoE · 52B/12B actifs",
    "training": "256k ctx effectif (86% à 256k RULER).",
    "strengths": [
      "Effective 256k context (86% on RULER)",
      "Unique SSM-Transformer hybrid architecture",
      "Strong throughput vs. dense models of similar capability",
      "Solid 9-language coverage"
    ],
    "weaknesses": [
      "Custom Jamba license is not OSI-approved",
      "Partial llama.cpp support complicates local deployment",
      "Superseded by Jamba 1.6 and 1.7",
      "Smaller fine-tune ecosystem than Llama or Qwen"
    ],
    "benchmarks": [],
    "alternatives": [
      "granite4-small",
      "mistral-small-4",
      "qwen3-30b-a3b"
    ],
    "install": "# HuggingFace : ai21labs/AI21-Jamba-Mini-1.5",
    "desc": "AI21 Labs' hybrid SSM-Transformer with MoE routing, activating 12B of 52B parameters. Delivers a verified 256k context window but ships under AI21's non-OSI Jamba license.",
    "verdict": "A novel hybrid with real long-context performance, now eclipsed by newer Jamba releases and gated by a non-standard license."
  },
  "hunyuan-a13b": {
    "release": "Juin 2025",
    "arch": "MoE fine-grained · 80B/13B actifs · dual fast/slow thinking",
    "training": "256k ctx natif.",
    "strengths": [
      "Competitive with o1 and DeepSeek on mainstream benchmarks",
      "Native 256k context",
      "Dual fast/slow thinking for latency-quality tradeoffs",
      "Only 13B active parameters keeps inference cheap"
    ],
    "weaknesses": [
      "Tencent Hunyuan license has commercial restrictions",
      "No official Ollama distribution",
      "Tooling support trails Qwen and Llama"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-235b-a22b",
      "step-35-flash",
      "nemotron-3-super-120b"
    ],
    "install": "# HuggingFace : tencent/Hunyuan-A13B-Instruct",
    "desc": "Tencent's fine-grained MoE activating 13B of 80B parameters, with dual fast/slow thinking modes and a 256k context. Released under Tencent's custom Hunyuan license.",
    "verdict": "Frontier-tier MoE reasoning at a manageable active-parameter count, held back mainly by the custom Tencent license."
  },
  "llava-onevision-7b": {
    "release": "Août 2024",
    "arch": "VLM 7B · SO400M + Qwen2-7B · image/multi-image/vidéo",
    "training": "LMMs-Lab (Singapour).",
    "strengths": [
      "Fully Apache 2.0 with no commercial gotchas",
      "Genuine multi-image and video support",
      "Mature ecosystem with strong community traction",
      "Solid Qwen2-7B language backbone"
    ],
    "weaknesses": [
      "No official Ollama packaging",
      "English-first; weaker on non-English vision QA",
      "Outpaced by Qwen3-VL on most 2025 benchmarks"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen25-vl-7b",
      "minicpm-v-26",
      "molmo-7b"
    ],
    "install": "# HuggingFace : lmms-lab/llava-onevision-qwen2-7b-ov",
    "desc": "An Apache-licensed 7B vision-language model from LMMs-Lab, combining SigLIP SO400M with Qwen2-7B. Handles single images, multi-image inputs, and video at over 170k monthly downloads.",
    "verdict": "A dependable, truly open VLM for self-hosters who value Apache licensing over the latest leaderboard score."
  },
  "jais-30b": {
    "release": "Août 2023 (v3 2024)",
    "arch": "Décodeur natif · SwiGLU · ALiBi · Arabic-first",
    "training": "MBZUAI + Core42 + Cerebras.",
    "strengths": [
      "Native Arabic-first architecture, not a Llama fine-tune",
      "Strong bilingual Arabic-English performance",
      "Apache 2.0 license enables commercial use",
      "Backed by MBZUAI, Core42, and Cerebras"
    ],
    "weaknesses": [
      "8k context limits long-document workflows",
      "No official Ollama distribution",
      "Weaker than Jais Adapted 70B for hardest Arabic tasks"
    ],
    "benchmarks": [],
    "alternatives": [
      "jais-70b",
      "aya-expanse-32b",
      "qwen25-32b"
    ],
    "install": "# HuggingFace : core42/jais-30b-chat-v3",
    "desc": "MBZUAI and Core42's reference open Arabic LLM — a 30B trained natively (not a fine-tune) for Arabic with strong English bilingual support under Apache 2.0.",
    "verdict": "The default open Arabic LLM at the 30B class — pick it for native Arabic quality without the Llama license."
  },
  "sarvam-m-24b": {
    "release": "Mai 2025",
    "arch": "Dense 24B · base Mistral Small 3.1 · hybrid think/non-think",
    "training": "11 langues indiennes + EN.",
    "strengths": [
      "+86% gain on romanized Indic GSM-8K",
      "Hybrid think/no-think mode toggle",
      "11 Indian languages plus English",
      "Apache 2.0 with permissive commercial use",
      "Mistral Small 3.1 base brings solid general quality"
    ],
    "weaknesses": [
      "No official Ollama distribution yet",
      "Strong Indic focus limits broader multilingual use",
      "Smaller community ecosystem than Mistral mainline"
    ],
    "benchmarks": [],
    "alternatives": [
      "mistral-small-31-24b",
      "aya-expanse-32b"
    ],
    "install": "# HuggingFace : sarvamai/sarvam-m",
    "desc": "Sarvam AI's 24B built on Mistral Small 3.1 with hybrid think/no-think modes, gaining +86% on romanized GSM-8K Indic and covering 11 Indian languages plus English.",
    "verdict": "The top open model for Indic markets — pick it when you need real Indian-language coverage with hybrid reasoning."
  },
  "salamandra-7b": {
    "release": "Février 2025",
    "arch": "Dense 7.7B · RoPE · SwiGLU · GQA · 256k vocab",
    "training": "7.8T tokens, 35 langues UE + 92 langages de programmation.",
    "strengths": [
      "Backed by EU sovereignty and BSC infrastructure",
      "35 European languages natively supported",
      "Apache 2.0 license",
      "Coverage of 92 programming languages",
      "7.8T tokens of training data"
    ],
    "weaknesses": [
      "8k context falls short for long-document use",
      "No official Ollama distribution",
      "Quality trails frontier 7B models on English benchmarks"
    ],
    "benchmarks": [],
    "alternatives": [
      "eurollm-9b",
      "teuken-7b",
      "lucie-7b"
    ],
    "install": "# HuggingFace : BSC-LT/salamandra-7b-instruct",
    "desc": "Barcelona Supercomputing Center's 7.8B trained on 7.8T tokens covering 35 European languages and 92 programming languages — built for EU sovereignty under Apache 2.0.",
    "verdict": "The reference EU-sovereign 7B — choose it when European language breadth and provenance matter more than top-tier English benchmarks."
  },
  "eurollm-22b": {
    "release": "Février 2026",
    "arch": "Dense 22.6B · 56 couches · GQA 48Q/8KV · SwiGLU · RoPE θ=1M",
    "training": "35 langues UE + pertinentes.",
    "strengths": [
      "22B scale gives meaningful headroom over EuroLLM 9B",
      "Apache 2.0 license",
      "32k context handles document-length European-language workloads",
      "EU sovereignty across the full project stack",
      "Fresh February 2026 release with current training data"
    ],
    "weaknesses": [
      "No official Ollama distribution at launch",
      "Smaller community than mainline open models",
      "Tooling and quantization support still maturing"
    ],
    "benchmarks": [],
    "alternatives": [
      "eurollm-9b",
      "teuken-7b",
      "salamandra-40b"
    ],
    "install": "# HuggingFace : utter-project/EuroLLM-22B-Instruct-2512",
    "desc": "Utter Project's 22.6B EU-sovereign model released February 2026 covering 35 European languages with 32k context — the heavy-duty successor to EuroLLM 9B.",
    "verdict": "The new heavyweight EuroLLM — choose it when you've outgrown the 9B and need EU-sovereign multilingual quality at production scale."
  },
  "claire-7b": {
    "release": "Novembre 2023",
    "arch": "LoRA fine-tune de Falcon-7B sur dialogue FR spontané",
    "training": "LINAGORA + OpenLLM-France.",
    "strengths": [
      "Natural, spoken-style French output",
      "Lightweight 7B footprint for local experimentation",
      "Backed by OpenLLM-France community work",
      "Targeted training on authentic French dialogue data"
    ],
    "weaknesses": [
      "CC-BY-NC-SA license blocks most commercial use",
      "Tiny 2k context window by modern standards",
      "Built on aging Falcon-7B base",
      "Outclassed by Mistral and Qwen on general French tasks"
    ],
    "benchmarks": [],
    "alternatives": [
      "lucie-7b",
      "pleias-3b",
      "helium-1-2b"
    ],
    "install": "# HuggingFace : OpenLLM-France/Claire-7B-0.1 (Apache : Claire-7B-Apache-0.1)",
    "desc": "LINAGORA's LoRA fine-tune of Falcon-7B specialized for spontaneous French dialogue. Released under CC-BY-NC-SA 4.0, with a separate Apache-licensed variant available for commercial work.",
    "verdict": "A historically interesting French-dialogue specialist, but the restrictive license and 2k context make it a research-only pick today."
  },
  "qwen36-27b": {
    "release": "Avril 2026",
    "arch": "Dense 27B · Gated DeltaNet + Gated Attention · multimodal · 64 couches",
    "training": "Successeur dense de Qwen 3.5 27B, génération 3.6.",
    "strengths": [
      "77.2% SWE-bench Verified — frontier coding accuracy",
      "Native multimodal text + image",
      "262k context, extendable to 1M with YaRN",
      "Apache 2.0",
      "Dense 27B fits comfortably on consumer hardware"
    ],
    "weaknesses": [
      "Needs 16+ GB VRAM at Q4",
      "Hybrid architecture requires a recent llama.cpp build",
      "Dense design means no MoE inference efficiency"
    ],
    "benchmarks": [
      {
        "name": "SWE-bench Verified",
        "score": 77.2
      },
      {
        "name": "Terminal-Bench",
        "score": 59.3
      },
      {
        "name": "SkillsBench",
        "score": 48.2
      }
    ],
    "alternatives": [
      "qwen36-35b-a3b",
      "qwen35-27b",
      "qwen3-coder-next"
    ],
    "install": "ollama run qwen3.6:27b",
    "desc": "Dense 27B multimodal model from Alibaba (April 2026), scoring 77.2% on SWE-bench Verified with 262k native context (1M via YaRN). The Qwen 3.6 generation's developer-friendly workhorse.",
    "verdict": "The single-GPU coding model to beat in 2026 — Apache 2.0, multimodal, and frontier-grade on SWE-bench."
  },
  "deepseek-v4-pro": {
    "release": "Avril 2026",
    "arch": "MoE 1.6T/49B actifs · CSA+HCA hybrid attention · mHC · Muon optimizer · FP4+FP8 mixte",
    "training": "32T+ tokens pré-entraînement.",
    "strengths": [
      "The most capable open-weight model available, period",
      "MIT license at frontier scale",
      "1M context window",
      "Three configurable thinking modes (Non / High / Max)",
      "Hybrid CSA+HCA attention for efficient long-context"
    ],
    "weaknesses": [
      "960+ GB VRAM in Q4 — server farm only",
      "No community quantizations yet at release",
      "Three-mode reasoning adds inference complexity",
      "32T+ token pretraining means very high training carbon footprint"
    ],
    "benchmarks": [],
    "alternatives": [
      "deepseek-v4-flash",
      "deepseek-v32",
      "kimi-k26"
    ],
    "install": "# HuggingFace : deepseek-ai/DeepSeek-V4-Pro",
    "desc": "DeepSeek's frontier MoE: 1.6T total / 49B active params, MIT-licensed, 1M context, with CSA+HCA hybrid attention and three reasoning modes. The absolute open-weight ceiling as of April 2026.",
    "verdict": "The new open-weight ceiling. If you have the hardware, nothing else comes close."
  },
  "deepseek-v4-flash": {
    "release": "Avril 2026",
    "arch": "MoE 284B/13B actifs · CSA+HCA hybrid · mHC · Muon · FP4+FP8 mixte",
    "training": "Cible « efficient reasoning » à coût réduit vs V4 Pro.",
    "strengths": [
      "MIT license",
      "1M context window",
      "Only 13B active params — fast for its total size",
      "Three thinking modes inherited from V4 Pro",
      "Base and Instruct variants available"
    ],
    "weaknesses": [
      "Around 170 GB VRAM in Q4 — still multi-GPU",
      "Official community quantizations were lagging at launch",
      "Quality trails V4 Pro on the hardest reasoning tasks"
    ],
    "benchmarks": [],
    "alternatives": [
      "deepseek-v4-pro",
      "deepseek-v32",
      "minimax-m27"
    ],
    "install": "# HuggingFace : deepseek-ai/DeepSeek-V4-Flash (GGUF communautaire en cours)",
    "desc": "DeepSeek V4's efficient sibling: 284B MoE with 13B active params, MIT-licensed, 1M context, and the same three-mode reasoning stack. Frontier-adjacent quality at a fraction of the inference cost.",
    "verdict": "The efficient way into the V4 family — MIT, 1M context, and inference cost that won't bankrupt you."
  },
  "tencent-hy3-preview": {
    "release": "Avril 2026",
    "arch": "MoE 295B/21B actifs · 80 couches + 1 MTP layer · 192 experts top-8 · GQA 64Q/8KV · BF16",
    "training": "Fast/slow-thinking fusionnés.",
    "strengths": [
      "Tencent's first frontier-scale open-weight release",
      "256k context window",
      "Both Base and Instruct variants shipped",
      "MTP module accelerates long-form generation",
      "Fused fast/slow thinking in one model"
    ],
    "weaknesses": [
      "Custom Tencent Hunyuan Community License — legal review required",
      "Around 177 GB VRAM in Q4",
      "No Ollama support at launch",
      "Preview status means rough edges in tooling"
    ],
    "benchmarks": [],
    "alternatives": [
      "hunyuan-a13b",
      "hunyuan-20-large",
      "qwen3-235b-a22b"
    ],
    "install": "# HuggingFace : tencent/Hy3-preview",
    "desc": "Tencent's frontier preview: 295B MoE with 21B active params plus a 3.8B MTP module, 80 layers, top-8 of 192 experts, with fused fast/slow thinking. Released April 2026 under the custom Hunyuan license.",
    "verdict": "A serious frontier preview from Tencent, held back from broader adoption by its custom license."
  },
  "llada2-uni": {
    "release": "Avril 2026",
    "arch": "MoE 16B/1B actifs + Discrete Semantic Tokenizer (SigLIP-VQ) + Decoder Diffusion 6.2B + VAE",
    "training": "Masked Token Prediction paradigm. Decoder-turbo distillé (10× accélération, 8 steps au lieu de 50). SPRINT acceleration.",
    "strengths": [
      "The first Apache 2.0 open diffusion LLM",
      "Unified text, vision, generation, and editing",
      "Interleaved 'thinking' mode during diffusion",
      "Decoder-turbo distillation runs 8 diffusion steps instead of 50",
      "Apache 2.0 commercial license"
    ],
    "weaknesses": [
      "Diffusion architecture not supported by Ollama or llama.cpp",
      "Requires Flash Attention 2 and CUDA 12.4",
      "Around 47 GB VRAM during active generation",
      "Only 8k context window"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen25-omni-7b",
      "minicpm-o-26",
      "phi4-multimodal"
    ],
    "install": "# HuggingFace : inclusionAI/LLaDA2.0-Uni (Flash Attn 2 + CUDA 12.4 requis)",
    "desc": "Ant Group's first open Apache 2.0 diffusion LLM: a 16B/1B MoE paired with a 6.2B diffusion decoder, unifying text and vision generation and editing. Released April 2026.",
    "verdict": "A research-first release that proves Apache 2.0 dLLMs are real — production users should wait for tooling to catch up."
  },
  "mimo-v25-pro": {
    "release": "22 avril 2026",
    "arch": "MoE 1.02T/42B actifs · 70 couches (1 dense + 69 MoE) · 384 experts top-8 · hybrid SWA/GA 6:1 · MTP 3 layers · FP8 E4M3",
    "training": "Three-stage post-training : SFT → domain-specialized RL (math, sécurité, agentic) → Multi-Teacher On-Policy Distillation.",
    "strengths": [
      "MIT license at frontier agentic scale",
      "1M context window",
      "Supports 1,000+ tool calls per chain",
      "57.2% on SWE-Bench Pro",
      "Hybrid 6:1 attention cuts KV-cache by 7x vs. full attention"
    ],
    "weaknesses": [
      "Roughly 600 GB VRAM in Q4 — datacenter only",
      "No official Ollama quantization",
      "MTP support is uneven across inference engines"
    ],
    "benchmarks": [
      {
        "name": "SWE-Bench Pro",
        "score": 57.2
      },
      {
        "name": "Claw-Eval",
        "score": 63.8
      },
      {
        "name": "τ3-Bench",
        "score": 72.9
      }
    ],
    "alternatives": [
      "deepseek-v4-pro",
      "kimi-k26",
      "minimax-m27"
    ],
    "install": "# HuggingFace : XiaomiMiMo/MiMo-V2.5-Pro",
    "desc": "Xiaomi's MIT-licensed frontier agentic model: 1.02T MoE with 42B active params, 57.2% on SWE-Bench Pro, 1M context, and 6:1 hybrid attention. Released April 2026.",
    "verdict": "The open agentic frontier — MIT, million-token, thousand-call — if you have the silicon to run it."
  },
  "mimo-v25": {
    "release": "22 avril 2026",
    "arch": "MoE 310B/15B actifs · 48 couches (1 dense + 47 MoE) · 256 experts top-8 · ViT 729M + Audio 261M · MTP 329M · FP8",
    "training": "≈48T tokens · pipeline pré-train texte → projector warmup → multimodal pré-train → SFT agentic → RL+MOPD.",
    "strengths": [
      "Omnimodal under MIT — text, image, video, audio",
      "1M context window",
      "87.7 Video-MME and 81.0 CharXiv RQ",
      "Permissive MIT license at frontier scale",
      "MoE design keeps active compute reasonable"
    ],
    "weaknesses": [
      "Around 180 GB VRAM in Q4",
      "Video and audio inference pipelines are not yet standardized",
      "No Ollama support"
    ],
    "benchmarks": [
      {
        "name": "Video-MME",
        "score": 87.7
      },
      {
        "name": "CharXiv RQ",
        "score": 81
      },
      {
        "name": "MMMU-Pro",
        "score": 77.9
      }
    ],
    "alternatives": [
      "mimo-v2-flash",
      "qwen3-omni-30b",
      "qwen25-omni-7b"
    ],
    "install": "# HuggingFace : XiaomiMiMo/MiMo-V2.5",
    "desc": "Xiaomi's MIT-licensed omnimodal model: 310B MoE with 15B active params handling text, image, video, and audio. Scores 87.7 on Video-MME with 1M context. Released April 2026.",
    "verdict": "The first MIT-licensed model that genuinely handles video alongside everything else."
  },
  "granite41-8b": {
    "release": "29 avril 2026",
    "arch": "Transformer dense · 40 couches · GQA 32Q/8KV · embedding 4096 · MLP hidden 12 800 · RoPE",
    "training": "Post-training amélioré : SFT + RL alignment. 12 langues : EN, DE, ES, FR, JA, PT, AR, CS, IT, KO, NL, ZH. Cluster NVIDIA GB200 NVL72 (CoreWeave).",
    "strengths": [
      "Apache 2.0 with full transparency on training",
      "Strong tool calling and instruction following",
      "12 native languages including French",
      "131k context window",
      "Excellent quality-per-parameter at the 8B tier"
    ],
    "weaknesses": [
      "No official Ollama tag at release",
      "Reasoning in non-English languages still trails English",
      "No MoE variant at this size"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 73.84
      },
      {
        "name": "GSM8K",
        "score": 92.49
      },
      {
        "name": "HumanEval",
        "score": 85.37
      },
      {
        "name": "ArenaHard",
        "score": 68.98
      },
      {
        "name": "AlpacaEval",
        "score": 50.08
      }
    ],
    "alternatives": [
      "granite33-8b",
      "qwen3-8b",
      "mistral-7b-instruct"
    ],
    "install": "# HuggingFace : ibm-granite/granite-4.1-8b",
    "desc": "IBM's dense 8B Granite 4.1 release: Apache 2.0, 12 languages, 131k context, MMLU 73.84, HumanEval 85.37. Trained on a CoreWeave GB200 NVL72 cluster.",
    "verdict": "IBM's most usable open model yet — Apache 2.0, multilingual, and well-suited for enterprise tool use."
  },
  "nemotron-omni-30b": {
    "release": "28 avril 2026",
    "arch": "Hybrid Mamba2-Transformer MoE · 30B total / 3B actifs · Conv3D + EVS · vision/audio/vidéo intégrés",
    "training": "354.6M échantillons · ~717B tokens sur 1 395 datasets. Anglais uniquement. Variantes BF16, FP8, NVFP4 publiées.",
    "strengths": [
      "Native omnimodal: text, image, audio, video",
      "256k context window",
      "9x throughput versus other open omni models",
      "Runs on a single GPU thanks to 3B active MoE",
      "First-class NVIDIA NIM pipeline"
    ],
    "weaknesses": [
      "English-only",
      "Full multimodal requires llama.cpp or vLLM (Ollama is text-only)",
      "NVIDIA Open Model License is not Apache or MIT"
    ],
    "benchmarks": [],
    "alternatives": [
      "nemotron-nano-3-30b",
      "qwen3-omni-30b",
      "mimo-v25"
    ],
    "install": "# HuggingFace : nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-BF16",
    "desc": "NVIDIA's omnimodal MoE: 30B total / 3B active, handling text, image, audio, and video in 256k context. Hybrid Mamba2-MoE architecture delivers 9x the throughput of competing open omni models. Released April 2026.",
    "verdict": "The fastest open omnimodal model on a single GPU — as long as you only need English."
  },
  "nemotron-cascade-2": {
    "release": "Avril 2026",
    "arch": "MoE 30B/3B actifs · thinking mode + instruct unifie · 128k ctx",
    "training": "Entraine par NVIDIA. Medaille d'or IMO 2025 et IOI 2025 en mode thinking. Optimise pour le raisonnement mathematique et le code competitif.",
    "strengths": [
      "Gold medal at IMO 2025 and IOI 2025 in thinking mode",
      "Fast inference with only 3B active params",
      "Fits on a 24 GB GPU at Q4",
      "Commercial use allowed under NVIDIA Open Model License"
    ],
    "weaknesses": [
      "NVIDIA Open Model License — not Apache or MIT",
      "32+ GB VRAM total in Q4 (full model is 30B)",
      "Thinking mode generation can be slow"
    ],
    "benchmarks": [
      {
        "name": "AIME 2025",
        "score": 88
      }
    ],
    "alternatives": [
      "deepseek-r1-32b",
      "qwen3-32b",
      "gemma4-26b-moe"
    ],
    "install": "ollama run nemotron-cascade-2",
    "desc": "NVIDIA's 30B MoE (3B active) with both thinking and instruct modes. Earned IMO 2025 and IOI 2025 gold medals — 30B-class reasoning at 3B-active inference speed. Released April 2026.",
    "verdict": "Olympic-grade reasoning at 3B-active inference cost — the sharpest open math and code model in its weight class."
  },
  "laguna-xs2": {
    "release": "28 avril 2026",
    "arch": "MoE 33B/3B actifs · 256 experts + 1 partagé · 40 couches (10 attention globale + 30 sliding-window 512) · KV-cache FP8 · 128k ctx",
    "training": "Premier modèle open-weight de Poolside, optimisé pour l'agentic coding local. Optimiseur Muon, BF16, reasoning natif avec interleaved thinking.",
    "strengths": [
      "68.2% SWE-Bench Verified — top-tier among open models",
      "Runs on a 36 GB Mac",
      "Apache 2.0 with no commercial restrictions",
      "Native tool calls and streaming",
      "Official Ollama tag with multiple quantizations"
    ],
    "weaknesses": [
      "Coding-specialized — not a general chat model",
      "MoE + SWA architecture needs transformers v5.6.2 or newer",
      "Interleaved thinking can slow first-token latency"
    ],
    "benchmarks": [
      {
        "name": "SWE-Bench Verified",
        "score": 68.2
      },
      {
        "name": "SWE-Bench Multilingual",
        "score": 62.4
      },
      {
        "name": "SWE-Bench Pro",
        "score": 44.5
      },
      {
        "name": "Terminal-Bench 2.0",
        "score": 30.1
      }
    ],
    "alternatives": [
      "devstral-small-2",
      "qwen3-coder-next",
      "qwen36-35b-a3b"
    ],
    "install": "ollama run laguna-xs.2",
    "desc": "Poolside's first open-weight release: a 33B MoE (3B active) under Apache 2.0 built specifically for agentic coding. Scores 68.2% on SWE-Bench Verified and runs on a 36 GB Mac.",
    "verdict": "The strongest open coding model that actually fits on a developer laptop — Apache 2.0 to boot."
  },
  "granite41-30b": {
    "release": "29 avril 2026",
    "arch": "Transformer dense · 64 couches · GQA 32Q/8KV · embedding 4096 · MLP hidden 32 768 · SwiGLU · RoPE · RMSNorm",
    "training": "Fine-tuné depuis Granite-4.1-30B-Base. Pipeline SFT + RL alignment. 12 langues : EN, DE, ES, FR, JA, PT, AR, CS, IT, KO, NL, ZH. Cluster NVIDIA GB200 NVL72 (CoreWeave).",
    "strengths": [
      "Apache 2.0 with IBM-grade transparency",
      "Native OpenAI function-calling schema",
      "12 languages including French",
      "131k context window",
      "Official Ollama tag with multiple quantizations"
    ],
    "weaknesses": [
      "Needs ~32 GB VRAM at Q4 — RTX 5090 territory",
      "No MoE variant at this size",
      "Non-English reasoning trails English"
    ],
    "benchmarks": [],
    "alternatives": [
      "granite41-8b",
      "qwen3-32b",
      "mistral-small-32-24b"
    ],
    "install": "ollama run granite4.1:30b",
    "desc": "IBM's dense 30B Granite 4.1: Apache 2.0, 12 languages, 131k context, with OpenAI-compatible tool calling. Built on the same GB200 NVL72 cluster as the rest of the 4.1 lineup.",
    "verdict": "The Granite to pick when 8B feels light: Apache 2.0, function-calling native, and built for enterprise."
  },
  "granite41-3b": {
    "release": "29 avril 2026",
    "arch": "Transformer dense · 40 couches · GQA 40Q/8KV · embedding 2560 · MLP hidden 8 192 · SwiGLU · RoPE · RMSNorm",
    "training": "SFT + RL alignment, mêmes données et pipeline que la 8B et 30B. 12 langues dont FR. Cluster NVIDIA GB200 NVL72.",
    "strengths": [
      "Apache 2.0 with full openness",
      "Tool calling plus FIM code completion",
      "12 languages including French",
      "131k context at only 3B params",
      "Fits in 3 GB VRAM"
    ],
    "weaknesses": [
      "Reasoning lags the 8B and 30B siblings",
      "Demanding chat use cases really want the 8B model"
    ],
    "benchmarks": [],
    "alternatives": [
      "granite41-8b",
      "smollm3-3b",
      "llama3-3b"
    ],
    "install": "ollama run granite4.1:3b",
    "desc": "IBM's dense 3B Granite 4.1: Apache 2.0, 12 languages, 131k context, with tool calling and FIM code support. The smallest Granite tier, sharing data and pipeline with its larger siblings.",
    "verdict": "A serious 3B option for edge and embedded — same Granite recipe, just smaller."
  },
  "ling-26-1t": {
    "release": "23 avril 2026",
    "arch": "BailingMoeV2.5 · MoE 1T total / 50B actifs · 256 experts top-8 + 1 partagé · 80 couches · hybrid MLA + Linear Attention · 256k ctx",
    "training": "Famille Ling 2.6 (Ant Group). Stratégie Contextual Process Redundancy Suppression et 'Fast Thinking' pour réduire l'overhead de tokens. Tool-call parser compatible Qwen2.5.",
    "strengths": [
      "Permissive MIT license",
      "Top open non-reasoning Intelligence Index (34)",
      "256k context window",
      "Efficient hybrid MLA + Linear Attention",
      "Mature agentic tool calling, compatible with Qwen2.5 parsers"
    ],
    "weaknesses": [
      "Around 600 GB VRAM in Q4 — datacenter required",
      "Hugging Face weights only — no Ollama tag",
      "Not a reasoning model; pick DeepSeek V4 for deliberation"
    ],
    "benchmarks": [
      {
        "name": "AA Intelligence Index",
        "score": 34
      }
    ],
    "alternatives": [
      "ring-1t",
      "mimo-v25-pro",
      "deepseek-v4-pro"
    ],
    "install": "# HuggingFace : inclusionAI/Ling-2.6-1T",
    "desc": "Ant Group's Ling 2.6 1T: MIT-licensed MoE with 50B active params, hybrid MLA + Linear Attention, and 256k context. Top open non-reasoning model with an Intelligence Index of 34.",
    "verdict": "The MIT-licensed flagship to beat for non-reasoning, agentic workloads at trillion-parameter scale."
  },
  "nemotron3": {
    "release": "4 mai 2026",
    "arch": "Transformer dense · 33B paramètres · 128k contexte",
    "training": "Famille Nemotron NVIDIA, alignement RLHF orienté raisonnement et code.",
    "strengths": [
      "Dense 33B sized to saturate a 24GB consumer GPU at Q4",
      "128k context handles long codebases and reports",
      "RLHF tuned for reasoning and code, not just chat",
      "Open weights backed by NVIDIA's research stack"
    ],
    "weaknesses": [
      "NVIDIA Open Model License has commercial terms worth reviewing carefully",
      "Gated on Hugging Face (click-through access required)",
      "Dense 33B is heavier than comparable MoE alternatives at inference"
    ],
    "benchmarks": [],
    "alternatives": [
      "nemotron-3-super-120b",
      "qwen25-32b",
      "nemotron-cascade-2"
    ],
    "install": "ollama run nemotron3",
    "desc": "NVIDIA's dense 33B model targeting balanced chat, code, and reasoning workloads. Fits a single RTX 4090 at Q4 with a 128k context window.",
    "verdict": "A solid single-GPU workhorse for teams that want strong reasoning and code on a 4090 without depending on an API."
  },
  "nemotron-3-nano": {
    "release": "11 avril 2026",
    "arch": "MoE · 30B total / 3.5B actifs · 128k contexte",
    "training": "Famille Nemotron 3, distillation et RL alignment orientés raisonnement, code et chat.",
    "strengths": [
      "MoE routing yields 3.5B-class latency with 30B-class capability",
      "128k context for large documents and repos",
      "Strong across chat, code, and reasoning in one checkpoint",
      "Distillation plus RL alignment from the broader Nemotron family"
    ],
    "weaknesses": [
      "Needs ~39GB system RAM when partially offloaded to CPU",
      "NVIDIA Open Model License — review commercial terms",
      "Gated on Hugging Face"
    ],
    "benchmarks": [],
    "alternatives": [
      "nemotron-cascade-2",
      "qwen3-30b-a3b",
      "nemotron-nano-3-30b"
    ],
    "install": "ollama run nemotron-3-nano",
    "desc": "NVIDIA's 30B-parameter MoE with only 3.5B active per token, delivering 30B-class quality at small-model speeds across chat, code, and reasoning. 128k context.",
    "verdict": "The fast lane of the Nemotron 3 family — pick it when you want 30B output quality but can't afford 30B latency."
  },
  "medgemma": {
    "release": "20 avril 2026",
    "arch": "Gemma 4 · 4B paramètres · multimodal texte + image · 128k contexte",
    "training": "Variante médicale de Gemma, fine-tuning sur littérature clinique, imagerie radiologique et comptes-rendus médicaux.",
    "strengths": [
      "Domain-tuned on clinical literature and radiology imagery",
      "Compact 4B footprint (~2.3GB VRAM at Q4)",
      "True multimodal — text plus medical images",
      "Permissive Gemma license for research and most commercial use"
    ],
    "weaknesses": [
      "Decision-support only — not approved for direct clinical use",
      "Narrow specialization; weak outside medical contexts",
      "Gated on Hugging Face"
    ],
    "benchmarks": [],
    "alternatives": [
      "gemma3-4b",
      "gemma4-e4b"
    ],
    "install": "ollama run medgemma",
    "desc": "Google's 4B medical variant of Gemma with vision and text, tuned for radiology, clinical imaging, and report drafting. 128k context, Gemma license.",
    "verdict": "A pocket-sized clinical assistant for research and report drafting — never a substitute for a licensed clinician."
  },
  "gemma4": {
    "release": "6 mai 2026",
    "arch": "Gemma 4 base · 2B dense · multimodal texte + image · 128k contexte",
    "training": "Famille Gemma 4 Google, version 2B base multimodale, entraînée pour edge/laptop.",
    "strengths": [
      "Runs on integrated GPUs at ~1.2GB VRAM in Q4",
      "Multimodal text and image input out of the box",
      "128k context unusual at this parameter count",
      "Permissive Gemma license"
    ],
    "weaknesses": [
      "Reasoning lags behind 4B and larger Gemma variants",
      "Gated on Hugging Face (click-through access)"
    ],
    "benchmarks": [],
    "alternatives": [
      "gemma4-e2b",
      "gemma3-4b",
      "gemma2-2b"
    ],
    "install": "ollama run gemma4",
    "desc": "Google's 2B base model in the Gemma 4 family with text and image input, 128k context, and a 1.2GB Q4 footprint that runs on integrated graphics or a Raspberry Pi 5.",
    "verdict": "The smallest Gemma 4 that still feels useful — a strong default for edge multimodal apps."
  },
  "mixtral-8x7b": {
    "release": "Décembre 2023",
    "arch": "MoE 8×7B · 8 experts, 2 actifs par token · 47B total / 13B actifs",
    "training": "Corpus multilingue Mistral AI. Premier grand MoE open-weight populaire.",
    "strengths": [
      "Quality well above dense models of equivalent active params",
      "Strong coding and multilingual performance",
      "Apache 2.0 license",
      "Battle-tested in production stacks"
    ],
    "weaknesses": [
      "Roughly 26GB VRAM at Q4 — same footprint as a dense 47B",
      "Eclipsed by Qwen 3 and Llama 3.3 in 2025 benchmarks",
      "32k context now feels limiting",
      "Knowledge cutoff predates current tooling"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 70.6
      },
      {
        "name": "HumanEval",
        "score": 40.2
      },
      {
        "name": "HellaSwag",
        "score": 86.7
      }
    ],
    "alternatives": [
      "mistral-small-24b",
      "qwen25-32b",
      "llama33-70b"
    ],
    "install": "ollama run mixtral:8x7b",
    "desc": "The Mistral AI MoE that popularized open-weight sparse models. Eight 7B experts deliver 47B-class output, but you pay 47B-class VRAM costs.",
    "verdict": "A historically important MoE, now a second-tier choice — newer dense 24-32B models match it for less VRAM."
  },
  "croissant-llm": {
    "release": "Janvier 2024",
    "arch": "Transformer dense · 1.3B · entraîné sur Jean Zay (IDRIS)",
    "training": "Projet MLIA (Sorbonne) — corpus web FR/EN équilibré, corpus public transparent.",
    "strengths": [
      "Runs in roughly 1GB VRAM at Q4",
      "Native French/English balance, not an afterthought",
      "Fully auditable training corpus",
      "Permissive MIT-style licensing"
    ],
    "weaknesses": [
      "2048-token context is too tight for most real tasks",
      "Quality is well below any modern 2025 model",
      "No vision, tools, or chain-of-thought reasoning",
      "Limited ecosystem and tooling support"
    ],
    "benchmarks": [
      {
        "name": "FrenchBench",
        "score": 38
      }
    ],
    "alternatives": [
      "lucie-7b",
      "apertus-8b",
      "mistral-7b-instruct"
    ],
    "install": "ollama pull hf.co/manu/croissant-llm-chat-v0.1-GGUF",
    "desc": "A 1.3B bilingual French/English model from Sorbonne's MLIA lab, light enough to run on a CPU and shipped with a fully auditable training corpus.",
    "verdict": "An academic milestone for transparent bilingual training — not competitive for production use in 2025."
  },
  "llama3-8b": {
    "release": "Juillet 2024",
    "arch": "Transformer dense · 32 couches · GQA · Llama 3.1 8B",
    "training": "15T tokens multilingues Meta. Fine-tuning instruction following.",
    "strengths": [
      "128k context window",
      "Strong instruction following and coding",
      "Enormous ecosystem of fine-tunes and integrations",
      "Solid quality-to-size ratio"
    ],
    "weaknesses": [
      "Beaten by Qwen 3 8B on most 2025 benchmarks",
      "No vision in this checkpoint",
      "Llama Community license restricts use above 700M MAU"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 73
      },
      {
        "name": "HumanEval",
        "score": 72.6
      },
      {
        "name": "GPQA",
        "score": 32.8
      }
    ],
    "alternatives": [
      "qwen3-8b",
      "gemma3-12b",
      "mistral-7b-instruct"
    ],
    "install": "ollama run llama3.1:8b",
    "desc": "Meta's Llama 3.1 8B, the open-weight benchmark of 2024. A 128k context, well-behaved instruction follower with the largest ecosystem in the open-source world.",
    "verdict": "Still a dependable open-weight default, but Qwen 3 8B is the better pick if license terms allow."
  },
  "llama3-3b": {
    "release": "Septembre 2024",
    "arch": "Transformer dense · Llama 3.2 3B · architecture légère pour edge",
    "training": "Corpus multilingue Meta + distillation depuis grands modèles Llama.",
    "strengths": [
      "128k context in a 3B parameter footprint",
      "Fast CPU inference",
      "Strong baseline for edge and mobile use cases",
      "Distilled from larger Llama models for better quality density"
    ],
    "weaknesses": [
      "Noticeably weaker than 7B+ models on complex tasks",
      "No vision in this checkpoint",
      "Subject to Llama Community license terms"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 63.4
      },
      {
        "name": "HellaSwag",
        "score": 79.2
      }
    ],
    "alternatives": [
      "qwen25-3b",
      "phi35-mini",
      "gemma2-2b"
    ],
    "install": "ollama run llama3.2:3b",
    "desc": "Meta's 3B instruct model with a full 128k context, tuned for laptops, mobile, and edge devices where memory and battery matter.",
    "verdict": "The best 3B open-weight model for edge use cases — pick it when memory and latency dominate the brief."
  },
  "qwen25-7b": {
    "release": "Septembre 2024",
    "arch": "Transformer dense · 28 couches · GQA · Qwen 2.5",
    "training": "18T tokens, forte couverture multilingue (29 langues), données code et maths enrichies.",
    "strengths": [
      "128k context window",
      "Apache 2.0 license with no MAU restrictions",
      "Strong multilingual performance across 29 languages",
      "Better math and coding than Llama 3.1 8B at the same size"
    ],
    "weaknesses": [
      "Surpassed by Qwen 3 8B in 2025",
      "Trails Qwen 2.5 Coder on dedicated coding tasks",
      "Reasoning weaker than DeepSeek R1 distills"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 74.2
      },
      {
        "name": "HumanEval",
        "score": 84.8
      },
      {
        "name": "MATH",
        "score": 75.5
      }
    ],
    "alternatives": [
      "qwen3-8b",
      "llama3-8b",
      "mistral-7b-instruct"
    ],
    "install": "ollama run qwen2.5:7b",
    "desc": "Alibaba's Qwen 2.5 7B, a top-tier 7B for its era with a 128k context, strong multilingual coverage across 29 languages, and Apache 2.0 licensing.",
    "verdict": "A still-useful general-purpose 7B with permissive licensing — but check Qwen 3 first if you're starting fresh."
  },
  "qwen25-32b": {
    "release": "Septembre 2024",
    "arch": "Transformer dense · 64 couches · GQA · Qwen 2.5",
    "training": "18T tokens. Fort en raisonnement, code et instructions longues.",
    "strengths": [
      "Quality on par with many 70B models",
      "128k context",
      "Apache 2.0 license",
      "Strong math, code, and reasoning"
    ],
    "weaknesses": [
      "Needs ~19GB VRAM at Q4 — pushes the limits of a single 24GB card",
      "Outperformed by Qwen 3 32B in 2025",
      "No native vision"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 83.3
      },
      {
        "name": "HumanEval",
        "score": 90.2
      },
      {
        "name": "MATH",
        "score": 83.1
      }
    ],
    "alternatives": [
      "qwen3-32b",
      "mistral-small-24b",
      "llama33-70b"
    ],
    "install": "ollama run qwen2.5:32b",
    "desc": "Alibaba's Qwen 2.5 32B, the open-weight 32B reference of late 2024 — matching 70B-class quality on most benchmarks at half the VRAM.",
    "verdict": "A landmark open-weight 32B that's still a strong default — upgrade to Qwen 3 32B when you can."
  },
  "qwen25-coder-7b": {
    "release": "Novembre 2024",
    "arch": "Transformer dense spécialisé code · Qwen 2.5 Coder 7B",
    "training": "Pré-entraînement Qwen 2.5 + 5.5T tokens code, 92 langages de programmation.",
    "strengths": [
      "Strong HumanEval and code completion for a 7B",
      "128k context for repo-scale prompts",
      "Coverage of 92 programming languages",
      "Apache 2.0 license"
    ],
    "weaknesses": [
      "Beaten clearly by the 32B variant on complex code tasks",
      "Weaker than general 7Bs for non-coding chat",
      "Limited reasoning on multi-step debugging"
    ],
    "benchmarks": [
      {
        "name": "HumanEval",
        "score": 88.4
      },
      {
        "name": "MBPP",
        "score": 83.5
      }
    ],
    "alternatives": [
      "qwen25-coder-32b",
      "deepseek-coder-v2-16b",
      "qwen25-coder-3b"
    ],
    "install": "ollama run qwen2.5-coder:7b",
    "desc": "A 7B coding specialist from Alibaba covering 92 programming languages with a 128k context. Competitive with proprietary models on HumanEval at this size.",
    "verdict": "The right pick when you want a local code model that fits on a single 8GB-class GPU and still pulls its weight."
  },
  "gemma2-2b": {
    "release": "Juin 2024",
    "arch": "Transformer dense · Gemma 2 2B · logit-softcapping + attention locale/globale",
    "training": "2T tokens, architecture compacte Google distillée depuis modèles plus grands.",
    "strengths": [
      "Runs comfortably in under 2GB VRAM at Q4",
      "Best-in-class 2B quality for its release window",
      "Workable on commodity CPUs",
      "Google's Gemma license permits broad use"
    ],
    "weaknesses": [
      "8k context is restrictive for modern RAG",
      "Falls apart on multi-step reasoning",
      "No vision, no tool calling out of the box"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 52.2
      },
      {
        "name": "HellaSwag",
        "score": 74.9
      }
    ],
    "alternatives": [
      "llama3-3b",
      "qwen25-3b",
      "phi35-mini"
    ],
    "install": "ollama run gemma2:2b",
    "desc": "Google's Gemma 2 2B, a compact instruct model distilled from larger Gemmas. Small enough to run on a Raspberry Pi 5 or modest CPU.",
    "verdict": "The best 2B for edge and CPU workloads — just don't expect it to reason."
  },
  "gemma2-9b": {
    "release": "Juin 2024",
    "arch": "Transformer dense · Gemma 2 9B · sliding window attention",
    "training": "8T tokens. Architecture distillée depuis Gemma 2 27B.",
    "strengths": [
      "Beats Llama 3 8B on multiple benchmarks",
      "Solid quality-per-parameter",
      "Reliable instruction following",
      "Distilled from Gemma 2 27B for better quality density"
    ],
    "weaknesses": [
      "8k context is the standout limitation",
      "No vision capabilities",
      "Gemma license is more restrictive than Apache 2.0"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 71.3
      },
      {
        "name": "HellaSwag",
        "score": 87.2
      },
      {
        "name": "HumanEval",
        "score": 40.2
      }
    ],
    "alternatives": [
      "llama3-8b",
      "qwen25-7b",
      "mistral-7b-instruct"
    ],
    "install": "ollama run gemma2:9b",
    "desc": "Google's Gemma 2 9B, a distilled instruct model that outperforms Llama 3 8B on several benchmarks at a slightly larger size.",
    "verdict": "A strong 9B if you can live with 8k context — otherwise pick Qwen 2.5 7B or Llama 3.1 8B for the 128k window."
  },
  "gemma2-27b": {
    "release": "Juin 2024",
    "arch": "Transformer dense · Gemma 2 27B · logit-softcapping",
    "training": "13T tokens. Le plus grand de la famille Gemma 2.",
    "strengths": [
      "Quality close to 70B-class models",
      "Runs in roughly 16GB VRAM at Q4",
      "Strong instruction following",
      "Robust multilingual output"
    ],
    "weaknesses": [
      "8k context is a major handicap in 2025",
      "Gemma license is less permissive than Apache 2.0",
      "No vision in this checkpoint"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 75.2
      },
      {
        "name": "HellaSwag",
        "score": 89.5
      },
      {
        "name": "HumanEval",
        "score": 51.8
      }
    ],
    "alternatives": [
      "llama33-70b",
      "qwen25-32b",
      "mistral-small-24b"
    ],
    "install": "ollama run gemma2:27b",
    "desc": "The flagship of the Gemma 2 family from Google. Approaches 70B-class quality on a single 24GB GPU at Q4, with strong multilingual coverage.",
    "verdict": "Excellent raw quality undermined by an 8k context — only pick it when your prompts stay short."
  },
  "phi35-mini": {
    "release": "Août 2024",
    "arch": "Dense · 3.8B · Phi-3.5 Mini · fenêtre glissante + FlashAttention",
    "training": "Données synthétiques haute qualité Microsoft. Fort accent pédagogique.",
    "strengths": [
      "128k context in a 3.8B footprint",
      "MIT license with no commercial restrictions",
      "Fast inference on modest hardware",
      "Strong reasoning relative to its size"
    ],
    "weaknesses": [
      "Memory footprint is high for a 3.8B at full context",
      "Outclassed by Phi-4 14B on overall quality",
      "Synthetic-heavy training can show as a narrow knowledge base"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 69
      },
      {
        "name": "HumanEval",
        "score": 62.8
      }
    ],
    "alternatives": [
      "phi4-14b",
      "qwen25-3b",
      "llama3-3b"
    ],
    "install": "ollama run phi3.5",
    "desc": "Microsoft's Phi-3.5 Mini, a 3.8B model trained on heavily curated synthetic data with a 128k context. Punches above its weight on reasoning.",
    "verdict": "A clever small model with a huge context — useful when you need 128k tokens and minimal VRAM."
  },
  "phi4-14b": {
    "release": "Décembre 2024",
    "arch": "Dense · 14B · Phi-4 · données synthétiques exclusives Microsoft",
    "training": "Corpus synthétique Microsoft ultra-filtré. Accent raisonnement et maths.",
    "strengths": [
      "Top-tier 14B reasoning at release",
      "MIT license",
      "Strong math, science, and code performance",
      "Tight, well-formatted outputs"
    ],
    "weaknesses": [
      "16k context is a significant limitation",
      "Weaker multilingual coverage than Qwen",
      "Narrower world knowledge from synthetic training"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 84.8
      },
      {
        "name": "MATH",
        "score": 80.4
      },
      {
        "name": "HumanEval",
        "score": 82.6
      }
    ],
    "alternatives": [
      "qwen3-14b",
      "mistral-nemo-12b",
      "gemma3-12b"
    ],
    "install": "ollama run phi4:14b",
    "desc": "Microsoft's Phi-4 14B, trained on ultra-curated synthetic data with a heavy STEM bias. The 14B reasoning leader at the end of 2024.",
    "verdict": "The reasoning-focused 14B to pick — just budget around its short context window."
  },
  "deepseek-r1-7b": {
    "release": "Janvier 2025",
    "arch": "Distillation DeepSeek R1 vers Qwen 2.5 7B · chain-of-thought explicite",
    "training": "Distillé depuis R1 671B. RL sur problèmes de raisonnement (maths, code, logique).",
    "strengths": [
      "Explicit chain-of-thought reasoning at 7B scale",
      "Strong AIME and MATH scores for its size",
      "32k context",
      "MIT license"
    ],
    "weaknesses": [
      "Very verbose due to thinking tokens",
      "Trails the 32B distill on complex reasoning",
      "Higher token costs per response",
      "Weaker than general 7Bs on casual chat"
    ],
    "benchmarks": [
      {
        "name": "AIME 2024",
        "score": 55.5
      },
      {
        "name": "MATH-500",
        "score": 92.8
      }
    ],
    "alternatives": [
      "deepseek-r1-32b",
      "qwen3-8b",
      "phi4-14b"
    ],
    "install": "ollama run deepseek-r1:7b",
    "desc": "A 7B DeepSeek model distilled from R1 671B with explicit chain-of-thought reasoning. Surprisingly strong on AIME and MATH for its size.",
    "verdict": "A capable reasoning-specialist 7B — but bump up to the 32B distill if accuracy matters more than tokens."
  },
  "deepseek-coder-v2-16b": {
    "release": "Juin 2024",
    "arch": "MoE léger · DeepSeek Coder V2 Lite · 16B · 128k contexte",
    "training": "Pré-entraînement code DeepSeek V2 Lite + fine-tuning 338 langages.",
    "strengths": [
      "128k context for code",
      "MoE architecture keeps inference fast",
      "Coverage of 338 programming languages",
      "Strong code generation and repair"
    ],
    "weaknesses": [
      "Lite version trails the 236B DeepSeek Coder V2 by a wide margin",
      "Beaten by Qwen 2.5 Coder 32B on standard benchmarks",
      "MoE memory footprint is larger than active params suggest"
    ],
    "benchmarks": [
      {
        "name": "HumanEval",
        "score": 81.1
      },
      {
        "name": "LiveCodeBench",
        "score": 28.8
      }
    ],
    "alternatives": [
      "qwen25-coder-32b",
      "qwen25-coder-7b",
      "granite33-8b"
    ],
    "install": "ollama run deepseek-coder-v2:16b-lite-instruct",
    "desc": "A 16B MoE code specialist from DeepSeek covering 338 programming languages with a 128k context. Fast inference for its quality tier.",
    "verdict": "Worth a look for exotic language coverage and speed — Qwen 2.5 Coder 32B still wins on raw quality."
  },
  "llama32-vision-11b": {
    "release": "Septembre 2024",
    "arch": "Dense · 11B · vision cross-attention · encodeur CLIP · Llama 3.2",
    "training": "Llama 3.1 8B + adaptateurs vision. Premier modèle vision officiel Meta.",
    "strengths": [
      "128k text context with image input",
      "Strong OCR and image description",
      "Built on the well-supported Llama 3 base",
      "First-party Meta multimodal release"
    ],
    "weaknesses": [
      "Vision quality trails Qwen2-VL and LLaVA-OneVision",
      "Subject to Llama Community license terms",
      "No video understanding",
      "Image inputs add significant VRAM overhead"
    ],
    "benchmarks": [
      {
        "name": "MMMU",
        "score": 50.7
      },
      {
        "name": "DocVQA",
        "score": 88.4
      }
    ],
    "alternatives": [
      "qwen2-vl-7b",
      "gemma3-12b",
      "internvl-35-8b"
    ],
    "install": "ollama run llama3.2-vision:11b",
    "desc": "Meta's first official multimodal Llama. An 11B vision-language model built on Llama 3.1 8B with added image adapters and a 128k text context.",
    "verdict": "A solid Llama-family vision model — but Qwen2-VL is the better open-weight choice when license terms allow."
  },
  "qwen2-vl-7b": {
    "release": "Octobre 2024",
    "arch": "Dense 7B · M-RoPE vision+texte · résolution dynamique · Qwen2-VL",
    "training": "Pré-entraînement multimodal Qwen2-VL. Fort en OCR, vidéo courte, documents.",
    "strengths": [
      "Dynamic resolution from 20px up to 16K",
      "Best-in-class OCR and document handling at 7B",
      "Apache 2.0 license",
      "Short video input support"
    ],
    "weaknesses": [
      "32k combined text+image context",
      "Outperformed by Qwen3-VL on newer benchmarks",
      "Memory pressure scales fast at high resolutions"
    ],
    "benchmarks": [
      {
        "name": "MMMU",
        "score": 54.1
      },
      {
        "name": "DocVQA",
        "score": 94.5
      },
      {
        "name": "OCRBench",
        "score": 845
      }
    ],
    "alternatives": [
      "qwen3-vl-8b",
      "gemma3-12b",
      "internvl-35-8b"
    ],
    "install": "ollama run qwen2-vl:7b",
    "desc": "Alibaba's Qwen2-VL 7B — a top-tier open-weight vision model with dynamic resolution, multilingual OCR, and short video understanding.",
    "verdict": "The strongest open-weight 7B vision model for OCR and documents — upgrade to Qwen3-VL once it fits your stack."
  },
  "qwen35-9b": {
    "release": "Avril 2025",
    "arch": "Dense · 9B · Qwen 3.5 · hybrid thinking · 262k contexte natif",
    "training": "Évolution Qwen 3 avec contexte 262k et thinking amélioré. 119 langues.",
    "strengths": [
      "262K native context in a 9B parameter model",
      "Toggleable thinking mode for cost control",
      "Strong multilingual performance across 119 languages",
      "Apache 2.0 license"
    ],
    "weaknesses": [
      "Fine-tune ecosystem is still less mature than Qwen 2.5",
      "Thinking mode can be verbose by default"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-8b",
      "qwen25-7b",
      "gemma3-12b"
    ],
    "install": "ollama run qwen3.5:9b",
    "desc": "Alibaba's next-generation dense 9B model with a 262K native context window and an improved toggleable thinking mode. Apache 2.0 licensed.",
    "verdict": "The best long-context Apache-licensed 9B today, especially if you need toggleable reasoning."
  },
  "qwen35-27b": {
    "release": "Avril 2025",
    "arch": "Dense · 27B · Qwen 3.5 · hybrid thinking · 262k contexte natif",
    "training": "Corpus enrichi Qwen 3.5, fort en raisonnement complexe avec long contexte.",
    "strengths": [
      "262K native context window",
      "Well-calibrated thinking mode",
      "Strong math and science reasoning",
      "Apache 2.0 license"
    ],
    "weaknesses": [
      "Needs ~16GB VRAM in Q4",
      "Gemma 3 27B is a close competitor",
      "Thinking mode adds latency on simple queries"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-32b",
      "gemma3-27b",
      "qwen25-32b"
    ],
    "install": "ollama run qwen3.5:27b",
    "desc": "Alibaba's dense 27B Qwen 3.5 with a 262K context window and calibrated thinking mode. One of the best quality-to-size trade-offs in the open 25B-30B class.",
    "verdict": "The best Apache-licensed dense model in the 27B class for long-context reasoning."
  },
  "qwen25-3b": {
    "release": "Septembre 2024",
    "arch": "Transformer dense · Qwen 2.5 3B · compact multilingue",
    "training": "Corpus Qwen 2.5 complet, version compressée 3B.",
    "strengths": [
      "Around 2GB VRAM at Q4 — runs on almost anything",
      "Multilingual coverage rare at this size",
      "MMLU 65.6 and HumanEval 74.4 punch above its weight",
      "32k context out of the box"
    ],
    "weaknesses": [
      "Qwen Research License blocks commercial use",
      "Quality gap vs 7B-and-up is meaningful for non-trivial tasks",
      "32k context limits long-document work"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 65.6
      }
    ],
    "alternatives": [
      "llama3-3b",
      "gemma2-2b",
      "phi35-mini"
    ],
    "install": "ollama run qwen2.5:3b",
    "desc": "Alibaba's compact 3B Qwen 2.5 instruct model with surprisingly strong MMLU 65.6 and HumanEval 74.4. Locked to non-commercial use under the Qwen Research License.",
    "verdict": "A strong 3B for research and edge prototyping, but the Qwen Research License rules it out of production."
  },
  "qwen25-coder-15b": {
    "release": "Novembre 2024",
    "arch": "Dense · 1.5B · Qwen 2.5 Coder · spécialisé code compact",
    "training": "1.5B params, corpus code 92 langages, idéal pour complétion légère.",
    "strengths": [
      "Around 1GB VRAM at Q4 — runs nearly anywhere",
      "Strong inline completion for a 1.5B model",
      "Apache 2.0 license",
      "92 programming languages covered"
    ],
    "weaknesses": [
      "1.5B caps code quality — not for complex generation",
      "32k context only",
      "Outclassed on harder tasks by 7B+ coders"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen25-coder-3b",
      "qwen25-coder-7b",
      "phi35-mini"
    ],
    "install": "ollama run qwen2.5-coder:1.5b",
    "desc": "Alibaba's smallest Qwen 2.5 Coder at 1.5B parameters under Apache 2.0, covering 92 programming languages. HumanEval 70.7 makes it a serious on-device completion model.",
    "verdict": "An impressively capable 1.5B coder — keep it for on-device completion, not for whole-feature generation."
  },
  "qwen25-coder-3b": {
    "release": "Novembre 2024",
    "arch": "Dense · 3B · Qwen 2.5 Coder · code optimisé",
    "training": "3B params code-optimisé, 92 langages de programmation.",
    "strengths": [
      "HumanEval 84.1 — exceptional for 3B",
      "Around 2GB VRAM at Q4",
      "Fast inline completion",
      "92 programming languages"
    ],
    "weaknesses": [
      "Qwen Research License blocks commercial use",
      "32k context only",
      "Trails the 7B Coder on complex multi-file tasks"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen25-coder-7b",
      "qwen25-coder-15b",
      "qwen25-3b"
    ],
    "install": "ollama run qwen2.5-coder:3b",
    "desc": "Alibaba's 3B Qwen 2.5 Coder hitting HumanEval 84.1, covering 92 programming languages. Restricted to non-commercial use under the Qwen Research License.",
    "verdict": "Best-in-class 3B coder benchmark-wise, but the non-commercial license pushes most teams to the Apache 2.0 1.5B or 14B variants."
  },
  "deepseek-r1-distill-qwen-15b": {
    "release": "Janvier 2025",
    "arch": "Distillation DeepSeek R1 vers Qwen 2.5 1.5B · chain-of-thought",
    "training": "Distillé depuis R1 671B. Version ultra-compacte 1.5B avec raisonnement CoT.",
    "strengths": [
      "Around 1GB VRAM at Q4 — runs on any laptop",
      "Visible chain-of-thought reasoning",
      "MIT license — fully unrestricted",
      "128k context in a 1.5B model"
    ],
    "weaknesses": [
      "Reasoning depth is genuinely limited at 1.5B despite CoT",
      "Highly verbose — token costs add up fast",
      "Outclassed by the 14B distill on anything non-trivial"
    ],
    "benchmarks": [
      {
        "name": "MATH-500",
        "score": 83.9
      }
    ],
    "alternatives": [
      "deepseek-r1-7b",
      "qwen25-3b",
      "phi35-mini"
    ],
    "install": "ollama run deepseek-r1:1.5b",
    "desc": "DeepSeek's R1 reasoning distilled into a 1.5B MIT-licensed model with visible chain-of-thought. Hits MATH-500 83.9 and runs on any laptop.",
    "verdict": "A fun MIT-licensed reasoning model that fits anywhere, but the 1.5B ceiling shows on real problems."
  },
  "gemma3n-e2b": {
    "release": "Mai 2025",
    "arch": "Gemma 3n E2B · architecture on-device · 2B effectifs · MatFormer",
    "training": "Google Gemma 3n, optimisé mobile/edge avec embeddings per-layer partagés.",
    "strengths": [
      "Built specifically for mobile and edge hardware",
      "140+ language coverage in a tiny footprint",
      "MatFormer architecture maximizes memory efficiency",
      "Per-layer shared embeddings cut RAM use"
    ],
    "weaknesses": [
      "32k context only",
      "Absolute quality trails Gemma 3 12B",
      "Gemma license — not as permissive as Apache 2.0",
      "Multimodal features not exposed via Ollama"
    ],
    "benchmarks": [],
    "alternatives": [
      "gemma3n-e4b",
      "gemma4",
      "qwen25-3b"
    ],
    "install": "ollama run gemma3n:e2b",
    "desc": "Google's Gemma 3n with 2B effective parameters (5B raw) using MatFormer, covering 140+ languages. Optimized for mobile and edge; text-only on Ollama.",
    "verdict": "Google's most memory-efficient small model — purpose-built for mobile and edge inference, with multilingual to match."
  },
  "gemma3n-e4b": {
    "release": "Mai 2025",
    "arch": "Gemma 3n E4B · architecture on-device · 4B effectifs",
    "training": "Google Gemma 3n 4B, multimodal texte+image, 140 langues.",
    "strengths": [
      "4B effective parameters punch well above mobile-class weights",
      "Integrated multimodal — text and image input",
      "140 language coverage",
      "Open Gemma license"
    ],
    "weaknesses": [
      "32k context only",
      "Beaten by Gemma 3 12B in desktop scenarios",
      "Gemma license — less permissive than Apache 2.0",
      "Multimodal support uneven across runtimes"
    ],
    "benchmarks": [],
    "alternatives": [
      "gemma3n-e2b",
      "gemma3-12b",
      "gemma4-e4b"
    ],
    "install": "ollama run gemma3n:e4b",
    "desc": "Google's full Gemma 3n with 4B effective parameters (8B raw) and nested MatFormer architecture. Native multimodal across 140 languages for high-end mobile deployments.",
    "verdict": "The full-fat Gemma 3n — strong mobile multimodal with surprising quality, if Gemma's license fits your use case."
  },
  "granite32-8b": {
    "release": "Octobre 2024",
    "arch": "Dense · 8B · IBM Granite 3.2 · RAG et agents enterprise",
    "training": "Corpus enterprise IBM, fort en code (100 langages), données 2024.",
    "strengths": [
      "128k context",
      "Apache 2.0 license",
      "Strong RAG and enterprise instruction following",
      "IBM Safety Guardrails included",
      "Toggleable thinking mode"
    ],
    "weaknesses": [
      "Trails Llama 3.1 8B on general chat",
      "Very enterprise-flavored tone",
      "Weaker than Qwen 2.5 7B on coding tasks"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 67
      },
      {
        "name": "HumanEval",
        "score": 72
      }
    ],
    "alternatives": [
      "granite33-8b",
      "llama3-8b",
      "qwen25-7b"
    ],
    "install": "ollama run granite3.2:8b",
    "desc": "IBM's enterprise-focused 8B Granite 3.2 with a toggleable thinking mode under Apache 2.0. MMLU 67 and IFEval 70.9, with built-in IBM safety guardrails.",
    "verdict": "The default open 8B for enterprise RAG and regulated workloads — picked for safety guardrails and IBM support, not chat quality."
  },
  "granite33-8b": {
    "release": "Janvier 2025",
    "arch": "Dense · 8B · IBM Granite 3.3 · agents et tool use améliorés",
    "training": "Évolution Granite 3.2 avec amélioration agent/tool use et code.",
    "strengths": [
      "128k context",
      "Apache 2.0 license",
      "Strong agentic and tool-use behavior",
      "Fill-in-the-middle code completion added",
      "Better instruction following than 3.2"
    ],
    "weaknesses": [
      "Still very enterprise-flavored",
      "Less versatile than Qwen 3 8B on open-ended chat",
      "Code quality trails dedicated coders like Qwen 2.5 Coder 7B"
    ],
    "benchmarks": [],
    "alternatives": [
      "granite32-8b",
      "llama3-8b",
      "qwen3-8b"
    ],
    "install": "ollama run granite3.3:8b",
    "desc": "IBM's update to Granite 3.2 8B adding fill-in-the-middle code support and improved instruction following. Apache 2.0 with strong agent and tool-use behavior.",
    "verdict": "A clean upgrade over Granite 3.2 8B for enterprise agents — better tool use, better code, same Apache 2.0 backbone."
  },
  "olmoe-1b-7b": {
    "release": "Septembre 2024",
    "arch": "MoE · 7B total / 1.3B actifs · 64 experts, 8 actifs par token",
    "training": "AllenAI OLMoE. Données ouvertes Dolmino + The Pile 2.",
    "strengths": [
      "Very fast inference with only 1.3B active parameters",
      "Training corpus is 100% open source (Dolmino + Pile 2)",
      "Apache 2.0 license throughout",
      "Competitive with Llama2-13B-Chat at a fraction of the cost"
    ],
    "weaknesses": [
      "4096-token context is limiting for modern workloads",
      "Quality trails recent dense 7B models",
      "Limited tooling and quantization support"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 52
      }
    ],
    "alternatives": [
      "qwen25-3b",
      "llama3-3b",
      "gemma2-2b"
    ],
    "install": "ollama run olmoe",
    "desc": "Allen AI's OLMoE is the only MoE released with weights, training data, and code fully open — 7B total with 1.3B active, matching Llama2-13B-Chat quality.",
    "verdict": "The only truly open MoE end-to-end — pick it for research and education over raw production quality."
  },
  "molmo-7b": {
    "release": "Septembre 2024",
    "arch": "Dense · 7B vision · basé Qwen2 7B + encodeur CLIP OpenAI",
    "training": "AllenAI PixMo — données pointage/annotation humaines originales, totalement open.",
    "strengths": [
      "Pointing capability is rare in open VLMs",
      "Apache 2.0 across weights and PixMo training data",
      "Performance lands between GPT-4V and GPT-4o on standard benchmarks",
      "Transparent human-annotated training set"
    ],
    "weaknesses": [
      "4096-token context cap limits multi-turn vision chats",
      "OCR quality trails Qwen2-VL 7B",
      "Smaller community ecosystem than mainstream VLMs"
    ],
    "benchmarks": [
      {
        "name": "MMMU",
        "score": 58.6
      }
    ],
    "alternatives": [
      "qwen2-vl-7b",
      "llama32-vision-11b",
      "internvl-35-8b"
    ],
    "install": "ollama run molmo",
    "desc": "Allen AI's Apache-licensed VLM built on Qwen2-7B and CLIP, scoring between GPT-4V and GPT-4o on benchmarks with unique pointing and grounding capabilities.",
    "verdict": "The open VLM to choose when you need pointing and grounding under a clean commercial license."
  },
  "molmo-72b": {
    "release": "Septembre 2024",
    "arch": "Dense · 72B vision · basé Qwen2 72B + encodeur CLIP OpenAI",
    "training": "AllenAI PixMo dataset, version 72B maximale.",
    "strengths": [
      "Top-tier vision quality among open-weight VLMs",
      "Apache 2.0 license with PixMo open training data",
      "Strong on complex visual reasoning and dense scenes",
      "Human evaluation second only to GPT-4o"
    ],
    "weaknesses": [
      "~42 GB VRAM at Q4 typically requires 2-3 GPUs",
      "4096-token context constrains long multimodal sessions",
      "No official GGUF release complicates llama.cpp use"
    ],
    "benchmarks": [
      {
        "name": "MMMU",
        "score": 72.2
      }
    ],
    "alternatives": [
      "llava-onevision-72b",
      "qwen3-vl-30b",
      "qwen2-vl-7b"
    ],
    "install": "ollama run molmo:72b",
    "desc": "Allen AI's flagship Apache 2.0 VLM built on Qwen2-72B, ranked #2 in human evaluation behind only GPT-4o for visual understanding.",
    "verdict": "The highest-quality fully open VLM — choose it when you have the GPUs and need GPT-4o-class vision on-prem."
  },
  "falcon-mamba-7b": {
    "release": "Août 2024",
    "arch": "Architecture Mamba (SSM) · 7B · sans Transformer · inférence O(1)",
    "training": "TII UAE — corpus 5.5T tokens. Architecture State Space Model pure.",
    "strengths": [
      "O(1) memory per token at inference",
      "No practical context limit imposed by attention",
      "Apache 2.0 license",
      "Demonstrates Mamba viability at production scale"
    ],
    "weaknesses": [
      "Weaker in-context learning than transformers of equal size",
      "No vision or multimodal support",
      "Trained context is only 8k despite architectural headroom"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 62
      }
    ],
    "alternatives": [
      "mistral-7b-instruct",
      "llama3-8b",
      "qwen25-7b"
    ],
    "install": "ollama run falcon-mamba:7b",
    "desc": "TII's first serious pure Mamba SSM at scale — 7B with constant memory per token, sidestepping transformer attention costs entirely.",
    "verdict": "The benchmark pure-Mamba 7B — pick it to study SSMs or to serve streaming workloads where attention costs hurt most."
  },
  "aya-23-35b": {
    "release": "Mai 2024",
    "arch": "Dense · 35B · Cohere Command R backbone · 23 langues natives",
    "training": "Cohere For AI — 23 langues dont FR/AR/ZH, données instruction multilingues.",
    "strengths": [
      "Strong native quality across 23 languages",
      "Good instruction following in non-English settings",
      "Backed by Cohere's Command base architecture",
      "Competitive multilingual coverage for its era"
    ],
    "weaknesses": [
      "CC-BY-NC 4.0 license blocks commercial deployment",
      "~20 GB VRAM at Q4 with only 8k context",
      "Reasoning capabilities lag 2025-class open models"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen25-32b",
      "salamandra-40b",
      "apertus-70b"
    ],
    "install": "ollama run aya:35b",
    "desc": "Cohere For AI's 35B pre-Expanse multilingual model on the Command base, covering 23 languages with strong instruction following — but locked to non-commercial use.",
    "verdict": "A strong pre-Expanse multilingual 35B — useful for research, but Aya Expanse and modern peers have moved past it."
  },
  "yi-15-34b": {
    "release": "Mai 2024",
    "arch": "Transformer dense · 34B · Yi 1.5 · Llama-compatible",
    "training": "01.AI — 3.6T tokens multilingues EN/ZH. Successeur de Yi-34B.",
    "strengths": [
      "Excellent Chinese-language performance",
      "Compatible with Llama tooling and quantization",
      "Apache 2.0 license enables free commercial use",
      "Stable chat behavior and well-understood quirks"
    ],
    "weaknesses": [
      "4096-token context is severely limiting today",
      "Outclassed by Qwen 2.5 32B in 2025",
      "No multimodal or tool-use specialization"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 77.2
      },
      {
        "name": "HumanEval",
        "score": 75.2
      }
    ],
    "alternatives": [
      "qwen25-32b",
      "llama33-70b",
      "qwen3-32b"
    ],
    "install": "ollama run yi:34b",
    "desc": "01.AI's dense 34B chat model under Apache 2.0, trained on 3.6T tokens with strong English-Chinese bilingual quality.",
    "verdict": "A competent Apache-licensed bilingual 34B from 2024 — only pick it over Qwen 2.5 32B when license terms force your hand."
  },
  "dbrx-instruct": {
    "release": "Mars 2024",
    "arch": "MoE · 132B total / 36B actifs · 16 experts, 4 actifs par token",
    "training": "Databricks — 12T tokens haute qualité, fort en code et science.",
    "strengths": [
      "State-of-the-art quality at March 2024 release",
      "Strong on code and math benchmarks",
      "Databricks Open Model License is broadly permissive",
      "12T tokens of high-quality training data"
    ],
    "weaknesses": [
      "~76 GB VRAM at Q4 demands multi-GPU serving",
      "Largely outclassed by DeepSeek V3 and R1 in 2025",
      "HuggingFace repo is gated, slowing access"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 73.7
      },
      {
        "name": "HumanEval",
        "score": 70.1
      }
    ],
    "alternatives": [
      "deepseek-v4-flash",
      "qwen3-32b",
      "llama33-70b"
    ],
    "install": "ollama run dbrx",
    "desc": "Databricks' 132B MoE with 36B active params, trained on 12T tokens — state-of-the-art at March 2024 release but largely surpassed by DeepSeek V3 and R1.",
    "verdict": "Historically important but no longer competitive — only choose it inside Databricks pipelines where the integration justifies the cost."
  },
  "jais-70b": {
    "release": "Novembre 2024",
    "arch": "Dense · 70B · spécialisé arabe + anglais · MBZUAI/Core42 UAE",
    "training": "MBZUAI/Core42 — corpus 395B tokens arabe natif + anglais haute qualité.",
    "strengths": [
      "Strongest open-weight Arabic model available",
      "GPT-4-level performance in Arabic",
      "Jais license permits commercial use",
      "GQA improves inference efficiency at 70B scale"
    ],
    "weaknesses": [
      "~40 GB VRAM at Q4",
      "4096-token context is restrictive for long documents",
      "Limited capability outside Arabic and English"
    ],
    "benchmarks": [],
    "alternatives": [
      "aya-23-35b",
      "llama33-70b",
      "qwen25-32b"
    ],
    "install": "ollama pull hf.co/inceptionai/jais-instruct-GGUF",
    "desc": "MBZUAI and Core42's Llama-2 70B extended with 32k Arabic tokens and GQA — the strongest open-weight Arabic LLM, reaching GPT-4-class quality in Arabic.",
    "verdict": "The clear top pick for Arabic at 70B — choose it when GPT-4-grade Arabic must run on your own hardware."
  },
  "salamandra-40b": {
    "release": "Décembre 2024",
    "arch": "Dense · 40B · BSC MareNostrum · langues romanes souveraines",
    "training": "Barcelona Supercomputing Center — 7.68T tokens, fort en catalan, espagnol, français, occitan.",
    "strengths": [
      "Sovereign European model purpose-built for Romance languages",
      "Unique native Catalan capability among open models",
      "Apache 2.0 license",
      "7.68T tokens with strong Iberian-language coverage"
    ],
    "weaknesses": [
      "~24 GB VRAM at Q4",
      "8192-token context limits modern long-context use",
      "Limited fine-tune ecosystem and gated repo access"
    ],
    "benchmarks": [],
    "alternatives": [
      "apertus-70b",
      "aya-23-35b",
      "lucie-7b"
    ],
    "install": "ollama pull hf.co/BSC-LT/salamandra-40b-instruct-GGUF",
    "desc": "BSC's 40B scaled-up Salamandra covering 35 EU languages with native Catalan support — though the HuggingFace repo is gated and successor ALIA-40B is now available.",
    "verdict": "The strongest open model for Catalan and Iberian Romance languages — but check ALIA-40B first if you can run either."
  },
  "llava-onevision-72b": {
    "release": "Septembre 2024",
    "arch": "Dense vision · 72B · LLaVA-OneVision · Qwen2 72B backbone",
    "training": "LMMs-Lab — images, vidéos, documents, multi-image. Dataset OneVision.",
    "strengths": [
      "State-of-the-art open vision quality at 2024 release",
      "Robust multi-image and video reasoning",
      "Apache 2.0 with no usage restrictions",
      "Solid bilingual EN/CN coverage"
    ],
    "weaknesses": [
      "Around 42 GB VRAM at Q4 — needs serious GPU resources",
      "32k context limits long-document workflows",
      "Surpassed by Qwen3-VL 30B in 2025 benchmarks"
    ],
    "benchmarks": [
      {
        "name": "MMMU",
        "score": 69.5
      }
    ],
    "alternatives": [
      "qwen3-vl-30b",
      "molmo-72b",
      "qwen2-vl-7b"
    ],
    "install": "ollama run llava-onevision:72b",
    "desc": "The 72B Apache-licensed flagship from LMMs-Lab, built on Qwen2-72B with strong English and Chinese vision performance. A 2024 state-of-the-art open VLM.",
    "verdict": "A heavyweight Apache VLM that still delivers, though Qwen3-VL has since taken the open-vision crown at lower cost."
  },
  "arctic-instruct": {
    "release": "Avril 2024",
    "arch": "Dense + MoE hybride · 480B total / 17B actifs · 128 experts · Snowflake",
    "training": "Snowflake — focus enterprise SQL, code, raisonnement analytique.",
    "strengths": [
      "Highly efficient inference for its 480B total size",
      "Strong on SQL and analytical tasks",
      "Apache 2.0 with no commercial restrictions",
      "Battle-tested in enterprise scenarios"
    ],
    "weaknesses": [
      "Around 290 GB VRAM at Q4 — GPU cluster territory",
      "4k context is severely limiting in 2026",
      "Outclassed by modern MoEs across most benchmarks"
    ],
    "benchmarks": [
      {
        "name": "MMLU",
        "score": 67.3
      },
      {
        "name": "HumanEval",
        "score": 64.3
      }
    ],
    "alternatives": [
      "deepseek-v4-flash",
      "qwen3-32b",
      "llama33-70b"
    ],
    "install": "# Nécessite multi-GPU — non disponible via Ollama standard",
    "desc": "Snowflake's hybrid Dense-MoE with 17B active parameters out of 480B total. Apache-licensed and tuned for enterprise analytics, but the 4k context shows its age.",
    "verdict": "A historically important enterprise MoE, but the 4k context and infrastructure demands push it out of contention for new deployments."
  },
  "grok-1": {
    "release": "Mars 2024",
    "arch": "MoE · 314B total / 86B actifs · 8 experts, 2 actifs · xAI",
    "training": "xAI — premier modèle open-source xAI. Poids bruts publiés sans fine-tuning officiel.",
    "strengths": [
      "First open-weight model from xAI",
      "Apache 2.0 with full commercial freedom",
      "Efficient MoE design with top-2 routing across 8 experts",
      "Useful base for community fine-tunes"
    ],
    "weaknesses": [
      "Around 188 GB VRAM at Q4",
      "Raw base weights — no official instruct variant",
      "Comprehensively outpaced by Grok 2 and beyond",
      "Limited community fine-tunes vs. Llama or Qwen"
    ],
    "benchmarks": [],
    "alternatives": [
      "arctic-instruct",
      "dbrx-instruct",
      "deepseek-v4-flash"
    ],
    "install": "# Non disponible via Ollama — poids HuggingFace uniquement",
    "desc": "xAI's first open-weight release: a 314B MoE with about 86B active parameters under Apache 2.0. Base model only — no official instruction tuning shipped.",
    "verdict": "A landmark open release that's now mostly a research artifact — pick a modern MoE for any real workload."
  },
  "gpt-oss-120b": {
    "release": "Avril 2025",
    "arch": "MoE · ~117B total / ~5.1B actifs · OpenAI open-source · 128k ctx",
    "training": "OpenAI — premier modèle open-weight publié par OpenAI sous licence Apache 2.0.",
    "strengths": [
      "Matches o4-mini on reasoning and coding benchmarks",
      "Apache 2.0 license with full commercial use",
      "128k context out of the box",
      "Fits on a single 80 GB accelerator"
    ],
    "weaknesses": [
      "Around 70 GB VRAM at Q4 — multi-GPU for higher precision",
      "MoE deployment is operationally more complex than dense"
    ],
    "benchmarks": [],
    "alternatives": [
      "deepseek-v4-flash",
      "qwen3-32b",
      "llama33-70b"
    ],
    "install": "ollama run openai/gpt-oss:120b",
    "desc": "OpenAI's first open-weight return: a 117B MoE with 5.1B active parameters, matching o4-mini quality. Fits a single 80 GB GPU and ships under Apache 2.0.",
    "verdict": "The most consequential open-weight release in years — frontier OpenAI quality on a single GPU under Apache 2.0."
  },
  "gpt-oss-20b": {
    "release": "Avril 2025",
    "arch": "MoE · ~21B total / ~4B actifs · OpenAI open-source compact",
    "training": "Version légère de la série GPT open-source OpenAI, idéale pour déploiement local.",
    "strengths": [
      "Apache 2.0 with full commercial freedom",
      "Around 13 GB VRAM at Q4 — runs on a 16 GB card",
      "OpenAI quality in an accessible footprint",
      "Native 128k context"
    ],
    "weaknesses": [
      "MoE format uses more VRAM than equivalent dense models",
      "Fewer community fine-tunes than Llama or Qwen"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-14b",
      "phi4-14b",
      "gemma3-12b"
    ],
    "install": "ollama run openai/gpt-oss:20b",
    "desc": "OpenAI's compact open-weight MoE with 3.6B active out of 21B total parameters. Matches o3-mini on a laptop-class GPU under Apache 2.0.",
    "verdict": "The clear default for local OpenAI-quality inference — accessible VRAM, 128k context, and a real license."
  },
  "kimi-k26": {
    "release": "Avril 2026",
    "arch": "MoE · 1T total / ~32B actifs · Moonshot AI · 256k contexte",
    "training": "Moonshot AI Kimi K2.6 — corpus web massif avec focus long-contexte.",
    "strengths": [
      "1T total parameters with frontier-class performance",
      "Native 256k context window",
      "Unique 300-agent swarm coordination mode",
      "Multimodal across text and vision"
    ],
    "weaknesses": [
      "Around 600 GB VRAM at Q4 — datacenter only",
      "API-first; local hosting is impractical for most teams",
      "Modified MIT terms need legal review"
    ],
    "benchmarks": [],
    "alternatives": [
      "deepseek-v4-flash",
      "qwen3-vl-235b",
      "qwen35-122b-a10b"
    ],
    "install": "# Multi-GPU data-center requis — API Moonshot recommandée",
    "desc": "Moonshot AI's April 2026 flagship: roughly 1T total parameters with 32B active, native multimodal, plus an agent-swarm mode coordinating up to 300 sub-agents.",
    "verdict": "A genuine frontier open-weight model, but you'll be consuming it via API unless you run a datacenter."
  },
  "qwen3-vl-235b": {
    "release": "Mai 2025",
    "arch": "MoE vision · 235B total / 22B actifs · Qwen3-VL flagship",
    "training": "Qwen3-VL 235B — texte, images, vidéo, 262k contexte natif.",
    "strengths": [
      "Top open-weight vision model as of May 2025",
      "262k native context, extensible to 1M tokens",
      "Apache 2.0 license",
      "Only 22B active parameters keeps inference tractable"
    ],
    "weaknesses": [
      "Around 142 GB VRAM at Q4 — multi-GPU required",
      "Heavier operational lift than dense alternatives",
      "Overkill for simple captioning workloads"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-vl-30b",
      "qwen2-vl-7b",
      "gemma4-26b-moe"
    ],
    "install": "ollama run qwen3-vl:235b",
    "desc": "Alibaba's flagship Qwen 3 vision model: 235B MoE with 22B active parameters and a native 256k context that extends to 1M. The current open-weight vision leader.",
    "verdict": "The open-vision benchmark to beat — if you can afford the GPUs, this is the model to deploy."
  },
  "qwen3-vl-30b": {
    "release": "Mai 2025",
    "arch": "MoE vision · 30B · Qwen3-VL · 262k contexte",
    "training": "Qwen3-VL 30B — bon compromis qualité/accessibilité vision MoE.",
    "strengths": [
      "Around 19 GB VRAM at Q4 — fits a single 24 GB card",
      "Native 262k multimodal context",
      "Efficient MoE with only 3B active parameters",
      "Apache 2.0"
    ],
    "weaknesses": [
      "Lags the 235B on complex scene understanding",
      "Fewer fine-tunes than the older Qwen2-VL family"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-vl-8b",
      "qwen2-vl-7b",
      "llava-onevision-72b"
    ],
    "install": "ollama run qwen3-vl:30b",
    "desc": "Qwen 3 VL's sweet spot: a 30B MoE with 3B active parameters and 256k context. Delivers most of the 235B's quality at a fraction of the hardware cost.",
    "verdict": "The pragmatic open-vision choice in 2026 — most of the flagship's quality on hardware most teams already own."
  },
  "qwen3-vl-8b": {
    "release": "Mai 2025",
    "arch": "Dense vision · 8B · Qwen3-VL · 262k contexte",
    "training": "Qwen3-VL 8B — version accessible de la famille vision Qwen3.",
    "strengths": [
      "Around 6 GB VRAM at Q4 — runs almost anywhere",
      "262k multimodal context in an 8B model",
      "Solid OCR and document analysis",
      "Apache 2.0"
    ],
    "weaknesses": [
      "Trails the 30B variant on complex scene reasoning",
      "Limited capacity for advanced visual reasoning"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen2-vl-7b",
      "internvl-35-8b",
      "gemma3-12b"
    ],
    "install": "ollama run qwen3-vl:8b",
    "desc": "The dense 8B entry in Qwen 3 VL, offering strong OCR and document analysis with a remarkable 256k multimodal context for its size.",
    "verdict": "The go-to small open VLM — Apache-licensed, long-context, and capable enough for most production document workflows."
  },
  "ernie-45-300b": {
    "release": "Avril 2025",
    "arch": "MoE · 300B · ERNIE 4.5 · Baidu PaddlePaddle",
    "training": "Baidu — corpus chinois massif + multilinguisme. Poids partiellement publiés.",
    "strengths": [
      "Best-in-class Chinese-language performance",
      "Efficient MoE inference with 47B active",
      "300B total parameters at frontier scale",
      "128k context window"
    ],
    "weaknesses": [
      "Around 180 GB VRAM at Q4",
      "Baidu license has commercial restrictions",
      "Limited adoption and support outside China",
      "Only partial weights publicly released"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen35-122b-a10b",
      "kimi-k26",
      "pangu-pro-moe-72b"
    ],
    "install": "# Déploiement complexe — vérifier la licence Baidu avant usage",
    "desc": "Baidu's first open release at frontier scale: a 300B MoE with 47B active parameters. Strongest open model for Chinese, with partial weight publication.",
    "verdict": "The strongest open model for Chinese workloads, but licensing and limited ecosystem outside China constrain its reach."
  },
  "ernie-45-21b": {
    "release": "Avril 2025",
    "arch": "MoE · 21B · ERNIE 4.5 compact · raisonnement optimisé",
    "training": "Baidu ERNIE 4.5 version compacte avec spécialisation raisonnement.",
    "strengths": [
      "Around 13 GB VRAM at Q4",
      "Compact MoE optimized for reasoning",
      "Strong Chinese-language performance",
      "128k context window"
    ],
    "weaknesses": [
      "Weaker multilingual coverage than Qwen",
      "Baidu license terms need verification",
      "Smaller community than Qwen or Llama equivalents"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-14b",
      "deepseek-r1-7b",
      "phi4-14b"
    ],
    "install": "ollama pull hf.co/baidu/ernie-4.5-21b-GGUF",
    "desc": "Baidu's compact reasoning MoE with 3B active parameters out of 21B total. Fast inference thanks to the small active set, with Chinese-language strength.",
    "verdict": "An efficient reasoning MoE with real Chinese strength, but Qwen's compact models remain easier to adopt outside China."
  },
  "ring-1t": {
    "release": "Mars 2025",
    "arch": "MoE ring-all-reduce · 1T total · Ant Group · 131k contexte",
    "training": "Ant Group — premier trillion-parameter open-weight chinois, architecture ring-MoE.",
    "strengths": [
      "First trillion-parameter Chinese open-weight model",
      "MIT license with full commercial freedom",
      "Original ring-MoE all-reduce architecture",
      "131k context window"
    ],
    "weaknesses": [
      "Around 600 GB VRAM at Q4 — datacenter only",
      "Commercial licensing for downstream use is complex",
      "Operationally heavy to deploy and tune"
    ],
    "benchmarks": [],
    "alternatives": [
      "kimi-k26",
      "qwen3-vl-235b",
      "deepseek-v4-flash"
    ],
    "install": "# Infrastructure data-center requise — non disponible en local standard",
    "desc": "Ant Group's MIT-licensed open reasoner: 1T total parameters with 50B active, using a novel ring-all-reduce MoE architecture. Top of the open-reasoning leaderboards.",
    "verdict": "A frontier open reasoner with a permissive license — practical only for teams running real datacenter infrastructure."
  },
  "seed-oss-36b": {
    "release": "Avril 2025",
    "arch": "Dense · 36B · ByteDance Seed-OSS · 524k contexte natif",
    "training": "ByteDance — très long contexte (524k tokens) supporté nativement.",
    "strengths": [
      "524k native context — a record for accessible dense models",
      "Dense 36B is easier to deploy than equivalent MoEs",
      "Strong long-document comprehension",
      "Apache 2.0"
    ],
    "weaknesses": [
      "Around 22 GB VRAM at Q4 (much more with full context)",
      "ByteDance license terms need a careful read",
      "Limited fine-tune ecosystem at launch"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen35-27b",
      "qwen35-122b-a10b",
      "llama33-70b"
    ],
    "install": "ollama pull hf.co/ByteDance/seed-oss-36b-GGUF",
    "desc": "ByteDance's first major open release: a dense 36B model with a native 524k context — roughly 4× the competition. Apache 2.0.",
    "verdict": "Unmatched long-context for a dense open model — the pick when you genuinely need to load 500k+ tokens at once."
  },
  "exaone-45-33b": {
    "release": "Avril 2025",
    "arch": "Dense · 33B · EXAONE 4.5 · LG AI Research · vision intégrée",
    "training": "LG AI Research — corpus coréen+anglais, vision multimodale ajoutée, 262k ctx.",
    "strengths": [
      "262k context in a 33B model",
      "Integrated vision capabilities at this scale",
      "Strong Korean and English performance",
      "Top-10 placement on independent intelligence benchmarks"
    ],
    "weaknesses": [
      "Around 20 GB VRAM at Q4",
      "EXAONE license requires review for commercial use",
      "Smaller English-focused community than Llama or Qwen"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-32b",
      "llama33-70b",
      "gemma3-27b"
    ],
    "install": "ollama run exaone4.5:33b",
    "desc": "LG AI Research's multimodal Korean flagship: a 33B model with 256k context that lands in the top 10 of the Artificial Analysis Intelligence Index.",
    "verdict": "The clear pick for Korean multimodal work — capable, compact, and competitive globally, with licensing caveats to verify."
  },
  "nemotron-nano-3-30b": {
    "release": "Mai 2025",
    "arch": "MoE · 30B total / 3B actifs · Nemotron-Nano-3 · 1M contexte natif",
    "training": "NVIDIA — distillé depuis Llama, optimisé edge avec contexte 1 million tokens.",
    "strengths": [
      "Native 1M-token context window",
      "Ultra-efficient MoE with only 3B active parameters",
      "Roughly 4× throughput improvement over Nemotron 2",
      "Permissive NVIDIA Open Model license"
    ],
    "weaknesses": [
      "Full 1M context consumes substantial VRAM in practice",
      "Hybrid architecture has thinner tooling support",
      "Distilled from Llama — inherits some base-model quirks"
    ],
    "benchmarks": [],
    "alternatives": [
      "nemotron3",
      "nemotron-3-nano",
      "qwen3-32b"
    ],
    "install": "ollama run nemotron3:30b",
    "desc": "NVIDIA's Mamba-2 + Transformer hybrid MoE with 3B active out of 30B total parameters. A native 1M-token context with roughly 4× the throughput of Nemotron 2.",
    "verdict": "The throughput-and-context champion for edge MoE deployments — built for workloads where 128k context isn't enough."
  },
  "nemotron-nano-v2-vl-12b": {
    "release": "Mai 2025",
    "arch": "Dense vision · 12.6B · Nemotron-Nano-v2 VL · 128k contexte",
    "training": "NVIDIA Nemotron Nano v2 multimodal — texte + images en 12B.",
    "strengths": [
      "Combined vision and text in a 12B footprint",
      "128k context window",
      "Strong DocVQA and ChartQA benchmark scores",
      "NVIDIA Open Model license"
    ],
    "weaknesses": [
      "Trails Qwen3-VL 30B on complex visual reasoning",
      "NVIDIA license terms differ from Apache or MIT",
      "Smaller community than Qwen or LLaVA families"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-vl-8b",
      "llama32-vision-11b",
      "internvl-35-8b"
    ],
    "install": "ollama run nemotron3-v2:12b",
    "desc": "NVIDIA's 12.6B enterprise VLM with strong DocVQA and ChartQA scores, tuned for professional document extraction workflows.",
    "verdict": "A focused enterprise VLM that punches above its weight on documents and charts — the right call when extraction is the job."
  },
  "apertus-70b": {
    "release": "Avril 2025",
    "arch": "Dense · 70B · Swiss AI Initiative · souveraineté européenne",
    "training": "Swiss AI — données européennes souveraines, fort en FR/DE/IT/RM (romanche).",
    "strengths": [
      "European data sovereignty story",
      "Only flagship model with native Romansh support",
      "Apache 2.0 license",
      "Strong across Alpine and broader European languages"
    ],
    "weaknesses": [
      "Around 40 GB VRAM at Q4 — multi-GPU required",
      "Smaller fine-tune ecosystem than Llama or Qwen",
      "English performance trails best-in-class US models"
    ],
    "benchmarks": [],
    "alternatives": [
      "salamandra-40b",
      "aya-23-35b",
      "llama33-70b"
    ],
    "install": "ollama pull hf.co/swissai/Apertus-70B-GGUF",
    "desc": "A Swiss AI joint effort (EPFL, ETH, CSCS) trained on 15T tokens covering 1000+ languages, including Swiss German and Romansh. Apache 2.0.",
    "verdict": "Europe's most credible sovereign open flagship — pick it when language coverage or data jurisdiction matters more than raw English benchmarks."
  },
  "apertus-8b": {
    "release": "Avril 2025",
    "arch": "Dense · 8B · Swiss AI Initiative · compact multilingue EU",
    "training": "Swiss AI — version compacte du modèle souverain européen.",
    "strengths": [
      "Around 6 GB VRAM at Q4 — runs on consumer hardware",
      "Native EU multilingual coverage",
      "Apache 2.0 license",
      "Practical for everyday assistant use"
    ],
    "weaknesses": [
      "Trails Qwen 3 8B on English and coding tasks",
      "Limited public fine-tunes",
      "Less benchmark coverage than mainstream 8B models"
    ],
    "benchmarks": [],
    "alternatives": [
      "apertus-70b",
      "lucie-7b",
      "qwen25-7b"
    ],
    "install": "ollama pull hf.co/swissai/Apertus-8B-GGUF",
    "desc": "The compact Swiss AI release trained on the Alps supercomputer, covering 1000+ languages including Swiss German and Romansh. Apache 2.0.",
    "verdict": "The accessible sovereign 8B for European multilingual work — choose it when language reach beats benchmark dominance."
  },
  "trinity-mini-26b": {
    "release": "Mars 2025",
    "arch": "MoE · 26B total / 3.5B actifs · Arcee AI · 131k contexte",
    "training": "Arcee AI — MoE compact pour agents et enterprise.",
    "strengths": [
      "Efficient MoE with around 3.5B active parameters",
      "131k context window",
      "Tuned for agent and tool-use workflows",
      "Apache 2.0"
    ],
    "weaknesses": [
      "Limited public benchmark coverage",
      "Less name recognition than Mistral or Qwen",
      "Smaller fine-tune ecosystem"
    ],
    "benchmarks": [],
    "alternatives": [
      "mistral-small-24b",
      "qwen3-14b",
      "phi4-14b"
    ],
    "install": "ollama pull hf.co/arcee-ai/Trinity-Mini-26B-GGUF",
    "desc": "Arcee AI's US-built MoE with 3B active parameters out of 26B total. Apache-licensed, fast in practice, and tuned for agent-style workloads.",
    "verdict": "A solid US-built MoE for agent work — worth a serious look if you value Apache licensing and a domestic vendor."
  },
  "hunyuan-20-large": {
    "release": "Novembre 2024",
    "arch": "MoE · 406B total / 52B actifs · Tencent HunyuanLLM · 262k ctx",
    "training": "Tencent — fort en chinois et anglais, RAG et long document.",
    "strengths": [
      "262k native context window",
      "Top-tier Chinese-language performance",
      "Efficient inference relative to 406B total size",
      "Strong on RAG and long-document tasks"
    ],
    "weaknesses": [
      "Around 245 GB VRAM at Q4 — heavy infrastructure needed",
      "Custom Tencent license requires careful legal review",
      "Limited adoption outside Chinese-speaking markets"
    ],
    "benchmarks": [],
    "alternatives": [
      "ernie-45-300b",
      "qwen35-122b-a10b",
      "deepseek-v4-flash"
    ],
    "install": "# Infrastructure Tencent — non disponible en local standard",
    "desc": "Tencent's 406B flagship MoE with 32B active parameters and 256k context. Strong on Chinese and English, but gated by the custom Tencent Hunyuan license.",
    "verdict": "Frontier-class bilingual long-context performance, but licensing and infrastructure demands narrow its practical audience."
  },
  "internvl-35-8b": {
    "release": "Janvier 2025",
    "arch": "Dense vision · 8B · InternVL 3.5 · InternLM backbone",
    "training": "OpenGVLab — OCR, VQA, charts, vidéos courtes, documents PDF.",
    "strengths": [
      "Top quality-per-parameter ratio in 8B vision",
      "Strong OCR and chart understanding",
      "Apache 2.0 license",
      "Solid VQA and short-video performance"
    ],
    "weaknesses": [
      "32k context limits long-document multimodal work",
      "Weaker multilingual coverage than Qwen2-VL",
      "No native long-context extension"
    ],
    "benchmarks": [
      {
        "name": "MMMU",
        "score": 61.5
      },
      {
        "name": "DocVQA",
        "score": 94.1
      }
    ],
    "alternatives": [
      "qwen3-vl-8b",
      "qwen2-vl-7b",
      "molmo-7b"
    ],
    "install": "ollama run internvl3.5:8b",
    "desc": "OpenGVLab's 8B vision-language model leading MMMU among open models. Built at Shanghai AI Lab and released under Apache 2.0.",
    "verdict": "The benchmark-leading small open VLM for OCR and charts — the right pick when you need accuracy more than context length."
  },
  "mimo-v2-flash": {
    "release": "Avril 2025",
    "arch": "MoE · 309B total / 52B actifs · Xiaomi MiMo V2 Flash",
    "training": "Xiaomi — fort en code et raisonnement, architecture inspirée DeepSeek.",
    "strengths": [
      "State-of-the-art SWE-Bench Verified score (73.4%) at release",
      "MoE design activates only 52B of 309B params, lowering inference cost",
      "128k context window suits whole-repo reasoning",
      "Permissive MIT license for commercial deployment",
      "Architecture borrows from DeepSeek's proven MoE recipe"
    ],
    "weaknesses": [
      "Requires roughly 185 GB VRAM in Q4 — multi-GPU or H100-class hardware",
      "Xiaomi's open-weight licensing is newer and worth a legal review",
      "Newer architecture may lag in tooling support outside vLLM"
    ],
    "benchmarks": [],
    "alternatives": [
      "deepseek-v4-flash",
      "qwen35-122b-a10b",
      "llama33-70b"
    ],
    "install": "ollama pull hf.co/xiaomiteam/MiMo-V2-Flash-GGUF",
    "desc": "Xiaomi's 309B-parameter sparse MoE (52B active) released under MIT, topping SWE-Bench Verified at 73.4% at launch. Built for heavy-duty code and reasoning work.",
    "verdict": "If you need an MIT-licensed, top-of-the-leaderboard coding model and have the GPUs to run it, MiMo V2 Flash is the pick."
  },
  "rakuten-ai-3": {
    "release": "Mars 2025",
    "arch": "MoE · 700B total · Rakuten AI 3 · 32k contexte",
    "training": "Rakuten — corpus JP/EN massif pour e-commerce et enterprise.",
    "strengths": [
      "Top-tier Japanese fluency, beating most open models on JP benchmarks",
      "700B total parameters give it broad knowledge depth",
      "Apache 2.0 — no commercial restrictions",
      "Backed by Rakuten's massive e-commerce and fintech corpus",
      "Built under Japan's GENIAC sovereign-AI initiative"
    ],
    "weaknesses": [
      "Roughly 420 GB VRAM in Q4 — datacenter-only",
      "32k context is tight versus modern 128k+ flagships",
      "Heavily skewed toward Japanese and commerce; weaker on global general tasks"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen35-122b-a10b",
      "ernie-45-300b",
      "kimi-k26"
    ],
    "install": "# Infrastructure lourde requise — non disponible en local standard",
    "desc": "Rakuten's flagship ~700B MoE model built under Japan's GENIAC program and released under Apache 2.0. Best-in-class Japanese performance with serious enterprise e-commerce DNA.",
    "verdict": "The default open-weight choice for Japanese enterprise; overkill and underspecialized for anyone else."
  },
  "kanana-2-30b": {
    "release": "Avril 2025",
    "arch": "MoE · 30B · Kakao Brain Kanana 2 · 131k contexte · coréen natif",
    "training": "Kakao — fort en coréen, raisonnement hybrid thinking/non-thinking.",
    "strengths": [
      "131k context window in a 30B MoE",
      "Hybrid thinking/non-thinking mode toggle",
      "Native Korean performance backed by Kakao's corpus",
      "MLA attention cuts KV-cache footprint",
      "Apache 2.0 with only 3B active params per token"
    ],
    "weaknesses": [
      "Around 18 GB VRAM in Q4 — fits a single GPU but tight on consumer cards",
      "Quality drops outside Korean and English"
    ],
    "benchmarks": [],
    "alternatives": [
      "exaone-45-33b",
      "qwen3-14b",
      "phi4-14b"
    ],
    "install": "ollama pull hf.co/kakaoai/Kanana-2-30B-GGUF",
    "desc": "Kakao's agentic 30B MoE (3B active) with native hybrid thinking and Korean-first training. Apache 2.0 with MLA attention and 131k context.",
    "verdict": "The strongest open Korean model right now, with thinking mode and a sane VRAM budget on the side."
  },
  "deepseek-ocr": {
    "release": "Avril 2025",
    "arch": "Dense vision · 3B · DeepSeek-OCR · spécialisé lecture de documents",
    "training": "DeepSeek — fine-tuning OCR massif sur documents scannés, reçus, formules LaTeX.",
    "strengths": [
      "Best-in-class OCR quality at only 3B parameters",
      "Handles LaTeX formulas and table structure cleanly",
      "Runs in ~2 GB VRAM at Q4 — fits anywhere",
      "MIT license, no commercial restrictions",
      "Optical-compression approach reduces token usage on long documents"
    ],
    "weaknesses": [
      "8k context limits multi-page document handling",
      "OCR-only — not a general-purpose VLM",
      "Limited reasoning capability beyond extraction"
    ],
    "benchmarks": [],
    "alternatives": [
      "hunyuan-ocr-1b",
      "internvl-35-8b",
      "qwen2-vl-7b"
    ],
    "install": "ollama run deepseek-ocr:3b",
    "desc": "DeepSeek's 3B MIT-licensed OCR specialist built on DeepEncoder, notable for its 'optical compression' approach. Punches well above its weight on documents, LaTeX, and tables.",
    "verdict": "Drop-in MIT OCR engine that beats far larger general VLMs at extraction tasks."
  },
  "hunyuan-ocr-1b": {
    "release": "Mars 2025",
    "arch": "Dense vision · 1B · Tencent Hunyuan OCR ultra-compact",
    "training": "Tencent — extraction texte documents scannés et images, version ultra-compacte.",
    "strengths": [
      "Runs in under 1 GB VRAM at Q4",
      "Beats 200B+ general VLMs on document benchmarks",
      "End-to-end model — no separate detection/recognition stages",
      "Latency low enough for real-time mobile use"
    ],
    "weaknesses": [
      "1B ceiling shows on noisy or complex layouts",
      "8k context limits multi-page workflows",
      "Tencent Hunyuan License is custom — review before commercial use"
    ],
    "benchmarks": [],
    "alternatives": [
      "deepseek-ocr",
      "internvl-35-8b",
      "qwen2-vl-7b"
    ],
    "install": "ollama pull hf.co/tencent/Hunyuan-OCR-1B-GGUF",
    "desc": "Tencent's 1B end-to-end OCR model that outperforms 235B general VLMs on document tasks. Engineered for edge and mobile deployment.",
    "verdict": "The OCR model to pick when every megabyte counts; for messy real-world documents, step up to DeepSeek-OCR."
  },
  "gemma4-26b-moe": {
    "release": "Avril 2026",
    "arch": "MoE · 26B · Gemma 4 · multimodal texte+image+audio · 128k contexte",
    "training": "Google Gemma 4 MoE 26B — multimodal natif avec audio, vision et texte.",
    "strengths": [
      "Unified text, image, and audio in 26B/4B-active MoE",
      "128k context",
      "Strong reasoning relative to size",
      "Backed by Google's training infrastructure and corpus",
      "4B active params keep inference cheap"
    ],
    "weaknesses": [
      "Around 16 GB VRAM in Q4",
      "Gated on Hugging Face with click-through agreement",
      "Gemma license has more restrictions than Apache or MIT"
    ],
    "benchmarks": [],
    "alternatives": [
      "gemma4-31b",
      "gemma4-e4b",
      "qwen3-vl-30b"
    ],
    "install": "ollama run gemma4:26b-moe",
    "desc": "Google's MoE variant of Gemma 4 with 26B total / 4B active params and full text+image+audio multimodality. The smallest open model with native audio understanding at this quality.",
    "verdict": "The most capable open multimodal model under 30B if you can live with the Gemma license."
  },
  "dots-llm1": {
    "release": "Avril 2025",
    "arch": "MoE · 142B total / 14B actifs · Rednote (Xiaohongshu) · 32k ctx",
    "training": "Rednote — fort en génération créative et contenu lifestyle.",
    "strengths": [
      "14B active params in a 142B MoE — efficient inference",
      "MIT license",
      "Strong creative and lifestyle content generation",
      "No synthetic data in training — more natural outputs"
    ],
    "weaknesses": [
      "Roughly 85 GB VRAM in Q4 — multi-GPU territory",
      "32k context lags modern flagships",
      "Output style optimized for Chinese social media — may not fit Western tone"
    ],
    "benchmarks": [],
    "alternatives": [
      "deepseek-v4-flash",
      "qwen35-122b-a10b",
      "ernie-45-21b"
    ],
    "install": "ollama pull hf.co/rednote/dots-llm1-GGUF",
    "desc": "Xiaohongshu's first LLM under the Rednote brand — a 142B MoE with 14B active params trained without synthetic data, matching Qwen2.5-72B. Released under MIT.",
    "verdict": "An MIT-licensed alternative for creative Chinese content; outside that niche, Qwen3 is the safer pick."
  },
  "qwen3-omni-30b": {
    "release": "Mai 2025",
    "arch": "MoE · 30B · Qwen3-Omni · texte + vision + audio end-to-end",
    "training": "Qwen3-Omni 30B — modèle omnimodal Qwen (texte, images, audio in/out).",
    "strengths": [
      "Native omnimodal I/O: text, image, audio in and out",
      "131k context",
      "Streaming speech for low-latency voice apps",
      "Apache 2.0 license",
      "Only 3B active params per token"
    ],
    "weaknesses": [
      "Around 19 GB VRAM in Q4",
      "Audio path is still maturing relative to text and vision",
      "Tooling support uneven outside vLLM"
    ],
    "benchmarks": [],
    "alternatives": [
      "gemma4-26b-moe",
      "qwen3-vl-30b",
      "qwen3-30b-a3b"
    ],
    "install": "ollama run qwen3-omni:30b",
    "desc": "Alibaba's omni-modal 30B MoE (3B active) with streaming speech, 119-language ASR, and Apache 2.0 licensing. The most accessible truly omnimodal open model.",
    "verdict": "The default open choice if you actually need audio in and out, not just text and images."
  },
  "qwen35-122b-a10b": {
    "release": "Avril 2025",
    "arch": "MoE · 122B total / 10B actifs · Qwen 3.5 flagship · 262k contexte",
    "training": "Qwen 3.5 flagship accessible — 10B actifs sur 122B, 262k ctx natif.",
    "strengths": [
      "Frontier-class quality with only 10B active params",
      "262k native context window",
      "Apache 2.0",
      "Single-H100 deployment is realistic",
      "Strong multilingual coverage"
    ],
    "weaknesses": [
      "Roughly 73 GB VRAM in Q4 — still needs multi-GPU on consumer cards",
      "Mid-flagship positioning means it's eclipsed by 397B on the hardest tasks"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-235b-a22b",
      "deepseek-v4-flash",
      "llama33-70b"
    ],
    "install": "ollama run qwen3.5:122b-a10b",
    "desc": "Alibaba's mid-flagship Qwen 3.5 with 122B total / 10B active params and 262k native context. Frontier-class quality that fits on a single H100.",
    "verdict": "The sweet spot of the Qwen 3.5 lineup: H100-friendly with frontier-grade output."
  },
  "pangu-pro-moe-72b": {
    "release": "Avril 2025",
    "arch": "MoE · 72B · Huawei PanGu Pro · architecture propriétaire",
    "training": "Huawei — spécialisé code enterprise et scenarios business CN.",
    "strengths": [
      "First-class optimization for Ascend NPUs",
      "Solid enterprise code and business reasoning",
      "Open weights from a major hyperscaler",
      "MoE design keeps inference tractable"
    ],
    "weaknesses": [
      "Around 42 GB VRAM in Q4",
      "32k context trails modern flagships",
      "Custom Pangu license requires legal review",
      "Tooling outside Huawei's stack is thin"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen35-27b",
      "exaone-45-33b",
      "llama33-70b"
    ],
    "install": "ollama pull hf.co/huawei/pangu-pro-moe-72b-GGUF",
    "desc": "Huawei's first open-weight release, a 72B MoE optimized for Ascend silicon. Strong on enterprise code and Chinese business scenarios, but the custom Pangu license needs careful review.",
    "verdict": "A reasonable pick if you're on Ascend; on NVIDIA hardware, Qwen 3.5 or DeepSeek will serve you better."
  },
  "qwen3-5": {
    "release": "Avril 2026",
    "arch": "Transformer dense · 0.8B paramètres",
    "training": "Famille Qwen 3.5 (Alibaba). Variante ultra-compacte alignée chat/instruct.",
    "strengths": [
      "Negligible memory footprint — under 1GB at Q4",
      "256k context, rare at this size",
      "Apache 2.0 distribution via Ollama",
      "Runs comfortably on CPU, integrated GPU, or Raspberry Pi"
    ],
    "weaknesses": [
      "Reasoning quality is inherently limited at 0.8B",
      "Text-only — no vision capability",
      "Hugging Face distribution uses the Qwen license rather than Apache"
    ],
    "benchmarks": [],
    "alternatives": [
      "smollm2-17b",
      "gemma4",
      "qwen35-9b"
    ],
    "install": "ollama run qwen3.5",
    "desc": "Alibaba's ultra-compact 0.8B chat model with a 256k context window and a sub-1GB Q4 footprint, Apache 2.0 on Ollama. Runs on CPUs, integrated GPUs, and Raspberry Pi.",
    "verdict": "The right pick when you need a real LLM in under a gigabyte and don't need it to think hard."
  },
  "medgemma1-5": {
    "release": "Mai 2026",
    "arch": "Gemma · 4B paramètres · multimodal texte + image · 128k contexte",
    "training": "Itération v1.5 du fine-tuning médical Google sur Gemma : littérature clinique, imagerie radiologique, comptes-rendus.",
    "strengths": [
      "Iterative refinement over MedGemma 1.0 with the same footprint",
      "Compact 4B (~2.3GB VRAM at Q4)",
      "Multimodal — text plus medical imagery",
      "128k context for long patient histories and literature"
    ],
    "weaknesses": [
      "Decision-support tool only — not for direct clinical use",
      "Narrow medical focus, weak general performance",
      "Gated on Hugging Face"
    ],
    "benchmarks": [],
    "alternatives": [
      "medgemma",
      "gemma4",
      "granite4-3b-vision"
    ],
    "install": "ollama run medgemma1.5",
    "desc": "Google's v1.5 update to MedGemma — a 4B vision-and-text model fine-tuned on clinical literature, radiology imagery, and medical reports. 128k context, Gemma license.",
    "verdict": "A drop-in upgrade to MedGemma 1.0 with sharper clinical performance at the same compact size."
  },
  "granite4-1": {
    "release": "Mai 2026",
    "arch": "Transformer dense · 3B paramètres · 128k contexte",
    "training": "Famille Granite 4.1 d'IBM. Entraînement multilingue (12 langues), tool calling OpenAI-compatible.",
    "strengths": [
      "Fully Apache 2.0 — no click-through, no commercial caveats",
      "128k context window",
      "OpenAI-compatible tool calling that works reliably",
      "Compact ~1.7GB VRAM at Q4"
    ],
    "weaknesses": [
      "Hugging Face distribution is gated even though the license is open",
      "Generic Ollama tag doesn't pin a specific size variant"
    ],
    "benchmarks": [],
    "alternatives": [
      "granite41-3b",
      "qwen3-8b",
      "llama3-3b"
    ],
    "install": "ollama run granite4.1",
    "desc": "IBM's Granite 4.1 in its generic 3B Ollama tag — Apache 2.0, 128k context, robust tool calling, and a sub-2GB Q4 footprint. Code- and chat-oriented.",
    "verdict": "The pragmatic Apache 2.0 default for agentic workflows when license friction is a non-starter."
  },
  "qwen3-6": {
    "release": "Avril 2026",
    "arch": "Transformer dense · 27B paramètres · multimodal texte + image · 256k contexte",
    "training": "Famille Qwen 3.6 (Alibaba). Multimodal vision, focus raisonnement et code multilingue.",
    "strengths": [
      "Native 256k context handles entire repos and long PDFs",
      "Genuinely multimodal — vision plus text",
      "Strong multilingual code performance",
      "Reasoning sharpened over earlier Qwen generations"
    ],
    "weaknesses": [
      "Qwen License — not strictly Apache, review terms",
      "Needs ~16GB VRAM at Q4",
      "Gated on Hugging Face"
    ],
    "benchmarks": [],
    "alternatives": [
      "qwen3-32b",
      "qwen35-27b",
      "qwen3-vl-30b"
    ],
    "install": "ollama run qwen3.6",
    "desc": "Alibaba's Qwen 3.6 27B — multimodal vision and text with a native 256k context, tuned for multilingual reasoning and code. Fits a 16GB GPU at Q4.",
    "verdict": "Qwen's most capable mid-size open model — a strong multimodal pick for a single 16GB+ GPU."
  },
  "lfm2-5-thinking": {
    "release": "Février 2026",
    "arch": "Liquid Foundation Model · 1.2B paramètres · 32k contexte · mode thinking",
    "training": "Famille LFM2.5 de Liquid AI. Variante reasoning avec chaîne de pensée explicite.",
    "strengths": [
      "Negligible memory footprint — under 1GB at Q4",
      "Runs comfortably on CPU and integrated GPUs",
      "Explicit thinking mode for visible chain-of-thought",
      "Low-latency inference suitable for interactive use"
    ],
    "weaknesses": [
      "1.2B parameters cap absolute capability",
      "32k context is short by 2026 standards",
      "LFM Open License rather than pure Apache"
    ],
    "benchmarks": [],
    "alternatives": [
      "smollm2-17b",
      "helium-1-2b",
      "smollm3-3b"
    ],
    "install": "ollama run lfm2.5-thinking",
    "desc": "Liquid AI's 1.2B reasoning variant with an explicit thinking mode, sub-1GB Q4 footprint, and CPU/iGPU-friendly inference. 32k context.",
    "verdict": "The most capable sub-2B reasoning model that still fits comfortably on a CPU-only laptop."
  },
  "glm-4-7-flash": {
    "release": "Février 2026",
    "arch": "Transformer dense · 3B paramètres · 128k contexte",
    "training": "Famille GLM 4.7 de Zhipu AI / THUDM (Tsinghua). Variante Flash optimisée latence, focus zh/en.",
    "strengths": [
      "MIT license — among the most permissive in the open ecosystem",
      "128k context in a 3B footprint",
      "Strong Chinese and English performance",
      "Compact ~1.7GB VRAM at Q4"
    ],
    "weaknesses": [
      "Gated on Hugging Face despite the open license",
      "Less versatile than the 30B GLM 4.7 variants"
    ],
    "benchmarks": [],
    "alternatives": [
      "glm-51",
      "granite41-3b",
      "qwen3-8b"
    ],
    "install": "ollama run glm-4.7-flash",
    "desc": "Zhipu AI's compact 3B variant of GLM 4.7, MIT-licensed with a 128k context. Optimized for low-latency bilingual Chinese-English chat.",
    "verdict": "MIT-licensed, fast, and bilingual — the GLM 4.7 to reach for when you need throughput over peak capability."
  }
}