[{"id": "mistral-7b-instruct", "name": "Mistral 7B Instruct", "author": "Mistral AI", "params": 7, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["chat", "general"], "family": "Mistral"}, {"id": "mistral-small-24b", "name": "Mistral Small 3", "author": "Mistral AI", "params": 24, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 14, "q5": 17, "q8": 26, "fp16": 48}, "tags": ["chat", "general", "code"], "family": "Mistral"}, {"id": "mixtral-8x7b", "name": "Mixtral 8x7B", "author": "Mistral AI", "params": 47, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 26, "q5": 32, "q8": 50, "fp16": 94}, "tags": ["chat", "general", "moe"], "family": "Mistral"}, {"id": "lucie-7b", "name": "Lucie 7B", "author": "OpenLLM-France", "params": 7, "license": "Apache 2.0", "ctx": 4096, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["chat", "fr"], "family": "Lucie"}, {"id": "croissant-llm", "name": "CroissantLLM 1.3B", "author": "CroissantLLM", "params": 1.3, "license": "MIT", "ctx": 2048, "vram": {"q4": 1, "q5": 1.2, "q8": 2, "fp16": 3}, "tags": ["chat", "fr", "small"], "family": "Croissant"}, {"id": "llama3-8b", "name": "Llama 3.1 8B", "author": "Meta", "params": 8, "license": "Llama 3 Community", "ctx": 131072, "vram": {"q4": 6, "q5": 7, "q8": 10, "fp16": 18}, "tags": ["chat", "general"], "family": "Llama"}, {"id": "llama3-70b", "name": "Llama 3.1 70B", "author": "Meta", "params": 70, "license": "Llama 3 Community", "ctx": 131072, "vram": {"q4": 40, "q5": 48, "q8": 75, "fp16": 140}, "tags": ["chat", "general"], "family": "Llama"}, {"id": "llama3-3b", "name": "Llama 3.2 3B", "author": "Meta", "params": 3, "license": "Llama 3 Community", "ctx": 131072, "vram": {"q4": 2.5, "q5": 3, "q8": 4.5, "fp16": 7}, "tags": ["chat", "small"], "family": "Llama"}, {"id": "qwen25-7b", "name": "Qwen 2.5 7B", "author": "Alibaba", "params": 7, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["chat", "general", "multilingual"], "family": "Qwen"}, {"id": "qwen25-32b", "name": "Qwen 2.5 32B", "author": "Alibaba", "params": 32, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 64}, "tags": ["chat", "general"], "family": "Qwen"}, {"id": "qwen25-coder-7b", "name": "Qwen 2.5 Coder 7B", "author": "Alibaba", "params": 7, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["code"], "family": "Qwen"}, {"id": "qwen25-coder-32b", "name": "Qwen 2.5 Coder 32B", "author": "Alibaba", "params": 32, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 64}, "tags": ["code"], "family": "Qwen"}, {"id": "gemma2-2b", "name": "Gemma 2 2B", "author": "Google", "params": 2, "license": "Gemma", "ctx": 8192, "vram": {"q4": 1.8, "q5": 2.2, "q8": 3.2, "fp16": 5}, "tags": ["chat", "small"], "family": "Gemma"}, {"id": "gemma2-9b", "name": "Gemma 2 9B", "author": "Google", "params": 9, "license": "Gemma", "ctx": 8192, "vram": {"q4": 6, "q5": 7.5, "q8": 11, "fp16": 20}, "tags": ["chat", "general"], "family": "Gemma"}, {"id": "gemma2-27b", "name": "Gemma 2 27B", "author": "Google", "params": 27, "license": "Gemma", "ctx": 8192, "vram": {"q4": 16, "q5": 19, "q8": 29, "fp16": 54}, "tags": ["chat", "general"], "family": "Gemma"}, {"id": "phi35-mini", "name": "Phi-3.5 Mini", "author": "Microsoft", "params": 3.8, "license": "MIT", "ctx": 131072, "vram": {"q4": 10, "q5": 12, "q8": 18, "fp16": 33}, "tags": ["chat", "small"], "family": "Phi"}, {"id": "phi4-14b", "name": "Phi-4 14B", "author": "Microsoft", "params": 14, "license": "MIT", "ctx": 16384, "vram": {"q4": 9, "q5": 11, "q8": 16, "fp16": 28}, "tags": ["chat", "general", "reasoning"], "family": "Phi"}, {"id": "deepseek-r1-7b", "name": "DeepSeek R1 Distill 7B", "author": "DeepSeek", "params": 7, "license": "MIT", "ctx": 32768, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["reasoning"], "family": "DeepSeek"}, {"id": "deepseek-r1-32b", "name": "DeepSeek R1 Distill 32B", "author": "DeepSeek", "params": 32, "license": "MIT", "ctx": 32768, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 64}, "tags": ["reasoning"], "family": "DeepSeek"}, {"id": "deepseek-coder-v2-16b", "name": "DeepSeek Coder V2 Lite 16B", "author": "DeepSeek", "params": 16, "license": "MIT", "ctx": 131072, "vram": {"q4": 10, "q5": 12, "q8": 18, "fp16": 32}, "tags": ["code"], "family": "DeepSeek"}, {"id": "llama32-vision-11b", "name": "Llama 3.2 Vision 11B", "author": "Meta", "params": 11, "license": "Llama 3 Community", "ctx": 131072, "vram": {"q4": 8, "q5": 10, "q8": 14, "fp16": 24}, "tags": ["vision", "chat"], "family": "Llama"}, {"id": "qwen2-vl-7b", "name": "Qwen 2 VL 7B", "author": "Alibaba", "params": 7, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 6, "q5": 7, "q8": 10, "fp16": 18}, "tags": ["vision", "chat"], "family": "Qwen"}, {"id": "mistral-nemo-12b", "name": "Mistral Nemo 12B Instruct", "author": "Mistral AI", "params": 12, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 7, "q5": 9, "q8": 13, "fp16": 24}, "tags": ["chat", "general", "multilingual", "fr"], "family": "Mistral"}, {"id": "mistral-small-31-24b", "name": "Mistral Small 3.1 24B", "author": "Mistral AI", "params": 24, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 14, "q5": 17, "q8": 26, "fp16": 48}, "tags": ["chat", "general", "vision", "multilingual", "fr"], "family": "Mistral"}, {"id": "llama33-70b", "name": "Llama 3.3 70B Instruct", "author": "Meta", "params": 70, "license": "Llama 3.3 Community", "ctx": 128000, "vram": {"q4": 40, "q5": 48, "q8": 75, "fp16": 140}, "tags": ["chat", "general", "reasoning"], "family": "Llama"}, {"id": "qwen3-8b", "name": "Qwen 3 8B", "author": "Alibaba", "params": 8, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["chat", "general", "reasoning", "multilingual"], "family": "Qwen"}, {"id": "qwen3-14b", "name": "Qwen 3 14B", "author": "Alibaba", "params": 14, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 9, "q5": 11, "q8": 16, "fp16": 28}, "tags": ["chat", "general", "reasoning", "multilingual"], "family": "Qwen"}, {"id": "qwen3-32b", "name": "Qwen 3 32B", "author": "Alibaba", "params": 32, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 64}, "tags": ["chat", "general", "reasoning", "multilingual"], "family": "Qwen"}, {"id": "qwen3-235b-a22b", "name": "Qwen 3 235B-A22B", "author": "Alibaba", "params": 235, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 142, "q5": 170, "q8": 250, "fp16": 470}, "tags": ["chat", "general", "reasoning", "multilingual", "moe"], "family": "Qwen"}, {"id": "qwen25-vl-7b", "name": "Qwen 2.5 VL 7B", "author": "Alibaba", "params": 7, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 6, "q5": 7, "q8": 10, "fp16": 18}, "tags": ["vision", "chat", "general"], "family": "Qwen"}, {"id": "qwen25-vl-72b", "name": "Qwen 2.5 VL 72B", "author": "Alibaba", "params": 72, "license": "Qwen License", "ctx": 128000, "vram": {"q4": 42, "q5": 50, "q8": 78, "fp16": 144}, "tags": ["vision", "chat", "general"], "family": "Qwen"}, {"id": "qwen25-omni-7b", "name": "Qwen 2.5 Omni 7B", "author": "Alibaba", "params": 7, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 6, "q5": 7, "q8": 10, "fp16": 18}, "tags": ["vision", "audio", "chat"], "family": "Qwen"}, {"id": "qwq-32b", "name": "QwQ 32B", "author": "Alibaba", "params": 32, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 64}, "tags": ["reasoning"], "family": "Qwen"}, {"id": "deepseek-r1-671b", "name": "DeepSeek R1 671B", "author": "DeepSeek", "params": 671, "license": "MIT", "ctx": 128000, "vram": {"q4": 400, "q5": 480, "q8": 720, "fp16": 1342}, "tags": ["reasoning", "moe"], "family": "DeepSeek"}, {"id": "deepseek-r1-distill-llama-70b", "name": "DeepSeek R1 Distill Llama 70B", "author": "DeepSeek", "params": 70, "license": "Llama 3.3 Community + DeepSeek", "ctx": 128000, "vram": {"q4": 40, "q5": 48, "q8": 75, "fp16": 140}, "tags": ["reasoning"], "family": "DeepSeek"}, {"id": "deepseek-v3-671b", "name": "DeepSeek V3 671B", "author": "DeepSeek", "params": 671, "license": "DeepSeek License", "ctx": 128000, "vram": {"q4": 400, "q5": 480, "q8": 720, "fp16": 1342}, "tags": ["chat", "general", "moe"], "family": "DeepSeek"}, {"id": "gemma3-4b", "name": "Gemma 3 4B", "author": "Google", "params": 4, "license": "Gemma", "ctx": 128000, "vram": {"q4": 10, "q5": 12, "q8": 18, "fp16": 33}, "tags": ["chat", "general", "vision", "multilingual", "small"], "family": "Gemma"}, {"id": "gemma3-12b", "name": "Gemma 3 12B", "author": "Google", "params": 12, "license": "Gemma", "ctx": 128000, "vram": {"q4": 7, "q5": 9, "q8": 13, "fp16": 24}, "tags": ["chat", "general", "vision", "multilingual"], "family": "Gemma"}, {"id": "gemma3-27b", "name": "Gemma 3 27B", "author": "Google", "params": 27, "license": "Gemma", "ctx": 128000, "vram": {"q4": 16, "q5": 19, "q8": 29, "fp16": 54}, "tags": ["chat", "general", "vision", "multilingual"], "family": "Gemma"}, {"id": "phi4-multimodal", "name": "Phi-4 Multimodal 5.6B", "author": "Microsoft", "params": 5.6, "license": "MIT", "ctx": 128000, "vram": {"q4": 4, "q5": 5, "q8": 7, "fp16": 12}, "tags": ["chat", "vision", "audio", "small"], "family": "Phi"}, {"id": "phi4-reasoning-14b", "name": "Phi-4 Reasoning 14B", "author": "Microsoft", "params": 14, "license": "MIT", "ctx": 32768, "vram": {"q4": 9, "q5": 11, "q8": 16, "fp16": 28}, "tags": ["reasoning"], "family": "Phi"}, {"id": "command-r-plus-104b", "name": "Command R+ 104B (08-2024)", "author": "Cohere", "params": 104, "license": "CC-BY-NC 4.0", "ctx": 128000, "vram": {"q4": 60, "q5": 72, "q8": 110, "fp16": 208}, "tags": ["chat", "general", "multilingual"], "family": "Command"}, {"id": "aya-expanse-8b", "name": "Aya Expanse 8B", "author": "Cohere For AI", "params": 8, "license": "CC-BY-NC 4.0", "ctx": 8192, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["chat", "general", "multilingual"], "family": "Aya"}, {"id": "aya-expanse-32b", "name": "Aya Expanse 32B", "author": "Cohere For AI", "params": 32, "license": "CC-BY-NC 4.0", "ctx": 8192, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 64}, "tags": ["chat", "general", "multilingual"], "family": "Aya"}, {"id": "eurollm-9b", "name": "EuroLLM 9B Instruct", "author": "Utter Project / UE", "params": 9, "license": "Apache 2.0", "ctx": 4096, "vram": {"q4": 6, "q5": 7, "q8": 10, "fp16": 18}, "tags": ["chat", "general", "multilingual", "fr"], "family": "EuroLLM"}, {"id": "teuken-7b", "name": "Teuken 7B Instruct", "author": "OpenGPT-X", "params": 7, "license": "Apache 2.0 (commercial)", "ctx": 4096, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 14}, "tags": ["chat", "general", "multilingual", "fr"], "family": "Teuken"}, {"id": "pleias-3b", "name": "Pleias 3B Preview", "author": "PleIAs", "params": 3, "license": "Apache 2.0", "ctx": 2048, "vram": {"q4": 2, "q5": 2.5, "q8": 3.5, "fp16": 6}, "tags": ["chat", "multilingual", "fr", "small"], "family": "Pleias"}, {"id": "pleias-rag-1b", "name": "Pleias-RAG 1B", "author": "PleIAs", "params": 1.2, "license": "Apache 2.0", "ctx": 2048, "vram": {"q4": 0.8, "q5": 1, "q8": 1.5, "fp16": 2.5}, "tags": ["chat", "fr", "small"], "family": "Pleias"}, {"id": "moshi-7b", "name": "Moshi 7B", "author": "Kyutai", "params": 7.6, "license": "CC-BY 4.0", "ctx": 4096, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 15}, "tags": ["audio", "fr"], "family": "Moshi"}, {"id": "helium-1-2b", "name": "Helium 1 2B", "author": "Kyutai", "params": 2, "license": "CC-BY-SA 4.0", "ctx": 4096, "vram": {"q4": 1.5, "q5": 2, "q8": 3, "fp16": 5}, "tags": ["chat", "general", "multilingual", "fr", "small"], "family": "Helium"}, {"id": "smollm2-17b", "name": "SmolLM2 1.7B Instruct", "author": "HuggingFace", "params": 1.7, "license": "Apache 2.0", "ctx": 8192, "vram": {"q4": 1.2, "q5": 1.5, "q8": 2.2, "fp16": 3.5}, "tags": ["chat", "general", "small"], "family": "SmolLM"}, {"id": "smolvlm2-22b", "name": "SmolVLM2 2.2B Instruct", "author": "HuggingFace", "params": 2.2, "license": "Apache 2.0", "ctx": 8192, "vram": {"q4": 1.6, "q5": 2, "q8": 3, "fp16": 4.5}, "tags": ["vision", "chat", "small"], "family": "SmolLM"}, {"id": "glm-51", "name": "GLM-5.1", "author": "Z.AI", "params": 744, "license": "MIT", "ctx": 200000, "vram": {"q4": 445, "q5": 535, "q8": 800, "fp16": 1488}, "tags": ["chat", "general", "reasoning", "multilingual", "moe"], "family": "GLM"}, {"id": "minimax-m27", "name": "MiniMax-M2.7", "author": "MiniMax", "params": 229, "license": "Apache 2.0", "ctx": 205000, "vram": {"q4": 138, "q5": 165, "q8": 246, "fp16": 458}, "tags": ["chat", "general", "reasoning", "moe"], "family": "MiniMax"}, {"id": "gemma4-31b", "name": "Gemma 4 31B", "author": "Google", "params": 31, "license": "Gemma", "ctx": 256000, "vram": {"q4": 18, "q5": 22, "q8": 33, "fp16": 62}, "tags": ["chat", "general", "vision", "audio", "multilingual"], "family": "Gemma"}, {"id": "gemma4-e4b", "name": "Gemma 4 E4B", "author": "Google", "params": 4, "license": "Gemma", "ctx": 128000, "vram": {"q4": 10, "q5": 12, "q8": 18, "fp16": 33}, "tags": ["chat", "general", "vision", "audio", "multilingual", "small"], "family": "Gemma"}, {"id": "qwen35-9b", "name": "Qwen 3.5 9B", "author": "Alibaba", "params": 9, "license": "Apache 2.0", "ctx": 262000, "vram": {"q4": 6, "q5": 7, "q8": 10, "fp16": 18}, "tags": ["chat", "general", "reasoning", "multilingual"], "family": "Qwen"}, {"id": "qwen35-27b", "name": "Qwen 3.5 27B", "author": "Alibaba", "params": 27, "license": "Apache 2.0", "ctx": 262000, "vram": {"q4": 16, "q5": 19, "q8": 29, "fp16": 54}, "tags": ["chat", "general", "reasoning", "multilingual"], "family": "Qwen"}, {"id": "qwen35-397b-a17b", "name": "Qwen 3.5 397B-A17B", "author": "Alibaba", "params": 397, "license": "Apache 2.0", "ctx": 262000, "vram": {"q4": 240, "q5": 285, "q8": 425, "fp16": 794}, "tags": ["chat", "general", "reasoning", "multilingual", "moe"], "family": "Qwen"}, {"id": "qwen36-35b-a3b", "name": "Qwen 3.6 35B-A3B", "author": "Alibaba", "params": 35, "license": "Apache 2.0", "ctx": 262000, "vram": {"q4": 21, "q5": 25, "q8": 38, "fp16": 70}, "tags": ["chat", "code", "reasoning", "moe"], "family": "Qwen"}, {"id": "qwen3-coder-next", "name": "Qwen3-Coder-Next 80B-A3B", "author": "Alibaba", "params": 80, "license": "Apache 2.0", "ctx": 262000, "vram": {"q4": 48, "q5": 58, "q8": 86, "fp16": 160}, "tags": ["code", "moe"], "family": "Qwen"}, {"id": "mistral-small-4", "name": "Mistral Small 4", "author": "Mistral AI", "params": 119, "license": "Apache 2.0", "ctx": 256000, "vram": {"q4": 72, "q5": 86, "q8": 128, "fp16": 238}, "tags": ["chat", "general", "code", "vision", "reasoning", "multilingual", "fr", "moe"], "family": "Mistral"}, {"id": "devstral-small-2", "name": "Devstral Small 2 24B", "author": "Mistral AI", "params": 24, "license": "Apache 2.0", "ctx": 256000, "vram": {"q4": 14, "q5": 17, "q8": 26, "fp16": 48}, "tags": ["code", "fr"], "family": "Mistral"}, {"id": "voxtral-4b-tts", "name": "Voxtral-4B-TTS", "author": "Mistral AI", "params": 4, "license": "CC-BY-NC 4.0", "ctx": 4096, "vram": {"q4": 10, "q5": 12, "q8": 18, "fp16": 33}, "tags": ["audio", "multilingual", "fr", "small"], "family": "Mistral"}, {"id": "deepseek-r2-32b", "name": "DeepSeek R2 32B", "author": "DeepSeek", "params": 32, "license": "MIT", "ctx": 128000, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 64}, "tags": ["reasoning"], "family": "DeepSeek"}, {"id": "deepseek-v32", "name": "DeepSeek V3.2", "author": "DeepSeek", "params": 685, "license": "MIT", "ctx": 128000, "vram": {"q4": 410, "q5": 490, "q8": 735, "fp16": 1370}, "tags": ["chat", "general", "moe"], "family": "DeepSeek"}, {"id": "kimi-k25", "name": "Kimi K2.5", "author": "Moonshot AI", "params": 1000, "license": "Modified MIT", "ctx": 256000, "vram": {"q4": 600, "q5": 720, "q8": 1080, "fp16": 2000}, "tags": ["chat", "general", "moe"], "family": "Kimi"}, {"id": "nemotron-3-super-120b", "name": "Nemotron 3 Super 120B", "author": "NVIDIA", "params": 120, "license": "NVIDIA Open Model License", "ctx": 128000, "vram": {"q4": 72, "q5": 86, "q8": 132, "fp16": 240}, "tags": ["chat", "general", "reasoning", "moe"], "family": "Nemotron"}, {"id": "olmo3-7b", "name": "OLMo 3 7B", "author": "Allen AI", "params": 7, "license": "Apache 2.0", "ctx": 8192, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 14}, "tags": ["chat", "general"], "family": "OLMo"}, {"id": "olmo3-32b", "name": "OLMo 3 32B", "author": "Allen AI", "params": 32, "license": "Apache 2.0", "ctx": 65536, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 64}, "tags": ["chat", "general", "reasoning"], "family": "OLMo"}, {"id": "tiny-aya-3b", "name": "Tiny Aya 3.35B", "author": "Cohere For AI", "params": 3.35, "license": "CC-BY-NC 4.0", "ctx": 8192, "vram": {"q4": 2.2, "q5": 2.7, "q8": 3.8, "fp16": 7}, "tags": ["chat", "multilingual", "small"], "family": "Aya"}, {"id": "granite4-3b-vision", "name": "Granite 4.0 3B Vision", "author": "IBM", "params": 3, "license": "Apache 2.0", "ctx": 16384, "vram": {"q4": 2.2, "q5": 2.7, "q8": 3.8, "fp16": 6.5}, "tags": ["vision", "chat", "small"], "family": "Granite"}, {"id": "step-35-flash", "name": "Step 3.5 Flash", "author": "StepFun", "params": 196, "license": "Apache 2.0", "ctx": 256000, "vram": {"q4": 118, "q5": 141, "q8": 210, "fp16": 392}, "tags": ["chat", "general", "moe"], "family": "Step"}, {"id": "falcon-h1r-7b", "name": "Falcon H1R 7B", "author": "TII", "params": 7, "license": "TII Falcon-LLM License 2.0", "ctx": 32768, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 14}, "tags": ["reasoning"], "family": "Falcon"}, {"id": "mixtral-8x22b", "name": "Mixtral 8x22B Instruct", "author": "Mistral AI", "params": 141, "license": "Apache 2.0", "ctx": 64000, "vram": {"q4": 82, "q5": 100, "q8": 150, "fp16": 282}, "tags": ["chat", "general", "moe", "multilingual", "fr"], "family": "Mistral"}, {"id": "mistral-small-32-24b", "name": "Mistral Small 3.2 24B", "author": "Mistral AI", "params": 24, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 14, "q5": 17, "q8": 26, "fp16": 48}, "tags": ["chat", "general", "vision", "multilingual", "fr"], "family": "Mistral"}, {"id": "codestral-22b", "name": "Codestral 22B v0.1", "author": "Mistral AI", "params": 22, "license": "Mistral Non-Production License", "ctx": 32000, "vram": {"q4": 13, "q5": 16, "q8": 24, "fp16": 44}, "tags": ["code", "fr"], "family": "Mistral"}, {"id": "codestral-mamba-7b", "name": "Codestral Mamba 7B", "author": "Mistral AI", "params": 7, "license": "Apache 2.0", "ctx": 256000, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 14}, "tags": ["code", "fr"], "family": "Mistral"}, {"id": "magistral-small-24b", "name": "Magistral Small 24B", "author": "Mistral AI", "params": 24, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 14, "q5": 17, "q8": 26, "fp16": 48}, "tags": ["reasoning", "fr"], "family": "Mistral"}, {"id": "mistral-large-3", "name": "Mistral Large 3 675B", "author": "Mistral AI", "params": 675, "license": "Apache 2.0", "ctx": 256000, "vram": {"q4": 405, "q5": 485, "q8": 720, "fp16": 1350}, "tags": ["chat", "general", "vision", "multilingual", "fr", "moe"], "family": "Mistral"}, {"id": "mistral-medium-35", "name": "Mistral Medium 3.5 128B", "author": "Mistral AI", "params": 128, "license": "Modified MIT", "ctx": 256000, "vram": {"q4": 74, "q5": 91, "q8": 137, "fp16": 256}, "tags": ["chat", "general", "code", "reasoning", "vision", "multilingual", "fr"], "family": "Mistral"}, {"id": "llama-3-1-405b", "name": "Llama 3.1 405B Instruct", "author": "Meta", "params": 405, "license": "Llama 3.1 Community", "ctx": 128000, "vram": {"q4": 240, "q5": 288, "q8": 435, "fp16": 810}, "tags": ["chat", "general", "reasoning"], "family": "Llama"}, {"id": "llama-4-scout", "name": "Llama 4 Scout 109B", "author": "Meta", "params": 109, "license": "Llama 4 Community", "ctx": 10000000, "vram": {"q4": 65, "q5": 78, "q8": 117, "fp16": 218}, "tags": ["chat", "general", "vision", "moe", "multilingual"], "family": "Llama"}, {"id": "llama-4-maverick", "name": "Llama 4 Maverick 400B", "author": "Meta", "params": 400, "license": "Llama 4 Community", "ctx": 1000000, "vram": {"q4": 240, "q5": 285, "q8": 425, "fp16": 800}, "tags": ["chat", "general", "vision", "moe", "multilingual"], "family": "Llama"}, {"id": "llama31-nemotron-70b", "name": "Llama 3.1 Nemotron 70B", "author": "NVIDIA", "params": 70, "license": "Llama 3.1 Community", "ctx": 128000, "vram": {"q4": 40, "q5": 48, "q8": 75, "fp16": 140}, "tags": ["chat", "general", "reasoning"], "family": "Nemotron"}, {"id": "qwen25-3b", "name": "Qwen 2.5 3B Instruct", "author": "Alibaba", "params": 3, "license": "Qwen Research License", "ctx": 32768, "vram": {"q4": 2, "q5": 2.5, "q8": 4, "fp16": 6}, "tags": ["chat", "general", "multilingual", "small"], "family": "Qwen"}, {"id": "qwen25-14b", "name": "Qwen 2.5 14B Instruct", "author": "Alibaba", "params": 14, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 9, "q5": 11, "q8": 16, "fp16": 28}, "tags": ["chat", "general", "multilingual"], "family": "Qwen"}, {"id": "qwen25-72b", "name": "Qwen 2.5 72B Instruct", "author": "Alibaba", "params": 72, "license": "Qwen License", "ctx": 131072, "vram": {"q4": 42, "q5": 50, "q8": 78, "fp16": 144}, "tags": ["chat", "general", "reasoning", "multilingual"], "family": "Qwen"}, {"id": "qwen25-coder-15b", "name": "Qwen 2.5 Coder 1.5B Instruct", "author": "Alibaba", "params": 1.5, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 1, "q5": 1.2, "q8": 2, "fp16": 3}, "tags": ["code", "small"], "family": "Qwen"}, {"id": "qwen25-coder-3b", "name": "Qwen 2.5 Coder 3B Instruct", "author": "Alibaba", "params": 3, "license": "Qwen Research License", "ctx": 32768, "vram": {"q4": 2, "q5": 2.5, "q8": 4, "fp16": 6}, "tags": ["code", "small"], "family": "Qwen"}, {"id": "qwen25-coder-14b", "name": "Qwen 2.5 Coder 14B Instruct", "author": "Alibaba", "params": 14, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 9, "q5": 11, "q8": 16, "fp16": 28}, "tags": ["code"], "family": "Qwen"}, {"id": "qwen3-30b-a3b", "name": "Qwen 3 30B-A3B", "author": "Alibaba", "params": 30, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 62}, "tags": ["chat", "general", "reasoning", "multilingual", "moe"], "family": "Qwen"}, {"id": "deepseek-r1-distill-qwen-15b", "name": "DeepSeek R1 Distill Qwen 1.5B", "author": "DeepSeek", "params": 1.5, "license": "MIT", "ctx": 131072, "vram": {"q4": 1, "q5": 1.2, "q8": 2, "fp16": 3}, "tags": ["reasoning", "small"], "family": "DeepSeek"}, {"id": "deepseek-r1-distill-qwen-14b", "name": "DeepSeek R1 Distill Qwen 14B", "author": "DeepSeek", "params": 14, "license": "MIT", "ctx": 131072, "vram": {"q4": 9, "q5": 11, "q8": 16, "fp16": 28}, "tags": ["reasoning"], "family": "DeepSeek"}, {"id": "phi4-mini", "name": "Phi-4 Mini 3.8B", "author": "Microsoft", "params": 3.8, "license": "MIT", "ctx": 128000, "vram": {"q4": 10, "q5": 12, "q8": 18, "fp16": 33}, "tags": ["chat", "general", "small"], "family": "Phi"}, {"id": "phi4-mini-reasoning", "name": "Phi-4 Mini Reasoning 3.8B", "author": "Microsoft", "params": 3.8, "license": "MIT", "ctx": 128000, "vram": {"q4": 10, "q5": 12, "q8": 18, "fp16": 33}, "tags": ["reasoning", "small"], "family": "Phi"}, {"id": "gemma3n-e2b", "name": "Gemma 3n E2B", "author": "Google", "params": 2, "license": "Gemma", "ctx": 32768, "vram": {"q4": 2, "q5": 2.5, "q8": 3.5, "fp16": 6}, "tags": ["chat", "general", "multilingual", "small"], "family": "Gemma"}, {"id": "gemma3n-e4b", "name": "Gemma 3n E4B", "author": "Google", "params": 4, "license": "Gemma", "ctx": 32768, "vram": {"q4": 4.5, "q5": 5.5, "q8": 8, "fp16": 14}, "tags": ["chat", "general", "multilingual", "small"], "family": "Gemma"}, {"id": "granite32-8b", "name": "Granite 3.2 8B Instruct", "author": "IBM", "params": 8, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["chat", "general"], "family": "Granite"}, {"id": "granite33-8b", "name": "Granite 3.3 8B Instruct", "author": "IBM", "params": 8, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["chat", "general", "code"], "family": "Granite"}, {"id": "granite4-small", "name": "Granite 4.0 H-Small 32B-A9B", "author": "IBM", "params": 32, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 64}, "tags": ["chat", "general", "moe"], "family": "Granite"}, {"id": "granite4-tiny", "name": "Granite 4.0 H-Tiny 7B-A1B", "author": "IBM", "params": 7, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 4, "q5": 5, "q8": 7, "fp16": 14}, "tags": ["chat", "general", "moe", "small"], "family": "Granite"}, {"id": "tulu3-8b", "name": "Tülu 3 8B", "author": "Allen AI", "params": 8, "license": "Llama 3.1 Community", "ctx": 128000, "vram": {"q4": 6, "q5": 7, "q8": 10, "fp16": 16}, "tags": ["chat", "general"], "family": "Tulu"}, {"id": "tulu3-70b", "name": "Tülu 3 70B", "author": "Allen AI", "params": 70, "license": "Llama 3.1 Community", "ctx": 128000, "vram": {"q4": 40, "q5": 48, "q8": 75, "fp16": 140}, "tags": ["chat", "general", "reasoning"], "family": "Tulu"}, {"id": "olmoe-1b-7b", "name": "OLMoE 1B-7B Instruct", "author": "Allen AI", "params": 7, "license": "Apache 2.0", "ctx": 4096, "vram": {"q4": 4, "q5": 5, "q8": 7, "fp16": 14}, "tags": ["chat", "general", "moe", "small"], "family": "OLMo"}, {"id": "molmo-7b", "name": "Molmo 7B-D", "author": "Allen AI", "params": 7, "license": "Apache 2.0", "ctx": 4096, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["vision", "chat"], "family": "Molmo"}, {"id": "molmo-72b", "name": "Molmo 72B", "author": "Allen AI", "params": 72, "license": "Apache 2.0", "ctx": 4096, "vram": {"q4": 42, "q5": 50, "q8": 78, "fp16": 144}, "tags": ["vision", "chat"], "family": "Molmo"}, {"id": "smollm3-3b", "name": "SmolLM3 3B", "author": "HuggingFace", "params": 3, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 2, "q5": 2.5, "q8": 4, "fp16": 6}, "tags": ["chat", "general", "reasoning", "small"], "family": "SmolLM"}, {"id": "minicpm-v-26", "name": "MiniCPM-V 2.6 8B", "author": "OpenBMB", "params": 8, "license": "MiniCPM Model License", "ctx": 32000, "vram": {"q4": 5.5, "q5": 7, "q8": 10, "fp16": 18}, "tags": ["vision", "chat"], "family": "MiniCPM"}, {"id": "minicpm-o-26", "name": "MiniCPM-o 2.6 8B", "author": "OpenBMB", "params": 8, "license": "MiniCPM Model License", "ctx": 32000, "vram": {"q4": 5.5, "q5": 7, "q8": 10, "fp16": 18}, "tags": ["vision", "audio", "chat"], "family": "MiniCPM"}, {"id": "falcon3-7b", "name": "Falcon 3 7B Instruct", "author": "TII", "params": 7, "license": "TII Falcon-LLM License 2.0", "ctx": 32000, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 14}, "tags": ["chat", "general", "multilingual"], "family": "Falcon"}, {"id": "falcon3-10b", "name": "Falcon 3 10B Instruct", "author": "TII", "params": 10, "license": "TII Falcon-LLM License 2.0", "ctx": 32000, "vram": {"q4": 6, "q5": 8, "q8": 12, "fp16": 20}, "tags": ["chat", "general", "multilingual"], "family": "Falcon"}, {"id": "falcon-mamba-7b", "name": "Falcon Mamba 7B", "author": "TII", "params": 7, "license": "TII Falcon-LLM License 2.0", "ctx": 8192, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 14}, "tags": ["chat", "general"], "family": "Falcon"}, {"id": "command-r-35b", "name": "Command R 35B v01", "author": "Cohere", "params": 35, "license": "CC-BY-NC 4.0", "ctx": 128000, "vram": {"q4": 20, "q5": 25, "q8": 37, "fp16": 70}, "tags": ["chat", "general", "multilingual"], "family": "Command"}, {"id": "aya-23-8b", "name": "Aya 23 8B", "author": "Cohere For AI", "params": 8, "license": "CC-BY-NC 4.0", "ctx": 8192, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["chat", "general", "multilingual"], "family": "Aya"}, {"id": "aya-23-35b", "name": "Aya 23 35B", "author": "Cohere For AI", "params": 35, "license": "CC-BY-NC 4.0", "ctx": 8192, "vram": {"q4": 20, "q5": 25, "q8": 37, "fp16": 70}, "tags": ["chat", "general", "multilingual"], "family": "Aya"}, {"id": "yi-15-34b", "name": "Yi 1.5 34B Chat", "author": "01.AI", "params": 34, "license": "Apache 2.0", "ctx": 4096, "vram": {"q4": 20, "q5": 24, "q8": 36, "fp16": 68}, "tags": ["chat", "general", "multilingual"], "family": "Yi"}, {"id": "yi-coder-9b", "name": "Yi Coder 9B Chat", "author": "01.AI", "params": 9, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 5.5, "q5": 7, "q8": 10, "fp16": 18}, "tags": ["code"], "family": "Yi"}, {"id": "dbrx-instruct", "name": "DBRX Instruct", "author": "Databricks", "params": 132, "license": "Databricks Open Model License", "ctx": 32768, "vram": {"q4": 76, "q5": 94, "q8": 140, "fp16": 264}, "tags": ["chat", "general", "moe"], "family": "DBRX"}, {"id": "jais-30b", "name": "Jais 30B Chat v3", "author": "MBZUAI / Core42", "params": 30, "license": "Apache 2.0", "ctx": 8192, "vram": {"q4": 18, "q5": 22, "q8": 33, "fp16": 60}, "tags": ["chat", "general", "multilingual"], "family": "Jais"}, {"id": "jais-70b", "name": "Jais Adapted 70B Chat", "author": "MBZUAI / Core42", "params": 70, "license": "Apache 2.0", "ctx": 4096, "vram": {"q4": 40, "q5": 48, "q8": 75, "fp16": 140}, "tags": ["chat", "general", "multilingual"], "family": "Jais"}, {"id": "sarvam-m-24b", "name": "Sarvam-M 24B", "author": "Sarvam AI", "params": 24, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 14, "q5": 17, "q8": 26, "fp16": 48}, "tags": ["chat", "general", "reasoning", "multilingual"], "family": "Sarvam"}, {"id": "salamandra-7b", "name": "Salamandra 7B Instruct", "author": "BSC", "params": 7.7, "license": "Apache 2.0", "ctx": 8192, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["chat", "general", "multilingual", "fr"], "family": "Salamandra"}, {"id": "salamandra-40b", "name": "Salamandra 40B Instruct", "author": "BSC", "params": 40, "license": "Apache 2.0", "ctx": 8192, "vram": {"q4": 24, "q5": 29, "q8": 43, "fp16": 80}, "tags": ["chat", "general", "multilingual", "fr"], "family": "Salamandra"}, {"id": "eurollm-22b", "name": "EuroLLM 22B Instruct 2512", "author": "Utter Project", "params": 22.6, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 13, "q5": 16, "q8": 24, "fp16": 45}, "tags": ["chat", "general", "multilingual", "fr"], "family": "EuroLLM"}, {"id": "claire-7b", "name": "Claire 7B 0.1", "author": "LINAGORA", "params": 7, "license": "CC-BY-NC-SA 4.0", "ctx": 2048, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 14}, "tags": ["chat", "fr"], "family": "Claire"}, {"id": "jamba-15-mini", "name": "Jamba 1.5 Mini", "author": "AI21 Labs", "params": 52, "license": "Jamba Open Model License", "ctx": 256000, "vram": {"q4": 30, "q5": 37, "q8": 55, "fp16": 104}, "tags": ["chat", "general", "moe", "multilingual"], "family": "Jamba"}, {"id": "hunyuan-a13b", "name": "Hunyuan-A13B Instruct", "author": "Tencent", "params": 80, "license": "Tencent Hunyuan License", "ctx": 262144, "vram": {"q4": 48, "q5": 57, "q8": 85, "fp16": 160}, "tags": ["chat", "general", "reasoning", "moe"], "family": "Hunyuan"}, {"id": "llava-onevision-7b", "name": "LLaVA-OneVision 7B", "author": "LMMs-Lab", "params": 7, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["vision", "chat"], "family": "LLaVA"}, {"id": "llava-onevision-72b", "name": "LLaVA-OneVision 72B", "author": "LMMs-Lab", "params": 72, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 42, "q5": 50, "q8": 78, "fp16": 144}, "tags": ["vision", "chat"], "family": "LLaVA"}, {"id": "arctic-instruct", "name": "Snowflake Arctic Instruct", "author": "Snowflake", "params": 480, "license": "Apache 2.0", "ctx": 4096, "vram": {"q4": 290, "q5": 345, "q8": 510, "fp16": 960}, "tags": ["chat", "general", "moe"], "family": "Arctic"}, {"id": "grok-1", "name": "Grok-1 (base)", "author": "xAI", "params": 314, "license": "Apache 2.0", "ctx": 8192, "vram": {"q4": 188, "q5": 225, "q8": 335, "fp16": 630}, "tags": ["chat", "general", "moe"], "family": "Grok"}, {"id": "gpt-oss-120b", "name": "gpt-oss 120B", "author": "OpenAI", "params": 117, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 70, "q5": 85, "q8": 125, "fp16": 234}, "tags": ["chat", "general", "reasoning", "moe"], "family": "gpt-oss"}, {"id": "gpt-oss-20b", "name": "gpt-oss 20B", "author": "OpenAI", "params": 21, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 13, "q5": 16, "q8": 23, "fp16": 42}, "tags": ["chat", "general", "reasoning", "moe", "small"], "family": "gpt-oss"}, {"id": "kimi-k26", "name": "Kimi K2.6", "author": "Moonshot AI", "params": 1000, "license": "Modified MIT", "ctx": 256000, "vram": {"q4": 600, "q5": 720, "q8": 1080, "fp16": 2000}, "tags": ["chat", "general", "vision", "moe"], "family": "Kimi"}, {"id": "qwen3-vl-235b", "name": "Qwen 3 VL 235B-A22B", "author": "Alibaba", "params": 235, "license": "Apache 2.0", "ctx": 262144, "vram": {"q4": 142, "q5": 170, "q8": 250, "fp16": 470}, "tags": ["vision", "chat", "general", "moe", "multilingual"], "family": "Qwen"}, {"id": "qwen3-vl-30b", "name": "Qwen 3 VL 30B-A3B", "author": "Alibaba", "params": 30, "license": "Apache 2.0", "ctx": 262144, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 62}, "tags": ["vision", "chat", "general", "moe", "multilingual"], "family": "Qwen"}, {"id": "qwen3-vl-8b", "name": "Qwen 3 VL 8B", "author": "Alibaba", "params": 8, "license": "Apache 2.0", "ctx": 262144, "vram": {"q4": 6, "q5": 7, "q8": 10, "fp16": 16}, "tags": ["vision", "chat", "general", "multilingual"], "family": "Qwen"}, {"id": "ernie-45-300b", "name": "ERNIE 4.5 300B-A47B", "author": "Baidu", "params": 300, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 180, "q5": 215, "q8": 320, "fp16": 600}, "tags": ["chat", "general", "multilingual", "moe"], "family": "ERNIE"}, {"id": "ernie-45-21b", "name": "ERNIE 4.5 21B-A3B Thinking", "author": "Baidu", "params": 21, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 13, "q5": 16, "q8": 23, "fp16": 42}, "tags": ["reasoning", "moe"], "family": "ERNIE"}, {"id": "ring-1t", "name": "Ring-1T", "author": "Ant Group", "params": 1000, "license": "MIT", "ctx": 131072, "vram": {"q4": 600, "q5": 720, "q8": 1080, "fp16": 2000}, "tags": ["reasoning", "moe"], "family": "Ring"}, {"id": "seed-oss-36b", "name": "Seed-OSS 36B Instruct", "author": "ByteDance", "params": 36, "license": "Apache 2.0", "ctx": 524288, "vram": {"q4": 22, "q5": 26, "q8": 40, "fp16": 72}, "tags": ["chat", "general"], "family": "Seed"}, {"id": "exaone-45-33b", "name": "EXAONE 4.5 33B", "author": "LG AI Research", "params": 33, "license": "EXAONE AI Model License", "ctx": 262144, "vram": {"q4": 20, "q5": 24, "q8": 36, "fp16": 66}, "tags": ["chat", "general", "vision", "multilingual"], "family": "EXAONE"}, {"id": "nemotron-nano-3-30b", "name": "Nemotron Nano 3 30B-A3B", "author": "NVIDIA", "params": 30, "license": "NVIDIA Open Model License", "ctx": 1000000, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 62}, "tags": ["chat", "general", "reasoning", "moe"], "family": "Nemotron"}, {"id": "nemotron-nano-v2-vl-12b", "name": "Nemotron Nano v2 VL 12B", "author": "NVIDIA", "params": 12.6, "license": "NVIDIA Open Model License", "ctx": 128000, "vram": {"q4": 8, "q5": 10, "q8": 14, "fp16": 25}, "tags": ["vision", "chat"], "family": "Nemotron"}, {"id": "apertus-70b", "name": "Apertus 70B", "author": "Swiss AI", "params": 70, "license": "Apache 2.0", "ctx": 65536, "vram": {"q4": 40, "q5": 48, "q8": 75, "fp16": 140}, "tags": ["chat", "general", "multilingual", "fr"], "family": "Apertus"}, {"id": "apertus-8b", "name": "Apertus 8B", "author": "Swiss AI", "params": 8, "license": "Apache 2.0", "ctx": 65536, "vram": {"q4": 6, "q5": 7, "q8": 10, "fp16": 16}, "tags": ["chat", "general", "multilingual", "fr"], "family": "Apertus"}, {"id": "trinity-mini-26b", "name": "Trinity Mini 26B-A3B", "author": "Arcee AI", "params": 26, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 15, "q5": 18, "q8": 28, "fp16": 52}, "tags": ["chat", "general", "moe"], "family": "Trinity"}, {"id": "hunyuan-20-large", "name": "Hunyuan Large 2.0", "author": "Tencent", "params": 406, "license": "Tencent Hunyuan License", "ctx": 262144, "vram": {"q4": 245, "q5": 290, "q8": 435, "fp16": 810}, "tags": ["chat", "general", "reasoning", "moe"], "family": "Hunyuan"}, {"id": "internvl-35-8b", "name": "InternVL 3.5 8B", "author": "OpenGVLab", "params": 8, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 6, "q5": 7, "q8": 10, "fp16": 16}, "tags": ["vision", "chat"], "family": "InternVL"}, {"id": "mimo-v2-flash", "name": "MiMo V2 Flash", "author": "Xiaomi", "params": 309, "license": "MIT", "ctx": 128000, "vram": {"q4": 185, "q5": 222, "q8": 330, "fp16": 618}, "tags": ["chat", "code", "moe"], "family": "MiMo"}, {"id": "rakuten-ai-3", "name": "Rakuten AI 3.0", "author": "Rakuten", "params": 700, "license": "Apache 2.0", "ctx": 32768, "vram": {"q4": 420, "q5": 500, "q8": 745, "fp16": 1400}, "tags": ["chat", "general", "multilingual", "moe"], "family": "Rakuten"}, {"id": "kanana-2-30b", "name": "Kanana 2 30B-A3B Thinking", "author": "Kakao", "params": 30, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 18, "q5": 22, "q8": 33, "fp16": 60}, "tags": ["chat", "general", "reasoning", "multilingual", "moe"], "family": "Kanana"}, {"id": "deepseek-ocr", "name": "DeepSeek-OCR", "author": "DeepSeek", "params": 3, "license": "MIT", "ctx": 8192, "vram": {"q4": 2, "q5": 2.5, "q8": 4, "fp16": 6}, "tags": ["vision", "chat", "small"], "family": "DeepSeek"}, {"id": "hunyuan-ocr-1b", "name": "HunyuanOCR 1B", "author": "Tencent", "params": 1, "license": "Tencent Hunyuan License", "ctx": 8192, "vram": {"q4": 0.8, "q5": 1, "q8": 1.5, "fp16": 2}, "tags": ["vision", "chat", "small"], "family": "Hunyuan"}, {"id": "gemma4-26b-moe", "name": "Gemma 4 26B-A4B MoE", "author": "Google", "params": 26, "license": "Gemma", "ctx": 128000, "vram": {"q4": 16, "q5": 19, "q8": 28, "fp16": 52}, "tags": ["chat", "general", "vision", "audio", "multilingual", "moe"], "family": "Gemma"}, {"id": "dots-llm1", "name": "dots.llm1 Instruct", "author": "Rednote", "params": 142, "license": "MIT", "ctx": 32768, "vram": {"q4": 85, "q5": 102, "q8": 152, "fp16": 284}, "tags": ["chat", "general", "moe"], "family": "dots"}, {"id": "qwen3-omni-30b", "name": "Qwen 3 Omni 30B-A3B", "author": "Alibaba", "params": 30, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 62}, "tags": ["vision", "audio", "chat", "moe"], "family": "Qwen"}, {"id": "qwen35-122b-a10b", "name": "Qwen 3.5 122B-A10B", "author": "Alibaba", "params": 122, "license": "Apache 2.0", "ctx": 262000, "vram": {"q4": 73, "q5": 88, "q8": 131, "fp16": 244}, "tags": ["chat", "general", "reasoning", "multilingual", "moe"], "family": "Qwen"}, {"id": "pangu-pro-moe-72b", "name": "Pangu Pro MoE 72B", "author": "Huawei", "params": 72, "license": "Pangu Model License", "ctx": 32768, "vram": {"q4": 42, "q5": 50, "q8": 78, "fp16": 144}, "tags": ["chat", "general", "moe"], "family": "Pangu"}, {"id": "qwen36-27b", "name": "Qwen 3.6 27B", "author": "Alibaba", "params": 27, "license": "Apache 2.0", "ctx": 262144, "vram": {"q4": 16, "q5": 19, "q8": 29, "fp16": 54}, "tags": ["chat", "general", "code", "reasoning", "vision", "multilingual"], "family": "Qwen"}, {"id": "deepseek-v4-pro", "name": "DeepSeek V4 Pro 1.6T", "author": "DeepSeek", "params": 1600, "license": "MIT", "ctx": 1000000, "vram": {"q4": 960, "q5": 1150, "q8": 1700, "fp16": 3200}, "tags": ["chat", "general", "reasoning", "moe", "multilingual"], "family": "DeepSeek"}, {"id": "deepseek-v4-flash", "name": "DeepSeek V4 Flash 284B", "author": "DeepSeek", "params": 284, "license": "MIT", "ctx": 1000000, "vram": {"q4": 170, "q5": 205, "q8": 305, "fp16": 568}, "tags": ["chat", "general", "reasoning", "moe", "multilingual"], "family": "DeepSeek"}, {"id": "tencent-hy3-preview", "name": "Tencent Hy3 Preview 295B", "author": "Tencent", "params": 295, "license": "Tencent Hunyuan License", "ctx": 256000, "vram": {"q4": 177, "q5": 210, "q8": 315, "fp16": 590}, "tags": ["chat", "general", "reasoning", "moe"], "family": "Hunyuan"}, {"id": "llada2-uni", "name": "LLaDA 2.0 Uni 16B", "author": "Ant Group / inclusionAI", "params": 16, "license": "Apache 2.0", "ctx": 8192, "vram": {"q4": 18, "q5": 22, "q8": 30, "fp16": 47}, "tags": ["chat", "vision", "general", "moe"], "family": "LLaDA"}, {"id": "mimo-v25-pro", "name": "MiMo V2.5 Pro", "author": "Xiaomi", "params": 1020, "license": "MIT", "ctx": 1000000, "vram": {"q4": 595, "q5": 720, "q8": 1090, "fp16": 2040}, "tags": ["chat", "reasoning", "code", "moe", "multilingual"], "family": "MiMo"}, {"id": "mimo-v25", "name": "MiMo V2.5", "author": "Xiaomi", "params": 310, "license": "MIT", "ctx": 1000000, "vram": {"q4": 180, "q5": 220, "q8": 330, "fp16": 620}, "tags": ["chat", "vision", "audio", "moe", "multilingual"], "family": "MiMo"}, {"id": "granite41-8b", "name": "Granite 4.1 8B Instruct", "author": "IBM", "params": 8, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 5, "q5": 6, "q8": 9, "fp16": 16}, "tags": ["chat", "general", "code", "multilingual"], "family": "Granite"}, {"id": "nemotron-omni-30b", "name": "Nemotron 3 Nano Omni 30B-A3B", "author": "NVIDIA", "params": 30, "license": "NVIDIA Open Model License", "ctx": 256000, "vram": {"q4": 21, "q5": 25, "q8": 33, "fp16": 62}, "tags": ["chat", "vision", "audio", "reasoning", "moe"], "family": "Nemotron"}, {"id": "laguna-xs2", "name": "Laguna XS.2", "author": "Poolside", "params": 33, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 66}, "tags": ["code", "moe"], "family": "Laguna"}, {"id": "granite41-30b", "name": "Granite 4.1 30B Instruct", "author": "IBM", "params": 30, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 17, "q5": 21, "q8": 32, "fp16": 60}, "tags": ["chat", "general", "code", "multilingual"], "family": "Granite"}, {"id": "granite41-3b", "name": "Granite 4.1 3B Instruct", "author": "IBM", "params": 3, "license": "Apache 2.0", "ctx": 131072, "vram": {"q4": 2, "q5": 2.5, "q8": 3, "fp16": 6}, "tags": ["chat", "general", "code", "multilingual", "small"], "family": "Granite"}, {"id": "ling-26-1t", "name": "Ling 2.6 1T", "author": "Ant Group / inclusionAI", "params": 1000, "license": "MIT", "ctx": 262144, "vram": {"q4": 580, "q5": 710, "q8": 1070, "fp16": 2000}, "tags": ["chat", "general", "moe", "multilingual"], "family": "Ling"}, {"id": "gemma4-e2b", "name": "Gemma 4 E2B", "author": "Google", "params": 2, "license": "Gemma", "ctx": 128000, "vram": {"q4": 7, "q5": 9, "q8": 13, "fp16": 25}, "tags": ["chat", "vision", "small", "multilingual", "reasoning"], "family": "Gemma"}, {"id": "nemotron-cascade-2", "name": "Nemotron Cascade 2 30B-A3B", "author": "NVIDIA", "params": 30, "license": "NVIDIA Open Model License", "ctx": 128000, "vram": {"q4": 17, "q5": 21, "q8": 32, "fp16": 60}, "tags": ["chat", "code", "reasoning", "moe"], "family": "Nemotron"}, {"id": "nemotron3", "name": "Nemotron 3 33B", "author": "NVIDIA", "params": 33, "license": "NVIDIA Open Model License", "ctx": 128000, "vram": {"q4": 19, "q5": 23, "q8": 35, "fp16": 66}, "tags": ["chat", "code", "reasoning"], "family": "Nemotron"}, {"id": "nemotron-3-nano", "name": "Nemotron 3 Nano 30B-A3B", "author": "NVIDIA", "params": 30, "license": "NVIDIA Open Model License", "ctx": 128000, "vram": {"q4": 17, "q5": 21, "q8": 32, "fp16": 60}, "tags": ["chat", "code", "reasoning", "moe"], "family": "Nemotron"}, {"id": "medgemma", "name": "MedGemma 4B", "author": "Google", "params": 4, "license": "Gemma", "ctx": 128000, "vram": {"q4": 2.3, "q5": 2.8, "q8": 4.3, "fp16": 8}, "tags": ["chat", "vision", "multilingual", "small"], "family": "Gemma"}, {"id": "gemma4", "name": "Gemma 4 2B", "author": "Google", "params": 2, "license": "Gemma", "ctx": 128000, "vram": {"q4": 1.2, "q5": 1.4, "q8": 2.1, "fp16": 4}, "tags": ["chat", "vision", "multilingual", "small"], "family": "Gemma"}, {"id": "qwen3-5", "name": "Qwen 3.5 0.8B", "author": "Alibaba", "params": 0.8, "license": "Apache 2.0", "ctx": 256000, "vram": {"q4": 0.5, "q5": 0.6, "q8": 0.9, "fp16": 1.6}, "tags": ["chat", "general", "small", "multilingual"], "family": "Qwen"}, {"id": "medgemma1-5", "name": "MedGemma 1.5 4B", "author": "Google", "params": 4, "license": "Gemma", "ctx": 128000, "vram": {"q4": 2.3, "q5": 2.8, "q8": 4.3, "fp16": 8}, "tags": ["chat", "vision", "multilingual", "small"], "family": "Gemma"}, {"id": "granite4-1", "name": "Granite 4.1", "author": "IBM", "params": 3, "license": "Apache 2.0", "ctx": 128000, "vram": {"q4": 1.7, "q5": 2.1, "q8": 3.2, "fp16": 6}, "tags": ["chat", "code"], "family": "Granite"}, {"id": "qwen3-6", "name": "Qwen 3.6 27B", "author": "Alibaba", "params": 27, "license": "Qwen License", "ctx": 256000, "vram": {"q4": 16, "q5": 19, "q8": 29, "fp16": 54}, "tags": ["chat", "code", "reasoning", "vision", "multilingual"], "family": "Qwen"}, {"id": "lfm2-5-thinking", "name": "LFM2.5 Thinking 1.2B", "author": "Liquid AI", "params": 1.2, "license": "LFM Open License v1.0", "ctx": 32768, "vram": {"q4": 0.7, "q5": 0.9, "q8": 1.3, "fp16": 2.4}, "tags": ["chat", "general", "reasoning", "small"], "family": "LFM"}, {"id": "glm-4-7-flash", "name": "GLM 4.7 Flash", "author": "Zhipu AI", "params": 3, "license": "MIT", "ctx": 128000, "vram": {"q4": 1.7, "q5": 2.1, "q8": 3.2, "fp16": 6}, "tags": ["chat", "multilingual"], "family": "GLM"}]