PORTULAN CLARIN logo

🥇 CLARIN-PT-LDB - Leaderboard of Open LLMs for Portuguese 🥇

The CLARIN-PT-LDB - Leaderboard of Open LLMs for Portuguese provides a service for running evaluations of open Large Language Models (LLMs) for Portuguese.

It keeps track of their scores under several benchmarks that address different aspects of model performance, namely language, culture and civility.

Models available on Hugging Face may be submitted for evaluation.

This service is supported by PORTULAN CLARIN Research Infrastructure for the Science and Technology of Language.

Leaderboard

{
  "headers": [
    "T",
    "Model",
    "Average ⬆️",
    "Tuguesice",
    "DoNotAnswer",
    "MuSR",
    "Omniscience",
    "GPQA-Diamond",
    "MMLU",
    "MMLU-Pro",
    "CoPA",
    "MRPC",
    "RTE",
    "Type",
    "Architecture",
    "Precision",
    "Hub License",
    "#Params (B)",
    "Hub ❤️",
    "Available on the hub",
    "Model sha"
  ],
  "data": [
    [
      "🔶",
      "<a target=\"_blank\" href=\"https://huggingface.co/PORTULAN/gervasio-70b-portuguese-ptpt-decoder-quantized-4bit\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">PORTULAN/gervasio-70b-portuguese-ptpt-decoder-quantized-4bit</a>",
      65.05,
      39.76,
      86.9,
      54.5,
      16.53,
      45.96,
      82.04,
      58.67,
      96,
      79.13,
      90.97,
      "fine-tuned",
      "?",
      "float16",
      "mit",
      70,
      6,
      false,
      "dummy"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">meta-llama/Llama-3.3-70B-Instruct</a>",
      63.78,
      25.69,
      91.8,
      57.8,
      15.5,
      45.96,
      81.67,
      61.61,
      95,
      72.93,
      89.89,
      "instruction-tuned",
      "?",
      "float16",
      "mit",
      70,
      6,
      false,
      "dummy"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">mistralai/Mistral-Small-24B-Instruct-2501</a>",
      53.33,
      15.6,
      88.5,
      32.94,
      5.69,
      45.45,
      71.02,
      47.75,
      94,
      48.23,
      84.12,
      "instruction-tuned",
      "?",
      "float16",
      "mit",
      24,
      6,
      false,
      "dummy"
    ],
    [
      "🔶",
      "<a target=\"_blank\" href=\"https://huggingface.co/PORTULAN/gervasio-8b-portuguese-ptpt-decoder-quantized-4bit\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">PORTULAN/gervasio-8b-portuguese-ptpt-decoder-quantized-4bit</a>",
      49.94,
      11.31,
      83.6,
      24.07,
      4.58,
      34.85,
      62.15,
      36.79,
      87,
      77.45,
      77.62,
      "fine-tuned",
      "?",
      "float16",
      "mit",
      8.03,
      6,
      false,
      "dummy"
    ],
    [
      "⭕",
      "<a target=\"_blank\" href=\"https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">meta-llama/Llama-3.1-8B-Instruct</a>",
      48.85,
      9.79,
      83.6,
      23.94,
      3.58,
      32.32,
      61.49,
      36.1,
      83,
      75.25,
      79.42,
      "instruction-tuned",
      "?",
      "float16",
      "mit",
      8,
      6,
      false,
      "dummy"
    ]
  ],
  "metadata": null
}