# languagebench/.github/workflows/nightly-evals.yml
# davidpomerenke's picture
# Upload from GitHub Actions: Merge pull request #22 from datenlabor-bmz/dev
# 2cdada4 verified
# Runs the evaluation scripts under evals/ and then asks HuggingFace to
# restart the Space named in the final step.
name: Nightly Evaluation Run

# The nightly cron trigger is currently commented out, so the workflow only
# runs when it is started manually via workflow_dispatch.
on:  # yamllint disable-line rule:truthy
  # schedule:
  #   - cron: '0 3 * * *'  # Run at 3am UTC every day
  workflow_dispatch:  # Allow manual triggering

jobs:
  run-evals:
    runs-on: ubuntu-latest
    # GitHub-hosted runners terminate a job after 6 hours of execution, so a
    # larger timeout has no effect here. Raise this value only if the job is
    # moved to a self-hosted runner, where the 6 hour cap does not apply.
    timeout-minutes: 360

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is never parsed as a float.
          python-version: '3.12'

      - name: Install dependencies
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          uv sync --frozen --extra dev

      - name: Run evaluations
        env:
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          HUGGINGFACE_ACCESS_TOKEN: ${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}
          # Environment variables are always strings; quote them to make that
          # explicit. Presumably read by the scripts under evals/ -- confirm.
          N_SENTENCES: "20"
          MAX_LANGUAGES: "150"
        run: |
          # Pass the token through the environment instead of expanding the
          # secret into the script text before the shell sees it.
          uv run huggingface-cli login --token "$HUGGINGFACE_ACCESS_TOKEN"
          uv run evals/download_data.py
          uv run evals/main.py

      - name: Restart HuggingFace Space
        env:
          HUGGINGFACE_ACCESS_TOKEN: ${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}
        run: |
          # --fail makes curl exit non-zero on an HTTP error response, so a
          # rejected restart request fails the step instead of passing silently.
          curl --fail -X POST "https://huggingface.co/api/spaces/fair-forward/evals-for-every-language/restart" \
            -H "Authorization: Bearer $HUGGINGFACE_ACCESS_TOKEN"