Subhadip007 committed on
Commit
d4041c3
·
1 Parent(s): b7b5abb

fix: use git clone for dataset instead of hf-cli to bypass rate limits

Browse files
Files changed (1) hide show
  1. Dockerfile +4 -2
Dockerfile CHANGED
@@ -5,6 +5,8 @@ WORKDIR /app
5
  # Install system dependencies
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
 
 
8
  && rm -rf /var/lib/apt/lists/*
9
 
10
  # Copy requirements first (Docker layer caching)
@@ -28,9 +30,9 @@ COPY .env.example ./.env
28
  # Create remaining data dirs
29
  RUN mkdir -p data/raw data/processed logs
30
 
31
- # Download the 4.4 GB database from the limits-free HF Dataset
32
  # This happens during the Docker build so the API starts instantly later
33
- RUN huggingface-cli download Subhadip007/researchpilot-data --repo-type dataset --local-dir /app/data
34
 
35
  # HuggingFace Spaces uses port 7860
36
  ENV PORT=7860
 
5
  # Install system dependencies: build-essential (compiler toolchain) plus git and git-lfs, which the dataset clone step later in this file invokes
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
8
+ git \
9
+ git-lfs \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
  # Copy requirements first (Docker layer caching)
 
30
  # Create remaining data dirs
31
  RUN mkdir -p data/raw data/processed logs
32
 
33
+ # Download the 4.4 GB database from the HF Dataset repo with git + git-lfs (avoids hf-cli rate limits). Clone into a temp dir because git refuses to clone into the already-populated /app/data, merge the files in, and drop .git in the same layer so the LFS objects are not stored twice in the image.
34
  # This happens during the Docker build so the API starts instantly later
35
+ RUN git lfs install && git clone --depth 1 https://huggingface.co/datasets/Subhadip007/researchpilot-data /tmp/researchpilot-data && rm -rf /tmp/researchpilot-data/.git && cp -a /tmp/researchpilot-data/. /app/data/ && rm -rf /tmp/researchpilot-data
36
 
37
  # HuggingFace Spaces uses port 7860
38
  ENV PORT=7860