Spaces:
Running
Running
feat: add LibreOffice support in Dockerfile and simplify extractor.py for legacy doc/ppt conversion
# Image for a Flask backend served by Gunicorn on Hugging Face Spaces.
# The container listens on port 7860 and runs as the non-root user "user".
FROM python:3.10-slim

# Run every RUN step under bash with pipefail so that a failure in any stage
# of a pipeline fails the build. The default /bin/sh only reports the exit
# status of the last command, which would hide a broken filter stage in the
# piped requirements install further down.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install system dependencies.
#   build-essential: compiler toolchain (presumably for pip packages that
#                    build native extensions -- confirm it is still needed).
#   libreoffice:     used for legacy doc/ppt conversion.
# DEBIAN_FRONTEND is set inline so it suppresses debconf prompts during the
# build without leaking into the runtime environment. The apt lists are removed
# in the same layer so they do not add to the image size.
# NOTE(review): --no-install-recommends would shrink this layer, but it may
# drop fonts/filters that LibreOffice conversion relies on -- verify first.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        libreoffice \
    && rm -rf /var/lib/apt/lists/*

# Create a non-root user (Hugging Face Spaces requirement).
RUN useradd -m -u 1000 user
USER user

# ~/.local/bin is put on PATH so console scripts installed by pip for this
# user (e.g. gunicorn) can be found.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# -- Step 1: Install torch CPU-only first (biggest layer, most cacheable) --
# Using the PyTorch CPU index so we get a ~600MB wheel instead of the ~2GB GPU wheel.
RUN pip install --no-cache-dir \
        torch==2.5.1+cpu \
        --index-url https://download.pytorch.org/whl/cpu

# -- Step 2: Copy requirements and install remaining deps --
# Only the requirements file is copied here so this layer stays cached until
# the dependency list itself changes.
COPY --chown=user backend/requirements.txt .

# Install everything except torch (already installed above).
# sed is used as the filter because it exits 0 even when no lines remain,
# whereas `grep -v` exits 1 on empty output and would fail under pipefail.
# NOTE(review): the ^torch pattern also drops any other requirement whose line
# starts with "torch" (torchvision, torchaudio, ...) -- confirm that is intended.
RUN sed '/^torch/d' requirements.txt | pip install --no-cache-dir -r /dev/stdin

# Pre-download NLTK resources at build time so they are already in the image.
# NOTE(review): recent NLTK releases may also need "punkt_tab" -- confirm
# against the NLTK version pinned in requirements.txt.
RUN python -m nltk.downloader punkt stopwords

# Copy the rest of the application.
COPY --chown=user . .

# Change working directory to backend where app.py is located.
WORKDIR $HOME/app/backend

# Expose port 7860 for Hugging Face Spaces.
EXPOSE 7860

# Run the Flask app with Gunicorn (exec form, so Gunicorn runs directly rather
# than under a shell). The worker timeout is raised to 300 seconds.
CMD ["gunicorn", "-b", "0.0.0.0:7860", "--timeout", "300", "app:app"]