# Use a Python base image
FROM python:3.9-slim-buster
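# (Debian Buster is past end-of-life; python:3.9-slim-bookworm is a current
# drop-in alternative if the build starts failing on stale apt sources.)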
# Set working directory inside the container
WORKDIR /app
# Create a virtual environment and prepend its bin directory to PATH,
# the Dockerfile equivalent of activating it. This keeps dependencies
# isolated from the system Python.
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# Create a dedicated directory for the Hugging Face cache and make it
# world-writable. This prevents 'PermissionError' issues when downloading
# models by pointing Hugging Face at a location with guaranteed write access.
ENV HF_HOME=/app/.hf_cache
RUN mkdir -p ${HF_HOME} && chmod -R 777 ${HF_HOME}
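# (Docker Spaces typically run the container as a non-root user, which is
# why the cache above needs to be writable by everyone.)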
# Install dependencies from requirements.txt into the virtual environment
COPY requirements.txt .
RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt
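# For reference, a plausible requirements.txt for this image (assumed here,
# not shipped with the Dockerfile):
#   fastapi
#   uvicorn[standard]
#   torch
#   transformers
#   accelerate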
# Pre-download the TinyLlama model and its tokenizer during the build.
# This makes startup faster and bakes the weights into the image.
# bfloat16 is requested only when CUDA is available; on a typical CPU-only
# build host the model loads in float32, with device_map="auto" placing it.
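# NOTE: device_map='auto' requires the accelerate package, which this step
# assumes is listed in requirements.txt.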
RUN python -c "import torch; \
    from transformers import AutoTokenizer, AutoModelForCausalLM; \
    model_name = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'; \
    AutoTokenizer.from_pretrained(model_name); \
    AutoModelForCausalLM.from_pretrained(model_name, \
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, \
        device_map='auto')"
# Copy your application code
COPY app.py .
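# A minimal sketch of what app.py is assumed to look like (everything here
# except the app:app entry point required by the CMD below is hypothetical):
#
#   from fastapi import FastAPI
#   from transformers import pipeline
#
#   app = FastAPI()
#   generator = pipeline("text-generation",
#                        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
#
#   @app.get("/generate")
#   def generate(prompt: str):
#       return {"output": generator(prompt, max_new_tokens=64)[0]["generated_text"]}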
# Expose the port your API will run on (Hugging Face Spaces expects 7860)
EXPOSE 7860
# Start the FastAPI application with Uvicorn, run as a Python module so the
# virtual environment's interpreter is used.
CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]