# Use a Python base image
FROM python:3.9-slim-buster
# Set working directory inside the container
WORKDIR /app
# Create a virtual environment and put it on PATH.
# Prepending the venv's bin directory is the non-interactive equivalent of
# activating it: every later python/pip invocation uses this isolated
# environment instead of the system interpreter.
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# Create a dedicated directory for the Hugging Face cache and open up its
# permissions. This prevents 'PermissionError' failures when downloading
# models by pointing Hugging Face at a location with guaranteed write access.
ENV HF_HOME=/app/.hf_cache
RUN mkdir -p ${HF_HOME} && chmod -R 777 ${HF_HOME}
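# Because ENV persists into the final image, both the build-time pre-download
# below and the application at runtime read the same HF_HOME, so the cache
# populated during the build is reused at startup. The broad chmod keeps the
# cache writable even when the container runs as a non-root user, as Hugging
# Face Spaces does (UID 1000).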
# Install dependencies from requirements.txt into the virtual environment
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
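# Note: requirements.txt is not shown in this file. The steps below assume it
# provides at least torch, transformers, fastapi, and uvicorn, plus
# accelerate, which transformers requires whenever device_map is passed to
# from_pretrained() as it is in the pre-download step.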
# Pre-download the TinyLlama model and its tokenizer during the build so the
# container starts quickly and never needs network access for the weights.
# The dtype is chosen at build time: bfloat16 if CUDA is visible, otherwise
# float32 (the usual case on CPU-only build machines); device_map="auto"
# places the weights on whatever device is available.
RUN python -c "import torch; \
    from transformers import AutoTokenizer, AutoModelForCausalLM; \
    model_name = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'; \
    AutoTokenizer.from_pretrained(model_name); \
    AutoModelForCausalLM.from_pretrained(model_name, \
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, \
        device_map='auto')"
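# At runtime the same from_pretrained() calls resolve against the warm cache
# in HF_HOME instead of downloading. A sketch of the loading code app.py is
# expected to contain (hypothetical; app.py is not part of this file):
#
#   from transformers import AutoTokenizer, AutoModelForCausalLM
#   tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
#   model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")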
# Copy your application code
COPY app.py .
# Expose the port your API will run on (Hugging Face Spaces typically uses 7860)
EXPOSE 7860
# Start the FastAPI application with Uvicorn. Invoking it via "python -m"
# guarantees the uvicorn installed in the virtual environment is used.
CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]