# Use a Python base image
FROM python:3.9-slim-buster
# Set working directory inside the container
WORKDIR /app
# Create a virtual environment and prepend it to PATH, which is the container
# equivalent of activating it for every subsequent instruction.
# This keeps your dependencies isolated from the system Python.
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# Create a dedicated directory for Hugging Face cache and ensure permissions.
# This prevents 'PermissionError' issues when downloading models by directing
# Hugging Face to a location with guaranteed write access.
ENV HF_HOME=/app/.hf_cache
RUN mkdir -p ${HF_HOME} && chmod -R 777 ${HF_HOME}
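# (HF_HOME also sets the default location of the hub and transformers caches,
# so this single variable redirects all Hugging Face downloads.)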
# Install dependencies from requirements.txt into the virtual environment
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
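# For reference, a requirements.txt compatible with this image would plausibly
# contain the following (an assumption; the actual file is not shown here):
#   fastapi
#   uvicorn
#   torch
#   transformers
#   accelerate   # needed by device_map="auto" in the pre-download step below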
# Pre-download the TinyLlama model and its tokenizer at build time, so the
# weights are already in the cache when the Space starts (faster startup, no
# large download on boot).
# bfloat16 is used when a GPU is available (float32 otherwise), and
# device_map="auto" places the weights automatically (this requires the
# `accelerate` package to be installed).
RUN python -c "import torch; \
    from transformers import AutoTokenizer, AutoModelForCausalLM; \
    model_name='TinyLlama/TinyLlama-1.1B-Chat-v1.0'; \
    AutoTokenizer.from_pretrained(model_name); \
    AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, device_map='auto')"
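# Note: the files downloaded above are owned by root, while Spaces containers
# typically run as a non-root user (uid 1000) that may need to write lock files
# into the cache. Re-opening permissions after the download is a defensive,
# optional step under that assumption.
RUN chmod -R 777 ${HF_HOME}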
# Copy your application code
COPY app.py .
# Expose the port your API will run on (Hugging Face Spaces typically uses 7860)
EXPOSE 7860
# Start the FastAPI application with Uvicorn. Invoking it via `python -m`
# ensures the interpreter (and Uvicorn) from the virtual environment are used.
CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]