# Use a Python base image
FROM python:3.9-slim-buster

# Set working directory inside the container
WORKDIR /app

# Create a virtual environment and put its bin directory first on PATH.
# Prepending to PATH makes every later `python`/`pip` call use the isolated
# environment; this is the Dockerfile equivalent of activating the venv.
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Point the Hugging Face cache at a dedicated directory with open permissions.
# Setting HF_HOME redirects model downloads to a location with guaranteed write
# access, which avoids 'PermissionError' failures when fetching models.
ENV HF_HOME=/app/.hf_cache
RUN mkdir -p ${HF_HOME} && chmod -R 777 ${HF_HOME}

# Install dependencies from requirements.txt into the virtual environment
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
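
# requirements.txt is not shown here; it is assumed to include at least
# torch, transformers, accelerate, fastapi, and uvicorn. `accelerate` is
# required because the model below is loaded with device_map='auto'.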

# Pre-download the TinyLlama model and its tokenizer at build time so startup
# is fast and the model is available without network access. bfloat16 is used
# only when CUDA is available (float32 otherwise); device_map='auto' lets
# accelerate place the weights on the best available device.
RUN python -c "\
import torch; \
from transformers import AutoTokenizer, AutoModelForCausalLM; \
model_name='TinyLlama/TinyLlama-1.1B-Chat-v1.0'; \
AutoTokenizer.from_pretrained(model_name); \
AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, device_map='auto')"
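
# The download above runs as root; if the container later runs as a non-root
# user (the Hugging Face Spaces default), files written during the download may
# not be writable at runtime. Re-opening permissions here guards against that.
RUN chmod -R 777 ${HF_HOME}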

# Copy your application code
COPY app.py .
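
# app.py is expected to expose a FastAPI instance named `app`, matching the
# `app:app` target in the CMD below. A minimal sketch of that interface
# (illustrative only, not the actual application code):
#
#   from fastapi import FastAPI
#   app = FastAPI()
#
#   @app.get("/health")
#   def health():
#       return {"status": "ok"}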

# Expose the port your API will run on (Hugging Face Spaces typically uses 7860)
EXPOSE 7860

# Start the FastAPI application with Uvicorn, invoked via `python -m` so the
# virtual environment's interpreter and packages are used.
CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]