Update Dockerfile
Dockerfile: CHANGED (+7 -6)
@@ -19,15 +19,16 @@ RUN mkdir -p ${HF_HOME} && chmod -R 777 ${HF_HOME}
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-
+# Pre-download the TinyLlama model and its tokenizer during the build process.
+# This makes the startup faster and ensures the model is available.
+# Using torch_dtype=torch.bfloat16 and device_map="auto" for efficient loading.
+RUN python -c "import torch; from transformers import AutoTokenizer, AutoModelForCausalLM; model_name='TinyLlama/TinyLlama-1.1B-Chat-v1.0'; AutoTokenizer.from_pretrained(model_name); AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, device_map='auto')"
 
 # Copy your application code
 COPY app.py .
 
-# Expose the port your API will run on
-
-EXPOSE 8000
+# Expose the port your API will run on (Hugging Face Spaces typically uses 7860)
+EXPOSE 7860
 
 # Command to start the FastAPI application using Uvicorn as a Python module.
-
-CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
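
Two things change here: the TinyLlama weights (about 2 GB) are now fetched at build time, so the Space does not download them on every cold start, and EXPOSE is brought in line with the port the CMD was already using (the old file exposed 8000 while Uvicorn listened on 7860). The inline RUN python -c works but is hard to read and review; a maintenance-friendlier variant (a sketch, not part of this commit; download_model.py is a hypothetical name) would move the same logic into a small script that the Dockerfile runs with COPY download_model.py . followed by RUN python download_model.py:

# download_model.py (hypothetical helper, equivalent to the inline
# `RUN python -c ...` in this commit; not part of the commit itself)
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Fetching once at build time stores the tokenizer and weights under HF_HOME,
# so the container starts without touching the Hugging Face Hub.
AutoTokenizer.from_pretrained(MODEL_NAME)
AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # bfloat16 only pays off on GPU; build machines are typically CPU-only,
    # hence the float32 fallback (mirrors the inline command).
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",  # needs the `accelerate` package installed at build time
)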
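The CMD requires app.py to expose a module-level FastAPI instance named app, since uvicorn app:app means "the object app in the module app". The actual app.py is not part of this commit; the sketch below only illustrates the shape that CMD expects, and the /generate route and Prompt model are assumptions:

# app.py (minimal sketch of what the CMD expects; the real app.py is not
# shown in this commit, and the route and request model are assumptions)
import torch
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

app = FastAPI()  # `uvicorn app:app` looks up this module-level object

# Because the weights were cached during the Docker build, this loads from
# the local HF_HOME cache instead of the network.
generator = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
)

class Prompt(BaseModel):
    text: str
    max_new_tokens: int = 128

@app.post("/generate")
def generate(prompt: Prompt):
    out = generator(prompt.text, max_new_tokens=prompt.max_new_tokens)
    return {"generated_text": out[0]["generated_text"]}

Note that EXPOSE 7860 and --port 7860 must agree: Hugging Face Docker Spaces route traffic to port 7860 unless app_port is overridden in the Space's README metadata.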