brendon-ai committed
Commit c48c4ab · verified · 1 Parent(s): 665d3e9

Update Dockerfile

Files changed (1):
  1. Dockerfile +7 -6
Dockerfile CHANGED
@@ -19,15 +19,16 @@ RUN mkdir -p ${HF_HOME} && chmod -R 777 ${HF_HOME}
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-RUN python -c "from transformers import AutoTokenizer, AutoModelForMaskedLM; AutoTokenizer.from_pretrained('boltuix/NeuroBERT-Tiny'); AutoModelForMaskedLM.from_pretrained('boltuix/NeuroBERT-Tiny')"
+# Pre-download the TinyLlama model and its tokenizer during the build process.
+# This makes the startup faster and ensures the model is available.
+# Using torch_dtype=torch.bfloat16 and device_map="auto" for efficient loading.
+RUN python -c "import torch; from transformers import AutoTokenizer, AutoModelForCausalLM; model_name='TinyLlama/TinyLlama-1.1B-Chat-v1.0'; AutoTokenizer.from_pretrained(model_name); AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, device_map='auto')"
 
 # Copy your application code
 COPY app.py .
 
-# Expose the port your API will run on
-# Hugging Face Spaces typically uses port 8000 for custom Docker builds
-EXPOSE 8000
+# Expose the port your API will run on (Hugging Face Spaces typically uses 7860)
+EXPOSE 7860
 
 # Command to start the FastAPI application using Uvicorn as a Python module.
-# This is more robust as it explicitly invokes 'python' to run the 'uvicorn' module.
 CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]