brendon-ai committed
Commit c48c4ab · verified · 1 Parent(s): 665d3e9

Update Dockerfile

Files changed (1):
  1. Dockerfile +7 -6
Dockerfile CHANGED
@@ -19,15 +19,16 @@ RUN mkdir -p ${HF_HOME} && chmod -R 777 ${HF_HOME}
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-RUN python -c "from transformers import AutoTokenizer, AutoModelForMaskedLM; AutoTokenizer.from_pretrained('boltuix/NeuroBERT-Tiny'); AutoModelForMaskedLM.from_pretrained('boltuix/NeuroBERT-Tiny')"
+# Pre-download the TinyLlama model and its tokenizer during the build process.
+# This makes the startup faster and ensures the model is available.
+# Using torch_dtype=torch.bfloat16 and device_map="auto" for efficient loading.
+RUN python -c "import torch; from transformers import AutoTokenizer, AutoModelForCausalLM; model_name='TinyLlama/TinyLlama-1.1B-Chat-v1.0'; AutoTokenizer.from_pretrained(model_name); AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32, device_map='auto')"
 
 # Copy your application code
 COPY app.py .
 
-# Expose the port your API will run on
-# Hugging Face Spaces typically uses port 8000 for custom Docker builds
-EXPOSE 8000
+# Expose the port your API will run on (Hugging Face Spaces typically uses 7860)
+EXPOSE 7860
 
 # Command to start the FastAPI application using Uvicorn as a Python module.
-# This is more robust as it explicitly invokes 'python' to run the 'uvicorn' module.
 CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]