Spaces:
Paused
Paused
Update Dockerfile
Browse files- Dockerfile +5 -1
Dockerfile
CHANGED
@@ -61,8 +61,12 @@ RUN pip install uv setuptools
|
|
61 |
|
62 |
# Install vLLM
|
63 |
# RUN uv pip install --system vllm==0.10.0 torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
|
|
|
|
|
|
|
64 |
# Downgrade triton because the following error occurred when using triton==3.3.1
|
65 |
# https://github.com/vllm-project/vllm/issues/20259#issuecomment-3157159183
|
|
|
66 |
# /usr/local/lib/python3.12/dist-packages/vllm/attention/ops/prefix_prefill.py:36:0: error: Failures have been detected while processing an MLIR pass pipeline
|
67 |
# /usr/local/lib/python3.12/dist-packages/vllm/attention/ops/prefix_prefill.py:36:0: note: Pipeline failed while executing [`ConvertTritonGPUToLLVM` on 'builtin.module' operation]: reproducer generated at `std::errs, please share the reproducer above with Triton project.`
|
68 |
# INFO: 10.16.9.222:28100 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
|
@@ -177,7 +181,7 @@ RUN pip install uv setuptools
|
|
177 |
# INFO: Waiting for application shutdown.
|
178 |
# INFO: Application shutdown complete.
|
179 |
# INFO: Finished server process [27]
|
180 |
-
RUN uv pip install --system --index-strategy unsafe-best-match
|
181 |
|
182 |
# # Then, install xformers with the --no-build-isolation flag
|
183 |
# RUN uv pip install --system \
|
|
|
61 |
|
62 |
# Install vLLM
|
63 |
# RUN uv pip install --system vllm==0.10.0 torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
|
64 |
+
RUN uv pip install --system --index-strategy unsafe-best-match vllm==0.10.0 --extra-index-url https://download.pytorch.org/whl/cu128
|
65 |
+
|
66 |
+
|
67 |
# Downgrade triton because the following error occurred when using triton==3.3.1
|
68 |
# https://github.com/vllm-project/vllm/issues/20259#issuecomment-3157159183
|
69 |
+
# https://github.com/vllm-project/vllm/issues/19203#issuecomment-2989796604
|
70 |
# /usr/local/lib/python3.12/dist-packages/vllm/attention/ops/prefix_prefill.py:36:0: error: Failures have been detected while processing an MLIR pass pipeline
|
71 |
# /usr/local/lib/python3.12/dist-packages/vllm/attention/ops/prefix_prefill.py:36:0: note: Pipeline failed while executing [`ConvertTritonGPUToLLVM` on 'builtin.module' operation]: reproducer generated at `std::errs, please share the reproducer above with Triton project.`
|
72 |
# INFO: 10.16.9.222:28100 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
|
|
|
181 |
# INFO: Waiting for application shutdown.
|
182 |
# INFO: Application shutdown complete.
|
183 |
# INFO: Finished server process [27]
|
184 |
+
RUN uv pip install --system --index-strategy unsafe-best-match triton==3.2 --extra-index-url https://download.pytorch.org/whl/cu128
|
185 |
|
186 |
# # Then, install xformers with the --no-build-isolation flag
|
187 |
# RUN uv pip install --system \
|