Spaces:
Paused
Paused
Update Dockerfile
Browse files- Dockerfile +116 -1
Dockerfile
CHANGED
@@ -61,7 +61,122 @@ RUN pip install uv setuptools
|
|
61 |
|
62 |
# Install vLLM
|
63 |
# RUN uv pip install --system vllm==0.10.0 torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
# # Then, install xformers with the --no-build-isolation flag
|
67 |
# RUN uv pip install --system \
|
|
|
61 |
|
62 |
# Install vLLM
|
63 |
# RUN uv pip install --system vllm==0.10.0 torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
|
64 |
+
# Downgrade triton because the following error occurred when using triton==3.3.1
|
65 |
+
# /usr/local/lib/python3.12/dist-packages/vllm/attention/ops/prefix_prefill.py:36:0: error: Failures have been detected while processing an MLIR pass pipeline
|
66 |
+
# /usr/local/lib/python3.12/dist-packages/vllm/attention/ops/prefix_prefill.py:36:0: note: Pipeline failed while executing [`ConvertTritonGPUToLLVM` on 'builtin.module' operation]: reproducer generated at `std::errs, please share the reproducer above with Triton project.`
|
67 |
+
# INFO: 10.16.9.222:28100 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
|
68 |
+
# ERROR 08-06 19:13:13 [engine.py:165] RuntimeError('PassManager::run failed')
|
69 |
+
# ERROR 08-06 19:13:13 [engine.py:165] Traceback (most recent call last):
|
70 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/multiprocessing/engine.py", line 163, in start
|
71 |
+
# ERROR 08-06 19:13:13 [engine.py:165] self.run_engine_loop()
|
72 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/multiprocessing/engine.py", line 226, in run_engine_loop
|
73 |
+
# ERROR 08-06 19:13:13 [engine.py:165] request_outputs = self.engine_step()
|
74 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^
|
75 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/multiprocessing/engine.py", line 252, in engine_step
|
76 |
+
# ERROR 08-06 19:13:13 [engine.py:165] raise e
|
77 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/multiprocessing/engine.py", line 235, in engine_step
|
78 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return self.engine.step()
|
79 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^
|
80 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/llm_engine.py", line 1334, in step
|
81 |
+
# ERROR 08-06 19:13:13 [engine.py:165] outputs = self.model_executor.execute_model(
|
82 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
83 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/executor_base.py", line 146, in execute_model
|
84 |
+
# ERROR 08-06 19:13:13 [engine.py:165] output = self.collective_rpc("execute_model",
|
85 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
86 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 58, in collective_rpc
|
87 |
+
# ERROR 08-06 19:13:13 [engine.py:165] answer = run_method(self.driver_worker, method, args, kwargs)
|
88 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
89 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/utils/__init__.py", line 2985, in run_method
|
90 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return func(*args, **kwargs)
|
91 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^
|
92 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/worker/worker_base.py", line 417, in execute_model
|
93 |
+
# ERROR 08-06 19:13:13 [engine.py:165] output = self.model_runner.execute_model(
|
94 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
95 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
|
96 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return func(*args, **kwargs)
|
97 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^
|
98 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/worker/model_runner.py", line 1703, in execute_model
|
99 |
+
# ERROR 08-06 19:13:13 [engine.py:165] hidden_or_intermediate_states = model_executable(
|
100 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^
|
101 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
|
102 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return self._call_impl(*args, **kwargs)
|
103 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
104 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1762, in _call_impl
|
105 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return forward_call(*args, **kwargs)
|
106 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
107 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/llama.py", line 584, in forward
|
108 |
+
# ERROR 08-06 19:13:13 [engine.py:165] model_output = self.model(input_ids, positions, intermediate_tensors,
|
109 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
110 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/compilation/decorators.py", line 206, in __call__
|
111 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return self.forward(*args, **kwargs)
|
112 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
113 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/llama.py", line 392, in forward
|
114 |
+
# ERROR 08-06 19:13:13 [engine.py:165] hidden_states, residual = layer(positions, hidden_states, residual)
|
115 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
116 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
|
117 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return self._call_impl(*args, **kwargs)
|
118 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
119 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1762, in _call_impl
|
120 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return forward_call(*args, **kwargs)
|
121 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
122 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/llama.py", line 305, in forward
|
123 |
+
# ERROR 08-06 19:13:13 [engine.py:165] hidden_states = self.self_attn(positions=positions,
|
124 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
125 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
|
126 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return self._call_impl(*args, **kwargs)
|
127 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
128 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1762, in _call_impl
|
129 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return forward_call(*args, **kwargs)
|
130 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
131 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/llama.py", line 203, in forward
|
132 |
+
# ERROR 08-06 19:13:13 [engine.py:165] attn_output = self.attn(q, k, v)
|
133 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^
|
134 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
|
135 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return self._call_impl(*args, **kwargs)
|
136 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
137 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1762, in _call_impl
|
138 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return forward_call(*args, **kwargs)
|
139 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
140 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/attention/layer.py", line 288, in forward
|
141 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return torch.ops.vllm.unified_attention(
|
142 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
143 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/torch/_ops.py", line 1158, in __call__
|
144 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return self._op(*args, **(kwargs or {}))
|
145 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
146 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/attention/layer.py", line 448, in unified_attention
|
147 |
+
# ERROR 08-06 19:13:13 [engine.py:165] output = self.impl.forward(self, query, key, value, kv_cache,
|
148 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
149 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/attention/backends/xformers.py", line 584, in forward
|
150 |
+
# ERROR 08-06 19:13:13 [engine.py:165] out = PagedAttention.forward_prefix(
|
151 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
152 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/attention/ops/paged_attn.py", line 214, in forward_prefix
|
153 |
+
# ERROR 08-06 19:13:13 [engine.py:165] context_attention_fwd(
|
154 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
|
155 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return func(*args, **kwargs)
|
156 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^
|
157 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/vllm/attention/ops/prefix_prefill.py", line 850, in context_attention_fwd
|
158 |
+
# ERROR 08-06 19:13:13 [engine.py:165] _fwd_kernel[grid](
|
159 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/triton/runtime/jit.py", line 347, in <lambda>
|
160 |
+
# ERROR 08-06 19:13:13 [engine.py:165] return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)
|
161 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
162 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/triton/runtime/jit.py", line 569, in run
|
163 |
+
# ERROR 08-06 19:13:13 [engine.py:165] kernel = self.compile(src, target=target, options=options.__dict__)
|
164 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
165 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/triton/compiler/compiler.py", line 284, in compile
|
166 |
+
# ERROR 08-06 19:13:13 [engine.py:165] next_module = compile_ir(module, metadata)
|
167 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
168 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/triton/backends/nvidia/compiler.py", line 450, in <lambda>
|
169 |
+
# ERROR 08-06 19:13:13 [engine.py:165] stages["llir"] = lambda src, metadata: self.make_llir(src, metadata, options, capability)
|
170 |
+
# ERROR 08-06 19:13:13 [engine.py:165] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
171 |
+
# ERROR 08-06 19:13:13 [engine.py:165] File "/usr/local/lib/python3.12/dist-packages/triton/backends/nvidia/compiler.py", line 341, in make_llir
|
172 |
+
# ERROR 08-06 19:13:13 [engine.py:165] pm.run(mod)
|
173 |
+
# ERROR 08-06 19:13:13 [engine.py:165] RuntimeError: PassManager::run failed
|
174 |
+
# DEBUG 08-06 19:13:13 [engine.py:169] MQLLMEngine is shut down.
|
175 |
+
# INFO: Shutting down
|
176 |
+
# INFO: Waiting for application shutdown.
|
177 |
+
# INFO: Application shutdown complete.
|
178 |
+
# INFO: Finished server process [27]
|
179 |
+
RUN uv pip install --system --index-strategy unsafe-best-match vllm==0.10.0 triton==3.2 --extra-index-url https://download.pytorch.org/whl/cu128
|
180 |
|
181 |
# # Then, install xformers with the --no-build-isolation flag
|
182 |
# RUN uv pip install --system \
|