600,000 downloads in a month and not a single working Hugging Face Space! Puzzling...

#22
by Didier - opened

The title should be pretty explicit: there isn't a single working Hugging Face Space with the olmOCR model.
I tried... and could not get it to "work" either.
I also tried locally on a MacBook Air (M series) and hit the same error other people are reporting, so it doesn't work locally either.
So many downloads, and yet not a single example of the model actually working in the Hugging Face transformers ecosystem... Puzzling...

Error on macOS:

```
IndexError                                Traceback (most recent call last)
Cell In[12], line 2
1 # Generate the output
----> 2 output = model.generate(
3 **inputs,
4 temperature=0.1,
5 max_new_tokens=1_024,
6 num_return_sequences=1,
7 do_sample=True,
8 )

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/transformers/generation/utils.py:2215, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
2207 input_ids, model_kwargs = self._expand_inputs_for_generation(
2208 input_ids=input_ids,
2209 expand_size=generation_config.num_return_sequences,
2210 is_encoder_decoder=self.config.is_encoder_decoder,
2211 **model_kwargs,
2212 )
2214 # 12. run sample (it degenerates to greedy search when generation_config.do_sample=False)
-> 2215 result = self._sample(
2216 input_ids,
2217 logits_processor=prepared_logits_processor,
2218 stopping_criteria=prepared_stopping_criteria,
2219 generation_config=generation_config,
2220 synced_gpus=synced_gpus,
2221 streamer=streamer,
2222 **model_kwargs,
2223 )
2225 elif generation_mode in (GenerationMode.BEAM_SAMPLE, GenerationMode.BEAM_SEARCH):
2226 # 11. prepare beam search scorer
2227 beam_scorer = BeamSearchScorer(
2228 batch_size=batch_size,
2229 num_beams=generation_config.num_beams,
(...) 2234 max_length=generation_config.max_length,
2235 )

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/transformers/generation/utils.py:3206, in GenerationMixin._sample(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)
3203 model_inputs.update({"output_hidden_states": output_hidden_states} if output_hidden_states else {})
3205 # forward pass to get next token
-> 3206 outputs = self(**model_inputs, return_dict=True)
3208 # synced_gpus: don't waste resources running the code we don't need; kwargs must be updated before skipping
3209 model_kwargs = self._update_model_kwargs_for_generation(
3210 outputs,
3211 model_kwargs,
3212 is_encoder_decoder=self.config.is_encoder_decoder,
3213 )

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/torch/nn/modules/module.py:1739, in Module._wrapped_call_impl(self, *args, **kwargs)
1737 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1738 else:
-> 1739 return self._call_impl(*args, **kwargs)

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/torch/nn/modules/module.py:1750, in Module._call_impl(self, *args, **kwargs)
1745 # If we don't have any hooks, we want to skip the rest of the logic in
1746 # this function, and just call forward.
1747 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1748 or _global_backward_pre_hooks or _global_backward_hooks
1749 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1750 return forward_call(*args, **kwargs)
1752 result = None
1753 called_always_called_hooks = set()

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py:1686, in Qwen2VLForConditionalGeneration.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, pixel_values, pixel_values_videos, image_grid_thw, video_grid_thw, rope_deltas)
1684 if pixel_values is not None:
1685 pixel_values = pixel_values.type(self.visual.get_dtype())
-> 1686 image_embeds = self.visual(pixel_values, grid_thw=image_grid_thw)
1687 n_image_tokens = (input_ids == self.config.image_token_id).sum().item()
1688 n_image_features = image_embeds.shape[0]

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/torch/nn/modules/module.py:1739, in Module._wrapped_call_impl(self, *args, **kwargs)
1737 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1738 else:
-> 1739 return self._call_impl(*args, **kwargs)

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/torch/nn/modules/module.py:1750, in Module._call_impl(self, *args, **kwargs)
1745 # If we don't have any hooks, we want to skip the rest of the logic in
1746 # this function, and just call forward.
1747 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1748 or _global_backward_pre_hooks or _global_backward_hooks
1749 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1750 return forward_call(*args, **kwargs)
1752 result = None
1753 called_always_called_hooks = set()

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py:1049, in Qwen2VisionTransformerPretrainedModel.forward(self, hidden_states, grid_thw)
1046 cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0)
1048 for blk in self.blocks:
-> 1049 hidden_states = blk(hidden_states, cu_seqlens=cu_seqlens, rotary_pos_emb=rotary_pos_emb)
1051 return self.merger(hidden_states)

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/torch/nn/modules/module.py:1739, in Module._wrapped_call_impl(self, *args, **kwargs)
1737 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1738 else:
-> 1739 return self._call_impl(*args, **kwargs)

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/torch/nn/modules/module.py:1750, in Module._call_impl(self, *args, **kwargs)
1745 # If we don't have any hooks, we want to skip the rest of the logic in
1746 # this function, and just call forward.
1747 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1748 or _global_backward_pre_hooks or _global_backward_hooks
1749 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1750 return forward_call(*args, **kwargs)
1752 result = None
1753 called_always_called_hooks = set()

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py:431, in Qwen2VLVisionBlock.forward(self, hidden_states, cu_seqlens, rotary_pos_emb)
430 def forward(self, hidden_states, cu_seqlens, rotary_pos_emb) -> torch.Tensor:
--> 431 hidden_states = hidden_states + self.attn(
432 self.norm1(hidden_states), cu_seqlens=cu_seqlens, rotary_pos_emb=rotary_pos_emb
433 )
434 hidden_states = hidden_states + self.mlp(self.norm2(hidden_states))
435 return hidden_states

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/torch/nn/modules/module.py:1739, in Module._wrapped_call_impl(self, *args, **kwargs)
1737 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1738 else:
-> 1739 return self._call_impl(*args, **kwargs)

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/torch/nn/modules/module.py:1750, in Module._call_impl(self, *args, **kwargs)
1745 # If we don't have any hooks, we want to skip the rest of the logic in
1746 # this function, and just call forward.
1747 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1748 or _global_backward_pre_hooks or _global_backward_hooks
1749 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1750 return forward_call(*args, **kwargs)
1752 result = None
1753 called_always_called_hooks = set()

File /opt/anaconda3/envs/olmocr/lib/python3.13/site-packages/transformers/models/qwen2_vl/modeling_qwen2_vl.py:404, in VisionSdpaAttention.forward(self, hidden_states, cu_seqlens, rotary_pos_emb)
402 k = k.transpose(0, 1)
403 v = v.transpose(0, 1)
--> 404 attn_output = F.scaled_dot_product_attention(q, k, v, attention_mask, dropout_p=0.0)
405 attn_output = attn_output.transpose(0, 1)
406 attn_output = attn_output.reshape(seq_length, -1)

IndexError: Dimension out of range (expected to be in range of [-3, 2], but got 3)
```
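For what it's worth, the failure is raised inside `F.scaled_dot_product_attention` in the Qwen2-VL vision blocks, so one workaround worth trying on Mac (CPU/MPS) is to force the eager attention path when loading the model, which avoids that call entirely. A minimal sketch; the checkpoint id below is an assumption for illustration, substitute whichever olmOCR repo you downloaded:

```python
import torch
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

# Assumed checkpoint id for illustration; use the olmOCR repo you downloaded.
MODEL_ID = "allenai/olmOCR-7B-0225-preview"

# attn_implementation="eager" bypasses F.scaled_dot_product_attention,
# the exact call that raises the IndexError in the trace above.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # bf16 support on MPS/CPU is spotty
    attn_implementation="eager",
)
processor = AutoProcessor.from_pretrained(MODEL_ID)
```

Eager attention falls back to the plain matmul-plus-softmax implementation; it is slower, but it tends to be more portable across non-CUDA backends than the masked SDPA path that fails here. No guarantee this fixes it on every setup, but it is a cheap thing to try.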

Hi! Thanks for the feedback. We’re currently working on closing out old tickets, and we apologize that we didn’t get to you in a timely fashion. We’re closing this out for now, but if you’d still like an answer, please re-open and we will get back to you!

baileyk changed discussion status to closed
