import gradio as gr
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
from threading import Thread
from qwen_vl_utils import process_vision_info
import torch
# Check if a GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
local_path = "Fancy-MLLM/R1-OneVision-7B"
# Load the model on the appropriate device (GPU if available, otherwise CPU)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    local_path, torch_dtype="auto", device_map=device
)
processor = AutoProcessor.from_pretrained(local_path)
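
# Note: with device_map=device the full model is placed on a single device. If
# several GPUs were available, device_map="auto" (an alternative, not what this
# Space uses) would let accelerate shard the weights across them:
#   model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
#       local_path, torch_dtype="auto", device_map="auto"
#   )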
def generate_output(image, text):
    # Build the chat message: one image (with pixel-count bounds for the vision
    # tower) plus the user's question
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image, "min_pixels": 1003520, "max_pixels": 12845056},
                {"type": "text", "text": text},
            ],
        }
    ]

    # Prepare inputs for the model
    text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text_input],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    # Move inputs to the same device as the model
    inputs = inputs.to(model.device)

    # Stream tokens back as they are generated so the UI can update incrementally
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=4096,
        top_p=0.001,
        top_k=1,
        temperature=0.01,  # top_k=1 with near-zero temperature makes decoding effectively greedy
        repetition_penalty=1.0,
    )

    # Run generation on a background thread; the streamer is consumed on this one
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    generated_text = ""
    try:
        for new_text in streamer:
            generated_text += new_text
            yield generated_text
    except Exception as e:
        print(f"Error: {e}")
        yield f"Error occurred: {e}"
    finally:
        thread.join()
Css = """
#output-markdown {
    overflow-y: auto;
    white-space: pre-wrap;
    word-wrap: break-word;
}

#output-markdown .math {
    overflow-x: auto;
    max-width: 100%;
}

.markdown-text {
    white-space: pre-wrap;
    word-wrap: break-word;
}

.markdown-output {
    min-height: 20vh;
    max-width: 100%;
    overflow-y: auto;
}

#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display > .katex { display: inline; }
#qwen-md .katex-display > .katex > .katex-html { display: inline; }
"""
with gr.Blocks(css=Css) as demo:
    gr.HTML("""<center><font size=8>R1-OneVision Demo</font></center>""")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload")  # the upload is handed to the handler as a PIL image
            input_text = gr.Textbox(label="Input your question")
            with gr.Row():
                clear_btn = gr.ClearButton([input_image, input_text])
                submit_btn = gr.Button("Submit", variant="primary")
        with gr.Column():
            output_text = gr.Markdown(elem_id="qwen-md", container=True, elem_classes="markdown-output")

    submit_btn.click(fn=generate_output, inputs=[input_image, input_text], outputs=output_text)

demo.launch(share=True)