Commit · 425af9c
1 Parent(s): 20a6be7
Update primary endpoint to the 32B model, with 7B fallback
app.py
CHANGED
@@ -60,23 +60,21 @@ def gemini_flash_completion(prompt, model="gemini-2.5-flash-preview-04-17", temp
         return "Error generating response from Gemini."

 # —— Qwen 2.5 VL Client Setup —————
-qwen_client = Client("prithivMLmods/Qwen2.5-VL-7B-Instruct")
 logger.info("[Qwen] Using remote API via Gradio Client")
-
+# Read the uploaded image and reason over its contents
 def qwen_image_summary(image_file: UploadFile, subject: str, level: str) -> str:
     from gradio_client import Client, handle_file
-    import tempfile
-
+    import tempfile, os
+    from fastapi import HTTPException
+    # Reject unsupported formats
     if image_file.content_type not in {"image/png", "image/jpeg", "image/jpg"}:
         raise HTTPException(415, "Only PNG or JPEG images are supported")
-    # Write
-
-
-
-
-
-    # Prompt
-    instruction = f"""
+    # Write the uploaded image to a temporary cache file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
+        tmp.write(image_file.file.read())
+        tmp_path = tmp.name
+    # Engineered prompt
+    instruction = f"""
 You are an academic tutor.

 The student has submitted an image that may contain multiple exam-style questions or study material. Your task is to:
@@ -97,12 +95,42 @@ def qwen_image_summary(image_file: UploadFile, subject: str, level: str) -> str:

 Only include what appears in the image. Be accurate and neat.
 """
-
-
-
-
+
+    # ——— 1️⃣ Primary: 32B Model (Qwen/Qwen2.5-VL-32B-Instruct) ———
+    try:
+        logger.info("[Qwen32B] Using /predict ...")
+        client32 = Client("Qwen/Qwen2.5-VL-32B-Instruct")
+        # Build the chatbot-style payload
+        _chatbot_payload = [
+            (None, instruction.strip()),
+            (None, {"file": tmp_path})
+        ]
+        # Call the remote client
+        result = client32.predict(_chatbot=_chatbot_payload, api_name="/predict")
+        # Clean the result
+        if isinstance(result, (list, tuple)) and result:
+            assistant_reply = (result[0] or "").strip()
+        else:
+            assistant_reply = str(result).strip()
+        # Primary success
+        if assistant_reply:
+            logger.info("[Qwen32B] ✅ Successfully transcribed.")
+            os.remove(tmp_path)
+            return assistant_reply
+        # Empty reply
+        raise ValueError("Empty result from 32B")
+    # Primary failed
+    except Exception as e_32b:
+        logger.warning(f"[Qwen32B] ❌ Failed: {e_32b} — falling back to Qwen 7B")
+
+    # ——— 2️⃣ Fallback: 7B Model (prithivMLmods/Qwen2.5-VL) ———
+    try:
+        logger.info("[Qwen7B] Using /generate_image fallback ...")
+        client7 = Client("prithivMLmods/Qwen2.5-VL")
+        # Fallback client call
+        result = client7.predict(
             model_name="Qwen2.5-VL-7B-Instruct",
-        text=instruction,
+            text=instruction.strip(),
             image=handle_file(tmp_path),
             max_new_tokens=1024,
             temperature=0.6,
@@ -111,13 +139,19 @@ def qwen_image_summary(image_file: UploadFile, subject: str, level: str) -> str:
             repetition_penalty=1.2,
             api_name="/generate_image"
         )
-
+        # Clean the result
+        result = (result or "").strip()
         os.remove(tmp_path)
-
-
-
-
-
+        # Return the fallback result
+        if result:
+            logger.info("[Qwen7B] ✅ Fallback succeeded.")
+            return result
+        # Empty reply
+        raise ValueError("Empty result from 7B fallback")
+    # Both models failed
+    except Exception as e_7b:
+        logger.error(f"[Qwen7B] ❌ Fallback also failed: {e_7b}")
+        raise HTTPException(500, "❌ Both Qwen image models failed to process the image.")


 # ————— Unified Chat Endpoint —————
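For a quick sanity check of the new call chain outside the FastAPI app, the same primary/fallback flow can be driven directly with gradio_client. The sketch below is illustrative only: the Space names, api_name values, and generation parameters are copied from the diff above, while transcribe_with_fallback and sample.jpg are hypothetical names introduced for this example.

# Standalone sketch of the 32B-primary / 7B-fallback chain used in the commit above.
# Assumptions: Space names, api_name values, and parameters come from the diff;
# "sample.jpg" is a hypothetical local test image.
from gradio_client import Client, handle_file

def transcribe_with_fallback(image_path: str, prompt: str) -> str:
    try:
        # Primary: Qwen/Qwen2.5-VL-32B-Instruct via /predict with a chatbot-style payload
        client32 = Client("Qwen/Qwen2.5-VL-32B-Instruct")
        result = client32.predict(
            _chatbot=[(None, prompt), (None, {"file": image_path})],
            api_name="/predict",
        )
        reply = result[0] if isinstance(result, (list, tuple)) and result else str(result)
        if reply and reply.strip():
            return reply.strip()
        raise ValueError("Empty result from 32B")
    except Exception:
        # Fallback: prithivMLmods/Qwen2.5-VL via /generate_image
        client7 = Client("prithivMLmods/Qwen2.5-VL")
        result = client7.predict(
            model_name="Qwen2.5-VL-7B-Instruct",
            text=prompt,
            image=handle_file(image_path),
            max_new_tokens=1024,
            temperature=0.6,
            repetition_penalty=1.2,
            api_name="/generate_image",
        )
        return (result or "").strip()

if __name__ == "__main__":
    print(transcribe_with_fallback("sample.jpg", "Transcribe every question in this image."))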