LiamKhoaLe committed on
Commit
425af9c
·
1 Parent(s): 20a6be7

Update primary endpoint to the 32B model, with the 7B model as fallback

Browse files
Files changed (1) hide show
  1. app.py +57 -23
app.py CHANGED
@@ -60,23 +60,21 @@ def gemini_flash_completion(prompt, model="gemini-2.5-flash-preview-04-17", temp
60
  return "Error generating response from Gemini."
61
 
62
  # —— Qwen 2.5 VL Client Setup —————
63
- qwen_client = Client("prithivMLmods/Qwen2.5-VL-7B-Instruct")
64
  logger.info("[Qwen] Using remote API via Gradio Client")
65
-
66
  def qwen_image_summary(image_file: UploadFile, subject: str, level: str) -> str:
67
  from gradio_client import Client, handle_file
68
- import tempfile
69
- # Read file with appropriate format
 
70
  if image_file.content_type not in {"image/png", "image/jpeg", "image/jpg"}:
71
  raise HTTPException(415, "Only PNG or JPEG images are supported")
72
- # Write/read file
73
- try:
74
- with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
75
- tmp.write(image_file.file.read())
76
- tmp_path = tmp.name
77
- logger.info(f"[Qwen] File saved at {tmp_path}, sending to /generate_image...")
78
- # Prompt
79
- instruction = f"""
80
  You are an academic tutor.
81
 
82
  The student has submitted an image that may contain multiple exam-style questions or study material. Your task is to:
@@ -97,12 +95,42 @@ def qwen_image_summary(image_file: UploadFile, subject: str, level: str) -> str:
97
 
98
  Only include what appears in the image. Be accurate and neat.
99
  """
100
- # Client spec
101
- client = Client("prithivMLmods/Qwen2.5-VL")
102
- # Client configs
103
- result = client.predict(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  model_name="Qwen2.5-VL-7B-Instruct",
105
- text=instruction,
106
  image=handle_file(tmp_path),
107
  max_new_tokens=1024,
108
  temperature=0.6,
@@ -111,13 +139,19 @@ def qwen_image_summary(image_file: UploadFile, subject: str, level: str) -> str:
111
  repetition_penalty=1.2,
112
  api_name="/generate_image"
113
  )
114
- logger.info("[Qwen] Summary returned from /generate_image")
 
115
  os.remove(tmp_path)
116
- return result.strip()
117
- # Error
118
- except Exception as e:
119
- logger.error(f"[QWEN_API_ERROR] {e}")
120
- raise HTTPException(500, "❌ Qwen image analysis failed")
 
 
 
 
 
121
 
122
 
123
  # ————— Unified Chat Endpoint —————
 
60
  return "Error generating response from Gemini."
61
 
62
  # —— Qwen 2.5 VL Client Setup —————
 
63
  logger.info("[Qwen] Using remote API via Gradio Client")
64
+ # Read the uploaded image and ask the Qwen models to reason over it
65
  def qwen_image_summary(image_file: UploadFile, subject: str, level: str) -> str:
66
  from gradio_client import Client, handle_file
67
+ import tempfile, os
68
+ from fastapi import HTTPException
69
+ # Reject unsupported image formats
70
  if image_file.content_type not in {"image/png", "image/jpeg", "image/jpg"}:
71
  raise HTTPException(415, "Only PNG or JPEG images are supported")
72
+ # Write the uploaded image to a temporary file
73
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
74
+ tmp.write(image_file.file.read())
75
+ tmp_path = tmp.name
76
+ # Engineered prompting
77
+ instruction = f"""
 
 
78
  You are an academic tutor.
79
 
80
  The student has submitted an image that may contain multiple exam-style questions or study material. Your task is to:
 
95
 
96
  Only include what appears in the image. Be accurate and neat.
97
  """
98
+
99
+ # ——— 1️⃣ Primary: 32B Model (Qwen/Qwen2.5-VL-32B-Instruct) ———
100
+ try:
101
+ logger.info("[Qwen32B] Using /predict ...")
102
+ client32 = Client("Qwen/Qwen2.5-VL-32B-Instruct")
103
+ # Payload handler
104
+ _chatbot_payload = [
105
+ (None, instruction.strip()),
106
+ (None, {"file": tmp_path})
107
+ ]
108
+ # Call client
109
+ result = client32.predict(_chatbot=_chatbot_payload, api_name="/predict")
110
+ # Clean result
111
+ if isinstance(result, (list, tuple)) and result:
112
+ assistant_reply = (result[0] or "").strip()
113
+ else:
114
+ assistant_reply = str(result).strip()
115
+ # Primary success
116
+ if assistant_reply:
117
+ logger.info("[Qwen32B] ✅ Successfully transcribed.")
118
+ os.remove(tmp_path)
119
+ return assistant_reply
120
+ # Empty return
121
+ raise ValueError("Empty result from 32B")
122
+ # Fail on primary
123
+ except Exception as e_32b:
124
+ logger.warning(f"[Qwen32B] ❌ Failed: {e_32b} — falling back to Qwen 7B")
125
+
126
+ # ——— 2️⃣ Fallback: 7B Model (prithivMLmods/Qwen2.5-VL) ———
127
+ try:
128
+ logger.info("[Qwen7B] Using /generate_image fallback ...")
129
+ client7 = Client("prithivMLmods/Qwen2.5-VL")
130
+ # Fallback client calling
131
+ result = client7.predict(
132
  model_name="Qwen2.5-VL-7B-Instruct",
133
+ text=instruction.strip(),
134
  image=handle_file(tmp_path),
135
  max_new_tokens=1024,
136
  temperature=0.6,
 
139
  repetition_penalty=1.2,
140
  api_name="/generate_image"
141
  )
142
+ # Clean result
143
+ result = (result or "").strip()
144
  os.remove(tmp_path)
145
+ # Extract fallback result
146
+ if result:
147
+ logger.info("[Qwen7B] Fallback succeeded.")
148
+ return result
149
+ # Empty return
150
+ raise ValueError("Empty result from 7B fallback")
151
+ # Fail on both
152
+ except Exception as e_7b:
153
+ logger.error(f"[Qwen7B] ❌ Fallback also failed: {e_7b}")
154
+ raise HTTPException(500, "❌ Both Qwen image models failed to process the image.")
155
 
156
 
157
  # ————— Unified Chat Endpoint —————