# Hugging Face Space backend for the Tutor Chatbot.
# Access site: https://binkhoale1812-tutorbot.hf.space
import logging
import os
import tempfile
import time

import psutil
import uvicorn
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from google import genai
from gradio_client import Client, handle_file
# ———————— Logging —————————
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s — %(name)s — %(levelname)s — %(message)s",
    force=True,  # override any handler configuration a host framework installed
)
logger = logging.getLogger("tutor-chatbot")
logger.setLevel(logging.DEBUG)
logger.info("🚀 Starting Tutor Chatbot API...")
# —————— Environment ———————
# The Gemini key is injected through the "FlashAPI" secret; fail fast at startup
# rather than erroring on the first request.
gemini_flash_api_key = os.getenv("FlashAPI")
if not gemini_flash_api_key:
    raise ValueError("❌ Missing Gemini Flash API key!")
# —————— System Check ——————
def check_system_resources():
    """Log a one-shot snapshot of host RAM, CPU, and disk utilisation.

    Note: ``psutil.cpu_percent(interval=1)`` blocks for one second while it
    samples CPU load, so this delays startup slightly.
    """
    memory = psutil.virtual_memory()
    cpu = psutil.cpu_percent(interval=1)
    disk = psutil.disk_usage("/")
    logger.info(f"🔍 RAM: {memory.percent}%, CPU: {cpu}%, Disk: {disk.percent}%")
check_system_resources()
# Keep native thread pools small on the constrained Space hardware.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# —————— FastAPI Setup —————
app = FastAPI(title="Tutor Chatbot API")
# Allow the local dev servers and the deployed Vercel frontend to call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:5173",
        "http://localhost:3000",
        "https://ai-tutor-beta-topaz.vercel.app",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# —————— Gemini 2.5 API Call ——————
def gemini_flash_completion(prompt, model="gemini-2.5-flash-preview-04-17", temperature=0.7):
    """Send ``prompt`` to Gemini and return the response text.

    On any API failure this returns a fixed error string instead of raising,
    so callers can surface the message directly to the user.

    NOTE(review): ``temperature`` is accepted but never forwarded to the API —
    confirm whether it should be passed via a generation config.
    """
    try:
        # Client construction is inside the try so auth/config errors are also
        # converted to the friendly error string rather than escaping.
        client = genai.Client(api_key=gemini_flash_api_key)
        response = client.models.generate_content(model=model, contents=prompt)
        return response.text
    except Exception as e:
        logger.error(f"❌ Gemini error: {e}")
        return "Error generating response from Gemini."
# —— Qwen 2.5 VL Client Setup —————
# No persistent client here: Gradio clients are created per request below.
logger.info("[Qwen] Using remote API via Gradio Client")
# Read and reason on the image data sent over
def qwen_image_summary(image_file: UploadFile, subject: str, level: str) -> str:
    """Transcribe exam-style questions from an uploaded image via Qwen2.5-VL.

    Tries the hosted 32B model first and falls back to a 7B Space on any
    failure or empty reply.

    Args:
        image_file: PNG/JPEG upload from the client.
        subject: currently unused — NOTE(review): confirm whether it should
            influence the prompt.
        level: currently unused — see note above on ``subject``.

    Returns:
        The transcription text produced by whichever model succeeded.

    Raises:
        HTTPException: 415 for unsupported image types; 500 if both models fail.
    """
    # Reject formats the vision models do not accept.
    if image_file.content_type not in {"image/png", "image/jpeg", "image/jpg"}:
        raise HTTPException(415, "Only PNG or JPEG images are supported")
    # Persist the upload to a temp file so the Gradio clients can read a path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
        tmp.write(image_file.file.read())
        tmp_path = tmp.name
    # Engineered prompting
    instruction = f"""
You are an academic tutor.
The student has submitted an image that may contain multiple exam-style questions or study material. Your task is to:
1. Carefully extract **each individual question** from the image (if visible), even if they are numbered (e.g., 1., 2., 3.).
2. If any question contains **multiple-choice options** (e.g., a), b), c), d)), include them exactly as shown.
3. Preserve the original structure and wording as much as possible — DO NOT paraphrase.
4. Do not include commentary, analysis, or summaries — just return the extracted question(s) cleanly.
Format your output as:
1. Question 1 text
a) option A
b) option B
c) option C
d) option D
2. Question 2 text
a) ... (if applicable)
Only include what appears in the image. Be accurate and neat.
"""
    try:
        # ——— 1️⃣ Primary: 32B Model (Qwen/Qwen2.5-VL-32B-Instruct) ———
        try:
            logger.info("[Qwen32B] Using /predict ...")
            client32 = Client("Qwen/Qwen2.5-VL-32B-Instruct")
            # Chatbot-style payload: instruction first, then the image file.
            _chatbot_payload = [
                (None, instruction.strip()),
                (None, {"file": tmp_path})
            ]
            result = client32.predict(_chatbot=_chatbot_payload, api_name="/predict")
            # Normalise the reply: list/tuple -> first element, else stringify.
            if isinstance(result, (list, tuple)) and result:
                assistant_reply = (result[0] or "").strip()
            else:
                assistant_reply = str(result).strip()
            if assistant_reply:
                logger.info("[Qwen32B] ✅ Successfully transcribed.")
                return assistant_reply
            # Treat an empty reply as a failure so the fallback runs.
            raise ValueError("Empty result from 32B")
        except Exception as e_32b:
            logger.warning(f"[Qwen32B] ❌ Failed: {e_32b} — falling back to Qwen 7B")
        # ——— 2️⃣ Fallback: 7B Model (prithivMLmods/Qwen2.5-VL) ———
        try:
            logger.info("[Qwen7B] Using /generate_image fallback ...")
            client7 = Client("prithivMLmods/Qwen2.5-VL")
            result = client7.predict(
                model_name="Qwen2.5-VL-7B-Instruct",
                text=instruction.strip(),
                image=handle_file(tmp_path),
                max_new_tokens=1024,
                temperature=0.6,
                top_p=0.9,
                top_k=50,
                repetition_penalty=1.2,
                api_name="/generate_image"
            )
            result = (result or "").strip()
            if result:
                logger.info("[Qwen7B] ✅ Fallback succeeded.")
                return result
            raise ValueError("Empty result from 7B fallback")
        except Exception as e_7b:
            logger.error(f"[Qwen7B] ❌ Fallback also failed: {e_7b}")
            raise HTTPException(500, "❌ Both Qwen image models failed to process the image.")
    finally:
        # Fix: the original leaked the temp file when both models failed —
        # always clean it up, whatever path we exit through.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
# ————— Unified Chat Endpoint —————
# NOTE(review): no route decorator (e.g. ``@app.post("/chat")``) is visible in
# this file — confirm how this handler is registered with ``app``.
async def chat_endpoint(
    query: str = Form(""),
    subject: str = Form("general"),
    level: str = Form("secondary"),
    lang: str = Form("EN"),
    image: UploadFile = File(None)
):
    """Answer a student question, optionally grounded in an uploaded image.

    Flow:
      1. If an image was uploaded, transcribe it with Qwen2.5-VL.
      2. Build a Gemini prompt for image+query, image-only, or query-only.
      3. Return Gemini's markdown answer with the response time appended.

    Returns:
        JSONResponse with a ``response`` string; transcription failures are
        surfaced with their original HTTP status code.
    """
    start_time = time.time()
    image_context = ""
    # Step 1: If an image is present, get a transcription from Qwen
    if image:
        logger.info("[Router] 📸 Image uploaded — using Qwen2.5-VL for transcription")
        try:
            image_context = qwen_image_summary(image, subject, level)
        except HTTPException as e:
            # Surface the transcription failure as the response body.
            return JSONResponse(status_code=e.status_code, content={"response": e.detail})
    # Step 2: Build prompt for Gemini depending on presence of text and/or image
    if query and image_context:
        # Case: image + query
        prompt = f"""
You are an academic tutor specialized in **{subject}** at **{level}** level.
Below is an image submitted by a student and transcribed by a vision model:
--- BEGIN IMAGE CONTEXT ---
{image_context}
--- END IMAGE CONTEXT ---
The student asked the following:
**Question:** {query}
Respond appropriately using markdown:
- **Bold** key ideas
- *Italic* for reasoning
- Provide examples if useful
**Response Language:** {lang}
"""
    elif image_context and not query:
        # Case: image only — auto-answer based on content
        prompt = f"""
You are an academic tutor specialized in **{subject}** at **{level}** level.
A student submitted an image with no question. Below is the vision model’s transcription:
--- BEGIN IMAGE CONTENT ---
{image_context}
--- END IMAGE CONTENT ---
Based on this image, explain its key ideas and help the student understand it.
Assume it's part of their study material.
Respond using markdown:
- **Bold** key terms
- *Italic* for explanations
- Give brief insights or examples
**Response Language:** {lang}
"""
    elif query and not image_context:
        # Case: text only
        prompt = f"""
You are an academic tutor specialized in **{subject}** at **{level}** level.
**Question:** {query}
Answer clearly using markdown:
- **Bold** key terms
- *Italic* for explanations
- Include examples if helpful
**Response Language:** {lang}
"""
    else:
        # Nothing was sent
        return JSONResponse(content={"response": "❌ Please provide either a query, an image, or both."})
    # Step 3: Call Gemini
    response_text = gemini_flash_completion(prompt)
    end_time = time.time()
    response_text += f"\n\n*(Response time: {end_time - start_time:.2f} seconds)*"
    return JSONResponse(content={"response": response_text})
# ————— Classroom Pydantic Schema —————
# NOTE(review): ``validator`` and ``List`` are imported but unused below —
# confirm before removing.
from pydantic import BaseModel, Field, validator
from typing import Optional, List, Literal
# Dynamic cls
class StudyPreferences(BaseModel):
    """Per-classroom study scheduling preferences."""

    daysPerWeek: int = Field(..., ge=1, le=7)          # study sessions per week
    hoursPerSession: float = Field(..., ge=0.5, le=4)  # length of each session
    numberWeekTotal: float = Field(..., ge=1, le=52)   # total plan length in weeks
    learningStyle: Literal["step-by-step", "conceptual", "visual"]
# Dynamic cls
class ClassroomRequest(BaseModel):
    """Request payload for classroom creation / timetable generation."""

    id: str                                # echoed back as classroom_id
    name: str = Field(..., min_length=2)
    role: Literal["tutor", "student"]
    subject: str
    gradeLevel: str
    notice: Optional[str] = None           # optional free-form instruction
    textbookUrl: Optional[str] = None
    syllabusUrl: Optional[str] = None
    studyPreferences: StudyPreferences
# —————— Time table creator ——————
import json
from fastapi import Body
async def create_classroom(payload: ClassroomRequest = Body(...)):
    """
    Generate a detailed study timetable based on classroom parameters.

    Asks Gemini for pure-JSON output; when the reply cannot be parsed as a
    JSON object, the raw text is returned under ``timetable_raw`` so the
    client can still recover the content.
    NOTE(review): no route decorator is visible here — confirm registration.
    """
    # ---------- Build prompt for Gemini 2.5 ----------
    prefs = payload.studyPreferences
    prompt = f"""
You are an expert academic coordinator.
Create a **{prefs.numberWeekTotal}-week study timetable** for a classroom with the following settings:
- Subject: {payload.subject}
- Grade level: {payload.gradeLevel}
- Instruction (Optional): {payload.notice}
- Study days per week: {prefs.daysPerWeek}
- Hours per session: {prefs.hoursPerSession}
- Preferred learning style: {prefs.learningStyle}
- Role perspective: {payload.role}
{"- Textbook URL: " + payload.textbookUrl if payload.textbookUrl else "Not Available"}
{"- Syllabus URL: " + payload.syllabusUrl if payload.syllabusUrl else "Not Available"}
Requirements:
1. Divide each week into exactly {prefs.daysPerWeek} sessions (label Day 1 … Day {prefs.daysPerWeek}).
2. For **each session**, return:
- `week` (1-{prefs.numberWeekTotal})
- `day` (1-{prefs.daysPerWeek})
- `durationHours` (fixed: {prefs.hoursPerSession})
- `topic` (max 15 words)
- `activities` (array of 2-3 bullet strings)
- `materials` (array of links/titles; include textbook chapters if URL given, else suggesting external textbook/document for referencing)
- `homework` (concise task ≤ 50 words)
3. **Output pure JSON only** using the schema:
```json
{{
"classroom_id": "<same id as request>",
"timetable": [{{session objects as listed}}]
}}
Do not wrap JSON in markdown fences or commentary.
"""
    raw = gemini_flash_completion(prompt).strip()
    # Fix: models sometimes wrap output in ``` fences despite the instruction —
    # strip them before parsing.
    if raw.startswith("```"):
        raw = raw.strip("`").strip()
        if raw.lower().startswith("json"):
            raw = raw[4:].strip()
    # ---------- Attempt to parse JSON ----------
    try:
        timetable_json = json.loads(raw)
    except json.JSONDecodeError:
        logger.warning("Gemini returned invalid JSON; sending raw text.")
        return JSONResponse(content={"classroom_id": payload.id, "timetable_raw": raw})
    # Fix: guard against non-object JSON (e.g. a bare array), which would make
    # the key assignment below raise TypeError.
    if not isinstance(timetable_json, dict):
        logger.warning("Gemini returned non-object JSON; sending raw text.")
        return JSONResponse(content={"classroom_id": payload.id, "timetable_raw": raw})
    # Ensure id is echoed (fallback if model forgot)
    timetable_json["classroom_id"] = payload.id
    return JSONResponse(content=timetable_json)
# —————— Launch Server ———————
if __name__ == "__main__":
    logger.info("✅ Launching FastAPI server...")
    try:
        # Port 7860 is the standard Hugging Face Spaces HTTP port.
        uvicorn.run(app, host="0.0.0.0", port=7860, log_level="debug")
    except Exception as e:
        logger.error(f"❌ Server startup failed: {e}")
        # Fix: ``exit()`` is a ``site``-module convenience not guaranteed to
        # exist; raise SystemExit directly for the same effect.
        raise SystemExit(1)