Abaryan committed (verified)
Commit 30ca71a · 1 Parent(s): e8f34c8

Update app.py

Files changed (1)
  1. app.py +138 -103
app.py CHANGED
@@ -2,14 +2,52 @@ from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import torch
-from transformers import AutoModelForMultipleChoice, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import os
 from datasets import load_dataset
 import random
-from typing import Optional, List
+from typing import Optional, List, Tuple, Union
 import gradio as gr
+from contextlib import asynccontextmanager
 
-app = FastAPI()
+# Global variables
+model = None
+tokenizer = None
+dataset = None
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Startup: Load the model
+    global model, tokenizer, dataset
+    try:
+        # Load your fine-tuned model and tokenizer
+        model_name = os.getenv("MODEL_NAME", "rgb2gbr/BioXP-0.5B-MedMCQA")
+        model = AutoModelForCausalLM.from_pretrained(model_name)
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+        # Load MedMCQA dataset
+        dataset = load_dataset("openlifescienceai/medmcqa")
+
+        # Move model to GPU if available
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        model = model.to(device)
+        model.eval()
+    except Exception as e:
+        print(f"Error loading model: {str(e)}")
+        raise e
+
+    yield  # This is where FastAPI serves the application
+
+    # Shutdown: Clean up resources if needed
+    if model is not None:
+        del model
+    if tokenizer is not None:
+        del tokenizer
+    if dataset is not None:
+        del dataset
+    torch.cuda.empty_cache()
+
+app = FastAPI(lifespan=lifespan)
 
 # Add CORS middleware for Gradio
 app.add_middleware(
@@ -34,82 +72,97 @@ class DatasetQuestion(BaseModel):
     cop: Optional[int] = None  # Correct option (0-3)
     exp: Optional[str] = None  # Explanation if available
 
-# Global variables
-model = None
-tokenizer = None
-dataset = None
+def format_prompt(question: str, options: List[str]) -> str:
+    """Format the prompt for the model"""
+    prompt = f"Question: {question}\n\nOptions:\n"
+    for i, opt in enumerate(options):
+        prompt += f"{chr(65+i)}. {opt}\n"
+    prompt += "\nAnswer:"
+    return prompt
 
-def load_model():
-    global model, tokenizer, dataset
-    try:
-        # Load your fine-tuned model and tokenizer
-        model_name = os.getenv("BioXP-0.5b", "rgb2gbr/GRPO_BioMedmcqa_Qwen2.5-0.5B")
-        model = AutoModelForMultipleChoice.from_pretrained(model_name)
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-        # Load MedMCQA dataset
-        dataset = load_dataset("openlifescienceai/medmcqa")
-
-        # Move model to GPU if available
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        model = model.to(device)
-        model.eval()
-    except Exception as e:
-        raise Exception(f"Error loading model: {str(e)}")
+def get_question(index: Optional[int] = None, random_question: bool = False, format: str = "api") -> Union[DatasetQuestion, Tuple[str, str, str, str, str]]:
+    """
+    Get a question from the dataset.
+    Args:
+        index: Optional question index
+        random_question: Whether to get a random question
+        format: 'api' for DatasetQuestion object, 'gradio' for tuple
+    """
+    if dataset is None:
+        raise Exception("Dataset not loaded")
+
+    if random_question:
+        index = random.randint(0, len(dataset['train']) - 1)
+    elif index is None:
+        raise ValueError("Either index or random_question must be provided")
+
+    question_data = dataset['train'][index]
+
+    if format == "gradio":
+        return (
+            question_data['question'],
+            question_data['opa'],
+            question_data['opb'],
+            question_data['opc'],
+            question_data['opd']
+        )
+
+    return DatasetQuestion(
+        question=question_data['question'],
+        opa=question_data['opa'],
+        opb=question_data['opb'],
+        opc=question_data['opc'],
+        opd=question_data['opd'],
+        cop=question_data['cop'] if 'cop' in question_data else None,
+        exp=question_data['exp'] if 'exp' in question_data else None
+    )
 
 def predict_gradio(question: str, option_a: str, option_b: str, option_c: str, option_d: str):
     """Gradio interface prediction function"""
     try:
         options = [option_a, option_b, option_c, option_d]
-        inputs = []
-        for option in options:
-            text = f"{question} {option}"
-            inputs.append(text)
 
-        encodings = tokenizer(
-            inputs,
+        # Format the prompt
+        prompt = format_prompt(question, options)
+
+        # Tokenize the input
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
             padding=True,
             truncation=True,
-            max_length=512,
-            return_tensors="pt"
+            max_length=512
         )
 
         device = next(model.parameters()).device
-        encodings = {k: v.to(device) for k, v in encodings.items()}
+        inputs = {k: v.to(device) for k, v in inputs.items()}
 
+        # Generate prediction
         with torch.no_grad():
-            outputs = model(**encodings)
-            logits = outputs.logits
-            probabilities = torch.softmax(logits, dim=1)[0].tolist()
-            predicted_class = torch.argmax(logits, dim=1).item()
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=10,
+                num_return_sequences=1,
+                temperature=0.7,
+                do_sample=False,
+                pad_token_id=tokenizer.eos_token_id
+            )
+
+        # Decode the output
+        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Extract the answer from the prediction
+        answer = prediction.split("Answer:")[-1].strip()
 
         # Format the output for Gradio
-        result = f"Predicted Answer: {options[predicted_class]}\n\n"
-        result += "Confidence Scores:\n"
-        for i, (opt, prob) in enumerate(zip(options, probabilities)):
-            result += f"{opt}: {prob:.2%}\n"
+        result = f"Model Output:\n{prediction}\n\n"
+        result += f"Extracted Answer: {answer}"
 
         return result
 
     except Exception as e:
         return f"Error: {str(e)}"
 
-def get_random_question():
-    """Get a random question for Gradio interface"""
-    if dataset is None:
-        return "Error: Dataset not loaded", "", "", "", ""
-
-    index = random.randint(0, len(dataset['train']) - 1)
-    question_data = dataset['train'][index]
-
-    return (
-        question_data['question'],
-        question_data['opa'],
-        question_data['opb'],
-        question_data['opc'],
-        question_data['opd']
-    )
-
 # Create Gradio interface
 with gr.Blocks(title="Medical MCQ Predictor") as demo:
     gr.Markdown("# Medical MCQ Predictor")
@@ -136,7 +189,7 @@ with gr.Blocks(title="Medical MCQ Predictor") as demo:
     )
 
     random_btn.click(
-        fn=get_random_question,
+        fn=lambda: get_question(random_question=True, format="gradio"),
         inputs=[],
         outputs=[question, option_a, option_b, option_c, option_d]
     )
@@ -144,36 +197,11 @@ with gr.Blocks(title="Medical MCQ Predictor") as demo:
 # Mount Gradio app to FastAPI
 app = gr.mount_gradio_app(app, demo, path="/")
 
-@app.on_event("startup")
-async def startup_event():
-    load_model()
-
 @app.get("/dataset/question")
 async def get_dataset_question(index: Optional[int] = None, random_question: bool = False):
     """Get a question from the MedMCQA dataset"""
     try:
-        if dataset is None:
-            raise HTTPException(status_code=500, detail="Dataset not loaded")
-
-        if random_question:
-            index = random.randint(0, len(dataset['train']) - 1)
-        elif index is None:
-            raise HTTPException(status_code=400, detail="Either index or random_question must be provided")
-
-        question_data = dataset['train'][index]
-
-        question = DatasetQuestion(
-            question=question_data['question'],
-            opa=question_data['opa'],
-            opb=question_data['opb'],
-            opc=question_data['opc'],
-            opd=question_data['opd'],
-            cop=question_data['cop'] if 'cop' in question_data else None,
-            exp=question_data['exp'] if 'exp' in question_data else None
-        )
-
-        return question
-
+        return get_question(index=index, random_question=random_question)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
@@ -183,35 +211,42 @@ async def predict(request: QuestionRequest):
         raise HTTPException(status_code=400, detail="Exactly 4 options are required")
 
     try:
-        inputs = []
-        for option in request.options:
-            text = f"{request.question} {option}"
-            inputs.append(text)
+        # Format the prompt
+        prompt = format_prompt(request.question, request.options)
 
-        encodings = tokenizer(
-            inputs,
+        # Tokenize the input
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
            padding=True,
            truncation=True,
-            max_length=512,
-            return_tensors="pt"
+            max_length=512
         )
 
         device = next(model.parameters()).device
-        encodings = {k: v.to(device) for k, v in encodings.items()}
+        inputs = {k: v.to(device) for k, v in inputs.items()}
 
+        # Generate prediction
        with torch.no_grad():
-            outputs = model(**encodings)
-            logits = outputs.logits
-            probabilities = torch.softmax(logits, dim=1)[0].tolist()
-            predicted_class = torch.argmax(logits, dim=1).item()
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=10,
+                num_return_sequences=1,
+                temperature=0.7,
+                do_sample=False,
+                pad_token_id=tokenizer.eos_token_id
+            )
+
+        # Decode the output
+        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Extract the answer from the prediction
+        answer = prediction.split("Answer:")[-1].strip()
 
         response = {
-            "predicted_option": request.options[predicted_class],
-            "option_index": predicted_class,
-            "confidence": probabilities[predicted_class],
-            "probabilities": {
-                f"option_{i}": prob for i, prob in enumerate(probabilities)
-            }
+            "model_output": prediction,
+            "extracted_answer": answer,
+            "full_response": prediction
         }
 
         return response
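
For reviewers who want to exercise the updated endpoint, here is a minimal client sketch. It assumes the Space is running locally at http://localhost:7860 and that the handler above is mounted at /predict with the QuestionRequest body (a question plus exactly four options); the URL, port, and sample question are illustrative assumptions, while the response keys (model_output, extracted_answer, full_response) come from this commit.

# Minimal client sketch for the updated prediction endpoint.
# Assumptions: the app is reachable at http://localhost:7860 and the handler
# shown in this diff is mounted at /predict; adjust both to match the deployment.
import requests

BASE_URL = "http://localhost:7860"  # assumed local URL for the Space

payload = {
    "question": "Which vitamin deficiency causes scurvy?",  # illustrative example
    "options": ["Vitamin A", "Vitamin B12", "Vitamin C", "Vitamin D"],  # exactly 4 options required
}

resp = requests.post(f"{BASE_URL}/predict", json=payload, timeout=60)
resp.raise_for_status()
data = resp.json()

# After this commit the endpoint returns generated text rather than per-option
# probabilities: "model_output", "extracted_answer", and "full_response".
print(data["extracted_answer"])

Note that the new response no longer includes an option index or confidence scores, so clients that previously consumed predicted_option or probabilities will need to be updated.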