usmansafdarktk committed on
Commit
1574d49
·
1 Parent(s): c963314

Add torch import to fix model loading error

Browse files
Files changed (1) hide show
  1. main.py +23 -22
main.py CHANGED
@@ -1,37 +1,38 @@
1
  import os
 
 
 
2
  from fastapi import FastAPI, HTTPException
3
  from pydantic import BaseModel
4
  from transformers import pipeline
5
- import logging
6
 
7
- # Set up logging
8
  logging.basicConfig(level=logging.INFO)
9
  logger = logging.getLogger(__name__)
10
 
11
- # Log cache directory
12
- logger.info(f"TRANSFORMERS_CACHE set to: {os.getenv('TRANSFORMERS_CACHE', '/.cache')}")
13
-
14
- app = FastAPI(title="LaMini-LM API",
15
- description="API for text generation using LaMini-GPT-774M", version="1.0.0")
16
 
17
- # Define request model
18
  class TextGenerationRequest(BaseModel):
19
- instruction: str # Changed from prompt for consistency
20
  max_length: int = 100
21
  temperature: float = 1.0
22
  top_p: float = 0.9
23
 
24
- # Load model (cached after first load)
25
- try:
26
- logger.info("Loading LaMini-GPT-774M model...")
27
- generator = pipeline('text-generation', model='MBZUAI/LaMini-GPT-774M', device=-1)
28
- logger.info("Model loaded successfully.")
29
- if torch.cuda.is_available():
30
- torch.cuda.empty_cache()
31
- gc.collect()
32
- except Exception as e:
33
- logger.error(f"Failed to load model: {str(e)}")
34
- generator = None # Allow server to run for health check
 
 
 
 
 
35
 
36
  @app.get("/health")
37
  async def health_check():
@@ -43,10 +44,11 @@ async def root():
43
 
44
  @app.post("/generate")
45
  async def generate_text(request: TextGenerationRequest):
 
 
46
  if generator is None:
47
  raise HTTPException(status_code=503, detail="Model not loaded. Check server logs.")
48
  try:
49
- # Validate inputs
50
  if not request.instruction.strip():
51
  raise HTTPException(status_code=400, detail="Instruction cannot be empty")
52
  if request.max_length < 10 or request.max_length > 500:
@@ -56,7 +58,6 @@ async def generate_text(request: TextGenerationRequest):
56
  if request.top_p <= 0 or request.top_p > 1:
57
  raise HTTPException(status_code=400, detail="top_p must be between 0 and 1")
58
 
59
- # Generate text
60
  logger.info(f"Generating text for instruction: {request.instruction[:50]}...")
61
  wrapper = "Instruction: You are a helpful assistant. Please respond to the following instruction.\n\nInstruction: {}\n\nResponse:".format(
62
  request.instruction)
 
1
  import os
2
+ import logging
3
+ import torch
4
+ import gc
5
  from fastapi import FastAPI, HTTPException
6
  from pydantic import BaseModel
7
  from transformers import pipeline
 
8
 
 
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
12
+ app = FastAPI(title="LaMini-LM API", description="API for text generation using LaMini-GPT-774M", version="1.0.0")
 
 
 
 
13
 
 
14
  class TextGenerationRequest(BaseModel):
15
+ instruction: str
16
  max_length: int = 100
17
  temperature: float = 1.0
18
  top_p: float = 0.9
19
 
20
+ generator = None
21
+
22
+ def load_model():
23
+ global generator
24
+ if generator is None:
25
+ try:
26
+ logger.info("Loading LaMini-GPT-774M model...")
27
+ generator = pipeline('text-generation', model='MBZUAI/LaMini-GPT-774M', device=-1)
28
+ logger.info("Model loaded successfully.")
29
+ if torch.cuda.is_available():
30
+ torch.cuda.empty_cache()
31
+ gc.collect()
32
+ except Exception as e:
33
+ logger.error(f"Failed to load model: {str(e)}")
34
+ generator = None
35
+ raise Exception(f"Model loading failed: {str(e)}")
36
 
37
  @app.get("/health")
38
  async def health_check():
 
44
 
45
  @app.post("/generate")
46
  async def generate_text(request: TextGenerationRequest):
47
+ if generator is None:
48
+ load_model()
49
  if generator is None:
50
  raise HTTPException(status_code=503, detail="Model not loaded. Check server logs.")
51
  try:
 
52
  if not request.instruction.strip():
53
  raise HTTPException(status_code=400, detail="Instruction cannot be empty")
54
  if request.max_length < 10 or request.max_length > 500:
 
58
  if request.top_p <= 0 or request.top_p > 1:
59
  raise HTTPException(status_code=400, detail="top_p must be between 0 and 1")
60
 
 
61
  logger.info(f"Generating text for instruction: {request.instruction[:50]}...")
62
  wrapper = "Instruction: You are a helpful assistant. Please respond to the following instruction.\n\nInstruction: {}\n\nResponse:".format(
63
  request.instruction)