Spaces:

brendon-ai
/

faq-huggingface-model

Running

App Files Files Community

brendon-ai committed on Jun 24

Commit

a5a32fe

verified ·

1 Parent(s): 5734a73

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -21

app.py CHANGED Viewed

@@ -4,23 +4,24 @@ from pydantic import BaseModel
 import torch
 from transformers import pipeline
 import os
-app = FastAPI()
-# --- Model Loading (Global Scope to load once) ---
-# This part will be executed only once when the FastAPI application starts up.
-# This saves memory and time compared to loading the model on every request.
-generator = None # Initialize generator to None
-@app.on_event("startup")
-async def load_model():
     """
-    Load the model when the FastAPI application starts.
     """
-    global generator
     try:
-        # Check for GPU
         if torch.cuda.is_available():
             print(f"CUDA is available! Using {torch.cuda.get_device_name(0)}")
             device = 0 # Use GPU
@@ -28,17 +29,30 @@ async def load_model():
             print("CUDA not available, using CPU.")
             device = -1 # Use CPU
-        # Load a text generation pipeline
-        # For a free tier/small GPU, consider a smaller model like 'distilgpt2' or 'gpt2'
-        # For larger GPUs, you can try models like 'meta-llama/Llama-2-7b-hf' (requires auth)
-        # or 'mistralai/Mistral-7B-Instruct-v0.2'
         print(f"Loading model 'distilgpt2' on device: {'cuda' if device == 0 else 'cpu'}")
         generator = pipeline('text-generation', model='distilgpt2', device=device)
         print("Model loaded successfully!")
     except Exception as e:
-        print(f"Error loading model: {e}")
-        # You might want to raise an exception or log this more robustly in production
-        # For a simple app, we'll let it fail and then handle requests later.
 # --- Define Request Body Schema ---
 class PromptRequest(BaseModel):
@@ -53,7 +67,8 @@ async def generate_text(request: PromptRequest):
     Generates text based on a given prompt using the loaded LLM.
     """
     if generator is None:
-        raise HTTPException(status_code=503, detail="Model not loaded. Please try again later.")
     try:
         result = generator(
@@ -63,9 +78,11 @@ async def generate_text(request: PromptRequest):
         )
         return {"generated_text": result[0]['generated_text']}
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error during text generation: {e}")
-# --- Basic Health Check Endpoint (Optional but Recommended) ---
 @app.get("/")
 async def read_root():
     """

 import torch
 from transformers import pipeline
 import os
+from contextlib import asynccontextmanager # Import this!
+# --- Global variable for the model ---
+# It's important to declare this globally so it can be accessed within
+# the lifespan function and the API endpoint functions.
+generator = None
+# --- Lifespan Event Handler ---
+@asynccontextmanager
+async def lifespan(app: FastAPI):
     """
+    Handles startup and shutdown events for the FastAPI application.
+    Loads the model on startup and can optionally clean up on shutdown.
     """
+    global generator # Declare intent to modify the global 'generator' variable
     try:
+        # --- Startup Code: Load the model ---
+        # This code runs BEFORE the application starts receiving requests.
         if torch.cuda.is_available():
             print(f"CUDA is available! Using {torch.cuda.get_device_name(0)}")
             device = 0 # Use GPU
             print("CUDA not available, using CPU.")
             device = -1 # Use CPU
         print(f"Loading model 'distilgpt2' on device: {'cuda' if device == 0 else 'cpu'}")
         generator = pipeline('text-generation', model='distilgpt2', device=device)
         print("Model loaded successfully!")
+        # 'yield' signifies that the startup code has completed, and the application
+        # can now start processing requests.
+        yield
     except Exception as e:
+        print(f"Error loading model during startup: {e}")
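+        # In a real application where the model is essential, you might want to
+        # call sys.exit(1) or re-raise here so the failure is explicit.
+        # NOTE: when loading fails, this handler swallows the error before 'yield' is
+        # reached, so the context manager never yields; contextlib then raises
+        # RuntimeError("generator didn't yield") and startup fails anyway.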
+    finally:
+        # --- Shutdown Code (Optional): Clean up resources ---
+        # This code runs AFTER the application has finished handling requests and is shutting down.
+        # For a simple model loaded like this, there might not be explicit cleanup needed.
+        # If you had database connections, external client sessions, etc., you'd close them here.
+        print("Application shutting down. Any cleanup can go here.")
+# --- Initialize FastAPI application with the lifespan handler ---
+app = FastAPI(lifespan=lifespan) # Pass the lifespan function to the FastAPI app
 # --- Define Request Body Schema ---
 class PromptRequest(BaseModel):
     Generates text based on a given prompt using the loaded LLM.
     """
     if generator is None:
+        # This indicates a failure during startup, or the app started unhealthy
+        raise HTTPException(status_code=503, detail="Model not loaded. Service unavailable.")
     try:
         result = generator(
         )
         return {"generated_text": result[0]['generated_text']}
     except Exception as e:
+        # Log the full exception for debugging in production
+        print(f"Error during text generation: {e}")
         raise HTTPException(status_code=500, detail=f"Error during text generation: {e}")
+# --- Basic Health Check Endpoint ---
 @app.get("/")
 async def read_root():
     """