brendon-ai committed on
Commit
a5a32fe
·
verified ·
1 Parent(s): 5734a73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -21
app.py CHANGED
@@ -4,23 +4,24 @@ from pydantic import BaseModel
4
  import torch
5
  from transformers import pipeline
6
  import os
 
7
 
8
- app = FastAPI()
 
 
 
9
 
10
- # --- Model Loading (Global Scope to load once) ---
11
- # This part will be executed only once when the FastAPI application starts up.
12
- # This saves memory and time compared to loading the model on every request.
13
-
14
- generator = None # Initialize generator to None
15
-
16
- @app.on_event("startup")
17
- async def load_model():
18
  """
19
- Load the model when the FastAPI application starts.
 
20
  """
21
- global generator
22
  try:
23
- # Check for GPU
 
24
  if torch.cuda.is_available():
25
  print(f"CUDA is available! Using {torch.cuda.get_device_name(0)}")
26
  device = 0 # Use GPU
@@ -28,17 +29,30 @@ async def load_model():
28
  print("CUDA not available, using CPU.")
29
  device = -1 # Use CPU
30
 
31
- # Load a text generation pipeline
32
- # For a free tier/small GPU, consider a smaller model like 'distilgpt2' or 'gpt2'
33
- # For larger GPUs, you can try models like 'meta-llama/Llama-2-7b-hf' (requires auth)
34
- # or 'mistralai/Mistral-7B-Instruct-v0.2'
35
  print(f"Loading model 'distilgpt2' on device: {'cuda' if device == 0 else 'cpu'}")
36
  generator = pipeline('text-generation', model='distilgpt2', device=device)
37
  print("Model loaded successfully!")
 
 
 
 
 
38
  except Exception as e:
39
- print(f"Error loading model: {e}")
40
- # You might want to raise an exception or log this more robustly in production
41
- # For a simple app, we'll let it fail and then handle requests later.
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  # --- Define Request Body Schema ---
44
  class PromptRequest(BaseModel):
@@ -53,7 +67,8 @@ async def generate_text(request: PromptRequest):
53
  Generates text based on a given prompt using the loaded LLM.
54
  """
55
  if generator is None:
56
- raise HTTPException(status_code=503, detail="Model not loaded. Please try again later.")
 
57
 
58
  try:
59
  result = generator(
@@ -63,9 +78,11 @@ async def generate_text(request: PromptRequest):
63
  )
64
  return {"generated_text": result[0]['generated_text']}
65
  except Exception as e:
 
 
66
  raise HTTPException(status_code=500, detail=f"Error during text generation: {e}")
67
 
68
- # --- Basic Health Check Endpoint (Optional but Recommended) ---
69
  @app.get("/")
70
  async def read_root():
71
  """
 
4
  import torch
5
  from transformers import pipeline
6
  import os
7
+ from contextlib import asynccontextmanager # Import this!
8
 
9
+ # --- Global variable for the model ---
10
+ # It's important to declare this globally so it can be accessed within
11
+ # the lifespan function and the API endpoint functions.
12
+ generator = None
13
 
14
+ # --- Lifespan Event Handler ---
15
+ @asynccontextmanager
16
+ async def lifespan(app: FastAPI):
 
 
 
 
 
17
  """
18
+ Handles startup and shutdown events for the FastAPI application.
19
+ Loads the model on startup and can optionally clean up on shutdown.
20
  """
21
+ global generator # Declare intent to modify the global 'generator' variable
22
  try:
23
+ # --- Startup Code: Load the model ---
24
+ # This code runs BEFORE the application starts receiving requests.
25
  if torch.cuda.is_available():
26
  print(f"CUDA is available! Using {torch.cuda.get_device_name(0)}")
27
  device = 0 # Use GPU
 
29
  print("CUDA not available, using CPU.")
30
  device = -1 # Use CPU
31
 
 
 
 
 
32
  print(f"Loading model 'distilgpt2' on device: {'cuda' if device == 0 else 'cpu'}")
33
  generator = pipeline('text-generation', model='distilgpt2', device=device)
34
  print("Model loaded successfully!")
35
+
36
+ # 'yield' signifies that the startup code has completed, and the application
37
+ # can now start processing requests.
38
+ yield
39
+
40
  except Exception as e:
41
+ print(f"Error loading model during startup: {e}")
42
+ # In a real application, you might want to exit here if the model is crucial
43
+ # sys.exit(1) or raise an exception to prevent the app from starting unhealthy.
44
+
45
+ finally:
46
+ # --- Shutdown Code (Optional): Clean up resources ---
47
+ # This code runs AFTER the application has finished handling requests and is shutting down.
48
+ # For a simple model loaded like this, there might not be explicit cleanup needed.
49
+ # If you had database connections, external client sessions, etc., you'd close them here.
50
+ print("Application shutting down. Any cleanup can go here.")
51
+
52
+
53
+ # --- Initialize FastAPI application with the lifespan handler ---
54
+ app = FastAPI(lifespan=lifespan) # Pass the lifespan function to the FastAPI app
55
+
56
 
57
  # --- Define Request Body Schema ---
58
  class PromptRequest(BaseModel):
 
67
  Generates text based on a given prompt using the loaded LLM.
68
  """
69
  if generator is None:
70
+ # This indicates a failure during startup, or the app started unhealthy
71
+ raise HTTPException(status_code=503, detail="Model not loaded. Service unavailable.")
72
 
73
  try:
74
  result = generator(
 
78
  )
79
  return {"generated_text": result[0]['generated_text']}
80
  except Exception as e:
81
+ # Log the full exception for debugging in production
82
+ print(f"Error during text generation: {e}")
83
  raise HTTPException(status_code=500, detail=f"Error during text generation: {e}")
84
 
85
+ # --- Basic Health Check Endpoint ---
86
  @app.get("/")
87
  async def read_root():
88
  """