gouravbhadraDev committed
Commit ef5f658 · verified · 1 Parent(s): fd64d36

Update app.py

Files changed (1)
  1. app.py +11 -8
app.py CHANGED
@@ -38,12 +38,12 @@ def scrape(url: str = Query(...)):
 
 MODEL_NAME = "microsoft/phi-2"
 
-# Load the pipeline once at startup with device auto-mapping
+# Load the text-generation pipeline once at startup
 text_generator = pipeline(
     "text-generation",
     model=MODEL_NAME,
     trust_remote_code=True,
-    device=0 if torch.cuda.is_available() else -1,
+    device=0 if torch.cuda.is_available() else -1,  # GPU if available, else CPU
 )
 
 class PromptRequest(BaseModel):
@@ -51,13 +51,16 @@ class PromptRequest(BaseModel):
 
 @app.post("/generate")
 async def generate_text(request: PromptRequest):
-    # Prepare messages as expected by the model pipeline
-    messages = [{"role": "user", "content": request.prompt}]
+    # The model expects a string prompt, so pass request.prompt directly
+    outputs = text_generator(
+        request.prompt,
+        max_new_tokens=512,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True,
+        num_return_sequences=1,
+    )
 
-    # Call the pipeline with messages
-    outputs = text_generator(messages)
-
-    # The pipeline returns a list of dicts with 'generated_text'
     generated_text = outputs[0]['generated_text']
 
     # Optional: parse reasoning and content if your model uses special tags like </think>
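
For context, a minimal sketch of how the patched section of app.py fits together. Everything outside the diff is an assumption, not confirmed by this commit: the imports, the app = FastAPI() instance, the prompt field on PromptRequest, and the response shape.

import torch
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

app = FastAPI()  # assumed; the app instance is outside the diff

MODEL_NAME = "microsoft/phi-2"

# Load the text-generation pipeline once at startup
text_generator = pipeline(
    "text-generation",
    model=MODEL_NAME,
    trust_remote_code=True,
    device=0 if torch.cuda.is_available() else -1,  # GPU if available, else CPU
)

class PromptRequest(BaseModel):
    prompt: str  # implied by request.prompt in the handler

@app.post("/generate")
async def generate_text(request: PromptRequest):
    # phi-2 is a base model, so the pipeline is called with a plain string prompt
    outputs = text_generator(
        request.prompt,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1,
    )
    generated_text = outputs[0]["generated_text"]
    return {"generated_text": generated_text}  # assumed response shape

The switch from a chat-style messages list to a plain string is the substantive fix: passing a messages list generally requires the tokenizer to ship a chat template, which microsoft/phi-2 does not. A hypothetical client call (the port is a guess; Hugging Face Spaces typically expose 7860):

import requests

resp = requests.post(
    "http://localhost:7860/generate",
    json={"prompt": "Explain FastAPI in one sentence."},
)
print(resp.json()["generated_text"])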