akshaybhure098 commited on
Commit
9cf4b85
·
verified ·
1 Parent(s): 3f24b3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -26
app.py CHANGED
@@ -1,26 +1,32 @@
1
- from fastapi import FastAPI
2
- from pydantic import BaseModel
3
- from llama_cpp import Llama
4
-
5
- app = FastAPI()
6
-
7
- # Load the model
8
- llm = Llama.from_pretrained(
9
- repo_id="unsloth/phi-4-GGUF",
10
- filename="phi-4-Q4_K_M.gguf",
11
- )
12
-
13
- # Define request model
14
- class ChatRequest(BaseModel):
15
- system_prompt: str
16
- query: str
17
-
18
- @app.post("/chat-p4q4")
19
- async def chat(request: ChatRequest):
20
- response = llm.create_chat_completion(
21
- messages=[
22
- {"role": "system", "content": request.system_prompt},
23
- {"role": "user", "content": request.query},
24
- ]
25
- )
26
- return {"response": response}
 
 
 
 
 
 
 
import logging

from fastapi import FastAPI, HTTPException
from llama_cpp import Llama
from pydantic import BaseModel
4
+
5
# ASGI application instance picked up by the server runner.
app = FastAPI()

# Fetch the quantized Phi-4 weights from the Hugging Face Hub and load them
# once at import time, so every request reuses the same in-memory model.
llm = Llama.from_pretrained(
    repo_id="unsloth/phi-4-GGUF",
    filename="phi-4-Q4_K_M.gguf",
)
12
+
13
# Request schema for the chat endpoint.
class ChatRequest(BaseModel):
    """Validated JSON body for ``POST /chat-p4q4``."""

    # Instructions delivered to the model as the "system" message.
    system_prompt: str
    # End-user text delivered to the model as the "user" message.
    query: str
17
+
18
@app.post("/chat-p4q4")
async def chat(request: ChatRequest):
    """Run one chat completion against the loaded Phi-4 model.

    Args:
        request: Validated body carrying the system prompt and user query.

    Returns:
        ``{"response": <completion dict>}`` with the raw completion payload
        produced by ``llm.create_chat_completion``.

    Raises:
        HTTPException: 500 with the underlying error message when inference
            fails, instead of the previous behavior of returning HTTP 200
            with an ``{"error": ...}`` body that clients could mistake for
            success.
    """
    # NOTE(review): create_chat_completion is a blocking call inside an
    # async endpoint, which stalls the event loop for the duration of
    # inference — consider a sync `def` endpoint (FastAPI thread pool) or
    # run_in_executor. Left unchanged here to preserve the interface.
    try:
        response = llm.create_chat_completion(
            messages=[
                {"role": "system", "content": request.system_prompt},
                {"role": "user", "content": request.query},
            ]
        )
    except Exception as e:
        # logging.exception records the full traceback server-side;
        # the old print() wrote only the message to stdout.
        logging.exception("Error during model inference")
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"response": response}
32
+