gouravbhadraDev committed on
Commit
e6978bd
·
verified ·
1 Parent(s): d776534

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -12
app.py CHANGED
@@ -4,26 +4,32 @@ from llama_cpp import Llama
4
 
5
  app = FastAPI()
6
 
7
- import os
8
-
9
-
10
-
11
-
12
  qwen3_gguf_llm = Llama.from_pretrained(
13
- repo_id="unsloth/Qwen3-0.6B-GGUF",
14
- filename="Qwen3-0.6B-UD-Q8_K_XL.gguf",
15
  )
16
 
17
  class PromptRequest(BaseModel):
18
  prompt: str
19
 
20
  class GenerateResponse(BaseModel):
21
- reasoning_content: str = ""
22
  generated_text: str
23
 
 
 
 
24
  @app.post("/generate/qwen3-0.6b-gguf", response_model=GenerateResponse)
25
  async def generate_qwen3_gguf_endpoint(request: PromptRequest):
26
- messages = [{"role": "user", "content": request.prompt}]
27
- response = qwen3_gguf_llm.create_chat_completion(messages=messages)
28
- generated_text = response['choices'][0]['message']['content']
29
- return GenerateResponse(generated_text=generated_text)
 
 
 
 
 
 
 
 
 
 
4
 
5
  app = FastAPI()
6
 
 
 
 
 
 
7
# Load the Qwen3-0.6B chat model (GGUF build) from the Hugging Face Hub.
# This runs at import time, so app startup blocks until the weights are
# downloaded (llama_cpp caches them locally after the first fetch).
_QWEN3_REPO_ID = "unsloth/Qwen3-0.6B-GGUF"
_QWEN3_FILENAME = "Qwen3-0.6B-UD-Q8_K_XL.gguf"

qwen3_gguf_llm = Llama.from_pretrained(
    repo_id=_QWEN3_REPO_ID,
    filename=_QWEN3_FILENAME,
)
11
 
12
class PromptRequest(BaseModel):
    """Request body for the generation endpoint."""
    # The user's prompt text forwarded to the model as a chat message.
    prompt: str
14
 
15
class GenerateResponse(BaseModel):
    """Response body returned by the generation endpoint."""
    # The assistant reply text extracted from the model's chat completion.
    generated_text: str
17
 
18
# Simple in-memory conversation memory (list of chat messages in
# {"role": ..., "content": ...} form, as expected by create_chat_completion).
# NOTE(review): this is a single module-level list shared by ALL clients and
# requests — separate users see each other's context — and it grows without
# bound for the life of the process; it is also lost on restart. Confirm this
# is intended, or key history per session and cap its length.
conversation_history = []
20
+
21
# Cap on messages retained in the shared history, so the module-level list
# cannot grow without bound as requests accumulate.
_MAX_HISTORY_MESSAGES = 40


@app.post("/generate/qwen3-0.6b-gguf", response_model=GenerateResponse)
async def generate_qwen3_gguf_endpoint(request: PromptRequest):
    """Generate a chat reply for ``request.prompt`` with conversation memory.

    Appends the user message to the in-memory history, calls the GGUF model
    with the full history, records the assistant reply, and returns it.

    NOTE(review): ``conversation_history`` is shared by ALL clients, so
    different users share one conversation — confirm intended, or key the
    history per session.
    NOTE(review): ``create_chat_completion`` is synchronous and CPU-bound, so
    it blocks the event loop for the whole generation; consider
    ``run_in_executor`` if concurrency matters.
    """
    # Append user message to history.
    conversation_history.append({"role": "user", "content": request.prompt})

    try:
        # Call the model with the full conversation history.
        response = qwen3_gguf_llm.create_chat_completion(messages=conversation_history)
    except Exception:
        # Roll back the just-added user message so a failed call does not
        # leave a dangling entry that poisons every later request.
        conversation_history.pop()
        raise

    # Extract the assistant reply from the OpenAI-style completion payload.
    assistant_message = response['choices'][0]['message']['content']

    # Append assistant reply to history, then trim to the most recent
    # _MAX_HISTORY_MESSAGES entries to bound memory use.
    conversation_history.append({"role": "assistant", "content": assistant_message})
    del conversation_history[:-_MAX_HISTORY_MESSAGES]

    return GenerateResponse(generated_text=assistant_message)