File size: 923 Bytes
80c3a84
6c0215b
d5939d1
d15392d
 
 
14fe8f8
 
 
 
 
 
 
224a523
ad67d60
80c3a84
 
e9f3a9a
80c3a84
 
 
a0b62ab
80c3a84
 
 
ddad736
ad67d60
80c3a84
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# stdlib imports first, then third-party (PEP 8); `import os` was previously
# buried mid-file after the app was created.
import os

from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel

app = FastAPI()

# Startup diagnostics: dump the filesystem layout so a bad model path is easy
# to debug from container logs. NOTE(review): these run at import time and
# os.listdir("/app") raises FileNotFoundError outside the expected container
# layout — confirm /app always exists, or consider logging instead of print.
print("Current working directory:", os.getcwd())
print("Files in current directory:", os.listdir("."))
print("Files in root directory:", os.listdir("/"))
print("Files in /app directory:", os.listdir("/app"))

# Load the GGUF model once at startup (loading is expensive; the instance is
# shared by all requests). Path is overridable via QWEN3_GGUF_PATH, defaulting
# to the original hard-coded filename so existing deployments are unaffected.
qwen3_gguf_llm = Llama(
    model_path=os.environ.get("QWEN3_GGUF_PATH", "Qwen3-0.6B-UD-IQ1_S.gguf")
)

class PromptRequest(BaseModel):
    """Request body for the generation endpoint: a single user prompt."""

    # Raw user prompt text; sent verbatim as the "user" message to the model.
    prompt: str

class GenerateResponse(BaseModel):
    """Response body for the generation endpoint."""

    # Model's chain-of-thought / reasoning text, when the model emits any;
    # defaults to empty.
    reasoning_content: str = ""
    # The final generated answer text.
    generated_text: str

@app.post("/generate/qwen3-0.6b-gguf", response_model=GenerateResponse)
def generate_qwen3_gguf_endpoint(request: PromptRequest):
    """Generate a chat completion for ``request.prompt`` with the Qwen3 GGUF model.

    Declared as a plain ``def`` (not ``async def``) on purpose:
    ``create_chat_completion`` is a synchronous, CPU-bound call, and inside an
    ``async def`` it would block the event loop for the whole generation.
    FastAPI runs sync endpoints in its threadpool, keeping the server responsive.

    Returns a GenerateResponse; ``reasoning_content`` carries any
    ``<think>...</think>`` block the model emitted, ``generated_text`` the rest.
    """
    messages = [{"role": "user", "content": request.prompt}]
    response = qwen3_gguf_llm.create_chat_completion(messages=messages)
    # Guard: the API may return None for content; normalize to "".
    content = response["choices"][0]["message"]["content"] or ""

    # NOTE(review): Qwen3-style models emit reasoning wrapped in
    # <think>...</think> before the answer, which is why GenerateResponse has a
    # reasoning_content field — confirm against actual model output. If no
    # closing tag is present, everything is treated as the answer.
    reasoning = ""
    if "</think>" in content:
        head, _, tail = content.partition("</think>")
        reasoning = head.replace("<think>", "").strip()
        content = tail.strip()

    return GenerateResponse(reasoning_content=reasoning, generated_text=content)