import os

from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama

app = FastAPI()

# Debug output: print where the app is running and which files are visible,
# useful for verifying that the GGUF file landed inside the container.
print("Current working directory:", os.getcwd())
print("Files in current directory:", os.listdir("."))
print("Files in root directory:", os.listdir("/"))
print("Files in /app directory:", os.listdir("/app"))

# Load the GGUF-quantized Qwen3 model once at startup
# (path is relative to the working directory).
qwen3_gguf_llm = Llama(model_path="Qwen3-0.6B-UD-IQ1_S.gguf")


class PromptRequest(BaseModel):
    prompt: str


class GenerateResponse(BaseModel):
    reasoning_content: str = ""  # stays empty unless reasoning is extracted
    generated_text: str


@app.post("/generate/qwen3-0.6b-gguf", response_model=GenerateResponse)
async def generate_qwen3_gguf_endpoint(request: PromptRequest):
    messages = [{"role": "user", "content": request.prompt}]
    # Blocking inference call; returns an OpenAI-style completion dict.
    response = qwen3_gguf_llm.create_chat_completion(messages=messages)
    generated_text = response['choices'][0]['message']['content']
    return GenerateResponse(generated_text=generated_text)
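
# Example request, as a sketch: it assumes the app is served with
# `uvicorn app:app --host 0.0.0.0 --port 8000`; the host and port are
# assumptions, not part of this file.
#
#   curl -X POST http://localhost:8000/generate/qwen3-0.6b-gguf \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello, who are you?"}'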