Commit 6fce4ec by tech-envision
Parent: 4ff72e9

Add FastAPI server for LLM chat
Files changed:
- README.md +12 -0
- requirements.txt +3 -1
- server.py +13 -0
- src/__init__.py +2 -1
- src/api/__init__.py +3 -0
- src/api/app.py +13 -0
- src/api/models.py +20 -0
- src/api/router.py +29 -0
README.md CHANGED

````diff
@@ -37,6 +37,18 @@ When using the Discord bot, attach one or more text files to a message to
 upload them automatically. The bot responds with the location of each document
 inside the VM so they can be referenced in subsequent prompts.
 
+## API Server
+
+An HTTP API is provided using FastAPI. Run the server with:
+
+```bash
+python server.py
+```
+
+Send a POST request to `/chat` with the fields `user`, `session` and `prompt` to
+receive the assistant's reply. Conversation history is persisted in
+`chat.db`. Use the `/reset` endpoint to clear previous messages for a session.
+
 ## Docker
 
 A Dockerfile is provided to run the Discord bot along with an Ollama server. The image installs Ollama, pulls the LLM and embedding models, and starts both the server and the bot.
````
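For reference, a minimal Python client for the two documented endpoints might look like the following (a sketch assuming the server started by `python server.py` is listening on `localhost:8000`; the `requests` package is used only for illustration and is not among this commit's dependencies):

```python
import requests

BASE = "http://localhost:8000"
IDS = {"user": "default", "session": "default"}

# POST /chat returns the assistant's reply; history for this
# (user, session) pair accumulates in chat.db between calls.
resp = requests.post(f"{BASE}/chat", json={**IDS, "prompt": "Hello"})
resp.raise_for_status()
print(resp.json()["reply"])

# POST /reset clears the stored history and reports how many
# messages were removed.
resp = requests.post(f"{BASE}/reset", json=IDS)
print(resp.json()["removed"], "messages removed")
```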
requirements.txt CHANGED

```diff
@@ -3,4 +3,6 @@ ollama
 peewee
 discord.py
 colorama
-python-dotenv
+python-dotenv
+fastapi
+uvicorn
```
server.py ADDED

```diff
@@ -0,0 +1,13 @@
+from __future__ import annotations
+
+import uvicorn
+
+from src.api import create_app
+
+
+def main() -> None:
+    uvicorn.run(create_app(), host="0.0.0.0", port=8000)
+
+
+if __name__ == "__main__":  # pragma: no cover - manual start
+    main()
```
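Because `create_app` is a factory rather than a module-level app instance, uvicorn can also load it by import string, which is what enables auto-reload during development. A sketch of that variant (not part of the commit; `factory=True` is uvicorn's standard flag for app factories):

```python
import uvicorn

# Development alternative to server.py: passing an import string lets
# uvicorn re-import the factory on each reload. CLI equivalent:
#   uvicorn src.api:create_app --factory --reload
uvicorn.run("src.api:create_app", factory=True, reload=True, host="0.0.0.0", port=8000)
```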
src/__init__.py CHANGED

```diff
@@ -1,5 +1,6 @@
 from .chat import ChatSession
 from .tools import execute_terminal, set_vm
 from .vm import LinuxVM
+from .api import create_app
 
-__all__ = ["ChatSession", "execute_terminal", "set_vm", "LinuxVM"]
+__all__ = ["ChatSession", "execute_terminal", "set_vm", "LinuxVM", "create_app"]
```
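With this re-export in place, callers can build the app from the package root instead of reaching into the subpackage (a small usage sketch):

```python
from src import create_app

app = create_app()  # same factory as src.api.create_app
```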
src/api/__init__.py ADDED

```diff
@@ -0,0 +1,3 @@
+from .app import create_app
+
+__all__ = ["create_app"]
```
src/api/app.py ADDED

```diff
@@ -0,0 +1,13 @@
+from __future__ import annotations
+
+from fastapi import FastAPI
+
+from .router import router
+
+__all__ = ["create_app"]
+
+
+def create_app() -> FastAPI:
+    app = FastAPI(title="LLM Chat API", version="1.0.0")
+    app.include_router(router)
+    return app
```
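A quick sanity check of the wiring is to instantiate the app and confirm the router's paths are registered (a sketch; `app.routes` also contains FastAPI's built-in documentation routes):

```python
from src.api import create_app

app = create_app()
# The router below contributes the two POST endpoints.
paths = {route.path for route in app.routes}
assert {"/chat", "/reset"} <= paths
```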
src/api/models.py ADDED

```diff
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from pydantic import BaseModel, Field
+
+__all__ = ["ChatRequest", "ChatResponse", "ResetRequest", "ResetResponse"]
+
+class ChatRequest(BaseModel):
+    user: str = Field(..., example="default")
+    session: str = Field(..., example="default")
+    prompt: str = Field(..., min_length=1, example="Hello")
+
+class ChatResponse(BaseModel):
+    reply: str
+
+class ResetRequest(BaseModel):
+    user: str
+    session: str
+
+class ResetResponse(BaseModel):
+    removed: int
```
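The `Field` constraints mean malformed payloads never reach the endpoint body; an empty `prompt`, for example, fails validation, which FastAPI reports to the client as a 422 response. A sketch of the behavior in isolation:

```python
from pydantic import ValidationError

from src.api.models import ChatRequest

try:
    ChatRequest(user="default", session="default", prompt="")
except ValidationError as exc:
    # min_length=1 on `prompt` rejects the empty string.
    print(exc)
```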
src/api/router.py ADDED

```diff
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from fastapi import APIRouter, HTTPException, status
+
+from .models import ChatRequest, ChatResponse, ResetRequest, ResetResponse
+from ..chat import ChatSession
+from ..db import reset_history
+from ..log import get_logger
+
+router = APIRouter()
+log = get_logger(__name__)
+
+
+@router.post("/chat", response_model=ChatResponse, status_code=status.HTTP_200_OK)
+async def chat_endpoint(payload: ChatRequest) -> ChatResponse:
+    log.debug("chat request user=%s session=%s", payload.user, payload.session)
+    async with ChatSession(user=payload.user, session=payload.session) as chat:
+        try:
+            reply = await chat.chat(payload.prompt)
+        except Exception as exc:  # pragma: no cover - runtime errors
+            log.exception("chat processing failed")
+            raise HTTPException(status_code=500, detail=str(exc)) from exc
+        return ChatResponse(reply=reply)
+
+
+@router.post("/reset", response_model=ResetResponse, status_code=status.HTTP_200_OK)
+async def reset_endpoint(payload: ResetRequest) -> ResetResponse:
+    removed = reset_history(payload.user, payload.session)
+    return ResetResponse(removed=removed)
```
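`reset_history` is imported from `src/db.py`, which this commit does not touch. Assuming the existing peewee schema stores one row per message keyed by user and session, the helper presumably resembles the sketch below; the `Message` model and its fields are assumptions for illustration, not code from the repository:

```python
# Illustrative only: a plausible shape for src/db.py's reset_history,
# using peewee (the ORM listed in requirements.txt).
import peewee

db = peewee.SqliteDatabase("chat.db")


class Message(peewee.Model):  # hypothetical message table
    user = peewee.CharField()
    session = peewee.CharField()
    content = peewee.TextField()

    class Meta:
        database = db


def reset_history(user: str, session: str) -> int:
    # DELETE rows matching the (user, session) pair; peewee's execute()
    # returns the number of rows removed, which /reset echoes back.
    return (
        Message.delete()
        .where((Message.user == user) & (Message.session == session))
        .execute()
    )
```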