Commit 6fce4ec · 1 Parent(s): 4ff72e9
tech-envision committed

Add FastAPI server for LLM chat

README.md CHANGED
@@ -37,6 +37,18 @@ When using the Discord bot, attach one or more text files to a message to
  upload them automatically. The bot responds with the location of each document
  inside the VM so they can be referenced in subsequent prompts.
 
+ ## API Server
+
+ An HTTP API is provided using FastAPI. Run the server with:
+
+ ```bash
+ python server.py
+ ```
+
+ Send a POST request to `/chat` with the fields `user`, `session`, and `prompt` to
+ receive the assistant's reply. Conversation history is persisted in
+ `chat.db`. Use the `/reset` endpoint to clear previous messages for a session.
+
  ## Docker
 
  A Dockerfile is provided to run the Discord bot along with an Ollama server. The image installs Ollama, pulls the LLM and embedding models, and starts both the server and the bot.
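
As an illustration of the request shape described above, here is a minimal client sketch, assuming the server is running locally on the default host/port from `server.py`; the `requests` library is an assumption of this sketch and is not in the project's requirements.

```python
import requests

BASE = "http://localhost:8000"  # default bind address in server.py

# Ask the assistant a question; history for this (user, session) pair
# is persisted in chat.db on the server side.
resp = requests.post(
    f"{BASE}/chat",
    json={"user": "default", "session": "default", "prompt": "Hello"},
)
resp.raise_for_status()
print(resp.json()["reply"])

# Clear the stored conversation history for the same session.
resp = requests.post(f"{BASE}/reset", json={"user": "default", "session": "default"})
print(resp.json())  # e.g. {"removed": 2}
```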
requirements.txt CHANGED
@@ -3,4 +3,6 @@ ollama
  peewee
  discord.py
  colorama
- python-dotenv
+ python-dotenv
+ fastapi
+ uvicorn
server.py ADDED
@@ -0,0 +1,13 @@
+ from __future__ import annotations
+
+ import uvicorn
+
+ from src.api import create_app
+
+
+ def main() -> None:
+     uvicorn.run(create_app(), host="0.0.0.0", port=8000)
+
+
+ if __name__ == "__main__":  # pragma: no cover - manual start
+     main()
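
Side note on `server.py`: `uvicorn.run()` is handed an already-constructed app, which suits production-style startup. For auto-reload during development, uvicorn needs an import string instead; a sketch of that variant (the `reload=True` flag is an assumption for development use, not part of this commit):

```python
import uvicorn

# factory=True tells uvicorn that "src.api:create_app" is a callable
# returning the app; the import-string form is required for reload.
uvicorn.run("src.api:create_app", factory=True, host="0.0.0.0", port=8000, reload=True)
```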
src/__init__.py CHANGED
@@ -1,5 +1,6 @@
  from .chat import ChatSession
  from .tools import execute_terminal, set_vm
  from .vm import LinuxVM
+ from .api import create_app
 
- __all__ = ["ChatSession", "execute_terminal", "set_vm", "LinuxVM"]
+ __all__ = ["ChatSession", "execute_terminal", "set_vm", "LinuxVM", "create_app"]
src/api/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .app import create_app
+
+ __all__ = ["create_app"]
src/api/app.py ADDED
@@ -0,0 +1,13 @@
+ from __future__ import annotations
+
+ from fastapi import FastAPI
+
+ from .router import router
+
+ __all__ = ["create_app"]
+
+
+ def create_app() -> FastAPI:
+     app = FastAPI(title="LLM Chat API", version="1.0.0")
+     app.include_router(router)
+     return app
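
A quick way to exercise `create_app()` without opening a socket is FastAPI's bundled `TestClient`; a sketch, assuming the Ollama backend behind `ChatSession` is reachable:

```python
from fastapi.testclient import TestClient

from src.api import create_app

client = TestClient(create_app())

resp = client.post(
    "/chat",
    json={"user": "default", "session": "default", "prompt": "Hello"},
)
print(resp.status_code, resp.json())  # 200 {"reply": "..."} on success
```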
src/api/models.py ADDED
@@ -0,0 +1,20 @@
+ from __future__ import annotations
+
+ from pydantic import BaseModel, Field
+
+ __all__ = ["ChatRequest", "ChatResponse", "ResetRequest", "ResetResponse"]
+
+ class ChatRequest(BaseModel):
+     user: str = Field(..., example="default")
+     session: str = Field(..., example="default")
+     prompt: str = Field(..., min_length=1, example="Hello")
+
+ class ChatResponse(BaseModel):
+     reply: str
+
+ class ResetRequest(BaseModel):
+     user: str
+     session: str
+
+ class ResetResponse(BaseModel):
+     removed: int
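
Because `prompt` carries `min_length=1`, an empty prompt is rejected during validation (FastAPI turns this into a 422 response before the endpoint body runs). The same check can be seen at the model level; a small sketch:

```python
from pydantic import ValidationError

from src.api.models import ChatRequest

ChatRequest(user="alice", session="s1", prompt="Hi")  # validates fine

try:
    ChatRequest(user="alice", session="s1", prompt="")
except ValidationError as exc:
    print(exc)  # min_length=1 rejects the empty prompt
```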
src/api/router.py ADDED
@@ -0,0 +1,29 @@
+ from __future__ import annotations
+
+ from fastapi import APIRouter, HTTPException, status
+
+ from .models import ChatRequest, ChatResponse, ResetRequest, ResetResponse
+ from ..chat import ChatSession
+ from ..db import reset_history
+ from ..log import get_logger
+
+ router = APIRouter()
+ log = get_logger(__name__)
+
+
+ @router.post("/chat", response_model=ChatResponse, status_code=status.HTTP_200_OK)
+ async def chat_endpoint(payload: ChatRequest) -> ChatResponse:
+     log.debug("chat request user=%s session=%s", payload.user, payload.session)
+     async with ChatSession(user=payload.user, session=payload.session) as chat:
+         try:
+             reply = await chat.chat(payload.prompt)
+         except Exception as exc:  # pragma: no cover - runtime errors
+             log.exception("chat processing failed")
+             raise HTTPException(status_code=500, detail=str(exc)) from exc
+     return ChatResponse(reply=reply)
+
+
+ @router.post("/reset", response_model=ResetResponse, status_code=status.HTTP_200_OK)
+ async def reset_endpoint(payload: ResetRequest) -> ResetResponse:
+     removed = reset_history(payload.user, payload.session)
+     return ResetResponse(removed=removed)
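
Design note: failures inside `ChatSession.chat` are logged and re-raised as an HTTP 500 whose `detail` field carries the stringified error, so clients receive a structured JSON body rather than a dropped connection. A sketch of the `/reset` round trip via `TestClient` (the removed count depends on what `reset_history` deletes from `chat.db`):

```python
from fastapi.testclient import TestClient

from src.api import create_app

client = TestClient(create_app())

resp = client.post("/reset", json={"user": "default", "session": "default"})
assert resp.status_code == 200
print(resp.json())  # {"removed": <number of deleted messages>}
```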