|
import asyncio
import json
import time
from typing import List, Optional

import requests
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse
from pydantic import BaseModel

from models import AVAILABLE_MODELS
|
|
|
# Application instance; the route decorators in this module register on it.
app = FastAPI()
|
|
|
def unix_id():
    """Return the current Unix time in whole milliseconds as a decimal string."""
    milliseconds = int(time.time() * 1000)
    return str(milliseconds)
|
|
|
class Message(BaseModel):
    """One entry of a chat conversation: a role plus its text."""

    # Role label of the message author, passed through as given.
    role: str

    # Text body of the message.
    content: str
|
|
|
class ChatRequest(BaseModel):
    """Request body for a chat completion."""

    # Conversation to send, in order.
    messages: List[Message]

    # Model identifier, passed through as given.
    model: str

    # When truthy the reply is streamed; defaults to a single JSON response.
    stream: Optional[bool] = False
|
|
|
@app.get("/v1/models")
async def list_models():
    """Return AVAILABLE_MODELS wrapped in an ``{"object": "list"}`` envelope."""
    listing = {"object": "list", "data": AVAILABLE_MODELS}
    return listing
|
|
|
# Upstream endpoint that serves the completions, and the timeout (seconds)
# handed to `requests` for every upstream call.
_UPSTREAM_CHAT_URL = "https://www.chatwithmono.xyz/api/chat"
_UPSTREAM_TIMEOUT_SECONDS = 120


def _post_upstream(headers, payload):
    """Open a streaming POST to the upstream chat API and return the response.

    The caller owns the response and should use it as a context manager so
    the connection is released even if iteration stops early.
    """
    return requests.post(
        _UPSTREAM_CHAT_URL,
        headers=headers,
        json=payload,
        stream=True,
        timeout=_UPSTREAM_TIMEOUT_SECONDS,
    )


def _decoded_lines(response):
    """Yield the non-empty lines of *response* as UTF-8 text.

    Lines are read as bytes and decoded here rather than through
    ``iter_lines(decode_unicode=True)``: that option relies on the charset
    requests derives from the response headers, and hands back raw bytes
    when there is none, which would break the ``str`` prefix checks.
    """
    for raw_line in response.iter_lines():
        if raw_line:
            yield raw_line.decode("utf-8", errors="replace")


def _load_frame_json(text):
    """Parse the JSON body of an upstream frame; return None if malformed."""
    try:
        return json.loads(text)
    except ValueError:
        # Best effort: a malformed frame is skipped instead of failing the
        # whole completion (json.JSONDecodeError is a ValueError).
        return None


def _token_count(value):
    """Coerce an upstream token count to a non-negative int, defaulting to 0."""
    try:
        return max(int(value), 0)
    except (TypeError, ValueError, OverflowError):
        # None, NaN, infinities and non-numeric values all count as unknown.
        return 0


def _sse_event(data):
    """Serialize *data* as one server-sent ``data:`` event."""
    return f"data: {json.dumps(data)}\n\n"


def _stream_completion(headers, payload, model):
    """Yield OpenAI-style ``chat.completion.chunk`` events for one request.

    Each upstream ``0:`` frame carrying a JSON string becomes a content
    chunk.  Reading stops at the first ``e:`` or ``d:`` frame (these look
    like end-of-response markers -- TODO confirm against the upstream
    protocol).  The stream always ends with a ``stop`` chunk followed by
    ``[DONE]``, even if the upstream closes without sending such a frame.
    If the upstream request fails, an ``error`` event is sent instead of the
    ``stop`` chunk.
    """
    chat_id = f"chatcmpl-{unix_id()}"
    created = int(time.time())

    def chunk_event(delta, finish_reason):
        return _sse_event({
            "id": chat_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model,
            "choices": [
                {
                    "delta": delta,
                    "index": 0,
                    "finish_reason": finish_reason,
                }
            ],
        })

    try:
        with _post_upstream(headers, payload) as response:
            response.raise_for_status()
            for line in _decoded_lines(response):
                if line.startswith("0:"):
                    piece = _load_frame_json(line[2:])
                    # The yield is deliberately outside any except clause so
                    # that GeneratorExit (client disconnect) propagates and
                    # closes the upstream connection.
                    if isinstance(piece, str):
                        yield chunk_event({"content": piece}, None)
                elif line.startswith(("e:", "d:")):
                    # Stop here so nothing is emitted after [DONE].
                    break
    except requests.RequestException as exc:
        # The HTTP status line has already been sent by the time this
        # generator runs, so the failure is reported in-band.
        yield _sse_event({
            "error": {
                "message": f"Upstream chat request failed: {type(exc).__name__}",
                "type": "upstream_error",
            }
        })
        yield "data: [DONE]\n\n"
        return

    yield chunk_event({}, "stop") + "data: [DONE]\n\n"


def _collect_completion(headers, payload):
    """Run one upstream request to completion (blocking).

    Returns:
        A ``(text, usage)`` tuple: the concatenation of every ``0:`` frame
        that carried a JSON string, and the last ``usage`` dict found in an
        ``e:`` or ``d:`` frame (an empty dict if none was seen).

    Raises:
        requests.RequestException: if the upstream call fails or answers
            with an HTTP error status.
    """
    pieces = []
    usage = {}
    with _post_upstream(headers, payload) as response:
        response.raise_for_status()
        for line in _decoded_lines(response):
            if line.startswith("0:"):
                piece = _load_frame_json(line[2:])
                if isinstance(piece, str):
                    pieces.append(piece)
            elif line.startswith(("e:", "d:")):
                frame = _load_frame_json(line[2:])
                if isinstance(frame, dict) and isinstance(frame.get("usage"), dict):
                    usage = frame["usage"]
    return "".join(pieces), usage


@app.post("/v1/chat/completions")
async def chat_completion(request: ChatRequest):
    """Proxy a chat completion to the upstream API in OpenAI response format.

    Args:
        request: Parsed request body (messages, model, optional stream flag).

    Returns:
        A ``StreamingResponse`` of ``text/event-stream`` chunks when
        ``request.stream`` is truthy; otherwise a ``JSONResponse`` holding a
        single ``chat.completion`` object, or a 502 ``JSONResponse`` with an
        ``error`` object if the upstream request fails.
    """
    headers = {
        'accept': 'text/event-stream',
        'content-type': 'application/json',
        'origin': 'https://www.chatwithmono.xyz',
        'referer': 'https://www.chatwithmono.xyz/',
        'user-agent': 'Mozilla/5.0',
    }

    payload = {
        "messages": [
            {"role": msg.role, "content": msg.content} for msg in request.messages
        ],
        "model": request.model,
    }

    if request.stream:
        # A plain (sync) generator is handed over; the upstream request is
        # only made once the response body starts being iterated.
        return StreamingResponse(
            _stream_completion(headers, payload, request.model),
            media_type="text/event-stream",
        )

    # `requests` is blocking, so run it on the default executor instead of
    # stalling the event loop for up to the full upstream timeout.
    loop = asyncio.get_running_loop()
    try:
        assistant_response, usage_info = await loop.run_in_executor(
            None, _collect_completion, headers, payload
        )
    except requests.RequestException as exc:
        return JSONResponse(
            status_code=502,
            content={
                "error": {
                    "message": f"Upstream chat request failed: {type(exc).__name__}",
                    "type": "upstream_error",
                }
            },
        )

    prompt_tokens = _token_count(usage_info.get("promptTokens"))
    completion_tokens = _token_count(usage_info.get("completionTokens"))

    return JSONResponse(content={
        "id": f"chatcmpl-{unix_id()}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": request.model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": assistant_response,
            },
            "finish_reason": "stop",
        }],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    })
|
|