"""HTTP proxy exposing `/v1/models` and `/v1/chat/completions`.

Chat requests are forwarded to https://www.chatwithmono.xyz/api/chat and the
upstream line-oriented stream is re-shaped into `chat.completion` /
`chat.completion.chunk` JSON objects.
"""

import json
import time
from typing import List, Optional

import requests
from fastapi import FastAPI, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import StreamingResponse, JSONResponse
from pydantic import BaseModel

from models import AVAILABLE_MODELS

app = FastAPI()

# Upstream endpoint settings shared by the streaming and non-streaming paths.
_UPSTREAM_URL = "https://www.chatwithmono.xyz/api/chat"
_UPSTREAM_TIMEOUT = 120  # seconds, passed to `requests.post`
_UPSTREAM_HEADERS = {
    'accept': 'text/event-stream',
    'content-type': 'application/json',
    'origin': 'https://www.chatwithmono.xyz',
    'referer': 'https://www.chatwithmono.xyz/',
    'user-agent': 'Mozilla/5.0',
}

# Line prefixes used by the upstream stream, as consumed by this module:
# "0:" is followed by a JSON-encoded text fragment; "e:" / "d:" are followed
# by a JSON object that may carry a "usage" mapping.
_TEXT_PREFIX = "0:"
_FINISH_PREFIXES = ("e:", "d:")


def unix_id():
    """Return the current Unix time in milliseconds as a decimal string."""
    return str(int(time.time() * 1000))


# One chat turn: who spoke (`role`) and what was said (`content`).
class Message(BaseModel):
    role: str
    content: str


# Request body of POST /v1/chat/completions.
class ChatRequest(BaseModel):
    messages: List[Message]
    model: str
    stream: Optional[bool] = False  # True -> respond as text/event-stream


def _post_upstream(payload):
    """Open a streaming POST to the upstream chat endpoint.

    The returned `requests.Response` is meant to be used as a context
    manager so the connection is released when the caller is done.
    """
    return requests.post(
        _UPSTREAM_URL,
        headers=_UPSTREAM_HEADERS,
        json=payload,
        stream=True,
        timeout=_UPSTREAM_TIMEOUT,
    )


def _iter_upstream_lines(response):
    """Iterate over the upstream response body as decoded text lines."""
    # With `decode_unicode=True`, requests still yields raw bytes when the
    # response declares no encoding; fall back to UTF-8 so the `str` prefix
    # checks performed by the callers cannot fail with a TypeError.
    if response.encoding is None:
        response.encoding = "utf-8"
    return response.iter_lines(decode_unicode=True)


def _stream_chunks(payload, model):
    """Yield server-sent-event strings for one streamed completion.

    Each upstream "0:" line becomes one `chat.completion.chunk` event. The
    first "e:" / "d:" line produces the closing chunk (finish_reason "stop")
    followed by `data: [DONE]`. Lines whose JSON cannot be parsed are skipped.
    """
    chat_id = f"chatcmpl-{unix_id()}"
    created = int(time.time())

    def sse(delta, finish_reason):
        chunk_data = {
            "id": chat_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model,
            "choices": [
                {
                    "delta": delta,
                    "index": 0,
                    "finish_reason": finish_reason,
                }
            ],
        }
        return f"data: {json.dumps(chunk_data)}\n\n"

    sent_done = False
    with _post_upstream(payload) as response:
        for line in _iter_upstream_lines(response):
            if line.startswith(_TEXT_PREFIX):
                try:
                    content_piece = json.loads(line[2:])
                except ValueError:
                    # Malformed fragment: best-effort, drop it.
                    continue
                # The yield is deliberately outside the `try`: a handler
                # wrapped around a yield that swallows GeneratorExit makes
                # closing the generator (client disconnect) raise
                # RuntimeError.
                yield sse({"content": content_piece}, None)
            elif line.startswith(_FINISH_PREFIXES) and not sent_done:
                sent_done = True
                yield sse({}, "stop") + "data: [DONE]\n\n"

    if not sent_done:
        # Upstream ended without a finish line; still terminate the stream
        # explicitly so consumers waiting for [DONE] are not left hanging.
        yield sse({}, "stop") + "data: [DONE]\n\n"


def _collect_completion(payload):
    """Run one upstream request to completion (blocking).

    Returns:
        A `(text, usage)` tuple: the concatenation of all string fragments
        from "0:" lines, and the "usage" mapping from the last parseable
        "e:" / "d:" line (an empty dict when absent or not a mapping).
    """
    pieces = []
    usage_info = {}
    with _post_upstream(payload) as response:
        for line in _iter_upstream_lines(response):
            if line.startswith(_TEXT_PREFIX):
                try:
                    piece = json.loads(line[2:])
                except ValueError:
                    continue
                # Only string fragments can be part of the message text.
                if isinstance(piece, str):
                    pieces.append(piece)
            elif line.startswith(_FINISH_PREFIXES):
                try:
                    data = json.loads(line[2:])
                except ValueError:
                    continue
                if isinstance(data, dict):
                    usage = data.get("usage", {})
                    # Guard against `"usage": null` or other non-mappings.
                    usage_info = usage if isinstance(usage, dict) else {}
    return "".join(pieces), usage_info


@app.get("/v1/models")
async def list_models():
    """List the models advertised by this proxy."""
    return {"object": "list", "data": AVAILABLE_MODELS}


@app.post("/v1/chat/completions")
async def chat_completion(request: ChatRequest):
    """Create a chat completion, streamed or as a single JSON document."""
    payload = {
        "messages": [
            {"role": msg.role, "content": msg.content}
            for msg in request.messages
        ],
        "model": request.model,
    }

    if request.stream:
        return StreamingResponse(
            _stream_chunks(payload, request.model),
            media_type="text/event-stream",
        )

    # `requests` is blocking; run it in a worker thread so this coroutine
    # does not stall the event loop while waiting on the upstream.
    assistant_response, usage_info = await run_in_threadpool(
        _collect_completion, payload
    )

    # Token counts may be missing or null upstream; treat both as zero.
    prompt_tokens = usage_info.get("promptTokens") or 0
    completion_tokens = usage_info.get("completionTokens") or 0

    return JSONResponse(content={
        "id": f"chatcmpl-{unix_id()}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": request.model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": assistant_response,
            },
            "finish_reason": "stop",
        }],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    })