Spaces:

hadadrjt
/

api

Paused

App Files Community

hadadrjt committed 2 days ago

Commit

d1ba698

1 Parent(s): 360d36a

api: Shut up!

Browse files

Files changed (1) hide show

app.py +41 -55

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import time
 import json
 import asyncio
 import logging
 from typing import Optional, List, Union, Dict, Any, Literal
 from fastapi import FastAPI, HTTPException, Request, status
 from fastapi.middleware.cors import CORSMiddleware
@@ -29,6 +30,7 @@ class SessionData:
         self.history: List[Dict[str, Any]] = []
         self.last_access: float = time.time()
         self.active_tasks: Dict[str, asyncio.Task] = {}
 class SessionManager:
     def __init__(self):
@@ -48,55 +50,49 @@ class SessionManager:
                             for task_id, task in data.active_tasks.items():
                                 if not task.done():
                                     task.cancel()
-                                    logger.info(f"Cancelled active task {task_id} for expired session {sid}")
                 for user, sid in expired:
                     if user in self.sessions and sid in self.sessions[user]:
                         del self.sessions[user][sid]
                         if not self.sessions[user]:
                             del self.sessions[user]
-                        logger.info(f"Session expired: user={user} session={sid}")
     async def get_session(self, user: Optional[str], session_id: Optional[str]) -> (str, str, SessionData):
         async with self.lock:
             if not user:
                 user = str(uuid.uuid4())
-                logger.debug(f"Generated new user ID: {user}")
             if user not in self.sessions:
                 self.sessions[user] = {}
             if not session_id or session_id not in self.sessions[user]:
                 session_id = str(uuid.uuid4())
                 self.sessions[user][session_id] = SessionData()
-                logger.info(f"Created new session: user={user} session={session_id}")
             session = self.sessions[user][session_id]
             session.last_access = time.time()
-            logger.debug(f"Session accessed: user={user} session={session_id} history_length={len(session.history)}")
             return user, session_id, session
 session_manager = SessionManager()
 async def refresh_client(app: FastAPI):
     while True:
-        await asyncio.sleep(1)
         async with app.state.client_lock:
-            if app.state.client is None:
-                await asyncio.sleep(1)
-                continue
-        while True:
-            await asyncio.sleep(15)
-            async with app.state.client_lock:
-                if app.state.client is not None:
-                    try:
-                        old_client = app.state.client
-                        app.state.client = None
-                        del old_client
-                        app.state.client = Client("https://hadadrjt-ai.hf.space/")
-                        logger.info("Refreshed Gradio client connection")
-                    except Exception as e:
-                        logger.error(f"Error refreshing Gradio client: {e}", exc_info=True)
-                        app.state.client = None
-                        await asyncio.sleep(5)
-                else:
-                    break
 @asynccontextmanager
 async def lifespan(app: FastAPI):
@@ -104,13 +100,15 @@ async def lifespan(app: FastAPI):
     app.state.client = None
     app.state.client_lock = asyncio.Lock()
     app.state.refresh_task = asyncio.create_task(refresh_client(app))
-    logger.info("App lifespan started, refresh client task running")
     try:
         yield
     finally:
         app.state.refresh_task.cancel()
         await asyncio.sleep(0.1)
-        logger.info("App lifespan ended, refresh client task cancelled")
 app = FastAPI(
     title="J.A.R.V.I.S. OpenAI-Compatible API",
@@ -220,20 +218,16 @@ async def get_client(app: FastAPI) -> Client:
 async def call_gradio(client: Client, params: dict):
     for attempt in range(3):
         try:
-            logger.debug(f"Calling Gradio attempt {attempt+1}")
             return await asyncio.to_thread(lambda: client.submit(**params))
         except Exception as e:
-            logger.warning(f"Gradio call failed attempt {attempt+1}: {e}", exc_info=True)
             await asyncio.sleep(0.2 * (attempt + 1))
-    logger.error("Gradio upstream error after 3 attempts")
     raise HTTPException(status_code=502, detail="Upstream Gradio app error")
 async def stream_response(job, session_id: str, session_history: List[Dict[str, Any]], new_messages: List[Message], response_type: str):
     partial = ""
     try:
         chunks = await asyncio.to_thread(lambda: list(job))
-    except Exception as e:
-        logger.error(f"Streaming error: {e}", exc_info=True)
         chunks = []
     for chunk in chunks:
         try:
@@ -258,8 +252,7 @@ async def stream_response(job, session_id: str, session_history: List[Dict[str,
                     "session_id": session_id
                 }
             yield f"data: {json.dumps(data)}\n\n"
-        except Exception as e:
-            logger.error(f"Chunk yield error: {e}", exc_info=True)
             continue
     session_history.extend([m.model_dump() for m in new_messages if m.role != "system"])
     session_history.append({"role": "assistant", "content": partial})
@@ -271,9 +264,15 @@ async def stream_response(job, session_id: str, session_history: List[Dict[str,
     }
     yield f"data: {json.dumps(done_data)}\n\n"
 @app.post("/v1/chat/completions")
 async def chat_completions(req: ChatCompletionRequest):
     user, session_id, session = await session_manager.get_session(req.user, req.session_id)
     req.messages = sanitize_messages(req.messages)
     for m in req.messages:
         if m.role == "system":
@@ -299,7 +298,6 @@ async def chat_completions(req: ChatCompletionRequest):
         "function_call": req.function_call or req.tool_choice,
     }
     params = {k: v for k, v in params.items() if v is not None}
-    logger.info(f"Chat completion request user={user} session={session_id} model={req.model} stream={req.stream}")
     client = await get_client(app)
     if req.stream:
         job = await call_gradio(client, params)
@@ -309,12 +307,10 @@ async def chat_completions(req: ChatCompletionRequest):
         loop = asyncio.get_running_loop()
         try:
             result = await loop.run_in_executor(None, lambda: client.predict(**params))
-        except Exception as e:
-            logger.error(f"Gradio predict error: {e}", exc_info=True)
             raise HTTPException(status_code=502, detail="Upstream Gradio app error")
         session.history.extend([m.model_dump() for m in req.messages if m.role != "system"])
         session.history.append({"role": "assistant", "content": result})
-        logger.info(f"Chat completion response sent user={user} session={session_id}")
         return {
             "id": str(uuid.uuid4()),
             "object": "chat.completion",
@@ -324,7 +320,11 @@ async def chat_completions(req: ChatCompletionRequest):
 @app.post("/v1/completions")
 async def completions(req: CompletionRequest):
-    user, session_id, _ = await session_manager.get_session(req.user, req.session_id)
     prompt = req.prompt if isinstance(req.prompt, str) else "\n".join(req.prompt)
     params = {
         "message": prompt,
@@ -343,7 +343,6 @@ async def completions(req: CompletionRequest):
         "seed": req.seed,
     }
     params = {k: v for k, v in params.items() if v is not None}
-    logger.info(f"Completion request user={user} session={session_id} model={req.model} stream={req.stream}")
     client = await get_client(app)
     if req.stream:
         job = await call_gradio(client, params)
@@ -353,29 +352,24 @@ async def completions(req: CompletionRequest):
         loop = asyncio.get_running_loop()
         try:
             result = await loop.run_in_executor(None, lambda: client.predict(**params))
-        except Exception as e:
-            logger.error(f"Gradio predict error: {e}", exc_info=True)
             raise HTTPException(status_code=502, detail="Upstream Gradio app error")
-        logger.info(f"Completion response sent user={user} session={session_id}")
         return {"id": str(uuid.uuid4()), "object": "text_completion", "choices": [{"text": result}]}
 @app.post("/v1/embeddings")
 async def embeddings(req: EmbeddingRequest):
     inputs = req.input if isinstance(req.input, list) else [req.input]
     embeddings = [[0.0] * 768 for _ in inputs]
-    logger.info(f"Embedding request model={req.model} inputs_count={len(inputs)}")
     return {"object": "list", "data": [{"embedding": emb, "index": i} for i, emb in enumerate(embeddings)]}
 @app.get("/v1/models")
 async def get_models():
-    logger.info("Models list requested")
     return {"object": "list", "data": [{"id": "Q8_K_XL", "object": "model", "owned_by": "J.A.R.V.I.S."}]}
 @app.get("/v1/history")
 async def get_history(user: Optional[str] = None, session_id: Optional[str] = None):
     user = user or "anonymous"
     sessions = session_manager.sessions
-    logger.info(f"History requested user={user} session={session_id}")
     if user in sessions and session_id and session_id in sessions[user]:
         return {"user": user, "session_id": session_id, "history": sessions[user][session_id].history}
     return {"user": user, "session_id": session_id, "history": []}
@@ -384,7 +378,6 @@ async def get_history(user: Optional[str] = None, session_id: Optional[str] = No
 async def cancel_response(user: Optional[str], session_id: Optional[str], task_id: Optional[str]):
     user = user or "anonymous"
     if not task_id:
-        logger.warning(f"Cancel response missing task_id user={user} session={session_id}")
         raise HTTPException(status_code=400, detail="Missing task_id for cancellation")
     async with session_manager.lock:
         if user in session_manager.sessions and session_id in session_manager.sessions[user]:
@@ -392,9 +385,7 @@ async def cancel_response(user: Optional[str], session_id: Optional[str], task_i
             task = session.active_tasks.get(task_id)
             if task and not task.done():
                 task.cancel()
-                logger.info(f"Cancelled task {task_id} for user={user} session={session_id}")
                 return {"message": f"Cancelled task {task_id}"}
-    logger.warning(f"Task not found or already completed task_id={task_id} user={user} session={session_id}")
     raise HTTPException(status_code=404, detail="Task not found or already completed")
 @app.api_route("/v1", methods=["POST", "GET", "OPTIONS", "HEAD"])
@@ -403,15 +394,12 @@ async def router(request: Request):
         try:
             body_json = await request.json()
         except Exception:
-            logger.error("Invalid JSON body in router POST")
             raise HTTPException(status_code=400, detail="Invalid JSON body")
         try:
             body = RouterRequest(**body_json)
         except ValidationError as e:
-            logger.error(f"Validation error in router POST: {e.errors()}")
             raise HTTPException(status_code=422, detail=e.errors())
         endpoint = body.endpoint or "chat/completions"
-        logger.info(f"Router POST to endpoint={endpoint}")
         if endpoint == "chat/completions":
             if not body.model or not body.messages:
                 raise HTTPException(status_code=422, detail="Missing 'model' or 'messages'")
@@ -432,12 +420,10 @@ async def router(request: Request):
         elif endpoint == "history":
             return await get_history(body.user, body.session_id)
         elif endpoint == "responses/cancel":
-            return await cancel_response(body.user, body.session_id, body.session_id)
         else:
-            logger.warning(f"Router POST unknown endpoint: {endpoint}")
             raise HTTPException(status_code=404, detail="Endpoint not found")
     else:
-        logger.info(f"Router {request.method} called - only POST supported with JSON body")
         return JSONResponse({"message": "Send POST request with JSON body"}, status_code=status.HTTP_405_METHOD_NOT_ALLOWED)
 @app.get("/")

 import json
 import asyncio
 import logging
+import os
 from typing import Optional, List, Union, Dict, Any, Literal
 from fastapi import FastAPI, HTTPException, Request, status
 from fastapi.middleware.cors import CORSMiddleware
         self.history: List[Dict[str, Any]] = []
         self.last_access: float = time.time()
         self.active_tasks: Dict[str, asyncio.Task] = {}
+        self.last_request_time: float = 0.0
 class SessionManager:
     def __init__(self):
                             for task_id, task in data.active_tasks.items():
                                 if not task.done():
                                     task.cancel()
                 for user, sid in expired:
                     if user in self.sessions and sid in self.sessions[user]:
                         del self.sessions[user][sid]
                         if not self.sessions[user]:
                             del self.sessions[user]
     async def get_session(self, user: Optional[str], session_id: Optional[str]) -> tuple[str, str, SessionData]:
         """Return the session for ``user``/``session_id``, creating what is missing.

         A falsy ``user`` is replaced by a fresh UUID4 string. A falsy
         ``session_id``, or one not stored for that user, is replaced by a
         fresh UUID4 string and a new ``SessionData`` is stored under it.
         The session's ``last_access`` is set to the current time on every call.

         Args:
             user: Caller-supplied user identifier, or ``None``.
             session_id: Caller-supplied session identifier, or ``None``.

         Returns:
             ``(user, session_id, session)`` carrying the identifiers that
             were actually used, which may differ from the ones passed in.
         """
         # The whole lookup/creation runs under the manager lock.
         async with self.lock:
             if not user:
                 user = str(uuid.uuid4())
             user_sessions = self.sessions.setdefault(user, {})
             if not session_id or session_id not in user_sessions:
                 session_id = str(uuid.uuid4())
                 user_sessions[session_id] = SessionData()
             session = user_sessions[session_id]
             session.last_access = time.time()
             return user, session_id, session
 session_manager = SessionManager()
 async def refresh_client(app: FastAPI):
     while True:
+        await asyncio.sleep(15 * 60)
         async with app.state.client_lock:
+            if app.state.client is not None:
+                try:
+                    old_client = app.state.client
+                    app.state.client = None
+                    del old_client
+                    app.state.client = Client("https://hadadrjt-ai.hf.space/")
+                    logger.info("Refreshed Gradio client connection")
+                except Exception as e:
+                    logger.error(f"Error refreshing Gradio client: {e}", exc_info=True)
+                    app.state.client = None
+async def clear_terminal_periodically():
+    while True:
+        await asyncio.sleep(300)
+        if os.name == "nt":
+            os.system("cls")
+        else:
+            print("\033c", end="", flush=True)
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     app.state.client = None
     app.state.client_lock = asyncio.Lock()
     app.state.refresh_task = asyncio.create_task(refresh_client(app))
+    app.state.cleanup_task = asyncio.create_task(session_manager.cleanup())
+    app.state.clear_log_task = asyncio.create_task(clear_terminal_periodically())
     try:
         yield
     finally:
         app.state.refresh_task.cancel()
+        app.state.cleanup_task.cancel()
+        app.state.clear_log_task.cancel()
         await asyncio.sleep(0.1)
 app = FastAPI(
     title="J.A.R.V.I.S. OpenAI-Compatible API",
 async def call_gradio(client: Client, params: dict, attempts: int = 3, backoff: float = 0.2):
     """Call ``client.submit(**params)`` in a worker thread, retrying on failure.

     Args:
         client: Gradio client whose ``submit`` method is invoked.
         params: Keyword arguments forwarded to ``client.submit``.
         attempts: Maximum number of tries (defaults to the original 3).
         backoff: Base delay in seconds; the wait after the n-th failed try
             is ``backoff * n`` (defaults to the original 0.2).

     Returns:
         Whatever ``client.submit`` returns on the first successful try.

     Raises:
         HTTPException: 502 once every attempt has failed; the last
             underlying error is attached as the exception's cause.
     """
     last_error: Optional[Exception] = None
     for attempt in range(1, attempts + 1):
         try:
             return await asyncio.to_thread(lambda: client.submit(**params))
         except Exception as exc:
             last_error = exc
             # Back off only when another attempt follows; sleeping after the
             # final failure would just delay the 502.
             if attempt < attempts:
                 await asyncio.sleep(backoff * attempt)
     raise HTTPException(status_code=502, detail="Upstream Gradio app error") from last_error
 async def stream_response(job, session_id: str, session_history: List[Dict[str, Any]], new_messages: List[Message], response_type: str):
     partial = ""
     try:
         chunks = await asyncio.to_thread(lambda: list(job))
+    except Exception:
         chunks = []
     for chunk in chunks:
         try:
                     "session_id": session_id
                 }
             yield f"data: {json.dumps(data)}\n\n"
+        except Exception:
             continue
     session_history.extend([m.model_dump() for m in new_messages if m.role != "system"])
     session_history.append({"role": "assistant", "content": partial})
     }
     yield f"data: {json.dumps(done_data)}\n\n"
+RATE_LIMIT_SECONDS = 1.0
 @app.post("/v1/chat/completions")
 async def chat_completions(req: ChatCompletionRequest):
     user, session_id, session = await session_manager.get_session(req.user, req.session_id)
+    now = time.time()
+    if now - session.last_request_time < RATE_LIMIT_SECONDS:
+        raise HTTPException(status_code=429, detail="Too many requests, please slow down")
+    session.last_request_time = now
     req.messages = sanitize_messages(req.messages)
     for m in req.messages:
         if m.role == "system":
         "function_call": req.function_call or req.tool_choice,
     }
     params = {k: v for k, v in params.items() if v is not None}
     client = await get_client(app)
     if req.stream:
         job = await call_gradio(client, params)
         loop = asyncio.get_running_loop()
         try:
             result = await loop.run_in_executor(None, lambda: client.predict(**params))
+        except Exception:
             raise HTTPException(status_code=502, detail="Upstream Gradio app error")
         session.history.extend([m.model_dump() for m in req.messages if m.role != "system"])
         session.history.append({"role": "assistant", "content": result})
         return {
             "id": str(uuid.uuid4()),
             "object": "chat.completion",
 @app.post("/v1/completions")
 async def completions(req: CompletionRequest):
+    user, session_id, session = await session_manager.get_session(req.user, req.session_id)
+    now = time.time()
+    if now - session.last_request_time < RATE_LIMIT_SECONDS:
+        raise HTTPException(status_code=429, detail="Too many requests, please slow down")
+    session.last_request_time = now
     prompt = req.prompt if isinstance(req.prompt, str) else "\n".join(req.prompt)
     params = {
         "message": prompt,
         "seed": req.seed,
     }
     params = {k: v for k, v in params.items() if v is not None}
     client = await get_client(app)
     if req.stream:
         job = await call_gradio(client, params)
         loop = asyncio.get_running_loop()
         try:
             result = await loop.run_in_executor(None, lambda: client.predict(**params))
+        except Exception:
             raise HTTPException(status_code=502, detail="Upstream Gradio app error")
         return {"id": str(uuid.uuid4()), "object": "text_completion", "choices": [{"text": result}]}
 @app.post("/v1/embeddings")
 async def embeddings(req: EmbeddingRequest):
     """Answer an embeddings request with one all-zero vector per input.

     ``req.input`` may be a single value or a list; a single value is treated
     as a one-element list. No model is invoked here: every returned
     embedding is a list of 768 zeros, paired with its input position.
     """
     payload = req.input
     if not isinstance(payload, list):
         payload = [payload]
     items = [
         {"embedding": [0.0] * 768, "index": position}
         for position in range(len(payload))
     ]
     return {"object": "list", "data": items}
 @app.get("/v1/models")
 async def get_models():
     """Return the static model listing, which holds a single entry."""
     model_entry = {"id": "Q8_K_XL", "object": "model", "owned_by": "J.A.R.V.I.S."}
     return {"object": "list", "data": [model_entry]}
 @app.get("/v1/history")
 async def get_history(user: Optional[str] = None, session_id: Optional[str] = None):
     """Return the stored message history for a user's session.

     A missing ``user`` is looked up as ``"anonymous"``. When the user or the
     session is not stored, or no ``session_id`` is given, the history in the
     response is an empty list.
     """
     user = user or "anonymous"
     stored = session_manager.sessions
     history = []
     # Read-only lookup; nothing is created for unknown users or sessions.
     if session_id and user in stored and session_id in stored[user]:
         history = stored[user][session_id].history
     return {"user": user, "session_id": session_id, "history": history}
 async def cancel_response(user: Optional[str], session_id: Optional[str], task_id: Optional[str]):
     user = user or "anonymous"
     if not task_id:
         raise HTTPException(status_code=400, detail="Missing task_id for cancellation")
     async with session_manager.lock:
         if user in session_manager.sessions and session_id in session_manager.sessions[user]:
             task = session.active_tasks.get(task_id)
             if task and not task.done():
                 task.cancel()
                 return {"message": f"Cancelled task {task_id}"}
     raise HTTPException(status_code=404, detail="Task not found or already completed")
 @app.api_route("/v1", methods=["POST", "GET", "OPTIONS", "HEAD"])
         try:
             body_json = await request.json()
         except Exception:
             raise HTTPException(status_code=400, detail="Invalid JSON body")
         try:
             body = RouterRequest(**body_json)
         except ValidationError as e:
             raise HTTPException(status_code=422, detail=e.errors())
         endpoint = body.endpoint or "chat/completions"
         if endpoint == "chat/completions":
             if not body.model or not body.messages:
                 raise HTTPException(status_code=422, detail="Missing 'model' or 'messages'")
         elif endpoint == "history":
             return await get_history(body.user, body.session_id)
         elif endpoint == "responses/cancel":
+            return await cancel_response(body.user, body.session_id, body.tool_choice if isinstance(body.tool_choice, str) else None)
         else:
             raise HTTPException(status_code=404, detail="Endpoint not found")
     else:
         return JSONResponse({"message": "Send POST request with JSON body"}, status_code=status.HTTP_405_METHOD_NOT_ALLOWED)
 @app.get("/")