api: Restructured repo.
Files changed:
- README.md +3 -2
- app.py +56 -448
- config.py +10 -0
- src/__init__.py +0 -0
- src/cores/__init__.py +0 -0
- src/cores/sessions.py +120 -0
- src/models/__init__.py +0 -0
- src/models/requests.py +37 -0
- src/routes/__init__.py +0 -0
- src/routes/v1/__init__.py +0 -0
- src/routes/v1/chat_completions.py +141 -0
- src/routes/v1/history.py +54 -0
- src/routes/v1/models.py +39 -0
- src/routes/v1/responses.py +114 -0
- src/services/__init__.py +0 -0
- src/services/streaming.py +120 -0
README.md
CHANGED
@@ -1,5 +1,5 @@
 ---
-title:
+title: API
 license: apache-2.0
 license_link: https://huggingface.co/hadadrjt/JARVIS/blob/main/LICENSE
 emoji: π
@@ -8,7 +8,8 @@ colorTo: green
 sdk: gradio
 sdk_version: 5.34.0
 app_file: app.py
-pinned:
+pinned: true
+short_description: J.A.R.V.I.S. API Endpoint!
 models:
 - hadadrjt/JARVIS
 ---
app.py
CHANGED
@@ -3,465 +3,73 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import json
-import time
-import uuid
-import asyncio
-import uvicorn
-
-from contextlib import asynccontextmanager
-from fastapi import FastAPI, HTTPException
-from fastapi.responses import JSONResponse, StreamingResponse
-from gradio_client import Client
-from pydantic import BaseModel
-from typing import AsyncGenerator, Optional, Dict, List, Tuple, Any
-
-#
-MODEL = "JARVIS: 2.1.3"
-
-# Session store keeps track of active sessions.
-# Each session_id maps to a tuple:
-# (last_update_timestamp, session_data_dict)
-# session_data_dict contains:
-# - "model": the AI model name used in this session
-# - "history": list of past chat messages (input and response)
-# - "client": the Gradio Client instance specific to this session
-session_store: Dict[str, Tuple[float, Dict]] = {}
-
-# Duration (in seconds) after which inactive sessions are removed
-EXPIRE = 3600 # 1 hour
-
-# Create FastAPI app instance
+import asyncio # Import asyncio for asynchronous programming support in Python
+import json # Import json module to handle JSON encoding and decoding
+import uvicorn # Import uvicorn, an ASGI server implementation for running FastAPI applications
+
+from contextlib import asynccontextmanager # Import asynccontextmanager to create asynchronous context managers if needed
+from fastapi import FastAPI # Import FastAPI class to create the main web application instance
+from fastapi.responses import Response # Import Response class to send raw HTTP responses
+from fastapi.responses import JSONResponse # Import JSONResponse class for sending JSON formatted HTTP responses
+from src.routes.v1 import responses, chat_completions, models, history # Import router modules from the src.routes.v1 package to organize API endpoints
+
+# Initialize a FastAPI application
 app = FastAPI()
 
-class ResponseRequest(BaseModel):
-    """
-    Defines the request structure for the /v1/responses endpoint.
-
-    Attributes:
-    - model: Optional; specifies which AI model to use. Defaults to MODEL if not provided.
-    - input: The user's input text to send to the AI.
-    - stream: Optional; if True, the response will be streamed incrementally.
-    - session_id: Optional; unique identifier for the user's session. If missing, a new session will be created.
-    """
-    model: Optional[str] = None
-    input: str
-    stream: Optional[bool] = False
-    session_id: Optional[str] = None
-
-class OpenAIChatRequest(BaseModel):
-    """
-    Defines the OpenAI-compatible request structure for /v1/chat/completions endpoint.
-
-    Attributes:
-    - model: Optional; specifies which AI model to use. Defaults to MODEL if not provided.
-    - messages: List of message objects containing 'role' and 'content'
-    - stream: Optional; if True, the response will be streamed incrementally.
-    - session_id: Optional; unique session identifier for maintaining conversation history
-    """
-    model: Optional[str] = None
-    messages: List[Dict[str, str]]
-    stream: Optional[bool] = False
-    session_id: Optional[str] = None
-
-def cleanup_expired_sessions():
-    """
-    Remove sessions that have been inactive for longer than EXPIRE.
-    This helps free up memory by deleting old sessions and closing their clients.
-    """
-    now = time.time()
-    expired_sessions = [
-        sid for sid, (last_update, _) in session_store.items()
-        if now - last_update > EXPIRE
-    ]
-    for sid in expired_sessions:
-        # Attempt to close the Gradio client associated with the session
-        _, data = session_store[sid]
-        client = data.get("client")
-        if client:
-            try:
-                client.close()
-            except Exception:
-                # Ignore errors during client close to avoid crashing cleanup
-                pass
-        # Remove the session from the store
-        del session_store[sid]
-
-def create_client_for_model(model: str) -> Client:
-    """
-    Create a new Gradio Client instance and set it to use the specified AI model.
-
-    Parameters:
-    - model: The name of the AI model to initialize the client with.
-
-    Returns:
-    - A new Gradio Client instance configured with the given model.
-    """
-    client = Client("hadadrjt/ai")
-    # Set the model on the Gradio client by calling the /change_model API
-    client.predict(new=model, api_name="/change_model")
-    return client
-
-def get_or_create_session(session_id: Optional[str], model: str) -> str:
-    """
-    Retrieve an existing session by session_id or create a new one if it doesn't exist.
-    Also cleans up expired sessions before proceeding.
-
-    Parameters:
-    - session_id: The unique identifier of the session (optional).
-    - model: The AI model to use for this session.
-
-    Returns:
-    - The session_id for the active or newly created session.
-    """
-    cleanup_expired_sessions()
-
-    # If no session_id provided or session does not exist, create a new session
-    if not session_id or session_id not in session_store:
-        session_id = str(uuid.uuid4()) # Generate a new unique session ID
-        client = create_client_for_model(model) # Create a new client for this session
-        session_store[session_id] = (time.time(), {
-            "model": model,
-            "history": [],
-            "client": client
-        })
-    else:
-        # Session exists, update last access time and check if model changed
-        last_update, data = session_store[session_id]
-        if data["model"] != model:
-            # If model changed, close old client and create a new one with the new model
-            old_client = data.get("client")
-            if old_client:
-                try:
-                    old_client.close()
-                except Exception:
-                    pass # Ignore errors on close
-            new_client = create_client_for_model(model)
-            data["model"] = model
-            data["client"] = new_client
-            session_store[session_id] = (time.time(), data)
-        else:
-            # Just update the last access time to keep session alive
-            session_store[session_id] = (time.time(), data)
-
-    return session_id
-
-async def event_generator(user_input: str, model: str, session_id: str) -> AsyncGenerator[str, None]:
-    """
-    Asynchronous generator that streams AI responses incrementally as Server-Sent Events (SSE).
-
-    Parameters:
-    - user_input: The input text from the user.
-    - model: The AI model to use.
-    - session_id: The unique session identifier.
-
-    Yields:
-    - JSON-formatted chunks representing incremental AI response deltas.
-    """
-    last_update, session_data = session_store.get(session_id, (0, None))
-    if session_data is None:
-        # Session not found; yield error and stop
-        yield f"data: {json.dumps({'error': 'Session not found'})}\n\n"
-        return
-
-    client = session_data["client"]
-    if client is None:
-        # Client missing for session; yield error and stop
-        yield f"data: {json.dumps({'error': 'AI client not available'})}\n\n"
-        return
-
-    try:
-        # Submit the user input to the AI model via Gradio client
-        jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
-    except Exception as e:
-        # If submission fails, yield error and stop
-        yield f"data: {json.dumps({'error': f'Failed to submit to AI: {str(e)}'})}\n\n"
-        return
-
-    buffer = "" # Buffer to track full response text progressively
-
-    try:
-        for partial in jarvis_response:
-            # Extract the current partial text from the response
-            text = partial[0][0][1]
-
-            # Compute the delta relative to the buffered text
-            if text.startswith(buffer):
-                delta = text[len(buffer):]
-            else:
-                delta = text
-
-            buffer = text
-
-            # Skip empty deltas
-            if delta == "":
-                continue
-
-            # Prepare chunk data in OpenAI streaming format
-            chunk = {
-                "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-                "object": "chat.completion.chunk",
-                "created": int(time.time()),
-                "model": model,
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"content": delta},
-                        "finish_reason": None
-                    }
-                ]
-            }
-
-            # Yield the chunk as a Server-Sent Event
-            yield f"data: {json.dumps(chunk)}\n\n"
-
-        # After streaming completes, save the full input-response pair to session history
-        session_data["history"].append({"input": user_input, "response": buffer})
-        session_store[session_id] = (time.time(), session_data) # Update last access time
-
-        # Send a final chunk signaling completion of the stream
-        done_chunk = {
-            "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-            "object": "chat.completion.chunk",
-            "created": int(time.time()),
-            "model": model,
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": {},
-                    "finish_reason": "stop"
-                }
-            ]
-        }
-        yield f"data: {json.dumps(done_chunk)}\n\n"
-
-    except Exception as e:
-        # If streaming fails at any point, yield an error chunk
-        error_chunk = {
-            "error": {"message": f"Streaming error: {str(e)}"}
-        }
-        yield f"data: {json.dumps(error_chunk)}\n\n"
-
-@app.post("/v1/responses")
-async def responses(req: ResponseRequest):
-    """
-    Original API endpoint to get AI responses.
-    Supports both streaming and non-streaming modes.
-
-    Workflow:
-    - Validate or create session.
-    - Ensure AI client is available.
-    - Handle streaming or full response accordingly.
-    - Save chat history per session.
-
-    Returns:
-    - JSON response with AI output and session ID.
-    """
-    model = req.model or MODEL # Use requested model or default
-    session_id = get_or_create_session(req.session_id, model) # Get or create session
-    last_update, session_data = session_store[session_id]
-    user_input = req.input
-
-    client = session_data["client"]
-    if client is None:
-        # If client is missing, return 503 error
-        raise HTTPException(status_code=503, detail="AI client not available")
-
-    if req.stream:
-        # If streaming requested, return a streaming response using event_generator
-        return StreamingResponse(event_generator(user_input, model, session_id), media_type="text/event-stream")
-
-    # Non-streaming request: submit input and collect full response
-    try:
-        jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
-    except Exception as e:
-        # Return 500 error if submission fails
-        raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
-
-    buffer = ""
-    for partial in jarvis_response:
-        text = partial[0][0][1]
-        buffer = text # Update buffer with latest full response
-
-    # Save input and response to session history and update last access time
-    session_data["history"].append({"input": user_input, "response": buffer})
-    session_store[session_id] = (time.time(), session_data)
-
-    # Prepare the JSON response in OpenAI style format
-    response = {
-        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-        "object": "chat.completion",
-        "created": int(time.time()),
-        "model": model,
-        "choices": [
-            {
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": buffer
-                },
-                "finish_reason": "stop"
-            }
-        ],
-        "session_id": session_id # Return session_id so client can reuse it
-    }
-
-    # Return the JSON response
-    return JSONResponse(response)
-
-@app.post("/v1/chat/completions")
-async def openai_chat_completions(req: OpenAIChatRequest):
-    """
-    OpenAI-compatible endpoint for chat completions.
-    Supports both streaming and non-streaming modes.
-
-    Workflow:
-    - Validate message structure and extract conversation history
-    - Validate or create session
-    - Update session history from messages
-    - Handle streaming or full response
-    - Save new interaction to session history
-
-    Returns:
-    - JSON response in OpenAI format with session ID extension
-    """
-    # Validate messages structure
-    if not req.messages:
-        raise HTTPException(status_code=400, detail="Messages cannot be empty")
-
-    # Extract conversation history and current input
-    history = []
-    current_input = ""
-
-    # Process messages to extract conversation history
-    try:
-        # Last message should be from user and used as current input
-        if req.messages[-1]["role"] != "user":
-            raise ValueError("Last message must be from user")
-
-        current_input = req.messages[-1]["content"]
-
-        # Process message pairs (user + assistant)
-        messages = req.messages[:-1] # Exclude last message (current input)
-        for i in range(0, len(messages), 2):
-            if i+1 < len(messages):
-                user_msg = messages[i]
-                assistant_msg = messages[i+1]
-
-                if user_msg["role"] != "user" or assistant_msg["role"] != "assistant":
-                    # Skip invalid pairs but continue processing
-                    continue
-
-                history.append({
-                    "input": user_msg["content"],
-                    "response": assistant_msg["content"]
-                })
-    except (KeyError, ValueError) as e:
-        raise HTTPException(status_code=400, detail=f"Invalid message format: {str(e)}")
-
-    model = req.model or MODEL # Use requested model or default
-    session_id = get_or_create_session(req.session_id, model) # Get or create session
-    last_update, session_data = session_store[session_id]
-
-    # Update session history from messages
-    session_data["history"] = history
-    session_store[session_id] = (time.time(), session_data)
-
-    client = session_data["client"]
-    if client is None:
-        raise HTTPException(status_code=503, detail="AI client not available")
-
-    if req.stream:
-        # Streaming response
-        return StreamingResponse(
-            event_generator(current_input, model, session_id),
-            media_type="text/event-stream"
-        )
-
-    # Non-streaming response
-    try:
-        jarvis_response = client.submit(multi={"text": current_input}, api_name="/api")
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
-
-    buffer = ""
-    for partial in jarvis_response:
-        text = partial[0][0][1]
-        buffer = text
-
-    # Update session history with new interaction
-    session_data["history"].append({"input": current_input, "response": buffer})
-    session_store[session_id] = (time.time(), session_data)
-
-    # Format response in OpenAI style
-    response = {
-        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-        "object": "chat.completion",
-        "created": int(time.time()),
-        "model": model,
-        "choices": [
-            {
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": buffer
-                },
-                "finish_reason": "stop"
-            }
-        ],
-        "session_id": session_id # Custom extension for session management
-    }
-
-    return JSONResponse(response)
-
-@app.get("/v1/models")
-async def list_models():
-    """
-    OpenAI-compatible endpoint to list available models.
-    Returns a fixed list containing our default model.
-
-    This endpoint is required by many OpenAI-compatible clients.
-    """
-    return JSONResponse({
-        "object": "list",
-        "data": [
-            {
-                "id": MODEL,
-                "object": "model",
-                "created": 0, # Timestamp not available
-                "owned_by": "J.A.R.V.I.S."
-            }
-        ]
-    })
-
-@app.get("/v1/history")
-async def get_history(session_id: Optional[str] = None):
-    """
-    Endpoint to retrieve chat history for a given session.
-
-    Parameters:
-    - session_id: The unique session identifier.
-
-    Returns:
-    - JSON object containing session_id and list of past input-response pairs.
-
-    Raises:
-    - 404 error if session_id is missing or session does not exist.
-    """
-    if not session_id or session_id not in session_store:
-        raise HTTPException(status_code=404, detail="Session not found or session_id missing.")
-
-    _, session_data = session_store[session_id]
-    return {"session_id": session_id, "history": session_data["history"]}
+# Include the 'responses' router under the '/v1' prefix with the tag "Responses"
+# This router handles endpoints related to general responses
+app.include_router(responses.router, prefix="/v1", tags=["Responses"])
+
+# Include the 'chat_completions' router under the '/v1/chat' prefix with the tag "Chat Completions"
+# This router manages chat completion related API endpoints
+app.include_router(chat_completions.router, prefix="/v1/chat", tags=["Chat Completions"])
+
+# Include the 'models' router under the '/v1' prefix with the tag "Models"
+# This router provides endpoints related to available models
+app.include_router(models.router, prefix="/v1", tags=["Models"])
+
+# Include the 'history' router under the '/v1' prefix with the tag "History"
+# This router manages API endpoints related to user or session history
+app.include_router(history.router, prefix="/v1", tags=["History"])
+
+# Define a root path GET endpoint for the base URL '/'
+# This endpoint acts as a health check to confirm the API is operational
 @app.get("/")
 def root():
     """
-
-
-
-
+    Health check endpoint that returns a JSON response with API status information.
+    It confirms the API is running and lists active routers with their URLs and statuses.
+    This is useful for monitoring and basic connectivity testing.
+    """
+    # Create a dictionary containing status information and URLs of active routers
+    data = {
+        "Status": "API is running!",
+        "Endpoint": "https://hadadrjt-api.hf.space/v1",
+        "Type": "OpenAI-style",
+        "Router 1": {
+            "URL": "https://hadadrjt-api.hf.space/v1/chat/completions",
+            "Status": "Active"
+        },
+        "Router 2": {
+            "URL": "https://hadadrjt-api.hf.space/v1/responses",
+            "Status": "Active"
+        },
+        "Router 3": {
+            "URL": "https://hadadrjt-api.hf.space/v1/models",
+            "Status": "Active"
+        },
+        "Router 4": {
+            "URL": "https://hadadrjt-api.hf.space/v1/history",
+            "Status": "Active"
+        }
+    }
+    # Convert the dictionary to a pretty-printed JSON string with indentation for readability
+    json_content = json.dumps(data, indent=4)
+    # Return the JSON string as an HTTP response with content type set to application/json
+    return Response(content=json_content, media_type="application/json")
 
-#
+# Check if this script is being run directly (not imported as a module)
 if __name__ == "__main__":
+    # Run the FastAPI app using the Uvicorn ASGI server
+    # Bind to all available network interfaces on port 7860
    uvicorn.run(app, host="0.0.0.0", port=7860)
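
The restructured app.py now only wires routers together and exposes the root health check. For reference, a minimal usage sketch, assuming the third-party requests package and the Space URL advertised by the endpoint itself:

import requests  # third-party HTTP client, assumed installed

# Query the root health-check endpoint defined in app.py
resp = requests.get("https://hadadrjt-api.hf.space/")
resp.raise_for_status()

status = resp.json()  # the endpoint returns pretty-printed JSON
print(status["Status"])            # "API is running!"
print(status["Router 1"]["URL"])   # URL of the chat completions router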
config.py
ADDED
@@ -0,0 +1,10 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Default AI model name used when no model is specified by user
+MODEL = "JARVIS: 2.1.3"
+
+# Duration (in seconds) after which inactive sessions are removed
+EXPIRE = 3600 # 1 hour
src/__init__.py
ADDED
File without changes

src/cores/__init__.py
ADDED
File without changes
src/cores/sessions.py
ADDED
@@ -0,0 +1,120 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import time # Import the time module to work with timestamps for session expiration checks
+import uuid # Import uuid module to generate unique session identifiers
+
+from gradio_client import Client # Import Client from gradio_client to interact with the AI model API
+from typing import Dict, Tuple, Optional, Any # Import type hints for better code clarity and validation
+from config import EXPIRE # Import the EXPIRE constant which defines session timeout duration
+
+# Dictionary to store active user sessions
+# Key: session_id (string) uniquely identifying each session
+# Value: Tuple containing:
+# - last_update_timestamp (float): the last time this session was accessed or updated
+# - session_data_dict (dict): holds session-specific data including:
+#   - "model": the AI model name currently used in this session
+#   - "history": a list that tracks the conversation history (inputs and responses)
+#   - "client": the Gradio Client instance associated with this session for API calls
+session_store: Dict[str, Tuple[float, Dict[str, Any]]] = {}
+
+def cleanup_expired_sessions():
+    """
+    Iterate through all stored sessions and remove those that have been inactive
+    for longer than the configured EXPIRE duration. This function helps prevent
+    memory leaks and resource wastage by closing Gradio clients and deleting
+    session data for sessions no longer in use.
+    """
+    now = time.time() # Get the current time in seconds since epoch
+    # Identify all sessions where the time since last update exceeds the expiration limit
+    expired_sessions = [
+        sid for sid, (last_update, _) in session_store.items()
+        if now - last_update > EXPIRE
+    ]
+    # For each expired session, safely close the associated Gradio client and remove session data
+    for sid in expired_sessions:
+        _, data = session_store[sid] # Extract session data dictionary
+        client = data.get("client") # Retrieve the Gradio client instance if it exists
+        if client:
+            try:
+                client.close() # Attempt to close the client connection to release resources
+            except Exception:
+                # Suppress any exceptions during client close to ensure cleanup continues smoothly
+                pass
+        del session_store[sid] # Remove the session entry from the session store dictionary
+
+def create_client_for_model(model: str) -> Client:
+    """
+    Instantiate a new Gradio Client connected to the AI model API and configure it
+    to use the specified model. This client will be used to send requests and receive
+    responses for the given AI model in a session.
+
+    Parameters:
+    - model (str): The name of the AI model to initialize the client with.
+
+    Returns:
+    - Client: A configured Gradio Client instance ready to interact with the model.
+    """
+    client = Client("hadadrjt/ai") # Create a new Gradio Client pointing to the AI service endpoint
+    # Call the /change_model API on the client to switch to the requested AI model
+    client.predict(new=model, api_name="/change_model")
+    return client # Return the configured client instance
+
+def get_or_create_session(session_id: Optional[str], model: str) -> str:
+    """
+    Retrieve an existing session by its session ID or create a new session if none exists.
+    This function also performs cleanup of expired sessions before proceeding to ensure
+    efficient resource management.
+
+    If the requested session exists but uses a different model than specified, the session's
+    client is replaced with a new one configured for the new model.
+
+    Parameters:
+    - session_id (Optional[str]): The unique identifier of the session to retrieve. If None or
+      invalid, a new session will be created.
+    - model (str): The AI model to be used for this session.
+
+    Returns:
+    - str: The session ID of the active or newly created session.
+    """
+    cleanup_expired_sessions() # Remove any sessions that have timed out before proceeding
+
+    # Check if the provided session_id is valid and exists in the session store
+    if not session_id or session_id not in session_store:
+        # Generate a new unique session ID using UUID4
+        session_id = str(uuid.uuid4())
+        # Create a new Gradio client configured for the requested model
+        client = create_client_for_model(model)
+        # Store the new session with current timestamp, model name, empty history, and client instance
+        session_store[session_id] = (time.time(), {
+            "model": model,
+            "history": [],
+            "client": client
+        })
+    else:
+        # Existing session found, retrieve its last update time and data dictionary
+        last_update, data = session_store[session_id]
+        # Check if the model requested differs from the one currently associated with the session
+        if data["model"] != model:
+            # Close the old client to release resources before switching models
+            old_client = data.get("client")
+            if old_client:
+                try:
+                    old_client.close()
+                except Exception:
+                    # Ignore any exceptions during client close to avoid interrupting flow
+                    pass
+            # Create a new client configured for the new model
+            new_client = create_client_for_model(model)
+            # Update session data with the new model and client instance
+            data["model"] = model
+            data["client"] = new_client
+            # Update the session store with the new timestamp and updated data dictionary
+            session_store[session_id] = (time.time(), data)
+        else:
+            # Model has not changed, just update the last access time to keep session active
+            session_store[session_id] = (time.time(), data)
+
+    return session_id # Return the active or newly created session ID
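
A sketch of the session lifecycle this module implements. This is illustrative only: constructing a real Client requires network access to the hadadrjt/ai Space, and the alternate model name below is hypothetical.

from src.cores.sessions import get_or_create_session, session_store

# No session_id: a new session is created and keyed by a fresh UUID
sid = get_or_create_session(None, "JARVIS: 2.1.3")
assert sid in session_store

# Passing the same session_id back just refreshes its last-update timestamp
assert get_or_create_session(sid, "JARVIS: 2.1.3") == sid

# Requesting a different model on the same session swaps the client in place
sid = get_or_create_session(sid, "some-other-model")  # hypothetical model name
assert session_store[sid][1]["model"] == "some-other-model"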
src/models/__init__.py
ADDED
File without changes
src/models/requests.py
ADDED
@@ -0,0 +1,37 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from pydantic import BaseModel # Import BaseModel from Pydantic to define data models with validation and serialization support
+from typing import Optional, Dict, List # Import Optional for optional fields, Dict for dictionary types, and List for list types
+
+class ResponseRequest(BaseModel):
+    """
+    Data model representing the request body structure for the /v1/responses API endpoint.
+
+    Attributes:
+    - model: Optional string specifying the AI model to use; defaults to a predefined MODEL if omitted.
+    - input: Required string containing the user's input text to send to the AI.
+    - stream: Optional boolean indicating if the response should be streamed incrementally; defaults to False.
+    - session_id: Optional string serving as a unique identifier for the user's session; if not provided, a new session is created.
+    """
+    model: Optional[str] = None # AI model identifier, optional with default None
+    input: str # User input text, required
+    stream: Optional[bool] = False # Stream response flag, optional with default False
+    session_id: Optional[str] = None # Session identifier, optional
+
+class OpenAIChatRequest(BaseModel):
+    """
+    Data model defining the OpenAI-compatible request format for the /v1/chat/completions API endpoint.
+
+    Attributes:
+    - model: Optional string specifying the AI model to use; defaults to a predefined MODEL if omitted.
+    - messages: List of message dictionaries, each containing 'role' and 'content' keys, representing the conversation history.
+    - stream: Optional boolean indicating if the response should be streamed incrementally; defaults to False.
+    - session_id: Optional string serving as a unique session identifier to maintain conversation context.
+    """
+    model: Optional[str] = None # AI model identifier, optional with default None
+    messages: List[Dict[str, str]] # List of chat messages with roles and content, required
+    stream: Optional[bool] = False # Stream response flag, optional with default False
+    session_id: Optional[str] = None # Session identifier, optional
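
Since these are plain Pydantic models, their defaults and validation can be exercised directly; a small sketch:

from src.models.requests import ResponseRequest, OpenAIChatRequest

# 'input' is the only required field; everything else falls back to its default
req = ResponseRequest(input="Hello, JARVIS!")
print(req.model, req.stream, req.session_id)  # None False None

# Chat requests carry the whole conversation as role/content dictionaries
chat = OpenAIChatRequest(messages=[
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
    {"role": "user", "content": "What can you do?"},
])
print(chat.stream)  # False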
src/routes/__init__.py
ADDED
File without changes

src/routes/v1/__init__.py
ADDED
File without changes
src/routes/v1/chat_completions.py
ADDED
@@ -0,0 +1,141 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import time # Import time module to handle timestamps and time-based operations
+import uuid # Import uuid module to generate unique identifiers for responses
+
+from fastapi import APIRouter, HTTPException # Import APIRouter to create route groups and HTTPException for error handling
+from fastapi.responses import JSONResponse, StreamingResponse # Import response classes for JSON and streaming responses
+from src.models.requests import OpenAIChatRequest # Import request model defining the expected structure of chat requests
+from src.cores.sessions import get_or_create_session, session_store # Import session management functions and storage
+from src.services.streaming import event_generator # Import generator function for streaming chat responses
+from config import MODEL # Import default model configuration
+
+# Create an API router instance to group related endpoints for chat completions
+router = APIRouter()
+
+@router.post("/completions")
+async def openai_chat_completions(req: OpenAIChatRequest):
+    """
+    Handle OpenAI-compatible chat completion requests.
+    Supports streaming and non-streaming modes based on client request.
+
+    Steps:
+    - Validate the presence and structure of messages in the request
+    - Extract conversation history and current user input from messages
+    - Retrieve or create a session for managing conversation state
+    - Update session history with prior conversation
+    - If streaming is requested, return a streaming response
+    - Otherwise, submit input to AI client and collect full response
+    - Append new interaction to session history
+    - Return response formatted according to OpenAI chat completion API
+
+    Returns:
+    JSONResponse or StreamingResponse with chat completion data and session ID
+    """
+    # Ensure messages list is not empty, else raise HTTP 400 error
+    if not req.messages:
+        raise HTTPException(status_code=400, detail="Messages cannot be empty")
+
+    history = [] # Initialize conversation history list
+    current_input = "" # Initialize variable to hold current user input
+
+    # Process messages to separate conversation history and current input
+    try:
+        # The last message must be from the user and represents current input
+        if req.messages[-1]["role"] != "user":
+            raise ValueError("Last message must be from user")
+
+        current_input = req.messages[-1]["content"] # Extract current input text
+
+        # Iterate over message pairs (user followed by assistant) to build history
+        messages = req.messages[:-1] # Exclude last message as it is current input
+        for i in range(0, len(messages), 2):
+            if i + 1 < len(messages):
+                user_msg = messages[i]
+                assistant_msg = messages[i + 1]
+
+                # Validate message roles; skip pairs that do not match expected pattern
+                if user_msg["role"] != "user" or assistant_msg["role"] != "assistant":
+                    continue
+
+                # Append input-response pair to history
+                history.append({
+                    "input": user_msg["content"],
+                    "response": assistant_msg["content"]
+                })
+    except (KeyError, ValueError) as e:
+        # Raise HTTP 400 error if message format is invalid
+        raise HTTPException(status_code=400, detail=f"Invalid message format: {str(e)}")
+
+    # Determine model to use: requested model or default from config
+    model = req.model or MODEL
+
+    # Retrieve existing session or create a new one using session ID and model
+    session_id = get_or_create_session(req.session_id, model)
+
+    # Get last update time and session data from session store
+    last_update, session_data = session_store[session_id]
+
+    # Update session history with extracted conversation history
+    session_data["history"] = history
+
+    # Save updated session data with current timestamp
+    session_store[session_id] = (time.time(), session_data)
+
+    client = session_data["client"] # Retrieve AI client instance from session
+
+    # If AI client is not available, raise HTTP 503 error
+    if client is None:
+        raise HTTPException(status_code=503, detail="AI client not available")
+
+    # If streaming is requested, return a streaming response using event generator
+    if req.stream:
+        return StreamingResponse(
+            event_generator(current_input, model, session_id),
+            media_type="text/event-stream"
+        )
+
+    # For non-streaming requests, submit input to AI client and collect response
+    try:
+        jarvis_response = client.submit(multi={"text": current_input}, api_name="/api")
+    except Exception as e:
+        # Raise HTTP 500 error if submission to AI client fails
+        raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
+
+    buffer = "" # Initialize buffer to accumulate AI response text
+
+    # Iterate over partial responses from AI client to build full response text
+    for partial in jarvis_response:
+        text = partial[0][0][1] # Extract text from nested response structure
+        buffer = text # Update buffer with latest text chunk
+
+    # Append new input-response pair to session history
+    session_data["history"].append({"input": current_input, "response": buffer})
+
+    # Update session store with new history and timestamp
+    session_store[session_id] = (time.time(), session_data)
+
+    # Construct response in OpenAI chat completion format with session ID extension
+    response = {
+        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}", # Unique response ID
+        "object": "chat.completion",
+        "created": int(time.time()), # Timestamp of response creation
+        "model": model, # Model used for completion
+        "choices": [
+            {
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": buffer # AI-generated response content
+                },
+                "finish_reason": "stop"
+            }
+        ],
+        "session_id": session_id # Custom field for session management
+    }
+
+    # Return JSON response with formatted completion data
+    return JSONResponse(response)
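
With the router mounted under /v1/chat in app.py, this endpoint resolves to /v1/chat/completions. A minimal non-streaming call, assuming the requests package and the deployed Space URL:

import requests  # third-party HTTP client, assumed installed

payload = {
    "messages": [
        {"role": "user", "content": "Summarize FastAPI in one line."}
    ],
    "stream": False
}
resp = requests.post("https://hadadrjt-api.hf.space/v1/chat/completions", json=payload)
data = resp.json()
print(data["choices"][0]["message"]["content"])

# Reuse the returned session_id to keep conversation state server-side
session_id = data["session_id"]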
src/routes/v1/history.py
ADDED
@@ -0,0 +1,54 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Import APIRouter and HTTPException classes from FastAPI framework
+# APIRouter is used to create modular route handlers
+# HTTPException is used to generate HTTP error responses with specific status codes and details
+from fastapi import APIRouter, HTTPException
+
+# Import Optional type hint from typing module
+# Optional is used to indicate that a function parameter can be of a specified type or None
+from typing import Optional
+
+# Import session_store dictionary from the sessions module located in src.cores package
+# session_store holds active session data keyed by session identifiers
+from src.cores.sessions import session_store
+
+# Create an instance of APIRouter to define routes related to session history
+router = APIRouter()
+
+# Define an asynchronous GET endpoint at path "/history" to retrieve chat history for a session
+@router.get("/history")
+async def get_history(session_id: Optional[str] = None):
+    """
+    This function handles GET requests to fetch the chat history for a specific session.
+
+    Parameters:
+    - session_id (Optional[str]): A string representing the unique identifier of the session.
+      This parameter is optional in the function signature but required for successful retrieval.
+
+    Returns:
+    - A JSON object containing:
+      - "session_id": The provided session identifier string.
+      - "history": A list of past input-response pairs stored in the session.
+
+    Raises:
+    - HTTPException with status code 404 and a descriptive message if:
+      - The session_id is not provided (None or empty).
+      - The session_id does not exist in the session_store dictionary, indicating no active session.
+    """
+
+    # Check if session_id is missing or does not exist in the session_store dictionary
+    if not session_id or session_id not in session_store:
+        # Raise an HTTP 404 Not Found error with a clear message indicating the issue
+        raise HTTPException(status_code=404, detail="Session not found or session_id missing.")
+
+    # Retrieve the session data tuple from session_store using the session_id key
+    # The tuple contains two elements; the first is ignored here, the second is the session data dictionary
+    _, session_data = session_store[session_id]
+
+    # Return a dictionary with the session_id and the chat history extracted from the session data
+    # This dictionary will be automatically converted to JSON by FastAPI when sending the response
+    return {"session_id": session_id, "history": session_data["history"]}
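
A sketch of fetching a session's history; the session_id placeholder stands for a value returned by an earlier /v1/responses or /v1/chat/completions call:

import requests  # third-party HTTP client, assumed installed

session_id = "..."  # hypothetical: returned by a previous request
resp = requests.get(
    "https://hadadrjt-api.hf.space/v1/history",
    params={"session_id": session_id}
)
if resp.status_code == 404:
    print("Session expired or unknown")
else:
    for turn in resp.json()["history"]:
        print("you:", turn["input"])
        print("ai: ", turn["response"])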
src/routes/v1/models.py
ADDED
@@ -0,0 +1,39 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Import APIRouter class from fastapi module to create a router instance for grouping related API routes
+from fastapi import APIRouter
+
+# Import JSONResponse to send JSON formatted HTTP responses from endpoint functions
+from fastapi.responses import JSONResponse
+
+# Import MODEL constant from config module, which specifies the default model identifier
+from config import MODEL
+
+# Create an APIRouter instance to collect and organize routes related to model operations
+router = APIRouter()
+
+# Define an asynchronous GET endpoint at path "/models" on this router
+@router.get("/models")
+async def list_models():
+    """
+    OpenAI-compatible endpoint to list available models.
+    Returns a fixed list containing our default model.
+
+    This endpoint is required by many OpenAI-compatible clients to discover available models.
+    """
+    # Return a JSON response with a list object containing one model dictionary
+    # The model dictionary includes the id from config, a static object type, and a placeholder created timestamp
+    return JSONResponse({
+        "object": "list",
+        "data": [
+            {
+                "id": MODEL,
+                "object": "model",
+                "created": 0, # Timestamp not available, so set to zero
+                "owned_by": "J.A.R.V.I.S."
+            }
+        ]
+    })
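
A quick sketch of the discovery call many OpenAI-compatible clients issue on startup, assuming the deployed Space URL:

import requests  # third-party HTTP client, assumed installed

resp = requests.get("https://hadadrjt-api.hf.space/v1/models")
for m in resp.json()["data"]:
    print(m["id"], "owned by", m["owned_by"])  # JARVIS: 2.1.3 owned by J.A.R.V.I.S.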
src/routes/v1/responses.py
ADDED
@@ -0,0 +1,114 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import time # Import time module to handle timestamps and measure time intervals
+import uuid # Import uuid module to generate unique identifiers for responses
+
+from fastapi import APIRouter, HTTPException # Import FastAPI router and HTTP exception handling
+from fastapi.responses import JSONResponse, StreamingResponse # Import response types for JSON and streaming data
+from src.models.requests import ResponseRequest # Import the data model for incoming request validation
+from src.cores.sessions import get_or_create_session, session_store # Import session management utilities
+from src.services.streaming import event_generator # Import generator function for streaming AI responses
+from config import MODEL # Import default AI model configuration
+
+# Create a new API router instance to handle endpoints related to AI responses
+router = APIRouter()
+
+@router.post("/responses")
+async def responses(req: ResponseRequest):
+    """
+    API endpoint to receive user input and return AI-generated responses.
+    Supports both streaming and non-streaming modes to accommodate different client needs.
+
+    Detailed Workflow:
+    1. Determine which AI model to use, either from request or default configuration.
+    2. Retrieve an existing session or create a new one based on session ID and model.
+    3. Extract the AI client from the session data, ensuring it is available.
+    4. If streaming is requested, return a streaming response that yields partial results as they arrive.
+    5. For non-streaming requests, submit the entire user input to the AI client and collect the full response.
+    6. Handle any errors during submission by returning appropriate HTTP error codes.
+    7. Store the user input and AI response in the session history for future reference.
+    8. Update the session's last access time to maintain session freshness.
+    9. Format the AI response in a JSON structure compatible with OpenAI's chat completion format.
+    10. Return the formatted JSON response along with the session ID for client reuse.
+
+    Parameters:
+    - req: ResponseRequest object containing user input, optional model, session ID, and streaming flag.
+
+    Returns:
+    - JSONResponse containing AI-generated text, metadata, and session information if non-streaming.
+    - StreamingResponse yielding incremental AI output if streaming is enabled.
+
+    Raises:
+    - HTTPException with status 503 if AI client is unavailable.
+    - HTTPException with status 500 if AI submission fails.
+    """
+    # Select the AI model specified in the request or fall back to the default model
+    model = req.model or MODEL
+
+    # Retrieve existing session or create a new one using the provided session ID and model
+    session_id = get_or_create_session(req.session_id, model)
+
+    # Extract the last update timestamp and session data dictionary from the session store
+    last_update, session_data = session_store[session_id]
+
+    # Extract the user's input text from the request object
+    user_input = req.input
+
+    # Retrieve the AI client instance from the session data; this client handles AI interactions
+    client = session_data["client"]
+
+    # If the AI client is not initialized or unavailable, respond with a 503 Service Unavailable error
+    if client is None:
+        raise HTTPException(status_code=503, detail="AI client not available")
+
+    # If the client requested streaming mode, return a streaming response that sends partial AI outputs as events
+    if req.stream:
+        # Use event_generator to produce server-sent events for real-time streaming of AI responses
+        return StreamingResponse(event_generator(user_input, model, session_id), media_type="text/event-stream")
+
+    # For non-streaming requests, submit the full user input to the AI client and collect the complete response
+    try:
+        jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
+    except Exception as e:
+        # If submission to the AI client fails, respond with a 500 Internal Server Error and error details
+        raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
+
+    # Initialize a buffer string to accumulate the full AI response text
+    buffer = ""
+    # Iterate over the streaming partial responses returned by the AI client
+    for partial in jarvis_response:
+        # Extract the text content from the nested partial response structure
+        text = partial[0][0][1]
+        # Update the buffer with the latest full response text received
+        buffer = text
+
+    # Append the user input and AI response pair to the session's chat history for context and record keeping
+    session_data["history"].append({"input": user_input, "response": buffer})
+
+    # Update the session store with the current timestamp and modified session data to keep session active
+    session_store[session_id] = (time.time(), session_data)
+
+    # Construct the JSON response following OpenAI's chat completion format for compatibility
+    response = {
+        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}", # Unique identifier for this chat completion instance
+        "object": "chat.completion", # Object type indicating a chat completion response
+        "created": int(time.time()), # Timestamp of response creation in seconds since epoch
+        "model": model, # The AI model used to generate this response
+        "choices": [
+            {
+                "index": 0, # Index of this choice in the list of completions
+                "message": {
+                    "role": "assistant", # Role indicating the source of the message is the AI assistant
+                    "content": buffer # The AI-generated text content to be delivered to the user
+                },
+                "finish_reason": "stop" # Reason for completion, indicating the response is complete
+            }
+        ],
+        "session_id": session_id # Include session ID so the client can maintain or reuse the session
+    }
+
+    # Return the constructed JSON response to the client as an HTTP response
+    return JSONResponse(response)
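
A minimal non-streaming call to this endpoint; only the required input field is sent, so the default model and a fresh session are used (requests package and Space URL assumed):

import requests  # third-party HTTP client, assumed installed

resp = requests.post(
    "https://hadadrjt-api.hf.space/v1/responses",
    json={"input": "Ping?", "stream": False}
)
body = resp.json()
print(body["choices"][0]["message"]["content"])
print("session:", body["session_id"])  # pass this back to continue the conversation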
src/services/__init__.py
ADDED
File without changes
src/services/streaming.py
ADDED
@@ -0,0 +1,120 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import json # Import the json module to handle JSON encoding and decoding of data
+import time # Import the time module to work with timestamps
+import uuid # Import the uuid module to generate unique identifiers
+
+from typing import AsyncGenerator # Import AsyncGenerator for typing asynchronous generator functions
+from src.cores.sessions import session_store # Import the session_store object to manage user sessions
+
+async def event_generator(user_input: str, model: str, session_id: str) -> AsyncGenerator[str, None]:
+    """
+    Asynchronous generator function that streams AI-generated responses incrementally as Server-Sent Events (SSE).
+
+    Parameters:
+    - user_input: The input text provided by the user to the AI model.
+    - model: The identifier of the AI model to be used for generating responses.
+    - session_id: A unique string representing the current user session.
+
+    Yields:
+    - JSON-formatted strings representing incremental chunks of the AI response,
+      formatted as Server-Sent Events for real-time streaming to the client.
+    """
+
+    # Retrieve the last update time and session data from the session store using the session ID
+    last_update, session_data = session_store.get(session_id, (0, None))
+
+    # If no session data is found for the given session ID, yield an error message and stop
+    if session_data is None:
+        yield f"data: {json.dumps({'error': 'Session not found'})}\n\n"
+        return
+
+    # Extract the AI client object from the session data
+    client = session_data["client"]
+
+    # If the client is missing in the session data, yield an error message and stop
+    if client is None:
+        yield f"data: {json.dumps({'error': 'AI client not available'})}\n\n"
+        return
+
+    try:
+        # Submit the user's input text to the AI model via the client's submit method
+        # The 'multi' parameter wraps the text, and 'api_name' specifies the API endpoint
+        jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
+    except Exception as e:
+        # If submission to the AI fails, yield an error message with the exception details and stop
+        yield f"data: {json.dumps({'error': f'Failed to submit to AI: {str(e)}'})}\n\n"
+        return
+
+    buffer = "" # Initialize an empty string buffer to accumulate the full AI response progressively
+
+    try:
+        # Iterate over the partial responses received from the AI client submission
+        for partial in jarvis_response:
+            # Extract the current partial text chunk from the nested response structure
+            text = partial[0][0][1]
+
+            # Determine the new delta text by comparing with the buffer
+            # If the new text starts with the buffer, delta is the newly appended text; otherwise, delta is the entire text
+            if text.startswith(buffer):
+                delta = text[len(buffer):]
+            else:
+                delta = text
+
+            buffer = text # Update the buffer with the latest full text from the AI
+
+            # Skip yielding if the delta is empty (no new text)
+            if delta == "":
+                continue
+
+            # Construct a chunk dictionary following OpenAI's streaming response format
+            chunk = {
+                "id": f"chatcmpl-{uuid.uuid4().hex[:8]}", # Unique chunk ID with a UUID suffix
+                "object": "chat.completion.chunk", # Object type indicating a chunk of chat completion
+                "created": int(time.time()), # Timestamp of chunk creation in Unix time
+                "model": model, # The AI model used for generating this chunk
+                "choices": [
+                    {
+                        "index": 0, # Index of the choice in the response (usually 0 for single response)
+                        "delta": {"content": delta}, # The incremental new text content in this chunk
+                        "finish_reason": None # No finish reason yet, stream is ongoing
+                    }
+                ]
+            }
+
+            # Yield the chunk as a Server-Sent Event formatted string with 'data:' prefix and double newline suffix
+            yield f"data: {json.dumps(chunk)}\n\n"
+
+        # After all chunks have been streamed, append the full input-response pair to the session history
+        session_data["history"].append({"input": user_input, "response": buffer})
+
+        # Update the session store with the new last access time and updated session data
+        session_store[session_id] = (time.time(), session_data)
+
+        # Prepare a final chunk indicating the end of the stream with finish_reason set to 'stop'
+        done_chunk = {
+            "id": f"chatcmpl-{uuid.uuid4().hex[:8]}", # Unique ID for the final chunk
+            "object": "chat.completion.chunk", # Object type for consistency
+            "created": int(time.time()), # Timestamp of completion
+            "model": model, # Model identifier
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {}, # Empty delta indicating no new content
+                    "finish_reason": "stop" # Signal that the stream has finished
+                }
+            ]
+        }
+
+        # Yield the final completion chunk to signal the client that streaming is done
+        yield f"data: {json.dumps(done_chunk)}\n\n"
+
+    except Exception as e:
+        # If any error occurs during streaming, yield an error chunk with the exception message
+        error_chunk = {
+            "error": {"message": f"Streaming error: {str(e)}"}
+        }
+        yield f"data: {json.dumps(error_chunk)}\n\n"
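
On the client side, the frames produced by event_generator can be decoded line by line. A sketch using the requests package with stream=True; the error and finish_reason handling mirrors the chunk shapes defined above:

import json
import requests  # third-party HTTP client, assumed installed

with requests.post(
    "https://hadadrjt-api.hf.space/v1/responses",
    json={"input": "Tell me a short story.", "stream": True},
    stream=True
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip blank separator lines between events
        chunk = json.loads(line[len("data: "):])
        if "error" in chunk:
            raise RuntimeError(str(chunk["error"]))
        choice = chunk["choices"][0]
        if choice["finish_reason"] == "stop":
            break  # final empty-delta chunk marks the end of the stream
        print(choice["delta"].get("content", ""), end="", flush=True)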