File size: 6,172 Bytes
e61d441
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

import asyncio  # Import asyncio to hand blocking client calls to an executor thread
import time  # Import time module to handle timestamps and measure time intervals
import uuid  # Import uuid module to generate unique identifiers for responses

from fastapi import APIRouter, HTTPException  # Import FastAPI router and HTTP exception handling
from fastapi.responses import JSONResponse, StreamingResponse  # Import response types for JSON and streaming data
from src.models.requests import ResponseRequest  # Import the data model for incoming request validation
from src.cores.sessions import get_or_create_session, session_store  # Import session management utilities
from src.services.streaming import event_generator  # Import generator function for streaming AI responses
from config import MODEL  # Import default AI model configuration

# Router instance for this module; the POST /responses handler below is registered on it
router = APIRouter()

@router.post("/responses")
async def responses(req: ResponseRequest):
    """
    Receive user input and return an AI-generated response.

    Supports both streaming and non-streaming modes.

    Workflow:
    1. Pick the model from the request, falling back to the configured default.
    2. Get or create the session for the given session ID and model.
    3. Read the AI client from the session data; fail with 503 if it is None.
    4. If streaming is requested, return a server-sent-events StreamingResponse
       driven by event_generator and stop here.
    5. Otherwise submit the input to the AI client on a worker thread and keep
       the text of the last update it yields.
    6. Append the input/response pair to the session history and refresh the
       session timestamp in the session store.
    7. Return a JSON body shaped like a chat completion object, plus the
       session ID so the client can reuse the session.

    Parameters:
    - req: ResponseRequest carrying the user input, optional model,
      optional session ID, and the streaming flag.

    Returns:
    - StreamingResponse (media type text/event-stream) when req.stream is truthy.
    - JSONResponse with the generated text, metadata, and session ID otherwise.

    Raises:
    - HTTPException with status 503 if the session has no AI client.
    - HTTPException with status 500 if submitting to the AI client fails or
      its response cannot be read.
    """
    # Select the AI model specified in the request or fall back to the default model
    model = req.model or MODEL

    # Retrieve existing session or create a new one using the provided session ID and model
    session_id = get_or_create_session(req.session_id, model)

    # Only the session data is needed; the stored timestamp is overwritten further down
    _, session_data = session_store[session_id]

    user_input = req.input
    client = session_data["client"]

    # Without a client there is nothing to talk to: report 503 Service Unavailable
    if client is None:
        raise HTTPException(status_code=503, detail="AI client not available")

    # Streaming mode: delegate entirely to the event generator
    if req.stream:
        return StreamingResponse(event_generator(user_input, model, session_id), media_type="text/event-stream")

    # The client API is synchronous (plain call + plain `for` iteration), so running it
    # directly in this coroutine would block the event loop for the whole generation.
    # Run it on the default executor and await the result instead.
    loop = asyncio.get_running_loop()
    buffer = await loop.run_in_executor(None, _collect_final_text, client, user_input)

    # Record the exchange in the session history
    session_data["history"].append({"input": user_input, "response": buffer})

    # Store the session back with a fresh timestamp
    session_store[session_id] = (time.time(), session_data)

    # Build the chat-completion-style payload
    response = {
        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",  # Identifier built from 8 hex chars of a random UUID
        "object": "chat.completion",
        "created": int(time.time()),  # Creation time in whole seconds since the epoch
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": buffer
                },
                "finish_reason": "stop"
            }
        ],
        "session_id": session_id  # Lets the client reuse the same session on later calls
    }

    return JSONResponse(response)


def _collect_final_text(client, user_input):
    """
    Submit user_input to the AI client and return the text of its last update.

    Blocking helper intended to be run off the event loop.

    Parameters:
    - client: AI client object exposing submit(multi=..., api_name=...).
    - user_input: The text to send.

    Returns:
    - The text extracted from the last update yielded by the client, or an
      empty string if it yields nothing.

    Raises:
    - HTTPException with status 500 if submission fails or if iterating or
      parsing the client's updates fails.
    """
    try:
        job = client.submit(multi={"text": user_input}, api_name="/api")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {e}") from e

    final_text = ""
    try:
        for partial in job:
            # NOTE(review): assumes each update is nested so that [0][0][1] holds the
            # response text; the shape is dictated by the upstream API - confirm.
            # Each update replaces the previous one, so only the last text survives.
            final_text = partial[0][0][1]
    except Exception as e:
        # Previously these failures escaped as unhandled errors; surface them as a 500 with detail
        raise HTTPException(status_code=500, detail=f"Failed to read AI response: {e}") from e

    return final_text