File size: 6,296 Bytes
e61d441
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

import time  # Import time module to handle timestamps and time-based operations
import uuid  # Import uuid module to generate unique identifiers for responses

from fastapi import APIRouter, HTTPException  # Import APIRouter to create route groups and HTTPException for error handling
from fastapi.responses import JSONResponse, StreamingResponse  # Import response classes for JSON and streaming responses
from src.models.requests import OpenAIChatRequest  # Import request model defining the expected structure of chat requests
from src.cores.sessions import get_or_create_session, session_store  # Import session management functions and storage
from src.services.streaming import event_generator  # Import generator function for streaming chat responses
from config import MODEL  # Import default model configuration

# Create an API router instance to group related endpoints for chat completions
router = APIRouter()

def _split_messages(messages):
    """
    Split an OpenAI-style messages list into ``(history, current_input)``.

    The last message must be a user message; its content becomes the current
    input. The preceding messages are consumed in (user, assistant) pairs to
    rebuild the conversation history; any pair that does not match that role
    pattern is skipped, and a trailing unpaired message is ignored.

    Args:
        messages: Non-empty list of dicts with "role" and "content" keys.

    Returns:
        Tuple of (history, current_input) where history is a list of
        {"input": ..., "response": ...} dicts.

    Raises:
        ValueError: if the last message is not from the user.
        KeyError: if a message lacks a "role" or "content" key.
    """
    if messages[-1]["role"] != "user":
        raise ValueError("Last message must be from user")
    current_input = messages[-1]["content"]

    history = []
    prior = messages[:-1]  # everything before the current input
    for i in range(0, len(prior) - 1, 2):
        user_msg = prior[i]
        assistant_msg = prior[i + 1]
        # Skip pairs that do not follow the expected user/assistant pattern.
        if user_msg["role"] != "user" or assistant_msg["role"] != "assistant":
            continue
        history.append({
            "input": user_msg["content"],
            "response": assistant_msg["content"]
        })
    return history, current_input

@router.post("/completions")
async def openai_chat_completions(req: OpenAIChatRequest):
    """
    Handle OpenAI-compatible chat completion requests.
    Supports streaming and non-streaming modes based on client request.

    Steps:
    - Validate the presence and structure of messages in the request
    - Extract conversation history and current user input from messages
    - Retrieve or create a session for managing conversation state
    - Update session history with prior conversation
    - If streaming is requested, return a streaming response
    - Otherwise, submit input to AI client and collect full response
    - Append new interaction to session history
    - Return response formatted according to OpenAI chat completion API

    Raises:
        HTTPException 400: messages list is empty or malformed.
        HTTPException 503: session has no AI client available.
        HTTPException 500: submission to the AI client failed.

    Returns:
        JSONResponse or StreamingResponse with chat completion data and session ID
    """
    # Ensure messages list is not empty, else raise HTTP 400 error
    if not req.messages:
        raise HTTPException(status_code=400, detail="Messages cannot be empty")

    # Separate prior conversation history from the current user input.
    try:
        history, current_input = _split_messages(req.messages)
    except (KeyError, ValueError) as e:
        # Chain the cause so the original error survives in tracebacks.
        raise HTTPException(status_code=400, detail=f"Invalid message format: {str(e)}") from e

    # Determine model to use: requested model or default from config
    model = req.model or MODEL

    # Retrieve existing session or create a new one using session ID and model
    session_id = get_or_create_session(req.session_id, model)

    # Replace the stored history with what the client sent and refresh the
    # session timestamp (the previous last-update time is not needed).
    _, session_data = session_store[session_id]
    session_data["history"] = history
    session_store[session_id] = (time.time(), session_data)

    client = session_data["client"]  # AI client instance bound to this session

    # If AI client is not available, raise HTTP 503 error
    if client is None:
        raise HTTPException(status_code=503, detail="AI client not available")

    # If streaming is requested, return a streaming response using event generator
    if req.stream:
        return StreamingResponse(
            event_generator(current_input, model, session_id),
            media_type="text/event-stream"
        )

    # For non-streaming requests, submit input to AI client and collect response
    try:
        jarvis_response = client.submit(multi={"text": current_input}, api_name="/api")
    except Exception as e:
        # Chain the cause so the original failure survives in tracebacks.
        raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}") from e

    # The client yields cumulative partial responses; keep only the latest,
    # which holds the full text once iteration completes.
    buffer = ""
    for partial in jarvis_response:
        # NOTE(review): nested [0][0][1] shape assumed from the upstream
        # client's response structure — confirm against the AI service.
        buffer = partial[0][0][1]

    # Append new input-response pair to session history
    session_data["history"].append({"input": current_input, "response": buffer})

    # Update session store with new history and timestamp
    session_store[session_id] = (time.time(), session_data)

    # Construct response in OpenAI chat completion format with session ID extension
    response = {
        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",  # Unique response ID
        "object": "chat.completion",
        "created": int(time.time()),  # Timestamp of response creation
        "model": model,  # Model used for completion
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": buffer  # AI-generated response content
                },
                "finish_reason": "stop"
            }
        ],
        "session_id": session_id  # Custom field for session management
    }

    # Return JSON response with formatted completion data
    return JSONResponse(response)