|
|
|
|
|
|
|
|
|
|
|
import time |
|
import uuid |
|
|
|
from fastapi import APIRouter, HTTPException |
|
from fastapi.responses import JSONResponse, StreamingResponse |
|
from src.models.requests import ResponseRequest |
|
from src.cores.sessions import get_or_create_session, session_store |
|
from src.services.streaming import event_generator |
|
from config import MODEL |
|
|
|
|
|
router = APIRouter() |
|
|
|
@router.post("/responses")
async def responses(req: ResponseRequest):
    """Return an AI-generated response for the user's input.

    Supports both streaming (Server-Sent Events) and non-streaming modes.

    Workflow:
        1. Resolve the model (request override or configured default).
        2. Get or create the session and fetch its AI client.
        3. If ``req.stream`` is set, return a streaming response that yields
           partial results as they arrive.
        4. Otherwise submit the full input, collect the final result, record
           the exchange in the session history, and refresh the session's
           last-access time.
        5. Return the result as an OpenAI chat-completion-compatible JSON
           payload, including the session ID for client reuse.

    Parameters:
        req: ResponseRequest with user input, optional model, optional
            session ID, and a streaming flag.

    Returns:
        StreamingResponse (``text/event-stream``) when streaming is enabled,
        otherwise a JSONResponse with the completion payload.

    Raises:
        HTTPException: 503 if the session has no AI client available;
            500 if submitting the input to the AI fails.
    """
    model = req.model or MODEL

    session_id = get_or_create_session(req.session_id, model)

    # The stored timestamp is refreshed below, so only the session data is
    # needed here (the original bound it to an unused `last_update`).
    _, session_data = session_store[session_id]

    user_input = req.input

    client = session_data["client"]
    if client is None:
        raise HTTPException(status_code=503, detail="AI client not available")

    # Streaming mode: hand off to the SSE generator and return immediately.
    if req.stream:
        return StreamingResponse(
            event_generator(user_input, model, session_id),
            media_type="text/event-stream",
        )

    try:
        jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}") from e

    # Each partial result supersedes the previous one, so after iteration
    # `buffer` holds the complete final text.
    buffer = ""
    for partial in jarvis_response:
        # NOTE(review): assumes each partial nests the text as
        # partial[0][0][1] — confirm against the AI client's output format.
        buffer = partial[0][0][1]

    # Persist the exchange and refresh the session's last-access time so the
    # session is not considered stale.
    session_data["history"].append({"input": user_input, "response": buffer})
    session_store[session_id] = (time.time(), session_data)

    # OpenAI chat-completion-compatible payload; `session_id` is included so
    # the client can reuse the session on subsequent requests.
    response = {
        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": buffer
                },
                "finish_reason": "stop"
            }
        ],
        "session_id": session_id
    }

    return JSONResponse(response)
|
|