hadadrjt committed
Commit e61d441 · 1 Parent(s): c0d067d

api: Restructured repo.
README.md CHANGED
@@ -1,5 +1,5 @@
  ---
- title: J.A.R.V.I.S. API Endpoint
+ title: API
  license: apache-2.0
  license_link: https://huggingface.co/hadadrjt/JARVIS/blob/main/LICENSE
  emoji: 👀
@@ -8,7 +8,8 @@ colorTo: green
  sdk: gradio
  sdk_version: 5.34.0
  app_file: app.py
- pinned: false
+ pinned: true
+ short_description: J.A.R.V.I.S. API Endpoint!
  models:
  - hadadrjt/JARVIS
  ---
app.py CHANGED
@@ -3,465 +3,73 @@
  # SPDX-License-Identifier: Apache-2.0
  #

- import json
- import time
- import uuid
- import asyncio
- import uvicorn
-
- from contextlib import asynccontextmanager
- from fastapi import FastAPI, HTTPException
- from fastapi.responses import JSONResponse, StreamingResponse
- from gradio_client import Client
- from pydantic import BaseModel
- from typing import AsyncGenerator, Optional, Dict, List, Tuple, Any
-
- # Default AI model name used when no model is specified by user
- MODEL = "JARVIS: 2.1.3"
-
- # Session store keeps track of active sessions.
- # Each session_id maps to a tuple:
- #     (last_update_timestamp, session_data_dict)
- # session_data_dict contains:
- #     - "model": the AI model name used in this session
- #     - "history": list of past chat messages (input and response)
- #     - "client": the Gradio Client instance specific to this session
- session_store: Dict[str, Tuple[float, Dict]] = {}
-
- # Duration (in seconds) after which inactive sessions are removed
- EXPIRE = 3600  # 1 hour
-
- # Create FastAPI app instance
  app = FastAPI()

- class ResponseRequest(BaseModel):
-     """
-     Defines the expected structure of the request body for /v1/responses endpoint.
-
-     Attributes:
-     - model: Optional; specifies which AI model to use. Defaults to MODEL if not provided.
-     - input: The user's input text to send to the AI.
-     - stream: Optional; if True, the response will be streamed incrementally.
-     - session_id: Optional; unique identifier for the user's session. If missing, a new session will be created.
-     """
-     model: Optional[str] = None
-     input: str
-     stream: Optional[bool] = False
-     session_id: Optional[str] = None
-
- class OpenAIChatRequest(BaseModel):
-     """
-     Defines the OpenAI-compatible request structure for /v1/chat/completions endpoint.
-
-     Attributes:
-     - model: Optional; specifies which AI model to use. Defaults to MODEL if not provided.
-     - messages: List of message objects containing 'role' and 'content'
-     - stream: Optional; if True, the response will be streamed incrementally.
-     - session_id: Optional; unique session identifier for maintaining conversation history
-     """
-     model: Optional[str] = None
-     messages: List[Dict[str, str]]
-     stream: Optional[bool] = False
-     session_id: Optional[str] = None
-
- def cleanup_expired_sessions():
-     """
-     Remove sessions that have been inactive for longer than EXPIRE.
-     This helps free up memory by deleting old sessions and closing their clients.
-     """
-     now = time.time()
-     expired_sessions = [
-         sid for sid, (last_update, _) in session_store.items()
-         if now - last_update > EXPIRE
-     ]
-     for sid in expired_sessions:
-         # Attempt to close the Gradio client associated with the session
-         _, data = session_store[sid]
-         client = data.get("client")
-         if client:
-             try:
-                 client.close()
-             except Exception:
-                 # Ignore errors during client close to avoid crashing cleanup
-                 pass
-         # Remove the session from the store
-         del session_store[sid]
-
- def create_client_for_model(model: str) -> Client:
-     """
-     Create a new Gradio Client instance and set it to use the specified AI model.
-
-     Parameters:
-     - model: The name of the AI model to initialize the client with.
-
-     Returns:
-     - A new Gradio Client instance configured with the given model.
-     """
-     client = Client("hadadrjt/ai")
-     # Set the model on the Gradio client by calling the /change_model API
-     client.predict(new=model, api_name="/change_model")
-     return client
-
- def get_or_create_session(session_id: Optional[str], model: str) -> str:
-     """
-     Retrieve an existing session by session_id or create a new one if it doesn't exist.
-     Also cleans up expired sessions before proceeding.
-
-     Parameters:
-     - session_id: The unique identifier of the session (optional).
-     - model: The AI model to use for this session.
-
-     Returns:
-     - The session_id for the active or newly created session.
-     """
-     cleanup_expired_sessions()
-
-     # If no session_id provided or session does not exist, create a new session
-     if not session_id or session_id not in session_store:
-         session_id = str(uuid.uuid4())  # Generate a new unique session ID
-         client = create_client_for_model(model)  # Create a new client for this session
-         session_store[session_id] = (time.time(), {
-             "model": model,
-             "history": [],
-             "client": client
-         })
-     else:
-         # Session exists, update last access time and check if model changed
-         last_update, data = session_store[session_id]
-         if data["model"] != model:
-             # If model changed, close old client and create a new one with the new model
-             old_client = data.get("client")
-             if old_client:
-                 try:
-                     old_client.close()
-                 except Exception:
-                     pass  # Ignore errors on close
-             new_client = create_client_for_model(model)
-             data["model"] = model
-             data["client"] = new_client
-             session_store[session_id] = (time.time(), data)
-         else:
-             # Just update the last access time to keep session alive
-             session_store[session_id] = (time.time(), data)
-
-     return session_id
-
- async def event_generator(user_input: str, model: str, session_id: str) -> AsyncGenerator[str, None]:
-     """
-     Asynchronous generator that streams AI responses incrementally as Server-Sent Events (SSE).
-
-     Parameters:
-     - user_input: The input text from the user.
-     - model: The AI model to use.
-     - session_id: The unique session identifier.
-
-     Yields:
-     - JSON-formatted chunks representing incremental AI response deltas.
-     """
-     last_update, session_data = session_store.get(session_id, (0, None))
-     if session_data is None:
-         # Session not found; yield error and stop
-         yield f"data: {json.dumps({'error': 'Session not found'})}\n\n"
-         return
-
-     client = session_data["client"]
-     if client is None:
-         # Client missing for session; yield error and stop
-         yield f"data: {json.dumps({'error': 'AI client not available'})}\n\n"
-         return
-
-     try:
-         # Submit the user input to the AI model via Gradio client
-         jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
-     except Exception as e:
-         # If submission fails, yield error and stop
-         yield f"data: {json.dumps({'error': f'Failed to submit to AI: {str(e)}'})}\n\n"
-         return
-
-     buffer = ""  # Buffer to track full response text progressively
-
-     try:
-         for partial in jarvis_response:
-             # Extract the current partial text from the response
-             text = partial[0][0][1]
-
-             # Calculate the delta (new text since last chunk)
-             if text.startswith(buffer):
-                 delta = text[len(buffer):]
-             else:
-                 delta = text
-
-             buffer = text  # Update buffer with latest full text
-
-             if delta == "":
-                 # Skip empty delta chunks
-                 continue
-
-             # Prepare chunk data in OpenAI streaming format
-             chunk = {
-                 "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-                 "object": "chat.completion.chunk",
-                 "created": int(time.time()),
-                 "model": model,
-                 "choices": [
-                     {
-                         "index": 0,
-                         "delta": {"content": delta},
-                         "finish_reason": None
-                     }
-                 ]
-             }
-
-             # Yield the chunk as a Server-Sent Event
-             yield f"data: {json.dumps(chunk)}\n\n"
-
-         # After streaming completes, save the full input-response pair to session history
-         session_data["history"].append({"input": user_input, "response": buffer})
-         session_store[session_id] = (time.time(), session_data)  # Update last access time
-
-         # Send a final chunk signaling completion of the stream
-         done_chunk = {
-             "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-             "object": "chat.completion.chunk",
-             "created": int(time.time()),
-             "model": model,
-             "choices": [
-                 {
-                     "index": 0,
-                     "delta": {},
-                     "finish_reason": "stop"
-                 }
-             ]
-         }
-         yield f"data: {json.dumps(done_chunk)}\n\n"
-
-     except Exception as e:
-         # If streaming fails at any point, yield an error chunk
-         error_chunk = {
-             "error": {"message": f"Streaming error: {str(e)}"}
-         }
-         yield f"data: {json.dumps(error_chunk)}\n\n"
-
- @app.post("/v1/responses")
- async def responses(req: ResponseRequest):
-     """
-     Original API endpoint to get AI responses.
-     Supports both streaming and non-streaming modes.
-
-     Workflow:
-     - Validate or create session.
-     - Ensure AI client is available.
-     - Handle streaming or full response accordingly.
-     - Save chat history per session.
-
-     Returns:
-     - JSON response with AI output and session ID.
-     """
-     model = req.model or MODEL  # Use requested model or default
-     session_id = get_or_create_session(req.session_id, model)  # Get or create session
-     last_update, session_data = session_store[session_id]
-     user_input = req.input
-
-     client = session_data["client"]
-     if client is None:
-         # If client is missing, return 503 error
-         raise HTTPException(status_code=503, detail="AI client not available")
-
-     if req.stream:
-         # If streaming requested, return a streaming response using event_generator
-         return StreamingResponse(event_generator(user_input, model, session_id), media_type="text/event-stream")
-
-     # Non-streaming request: submit input and collect full response
-     try:
-         jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
-     except Exception as e:
-         # Return 500 error if submission fails
-         raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
-
-     buffer = ""
-     for partial in jarvis_response:
-         text = partial[0][0][1]
-         buffer = text  # Update buffer with latest full response
-
-     # Save input and response to session history and update last access time
-     session_data["history"].append({"input": user_input, "response": buffer})
-     session_store[session_id] = (time.time(), session_data)
-
-     # Prepare the JSON response in OpenAI style format
-     response = {
-         "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-         "object": "chat.completion",
-         "created": int(time.time()),
-         "model": model,
-         "choices": [
-             {
-                 "index": 0,
-                 "message": {
-                     "role": "assistant",
-                     "content": buffer
-                 },
-                 "finish_reason": "stop"
-             }
-         ],
-         "session_id": session_id  # Return session_id so client can reuse it
-     }
-
-     # Return the JSON response
-     return JSONResponse(response)
-
- @app.post("/v1/chat/completions")
- async def openai_chat_completions(req: OpenAIChatRequest):
-     """
-     OpenAI-compatible endpoint for chat completions.
-     Supports both streaming and non-streaming modes.
-
-     Workflow:
-     - Validate message structure and extract conversation history
-     - Validate or create session
-     - Update session history from messages
-     - Handle streaming or full response
-     - Save new interaction to session history
-
-     Returns:
-     - JSON response in OpenAI format with session ID extension
-     """
-     # Validate messages structure
-     if not req.messages:
-         raise HTTPException(status_code=400, detail="Messages cannot be empty")
-
-     # Extract conversation history and current input
-     history = []
-     current_input = ""
-
-     # Process messages to extract conversation history
-     try:
-         # Last message should be from user and used as current input
-         if req.messages[-1]["role"] != "user":
-             raise ValueError("Last message must be from user")
-
-         current_input = req.messages[-1]["content"]
-
-         # Process message pairs (user + assistant)
-         messages = req.messages[:-1]  # Exclude last message (current input)
-         for i in range(0, len(messages), 2):
-             if i+1 < len(messages):
-                 user_msg = messages[i]
-                 assistant_msg = messages[i+1]
-
-                 if user_msg["role"] != "user" or assistant_msg["role"] != "assistant":
-                     # Skip invalid pairs but continue processing
-                     continue
-
-                 history.append({
-                     "input": user_msg["content"],
-                     "response": assistant_msg["content"]
-                 })
-     except (KeyError, ValueError) as e:
-         raise HTTPException(status_code=400, detail=f"Invalid message format: {str(e)}")
-
-     model = req.model or MODEL  # Use requested model or default
-     session_id = get_or_create_session(req.session_id, model)  # Get or create session
-     last_update, session_data = session_store[session_id]
-
-     # Update session history from messages
-     session_data["history"] = history
-     session_store[session_id] = (time.time(), session_data)
-
-     client = session_data["client"]
-     if client is None:
-         raise HTTPException(status_code=503, detail="AI client not available")
-
-     if req.stream:
-         # Streaming response
-         return StreamingResponse(
-             event_generator(current_input, model, session_id),
-             media_type="text/event-stream"
-         )
-
-     # Non-streaming response
-     try:
-         jarvis_response = client.submit(multi={"text": current_input}, api_name="/api")
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
-
-     buffer = ""
-     for partial in jarvis_response:
-         text = partial[0][0][1]
-         buffer = text
-
-     # Update session history with new interaction
-     session_data["history"].append({"input": current_input, "response": buffer})
-     session_store[session_id] = (time.time(), session_data)
-
-     # Format response in OpenAI style
-     response = {
-         "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-         "object": "chat.completion",
-         "created": int(time.time()),
-         "model": model,
-         "choices": [
-             {
-                 "index": 0,
-                 "message": {
-                     "role": "assistant",
-                     "content": buffer
-                 },
-                 "finish_reason": "stop"
-             }
-         ],
-         "session_id": session_id  # Custom extension for session management
-     }
-
-     return JSONResponse(response)
-
- @app.get("/v1/models")
- async def list_models():
-     """
-     OpenAI-compatible endpoint to list available models.
-     Returns a fixed list containing our default model.
-
-     This endpoint is required by many OpenAI-compatible clients.
-     """
-     return JSONResponse({
-         "object": "list",
-         "data": [
-             {
-                 "id": MODEL,
-                 "object": "model",
-                 "created": 0,  # Timestamp not available
-                 "owned_by": "J.A.R.V.I.S."
-             }
-         ]
-     })
-
- @app.get("/v1/history")
- async def get_history(session_id: Optional[str] = None):
-     """
-     Endpoint to retrieve chat history for a given session.
-
-     Parameters:
-     - session_id: The unique session identifier.
-
-     Returns:
-     - JSON object containing session_id and list of past input-response pairs.
-
-     Raises:
-     - 404 error if session_id is missing or session does not exist.
-     """
-     if not session_id or session_id not in session_store:
-         raise HTTPException(status_code=404, detail="Session not found or session_id missing.")
-
-     _, session_data = session_store[session_id]
-     return {"session_id": session_id, "history": session_data["history"]}
-
  @app.get("/")
  def root():
      """
-     Simple health check endpoint.
-     Returns basic status indicating if API is running.
-     """
-     return {"status": "API is running"}

- # Run the app with Uvicorn ASGI server when executed directly
  if __name__ == "__main__":
      uvicorn.run(app, host="0.0.0.0", port=7860)
 
  # SPDX-License-Identifier: Apache-2.0
  #

+ import asyncio  # Import asyncio for asynchronous programming support in Python
+ import json  # Import json module to handle JSON encoding and decoding
+ import uvicorn  # Import uvicorn, an ASGI server implementation for running FastAPI applications
+
+ from contextlib import asynccontextmanager  # Import asynccontextmanager to create asynchronous context managers if needed
+ from fastapi import FastAPI  # Import FastAPI class to create the main web application instance
+ from fastapi.responses import Response  # Import Response class to send raw HTTP responses
+ from fastapi.responses import JSONResponse  # Import JSONResponse class for sending JSON formatted HTTP responses
+ from src.routes.v1 import responses, chat_completions, models, history  # Import router modules from the src.routes.v1 package to organize API endpoints
+
+ # Initialize a FastAPI application
  app = FastAPI()

+ # Include the 'responses' router under the '/v1' prefix with the tag "Responses"
+ # This router handles endpoints related to general responses
+ app.include_router(responses.router, prefix="/v1", tags=["Responses"])
+
+ # Include the 'chat_completions' router under the '/v1/chat' prefix with the tag "Chat Completions"
+ # This router manages chat completion related API endpoints
+ app.include_router(chat_completions.router, prefix="/v1/chat", tags=["Chat Completions"])
+
+ # Include the 'models' router under the '/v1' prefix with the tag "Models"
+ # This router provides endpoints related to available models
+ app.include_router(models.router, prefix="/v1", tags=["Models"])
+
+ # Include the 'history' router under the '/v1' prefix with the tag "History"
+ # This router manages API endpoints related to user or session history
+ app.include_router(history.router, prefix="/v1", tags=["History"])
+
+ # Define a root path GET endpoint for the base URL '/'
+ # This endpoint acts as a health check to confirm the API is operational
  @app.get("/")
  def root():
      """
+     Health check endpoint that returns a JSON response with API status information.
+     It confirms the API is running and lists active routers with their URLs and statuses.
+     This is useful for monitoring and basic connectivity testing.
+     """
+     # Create a dictionary containing status information and URLs of active routers
+     data = {
+         "Status": "API is running!",
+         "Endpoint": "https://hadadrjt-api.hf.space/v1",
+         "Type": "OpenAI-style",
+         "Router 1": {
+             "URL": "https://hadadrjt-api.hf.space/v1/chat/completions",
+             "Status": "Active"
+         },
+         "Router 2": {
+             "URL": "https://hadadrjt-api.hf.space/v1/responses",
+             "Status": "Active"
+         },
+         "Router 3": {
+             "URL": "https://hadadrjt-api.hf.space/v1/models",
+             "Status": "Active"
+         },
+         "Router 4": {
+             "URL": "https://hadadrjt-api.hf.space/v1/history",
+             "Status": "Active"
+         }
+     }
+     # Convert the dictionary to a pretty-printed JSON string with indentation for readability
+     json_content = json.dumps(data, indent=4)
+     # Return the JSON string as an HTTP response with content type set to application/json
+     return Response(content=json_content, media_type="application/json")

+ # Check if this script is being run directly (not imported as a module)
  if __name__ == "__main__":
+     # Run the FastAPI app using the Uvicorn ASGI server
+     # Bind to all available network interfaces on port 7860
      uvicorn.run(app, host="0.0.0.0", port=7860)
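
A minimal health-check sketch for the restructured app, assuming the https://hadadrjt-api.hf.space base URL that root() itself advertises (any other deployment would use its own host) and using the third-party requests package purely for illustration:

    # Sketch: verify the API is up via the root health-check endpoint.
    import requests

    BASE_URL = "https://hadadrjt-api.hf.space"  # assumption: public Space URL from root() above

    resp = requests.get(f"{BASE_URL}/")
    resp.raise_for_status()
    print(resp.json()["Status"])  # expected: "API is running!" per the handler above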
config.py ADDED
@@ -0,0 +1,10 @@
+ #
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+ # SPDX-License-Identifier: Apache-2.0
+ #
+
+ # Default AI model name used when no model is specified by user
+ MODEL = "JARVIS: 2.1.3"
+
+ # Duration (in seconds) after which inactive sessions are removed
+ EXPIRE = 3600  # 1 hour
src/__init__.py ADDED
File without changes
src/cores/__init__.py ADDED
File without changes
src/cores/sessions.py ADDED
@@ -0,0 +1,120 @@
+ #
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+ # SPDX-License-Identifier: Apache-2.0
+ #
+
+ import time  # Import the time module to work with timestamps for session expiration checks
+ import uuid  # Import uuid module to generate unique session identifiers
+
+ from gradio_client import Client  # Import Client from gradio_client to interact with the AI model API
+ from typing import Dict, Tuple, Optional, Any  # Import type hints for better code clarity and validation
+ from config import EXPIRE  # Import the EXPIRE constant which defines session timeout duration
+
+ # Dictionary to store active user sessions
+ # Key: session_id (string) uniquely identifying each session
+ # Value: Tuple containing:
+ # - last_update_timestamp (float): the last time this session was accessed or updated
+ # - session_data_dict (dict): holds session-specific data including:
+ #   - "model": the AI model name currently used in this session
+ #   - "history": a list that tracks the conversation history (inputs and responses)
+ #   - "client": the Gradio Client instance associated with this session for API calls
+ session_store: Dict[str, Tuple[float, Dict[str, Any]]] = {}
+
+ def cleanup_expired_sessions():
+     """
+     Iterate through all stored sessions and remove those that have been inactive
+     for longer than the configured EXPIRE duration. This function helps prevent
+     memory leaks and resource wastage by closing Gradio clients and deleting
+     session data for sessions no longer in use.
+     """
+     now = time.time()  # Get the current time in seconds since epoch
+     # Identify all sessions where the time since last update exceeds the expiration limit
+     expired_sessions = [
+         sid for sid, (last_update, _) in session_store.items()
+         if now - last_update > EXPIRE
+     ]
+     # For each expired session, safely close the associated Gradio client and remove session data
+     for sid in expired_sessions:
+         _, data = session_store[sid]  # Extract session data dictionary
+         client = data.get("client")  # Retrieve the Gradio client instance if it exists
+         if client:
+             try:
+                 client.close()  # Attempt to close the client connection to release resources
+             except Exception:
+                 # Suppress any exceptions during client close to ensure cleanup continues smoothly
+                 pass
+         del session_store[sid]  # Remove the session entry from the session store dictionary
+
+ def create_client_for_model(model: str) -> Client:
+     """
+     Instantiate a new Gradio Client connected to the AI model API and configure it
+     to use the specified model. This client will be used to send requests and receive
+     responses for the given AI model in a session.
+
+     Parameters:
+     - model (str): The name of the AI model to initialize the client with.
+
+     Returns:
+     - Client: A configured Gradio Client instance ready to interact with the model.
+     """
+     client = Client("hadadrjt/ai")  # Create a new Gradio Client pointing to the AI service endpoint
+     # Call the /change_model API on the client to switch to the requested AI model
+     client.predict(new=model, api_name="/change_model")
+     return client  # Return the configured client instance
+
+ def get_or_create_session(session_id: Optional[str], model: str) -> str:
+     """
+     Retrieve an existing session by its session ID or create a new session if none exists.
+     This function also performs cleanup of expired sessions before proceeding to ensure
+     efficient resource management.
+
+     If the requested session exists but uses a different model than specified, the session's
+     client is replaced with a new one configured for the new model.
+
+     Parameters:
+     - session_id (Optional[str]): The unique identifier of the session to retrieve. If None or
+       invalid, a new session will be created.
+     - model (str): The AI model to be used for this session.
+
+     Returns:
+     - str: The session ID of the active or newly created session.
+     """
+     cleanup_expired_sessions()  # Remove any sessions that have timed out before proceeding
+
+     # Check if the provided session_id is valid and exists in the session store
+     if not session_id or session_id not in session_store:
+         # Generate a new unique session ID using UUID4
+         session_id = str(uuid.uuid4())
+         # Create a new Gradio client configured for the requested model
+         client = create_client_for_model(model)
+         # Store the new session with current timestamp, model name, empty history, and client instance
+         session_store[session_id] = (time.time(), {
+             "model": model,
+             "history": [],
+             "client": client
+         })
+     else:
+         # Existing session found, retrieve its last update time and data dictionary
+         last_update, data = session_store[session_id]
+         # Check if the model requested differs from the one currently associated with the session
+         if data["model"] != model:
+             # Close the old client to release resources before switching models
+             old_client = data.get("client")
+             if old_client:
+                 try:
+                     old_client.close()
+                 except Exception:
+                     # Ignore any exceptions during client close to avoid interrupting flow
+                     pass
+             # Create a new client configured for the new model
+             new_client = create_client_for_model(model)
+             # Update session data with the new model and client instance
+             data["model"] = model
+             data["client"] = new_client
+             # Update the session store with the new timestamp and updated data dictionary
+             session_store[session_id] = (time.time(), data)
+         else:
+             # Model has not changed, just update the last access time to keep session active
+             session_store[session_id] = (time.time(), data)
+
+     return session_id  # Return the active or newly created session ID
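
A short usage sketch of these helpers, assuming the repo root is on the import path and the hadadrjt/ai Space is reachable (creating a client opens a real network connection):

    # Sketch: create a session, then reuse it; the second call only refreshes the timestamp.
    from src.cores.sessions import get_or_create_session, session_store

    sid = get_or_create_session(None, "JARVIS: 2.1.3")   # new session, new Gradio client
    same = get_or_create_session(sid, "JARVIS: 2.1.3")   # same id, timestamp refreshed
    assert sid == same

    _, data = session_store[sid]
    print(data["model"], len(data["history"]))  # "JARVIS: 2.1.3" 0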
src/models/__init__.py ADDED
File without changes
src/models/requests.py ADDED
@@ -0,0 +1,37 @@
+ #
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+ # SPDX-License-Identifier: Apache-2.0
+ #
+
+ from pydantic import BaseModel  # Import BaseModel from Pydantic to define data models with validation and serialization support
+ from typing import Optional, Dict, List  # Import Optional for optional fields, Dict for dictionary types, and List for list types
+
+ class ResponseRequest(BaseModel):
+     """
+     Data model representing the request body structure for the /v1/responses API endpoint.
+
+     Attributes:
+     - model: Optional string specifying the AI model to use; defaults to a predefined MODEL if omitted.
+     - input: Required string containing the user's input text to send to the AI.
+     - stream: Optional boolean indicating if the response should be streamed incrementally; defaults to False.
+     - session_id: Optional string serving as a unique identifier for the user's session; if not provided, a new session is created.
+     """
+     model: Optional[str] = None  # AI model identifier, optional with default None
+     input: str  # User input text, required
+     stream: Optional[bool] = False  # Stream response flag, optional with default False
+     session_id: Optional[str] = None  # Session identifier, optional
+
+ class OpenAIChatRequest(BaseModel):
+     """
+     Data model defining the OpenAI-compatible request format for the /v1/chat/completions API endpoint.
+
+     Attributes:
+     - model: Optional string specifying the AI model to use; defaults to a predefined MODEL if omitted.
+     - messages: List of message dictionaries, each containing 'role' and 'content' keys, representing the conversation history.
+     - stream: Optional boolean indicating if the response should be streamed incrementally; defaults to False.
+     - session_id: Optional string serving as a unique session identifier to maintain conversation context.
+     """
+     model: Optional[str] = None  # AI model identifier, optional with default None
+     messages: List[Dict[str, str]]  # List of chat messages with roles and content, required
+     stream: Optional[bool] = False  # Stream response flag, optional with default False
+     session_id: Optional[str] = None  # Session identifier, optional
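
These Pydantic models can be exercised on their own; a quick sketch of validating an OpenAI-style payload, assuming the repo root is on the import path:

    # Sketch: validate a chat payload against OpenAIChatRequest (Pydantic only, no server needed).
    from src.models.requests import OpenAIChatRequest

    payload = {
        "messages": [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there"},
            {"role": "user", "content": "What can you do?"}
        ],
        "stream": False
    }
    req = OpenAIChatRequest(**payload)
    print(req.model)                    # None, so the endpoint falls back to config.MODEL
    print(req.messages[-1]["content"])  # "What can you do?"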
src/routes/__init__.py ADDED
File without changes
src/routes/v1/__init__.py ADDED
File without changes
src/routes/v1/chat_completions.py ADDED
@@ -0,0 +1,141 @@
+ #
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+ # SPDX-License-Identifier: Apache-2.0
+ #
+
+ import time  # Import time module to handle timestamps and time-based operations
+ import uuid  # Import uuid module to generate unique identifiers for responses
+
+ from fastapi import APIRouter, HTTPException  # Import APIRouter to create route groups and HTTPException for error handling
+ from fastapi.responses import JSONResponse, StreamingResponse  # Import response classes for JSON and streaming responses
+ from src.models.requests import OpenAIChatRequest  # Import request model defining the expected structure of chat requests
+ from src.cores.sessions import get_or_create_session, session_store  # Import session management functions and storage
+ from src.services.streaming import event_generator  # Import generator function for streaming chat responses
+ from config import MODEL  # Import default model configuration
+
+ # Create an API router instance to group related endpoints for chat completions
+ router = APIRouter()
+
+ @router.post("/completions")
+ async def openai_chat_completions(req: OpenAIChatRequest):
+     """
+     Handle OpenAI-compatible chat completion requests.
+     Supports streaming and non-streaming modes based on client request.
+
+     Steps:
+     - Validate the presence and structure of messages in the request
+     - Extract conversation history and current user input from messages
+     - Retrieve or create a session for managing conversation state
+     - Update session history with prior conversation
+     - If streaming is requested, return a streaming response
+     - Otherwise, submit input to AI client and collect full response
+     - Append new interaction to session history
+     - Return response formatted according to OpenAI chat completion API
+
+     Returns:
+     JSONResponse or StreamingResponse with chat completion data and session ID
+     """
+     # Ensure messages list is not empty, else raise HTTP 400 error
+     if not req.messages:
+         raise HTTPException(status_code=400, detail="Messages cannot be empty")
+
+     history = []  # Initialize conversation history list
+     current_input = ""  # Initialize variable to hold current user input
+
+     # Process messages to separate conversation history and current input
+     try:
+         # The last message must be from the user and represents current input
+         if req.messages[-1]["role"] != "user":
+             raise ValueError("Last message must be from user")
+
+         current_input = req.messages[-1]["content"]  # Extract current input text
+
+         # Iterate over message pairs (user followed by assistant) to build history
+         messages = req.messages[:-1]  # Exclude last message as it is current input
+         for i in range(0, len(messages), 2):
+             if i + 1 < len(messages):
+                 user_msg = messages[i]
+                 assistant_msg = messages[i + 1]
+
+                 # Validate message roles; skip pairs that do not match expected pattern
+                 if user_msg["role"] != "user" or assistant_msg["role"] != "assistant":
+                     continue
+
+                 # Append input-response pair to history
+                 history.append({
+                     "input": user_msg["content"],
+                     "response": assistant_msg["content"]
+                 })
+     except (KeyError, ValueError) as e:
+         # Raise HTTP 400 error if message format is invalid
+         raise HTTPException(status_code=400, detail=f"Invalid message format: {str(e)}")
+
+     # Determine model to use: requested model or default from config
+     model = req.model or MODEL
+
+     # Retrieve existing session or create a new one using session ID and model
+     session_id = get_or_create_session(req.session_id, model)
+
+     # Get last update time and session data from session store
+     last_update, session_data = session_store[session_id]
+
+     # Update session history with extracted conversation history
+     session_data["history"] = history
+
+     # Save updated session data with current timestamp
+     session_store[session_id] = (time.time(), session_data)
+
+     client = session_data["client"]  # Retrieve AI client instance from session
+
+     # If AI client is not available, raise HTTP 503 error
+     if client is None:
+         raise HTTPException(status_code=503, detail="AI client not available")
+
+     # If streaming is requested, return a streaming response using event generator
+     if req.stream:
+         return StreamingResponse(
+             event_generator(current_input, model, session_id),
+             media_type="text/event-stream"
+         )
+
+     # For non-streaming requests, submit input to AI client and collect response
+     try:
+         jarvis_response = client.submit(multi={"text": current_input}, api_name="/api")
+     except Exception as e:
+         # Raise HTTP 500 error if submission to AI client fails
+         raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
+
+     buffer = ""  # Initialize buffer to accumulate AI response text
+
+     # Iterate over partial responses from AI client to build full response text
+     for partial in jarvis_response:
+         text = partial[0][0][1]  # Extract text from nested response structure
+         buffer = text  # Update buffer with latest text chunk
+
+     # Append new input-response pair to session history
+     session_data["history"].append({"input": current_input, "response": buffer})
+
+     # Update session store with new history and timestamp
+     session_store[session_id] = (time.time(), session_data)
+
+     # Construct response in OpenAI chat completion format with session ID extension
+     response = {
+         "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",  # Unique response ID
+         "object": "chat.completion",
+         "created": int(time.time()),  # Timestamp of response creation
+         "model": model,  # Model used for completion
+         "choices": [
+             {
+                 "index": 0,
+                 "message": {
+                     "role": "assistant",
+                     "content": buffer  # AI-generated response content
+                 },
+                 "finish_reason": "stop"
+             }
+         ],
+         "session_id": session_id  # Custom field for session management
+     }
+
+     # Return JSON response with formatted completion data
+     return JSONResponse(response)
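
As a usage sketch, the endpoint accepts a standard OpenAI chat payload plus the custom session_id extension; the base URL below is assumed from the list advertised in app.py, and requests is used purely for illustration:

    # Sketch: call /v1/chat/completions (non-streaming) and reuse the returned session_id.
    import requests

    BASE_URL = "https://hadadrjt-api.hf.space"  # assumption: deployment URL from app.py

    body = {"messages": [{"role": "user", "content": "Hello"}], "stream": False}
    r = requests.post(f"{BASE_URL}/v1/chat/completions", json=body)
    r.raise_for_status()
    reply = r.json()
    print(reply["choices"][0]["message"]["content"])
    session_id = reply["session_id"]  # custom extension; send it back to keep context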
src/routes/v1/history.py ADDED
@@ -0,0 +1,54 @@
+ #
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+ # SPDX-License-Identifier: Apache-2.0
+ #
+
+ # Import APIRouter and HTTPException classes from FastAPI framework
+ # APIRouter is used to create modular route handlers
+ # HTTPException is used to generate HTTP error responses with specific status codes and details
+ from fastapi import APIRouter, HTTPException
+
+ # Import Optional type hint from typing module
+ # Optional is used to indicate that a function parameter can be of a specified type or None
+ from typing import Optional
+
+ # Import session_store dictionary from the sessions module located in src.cores package
+ # session_store holds active session data keyed by session identifiers
+ from src.cores.sessions import session_store
+
+ # Create an instance of APIRouter to define routes related to session history
+ router = APIRouter()
+
+ # Define an asynchronous GET endpoint at path "/history" to retrieve chat history for a session
+ @router.get("/history")
+ async def get_history(session_id: Optional[str] = None):
+     """
+     This function handles GET requests to fetch the chat history for a specific session.
+
+     Parameters:
+     - session_id (Optional[str]): A string representing the unique identifier of the session.
+       This parameter is optional in the function signature but required for successful retrieval.
+
+     Returns:
+     - A JSON object containing:
+       - "session_id": The provided session identifier string.
+       - "history": A list of past input-response pairs stored in the session.
+
+     Raises:
+     - HTTPException with status code 404 and a descriptive message if:
+       - The session_id is not provided (None or empty).
+       - The session_id does not exist in the session_store dictionary, indicating no active session.
+     """
+
+     # Check if session_id is missing or does not exist in the session_store dictionary
+     if not session_id or session_id not in session_store:
+         # Raise an HTTP 404 Not Found error with a clear message indicating the issue
+         raise HTTPException(status_code=404, detail="Session not found or session_id missing.")
+
+     # Retrieve the session data tuple from session_store using the session_id key
+     # The tuple contains two elements; the first is ignored here, the second is the session data dictionary
+     _, session_data = session_store[session_id]
+
+     # Return a dictionary with the session_id and the chat history extracted from the session data
+     # This dictionary will be automatically converted to JSON by FastAPI when sending the response
+     return {"session_id": session_id, "history": session_data["history"]}
src/routes/v1/models.py ADDED
@@ -0,0 +1,39 @@
+ #
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+ # SPDX-License-Identifier: Apache-2.0
+ #
+
+ # Import APIRouter class from fastapi module to create a router instance for grouping related API routes
+ from fastapi import APIRouter
+
+ # Import JSONResponse to send JSON formatted HTTP responses from endpoint functions
+ from fastapi.responses import JSONResponse
+
+ # Import MODEL constant from config module, which specifies the default model identifier
+ from config import MODEL
+
+ # Create an APIRouter instance to collect and organize routes related to model operations
+ router = APIRouter()
+
+ # Define an asynchronous GET endpoint at path "/models" on this router
+ @router.get("/models")
+ async def list_models():
+     """
+     OpenAI-compatible endpoint to list available models.
+     Returns a fixed list containing our default model.
+
+     This endpoint is required by many OpenAI-compatible clients to discover available models.
+     """
+     # Return a JSON response with a list object containing one model dictionary
+     # The model dictionary includes id from config, a static object type, a placeholder created timestamp
+     return JSONResponse({
+         "object": "list",
+         "data": [
+             {
+                 "id": MODEL,
+                 "object": "model",
+                 "created": 0,  # Timestamp not available, so set to zero
+                 "owned_by": "J.A.R.V.I.S."
+             }
+         ]
+     })
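
Since OpenAI-compatible clients typically call this endpoint automatically, a plain-requests sketch is enough to see its shape (base URL assumed from app.py):

    # Sketch: list models the OpenAI-compatible way.
    import requests

    BASE_URL = "https://hadadrjt-api.hf.space"  # assumption: deployment URL from app.py

    r = requests.get(f"{BASE_URL}/v1/models")
    for m in r.json()["data"]:
        print(m["id"], "owned by", m["owned_by"])  # e.g. JARVIS: 2.1.3 owned by J.A.R.V.I.S.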
src/routes/v1/responses.py ADDED
@@ -0,0 +1,114 @@
+ #
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+ # SPDX-License-Identifier: Apache-2.0
+ #
+
+ import time  # Import time module to handle timestamps and measure time intervals
+ import uuid  # Import uuid module to generate unique identifiers for responses
+
+ from fastapi import APIRouter, HTTPException  # Import FastAPI router and HTTP exception handling
+ from fastapi.responses import JSONResponse, StreamingResponse  # Import response types for JSON and streaming data
+ from src.models.requests import ResponseRequest  # Import the data model for incoming request validation
+ from src.cores.sessions import get_or_create_session, session_store  # Import session management utilities
+ from src.services.streaming import event_generator  # Import generator function for streaming AI responses
+ from config import MODEL  # Import default AI model configuration
+
+ # Create a new API router instance to handle endpoints related to AI responses
+ router = APIRouter()
+
+ @router.post("/responses")
+ async def responses(req: ResponseRequest):
+     """
+     API endpoint to receive user input and return AI-generated responses.
+     Supports both streaming and non-streaming modes to accommodate different client needs.
+
+     Detailed Workflow:
+     1. Determine which AI model to use, either from request or default configuration.
+     2. Retrieve an existing session or create a new one based on session ID and model.
+     3. Extract the AI client from the session data, ensuring it is available.
+     4. If streaming is requested, return a streaming response that yields partial results as they arrive.
+     5. For non-streaming requests, submit the entire user input to the AI client and collect the full response.
+     6. Handle any errors during submission by returning appropriate HTTP error codes.
+     7. Store the user input and AI response in the session history for future reference.
+     8. Update the session's last access time to maintain session freshness.
+     9. Format the AI response in a JSON structure compatible with OpenAI's chat completion format.
+     10. Return the formatted JSON response along with the session ID for client reuse.
+
+     Parameters:
+     - req: ResponseRequest object containing user input, optional model, session ID, and streaming flag.
+
+     Returns:
+     - JSONResponse containing AI-generated text, metadata, and session information if non-streaming.
+     - StreamingResponse yielding incremental AI output if streaming is enabled.
+
+     Raises:
+     - HTTPException with status 503 if AI client is unavailable.
+     - HTTPException with status 500 if AI submission fails.
+     """
+     # Select the AI model specified in the request or fall back to the default model
+     model = req.model or MODEL
+
+     # Retrieve existing session or create a new one using the provided session ID and model
+     session_id = get_or_create_session(req.session_id, model)
+
+     # Extract the last update timestamp and session data dictionary from the session store
+     last_update, session_data = session_store[session_id]
+
+     # Extract the user's input text from the request object
+     user_input = req.input
+
+     # Retrieve the AI client instance from the session data; this client handles AI interactions
+     client = session_data["client"]
+
+     # If the AI client is not initialized or unavailable, respond with a 503 Service Unavailable error
+     if client is None:
+         raise HTTPException(status_code=503, detail="AI client not available")
+
+     # If the client requested streaming mode, return a streaming response that sends partial AI outputs as events
+     if req.stream:
+         # Use event_generator to produce server-sent events for real-time streaming of AI responses
+         return StreamingResponse(event_generator(user_input, model, session_id), media_type="text/event-stream")
+
+     # For non-streaming requests, submit the full user input to the AI client and collect the complete response
+     try:
+         jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
+     except Exception as e:
+         # If submission to the AI client fails, respond with a 500 Internal Server Error and error details
+         raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
+
+     # Initialize a buffer string to accumulate the full AI response text
+     buffer = ""
+     # Iterate over the streaming partial responses returned by the AI client
+     for partial in jarvis_response:
+         # Extract the text content from the nested partial response structure
+         text = partial[0][0][1]
+         # Update the buffer with the latest full response text received
+         buffer = text
+
+     # Append the user input and AI response pair to the session's chat history for context and record keeping
+     session_data["history"].append({"input": user_input, "response": buffer})
+
+     # Update the session store with the current timestamp and modified session data to keep session active
+     session_store[session_id] = (time.time(), session_data)
+
+     # Construct the JSON response following OpenAI's chat completion format for compatibility
+     response = {
+         "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",  # Unique identifier for this chat completion instance
+         "object": "chat.completion",  # Object type indicating a chat completion response
+         "created": int(time.time()),  # Timestamp of response creation in seconds since epoch
+         "model": model,  # The AI model used to generate this response
+         "choices": [
+             {
+                 "index": 0,  # Index of this choice in the list of completions
+                 "message": {
+                     "role": "assistant",  # Role indicating the source of the message is the AI assistant
+                     "content": buffer  # The AI-generated text content to be delivered to the user
+                 },
+                 "finish_reason": "stop"  # Reason for completion, indicating the response is complete
+             }
+         ],
+         "session_id": session_id  # Include session ID so the client can maintain or reuse the session
+     }
+
+     # Return the constructed JSON response to the client as an HTTP response
+     return JSONResponse(response)
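
Unlike the OpenAI-style route, this endpoint takes a plain "input" string; a non-streaming sketch (base URL assumed from app.py):

    # Sketch: call the simpler /v1/responses shape and keep the session alive across calls.
    import requests

    BASE_URL = "https://hadadrjt-api.hf.space"  # assumption: deployment URL from app.py

    r = requests.post(f"{BASE_URL}/v1/responses", json={"input": "Ping?"})
    data = r.json()
    print(data["choices"][0]["message"]["content"])
    print(data["session_id"])  # pass this back in the next request to reuse the session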
src/services/__init__.py ADDED
File without changes
src/services/streaming.py ADDED
@@ -0,0 +1,120 @@
+ #
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+ # SPDX-License-Identifier: Apache-2.0
+ #
+
+ import json  # Import the json module to handle JSON encoding and decoding of data
+ import time  # Import the time module to work with timestamps
+ import uuid  # Import the uuid module to generate unique identifiers
+
+ from typing import AsyncGenerator  # Import AsyncGenerator for typing asynchronous generator functions
+ from src.cores.sessions import session_store  # Import the session_store object to manage user sessions
+
+ async def event_generator(user_input: str, model: str, session_id: str) -> AsyncGenerator[str, None]:
+     """
+     Asynchronous generator function that streams AI-generated responses incrementally as Server-Sent Events (SSE).
+
+     Parameters:
+     - user_input: The input text provided by the user to the AI model.
+     - model: The identifier of the AI model to be used for generating responses.
+     - session_id: A unique string representing the current user session.
+
+     Yields:
+     - JSON-formatted strings representing incremental chunks of the AI response,
+       formatted as Server-Sent Events for real-time streaming to the client.
+     """
+
+     # Retrieve the last update time and session data from the session store using the session ID
+     last_update, session_data = session_store.get(session_id, (0, None))
+
+     # If no session data is found for the given session ID, yield an error message and stop
+     if session_data is None:
+         yield f"data: {json.dumps({'error': 'Session not found'})}\n\n"
+         return
+
+     # Extract the AI client object from the session data
+     client = session_data["client"]
+
+     # If the client is missing in the session data, yield an error message and stop
+     if client is None:
+         yield f"data: {json.dumps({'error': 'AI client not available'})}\n\n"
+         return
+
+     try:
+         # Submit the user's input text to the AI model via the client's submit method
+         # The 'multi' parameter wraps the text, and 'api_name' specifies the API endpoint
+         jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
+     except Exception as e:
+         # If submission to the AI fails, yield an error message with the exception details and stop
+         yield f"data: {json.dumps({'error': f'Failed to submit to AI: {str(e)}'})}\n\n"
+         return
+
+     buffer = ""  # Initialize an empty string buffer to accumulate the full AI response progressively
+
+     try:
+         # Iterate over the partial responses received from the AI client submission
+         for partial in jarvis_response:
+             # Extract the current partial text chunk from the nested response structure
+             text = partial[0][0][1]
+
+             # Determine the new delta text by comparing with the buffer
+             # If the new text starts with the buffer, delta is the new appended text; otherwise, delta is the entire text
+             if text.startswith(buffer):
+                 delta = text[len(buffer):]
+             else:
+                 delta = text
+
+             buffer = text  # Update the buffer with the latest full text from the AI
+
+             # Skip yielding if the delta is empty (no new text)
+             if delta == "":
+                 continue
+
+             # Construct a chunk dictionary following OpenAI's streaming response format
+             chunk = {
+                 "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",  # Unique chunk ID with a UUID suffix
+                 "object": "chat.completion.chunk",  # Object type indicating a chunk of chat completion
+                 "created": int(time.time()),  # Timestamp of chunk creation in Unix time
+                 "model": model,  # The AI model used for generating this chunk
+                 "choices": [
+                     {
+                         "index": 0,  # Index of the choice in the response (usually 0 for single response)
+                         "delta": {"content": delta},  # The incremental new text content in this chunk
+                         "finish_reason": None  # No finish reason yet, stream is ongoing
+                     }
+                 ]
+             }
+
+             # Yield the chunk as a Server-Sent Event formatted string with 'data:' prefix and double newline suffix
+             yield f"data: {json.dumps(chunk)}\n\n"
+
+         # After all chunks have been streamed, append the full input-response pair to the session history
+         session_data["history"].append({"input": user_input, "response": buffer})
+
+         # Update the session store with the new last access time and updated session data
+         session_store[session_id] = (time.time(), session_data)
+
+         # Prepare a final chunk indicating the end of the stream with finish_reason set to 'stop'
+         done_chunk = {
+             "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",  # Unique ID for the final chunk
+             "object": "chat.completion.chunk",  # Object type for consistency
+             "created": int(time.time()),  # Timestamp of completion
+             "model": model,  # Model identifier
+             "choices": [
+                 {
+                     "index": 0,
+                     "delta": {},  # Empty delta indicating no new content
+                     "finish_reason": "stop"  # Signal that the stream has finished
+                 }
+             ]
+         }
+
+         # Yield the final completion chunk to signal the client that streaming is done
+         yield f"data: {json.dumps(done_chunk)}\n\n"
+
+     except Exception as e:
+         # If any error occurs during streaming, yield an error chunk with the exception message
+         error_chunk = {
+             "error": {"message": f"Streaming error: {str(e)}"}
+         }
+         yield f"data: {json.dumps(error_chunk)}\n\n"
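
On the consuming side, the SSE stream produced here can be read with a plain streaming HTTP client; a sketch using requests (base URL assumed from app.py):

    # Sketch: consume the "data: {...}\n\n" events emitted by event_generator above.
    import json
    import requests

    BASE_URL = "https://hadadrjt-api.hf.space"  # assumption: deployment URL from app.py

    body = {"messages": [{"role": "user", "content": "Hello"}], "stream": True}
    with requests.post(f"{BASE_URL}/v1/chat/completions", json=body, stream=True) as r:
        for line in r.iter_lines():
            if not line or not line.startswith(b"data: "):
                continue  # skip keep-alive blanks and anything that is not an SSE data line
            chunk = json.loads(line[len(b"data: "):])
            delta = chunk.get("choices", [{}])[0].get("delta", {})
            print(delta.get("content", ""), end="", flush=True)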