api: Restructured repo.
Files changed:
- README.md +3 -2
- app.py +56 -448
- config.py +10 -0
- src/__init__.py +0 -0
- src/cores/__init__.py +0 -0
- src/cores/sessions.py +120 -0
- src/models/__init__.py +0 -0
- src/models/requests.py +37 -0
- src/routes/__init__.py +0 -0
- src/routes/v1/__init__.py +0 -0
- src/routes/v1/chat_completions.py +141 -0
- src/routes/v1/history.py +54 -0
- src/routes/v1/models.py +39 -0
- src/routes/v1/responses.py +114 -0
- src/services/__init__.py +0 -0
- src/services/streaming.py +120 -0
README.md
CHANGED
@@ -1,5 +1,5 @@
 ---
-title:
+title: API
 license: apache-2.0
 license_link: https://huggingface.co/hadadrjt/JARVIS/blob/main/LICENSE
 emoji: π
@@ -8,7 +8,8 @@ colorTo: green
 sdk: gradio
 sdk_version: 5.34.0
 app_file: app.py
-pinned:
+pinned: true
+short_description: J.A.R.V.I.S. API Endpoint!
 models:
 - hadadrjt/JARVIS
 ---
app.py
CHANGED
@@ -3,465 +3,73 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-import json
-import time
-import uuid
-import asyncio
-import uvicorn
-
-from contextlib import asynccontextmanager
-from fastapi import FastAPI, HTTPException
-from fastapi.responses import JSONResponse, StreamingResponse
-from gradio_client import Client
-from pydantic import BaseModel
-from typing import AsyncGenerator, Optional, Dict, List, Tuple, Any
-
-#
-MODEL = "JARVIS: 2.1.3"
-
-# Session store keeps track of active sessions.
-# Each session_id maps to a tuple:
-# (last_update_timestamp, session_data_dict)
-# session_data_dict contains:
-# - "model": the AI model name used in this session
-# - "history": list of past chat messages (input and response)
-# - "client": the Gradio Client instance specific to this session
-session_store: Dict[str, Tuple[float, Dict]] = {}
-
-# Duration (in seconds) after which inactive sessions are removed
-EXPIRE = 3600 # 1 hour
-
-# Create FastAPI app instance
+import asyncio # Import asyncio for asynchronous programming support in Python
+import json # Import json module to handle JSON encoding and decoding
+import uvicorn # Import uvicorn, an ASGI server implementation for running FastAPI applications
+
+from contextlib import asynccontextmanager # Import asynccontextmanager to create asynchronous context managers if needed
+from fastapi import FastAPI # Import FastAPI class to create the main web application instance
+from fastapi.responses import Response # Import Response class to send raw HTTP responses
+from fastapi.responses import JSONResponse # Import JSONResponse class for sending JSON formatted HTTP responses
+from src.routes.v1 import responses, chat_completions, models, history # Import router modules from the src.routes.v1 package to organize API endpoints
+
+# Initialize a FastAPI application
 app = FastAPI()
 
-class ResponseRequest(BaseModel):
-    """
-    Defines the request structure for the /v1/responses endpoint.
-
-    Attributes:
-    - model: Optional; specifies which AI model to use. Defaults to MODEL if not provided.
-    - input: The user's input text to send to the AI.
-    - stream: Optional; if True, the response will be streamed incrementally.
-    - session_id: Optional; unique identifier for the user's session. If missing, a new session will be created.
-    """
-    model: Optional[str] = None
-    input: str
-    stream: Optional[bool] = False
-    session_id: Optional[str] = None
-
-class OpenAIChatRequest(BaseModel):
-    """
-    Defines the OpenAI-compatible request structure for /v1/chat/completions endpoint.
-
-    Attributes:
-    - model: Optional; specifies which AI model to use. Defaults to MODEL if not provided.
-    - messages: List of message objects containing 'role' and 'content'
-    - stream: Optional; if True, the response will be streamed incrementally.
-    - session_id: Optional; unique session identifier for maintaining conversation history
-    """
-    model: Optional[str] = None
-    messages: List[Dict[str, str]]
-    stream: Optional[bool] = False
-    session_id: Optional[str] = None
-
-def cleanup_expired_sessions():
-    """
-    Remove sessions that have been inactive for longer than EXPIRE.
-    This helps free up memory by deleting old sessions and closing their clients.
-    """
-    now = time.time()
-    expired_sessions = [
-        sid for sid, (last_update, _) in session_store.items()
-        if now - last_update > EXPIRE
-    ]
-    for sid in expired_sessions:
-        # Attempt to close the Gradio client associated with the session
-        _, data = session_store[sid]
-        client = data.get("client")
-        if client:
-            try:
-                client.close()
-            except Exception:
-                # Ignore errors during client close to avoid crashing cleanup
-                pass
-        # Remove the session from the store
-        del session_store[sid]
-
-def create_client_for_model(model: str) -> Client:
-    """
-    Create a new Gradio Client instance and set it to use the specified AI model.
-
-    Parameters:
-    - model: The name of the AI model to initialize the client with.
-
-    Returns:
-    - A new Gradio Client instance configured with the given model.
-    """
-    client = Client("hadadrjt/ai")
-    # Set the model on the Gradio client by calling the /change_model API
-    client.predict(new=model, api_name="/change_model")
-    return client
-
-def get_or_create_session(session_id: Optional[str], model: str) -> str:
-    """
-    Retrieve an existing session by session_id or create a new one if it doesn't exist.
-    Also cleans up expired sessions before proceeding.
-
-    Parameters:
-    - session_id: The unique identifier of the session (optional).
-    - model: The AI model to use for this session.
-
-    Returns:
-    - The session_id for the active or newly created session.
-    """
-    cleanup_expired_sessions()
-
-    # If no session_id provided or session does not exist, create a new session
-    if not session_id or session_id not in session_store:
-        session_id = str(uuid.uuid4()) # Generate a new unique session ID
-        client = create_client_for_model(model) # Create a new client for this session
-        session_store[session_id] = (time.time(), {
-            "model": model,
-            "history": [],
-            "client": client
-        })
-    else:
-        # Session exists, update last access time and check if model changed
-        last_update, data = session_store[session_id]
-        if data["model"] != model:
-            # If model changed, close old client and create a new one with the new model
-            old_client = data.get("client")
-            if old_client:
-                try:
-                    old_client.close()
-                except Exception:
-                    pass # Ignore errors on close
-            new_client = create_client_for_model(model)
-            data["model"] = model
-            data["client"] = new_client
-            session_store[session_id] = (time.time(), data)
-        else:
-            # Just update the last access time to keep session alive
-            session_store[session_id] = (time.time(), data)
-
-    return session_id
-
-async def event_generator(user_input: str, model: str, session_id: str) -> AsyncGenerator[str, None]:
-    """
-    Asynchronous generator that streams AI responses incrementally as Server-Sent Events (SSE).
-
-    Parameters:
-    - user_input: The input text from the user.
-    - model: The AI model to use.
-    - session_id: The unique session identifier.
-
-    Yields:
-    - JSON-formatted chunks representing incremental AI response deltas.
-    """
-    last_update, session_data = session_store.get(session_id, (0, None))
-    if session_data is None:
-        # Session not found; yield error and stop
-        yield f"data: {json.dumps({'error': 'Session not found'})}\n\n"
-        return
-
-    client = session_data["client"]
-    if client is None:
-        # Client missing for session; yield error and stop
-        yield f"data: {json.dumps({'error': 'AI client not available'})}\n\n"
-        return
-
-    try:
-        # Submit the user input to the AI model via Gradio client
-        jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
-    except Exception as e:
-        # If submission fails, yield error and stop
-        yield f"data: {json.dumps({'error': f'Failed to submit to AI: {str(e)}'})}\n\n"
-        return
-
-    buffer = "" # Buffer to track full response text progressively
-
-    try:
-        for partial in jarvis_response:
-            # Extract the current partial text from the response
-            text = partial[0][0][1]
-
-            # Compute the delta relative to the buffered text
-            if text.startswith(buffer):
-                delta = text[len(buffer):]
-            else:
-                delta = text
-
-            buffer = text
-
-            # Skip empty deltas
-            if delta == "":
-                continue
-
-            # Prepare chunk data in OpenAI streaming format
-            chunk = {
-                "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-                "object": "chat.completion.chunk",
-                "created": int(time.time()),
-                "model": model,
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"content": delta},
-                        "finish_reason": None
-                    }
-                ]
-            }
-
-            # Yield the chunk as a Server-Sent Event
-            yield f"data: {json.dumps(chunk)}\n\n"
-
-        # After streaming completes, save the full input-response pair to session history
-        session_data["history"].append({"input": user_input, "response": buffer})
-        session_store[session_id] = (time.time(), session_data) # Update last access time
-
-        # Send a final chunk signaling completion of the stream
-        done_chunk = {
-            "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-            "object": "chat.completion.chunk",
-            "created": int(time.time()),
-            "model": model,
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": {},
-                    "finish_reason": "stop"
-                }
-            ]
-        }
-        yield f"data: {json.dumps(done_chunk)}\n\n"
-
-    except Exception as e:
-        # If streaming fails at any point, yield an error chunk
-        error_chunk = {
-            "error": {"message": f"Streaming error: {str(e)}"}
-        }
-        yield f"data: {json.dumps(error_chunk)}\n\n"
-
-@app.post("/v1/responses")
-async def responses(req: ResponseRequest):
-    """
-    Original API endpoint to get AI responses.
-    Supports both streaming and non-streaming modes.
-
-    Workflow:
-    - Validate or create session.
-    - Ensure AI client is available.
-    - Handle streaming or full response accordingly.
-    - Save chat history per session.
-
-    Returns:
-    - JSON response with AI output and session ID.
-    """
-    model = req.model or MODEL # Use requested model or default
-    session_id = get_or_create_session(req.session_id, model) # Get or create session
-    last_update, session_data = session_store[session_id]
-    user_input = req.input
-
-    client = session_data["client"]
-    if client is None:
-        # If client is missing, return 503 error
-        raise HTTPException(status_code=503, detail="AI client not available")
-
-    if req.stream:
-        # If streaming requested, return a streaming response using event_generator
-        return StreamingResponse(event_generator(user_input, model, session_id), media_type="text/event-stream")
-
-    # Non-streaming request: submit input and collect full response
-    try:
-        jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
-    except Exception as e:
-        # Return 500 error if submission fails
-        raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
-
-    buffer = ""
-    for partial in jarvis_response:
-        text = partial[0][0][1]
-        buffer = text # Update buffer with latest full response
-
-    # Save input and response to session history and update last access time
-    session_data["history"].append({"input": user_input, "response": buffer})
-    session_store[session_id] = (time.time(), session_data)
-
-    # Prepare the JSON response in OpenAI style format
-    response = {
-        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-        "object": "chat.completion",
-        "created": int(time.time()),
-        "model": model,
-        "choices": [
-            {
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": buffer
-                },
-                "finish_reason": "stop"
-            }
-        ],
-        "session_id": session_id # Return session_id so client can reuse it
-    }
-
-    # Return the JSON response
-    return JSONResponse(response)
-
-@app.post("/v1/chat/completions")
-async def openai_chat_completions(req: OpenAIChatRequest):
-    """
-    OpenAI-compatible endpoint for chat completions.
-    Supports both streaming and non-streaming modes.
-
-    Workflow:
-    - Validate message structure and extract conversation history
-    - Validate or create session
-    - Update session history from messages
-    - Handle streaming or full response
-    - Save new interaction to session history
-
-    Returns:
-    - JSON response in OpenAI format with session ID extension
-    """
-    # Validate messages structure
-    if not req.messages:
-        raise HTTPException(status_code=400, detail="Messages cannot be empty")
-
-    # Extract conversation history and current input
-    history = []
-    current_input = ""
-
-    # Process messages to extract conversation history
-    try:
-        # Last message should be from user and used as current input
-        if req.messages[-1]["role"] != "user":
-            raise ValueError("Last message must be from user")
-
-        current_input = req.messages[-1]["content"]
-
-        # Process message pairs (user + assistant)
-        messages = req.messages[:-1] # Exclude last message (current input)
-        for i in range(0, len(messages), 2):
-            if i+1 < len(messages):
-                user_msg = messages[i]
-                assistant_msg = messages[i+1]
-
-                if user_msg["role"] != "user" or assistant_msg["role"] != "assistant":
-                    # Skip invalid pairs but continue processing
-                    continue
-
-                history.append({
-                    "input": user_msg["content"],
-                    "response": assistant_msg["content"]
-                })
-    except (KeyError, ValueError) as e:
-        raise HTTPException(status_code=400, detail=f"Invalid message format: {str(e)}")
-
-    model = req.model or MODEL # Use requested model or default
-    session_id = get_or_create_session(req.session_id, model) # Get or create session
-    last_update, session_data = session_store[session_id]
-
-    # Update session history from messages
-    session_data["history"] = history
-    session_store[session_id] = (time.time(), session_data)
-
-    client = session_data["client"]
-    if client is None:
-        raise HTTPException(status_code=503, detail="AI client not available")
-
-    if req.stream:
-        # Streaming response
-        return StreamingResponse(
-            event_generator(current_input, model, session_id),
-            media_type="text/event-stream"
-        )
-
-    # Non-streaming response
-    try:
-        jarvis_response = client.submit(multi={"text": current_input}, api_name="/api")
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
-
-    buffer = ""
-    for partial in jarvis_response:
-        text = partial[0][0][1]
-        buffer = text
-
-    # Update session history with new interaction
-    session_data["history"].append({"input": current_input, "response": buffer})
-    session_store[session_id] = (time.time(), session_data)
-
-    # Format response in OpenAI style
-    response = {
-        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-        "object": "chat.completion",
-        "created": int(time.time()),
-        "model": model,
-        "choices": [
-            {
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": buffer
-                },
-                "finish_reason": "stop"
-            }
-        ],
-        "session_id": session_id # Custom extension for session management
-    }
-
-    return JSONResponse(response)
-
-@app.get("/v1/models")
-async def list_models():
-    """
-    OpenAI-compatible endpoint to list available models.
-    Returns a fixed list containing our default model.
-
-    This endpoint is required by many OpenAI-compatible clients.
-    """
-    return JSONResponse({
-        "object": "list",
-        "data": [
-            {
-                "id": MODEL,
-                "object": "model",
-                "created": 0, # Timestamp not available
-                "owned_by": "J.A.R.V.I.S."
-            }
-        ]
-    })
-
-@app.get("/v1/history")
-async def get_history(session_id: Optional[str] = None):
-    """
-    Endpoint to retrieve chat history for a given session.
-
-    Parameters:
-    - session_id: The unique session identifier.
-
-    Returns:
-    - JSON object containing session_id and list of past input-response pairs.
-
-    Raises:
-    - 404 error if session_id is missing or session does not exist.
-    """
-    if not session_id or session_id not in session_store:
-        raise HTTPException(status_code=404, detail="Session not found or session_id missing.")
-
-    _, session_data = session_store[session_id]
-    return {"session_id": session_id, "history": session_data["history"]}
+# Include the 'responses' router under the '/v1' prefix with the tag "Responses"
+# This router handles endpoints related to general responses
+app.include_router(responses.router, prefix="/v1", tags=["Responses"])
+
+# Include the 'chat_completions' router under the '/v1/chat' prefix with the tag "Chat Completions"
+# This router manages chat completion related API endpoints
+app.include_router(chat_completions.router, prefix="/v1/chat", tags=["Chat Completions"])
+
+# Include the 'models' router under the '/v1' prefix with the tag "Models"
+# This router provides endpoints related to available models
+app.include_router(models.router, prefix="/v1", tags=["Models"])
+
+# Include the 'history' router under the '/v1' prefix with the tag "History"
+# This router manages API endpoints related to user or session history
+app.include_router(history.router, prefix="/v1", tags=["History"])
+
+# Define a root path GET endpoint for the base URL '/'
+# This endpoint acts as a health check to confirm the API is operational
 @app.get("/")
 def root():
     """
-
-
-
-
+    Health check endpoint that returns a JSON response with API status information.
+    It confirms the API is running and lists active routers with their URLs and statuses.
+    This is useful for monitoring and basic connectivity testing.
+    """
+    # Create a dictionary containing status information and URLs of active routers
+    data = {
+        "Status": "API is running!",
+        "Endpoint": "https://hadadrjt-api.hf.space/v1",
+        "Type": "OpenAI-style",
+        "Router 1": {
+            "URL": "https://hadadrjt-api.hf.space/v1/chat/completions",
+            "Status": "Active"
+        },
+        "Router 2": {
+            "URL": "https://hadadrjt-api.hf.space/v1/responses",
+            "Status": "Active"
+        },
+        "Router 3": {
+            "URL": "https://hadadrjt-api.hf.space/v1/models",
+            "Status": "Active"
+        },
+        "Router 4": {
+            "URL": "https://hadadrjt-api.hf.space/v1/history",
+            "Status": "Active"
+        }
+    }
+    # Convert the dictionary to a pretty-printed JSON string with indentation for readability
+    json_content = json.dumps(data, indent=4)
+    # Return the JSON string as an HTTP response with content type set to application/json
+    return Response(content=json_content, media_type="application/json")
 
-#
+# Check if this script is being run directly (not imported as a module)
 if __name__ == "__main__":
+    # Run the FastAPI app using the Uvicorn ASGI server
+    # Bind to all available network interfaces on port 7860
    uvicorn.run(app, host="0.0.0.0", port=7860)
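
The restructured app.py now only wires routers together and exposes the root health check. For reference, a minimal usage sketch, assuming the third-party requests package and the Space URL advertised by the endpoint itself:

import requests  # third-party HTTP client, assumed installed

# Query the root health-check endpoint defined in app.py
resp = requests.get("https://hadadrjt-api.hf.space/")
resp.raise_for_status()

status = resp.json()  # the endpoint returns pretty-printed JSON
print(status["Status"])            # "API is running!"
print(status["Router 1"]["URL"])   # URL of the chat completions router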
config.py
ADDED
@@ -0,0 +1,10 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Default AI model name used when no model is specified by user
+MODEL = "JARVIS: 2.1.3"
+
+# Duration (in seconds) after which inactive sessions are removed
+EXPIRE = 3600 # 1 hour
src/__init__.py
ADDED
File without changes

src/cores/__init__.py
ADDED
File without changes
src/cores/sessions.py
ADDED
@@ -0,0 +1,120 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import time # Import the time module to work with timestamps for session expiration checks
+import uuid # Import uuid module to generate unique session identifiers
+
+from gradio_client import Client # Import Client from gradio_client to interact with the AI model API
+from typing import Dict, Tuple, Optional, Any # Import type hints for better code clarity and validation
+from config import EXPIRE # Import the EXPIRE constant which defines session timeout duration
+
+# Dictionary to store active user sessions
+# Key: session_id (string) uniquely identifying each session
+# Value: Tuple containing:
+# - last_update_timestamp (float): the last time this session was accessed or updated
+# - session_data_dict (dict): holds session-specific data including:
+#   - "model": the AI model name currently used in this session
+#   - "history": a list that tracks the conversation history (inputs and responses)
+#   - "client": the Gradio Client instance associated with this session for API calls
+session_store: Dict[str, Tuple[float, Dict[str, Any]]] = {}
+
+def cleanup_expired_sessions():
+    """
+    Iterate through all stored sessions and remove those that have been inactive
+    for longer than the configured EXPIRE duration. This function helps prevent
+    memory leaks and resource wastage by closing Gradio clients and deleting
+    session data for sessions no longer in use.
+    """
+    now = time.time() # Get the current time in seconds since epoch
+    # Identify all sessions where the time since last update exceeds the expiration limit
+    expired_sessions = [
+        sid for sid, (last_update, _) in session_store.items()
+        if now - last_update > EXPIRE
+    ]
+    # For each expired session, safely close the associated Gradio client and remove session data
+    for sid in expired_sessions:
+        _, data = session_store[sid] # Extract session data dictionary
+        client = data.get("client") # Retrieve the Gradio client instance if it exists
+        if client:
+            try:
+                client.close() # Attempt to close the client connection to release resources
+            except Exception:
+                # Suppress any exceptions during client close to ensure cleanup continues smoothly
+                pass
+        del session_store[sid] # Remove the session entry from the session store dictionary
+
+def create_client_for_model(model: str) -> Client:
+    """
+    Instantiate a new Gradio Client connected to the AI model API and configure it
+    to use the specified model. This client will be used to send requests and receive
+    responses for the given AI model in a session.
+
+    Parameters:
+    - model (str): The name of the AI model to initialize the client with.
+
+    Returns:
+    - Client: A configured Gradio Client instance ready to interact with the model.
+    """
+    client = Client("hadadrjt/ai") # Create a new Gradio Client pointing to the AI service endpoint
+    # Call the /change_model API on the client to switch to the requested AI model
+    client.predict(new=model, api_name="/change_model")
+    return client # Return the configured client instance
+
+def get_or_create_session(session_id: Optional[str], model: str) -> str:
+    """
+    Retrieve an existing session by its session ID or create a new session if none exists.
+    This function also performs cleanup of expired sessions before proceeding to ensure
+    efficient resource management.
+
+    If the requested session exists but uses a different model than specified, the session's
+    client is replaced with a new one configured for the new model.
+
+    Parameters:
+    - session_id (Optional[str]): The unique identifier of the session to retrieve. If None or
+      invalid, a new session will be created.
+    - model (str): The AI model to be used for this session.
+
+    Returns:
+    - str: The session ID of the active or newly created session.
+    """
+    cleanup_expired_sessions() # Remove any sessions that have timed out before proceeding
+
+    # Check if the provided session_id is valid and exists in the session store
+    if not session_id or session_id not in session_store:
+        # Generate a new unique session ID using UUID4
+        session_id = str(uuid.uuid4())
+        # Create a new Gradio client configured for the requested model
+        client = create_client_for_model(model)
+        # Store the new session with current timestamp, model name, empty history, and client instance
+        session_store[session_id] = (time.time(), {
+            "model": model,
+            "history": [],
+            "client": client
+        })
+    else:
+        # Existing session found, retrieve its last update time and data dictionary
+        last_update, data = session_store[session_id]
+        # Check if the model requested differs from the one currently associated with the session
+        if data["model"] != model:
+            # Close the old client to release resources before switching models
+            old_client = data.get("client")
+            if old_client:
+                try:
+                    old_client.close()
+                except Exception:
+                    # Ignore any exceptions during client close to avoid interrupting flow
+                    pass
+            # Create a new client configured for the new model
+            new_client = create_client_for_model(model)
+            # Update session data with the new model and client instance
+            data["model"] = model
+            data["client"] = new_client
+            # Update the session store with the new timestamp and updated data dictionary
+            session_store[session_id] = (time.time(), data)
+        else:
+            # Model has not changed, just update the last access time to keep session active
+            session_store[session_id] = (time.time(), data)
+
+    return session_id # Return the active or newly created session ID
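
A sketch of the session lifecycle this module implements. This is illustrative only: constructing a real Client requires network access to the hadadrjt/ai Space, and the alternate model name below is hypothetical.

from src.cores.sessions import get_or_create_session, session_store

# No session_id: a new session is created and keyed by a fresh UUID
sid = get_or_create_session(None, "JARVIS: 2.1.3")
assert sid in session_store

# Passing the same session_id back just refreshes its last-update timestamp
assert get_or_create_session(sid, "JARVIS: 2.1.3") == sid

# Requesting a different model on the same session swaps the client in place
sid = get_or_create_session(sid, "some-other-model")  # hypothetical model name
assert session_store[sid][1]["model"] == "some-other-model"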
src/models/__init__.py
ADDED
File without changes
src/models/requests.py
ADDED
@@ -0,0 +1,37 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from pydantic import BaseModel # Import BaseModel from Pydantic to define data models with validation and serialization support
+from typing import Optional, Dict, List # Import Optional for optional fields, Dict for dictionary types, and List for list types
+
+class ResponseRequest(BaseModel):
+    """
+    Data model representing the request body structure for the /v1/responses API endpoint.
+
+    Attributes:
+    - model: Optional string specifying the AI model to use; defaults to a predefined MODEL if omitted.
+    - input: Required string containing the user's input text to send to the AI.
+    - stream: Optional boolean indicating if the response should be streamed incrementally; defaults to False.
+    - session_id: Optional string serving as a unique identifier for the user's session; if not provided, a new session is created.
+    """
+    model: Optional[str] = None # AI model identifier, optional with default None
+    input: str # User input text, required
+    stream: Optional[bool] = False # Stream response flag, optional with default False
+    session_id: Optional[str] = None # Session identifier, optional
+
+class OpenAIChatRequest(BaseModel):
+    """
+    Data model defining the OpenAI-compatible request format for the /v1/chat/completions API endpoint.
+
+    Attributes:
+    - model: Optional string specifying the AI model to use; defaults to a predefined MODEL if omitted.
+    - messages: List of message dictionaries, each containing 'role' and 'content' keys, representing the conversation history.
+    - stream: Optional boolean indicating if the response should be streamed incrementally; defaults to False.
+    - session_id: Optional string serving as a unique session identifier to maintain conversation context.
+    """
+    model: Optional[str] = None # AI model identifier, optional with default None
+    messages: List[Dict[str, str]] # List of chat messages with roles and content, required
+    stream: Optional[bool] = False # Stream response flag, optional with default False
+    session_id: Optional[str] = None # Session identifier, optional
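
Since these are plain Pydantic models, their defaults and validation can be exercised directly; a small sketch:

from src.models.requests import ResponseRequest, OpenAIChatRequest

# 'input' is the only required field; everything else falls back to its default
req = ResponseRequest(input="Hello, JARVIS!")
print(req.model, req.stream, req.session_id)  # None False None

# Chat requests carry the whole conversation as role/content dictionaries
chat = OpenAIChatRequest(messages=[
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
    {"role": "user", "content": "What can you do?"},
])
print(chat.stream)  # False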
src/routes/__init__.py
ADDED
File without changes

src/routes/v1/__init__.py
ADDED
File without changes
src/routes/v1/chat_completions.py
ADDED
@@ -0,0 +1,141 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import time # Import time module to handle timestamps and time-based operations
+import uuid # Import uuid module to generate unique identifiers for responses
+
+from fastapi import APIRouter, HTTPException # Import APIRouter to create route groups and HTTPException for error handling
+from fastapi.responses import JSONResponse, StreamingResponse # Import response classes for JSON and streaming responses
+from src.models.requests import OpenAIChatRequest # Import request model defining the expected structure of chat requests
+from src.cores.sessions import get_or_create_session, session_store # Import session management functions and storage
+from src.services.streaming import event_generator # Import generator function for streaming chat responses
+from config import MODEL # Import default model configuration
+
+# Create an API router instance to group related endpoints for chat completions
+router = APIRouter()
+
+@router.post("/completions")
+async def openai_chat_completions(req: OpenAIChatRequest):
+    """
+    Handle OpenAI-compatible chat completion requests.
+    Supports streaming and non-streaming modes based on client request.
+
+    Steps:
+    - Validate the presence and structure of messages in the request
+    - Extract conversation history and current user input from messages
+    - Retrieve or create a session for managing conversation state
+    - Update session history with prior conversation
+    - If streaming is requested, return a streaming response
+    - Otherwise, submit input to AI client and collect full response
+    - Append new interaction to session history
+    - Return response formatted according to OpenAI chat completion API
+
+    Returns:
+    JSONResponse or StreamingResponse with chat completion data and session ID
+    """
+    # Ensure messages list is not empty, else raise HTTP 400 error
+    if not req.messages:
+        raise HTTPException(status_code=400, detail="Messages cannot be empty")
+
+    history = [] # Initialize conversation history list
+    current_input = "" # Initialize variable to hold current user input
+
+    # Process messages to separate conversation history and current input
+    try:
+        # The last message must be from the user and represents current input
+        if req.messages[-1]["role"] != "user":
+            raise ValueError("Last message must be from user")
+
+        current_input = req.messages[-1]["content"] # Extract current input text
+
+        # Iterate over message pairs (user followed by assistant) to build history
+        messages = req.messages[:-1] # Exclude last message as it is current input
+        for i in range(0, len(messages), 2):
+            if i + 1 < len(messages):
+                user_msg = messages[i]
+                assistant_msg = messages[i + 1]
+
+                # Validate message roles; skip pairs that do not match expected pattern
+                if user_msg["role"] != "user" or assistant_msg["role"] != "assistant":
+                    continue
+
+                # Append input-response pair to history
+                history.append({
+                    "input": user_msg["content"],
+                    "response": assistant_msg["content"]
+                })
+    except (KeyError, ValueError) as e:
+        # Raise HTTP 400 error if message format is invalid
+        raise HTTPException(status_code=400, detail=f"Invalid message format: {str(e)}")
+
+    # Determine model to use: requested model or default from config
+    model = req.model or MODEL
+
+    # Retrieve existing session or create a new one using session ID and model
+    session_id = get_or_create_session(req.session_id, model)
+
+    # Get last update time and session data from session store
+    last_update, session_data = session_store[session_id]
+
+    # Update session history with extracted conversation history
+    session_data["history"] = history
+
+    # Save updated session data with current timestamp
+    session_store[session_id] = (time.time(), session_data)
+
+    client = session_data["client"] # Retrieve AI client instance from session
+
+    # If AI client is not available, raise HTTP 503 error
+    if client is None:
+        raise HTTPException(status_code=503, detail="AI client not available")
+
+    # If streaming is requested, return a streaming response using event generator
+    if req.stream:
+        return StreamingResponse(
+            event_generator(current_input, model, session_id),
+            media_type="text/event-stream"
+        )
+
+    # For non-streaming requests, submit input to AI client and collect response
+    try:
+        jarvis_response = client.submit(multi={"text": current_input}, api_name="/api")
+    except Exception as e:
+        # Raise HTTP 500 error if submission to AI client fails
+        raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
+
+    buffer = "" # Initialize buffer to accumulate AI response text
+
+    # Iterate over partial responses from AI client to build full response text
+    for partial in jarvis_response:
+        text = partial[0][0][1] # Extract text from nested response structure
+        buffer = text # Update buffer with latest text chunk
+
+    # Append new input-response pair to session history
+    session_data["history"].append({"input": current_input, "response": buffer})
+
+    # Update session store with new history and timestamp
+    session_store[session_id] = (time.time(), session_data)
+
+    # Construct response in OpenAI chat completion format with session ID extension
+    response = {
+        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}", # Unique response ID
+        "object": "chat.completion",
+        "created": int(time.time()), # Timestamp of response creation
+        "model": model, # Model used for completion
+        "choices": [
+            {
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": buffer # AI-generated response content
+                },
+                "finish_reason": "stop"
+            }
+        ],
+        "session_id": session_id # Custom field for session management
+    }
+
+    # Return JSON response with formatted completion data
+    return JSONResponse(response)
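
With the router mounted under /v1/chat in app.py, this endpoint resolves to /v1/chat/completions. A minimal non-streaming call, assuming the requests package and the deployed Space URL:

import requests  # third-party HTTP client, assumed installed

payload = {
    "messages": [
        {"role": "user", "content": "Summarize FastAPI in one line."}
    ],
    "stream": False
}
resp = requests.post("https://hadadrjt-api.hf.space/v1/chat/completions", json=payload)
data = resp.json()
print(data["choices"][0]["message"]["content"])

# Reuse the returned session_id to keep conversation state server-side
session_id = data["session_id"]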
src/routes/v1/history.py
ADDED
@@ -0,0 +1,54 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Import APIRouter and HTTPException classes from FastAPI framework
+# APIRouter is used to create modular route handlers
+# HTTPException is used to generate HTTP error responses with specific status codes and details
+from fastapi import APIRouter, HTTPException
+
+# Import Optional type hint from typing module
+# Optional is used to indicate that a function parameter can be of a specified type or None
+from typing import Optional
+
+# Import session_store dictionary from the sessions module located in src.cores package
+# session_store holds active session data keyed by session identifiers
+from src.cores.sessions import session_store
+
+# Create an instance of APIRouter to define routes related to session history
+router = APIRouter()
+
+# Define an asynchronous GET endpoint at path "/history" to retrieve chat history for a session
+@router.get("/history")
+async def get_history(session_id: Optional[str] = None):
+    """
+    This function handles GET requests to fetch the chat history for a specific session.
+
+    Parameters:
+    - session_id (Optional[str]): A string representing the unique identifier of the session.
+      This parameter is optional in the function signature but required for successful retrieval.
+
+    Returns:
+    - A JSON object containing:
+      - "session_id": The provided session identifier string.
+      - "history": A list of past input-response pairs stored in the session.
+
+    Raises:
+    - HTTPException with status code 404 and a descriptive message if:
+      - The session_id is not provided (None or empty).
+      - The session_id does not exist in the session_store dictionary, indicating no active session.
+    """
+
+    # Check if session_id is missing or does not exist in the session_store dictionary
+    if not session_id or session_id not in session_store:
+        # Raise an HTTP 404 Not Found error with a clear message indicating the issue
+        raise HTTPException(status_code=404, detail="Session not found or session_id missing.")
+
+    # Retrieve the session data tuple from session_store using the session_id key
+    # The tuple contains two elements; the first is ignored here, the second is the session data dictionary
+    _, session_data = session_store[session_id]
+
+    # Return a dictionary with the session_id and the chat history extracted from the session data
+    # This dictionary will be automatically converted to JSON by FastAPI when sending the response
+    return {"session_id": session_id, "history": session_data["history"]}
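
A sketch of fetching a session's history; the session_id placeholder stands for a value returned by an earlier /v1/responses or /v1/chat/completions call:

import requests  # third-party HTTP client, assumed installed

session_id = "..."  # hypothetical: returned by a previous request
resp = requests.get(
    "https://hadadrjt-api.hf.space/v1/history",
    params={"session_id": session_id}
)
if resp.status_code == 404:
    print("Session expired or unknown")
else:
    for turn in resp.json()["history"]:
        print("you:", turn["input"])
        print("ai: ", turn["response"])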
src/routes/v1/models.py
ADDED
@@ -0,0 +1,39 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Import APIRouter class from fastapi module to create a router instance for grouping related API routes
+from fastapi import APIRouter
+
+# Import JSONResponse to send JSON formatted HTTP responses from endpoint functions
+from fastapi.responses import JSONResponse
+
+# Import MODEL constant from config module, which specifies the default model identifier
+from config import MODEL
+
+# Create an APIRouter instance to collect and organize routes related to model operations
+router = APIRouter()
+
+# Define an asynchronous GET endpoint at path "/models" on this router
+@router.get("/models")
+async def list_models():
+    """
+    OpenAI-compatible endpoint to list available models.
+    Returns a fixed list containing our default model.
+
+    This endpoint is required by many OpenAI-compatible clients to discover available models.
+    """
+    # Return a JSON response with a list object containing one model dictionary
+    # The model dictionary includes the id from config, a static object type, and a placeholder created timestamp
+    return JSONResponse({
+        "object": "list",
+        "data": [
+            {
+                "id": MODEL,
+                "object": "model",
+                "created": 0, # Timestamp not available, so set to zero
+                "owned_by": "J.A.R.V.I.S."
+            }
+        ]
+    })
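
A quick sketch of the discovery call many OpenAI-compatible clients issue on startup, assuming the deployed Space URL:

import requests  # third-party HTTP client, assumed installed

resp = requests.get("https://hadadrjt-api.hf.space/v1/models")
for m in resp.json()["data"]:
    print(m["id"], "owned by", m["owned_by"])  # JARVIS: 2.1.3 owned by J.A.R.V.I.S.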
src/routes/v1/responses.py
ADDED
@@ -0,0 +1,114 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import time # Import time module to handle timestamps and measure time intervals
+import uuid # Import uuid module to generate unique identifiers for responses
+
+from fastapi import APIRouter, HTTPException # Import FastAPI router and HTTP exception handling
+from fastapi.responses import JSONResponse, StreamingResponse # Import response types for JSON and streaming data
+from src.models.requests import ResponseRequest # Import the data model for incoming request validation
+from src.cores.sessions import get_or_create_session, session_store # Import session management utilities
+from src.services.streaming import event_generator # Import generator function for streaming AI responses
+from config import MODEL # Import default AI model configuration
+
+# Create a new API router instance to handle endpoints related to AI responses
+router = APIRouter()
+
+@router.post("/responses")
+async def responses(req: ResponseRequest):
+    """
+    API endpoint to receive user input and return AI-generated responses.
+    Supports both streaming and non-streaming modes to accommodate different client needs.
+
+    Detailed Workflow:
+    1. Determine which AI model to use, either from request or default configuration.
+    2. Retrieve an existing session or create a new one based on session ID and model.
+    3. Extract the AI client from the session data, ensuring it is available.
+    4. If streaming is requested, return a streaming response that yields partial results as they arrive.
+    5. For non-streaming requests, submit the entire user input to the AI client and collect the full response.
+    6. Handle any errors during submission by returning appropriate HTTP error codes.
+    7. Store the user input and AI response in the session history for future reference.
+    8. Update the session's last access time to maintain session freshness.
+    9. Format the AI response in a JSON structure compatible with OpenAI's chat completion format.
+    10. Return the formatted JSON response along with the session ID for client reuse.
+
+    Parameters:
+    - req: ResponseRequest object containing user input, optional model, session ID, and streaming flag.
+
+    Returns:
+    - JSONResponse containing AI-generated text, metadata, and session information if non-streaming.
+    - StreamingResponse yielding incremental AI output if streaming is enabled.
+
+    Raises:
+    - HTTPException with status 503 if AI client is unavailable.
+    - HTTPException with status 500 if AI submission fails.
+    """
+    # Select the AI model specified in the request or fall back to the default model
+    model = req.model or MODEL
+
+    # Retrieve existing session or create a new one using the provided session ID and model
+    session_id = get_or_create_session(req.session_id, model)
+
+    # Extract the last update timestamp and session data dictionary from the session store
+    last_update, session_data = session_store[session_id]
+
+    # Extract the user's input text from the request object
+    user_input = req.input
+
+    # Retrieve the AI client instance from the session data; this client handles AI interactions
+    client = session_data["client"]
+
+    # If the AI client is not initialized or unavailable, respond with a 503 Service Unavailable error
+    if client is None:
+        raise HTTPException(status_code=503, detail="AI client not available")
+
+    # If the client requested streaming mode, return a streaming response that sends partial AI outputs as events
+    if req.stream:
+        # Use event_generator to produce server-sent events for real-time streaming of AI responses
+        return StreamingResponse(event_generator(user_input, model, session_id), media_type="text/event-stream")
+
+    # For non-streaming requests, submit the full user input to the AI client and collect the complete response
+    try:
+        jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
+    except Exception as e:
+        # If submission to the AI client fails, respond with a 500 Internal Server Error and error details
+        raise HTTPException(status_code=500, detail=f"Failed to submit to AI: {str(e)}")
+
+    # Initialize a buffer string to accumulate the full AI response text
+    buffer = ""
+    # Iterate over the streaming partial responses returned by the AI client
+    for partial in jarvis_response:
+        # Extract the text content from the nested partial response structure
+        text = partial[0][0][1]
+        # Update the buffer with the latest full response text received
+        buffer = text
+
+    # Append the user input and AI response pair to the session's chat history for context and record keeping
+    session_data["history"].append({"input": user_input, "response": buffer})
+
+    # Update the session store with the current timestamp and modified session data to keep session active
+    session_store[session_id] = (time.time(), session_data)
+
+    # Construct the JSON response following OpenAI's chat completion format for compatibility
+    response = {
+        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}", # Unique identifier for this chat completion instance
+        "object": "chat.completion", # Object type indicating a chat completion response
+        "created": int(time.time()), # Timestamp of response creation in seconds since epoch
+        "model": model, # The AI model used to generate this response
+        "choices": [
+            {
+                "index": 0, # Index of this choice in the list of completions
+                "message": {
+                    "role": "assistant", # Role indicating the source of the message is the AI assistant
+                    "content": buffer # The AI-generated text content to be delivered to the user
+                },
+                "finish_reason": "stop" # Reason for completion, indicating the response is complete
+            }
+        ],
+        "session_id": session_id # Include session ID so the client can maintain or reuse the session
+    }
+
+    # Return the constructed JSON response to the client as an HTTP response
+    return JSONResponse(response)
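
A minimal non-streaming call to this endpoint; only the required input field is sent, so the default model and a fresh session are used (requests package and Space URL assumed):

import requests  # third-party HTTP client, assumed installed

resp = requests.post(
    "https://hadadrjt-api.hf.space/v1/responses",
    json={"input": "Ping?", "stream": False}
)
body = resp.json()
print(body["choices"][0]["message"]["content"])
print("session:", body["session_id"])  # pass this back to continue the conversation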
src/services/__init__.py
ADDED
File without changes
src/services/streaming.py
ADDED
@@ -0,0 +1,120 @@
+#
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import json # Import the json module to handle JSON encoding and decoding of data
+import time # Import the time module to work with timestamps
+import uuid # Import the uuid module to generate unique identifiers
+
+from typing import AsyncGenerator # Import AsyncGenerator for typing asynchronous generator functions
+from src.cores.sessions import session_store # Import the session_store object to manage user sessions
+
+async def event_generator(user_input: str, model: str, session_id: str) -> AsyncGenerator[str, None]:
+    """
+    Asynchronous generator function that streams AI-generated responses incrementally as Server-Sent Events (SSE).
+
+    Parameters:
+    - user_input: The input text provided by the user to the AI model.
+    - model: The identifier of the AI model to be used for generating responses.
+    - session_id: A unique string representing the current user session.
+
+    Yields:
+    - JSON-formatted strings representing incremental chunks of the AI response,
+      formatted as Server-Sent Events for real-time streaming to the client.
+    """
+
+    # Retrieve the last update time and session data from the session store using the session ID
+    last_update, session_data = session_store.get(session_id, (0, None))
+
+    # If no session data is found for the given session ID, yield an error message and stop
+    if session_data is None:
+        yield f"data: {json.dumps({'error': 'Session not found'})}\n\n"
+        return
+
+    # Extract the AI client object from the session data
+    client = session_data["client"]
+
+    # If the client is missing in the session data, yield an error message and stop
+    if client is None:
+        yield f"data: {json.dumps({'error': 'AI client not available'})}\n\n"
+        return
+
+    try:
+        # Submit the user's input text to the AI model via the client's submit method
+        # The 'multi' parameter wraps the text, and 'api_name' specifies the API endpoint
+        jarvis_response = client.submit(multi={"text": user_input}, api_name="/api")
+    except Exception as e:
+        # If submission to the AI fails, yield an error message with the exception details and stop
+        yield f"data: {json.dumps({'error': f'Failed to submit to AI: {str(e)}'})}\n\n"
+        return
+
+    buffer = "" # Initialize an empty string buffer to accumulate the full AI response progressively
+
+    try:
+        # Iterate over the partial responses received from the AI client submission
+        for partial in jarvis_response:
+            # Extract the current partial text chunk from the nested response structure
+            text = partial[0][0][1]
+
+            # Determine the new delta text by comparing with the buffer
+            # If the new text starts with the buffer, delta is the newly appended text; otherwise, delta is the entire text
+            if text.startswith(buffer):
+                delta = text[len(buffer):]
+            else:
+                delta = text
+
+            buffer = text # Update the buffer with the latest full text from the AI
+
+            # Skip yielding if the delta is empty (no new text)
+            if delta == "":
+                continue
+
+            # Construct a chunk dictionary following OpenAI's streaming response format
+            chunk = {
+                "id": f"chatcmpl-{uuid.uuid4().hex[:8]}", # Unique chunk ID with a UUID suffix
+                "object": "chat.completion.chunk", # Object type indicating a chunk of chat completion
+                "created": int(time.time()), # Timestamp of chunk creation in Unix time
+                "model": model, # The AI model used for generating this chunk
+                "choices": [
+                    {
+                        "index": 0, # Index of the choice in the response (usually 0 for single response)
+                        "delta": {"content": delta}, # The incremental new text content in this chunk
+                        "finish_reason": None # No finish reason yet, stream is ongoing
+                    }
+                ]
+            }
+
+            # Yield the chunk as a Server-Sent Event formatted string with 'data:' prefix and double newline suffix
+            yield f"data: {json.dumps(chunk)}\n\n"
+
+        # After all chunks have been streamed, append the full input-response pair to the session history
+        session_data["history"].append({"input": user_input, "response": buffer})
+
+        # Update the session store with the new last access time and updated session data
+        session_store[session_id] = (time.time(), session_data)
+
+        # Prepare a final chunk indicating the end of the stream with finish_reason set to 'stop'
+        done_chunk = {
+            "id": f"chatcmpl-{uuid.uuid4().hex[:8]}", # Unique ID for the final chunk
+            "object": "chat.completion.chunk", # Object type for consistency
+            "created": int(time.time()), # Timestamp of completion
+            "model": model, # Model identifier
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {}, # Empty delta indicating no new content
+                    "finish_reason": "stop" # Signal that the stream has finished
+                }
+            ]
+        }
+
+        # Yield the final completion chunk to signal the client that streaming is done
+        yield f"data: {json.dumps(done_chunk)}\n\n"
+
+    except Exception as e:
+        # If any error occurs during streaming, yield an error chunk with the exception message
+        error_chunk = {
+            "error": {"message": f"Streaming error: {str(e)}"}
+        }
+        yield f"data: {json.dumps(error_chunk)}\n\n"
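
On the client side, the frames produced by event_generator can be decoded line by line. A sketch using the requests package with stream=True; the error and finish_reason handling mirrors the chunk shapes defined above:

import json
import requests  # third-party HTTP client, assumed installed

with requests.post(
    "https://hadadrjt-api.hf.space/v1/responses",
    json={"input": "Tell me a short story.", "stream": True},
    stream=True
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip blank separator lines between events
        chunk = json.loads(line[len("data: "):])
        if "error" in chunk:
            raise RuntimeError(str(chunk["error"]))
        choice = chunk["choices"][0]
        if choice["finish_reason"] == "stop":
            break  # final empty-delta chunk marks the end of the stream
        print(choice["delta"].get("content", ""), end="", flush=True)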