import os

from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()
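# Expects a .env file next to this script containing the API key for the
# Modal endpoint, e.g. (the value shown is illustrative):
#   Modal_API_KEY=your-key-here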
class Colors:
    """ANSI color codes for terminal output formatting."""
    GREEN = "\033[0;32m"
    RED = "\033[0;31m"
    BLUE = "\033[0;34m"
    GRAY = "\033[0;90m"
    BOLD = "\033[1m"
    END = "\033[0m"
def ask_ai(
    prompt,
    system_prompt,
    temperature=0.7,
    max_tokens=None,
    stream=True,
    verbose=False,
):
""" | |
Send a prompt to the AI model and get a response. | |
Args: | |
prompt (str): The user prompt to send to the AI | |
system_prompt (str): The system instructions for the AI | |
model (str): The model name to use | |
temperature (float): Controls randomness (0.0-1.0) | |
max_tokens (int): Maximum tokens in the response | |
stream (bool): Whether to stream the response | |
verbose (bool): Whether to print status messages | |
Returns: | |
str: The AI's response text | |
""" | |
    # Create an OpenAI client pointed at the Modal-deployed,
    # OpenAI-compatible endpoint; the key comes from the .env file loaded above.
    api_key = os.getenv("Modal_API_KEY")
    client = OpenAI(
        api_key=api_key,
        base_url="https://abhinav77642--llama-3-1-8b-instruct-serve.modal.run/v1",
    )

    if verbose:
        print(f"{Colors.GRAY}Sending request to the Modal endpoint...{Colors.END}")

    # Build the conversation: a system message followed by the user prompt.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    # Assemble the completion parameters; the model name matches the
    # quantized Llama 3.1 8B checkpoint served by the deployment.
    completion_args = {
        "model": "neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "stream": stream,
    }

    # Drop unset (None) values so the API's own defaults apply.
    completion_args = {k: v for k, v in completion_args.items() if v is not None}
    try:
        response = client.chat.completions.create(**completion_args)

        # Streaming responses arrive as a sequence of chunks; accumulate
        # the content deltas into a single string before returning.
        if stream:
            result = ""
            for chunk in response:
                if chunk.choices and chunk.choices[0].delta.content:
                    result += chunk.choices[0].delta.content
            return result

        # Non-streaming responses carry the full text in the first choice.
        return response.choices[0].message.content
    except Exception as e:
        if verbose:
            print(f"{Colors.RED}Request failed: {e}{Colors.END}")
        return f"Error during API call: {e}"