import os
from openai import OpenAI
import modal
from dotenv import load_dotenv
load_dotenv()


class Colors:
    """ANSI color codes for terminal output formatting."""
    GREEN = "\033[0;32m"
    RED = "\033[0;31m"
    BLUE = "\033[0;34m"
    GRAY = "\033[0;90m"
    BOLD = "\033[1m"
    END = "\033[0m"


def ask_ai(
    prompt,
    system_prompt,
    model="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",
    temperature=0.7,
    max_tokens=None,
    stream=True,
    verbose=False
):
    """
    Send a prompt to the AI model and get a response.

    Args:
        prompt (str): The user prompt to send to the AI
        system_prompt (str): The system instructions for the AI
        model (str): The model name to use
        temperature (float): Controls randomness (0.0-1.0)
        max_tokens (int): Maximum tokens in the response
        stream (bool): Whether to stream the response
        verbose (bool): Whether to print status messages

    Returns:
        str: The AI's response text
    """
    # Create OpenAI client and set up the connection to Modal
    API_KEY = os.getenv("Modal_API_KEY")
    client = OpenAI(api_key=API_KEY)
    # Point the client at our Modal-deployed, OpenAI-compatible endpoint
    client.base_url = "https://abhinav77642--llama-3-1-8b-instruct-serve.modal.run/v1"

    # Set up the messages for the conversation
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
    ]

    # Set up the completion parameters
    completion_args = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "stream": stream
    }
    # Remove None values so the server-side defaults apply
    completion_args = {k: v for k, v in completion_args.items() if v is not None}
    if verbose:
        print(f"{Colors.GRAY}Sending request to the Modal endpoint...{Colors.END}")

    try:
        response = client.chat.completions.create(**completion_args)
        # Handle the response based on streaming or non-streaming mode
        if stream:
            # Accumulate streamed chunks into a single string
            result = ""
            for chunk in response:
                if chunk.choices and chunk.choices[0].delta.content:
                    result += chunk.choices[0].delta.content
            return result
        else:
            result = response.choices[0].message.content
            return result
    except Exception as e:
        error_msg = f"Error during API call: {e}"
        if verbose:
            print(f"{Colors.RED}{error_msg}{Colors.END}")
        return f"Error: {error_msg}"