RAMYASRI-39's picture
Update app.py
bd218cf verified
import gradio as gr
from phi.agent import Agent
from phi.model.groq import Groq
import os
import logging
from sentence_transformers import CrossEncoder
from backend.semantic_search import table, retriever
import numpy as np
from time import perf_counter
import requests
from jinja2 import Environment, FileSystemLoader
from pathlib import Path
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Groq API key setup.
# The value is stripped exactly like the Bhashini credentials below, so a
# secret saved with a stray trailing newline/space is not passed on verbatim.
api_key = os.getenv("GROQ_API_KEY", "").strip()
if not api_key:
    # NOTE(review): gr.Warning is normally raised from inside an event handler;
    # confirm it has any user-visible effect when called at import time.
    gr.Warning("GROQ_API_KEY not found. Set it in 'Repository secrets'.")
    logger.error("GROQ_API_KEY not found.")
    # api_key stays "" so the module can still be imported; it is handed to
    # the Groq model below as-is.
else:
    # Store the normalized (stripped) key back into the environment.
    os.environ["GROQ_API_KEY"] = api_key

# Bhashini API setup: credentials used as request headers by bhashini_translate.
bhashini_api_key = os.getenv("API_KEY", "").strip()
bhashini_user_id = os.getenv("USER_ID", "").strip()
def _translation_result(status_code, message, translated_content=None):
    """Build the result dict returned by bhashini_translate."""
    return {"status_code": status_code, "message": message, "translated_content": translated_content}


def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi", timeout: float = 30.0) -> dict:
    """Translate text from source language to target language using the Bhashini API.

    Two HTTP requests are made: the first asks the pipeline endpoint for a
    translation service (service id, callback URL and the header carrying the
    inference API key); the second posts ``text`` to that callback URL.

    Args:
        text: Text to translate. ``None``, empty or whitespace-only input is
            rejected without any network call.
        from_code: Source language code sent as ``sourceLanguage``.
        to_code: Target language code sent as ``targetLanguage``.
        timeout: Seconds passed as ``timeout`` to each ``requests.post`` call.

    Returns:
        A dict with the keys ``status_code``, ``message`` and
        ``translated_content``. ``translated_content`` is ``None`` on every
        failure path. Network errors and malformed responses are reported
        through this dict rather than raised.
    """
    if not text or not text.strip():
        print('Input text is empty. Please provide valid text for translation.')
        return _translation_result(400, "Input text is empty")

    print('Input text - ', text)
    print(f'Starting translation process from {from_code} to {to_code}...')
    gr.Warning(f'Translating to {to_code}...')

    url = 'https://meity-auth.ulcacontrib.org/ulca/apis/v0/model/getModelsPipeline'
    headers = {
        "Content-Type": "application/json",
        "userID": bhashini_user_id,
        "ulcaApiKey": bhashini_api_key,
    }
    for key, value in headers.items():
        if not isinstance(value, str) or '\n' in value or '\r' in value:
            # The value may be a credential, so only the header name is printed.
            print(f"Invalid header value for {key}")
            return _translation_result(400, f"Invalid header value for {key}")

    language = {"sourceLanguage": from_code, "targetLanguage": to_code}
    payload = {
        "pipelineTasks": [{"taskType": "translation", "config": {"language": language}}],
        "pipelineRequestConfig": {"pipelineId": "64392f96daac500b55c543cd"},
    }

    print('Sending initial request to get the pipeline...')
    try:
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f'Error in initial request: {exc}')
        return _translation_result(503, "Error in translation request")
    if response.status_code != 200:
        print(f'Error in initial request: {response.status_code}, Response: {response.text}')
        return _translation_result(response.status_code, "Error in translation request")

    print('Initial request successful, processing response...')
    # The pipeline response carries the inference API key value, so the body is
    # deliberately not dumped to stdout.
    try:
        response_data = response.json()
        endpoint = response_data["pipelineInferenceAPIEndPoint"]
        callback_url = endpoint["callbackUrl"]
        service_id = response_data["pipelineResponseConfig"][0]["config"][0]["serviceId"]
        inference_key = endpoint["inferenceApiKey"]
        headers2 = {
            "Content-Type": "application/json",
            inference_key["name"]: inference_key["value"],
        }
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # ValueError covers a body that is not valid JSON; the others cover
        # missing or differently shaped fields.
        print(f'Unexpected response structure: {type(exc).__name__}')
        return _translation_result(400, "Unexpected API response structure")
    print(f'Service ID: {service_id}, Callback URL: {callback_url}')

    compute_payload = {
        "pipelineTasks": [
            {"taskType": "translation", "config": {"language": language, "serviceId": service_id}}
        ],
        "inputData": {"input": [{"source": text}], "audio": [{"audioContent": None}]},
    }

    print(f'Sending translation request with text: "{text}"')
    try:
        compute_response = requests.post(callback_url, json=compute_payload, headers=headers2, timeout=timeout)
    except requests.RequestException as exc:
        print(f'Error in translation request: {exc}')
        return _translation_result(503, "Error in translation")
    if compute_response.status_code != 200:
        print(f'Error in translation request: {compute_response.status_code}, Response: {compute_response.text}')
        return _translation_result(compute_response.status_code, "Error in translation")

    print('Translation request successful, processing translation...')
    try:
        translated_content = compute_response.json()["pipelineResponse"][0]["output"][0]["target"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        print(f'Unexpected translation response structure: {type(exc).__name__}')
        return _translation_result(400, "Unexpected API response structure")

    print(f'Translation successful. Translated content: "{translated_content}"')
    return _translation_result(200, "Translation successful", translated_content)
# Initialize PhiData Agent
# System-style guidance handed to the agent as its `instructions` list.
_AGENT_INSTRUCTIONS = [
    "You are an expert science teacher specializing in 10th-grade curriculum.",
    "Provide clear, accurate, and age-appropriate explanations.",
    "Use simple language and examples that students can understand.",
    "Focus on concepts from physics, chemistry, and biology.",
    "Structure responses with headings and bullet points when helpful.",
    "Encourage learning and curiosity.",
]

# Groq-hosted model the agent talks to; uses the module-level api_key.
_groq_model = Groq(id="llama3-70b-8192", api_key=api_key)

# Shared agent instance; retrieve_and_generate_response calls agent.run on it.
agent = Agent(
    name="Science Education Assistant",
    role="You are a helpful science tutor for 10th-grade students",
    instructions=_AGENT_INSTRUCTIONS,
    model=_groq_model,
    markdown=True,
)
# Set up Jinja2 environment
# Templates are loaded from the `templates` directory next to this file.
proj_dir = Path(__file__).parent
_template_loader = FileSystemLoader(proj_dir / 'templates')
# NOTE(review): the Environment is created without `autoescape`, and
# template_html is rendered with the raw user query; confirm the template
# escapes its variables before the result is shown in gr.HTML.
env = Environment(loader=_template_loader)
# For document context
template = env.get_template('template.j2')
# For HTML output
template_html = env.get_template('template_html.j2')
# Response Generation Function
# CrossEncoder instances keyed by model name, so each re-ranker is constructed
# once per process instead of once per question.
_CROSS_ENCODER_CACHE = {}


def _get_cross_encoder(cross_encoder_choice):
    """Return the cached CrossEncoder matching the UI radio value."""
    if cross_encoder_choice == '(ACCURATE) BGE reranker':
        model_name = 'BAAI/bge-reranker-base'
    else:
        model_name = 'cross-encoder/ms-marco-MiniLM-L-6-v2'
    if model_name not in _CROSS_ENCODER_CACHE:
        _CROSS_ENCODER_CACHE[model_name] = CrossEncoder(model_name)
    return _CROSS_ENCODER_CACHE[model_name]


def _format_history(history, max_turns=2):
    """Render the last ``max_turns`` complete (question, answer) pairs as prompt text."""
    if not history:
        return ""
    return "".join(
        f"Previous Q: {user_msg}\nPrevious A: {bot_msg}\n"
        for user_msg, bot_msg in history[-max_turns:]
        if user_msg and bot_msg
    )


def retrieve_and_generate_response(query, cross_encoder_choice, history=None):
    """Generate response using semantic search and LLM.

    The query is encoded with ``retriever``, the 25 nearest rows are fetched
    from ``table``, re-ranked with the selected cross-encoder (top 20 kept),
    and the first 10 of those are placed in the prompt sent to ``agent``.

    Args:
        query: The user's question. ``None`` or blank input is rejected.
        cross_encoder_choice: Radio value; '(ACCURATE) BGE reranker' selects
            the BGE re-ranker, anything else the MiniLM cross-encoder.
        history: Optional list of ``[user_msg, bot_msg]`` pairs; the last two
            complete pairs are prepended to the prompt.

    Returns:
        ``(response_text, documents)`` where ``documents`` is the re-ranked
        list of document texts. On any exception the first element is an
        error message and the second an empty list.
    """
    top_rerank = 25
    top_k_rank = 20
    if not query or not query.strip():
        return "Please provide a valid question.", []
    try:
        start_time = perf_counter()

        # Encode query and search documents
        query_vec = retriever.encode(query)
        hits = table.search(query_vec, vector_column_name="vector").limit(top_rerank).to_list()
        documents = [doc["text"] for doc in hits]

        # Re-rank documents using cross-encoder (skipped when nothing was found)
        if documents:
            cross_encoder_model = _get_cross_encoder(cross_encoder_choice)
            cross_scores = cross_encoder_model.predict([[query, doc] for doc in documents])
            best_first = list(reversed(np.argsort(cross_scores)))
            documents = [documents[idx] for idx in best_first[:top_k_rank]]

        # Create context from top documents
        context = "\n\n".join(documents[:10]) if documents else ""
        context = f"Context information from educational materials:\n{context}\n\n"

        # Add conversation history for context
        history_context = _format_history(history)

        # Create full prompt
        full_prompt = f"{history_context}{context}Question: {query}\n\nPlease answer the question using the context provided above. If the context doesn't contain relevant information, use your general knowledge about 10th-grade science topics."

        # Generate response
        response = agent.run(full_prompt)
        response_text = response.content if hasattr(response, 'content') else str(response)
        logger.info(f"Response generation took {perf_counter() - start_time:.2f} seconds")
        return response_text, documents  # Return documents for template
    except Exception as e:
        # Top-level boundary for the chat handler: log with traceback and
        # surface the error as the chat reply instead of raising.
        logger.exception(f"Error in response generation: {e}")
        return f"Error generating response: {str(e)}", []
def simple_chat_function(message, history, cross_encoder_choice):
    """Chat function with semantic search and retriever integration.

    Args:
        message: The user's question; ``None`` or blank input is ignored.
        history: List of ``[user_msg, bot_msg]`` pairs. It is appended to in
            place; ``None`` is treated as an empty history.
        cross_encoder_choice: Re-ranker selection forwarded to
            ``retrieve_and_generate_response``.

    Returns:
        ``("", history, prompt_html)``: an empty string for the input box, the
        updated history, and the rendered ``template_html`` output (``""``
        when the message was blank).
    """
    if history is None:
        history = []
    if not message or not message.strip():
        return "", history, ""

    # Generate response and get documents
    response, documents = retrieve_and_generate_response(message, cross_encoder_choice, history)

    # Add to history
    history.append([message, response])

    # Render template with documents and query
    prompt_html = template_html.render(documents=documents, query=message)
    return "", history, prompt_html
# Display name (as offered in the language dropdown) -> language code sent to
# the translation API.
_ISO_LANGUAGE_CODES = {
    "Hindi": "hi", "Gom": "gom", "Kannada": "kn", "Dogri": "doi", "Bodo": "brx", "Urdu": "ur",
    "Tamil": "ta", "Kashmiri": "ks", "Assamese": "as", "Bengali": "bn", "Marathi": "mr",
    "Sindhi": "sd", "Maithili": "mai", "Punjabi": "pa", "Malayalam": "ml", "Manipuri": "mni",
    "Telugu": "te", "Sanskrit": "sa", "Nepali": "ne", "Santali": "sat", "Gujarati": "gu", "Odia": "or",
}


def translate_text(selected_language, history):
    """Translate the last response in history to the selected language.

    Args:
        selected_language: Display name of the target language.
        history: List of ``[user_msg, bot_msg]`` pairs; only the last
            ``bot_msg`` is translated.

    Returns:
        The translated text. ``'Translation failed.'`` is returned when the
        language is unknown or the translation call yields no content, and
        ``''`` when there is no response to translate (no API call is made).
    """
    to_code = _ISO_LANGUAGE_CODES.get(selected_language)
    if to_code is None:
        print(f'Unsupported translation language: {selected_language!r}')
        return 'Translation failed.'

    response_text = history[-1][1] if history and history[-1][1] else ''
    print('response_text for translation', response_text)
    if not response_text.strip():
        return ''

    translation = bhashini_translate(response_text, to_code=to_code)
    # The result dict always contains 'translated_content' (None on failure),
    # so a .get() default would never apply; fall back explicitly instead.
    return translation.get('translated_content') or 'Translation failed.'
# Gradio Interface with layout template
with gr.Blocks(title="Science Chatbot", theme='gradio/soft') as demo:
    # Header section
    with gr.Row():
        with gr.Column(scale=10):
            gr.HTML(value="""<div style="color: #FF4500;"><h1>Welcome! I am your friend!</h1>Ask me !I will help you<h1><span style="color: #008000">I AM A CHATBOT FOR 10TH SCIENCE WITH TRANSLATION IN 22 LANGUAGES</span></h1></div>""")
            gr.HTML(value="""<p style="font-family: sans-serif; font-size: 16px;">A free chat bot developed by K.M.RAMYASRI,TGT,GHS.SUTHUKENY using Open source LLMs for 10 std students</p>""")
            # NOTE(review): the mailto target and the visible address differ;
            # confirm which address is intended.
            gr.HTML(value="""<p style="font-family: Arial, sans-serif; font-size: 14px;"> Suggestions may be sent to <a href="mailto:ramyasriraman2019@gmail.com" style="color: #00008B; font-style: italic;">ramyadevi1607@yahoo.com</a>.</p>""")
        with gr.Column(scale=3):
            # Fall back to a grey placeholder if the logo component cannot be
            # built. Only Exception is caught so KeyboardInterrupt/SystemExit
            # still propagate.
            try:
                gr.Image(value='logo.png', height=200, width=200)
            except Exception:
                gr.HTML("<div style='height: 200px; width: 200px; background-color: #f0f0f0; display: flex; align-items: center; justify-content: center;'>Logo</div>")

    # Chat and input components
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
                       'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
        bubble_full_width=False,
        show_copy_button=True,
        show_share_button=True,
    )
    with gr.Row():
        msg = gr.Textbox(
            scale=3,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
        )
        submit_btn = gr.Button(value="Submit text", scale=1, variant="primary")

    # Additional controls
    # The selected value is passed on as `cross_encoder_choice`, i.e. it picks
    # the re-ranking cross-encoder used for document ranking.
    cross_encoder = gr.Radio(
        choices=['(FAST) MiniLM-L6v2', '(ACCURATE) BGE reranker'],
        value='(ACCURATE) BGE reranker',
        label="Embeddings Model",
        info="Select the model for document ranking"
    )
    language_dropdown = gr.Dropdown(
        choices=[
            "Hindi", "Gom", "Kannada", "Dogri", "Bodo", "Urdu", "Tamil", "Kashmiri", "Assamese", "Bengali", "Marathi",
            "Sindhi", "Maithili", "Punjabi", "Malayalam", "Manipuri", "Telugu", "Sanskrit", "Nepali", "Santali",
            "Gujarati", "Odia"
        ],
        value="Hindi",
        label="Select Language for Translation"
    )
    translated_textbox = gr.Textbox(label="Translated Response")
    prompt_html = gr.HTML()  # Add HTML component for the template

    # Event handlers
    def update_chat_and_translate(message, history, cross_encoder_choice, selected_language):
        """Answer ``message``, then translate the answer.

        Returns a 4-tuple matching the wired outputs: cleared input text,
        updated chat history, translated answer, rendered prompt HTML.
        Blank (or ``None``) messages leave the history untouched.
        """
        if not message or not message.strip():
            return "", history, "", ""
        # Reuse the plain chat path (answer + history update + template render)
        # rather than duplicating it here.
        _, history, prompt_html_content = simple_chat_function(message, history, cross_encoder_choice)
        # Translate response
        translated_text = translate_text(selected_language, history)
        return "", history, translated_text, prompt_html_content

    # Pressing Enter and clicking the button run the same handler.
    handler_inputs = [msg, chatbot, cross_encoder, language_dropdown]
    handler_outputs = [msg, chatbot, translated_textbox, prompt_html]
    msg.submit(update_chat_and_translate, handler_inputs, handler_outputs)
    submit_btn.click(update_chat_and_translate, handler_inputs, handler_outputs)

    clear = gr.Button("Clear Conversation")
    clear.click(lambda: ([], "", "", ""), outputs=[chatbot, msg, translated_textbox, prompt_html])

    # Example questions
    gr.Examples(
        examples=[
            'What is the difference between metals and non-metals?',
            'What is an ionic bond?',
            'Explain asexual reproduction',
            'What is photosynthesis?',
            'Explain Newton\'s laws of motion'
        ],
        inputs=msg,
        label="Try these example questions:"
    )
def _launch_app():
    """Start the Gradio app, listening on 0.0.0.0 port 7860."""
    demo.launch(server_name="0.0.0.0", server_port=7860)


# Only start the server when this file is run as a script.
if __name__ == "__main__":
    _launch_app()
# import gradio as gr
# from phi.agent import Agent
# from phi.model.groq import Groq
# import os
# import logging
# from sentence_transformers import CrossEncoder
# from backend.semantic_search import table, retriever
# import numpy as np
# from time import perf_counter
# import requests
# from jinja2 import Environment, FileSystemLoader
# # Set up logging
# logging.basicConfig(level=logging.INFO)
# logger = logging.getLogger(__name__)
# # API Key setup
# api_key = os.getenv("GROQ_API_KEY")
# if not api_key:
# gr.Warning("GROQ_API_KEY not found. Set it in 'Repository secrets'.")
# logger.error("GROQ_API_KEY not found.")
# api_key = "" # Fallback to empty string, but this will fail without a key
# else:
# os.environ["GROQ_API_KEY"] = api_key
# # Bhashini API setup
# bhashini_api_key = os.getenv("API_KEY")
# bhashini_user_id = os.getenv("USER_ID")
# def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") -> dict:
# """Translates text from source language to target language using the Bhashini API."""
# if not text.strip():
# print('Input text is empty. Please provide valid text for translation.')
# return {"status_code": 400, "message": "Input text is empty", "translated_content": None}
# else:
# print('Input text - ', text)
# print(f'Starting translation process from {from_code} to {to_code}...')
# gr.Warning(f'Translating to {to_code}...')
# url = 'https://meity-auth.ulcacontrib.org/ulca/apis/v0/model/getModelsPipeline'
# headers = {
# "Content-Type": "application/json",
# "userID": bhashini_user_id,
# "ulcaApiKey": bhashini_api_key
# }
# payload = {
# "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}}}],
# "pipelineRequestConfig": {"pipelineId": "64392f96daac500b55c543cd"}
# }
# print('Sending initial request to get the pipeline...')
# response = requests.post(url, json=payload, headers=headers)
# if response.status_code != 200:
# print(f'Error in initial request: {response.status_code}, Response: {response.text}')
# return {"status_code": response.status_code, "message": "Error in translation request", "translated_content": None}
# print('Initial request successful, processing response...')
# response_data = response.json()
# print('Full response data:', response_data) # Debug the full response
# if "pipelineInferenceAPIEndPoint" not in response_data or "callbackUrl" not in response_data["pipelineInferenceAPIEndPoint"]:
# print('Unexpected response structure:', response_data)
# return {"status_code": 400, "message": "Unexpected API response structure", "translated_content": None}
# service_id = response_data["pipelineResponseConfig"][0]["config"][0]["serviceId"]
# callback_url = response_data["pipelineInferenceAPIEndPoint"]["callbackUrl"]
# print(f'Service ID: {service_id}, Callback URL: {callback_url}')
# headers2 = {
# "Content-Type": "application/json",
# response_data["pipelineInferenceAPIEndPoint"]["inferenceApiKey"]["name"]: response_data["pipelineInferenceAPIEndPoint"]["inferenceApiKey"]["value"]
# }
# compute_payload = {
# "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}, "serviceId": service_id}}],
# "inputData": {"input": [{"source": text}], "audio": [{"audioContent": None}]}
# }
# print(f'Sending translation request with text: "{text}"')
# compute_response = requests.post(callback_url, json=compute_payload, headers=headers2)
# if compute_response.status_code != 200:
# print(f'Error in translation request: {compute_response.status_code}, Response: {compute_response.text}')
# return {"status_code": compute_response.status_code, "message": "Error in translation", "translated_content": None}
# print('Translation request successful, processing translation...')
# compute_response_data = compute_response.json()
# translated_content = compute_response_data["pipelineResponse"][0]["output"][0]["target"]
# print(f'Translation successful. Translated content: "{translated_content}"')
# return {"status_code": 200, "message": "Translation successful", "translated_content": translated_content}
# # Initialize PhiData Agent
# agent = Agent(
# name="Science Education Assistant",
# role="You are a helpful science tutor for 10th-grade students",
# instructions=[
# "You are an expert science teacher specializing in 10th-grade curriculum.",
# "Provide clear, accurate, and age-appropriate explanations.",
# "Use simple language and examples that students can understand.",
# "Focus on concepts from physics, chemistry, and biology.",
# "Structure responses with headings and bullet points when helpful.",
# "Encourage learning and curiosity."
# ],
# model=Groq(id="llama3-70b-8192", api_key=api_key),
# markdown=True
# )
# # Set up Jinja2 environment
# proj_dir = Path(__file__).parent
# env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
# template_html = env.get_template('template_html.j2')
# # Response Generation Function
# def retrieve_and_generate_response(query, cross_encoder_choice, history=None):
# """Generate response using semantic search and LLM"""
# top_rerank = 25
# top_k_rank = 20
# if not query.strip():
# return "Please provide a valid question."
# try:
# start_time = perf_counter()
# # Encode query and search documents
# query_vec = retriever.encode(query)
# documents = table.search(query_vec, vector_column_name="vector").limit(top_rerank).to_list()
# documents = [doc["text"] for doc in documents]
# # Re-rank documents using cross-encoder
# cross_encoder_model = CrossEncoder('BAAI/bge-reranker-base') if cross_encoder_choice == '(ACCURATE) BGE reranker' else CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
# query_doc_pair = [[query, doc] for doc in documents]
# cross_scores = cross_encoder_model.predict(query_doc_pair)
# sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
# documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
# # Create context from top documents
# context = "\n\n".join(documents[:10]) if documents else ""
# context = f"Context information from educational materials:\n{context}\n\n"
# # Add conversation history for context
# history_context = ""
# if history and len(history) > 0:
# for user_msg, bot_msg in history[-2:]: # Last 2 exchanges
# if user_msg and bot_msg:
# history_context += f"Previous Q: {user_msg}\nPrevious A: {bot_msg}\n"
# # Create full prompt
# full_prompt = f"{history_context}{context}Question: {query}\n\nPlease answer the question using the context provided above. If the context doesn't contain relevant information, use your general knowledge about 10th-grade science topics."
# # Generate response
# response = agent.run(full_prompt)
# response_text = response.content if hasattr(response, 'content') else str(response)
# logger.info(f"Response generation took {perf_counter() - start_time:.2f} seconds")
# return response_text
# except Exception as e:
# logger.error(f"Error in response generation: {e}")
# return f"Error generating response: {str(e)}"
# def simple_chat_function(message, history, cross_encoder_choice):
# """Chat function with semantic search and retriever integration"""
# if not message.strip():
# return "", history
# # Generate response using the semantic search function
# response = retrieve_and_generate_response(message, cross_encoder_choice, history)
# # Add to history
# history.append([message, response])
# return "", history
# def translate_text(selected_language, history):
# """Translate the last response in history to the selected language."""
# iso_language_codes = {
# "Hindi": "hi", "Gom": "gom", "Kannada": "kn", "Dogri": "doi", "Bodo": "brx", "Urdu": "ur",
# "Tamil": "ta", "Kashmiri": "ks", "Assamese": "as", "Bengali": "bn", "Marathi": "mr",
# "Sindhi": "sd", "Maithili": "mai", "Punjabi": "pa", "Malayalam": "ml", "Manipuri": "mni",
# "Telugu": "te", "Sanskrit": "sa", "Nepali": "ne", "Santali": "sat", "Gujarati": "gu", "Odia": "or"
# }
# to_code = iso_language_codes[selected_language]
# response_text = history[-1][1] if history and history[-1][1] else ''
# print('response_text for translation', response_text)
# translation = bhashini_translate(response_text, to_code=to_code)
# return translation.get('translated_content', 'Translation failed.')
# # Gradio Interface with layout template
# with gr.Blocks(title="Science Chatbot", theme='gradio/soft') as demo:
# # Header section
# with gr.Row():
# with gr.Column(scale=10):
# gr.HTML(value="""<div style="color: #FF4500;"><h1>Welcome! I am your friend!</h1>Ask me !I will help you<h1><span style="color: #008000">I AM A CHATBOT FOR 10TH SCIENCE WITH TRANSLATION IN 22 LANGUAGES</span></h1></div>""")
# gr.HTML(value=f"""<p style="font-family: sans-serif; font-size: 16px;">A free chat bot developed by K.M.RAMYASRI,TGT,GHS.SUTHUKENY using Open source LLMs for 10 std students</p>""")
# gr.HTML(value=f"""<p style="font-family: Arial, sans-serif; font-size: 14px;"> Suggestions may be sent to <a href="mailto:ramyasriraman2019@gmail.com" style="color: #00008B; font-style: italic;">ramyadevi1607@yahoo.com</a>.</p>""")
# with gr.Column(scale=3):
# try:
# gr.Image(value='logo.png', height=200, width=200)
# except:
# gr.HTML("<div style='height: 200px; width: 200px; background-color: #f0f0f0; display: flex; align-items: center; justify-content: center;'>Logo</div>")
# # Chat and input components
# chatbot = gr.Chatbot(
# [],
# elem_id="chatbot",
# avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
# 'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
# bubble_full_width=False,
# show_copy_button=True,
# show_share_button=True,
# )
# with gr.Row():
# msg = gr.Textbox(
# scale=3,
# show_label=False,
# placeholder="Enter text and press enter",
# container=False,
# )
# submit_btn = gr.Button(value="Submit text", scale=1, variant="primary")
# # Additional controls
# cross_encoder = gr.Radio(
# choices=['(FAST) MiniLM-L6v2', '(ACCURATE) BGE reranker'],
# value='(ACCURATE) BGE reranker',
# label="Embeddings Model",
# info="Select the model for document ranking"
# )
# language_dropdown = gr.Dropdown(
# choices=[
# "Hindi", "Gom", "Kannada", "Dogri", "Bodo", "Urdu", "Tamil", "Kashmiri", "Assamese", "Bengali", "Marathi",
# "Sindhi", "Maithili", "Punjabi", "Malayalam", "Manipuri", "Telugu", "Sanskrit", "Nepali", "Santali",
# "Gujarati", "Odia"
# ],
# value="Hindi",
# label="Select Language for Translation"
# )
# translated_textbox = gr.Textbox(label="Translated Response")
# # Event handlers
# def update_chat_and_translate(message, history, cross_encoder_choice, selected_language):
# if not message.strip():
# return "", history, ""
# # Generate response
# response = retrieve_and_generate_response(message, cross_encoder_choice, history)
# history.append([message, response])
# # Translate response
# translated_text = translate_text(selected_language, history)
# return "", history, translated_text
# msg.submit(update_chat_and_translate, [msg, chatbot, cross_encoder, language_dropdown], [msg, chatbot, translated_textbox])
# submit_btn.click(update_chat_and_translate, [msg, chatbot, cross_encoder, language_dropdown], [msg, chatbot, translated_textbox])
# clear = gr.Button("Clear Conversation")
# clear.click(lambda: ([], "", ""), outputs=[chatbot, msg, translated_textbox])
# # Example questions
# gr.Examples(
# examples=[
# 'What is the difference between metals and non-metals?',
# 'What is an ionic bond?',
# 'Explain asexual reproduction',
# 'What is photosynthesis?',
# 'Explain Newton\'s laws of motion'
# ],
# inputs=msg,
# label="Try these example questions:"
# )
# if __name__ == "__main__":
# demo.launch(server_name="0.0.0.0", server_port=7860)# import gradio as gr