Spaces:

imenayadi
/

Smart-inbox

Runtime error

App Files Files Community

imenayadi committed on May 29, 2024

Commit

5207833

1 Parent(s): e265489

Gradio

Browse files

Files changed (5) hide show

app.py +232 -4
key_info.py +27 -0
llama2_response_mail_generator.py +58 -0
requirements.txt +9 -0
summarization_with_bart.py +24 -0

app.py CHANGED Viewed

@@ -1,7 +1,235 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+import pandas as pd
+from key_info import extract_entities
+from summarization_with_bart import summarize_email_conditional
+from llama2_response_mail_generator import generate_email_response
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import spacy
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+import subprocess
+import sys
+# Fetch the spaCy model only when it is not installed yet, and install it with the
+# interpreter that is running this app (a bare "python" may resolve to another environment).
+if not spacy.util.is_package("en_core_web_sm"):
+    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
+"""**Original code**
+**CSS for Interface**
+"""
+# Stylesheet text for the Gradio UI; it is passed as `css=custom_css` when the tabbed interface is built.
+# NOTE(review): the inline comment on the `button` color rule says "White text" but the value is black -- confirm which is intended.
+custom_css = ''' @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/css/all.min.css');
+body {
+    background-color: #eef1f5; /* Light grey-blue background for a neutral, clean look */
+}
+label {
+    color: #34495e; /* Dark blue-grey for a professional appearance */
+    font-weight: bold;
+}
+textarea, input, select, button {
+    background-color: #ffffff; /* Crisp white background for input fields and buttons */
+    border: 1px solid #bdc3c7; /* Soft grey border for a subtle, refined look */
+    color: #2c3e50; /* Darker shade of blue-grey for text, enhancing readability */
+}
+button {
+    background-color: #3498db; /* Bright blue for buttons to stand out */
+    color: black ; /* White text on buttons for clarity */
+    border-radius: 4px; /* Slightly rounded corners for a modern touch */
+    font-weight: bold; /* Bold text for emphasis */
+    font-size: 16px; /* Sizable text for easy interaction */
+}
+button[type="submit"], button[type="reset"], button[type="button"] {
+    font-weight: bold; /* Ensures all actionable buttons are prominent */
+    font-size: 18px; /* Larger text size for better visibility and impact */
+}
+.result-box {
+    background-color: #ecf0f1; /* Very light grey for result boxes, ensuring focus */
+    color: #2c3e50; /* Consistent dark blue-grey text for uniformity */
+    border: 1px solid #bdc3c7; /* Matching the input field borders for design coherence */
+}
+.gradio-toolbar {
+    background-color: #ffffff; /* Maintains a clean, unobtrusive toolbar appearance */
+    border-top: 2px solid #3498db; /* A pop of bright blue to delineate the toolbar */
+}
+'''
+"""**Seperate** **Interface**"""
+# Summarization pipeline and the two NER back-ends used by the summary and NER tabs
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+nlp = spacy.load("en_core_web_sm")
+ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", tokenizer="dbmdz/bert-large-cased-finetuned-conll03-english")
+# Fine-tuned classifier for the email category, loaded from a local directory
+model_path = './fine_tuned_roberta_for_category_model_'
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForSequenceClassification.from_pretrained(model_path)
+model.eval()
+# Both classifiers run on the GPU when one is available, otherwise on the CPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+# Load model and tokenizer from the drive
+model_sentiment_path = './fine_tuned_roberta_for_sentiment_analysis_2000_'
+tokenizer_sentiment = AutoTokenizer.from_pretrained(model_sentiment_path)
+model_sentiment = AutoModelForSequenceClassification.from_pretrained(model_sentiment_path)
+model_sentiment.eval()
+model_sentiment.to(device)
+model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
+model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"  # The model is in bin format
+# Download the model file
+model_path_llama = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
+# Reuse the Llama instance that llama2_response_mail_generator built when it was
+# imported at the top of this file; constructing a second one here would keep two
+# copies of the 13B model in memory.
+from llama2_response_mail_generator import lcpp_llm
+def generate_email_response(email_prompt):
+    """Draft a reply to an email with the Llama-2 model.
+
+    Args:
+        email_prompt: Text of the received email (or a short instruction) to answer.
+
+    Returns:
+        The generated reply with surrounding whitespace removed, a short notice
+        when the input is empty, or a fixed failure message when generation raises.
+    """
+    # Check input received by the function
+    print("Received prompt:", email_prompt)
+    # Nothing to answer: skip the expensive model call
+    if not email_prompt or not email_prompt.strip():
+        return "Please enter an email to respond to."
+    # The shorthand-command case and the direct-email case used the same template,
+    # so a single prompt covers both.
+    formatted_prompt = f'''
+        Email received: "{email_prompt}"
+        Respond to this email, ensuring a professional tone, providing a concise update, and addressing any potential concerns the sender might have.
+        Response:
+        '''
+    # Generate response using Llama-2 model
+    try:
+        response = lcpp_llm(
+            prompt=formatted_prompt,
+            max_tokens=256,
+            temperature=0.5,
+            top_p=0.95,
+            repeat_penalty=1.2,
+            top_k=150,
+            # Ask for the completion only; echoing the prompt and then removing it by
+            # string matching leaves the prompt in the reply whenever the match fails.
+            echo=False
+        )
+        generated_response = response["choices"][0]["text"].strip()
+        print("Generated response:", generated_response)
+        return generated_response
+    except Exception as e:
+        # UI boundary: report the failure to the user instead of crashing the tab
+        print("Error in response generation:", str(e))
+        return "Failed to generate response, please check the console for errors."
+def classify_sentiment(text):
+    """Predict the sentiment of `text` with the fine-tuned sentiment model.
+
+    Returns:
+        A string of the form "Sentiment: <label>, Confidence: <p>", where the label
+        is the highest-probability class and <p> is its softmax probability with
+        two decimals.
+    """
+    # NOTE(review): the label order presumably matches the class ids used in training -- confirm.
+    labels = ["Positive", "Neutral", "Negative"]
+    # Tokenize (truncated to 512 tokens) and move the tensors to the model's device
+    encoded = tokenizer_sentiment(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
+    ids = encoded['input_ids'].to(device)
+    mask = encoded['attention_mask'].to(device)
+    # Inference only, so no gradients are tracked
+    with torch.no_grad():
+        logits = model_sentiment(input_ids=ids, attention_mask=mask).logits
+        class_probs = torch.nn.functional.softmax(logits, dim=1)
+    # Single input, so take the first (only) row of the batch
+    row = class_probs.cpu().numpy()[0]
+    best = row.argmax()
+    return f"Sentiment: {labels[best]}, Confidence: {row[best]:.2f}"
+def generate_summary(email_text):
+    """Summarize `email_text` by handing it, with the module-level `summarizer`, to summarize_email_conditional."""
+    summary_text = summarize_email_conditional(email_text, summarizer)
+    return summary_text
+def display_entities(email_text):
+    """Run entity extraction on `email_text` and shape the result for the NER tab.
+
+    Returns:
+        A 3-tuple: a DataFrame of the spaCy entities, a DataFrame of the transformer
+        entities, and the extracted dates joined with ", ". If anything raises, the
+        error is printed and two empty DataFrames plus an empty string are returned.
+    """
+    try:
+        extracted = extract_entities(email_text, nlp, ner_pipeline)
+        # One table per NER back-end
+        spacy_frame = pd.DataFrame(extracted['spaCy Entities'])
+        transformer_frame = pd.DataFrame(extracted['Transformer Entities'])
+        joined_dates = ", ".join(extracted['Dates'])
+    except Exception as e:
+        print(f"Error: {e}")
+        # Empty outputs keep the interface responsive when extraction fails
+        return pd.DataFrame(), pd.DataFrame(), ""
+    return spacy_frame, transformer_frame, joined_dates
+def classify_email(email):
+    """Predict the category of `email` with the fine-tuned category model.
+
+    Returns:
+        A string of the form "Category: <label>, Confidence: <p>", where the label
+        is the highest-probability class and <p> is its softmax probability with
+        two decimals.
+    """
+    # NOTE(review): the label order presumably matches the class ids used in training -- confirm.
+    labels = ["Urgent Requests", "Project Updates", "Client Communications", "Meeting Coordination", "Internal Announcements"]
+    # Tokenize (truncated to 512 tokens) and move the tensors to the model's device
+    encoded = tokenizer(email, return_tensors="pt", truncation=True, max_length=512, padding=True)
+    ids = encoded['input_ids'].to(device)
+    mask = encoded['attention_mask'].to(device)
+    # Inference only, so no gradients are tracked
+    with torch.no_grad():
+        logits = model(input_ids=ids, attention_mask=mask).logits
+        class_probs = torch.nn.functional.softmax(logits, dim=1)
+    # Single input, so take the first (only) row of the batch
+    row = class_probs.cpu().numpy()[0]
+    best = row.argmax()
+    return f"Category: {labels[best]}, Confidence: {row[best]:.2f}"
+# One gr.Interface per feature; each wraps one of the functions defined above.
+# Category tab: free-text email in, "Category: ..., Confidence: ..." string out.
+iface_category = gr.Interface(
+    fn=classify_email,
+    inputs=gr.Textbox(lines=10, placeholder="Enter Email Content Here..."),
+    outputs="text",
+    title="Email Category Classifier",
+    description="This model classifies email text into one of five categories: Urgent Requests, Project Updates, Client Communications, Meeting Coordination, Internal Announcements."
+)
+# Sentiment tab: free-text email in, "Sentiment: ..., Confidence: ..." string out.
+iface_sentiment = gr.Interface(
+    fn=classify_sentiment,
+    inputs=gr.Textbox(lines=5, placeholder="Enter Email Text Here..."),
+    outputs=gr.Textbox(label="Sentiment Analysis"),
+    title="Sentiment Analysis"
+)
+# Summary tab: returns the summary text produced by generate_summary.
+iface_summary = gr.Interface(
+    fn=generate_summary,
+    inputs=[gr.Textbox(lines=5, placeholder="Enter Email Text Here...")],
+    outputs=gr.Textbox(label="Generated Summary"),
+    title="Summary Generation"
+)
+# NER tab: the three outputs line up with the 3-tuple returned by display_entities.
+iface_ner = gr.Interface(
+    fn=display_entities,
+    inputs=gr.Textbox(lines=5, placeholder="Enter Email Text Here..."),
+    outputs=[
+        gr.Dataframe(label="spaCy Entity Recognition"),
+        gr.Dataframe(label="Transformer Entity Recognition"),
+        gr.Textbox(label="Extracted Dates")
+    ],
+    title="NER Analysis",
+    description="Performs Named Entity Recognition using spaCy and Transformer models."
+)
+# Response tab: returns the reply text produced by generate_email_response.
+iface_response = gr.Interface(
+    fn=generate_email_response,
+    inputs=gr.Textbox(lines=10, placeholder="Enter the email prompt..."),
+    outputs=gr.Textbox(label="Generated Email Response"),
+    title="Email Response Generator",
+    description="Generate email responses using Llama-2 model."
+)
+# Using tabs to organize the interfaces
+# The list of tab names is positional: it must stay in the same order as the list of interfaces.
+tabs = gr.TabbedInterface([iface_category, iface_sentiment,iface_summary,iface_ner,iface_response], ["Category", "Sentiment"," Summary","NER","Response Generator"], css=custom_css)
+# NOTE(review): share=True presumably requests a public share link -- confirm this is wanted for the deployment.
+tabs.launch(share=True)

key_info.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import spacy
+from transformers import pipeline
+import re
+from dateutil.parser import parse
+# Regex pattern for dates written as "<month name> <day>[st|nd|rd|th], <year>",
+# e.g. "May 29, 2024"; compiled once instead of on every call.
+_DATE_PATTERN = re.compile(r'\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+\d{1,2}(?:th|st|nd|rd)?,\s+\d{4}\b')
+def extract_entities(email_text, nlp, ner_pipeline):
+    """Extract named entities and dates from an email.
+
+    Args:
+        email_text: The text to analyse.
+        nlp: Callable returning an object whose `.ents` items expose `.text` and `.label_`.
+        ner_pipeline: Callable returning a list of dicts with 'word', 'entity' and 'score' keys.
+
+    Returns:
+        A dict with three keys: "spaCy Entities" (list of {"Text", "Type"} dicts),
+        "Transformer Entities" (list of {"Text", "Type", "Score"} dicts with a score
+        above 0.75) and "Dates" (list of 'YYYY-MM-DD' strings).
+    """
+    # Use spaCy for initial extraction
+    doc = nlp(email_text)
+    spacy_entities = [{"Text": ent.text, "Type": ent.label_} for ent in doc.ents]
+    # Use transformer model for refined extraction, keeping confident predictions only
+    transformer_entities = [
+        {"Text": ent['word'], "Type": ent['entity'], "Score": ent['score']}
+        for ent in ner_pipeline(email_text)
+        if ent['score'] > 0.75
+    ]
+    # Extract dates using regex
+    dates = []
+    for candidate in _DATE_PATTERN.findall(email_text):
+        try:
+            dates.append(parse(candidate).strftime('%Y-%m-%d'))
+        except (ValueError, OverflowError):
+            # The pattern also matches impossible dates such as "February 31, 2024";
+            # skip those instead of failing the whole extraction.
+            continue
+    return {
+        "spaCy Entities": spacy_entities,
+        "Transformer Entities": transformer_entities,
+        "Dates": dates
+    }

llama2_response_mail_generator.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+# Hugging Face repository and file name of the quantized Llama-2 chat model
+model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
+model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"  # The model is in bin format
+# Download the model file
+# Runs at import time, so importing this module triggers the download and the model load below.
+model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
+# Initialize the Llama model with appropriate settings for GPU
+# NOTE(review): n_gpu_layers presumably only takes effect with a GPU-enabled llama-cpp-python build -- confirm for the deployment.
+lcpp_llm = Llama(
+    model_path=model_path,
+    n_threads=2,  # CPU cores to use
+    n_batch=512,  # Batch size for processing; adjust as per your VRAM capacity
+    n_gpu_layers=32  # Number of layers to run on GPU, dependent on your GPU's VRAM
+)
+def generate_email_response(email_prompt):
+    """Draft a reply to an email with the Llama-2 model.
+
+    Args:
+        email_prompt: Text of the received email (or a short instruction) to answer.
+
+    Returns:
+        The generated reply with surrounding whitespace removed, a short notice
+        when the input is empty, or a fixed failure message when generation raises.
+    """
+    # Check input received by the function
+    print("Received prompt:", email_prompt)
+    # Nothing to answer: skip the expensive model call
+    if not email_prompt or not email_prompt.strip():
+        return "Please enter an email to respond to."
+    # The shorthand-command case and the direct-email case used the same template,
+    # so a single prompt covers both.
+    formatted_prompt = f'''
+        Email received: "{email_prompt}"
+        Respond to this email, ensuring a professional tone, providing a concise update, and addressing any potential concerns the sender might have.
+        Response:
+        '''
+    # Generate response using Llama-2 model
+    try:
+        response = lcpp_llm(
+            prompt=formatted_prompt,
+            max_tokens=256,
+            temperature=0.5,
+            top_p=0.95,
+            repeat_penalty=1.2,
+            top_k=150,
+            # Ask for the completion only; echoing the prompt and then removing it by
+            # string matching leaves the prompt in the reply whenever the match fails.
+            echo=False
+        )
+        generated_response = response["choices"][0]["text"].strip()
+        print("Generated response:", generated_response)
+        return generated_response
+    except Exception as e:
+        # UI boundary: report the failure to the user instead of crashing the caller
+        print("Error in response generation:", str(e))
+        return "Failed to generate response, please check the console for errors."

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+transformers
+datasets
+torch
+gradio
+spacy
+llama-cpp-python
+numpy
+huggingface_hub
+# TODO: the spaCy model is not covered by the requirements above; install it with: python -m spacy download en_core_web_sm

summarization_with_bart.py ADDED Viewed

	@@ -0,0 +1,24 @@

+from transformers import pipeline
+def summarize_email_conditional(email_text, summarizer, min_input_length=50):
+    """
+    Summarizes the email if it has more than min_input_length words.
+    Adjusts the max_length parameter based on the word count of the email.
+    Args:
+    - email_text (str): The text of the email to summarize.
+    - summarizer: Callable that takes the text plus generation keyword arguments and
+      returns a list whose first item is a dict with a 'summary_text' key.
+    - min_input_length (int): Minimum number of words an email must exceed to be summarized.
+    Returns:
+    - str: The summary of the email, the original email if it is not longer than
+      min_input_length words, or an empty string for empty/None input.
+    """
+    # Nothing to summarize
+    if not email_text:
+        return ""
+    word_count = len(email_text.split())
+    # Return the original email text if it's not long enough to require summarization
+    if word_count <= min_input_length:
+        return email_text
+    # Dynamically set max_length to be about 75% of the email length, or any ratio that suits your need
+    max_length = max(12, int(word_count * 0.75))
+    # truncation=True asks the pipeline to cut inputs longer than the model accepts
+    # instead of raising on very long emails.
+    summary = summarizer(email_text, max_length=max_length, min_length=5, do_sample=False, truncation=True)
+    return summary[0]['summary_text']