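# Streamlit app: classify input text for toxicity with a Hugging Face
# text-classification pipeline. Launch with "streamlit run app.py"
# (app.py is an assumed filename; use this file's actual name).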
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer
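# The TensorFlow model class is aliased so the rest of the code can use the
# familiar AutoModelForSequenceClassification name while loading TF weights.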
from transformers import (
    TFAutoModelForSequenceClassification as AutoModelForSequenceClassification,
)
from transformers import pipeline

st.title("Detecting Toxic Tweets")

demo = """Your words are like poison. They seep into my mind and make me feel worthless"""

text = st.text_area("Input text", demo, height=250)

# Add a drop-down menu for model selection
model_options = {
    "DistilBERT Base Uncased (SST-2)": "distilbert-base-uncased-finetuned-sst-2-english",
    "Fine-tuned Toxicity Model": "RobCaamano/toxicity_distilbert",
}
selected_model = st.selectbox("Select Model", options=list(model_options.keys()))

mod_name = model_options[selected_model]

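# Load the tokenizer and TensorFlow model for the selected checkpoint
# (downloaded from the Hugging Face Hub and cached on first use)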
tokenizer = AutoTokenizer.from_pretrained(mod_name)
model = AutoModelForSequenceClassification.from_pretrained(mod_name)
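# Build a text-classification pipeline that returns a score for every label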
clf = pipeline(
    "text-classification", model=model, tokenizer=tokenizer, return_all_scores=True
)

if st.button("Submit", type="primary"):
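    # Classify the text and keep the label with the highest score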
    results = clf(text)[0]
    max_class = max(results, key=lambda x: x["score"])
    tweet_portion = (text[:50] + "...") if len(text) > 50 else text
    
    # Create and display the table
    df = pd.DataFrame(
        {
            "Tweet (portion)": [tweet_portion],
            "Highest Toxicity Class": [max_class["label"]],
            "Probability": [max_class["score"]],
        }
    )
    st.table(df)