import streamlit as st
import pandas as pd
from transformers import (
    AutoTokenizer,
    TFAutoModelForSequenceClassification,
    pipeline,
)

st.title("Toxic Tweet Classifier")

demo = """Your words are like poison. They seep into my mind and make me feel worthless."""

text = ""
submit = False
model_name = ""
col1, col2, col3 = st.columns([2,1,1])

with st.container():
    model_name = st.selectbox(
        "Select the model you want to use below.",
        ("RobCaamano/toxicity",),
    )
    submit = st.button("Submit", type="primary")

# Load the tokenizer and TensorFlow classification model for the selected checkpoint;
# return_all_scores=True makes the pipeline return a score for every label
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
clf = pipeline(
    "sentiment-analysis", model=model, tokenizer=tokenizer, return_all_scores=True
)

# Tweet input area in the wider left column
with col1:
    st.subheader("Tweet")
    text = st.text_area("Input text", demo, height=275)

if submit:
    # Map each label to its score, e.g. {"toxic": 0.97, ...}
    results = {d["label"]: d["score"] for d in clf(text)[0]}
    # Fine-grained toxicity classes, excluding the umbrella "toxic" label
    classes = {k: v for k, v in results.items() if k != "toxic"}

    max_class = max(classes, key=classes.get)
    probability = classes[max_class]

    # One-row summary: most likely class, its probability, and overall toxicity
    result_df = pd.DataFrame({
        'Classification': [max_class],
        'Probability': [probability],
        'Toxic': ['Yes' if results['toxic'] >= 0.5 else 'No']
    })

    st.table(result_df)

    # Raw label-to-score output for inspection
    expander = st.expander("Raw output")
    expander.write(results)