Spaces:
Running
Running
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +16 -12
src/streamlit_app.py
CHANGED
@@ -1,26 +1,28 @@
|
|
1 |
import streamlit as st
|
2 |
import tensorflow as tf
|
3 |
import numpy as np
|
4 |
-
import re
|
5 |
import nltk
|
6 |
import os
|
7 |
from nltk.tokenize import sent_tokenize
|
8 |
|
9 |
-
#
|
10 |
-
nltk_data_path =
|
11 |
nltk.download("punkt", download_dir=nltk_data_path)
|
12 |
nltk.data.path.append(nltk_data_path)
|
13 |
|
14 |
-
# Load model
|
15 |
-
model = tf.keras.models.load_model('src/my_distilbert_classifier.keras')
|
16 |
|
|
|
17 |
def predict_sentence_ai_probability(sentence):
|
|
|
18 |
preds = model.predict([sentence])
|
19 |
prob_ai = tf.sigmoid(preds[0][0]).numpy()
|
20 |
return prob_ai
|
21 |
|
|
|
22 |
def predict_ai_generated_percentage(text, threshold=0.75):
|
23 |
-
text
|
24 |
sentences = sent_tokenize(text)
|
25 |
ai_sentence_count = 0
|
26 |
results = []
|
@@ -36,20 +38,22 @@ def predict_ai_generated_percentage(text, threshold=0.75):
|
|
36 |
ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
|
37 |
return ai_percentage, results
|
38 |
|
39 |
-
# Streamlit UI
|
40 |
st.title("π§ AI Content Detector")
|
41 |
-
st.markdown("This tool detects the percentage of **AI-generated content**
|
42 |
|
43 |
-
user_input = st.text_area("Paste your text here:", height=300)
|
44 |
|
45 |
-
if st.button("Analyze"):
|
46 |
if user_input.strip() == "":
|
47 |
-
st.warning("Please enter some text to analyze.")
|
48 |
else:
|
49 |
ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
|
50 |
-
|
|
|
51 |
for i, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
|
52 |
label = "π’ Human" if not is_ai else "π΄ AI"
|
53 |
st.markdown(f"**{i}.** _{sentence}_\n\nβ **Probability AI:** `{prob:.2%}` β {label}")
|
|
|
54 |
st.subheader("π Final Result")
|
55 |
st.success(f"Estimated **AI-generated content**: **{ai_percentage:.2f}%**")
|
|
|
1 |
import streamlit as st
|
2 |
import tensorflow as tf
|
3 |
import numpy as np
|
|
|
4 |
import nltk
|
5 |
import os
|
6 |
from nltk.tokenize import sent_tokenize
|
7 |
|
8 |
+
# 🔧 Ensure the sentence tokenizer works inside Hugging Face Spaces (download NLTK data to /tmp/)
|
9 |
+
nltk_data_path = "/tmp/nltk_data"
|
10 |
nltk.download("punkt", download_dir=nltk_data_path)
|
11 |
nltk.data.path.append(nltk_data_path)
|
12 |
|
13 |
+
# 📦 Load model
|
14 |
+
model = tf.keras.models.load_model('src/my_distilbert_classifier.keras', compile=False)
|
15 |
|
16 |
+
# 🧠 Predict probability for one sentence
|
17 |
def predict_sentence_ai_probability(sentence):
|
18 |
+
# NOTE: You may need to tokenize the input properly if the model needs token_ids/attention_mask
|
19 |
preds = model.predict([sentence])
|
20 |
prob_ai = tf.sigmoid(preds[0][0]).numpy()
|
21 |
return prob_ai
|
22 |
|
23 |
+
# π Analyze full text
|
24 |
def predict_ai_generated_percentage(text, threshold=0.75):
|
25 |
+
text = text.strip() + "."
|
26 |
sentences = sent_tokenize(text)
|
27 |
ai_sentence_count = 0
|
28 |
results = []
|
|
|
38 |
ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
|
39 |
return ai_percentage, results
|
40 |
|
41 |
+
# π Streamlit UI
|
42 |
st.title("π§ AI Content Detector")
|
43 |
+
st.markdown("This tool detects the percentage of **AI-generated content** based on sentence-level analysis.")
|
44 |
|
45 |
+
user_input = st.text_area("π Paste your text here:", height=300)
|
46 |
|
47 |
+
if st.button("π Analyze"):
|
48 |
if user_input.strip() == "":
|
49 |
+
st.warning("β οΈ Please enter some text to analyze.")
|
50 |
else:
|
51 |
ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
|
52 |
+
|
53 |
+
st.subheader("π Sentence-level Analysis")
|
54 |
for i, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
|
55 |
label = "π’ Human" if not is_ai else "π΄ AI"
|
56 |
st.markdown(f"**{i}.** _{sentence}_\n\nβ **Probability AI:** `{prob:.2%}` β {label}")
|
57 |
+
|
58 |
st.subheader("π Final Result")
|
59 |
st.success(f"Estimated **AI-generated content**: **{ai_percentage:.2f}%**")
|