Spaces:
Sleeping
Sleeping
New
Browse files- src/streamlit_app.py +52 -38
src/streamlit_app.py
CHANGED
@@ -1,40 +1,54 @@
|
|
1 |
-
|
2 |
-
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
""
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
""
|
15 |
-
|
16 |
-
|
17 |
-
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
|
18 |
-
|
19 |
-
indices = np.linspace(0, 1, num_points)
|
20 |
-
theta = 2 * np.pi * num_turns * indices
|
21 |
-
radius = indices
|
22 |
-
|
23 |
-
x = radius * np.cos(theta)
|
24 |
-
y = radius * np.sin(theta)
|
25 |
-
|
26 |
-
df = pd.DataFrame({
|
27 |
-
"x": x,
|
28 |
-
"y": y,
|
29 |
-
"idx": indices,
|
30 |
-
"rand": np.random.randn(num_points),
|
31 |
-
})
|
32 |
-
|
33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
34 |
-
.mark_point(filled=True)
|
35 |
-
.encode(
|
36 |
-
x=alt.X("x", axis=None),
|
37 |
-
y=alt.Y("y", axis=None),
|
38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
40 |
-
))
|
|
|
1 |
+
|
|
|
|
|
2 |
import streamlit as st
|
3 |
+
import tensorflow as tf
|
4 |
+
import numpy as np
|
5 |
+
import re
|
6 |
+
import nltk
|
7 |
+
|
8 |
+
# Streamlit re-executes this script on every widget interaction, so the
# expensive one-time setup (tokenizer data download, model load) is wrapped in
# st.cache_resource and only runs once per server process.
@st.cache_resource
def _ensure_tokenizer_data():
    """Download the NLTK sentence-tokenizer data if it is not present.

    Both resource names are requested: recent NLTK releases load
    ``punkt_tab`` inside ``sent_tokenize``, while older ones load ``punkt``.
    ``nltk.download`` returns False (it does not raise) for a name the
    installed NLTK index does not know, so asking for both is harmless.
    """
    for resource in ("punkt", "punkt_tab"):
        nltk.download(resource, quiet=True)
    return True


_ensure_tokenizer_data()
from nltk.tokenize import sent_tokenize


@st.cache_resource
def _load_model():
    """Load the saved Keras classifier from disk."""
    return tf.keras.models.load_model('my_distilbert_classifier.keras')


model = _load_model()
|
13 |
+
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
+
def predict_sentence_ai_probability(sentence):
    """Score a single sentence with the module-level ``model``.

    The sentence is wrapped in a one-element list and passed to
    ``model.predict``; the first value of the first output row is then run
    through a sigmoid and returned as a NumPy scalar.

    NOTE(review): applying the sigmoid here assumes the model emits a raw
    logit. If its final layer already applies a sigmoid, this squashes the
    score twice. TODO confirm against the saved model.
    """
    raw_output = model.predict([sentence])
    first_score = raw_output[0][0]
    return tf.sigmoid(first_score).numpy()
|
21 |
+
|
22 |
+
def predict_ai_generated_percentage(text, threshold=0.75):
    """Estimate the share of sentences in *text* that are classified as AI.

    The text is split into sentences with ``sent_tokenize``; each sentence is
    scored with ``predict_sentence_ai_probability`` and counted as AI when its
    score is at least *threshold*.

    Args:
        text: The text to analyse.
        threshold: Minimum score for a sentence to count as AI-generated.

    Returns:
        A tuple ``(ai_percentage, results)``. ``ai_percentage`` is the
        percentage (0-100) of sentences flagged as AI. ``results`` is a list
        of ``(sentence, probability, is_ai)`` tuples in sentence order.
        Blank input yields ``(0.0, [])``.
    """
    # Strip first: pasted text usually ends with a newline, and appending "."
    # after trailing whitespace makes the tokenizer emit a stray "." sentence
    # that is scored and counted in the denominator.
    text = text.strip()
    if not text:
        return 0.0, []

    # Close an unterminated final sentence, but do not turn "foo." into "foo..".
    if not text.endswith((".", "!", "?")):
        text += "."

    results = []
    for sentence in sent_tokenize(text):
        prob = predict_sentence_ai_probability(sentence)
        results.append((sentence, prob, prob >= threshold))

    ai_sentence_count = sum(1 for _, _, is_ai in results if is_ai)
    ai_percentage = (ai_sentence_count / len(results)) * 100 if results else 0.0
    return ai_percentage, results
|
38 |
+
|
39 |
+
# Page layout: title, short description, input box, and an "Analyze" button
# that prints a per-sentence breakdown followed by the overall percentage.
# NOTE(review): the non-ASCII characters in the UI strings below look like
# emoji/symbols that were mis-decoded somewhere along the way. Confirm against
# the original file before shipping; they are reproduced here unchanged.
st.title("π§ AI Content Detector")
st.markdown("This tool detects the percentage of **AI-generated content** in your input text based on sentence-level analysis.")

user_input = st.text_area("Paste your text here:", height=300)

if st.button("Analyze"):
    if user_input.strip():
        ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)

        # One markdown entry per sentence, numbered from 1.
        st.subheader("π Sentence-level Analysis")
        for i, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
            label = "π΄ AI" if is_ai else "π’ Human"
            st.markdown(f"**{i}.** _{sentence}_\n\nβ **Probability AI:** `{prob:.2%}` β {label}")

        st.subheader("π Final Result")
        st.success(f"Estimated **AI-generated content**: **{ai_percentage:.2f}%**")
    else:
        # Nothing but whitespace was entered.
        st.warning("Please enter some text to analyze.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|