File size: 2,193 Bytes
1c38e8c
1360051
 
 
4457e86
aab722c
1360051
bcd23af
 
4457e86
 
1360051
bcd23af
46f35a5
1360051
bcd23af
1360051
bcd23af
1360051
 
 
 
bcd23af
1360051
bcd23af
1360051
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bcd23af
1360051
bcd23af
1360051
bcd23af
1c38e8c
bcd23af
1360051
bcd23af
1360051
 
bcd23af
 
1360051
 
 
bcd23af
1360051
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import streamlit as st
import tensorflow as tf
import numpy as np
import nltk
import os
from nltk.tokenize import sent_tokenize

# 🧠 Ensure sentence tokenizer works inside Hugging Face (use /tmp/)
nltk_data_path = "/tmp/nltk_data"
if nltk_data_path not in nltk.data.path:
    # Streamlit re-executes this script on every interaction while the nltk
    # module stays imported, so an unconditional append would keep growing
    # NLTK's search path with duplicate entries.
    nltk.data.path.append(nltk_data_path)

# Older NLTK releases read "punkt"; newer releases load "punkt_tab" for
# sent_tokenize instead. Make sure both are available so either works.
for _resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{_resource}")
    except LookupError:
        # Only touch the network when the resource is really missing.
        nltk.download(_resource, download_dir=nltk_data_path, quiet=True)

# 📦 Load model
MODEL_PATH = "src/my_distilbert_classifier.keras"


@st.cache_resource
def _load_model():
    """Load the Keras classifier from MODEL_PATH.

    Wrapped in ``st.cache_resource`` so the model is loaded once and reused
    across script reruns instead of being read from disk on every interaction.
    """
    return tf.keras.models.load_model(MODEL_PATH, compile=False)


# predict_sentence_ai_probability() reads this module-level name; without it
# the first analysis request fails with NameError.
model = _load_model()

# 🧠 Predict probability for one sentence
def predict_sentence_ai_probability(sentence):
    """Score a single sentence with the module-level ``model``.

    The sentence is wrapped in a one-element list and handed to
    ``model.predict``; the first value of the first output row is passed
    through ``tf.sigmoid`` and returned as a NumPy value.

    Args:
        sentence: The sentence to score.

    Returns:
        The sigmoid of the model's first output value for this sentence.
    """
    # NOTE: You may need to tokenize properly if model needs token_ids/attention_mask
    # NOTE(review): applying sigmoid assumes the model emits a raw logit rather
    # than an already-normalized probability -- confirm against the model.
    raw_output = model.predict([sentence])
    first_score = raw_output[0][0]
    return tf.sigmoid(first_score).numpy()

# 📊 Analyze full text
def predict_ai_generated_percentage(text, threshold=0.75):
    """Estimate what share of the sentences in *text* look AI-generated.

    The text is split with ``sent_tokenize``; every sentence is scored by
    ``predict_sentence_ai_probability`` and counted as AI-written when its
    score is greater than or equal to ``threshold``.

    Args:
        text: Raw text to analyze.
        threshold: Inclusive score cut-off for flagging a sentence as AI.

    Returns:
        A ``(ai_percentage, results)`` tuple. ``ai_percentage`` is the share
        of flagged sentences on a 0-100 scale. ``results`` holds one
        ``(sentence, probability, is_ai)`` tuple per sentence, in text order.
        Blank input yields ``(0.0, [])``.
    """
    text = text.strip()
    if not text:
        # Nothing to score. Appending "." here would fabricate a one-character
        # "sentence" and send it to the model.
        return 0.0, []

    # Close the final sentence only when it is not already terminated, so the
    # last sentence is not scored and displayed with a doubled terminator.
    if not text.endswith((".", "!", "?")):
        text += "."

    results = []
    for sentence in sent_tokenize(text):
        prob = predict_sentence_ai_probability(sentence)
        results.append((sentence, prob, prob >= threshold))

    if not results:
        return 0.0, []

    ai_sentence_count = sum(1 for _, _, is_ai in results if is_ai)
    ai_percentage = (ai_sentence_count / len(results)) * 100
    return ai_percentage, results

# 🚀 Streamlit UI
# Top-level script: Streamlit runs it from the top on every interaction, so the
# widgets below are declared in display order.
st.title("🧠 AI Content Detector")
st.markdown("This tool detects the percentage of **AI-generated content** based on sentence-level analysis.")

user_input = st.text_area("📋 Paste your text here:", height=300)

if st.button("🔍 Analyze"):
    if not user_input.strip():
        # Whitespace-only input counts as empty.
        st.warning("⚠️ Please enter some text to analyze.")
    else:
        ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)

        # One entry per sentence: the sentence, its score, and the verdict.
        st.subheader("🔎 Sentence-level Analysis")
        for i, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
            label = "🔴 AI" if is_ai else "🟢 Human"
            st.markdown(f"**{i}.** _{sentence}_\n\n→ **Probability AI:** `{prob:.2%}` → {label}")

        # Overall share of sentences flagged as AI-written.
        st.subheader("📊 Final Result")
        st.success(f"Estimated **AI-generated content**: **{ai_percentage:.2f}%**")