Spaces:
Sleeping
Sleeping
File size: 2,453 Bytes
1c38e8c 1360051 4457e86 aab722c a9d8b80 1360051 bcd23af 4457e86 1360051 a9d8b80 1360051 bcd23af 1360051 a9d8b80 1360051 bcd23af 1360051 a9d8b80 1360051 bcd23af 1360051 bcd23af 1360051 bcd23af 1c38e8c bcd23af 1360051 bcd23af 1360051 bcd23af 1360051 bcd23af 1360051 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import streamlit as st
import tensorflow as tf
import numpy as np
import nltk
import os
from nltk.tokenize import sent_tokenize
from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
# 🧠 Download the NLTK sentence-tokenizer data into /tmp/ so it is available inside Hugging Face Spaces
# Directory where the NLTK tokenizer data is stored and looked up.
nltk_data_path = "/tmp/nltk_data"

# Register the directory before looking anything up, and only once:
# Streamlit re-executes this script on every interaction, so an
# unconditional append would grow nltk.data.path with duplicates.
if nltk_data_path not in nltk.data.path:
    nltk.data.path.append(nltk_data_path)

# "punkt" is the resource the original script fetched; newer NLTK releases
# load "punkt_tab" for sent_tokenize instead, so make sure both exist.
# Resources that are already present are not downloaded again.
for _resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{_resource}")
    except LookupError:
        nltk.download(_resource, download_dir=nltk_data_path)
# 📦 Load the tokenizer and the classification model
@st.cache_resource
def _load_tokenizer_and_model():
    """Load the DistilBERT tokenizer and the sentence classifier.

    Decorated with ``st.cache_resource`` so the objects are created once and
    reused; without it every Streamlit rerun (each widget interaction
    re-executes this script) would load both of them again.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
    loaded_model = TFDistilBertForSequenceClassification.from_pretrained(
        "sundaram07/distilbert-sentence-classifier"
    )
    return loaded_tokenizer, loaded_model


# Module-level names used by the prediction helpers below.
tokenizer, model = _load_tokenizer_and_model()
# 🧠 Predict the AI probability for a single sentence
def predict_sentence_ai_probability(sentence):
    """Score one sentence with the module-level tokenizer and model.

    The sentence is tokenized into TensorFlow tensors (truncation and padding
    enabled), passed through the model, and the sigmoid of the first logit of
    the first row is returned.

    NOTE(review): this treats the classifier head as a single output neuron
    (binary classification) -- confirm against the model's configuration.

    Args:
        sentence: The text to score.

    Returns:
        The value obtained by calling ``.numpy()`` on the selected sigmoid
        output.
    """
    encoded = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
    scores = tf.sigmoid(model(encoded).logits)
    # First row, first column of the sigmoid-transformed logits.
    return scores[0][0].numpy()
# 📊 Analyze the full text sentence by sentence
def predict_ai_generated_percentage(text, threshold=0.75):
    """Estimate what share of the sentences in ``text`` is flagged as AI.

    The stripped text is split with ``sent_tokenize``; each sentence is scored
    by ``predict_sentence_ai_probability`` and flagged when its score is at
    least ``threshold``.

    Args:
        text: The text to analyze.
        threshold: Minimum score for a sentence to count as AI-generated.

    Returns:
        A ``(ai_percentage, results)`` tuple. ``ai_percentage`` is the
        percentage of flagged sentences (``0.0`` when no sentence is found)
        and ``results`` is a list of ``(sentence, probability, is_ai)``
        tuples in sentence order.
    """
    results = []
    for piece in sent_tokenize(text.strip()):
        score = predict_sentence_ai_probability(piece)
        results.append((piece, score, score >= threshold))

    # No sentences: avoid dividing by zero and report 0%.
    if not results:
        return 0.0, results

    flagged = sum(1 for _, _, hit in results if hit)
    return (flagged / len(results)) * 100, results
# 🚀 Streamlit UI
# Page header and short description of the tool.
# NOTE(review): the emoji in the user-facing strings below were restored from
# mis-decoded characters; 🧠, ⚠️, 🟢 and 🔴 follow from the garbled text, the
# remaining icons and the arrows are best guesses -- confirm the intended glyphs.
st.title("🧠 AI Content Detector")
st.markdown("This tool detects the percentage of **AI-generated content** based on sentence-level analysis.")

# Free-text input to be analyzed.
user_input = st.text_area("📝 Paste your text here:", height=300)

# The analysis runs only when the button is pressed.
if st.button("🔍 Analyze"):
    if not user_input.strip():
        # Nothing but whitespace was entered.
        st.warning("⚠️ Please enter some text to analyze.")
    else:
        ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)

        # One entry per sentence: text, probability and the resulting label.
        st.subheader("🔍 Sentence-level Analysis")
        for i, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
            label = "🔴 AI" if is_ai else "🟢 Human"
            st.markdown(f"**{i}.** _{sentence}_\n\n→ **Probability AI:** `{prob:.2%}` → {label}")

        # Overall share of sentences flagged as AI-generated.
        st.subheader("📊 Final Result")
        st.success(f"Estimated **AI-generated content**: **{ai_percentage:.2f}%**")
|