Spaces:

sundaram07
/

AI_Text_Detector

Running

App Files Files Community

sundaram07 committed 4 days ago

Commit

bcd23af

verified ·

1 Parent(s): 4457e86

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +16 -12

src/streamlit_app.py CHANGED Viewed

@@ -1,26 +1,28 @@
 import streamlit as st
 import tensorflow as tf
 import numpy as np
-import re
 import nltk
 import os
 from nltk.tokenize import sent_tokenize
-# Use local nltk_data folder (safe for Hugging Face Spaces)
-nltk_data_path = os.path.join(os.path.dirname(__file__), "nltk_data")
 nltk.download("punkt", download_dir=nltk_data_path)
 nltk.data.path.append(nltk_data_path)
-# Load model
-model = tf.keras.models.load_model('src/my_distilbert_classifier.keras')
 def predict_sentence_ai_probability(sentence):
     preds = model.predict([sentence])
     prob_ai = tf.sigmoid(preds[0][0]).numpy()
     return prob_ai
 def predict_ai_generated_percentage(text, threshold=0.75):
-    text += "."
     sentences = sent_tokenize(text)
     ai_sentence_count = 0
     results = []
@@ -36,20 +38,22 @@ def predict_ai_generated_percentage(text, threshold=0.75):
     ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
     return ai_percentage, results
-# Streamlit UI
 st.title("🧠 AI Content Detector")
-st.markdown("This tool detects the percentage of **AI-generated content** in your input text based on sentence-level analysis.")
-user_input = st.text_area("Paste your text here:", height=300)
-if st.button("Analyze"):
     if user_input.strip() == "":
-        st.warning("Please enter some text to analyze.")
     else:
         ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
-        st.subheader("🔍 Sentence-level Analysis")
         for i, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
             label = "🟢 Human" if not is_ai else "🔴 AI"
             st.markdown(f"**{i}.** _{sentence}_\n\n→ **Probability AI:** `{prob:.2%}` → {label}")
         st.subheader("📊 Final Result")
         st.success(f"Estimated **AI-generated content**: **{ai_percentage:.2f}%**")

 import streamlit as st
 import tensorflow as tf
 import numpy as np
 import nltk
 import os
 from nltk.tokenize import sent_tokenize
+# 🧠 Ensure sentence tokenizer works inside Hugging Face (use /tmp/)
+nltk_data_path = "/tmp/nltk_data"
 nltk.download("punkt", download_dir=nltk_data_path)
 nltk.data.path.append(nltk_data_path)
+# 📦 Load model
+model = tf.keras.models.load_model('src/my_distilbert_classifier.keras', compile=False)
+# 🧠 Predict probability for one sentence
 def predict_sentence_ai_probability(sentence):
+    # NOTE: You may need to tokenize properly if model needs token_ids/attention_mask
     preds = model.predict([sentence])
     prob_ai = tf.sigmoid(preds[0][0]).numpy()
     return prob_ai
+# 📊 Analyze full text
 def predict_ai_generated_percentage(text, threshold=0.75):
+    text = text.strip() + "."
     sentences = sent_tokenize(text)
     ai_sentence_count = 0
     results = []
     ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
     return ai_percentage, results
+# 🚀 Streamlit UI
 st.title("🧠 AI Content Detector")
+st.markdown("This tool detects the percentage of **AI-generated content** based on sentence-level analysis.")
+user_input = st.text_area("📋 Paste your text here:", height=300)
+if st.button("🔍 Analyze"):
     if user_input.strip() == "":
+        st.warning("⚠️ Please enter some text to analyze.")
     else:
         ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
+        st.subheader("🔎 Sentence-level Analysis")
         for i, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
             label = "🟢 Human" if not is_ai else "🔴 AI"
             st.markdown(f"**{i}.** _{sentence}_\n\n→ **Probability AI:** `{prob:.2%}` → {label}")
         st.subheader("📊 Final Result")
         st.success(f"Estimated **AI-generated content**: **{ai_percentage:.2f}%**")