Spaces:

sundaram07
/

AI_Text_Detector

Sleeping

App Files Community

sundaram07 committed on Jun 28

Commit

ed60e85

verified ·

1 Parent(s): f89536d

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +28 -26

src/streamlit_app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import os
 from nltk.tokenize import sent_tokenize
 from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
-# 📁 Use safe cache directory inside Hugging Face or Docker
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
 # 📥 Download NLTK tokenizer
@@ -14,13 +14,18 @@ nltk_data_path = "/tmp/nltk_data"
 nltk.download("punkt_tab", download_dir=nltk_data_path)
 nltk.data.path.append(nltk_data_path)
-# 🔄 Load tokenizer and model from Hugging Face
-tokenizer = DistilBertTokenizerFast.from_pretrained(
-    "distilbert-base-uncased", cache_dir="/tmp/huggingface"
-)
-model = TFDistilBertForSequenceClassification.from_pretrained(
-    "sundaram07/distilbert-sentence-classifier", cache_dir="/tmp/huggingface"
-)
 # 🔮 Predict AI probability for a sentence
 def predict_sentence_ai_probability(sentence):
@@ -48,37 +53,34 @@ def predict_ai_generated_percentage(text, threshold=0.15):
     ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
     return ai_percentage, results
-# 🌐 Streamlit Web App
 st.set_page_config(page_title="AI Detector", layout="wide")
 st.title("🧠 AI Content Detector")
-st.markdown("This app detects the percentage of **AI-generated content** based on sentence-level analysis using DistilBERT.")
-# Initialize session state to avoid duplicates
-if "last_input" not in st.session_state:
-    st.session_state.last_input = ""
-    st.session_state.results = None
-    st.session_state.percentage = None
-# 📋 User Input Area
 user_input = st.text_area("📋 Paste your text below to check for AI-generated sentences:", height=300)
-# 🔘 Analyze Button
 if st.button("🔍 Analyze"):
     if not user_input.strip():
-        st.warning("⚠️ Please enter some text to analyze.")
     else:
-        # Store in session_state to avoid duplicates
-        st.session_state.last_input = user_input
         ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
-        st.session_state.results = analysis_results
-        st.session_state.percentage = ai_percentage
-# Display only if results are present
-if st.session_state.results is not None:
     st.subheader("🔍 Sentence-level Analysis")
-    for i, (sentence, prob, is_ai) in enumerate(st.session_state.results, start=1):
         label = "🟢 Human" if not is_ai else "🔴 AI"
         st.markdown(f"**{i}.** _{sentence}_\n\n → {label}")
     st.subheader("📊 Final Result")
-    st.success(f"Estimated **AI-generated content**: **{st.session_state.percentage:.2f}%**")

 from nltk.tokenize import sent_tokenize
 from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
+# 📁 Use safe cache directory
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
 # 📥 Download NLTK tokenizer
 nltk.download("punkt_tab", download_dir=nltk_data_path)
 nltk.data.path.append(nltk_data_path)
+# 🚀 Load model & tokenizer once using Streamlit's resource cache (st.cache_resource)
+@st.cache_resource
+def load_model_and_tokenizer():
+    tokenizer = DistilBertTokenizerFast.from_pretrained(
+        "distilbert-base-uncased", cache_dir="/tmp/huggingface"
+    )
+    model = TFDistilBertForSequenceClassification.from_pretrained(
+        "sundaram07/distilbert-sentence-classifier", cache_dir="/tmp/huggingface"
+    )
+    return tokenizer, model
+tokenizer, model = load_model_and_tokenizer()
 # 🔮 Predict AI probability for a sentence
 def predict_sentence_ai_probability(sentence):
     ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
     return ai_percentage, results
+# 🖥️ Streamlit App UI
 st.set_page_config(page_title="AI Detector", layout="wide")
 st.title("🧠 AI Content Detector")
+st.markdown("This app detects the percentage of **AI-generated content** using DistilBERT.")
+# Session state to track if user clicked analyze
+if "analysis_done" not in st.session_state:
+    st.session_state.analysis_done = False
+# 📋 Input Area
 user_input = st.text_area("📋 Paste your text below to check for AI-generated sentences:", height=300)
+# 🔍 Analyze Button
 if st.button("🔍 Analyze"):
     if not user_input.strip():
+        st.warning("⚠️ Please enter some text.")
     else:
         ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
+        st.session_state.analysis_done = True
+        st.session_state.ai_percentage = ai_percentage
+        st.session_state.analysis_results = analysis_results
+# 📤 Show results after button press
+if st.session_state.get("analysis_done", False):
     st.subheader("🔍 Sentence-level Analysis")
+    for i, (sentence, prob, is_ai) in enumerate(st.session_state.analysis_results, start=1):
         label = "🟢 Human" if not is_ai else "🔴 AI"
         st.markdown(f"**{i}.** _{sentence}_\n\n → {label}")
     st.subheader("📊 Final Result")
+    st.success(f"Estimated **AI-generated content**: **{st.session_state.ai_percentage:.2f}%**")