Spaces:

sundaram07
/

AI_Text_Detector

Sleeping

App Files Files Community

sundaram07 committed on Jun 29

Commit

ce337fb

verified ·

1 Parent(s): 6adf923

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +21 -21

src/streamlit_app.py CHANGED Viewed

@@ -6,15 +6,15 @@ import os
 from nltk.tokenize import sent_tokenize
 from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
-# 📁 Set Hugging Face cache directory (safe for deployments)
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
-# 📥 Download NLTK tokenizer
 nltk_data_path = "/tmp/nltk_data"
-nltk.download("punkt_tab", download_dir=nltk_data_path)
 nltk.data.path.append(nltk_data_path)
-# 🚀 Load model & tokenizer once using cache
 @st.cache_resource
 def load_model_and_tokenizer():
     tokenizer = DistilBertTokenizerFast.from_pretrained(
@@ -27,7 +27,7 @@ def load_model_and_tokenizer():
 tokenizer, model = load_model_and_tokenizer()
-# 🔮 Predict AI probability for a sentence
 def predict_sentence_ai_probability(sentence):
     inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
     outputs = model(inputs)
@@ -39,6 +39,9 @@ def predict_sentence_ai_probability(sentence):
 def predict_ai_generated_percentage(text, threshold=0.15):
     text = text.strip()
     sentences = sent_tokenize(text)
     ai_sentence_count = 0
     results = []
@@ -49,11 +52,10 @@ def predict_ai_generated_percentage(text, threshold=0.15):
         if is_ai:
             ai_sentence_count += 1
-    total_sentences = len(sentences)
-    ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
     return ai_percentage, results
-# 🖥️ Streamlit UI setup
 st.set_page_config(page_title="AI Detector", layout="wide")
 st.title("🧠 AI Content Detector")
 st.markdown("This app detects the percentage of **AI-generated content** using sentence-level analysis with DistilBERT.")
@@ -61,15 +63,9 @@ st.markdown("This app detects the percentage of **AI-generated content** using s
 # 📋 Text input
 user_input = st.text_area("📋 Paste your text below to check for AI-generated sentences:", height=300)
-# ✅ Initialize session state
-if "analysis_done" not in st.session_state:
-    st.session_state.analysis_done = False
-    st.session_state.analysis_results = None
-    st.session_state.ai_percentage = None
 # 🔍 Analyze button logic
 if st.button("🔍 Analyze"):
-    # 🧹 Clear previous cache/state
     st.session_state.analysis_done = False
     st.session_state.analysis_results = None
     st.session_state.ai_percentage = None
@@ -77,14 +73,18 @@ if st.button("🔍 Analyze"):
     if not user_input.strip():
         st.warning("⚠️ Please enter some text.")
     else:
-        # Run fresh analysis
         ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
-        st.session_state.analysis_done = True
-        st.session_state.analysis_results = analysis_results
-        st.session_state.ai_percentage = ai_percentage
-# 📤 Show results if analysis was done
-if st.session_state.analysis_done:
     st.subheader("🔍 Sentence-level Analysis")
     for i, (sentence, prob, is_ai) in enumerate(st.session_state.analysis_results, start=1):
         label = "🟢 Human" if not is_ai else "🔴 AI"

 from nltk.tokenize import sent_tokenize
 from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
+# 📁 Hugging Face cache dir
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
+# 📥 Download NLTK punkt tokenizer
 nltk_data_path = "/tmp/nltk_data"
+nltk.download("punkt", download_dir=nltk_data_path)
 nltk.data.path.append(nltk_data_path)
+# ✅ Cache the model/tokenizer
 @st.cache_resource
 def load_model_and_tokenizer():
     tokenizer = DistilBertTokenizerFast.from_pretrained(
 tokenizer, model = load_model_and_tokenizer()
+# 🔮 Predict sentence AI probability
 def predict_sentence_ai_probability(sentence):
     inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
     outputs = model(inputs)
 def predict_ai_generated_percentage(text, threshold=0.15):
     text = text.strip()
     sentences = sent_tokenize(text)
+    if len(sentences) == 0:
+        return 0.0, []
     ai_sentence_count = 0
     results = []
         if is_ai:
             ai_sentence_count += 1
+    ai_percentage = (ai_sentence_count / len(sentences)) * 100
     return ai_percentage, results
+# 🖥️ Streamlit UI
 st.set_page_config(page_title="AI Detector", layout="wide")
 st.title("🧠 AI Content Detector")
 st.markdown("This app detects the percentage of **AI-generated content** using sentence-level analysis with DistilBERT.")
 # 📋 Text input
 user_input = st.text_area("📋 Paste your text below to check for AI-generated sentences:", height=300)
 # 🔍 Analyze button logic
 if st.button("🔍 Analyze"):
+    # Clear previous session results
     st.session_state.analysis_done = False
     st.session_state.analysis_results = None
     st.session_state.ai_percentage = None
     if not user_input.strip():
         st.warning("⚠️ Please enter some text.")
     else:
+        # Perform analysis
         ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
+        if len(analysis_results) == 0:
+            st.warning("⚠️ Not enough valid sentences to analyze.")
+        else:
+            st.session_state.analysis_done = True
+            st.session_state.analysis_results = analysis_results
+            st.session_state.ai_percentage = ai_percentage
+# 📤 Show results
+if st.session_state.get("analysis_done", False):
     st.subheader("🔍 Sentence-level Analysis")
     for i, (sentence, prob, is_ai) in enumerate(st.session_state.analysis_results, start=1):
         label = "🟢 Human" if not is_ai else "🔴 AI"