sundaram07 committed on
Commit
ed60e85
·
verified ·
1 Parent(s): f89536d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +28 -26
src/streamlit_app.py CHANGED
@@ -6,7 +6,7 @@ import os
6
  from nltk.tokenize import sent_tokenize
7
  from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
8
 
9
- # 📁 Use safe cache directory inside Hugging Face or Docker
10
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
11
 
12
  # 📥 Download NLTK tokenizer
@@ -14,13 +14,18 @@ nltk_data_path = "/tmp/nltk_data"
14
  nltk.download("punkt_tab", download_dir=nltk_data_path)
15
  nltk.data.path.append(nltk_data_path)
16
 
17
- # 🔄 Load tokenizer and model from Hugging Face
18
- tokenizer = DistilBertTokenizerFast.from_pretrained(
19
- "distilbert-base-uncased", cache_dir="/tmp/huggingface"
20
- )
21
- model = TFDistilBertForSequenceClassification.from_pretrained(
22
- "sundaram07/distilbert-sentence-classifier", cache_dir="/tmp/huggingface"
23
- )
 
 
 
 
 
24
 
25
  # 🔮 Predict AI probability for a sentence
26
  def predict_sentence_ai_probability(sentence):
@@ -48,37 +53,34 @@ def predict_ai_generated_percentage(text, threshold=0.15):
48
  ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
49
  return ai_percentage, results
50
 
51
- # 🌐 Streamlit Web App
52
  st.set_page_config(page_title="AI Detector", layout="wide")
53
  st.title("🧠 AI Content Detector")
54
- st.markdown("This app detects the percentage of **AI-generated content** based on sentence-level analysis using DistilBERT.")
55
 
56
- # Initialize session state to avoid duplicates
57
- if "last_input" not in st.session_state:
58
- st.session_state.last_input = ""
59
- st.session_state.results = None
60
- st.session_state.percentage = None
61
 
62
- # 📋 User Input Area
63
  user_input = st.text_area("📋 Paste your text below to check for AI-generated sentences:", height=300)
64
 
65
- # 🔘 Analyze Button
66
  if st.button("🔍 Analyze"):
67
  if not user_input.strip():
68
- st.warning("⚠️ Please enter some text to analyze.")
69
  else:
70
- # Store in session_state to avoid duplicates
71
- st.session_state.last_input = user_input
72
  ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
73
- st.session_state.results = analysis_results
74
- st.session_state.percentage = ai_percentage
 
75
 
76
- # Display only if results are present
77
- if st.session_state.results is not None:
78
  st.subheader("🔍 Sentence-level Analysis")
79
- for i, (sentence, prob, is_ai) in enumerate(st.session_state.results, start=1):
80
  label = "🟢 Human" if not is_ai else "🔴 AI"
81
  st.markdown(f"**{i}.** _{sentence}_\n\n → {label}")
82
 
83
  st.subheader("📊 Final Result")
84
- st.success(f"Estimated **AI-generated content**: **{st.session_state.percentage:.2f}%**")
 
6
  from nltk.tokenize import sent_tokenize
7
  from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
8
 
9
+ # 📁 Use safe cache directory
10
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
11
 
12
  # 📥 Download NLTK tokenizer
 
14
  nltk.download("punkt_tab", download_dir=nltk_data_path)
15
  nltk.data.path.append(nltk_data_path)
16
 
17
+ # 🚀 Load model & tokenizer once using session state
18
+ @st.cache_resource
19
+ def load_model_and_tokenizer():
20
+ tokenizer = DistilBertTokenizerFast.from_pretrained(
21
+ "distilbert-base-uncased", cache_dir="/tmp/huggingface"
22
+ )
23
+ model = TFDistilBertForSequenceClassification.from_pretrained(
24
+ "sundaram07/distilbert-sentence-classifier", cache_dir="/tmp/huggingface"
25
+ )
26
+ return tokenizer, model
27
+
28
+ tokenizer, model = load_model_and_tokenizer()
29
 
30
  # 🔮 Predict AI probability for a sentence
31
  def predict_sentence_ai_probability(sentence):
 
53
  ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
54
  return ai_percentage, results
55
 
56
+ # 🖥️ Streamlit App UI
57
  st.set_page_config(page_title="AI Detector", layout="wide")
58
  st.title("🧠 AI Content Detector")
59
+ st.markdown("This app detects the percentage of **AI-generated content** using DistilBERT.")
60
 
61
+ # Session state to track if user clicked analyze
62
+ if "analysis_done" not in st.session_state:
63
+ st.session_state.analysis_done = False
 
 
64
 
65
+ # 📋 Input Area
66
  user_input = st.text_area("📋 Paste your text below to check for AI-generated sentences:", height=300)
67
 
68
+ # 🔍 Analyze Button
69
  if st.button("🔍 Analyze"):
70
  if not user_input.strip():
71
+ st.warning("⚠️ Please enter some text.")
72
  else:
 
 
73
  ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
74
+ st.session_state.analysis_done = True
75
+ st.session_state.ai_percentage = ai_percentage
76
+ st.session_state.analysis_results = analysis_results
77
 
78
+ # 📤 Show results after button press
79
+ if st.session_state.get("analysis_done", False):
80
  st.subheader("🔍 Sentence-level Analysis")
81
+ for i, (sentence, prob, is_ai) in enumerate(st.session_state.analysis_results, start=1):
82
  label = "🟢 Human" if not is_ai else "🔴 AI"
83
  st.markdown(f"**{i}.** _{sentence}_\n\n → {label}")
84
 
85
  st.subheader("📊 Final Result")
86
+ st.success(f"Estimated **AI-generated content**: **{st.session_state.ai_percentage:.2f}%**")