sundaram07 committed on
Commit
ce337fb
·
verified ·
1 Parent(s): 6adf923

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +21 -21
src/streamlit_app.py CHANGED
@@ -6,15 +6,15 @@ import os
6
  from nltk.tokenize import sent_tokenize
7
  from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
8
 
9
- # 📁 Set Hugging Face cache directory (safe for deployments)
10
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
11
 
12
- # 📥 Download NLTK tokenizer
13
  nltk_data_path = "/tmp/nltk_data"
14
- nltk.download("punkt_tab", download_dir=nltk_data_path)
15
  nltk.data.path.append(nltk_data_path)
16
 
17
- # 🚀 Load model & tokenizer once using cache
18
  @st.cache_resource
19
  def load_model_and_tokenizer():
20
  tokenizer = DistilBertTokenizerFast.from_pretrained(
@@ -27,7 +27,7 @@ def load_model_and_tokenizer():
27
 
28
  tokenizer, model = load_model_and_tokenizer()
29
 
30
- # 🔮 Predict AI probability for a sentence
31
  def predict_sentence_ai_probability(sentence):
32
  inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
33
  outputs = model(inputs)
@@ -39,6 +39,9 @@ def predict_sentence_ai_probability(sentence):
39
  def predict_ai_generated_percentage(text, threshold=0.15):
40
  text = text.strip()
41
  sentences = sent_tokenize(text)
 
 
 
42
  ai_sentence_count = 0
43
  results = []
44
 
@@ -49,11 +52,10 @@ def predict_ai_generated_percentage(text, threshold=0.15):
49
  if is_ai:
50
  ai_sentence_count += 1
51
 
52
- total_sentences = len(sentences)
53
- ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
54
  return ai_percentage, results
55
 
56
- # 🖥️ Streamlit UI setup
57
  st.set_page_config(page_title="AI Detector", layout="wide")
58
  st.title("๐Ÿง  AI Content Detector")
59
  st.markdown("This app detects the percentage of **AI-generated content** using sentence-level analysis with DistilBERT.")
@@ -61,15 +63,9 @@ st.markdown("This app detects the percentage of **AI-generated content** using s
61
  # 📋 Text input
62
  user_input = st.text_area("๐Ÿ“‹ Paste your text below to check for AI-generated sentences:", height=300)
63
 
64
- # ✅ Initialize session state
65
- if "analysis_done" not in st.session_state:
66
- st.session_state.analysis_done = False
67
- st.session_state.analysis_results = None
68
- st.session_state.ai_percentage = None
69
-
70
  # 🔍 Analyze button logic
71
  if st.button("๐Ÿ” Analyze"):
72
- # 🧹 Clear previous cache/state
73
  st.session_state.analysis_done = False
74
  st.session_state.analysis_results = None
75
  st.session_state.ai_percentage = None
@@ -77,14 +73,18 @@ if st.button("๐Ÿ” Analyze"):
77
  if not user_input.strip():
78
  st.warning("โš ๏ธ Please enter some text.")
79
  else:
80
- # Run fresh analysis
81
  ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
82
- st.session_state.analysis_done = True
83
- st.session_state.analysis_results = analysis_results
84
- st.session_state.ai_percentage = ai_percentage
85
 
86
- # 📤 Show results if analysis was done
87
- if st.session_state.analysis_done:
 
 
 
 
 
 
 
88
  st.subheader("๐Ÿ” Sentence-level Analysis")
89
  for i, (sentence, prob, is_ai) in enumerate(st.session_state.analysis_results, start=1):
90
  label = "๐ŸŸข Human" if not is_ai else "๐Ÿ”ด AI"
 
6
  from nltk.tokenize import sent_tokenize
7
  from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
8
 
9
+ # 📁 Hugging Face cache dir
10
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
11
 
12
+ # 📥 Download NLTK punkt tokenizer
13
  nltk_data_path = "/tmp/nltk_data"
14
+ nltk.download("punkt", download_dir=nltk_data_path)
15
  nltk.data.path.append(nltk_data_path)
16
 
17
+ # ✅ Cache the model/tokenizer
18
  @st.cache_resource
19
  def load_model_and_tokenizer():
20
  tokenizer = DistilBertTokenizerFast.from_pretrained(
 
27
 
28
  tokenizer, model = load_model_and_tokenizer()
29
 
30
+ # 🔮 Predict sentence AI probability
31
  def predict_sentence_ai_probability(sentence):
32
  inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
33
  outputs = model(inputs)
 
39
  def predict_ai_generated_percentage(text, threshold=0.15):
40
  text = text.strip()
41
  sentences = sent_tokenize(text)
42
+ if len(sentences) == 0:
43
+ return 0.0, []
44
+
45
  ai_sentence_count = 0
46
  results = []
47
 
 
52
  if is_ai:
53
  ai_sentence_count += 1
54
 
55
+ ai_percentage = (ai_sentence_count / len(sentences)) * 100
 
56
  return ai_percentage, results
57
 
58
+ # 🖥️ Streamlit UI
59
  st.set_page_config(page_title="AI Detector", layout="wide")
60
  st.title("๐Ÿง  AI Content Detector")
61
  st.markdown("This app detects the percentage of **AI-generated content** using sentence-level analysis with DistilBERT.")
 
63
  # 📋 Text input
64
  user_input = st.text_area("๐Ÿ“‹ Paste your text below to check for AI-generated sentences:", height=300)
65
 
 
 
 
 
 
 
66
  # 🔍 Analyze button logic
67
  if st.button("๐Ÿ” Analyze"):
68
+ # Clear previous session results
69
  st.session_state.analysis_done = False
70
  st.session_state.analysis_results = None
71
  st.session_state.ai_percentage = None
 
73
  if not user_input.strip():
74
  st.warning("โš ๏ธ Please enter some text.")
75
  else:
76
+ # Perform analysis
77
  ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
 
 
 
78
 
79
+ if len(analysis_results) == 0:
80
+ st.warning("โš ๏ธ Not enough valid sentences to analyze.")
81
+ else:
82
+ st.session_state.analysis_done = True
83
+ st.session_state.analysis_results = analysis_results
84
+ st.session_state.ai_percentage = ai_percentage
85
+
86
+ # 📤 Show results
87
+ if st.session_state.get("analysis_done", False):
88
  st.subheader("๐Ÿ” Sentence-level Analysis")
89
  for i, (sentence, prob, is_ai) in enumerate(st.session_state.analysis_results, start=1):
90
  label = "๐ŸŸข Human" if not is_ai else "๐Ÿ”ด AI"