sundaram07 committed on
Commit
f973f9e
·
verified ·
1 Parent(s): 283cc15

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +17 -13
src/streamlit_app.py CHANGED
@@ -6,24 +6,27 @@ import os
6
  from nltk.tokenize import sent_tokenize
7
  from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
8
 
9
- # 🧠 Ensure sentence tokenizer works inside Hugging Face (use /tmp/)
 
 
 
10
  nltk_data_path = "/tmp/nltk_data"
11
  nltk.download("punkt", download_dir=nltk_data_path)
12
  nltk.data.path.append(nltk_data_path)
13
 
14
- # 📦 Load tokenizer and model
15
- tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
16
- model = TFDistilBertForSequenceClassification.from_pretrained("sundaram07/distilbert-sentence-classifier")
17
 
18
- # 🧠 Predict probability for one sentence
19
  def predict_sentence_ai_probability(sentence):
20
  inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
21
  outputs = model(inputs)
22
  logits = outputs.logits
23
- prob_ai = tf.sigmoid(logits)[0][0].numpy() # Assuming binary classification (single neuron)
24
  return prob_ai
25
 
26
- # 📊 Analyze full text
27
  def predict_ai_generated_percentage(text, threshold=0.75):
28
  text = text.strip()
29
  sentences = sent_tokenize(text)
@@ -41,19 +44,20 @@ def predict_ai_generated_percentage(text, threshold=0.75):
41
  ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
42
  return ai_percentage, results
43
 
44
- # 🚀 Streamlit UI
 
45
  st.title("๐Ÿง  AI Content Detector")
46
- st.markdown("This tool detects the percentage of **AI-generated content** based on sentence-level analysis.")
47
 
48
- user_input = st.text_area("๐Ÿ“‹ Paste your text here:", height=300)
49
 
50
  if st.button("๐Ÿ” Analyze"):
51
- if user_input.strip() == "":
52
  st.warning("โš ๏ธ Please enter some text to analyze.")
53
  else:
54
  ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
55
-
56
- st.subheader("๐Ÿ”Ž Sentence-level Analysis")
57
  for i, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
58
  label = "๐ŸŸข Human" if not is_ai else "๐Ÿ”ด AI"
59
  st.markdown(f"**{i}.** _{sentence}_\n\nโ†’ **Probability AI:** `{prob:.2%}` โ†’ {label}")
 
6
  from nltk.tokenize import sent_tokenize
7
  from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
8
 
9
+ # 📁 Use safe cache directory inside Hugging Face or Docker
10
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
11
+
12
+ # 📥 Download NLTK tokenizer
13
  nltk_data_path = "/tmp/nltk_data"
14
  nltk.download("punkt", download_dir=nltk_data_path)
15
  nltk.data.path.append(nltk_data_path)
16
 
17
+ # 🔄 Load tokenizer and model from Hugging Face
18
+ tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased", cache_dir="/tmp/huggingface")
19
+ model = TFDistilBertForSequenceClassification.from_pretrained("sundaram07/distilbert-sentence-classifier", cache_dir="/tmp/huggingface")
20
 
21
+ # 🔮 Predict AI probability for a sentence
22
  def predict_sentence_ai_probability(sentence):
23
  inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
24
  outputs = model(inputs)
25
  logits = outputs.logits
26
+ prob_ai = tf.sigmoid(logits)[0][0].numpy() # for binary classification
27
  return prob_ai
28
 
29
+ # 📊 Analyze all sentences
30
  def predict_ai_generated_percentage(text, threshold=0.75):
31
  text = text.strip()
32
  sentences = sent_tokenize(text)
 
44
  ai_percentage = (ai_sentence_count / total_sentences) * 100 if total_sentences > 0 else 0.0
45
  return ai_percentage, results
46
 
47
+ # 🌐 Streamlit Web App
48
+ st.set_page_config(page_title="AI Detector", layout="wide")
49
  st.title("๐Ÿง  AI Content Detector")
50
+ st.markdown("This app detects the percentage of **AI-generated content** based on sentence-level analysis using DistilBERT.")
51
 
52
+ user_input = st.text_area("๐Ÿ“‹ Paste your text below to check for AI-generated sentences:", height=300)
53
 
54
  if st.button("๐Ÿ” Analyze"):
55
+ if not user_input.strip():
56
  st.warning("โš ๏ธ Please enter some text to analyze.")
57
  else:
58
  ai_percentage, analysis_results = predict_ai_generated_percentage(user_input)
59
+
60
+ st.subheader("๐Ÿ” Sentence-level Analysis")
61
  for i, (sentence, prob, is_ai) in enumerate(analysis_results, start=1):
62
  label = "๐ŸŸข Human" if not is_ai else "๐Ÿ”ด AI"
63
  st.markdown(f"**{i}.** _{sentence}_\n\nโ†’ **Probability AI:** `{prob:.2%}` โ†’ {label}")