sundaram07 committed
Commit a9d8b80 · verified · 1 Parent(s): 46f35a5

Update src/streamlit_app.py

Files changed (1)
  1. src/streamlit_app.py +9 -6
src/streamlit_app.py CHANGED
@@ -4,25 +4,28 @@ import numpy as np
 import nltk
 import os
 from nltk.tokenize import sent_tokenize
+from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
 
 # 🧠 Ensure sentence tokenizer works inside Hugging Face (use /tmp/)
 nltk_data_path = "/tmp/nltk_data"
 nltk.download("punkt", download_dir=nltk_data_path)
 nltk.data.path.append(nltk_data_path)
 
-# 📦 Load model
-# model = tf.keras.models.load_model('src/my_distilbert_classifier.keras', compile=False)
+# 📦 Load tokenizer and model
+tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
+model = TFDistilBertForSequenceClassification.from_pretrained("sundaram07/distilbert-sentence-classifier")
 
 # 🧠 Predict probability for one sentence
 def predict_sentence_ai_probability(sentence):
-    # NOTE: You may need to tokenize properly if model needs token_ids/attention_mask
-    preds = model.predict([sentence])
-    prob_ai = tf.sigmoid(preds[0][0]).numpy()
+    inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
+    outputs = model(inputs)
+    logits = outputs.logits
+    prob_ai = tf.sigmoid(logits)[0][0].numpy()  # Assuming binary classification (single neuron)
     return prob_ai
 
 # 📊 Analyze full text
 def predict_ai_generated_percentage(text, threshold=0.75):
-    text = text.strip() + "."
+    text = text.strip()
     sentences = sent_tokenize(text)
     ai_sentence_count = 0
     results = []
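
The hunk ends at results = [], so the per-sentence loop of predict_ai_generated_percentage is not shown in this commit. A minimal sketch of how the function presumably continues, given the threshold parameter and the counters initialized above; the loop body and the return shape are assumptions, not code from this diff (sent_tokenize, predict_sentence_ai_probability, and tf are taken from the file as committed):

# Sketch only: assumed continuation of predict_ai_generated_percentage;
# the loop body and return values below are NOT part of this commit.
def predict_ai_generated_percentage(text, threshold=0.75):
    text = text.strip()
    sentences = sent_tokenize(text)
    ai_sentence_count = 0
    results = []
    for sentence in sentences:
        prob = predict_sentence_ai_probability(sentence)
        is_ai = prob >= threshold              # assumed decision rule
        ai_sentence_count += int(is_ai)
        results.append((sentence, prob, is_ai))
    # Assumed return: percentage of sentences flagged as AI, plus per-sentence details.
    percentage = 100.0 * ai_sentence_count / len(sentences) if sentences else 0.0
    return percentage, results

One caveat on the new prediction code: TFDistilBertForSequenceClassification creates a two-logit head by default (num_labels=2), so tf.sigmoid(logits)[0][0] only reads as a probability if the sundaram07/distilbert-sentence-classifier checkpoint was exported with a single-logit head, which is what the in-line comment in the diff assumes; with the default two-logit head, a softmax over the logits would be the usual choice.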