Update src/streamlit_app.py
src/streamlit_app.py  CHANGED  +9 -6
@@ -4,25 +4,28 @@ import numpy as np
 import nltk
 import os
 from nltk.tokenize import sent_tokenize
+from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
 
 # 🧠 Ensure sentence tokenizer works inside Hugging Face (use /tmp/)
 nltk_data_path = "/tmp/nltk_data"
 nltk.download("punkt", download_dir=nltk_data_path)
 nltk.data.path.append(nltk_data_path)
 
-# 📦 Load model
-
+# 📦 Load tokenizer and model
+tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
+model = TFDistilBertForSequenceClassification.from_pretrained("sundaram07/distilbert-sentence-classifier")
 
 # 🧠 Predict probability for one sentence
 def predict_sentence_ai_probability(sentence):
-
-
-
+    inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
+    outputs = model(inputs)
+    logits = outputs.logits
+    prob_ai = tf.sigmoid(logits)[0][0].numpy()  # Assuming binary classification (single neuron)
     return prob_ai
 
 # 📊 Analyze full text
 def predict_ai_generated_percentage(text, threshold=0.75):
-    text = text.strip()
+    text = text.strip()
     sentences = sent_tokenize(text)
     ai_sentence_count = 0
     results = []
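
The hunk ends before the loop body of predict_ai_generated_percentage. Below is a minimal sketch of how that function plausibly continues, inferred only from the names visible in the diff (threshold, ai_sentence_count, results, prob_ai); the loop, the decision rule, and the return shape are assumptions, not part of this commit.

def predict_ai_generated_percentage(text, threshold=0.75):
    """Hypothetical continuation: score each sentence and report the share above the threshold."""
    text = text.strip()
    sentences = sent_tokenize(text)
    ai_sentence_count = 0
    results = []
    for sentence in sentences:
        prob_ai = predict_sentence_ai_probability(sentence)  # per-sentence score defined in the diff above
        is_ai = prob_ai >= threshold                          # assumed decision rule
        ai_sentence_count += int(is_ai)
        results.append((sentence, float(prob_ai), is_ai))
    ai_percentage = 100.0 * ai_sentence_count / len(sentences) if sentences else 0.0
    return ai_percentage, results                             # assumed return shape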