Update src/streamlit_app.py
src/streamlit_app.py  CHANGED  +9 -6
@@ -4,25 +4,28 @@ import numpy as np
 import nltk
 import os
 from nltk.tokenize import sent_tokenize
+from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
 
 # 🧠 Ensure sentence tokenizer works inside Hugging Face (use /tmp/)
 nltk_data_path = "/tmp/nltk_data"
 nltk.download("punkt", download_dir=nltk_data_path)
 nltk.data.path.append(nltk_data_path)
 
-# 📦 Load model
-
+# 📦 Load tokenizer and model
+tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
+model = TFDistilBertForSequenceClassification.from_pretrained("sundaram07/distilbert-sentence-classifier")
 
 # 🧠 Predict probability for one sentence
 def predict_sentence_ai_probability(sentence):
-
-
-
+    inputs = tokenizer(sentence, return_tensors="tf", truncation=True, padding=True)
+    outputs = model(inputs)
+    logits = outputs.logits
+    prob_ai = tf.sigmoid(logits)[0][0].numpy()  # Assuming binary classification (single neuron)
     return prob_ai
 
 # 📊 Analyze full text
 def predict_ai_generated_percentage(text, threshold=0.75):
-    text = text.strip()
+    text = text.strip()
     sentences = sent_tokenize(text)
     ai_sentence_count = 0
     results = []
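
The hunk ends before the loop body of predict_ai_generated_percentage. Below is a minimal sketch of how that function plausibly continues, inferred only from the names visible in the diff (threshold, ai_sentence_count, results, prob_ai); the loop, the decision rule, and the return shape are assumptions, not part of this commit.

def predict_ai_generated_percentage(text, threshold=0.75):
    """Hypothetical continuation: score each sentence and report the share above the threshold."""
    text = text.strip()
    sentences = sent_tokenize(text)
    ai_sentence_count = 0
    results = []
    for sentence in sentences:
        prob_ai = predict_sentence_ai_probability(sentence)  # per-sentence score defined in the diff above
        is_ai = prob_ai >= threshold                          # assumed decision rule
        ai_sentence_count += int(is_ai)
        results.append((sentence, float(prob_ai), is_ai))
    ai_percentage = 100.0 * ai_sentence_count / len(sentences) if sentences else 0.0
    return ai_percentage, results                             # assumed return shape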