Update app.py
app.py CHANGED

@@ -1,53 +1,37 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
 from transformers.pipelines import pipeline
 from sentence_transformers import SentenceTransformer, util
 import numpy as np
 import gradio.themes as grthemes
 
-# Paraphrasing model:
-PARAPHRASE_MODEL_NAME = "
+# Paraphrasing model: tuner007/pegasus_paraphrase
+PARAPHRASE_MODEL_NAME = "tuner007/pegasus_paraphrase"
 paraphrase_tokenizer = AutoTokenizer.from_pretrained(PARAPHRASE_MODEL_NAME)
 paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASE_MODEL_NAME)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 paraphrase_model = paraphrase_model.to(device)
 
-# AI Detector:
-
+# AI Detector: desklib/ai-text-detector-v1.01
+AI_DETECTOR_MODEL = "desklib/ai-text-detector-v1.01"
+ai_detector = pipeline("text-classification", model=AI_DETECTOR_MODEL, device=0 if torch.cuda.is_available() else -1)
 
 # Semantic similarity model
 similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
 
-
-
-
-
-
-
-
-# Paraphrasing function
-def paraphrase(text, tone):
-    prompt = tone_templates[tone] + " " + text
-    input_ids = paraphrase_tokenizer(
-        f'paraphrase: {prompt}',
-        return_tensors="pt", padding="longest",
-        max_length=256, truncation=True
-    ).input_ids.to(device)
-    outputs = paraphrase_model.generate(
-        input_ids,
-        temperature=0.7,
-        repetition_penalty=1.2,
-        num_return_sequences=1,
-        no_repeat_ngram_size=2,
-        max_length=256,
-        diversity_penalty=3.0,
+def paraphrase(text):
+    prompt = text.strip()
+    batch = paraphrase_tokenizer([prompt], truncation=True, padding='longest', max_length=60, return_tensors="pt").to(device)
+    translated = paraphrase_model.generate(
+        **batch,
+        max_length=60,
         num_beams=5,
-
-
+        num_return_sequences=1,
+        temperature=1.0
     )
-
-    return
+    tgt_text = paraphrase_tokenizer.batch_decode(translated, skip_special_tokens=True)
+    return tgt_text[0] if tgt_text else ""
 
 def semantic_similarity(text1, text2):
     emb1 = similarity_model.encode(text1, convert_to_tensor=True)

@@ -56,12 +40,12 @@ def semantic_similarity(text1, text2):
     return sim
 
 def ai_detect(text):
-    # Returns probability of being AI-generated (label '
+    # Returns probability of being AI-generated (label 'LABEL_1' = AI, 'LABEL_0' = Human)
     result = ai_detector(text)
     for r in result:
-        if r['label']
+        if r['label'] in ['LABEL_1', 'Fake']:
             return r['score']
-        elif r['label']
+        elif r['label'] in ['LABEL_0', 'Real']:
             return 1.0 - r['score']
     return 0.5 # fallback
 
@@ -84,7 +68,10 @@ def process(text, tone):
     # Pre-humanization AI detection
     pre_ai_prob = ai_detect(text)
     # Paraphrase
-
+    try:
+        paraphrased = paraphrase(text)
+    except Exception as e:
+        return "[Error in paraphrasing: {}]".format(str(e)), "", 0.0, "", 0.0
     # Post-humanization AI detection
     post_ai_prob = ai_detect(paraphrased)
     # Semantic similarity
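
A note on the new ai_detect mapping: it assumes the standard transformers text-classification output, a list of {'label': str, 'score': float} dicts, and folds both label schemes seen in the diff ('LABEL_0'/'LABEL_1' and 'Real'/'Fake') into a single AI-probability. A minimal sketch of that mapping with stubbed results (the scores are illustrative, not actual desklib/ai-text-detector-v1.01 output):

    # Minimal sketch of the ai_detect label-to-probability mapping.
    # Stubbed lists stand in for ai_detector(text); scores are illustrative.

    def ai_prob(result):
        # result mirrors a transformers text-classification output:
        # a list of {'label': str, 'score': float} dicts.
        for r in result:
            if r['label'] in ['LABEL_1', 'Fake']:    # AI-generated class
                return r['score']
            elif r['label'] in ['LABEL_0', 'Real']:  # human-written class
                return 1.0 - r['score']
        return 0.5  # fallback when no recognized label appears

    print(ai_prob([{'label': 'LABEL_1', 'score': 0.93}]))  # 0.93 -> likely AI
    print(ai_prob([{'label': 'LABEL_0', 'score': 0.88}]))  # ~0.12 -> likely human

Whichever label scheme the checkpoint emits, the function returns the probability of the text being AI-generated, so pre_ai_prob and post_ai_prob remain directly comparable.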