taha092 committed
Commit 05a40ee · verified · 1 Parent(s): 23866f0

Update app.py

Files changed (1): app.py (+23 -36)
app.py CHANGED
@@ -1,53 +1,37 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
 from transformers.pipelines import pipeline
 from sentence_transformers import SentenceTransformer, util
 import numpy as np
 import gradio.themes as grthemes
 
-# Paraphrasing model: humarin/chatgpt_paraphraser_on_T5_base
-PARAPHRASE_MODEL_NAME = "humarin/chatgpt_paraphraser_on_T5_base"
+# Paraphrasing model: tuner007/pegasus_paraphrase
+PARAPHRASE_MODEL_NAME = "tuner007/pegasus_paraphrase"
 paraphrase_tokenizer = AutoTokenizer.from_pretrained(PARAPHRASE_MODEL_NAME)
 paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASE_MODEL_NAME)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 paraphrase_model = paraphrase_model.to(device)
 
-# AI Detector: roberta-base-openai-detector
-ai_detector = pipeline("text-classification", model="roberta-base-openai-detector", device=0 if torch.cuda.is_available() else -1)
+# AI Detector: desklib/ai-text-detector-v1.01
+AI_DETECTOR_MODEL = "desklib/ai-text-detector-v1.01"
+ai_detector = pipeline("text-classification", model=AI_DETECTOR_MODEL, device=0 if torch.cuda.is_available() else -1)
 
 # Semantic similarity model
 similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
 
-tone_templates = {
-    "Academic": "Paraphrase the following text in a formal, academic tone:",
-    "Casual": "Paraphrase the following text in a casual, conversational tone:",
-    "Friendly": "Paraphrase the following text in a friendly, approachable tone:",
-    "Stealth": "Paraphrase the following text to bypass AI detectors and sound as human as possible:",
-}
-
-# Paraphrasing function
-def paraphrase(text, tone):
-    prompt = tone_templates[tone] + " " + text
-    input_ids = paraphrase_tokenizer(
-        f'paraphrase: {prompt}',
-        return_tensors="pt", padding="longest",
-        max_length=256, truncation=True
-    ).input_ids.to(device)
-    outputs = paraphrase_model.generate(
-        input_ids,
-        temperature=0.7,
-        repetition_penalty=1.2,
-        num_return_sequences=1,
-        no_repeat_ngram_size=2,
-        max_length=256,
-        diversity_penalty=3.0,
+def paraphrase(text):
+    prompt = text.strip()
+    batch = paraphrase_tokenizer([prompt], truncation=True, padding='longest', max_length=60, return_tensors="pt").to(device)
+    translated = paraphrase_model.generate(
+        **batch,
+        max_length=60,
         num_beams=5,
-        num_beam_groups=5,
-        trust_remote_code=True
+        num_return_sequences=1,
+        temperature=1.0
     )
-    res = paraphrase_tokenizer.batch_decode(outputs, skip_special_tokens=True)
-    return res[0] if res else ""
+    tgt_text = paraphrase_tokenizer.batch_decode(translated, skip_special_tokens=True)
+    return tgt_text[0] if tgt_text else ""
 
 def semantic_similarity(text1, text2):
     emb1 = similarity_model.encode(text1, convert_to_tensor=True)
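
Two things are worth noting about the new generation call: `temperature=1.0` has no effect under pure beam search (transformers only applies temperature when `do_sample=True`), and `max_length=60` caps both the tokenized input and the generated output, so anything past roughly 60 tokens is silently dropped. A sentence-level wrapper can work around the cap; this is a sketch, not part of the commit, and the `paraphrase_long` name and splitting regex are illustrative choices:

import re

def paraphrase_long(text):
    # Hypothetical helper (not in this commit): paraphrase sentence by
    # sentence so inputs longer than the 60-token cap are not truncated.
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    return " ".join(paraphrase(s) for s in sentences if s)
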
@@ -56,12 +40,12 @@ def semantic_similarity(text1, text2):
     return sim
 
 def ai_detect(text):
-    # Returns probability of being AI-generated (label 'Fake')
+    # Returns probability of being AI-generated (label 'LABEL_1' = AI, 'LABEL_0' = Human)
     result = ai_detector(text)
     for r in result:
-        if r['label'] == 'Fake':
+        if r['label'] in ['LABEL_1', 'Fake']:
             return r['score']
-        elif r['label'] == 'Real':
+        elif r['label'] in ['LABEL_0', 'Real']:
             return 1.0 - r['score']
     return 0.5 # fallback
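
The membership checks keep the old detector's 'Fake'/'Real' labels working while adding the new detector's generic 'LABEL_0'/'LABEL_1' names, but generic labels are easy to invert. One way to double-check the mapping before trusting it, sketched under the assumption that the checkpoint publishes an `id2label` table:

from transformers import AutoConfig

# Inspect which class index the detector maps to which label name.
config = AutoConfig.from_pretrained(AI_DETECTOR_MODEL)
print(config.id2label)  # e.g. {0: 'LABEL_0', 1: 'LABEL_1'}
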
@@ -84,7 +68,10 @@ def process(text, tone):
     # Pre-humanization AI detection
     pre_ai_prob = ai_detect(text)
     # Paraphrase
-    paraphrased = paraphrase(text, tone)
+    try:
+        paraphrased = paraphrase(text)
+    except Exception as e:
+        return "[Error in paraphrasing: {}]".format(str(e)), "", 0.0, "", 0.0
     # Post-humanization AI detection
     post_ai_prob = ai_detect(paraphrased)
     # Semantic similarity
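
The new try/except keeps the app from crashing when generation fails, returning an error string plus neutral placeholders in what appears to be the five-field shape the Gradio UI expects. A quick end-to-end smoke test of the helpers this commit touches (the sample sentence is made up; `process` itself is not shown in full here, so only the helpers are called directly):

sample = "Large language models can generate fluent text on many topics."
rewritten = paraphrase(sample)
print("paraphrase:", rewritten)
print("AI prob before:", ai_detect(sample))
print("AI prob after:", ai_detect(rewritten))
print("similarity:", semantic_similarity(sample, rewritten))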