taha092 committed
Commit 05a40ee · verified · 1 Parent(s): 23866f0

Update app.py

Files changed (1): app.py (+23 -36)
app.py CHANGED
@@ -1,53 +1,37 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
 from transformers.pipelines import pipeline
 from sentence_transformers import SentenceTransformer, util
 import numpy as np
 import gradio.themes as grthemes
 
-# Paraphrasing model: humarin/chatgpt_paraphraser_on_T5_base
-PARAPHRASE_MODEL_NAME = "humarin/chatgpt_paraphraser_on_T5_base"
+# Paraphrasing model: tuner007/pegasus_paraphrase
+PARAPHRASE_MODEL_NAME = "tuner007/pegasus_paraphrase"
 paraphrase_tokenizer = AutoTokenizer.from_pretrained(PARAPHRASE_MODEL_NAME)
 paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASE_MODEL_NAME)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 paraphrase_model = paraphrase_model.to(device)
 
-# AI Detector: roberta-base-openai-detector
-ai_detector = pipeline("text-classification", model="roberta-base-openai-detector", device=0 if torch.cuda.is_available() else -1)
+# AI Detector: desklib/ai-text-detector-v1.01
+AI_DETECTOR_MODEL = "desklib/ai-text-detector-v1.01"
+ai_detector = pipeline("text-classification", model=AI_DETECTOR_MODEL, device=0 if torch.cuda.is_available() else -1)
 
 # Semantic similarity model
 similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
 
-tone_templates = {
-    "Academic": "Paraphrase the following text in a formal, academic tone:",
-    "Casual": "Paraphrase the following text in a casual, conversational tone:",
-    "Friendly": "Paraphrase the following text in a friendly, approachable tone:",
-    "Stealth": "Paraphrase the following text to bypass AI detectors and sound as human as possible:",
-}
-
-# Paraphrasing function
-def paraphrase(text, tone):
-    prompt = tone_templates[tone] + " " + text
-    input_ids = paraphrase_tokenizer(
-        f'paraphrase: {prompt}',
-        return_tensors="pt", padding="longest",
-        max_length=256, truncation=True
-    ).input_ids.to(device)
-    outputs = paraphrase_model.generate(
-        input_ids,
-        temperature=0.7,
-        repetition_penalty=1.2,
-        num_return_sequences=1,
-        no_repeat_ngram_size=2,
-        max_length=256,
-        diversity_penalty=3.0,
+def paraphrase(text):
+    prompt = text.strip()
+    batch = paraphrase_tokenizer([prompt], truncation=True, padding='longest', max_length=60, return_tensors="pt").to(device)
+    translated = paraphrase_model.generate(
+        **batch,
+        max_length=60,
         num_beams=5,
-        num_beam_groups=5,
-        trust_remote_code=True
+        num_return_sequences=1,
+        temperature=1.0
     )
-    res = paraphrase_tokenizer.batch_decode(outputs, skip_special_tokens=True)
-    return res[0] if res else ""
+    tgt_text = paraphrase_tokenizer.batch_decode(translated, skip_special_tokens=True)
+    return tgt_text[0] if tgt_text else ""
 
 def semantic_similarity(text1, text2):
     emb1 = similarity_model.encode(text1, convert_to_tensor=True)
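
Two things are worth noting about the new generation call: `temperature=1.0` has no effect under pure beam search (transformers only applies temperature when `do_sample=True`), and `max_length=60` caps both the tokenized input and the generated output, so anything past roughly 60 tokens is silently dropped. A sentence-level wrapper can work around the cap; this is a sketch, not part of the commit, and the `paraphrase_long` name and splitting regex are illustrative choices:

import re

def paraphrase_long(text):
    # Hypothetical helper (not in this commit): paraphrase sentence by
    # sentence so inputs longer than the 60-token cap are not truncated.
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    return " ".join(paraphrase(s) for s in sentences if s)
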
@@ -56,12 +40,12 @@ def semantic_similarity(text1, text2):
     return sim
 
 def ai_detect(text):
-    # Returns probability of being AI-generated (label 'Fake')
+    # Returns probability of being AI-generated (label 'LABEL_1' = AI, 'LABEL_0' = Human)
     result = ai_detector(text)
     for r in result:
-        if r['label'] == 'Fake':
+        if r['label'] in ['LABEL_1', 'Fake']:
             return r['score']
-        elif r['label'] == 'Real':
+        elif r['label'] in ['LABEL_0', 'Real']:
             return 1.0 - r['score']
     return 0.5 # fallback
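
The membership checks keep the old detector's 'Fake'/'Real' labels working while adding the new detector's generic 'LABEL_0'/'LABEL_1' names, but generic labels are easy to invert. One way to double-check the mapping before trusting it, sketched under the assumption that the checkpoint publishes an `id2label` table:

from transformers import AutoConfig

# Inspect which class index the detector maps to which label name.
config = AutoConfig.from_pretrained(AI_DETECTOR_MODEL)
print(config.id2label)  # e.g. {0: 'LABEL_0', 1: 'LABEL_1'}
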
@@ -84,7 +68,10 @@ def process(text, tone):
     # Pre-humanization AI detection
     pre_ai_prob = ai_detect(text)
     # Paraphrase
-    paraphrased = paraphrase(text, tone)
+    try:
+        paraphrased = paraphrase(text)
+    except Exception as e:
+        return "[Error in paraphrasing: {}]".format(str(e)), "", 0.0, "", 0.0
     # Post-humanization AI detection
     post_ai_prob = ai_detect(paraphrased)
     # Semantic similarity
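
The new try/except keeps the app from crashing when generation fails, returning an error string plus neutral placeholders in what appears to be the five-field shape the Gradio UI expects. A quick end-to-end smoke test of the helpers this commit touches (the sample sentence is made up; `process` itself is not shown in full here, so only the helpers are called directly):

sample = "Large language models can generate fluent text on many topics."
rewritten = paraphrase(sample)
print("paraphrase:", rewritten)
print("AI prob before:", ai_detect(sample))
print("AI prob after:", ai_detect(rewritten))
print("similarity:", semantic_similarity(sample, rewritten))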