File size: 7,674 Bytes
78bbe6d
 
 
208bdb2
78bbe6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208bdb2
fd8896a
208bdb2
fd8896a
208bdb2
 
78bbe6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208bdb2
78bbe6d
 
 
208bdb2
 
 
 
 
 
 
 
78bbe6d
208bdb2
78bbe6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da22f13
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers.pipelines import pipeline
from sentence_transformers import SentenceTransformer, util
import numpy as np
import gradio.themes as grthemes
import random
import re

# ----------------------
# Paraphrasing Model Setup (Pegasus)
# ----------------------
# Choose the compute device up front so the model can be placed on it as soon
# as its weights are loaded.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

PARAPHRASE_MODEL_NAME = "tuner007/pegasus_paraphrase"
paraphrase_tokenizer = AutoTokenizer.from_pretrained(PARAPHRASE_MODEL_NAME)
# Load the seq2seq checkpoint and move it to the selected device in one step.
paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASE_MODEL_NAME).to(device)

# ----------------------
# Semantic Similarity Model
# ----------------------
# Sentence-embedding model used to compare the input text with its rewrite.
similarity_model = SentenceTransformer("all-MiniLM-L6-v2")

# ----------------------
# Local AI Detector (roberta-base-openai-detector)
# ----------------------
AI_DETECTOR_MODEL = "roberta-base-openai-detector"
# Runs on the first CUDA device (index 0) when one is available, otherwise -1.
ai_detector = pipeline(
    "text-classification",
    model=AI_DETECTOR_MODEL,
    device=0 if torch.cuda.is_available() else -1,
)

# ----------------------
# Prompt Variations for Humanization
# ----------------------
# Instruction-style prefixes. paraphrase_sentence() picks one at random for
# each sentence and prepends it to the text handed to the paraphrase model.
# Every entry ends with ":".
PROMPT_VARIANTS = [
    "Paraphrase this naturally:",
    "Rewrite as if explaining to a friend:",
    "Make this sound like a real conversation:",
    "Express this in a casual, human way:",
    "Reword this with natural flow:",
    "Make this sound less robotic:",
    "Rewrite in a friendly, informal tone:",
    "Paraphrase in a way a student would say it:",
]

# ----------------------
# Sentence Splitter
# ----------------------
def split_sentences(text):
    """Split ``text`` into sentences.

    The text is stripped, then cut at every run of whitespace that directly
    follows ".", "!" or "?". The terminator stays attached to its sentence
    and empty fragments are dropped.

    Args:
        text: The string to split.

    Returns:
        A list of non-empty sentence strings (empty for blank input).
    """
    fragments = re.split(r'(?<=[.!?])\s+', text.strip())
    return list(filter(None, fragments))

# ----------------------
# Light Post-Processing
# ----------------------
def _keep_leading_case(matched, replacement):
    """Return ``replacement``, capitalized when ``matched`` starts with a capital."""
    if matched[:1].isupper():
        return replacement[:1].upper() + replacement[1:]
    return replacement


def postprocess_text(text):
    """Apply light "humanizing" touches to paraphrased text.

    Two steps run in order:

    1. Common phrases ("do not", "it is", ...) are contracted. Matching is
       case-insensitive on whole words, and a capitalized match keeps its
       leading capital ("Do not" -> "Don't") so sentence starts are not
       lowercased.
    2. With probability 0.3 a stock idiom is appended as a trailing fragment.

    Args:
        text: The text to post-process.

    Returns:
        The processed text. Empty input is returned unchanged.
    """
    if not text:
        # Nothing to contract, and appending an idiom to nothing would
        # produce output that has no relation to any input.
        return text

    contractions = {
        "do not": "don't", "cannot": "can't", "will not": "won't", "I am": "I'm",
        "is not": "isn't", "are not": "aren't", "did not": "didn't", "it is": "it's",
        "does not": "doesn't", "have not": "haven't", "has not": "hasn't"
    }
    # Substitutions are applied one phrase at a time, in dict order, so that
    # overlapping phrases ("it is not") resolve the same way on every run.
    for phrase, short in contractions.items():
        text = re.sub(
            rf'\b{phrase}\b',
            # Bind ``short`` per iteration; the callback sees the matched text
            # and can mirror its capitalization.
            lambda m, short=short: _keep_leading_case(m.group(0), short),
            text,
            flags=re.IGNORECASE,
        )

    idioms = [
        "at the end of the day", "to be honest", "as a matter of fact", "for what it's worth",
        "in a nutshell", "the bottom line is", "all things considered"
    ]
    if random.random() < 0.3:
        text += " " + random.choice(idioms) + "."
    return text

# ----------------------
# Sentence-level Paraphrasing with Prompt Variation
# ----------------------
def paraphrase_sentence(sentence, tone):
    """Paraphrase a single sentence with the paraphrase model.

    A randomly chosen entry of ``PROMPT_VARIANTS`` is prepended to the
    sentence. For every tone other than "Stealth", the tone is folded into
    that prompt as "(<tone> tone)".

    Args:
        sentence: One sentence of input text.
        tone: The selected tone; "Stealth" leaves the prompt unchanged.

    Returns:
        The first decoded generation, or ``sentence`` unchanged when it is
        blank or nothing was decoded.
    """
    if not sentence or not sentence.strip():
        # Nothing to rewrite; skip the model call entirely.
        return sentence

    prompt = random.choice(PROMPT_VARIANTS)
    if tone != "Stealth":
        # The prompt variants already end in ":"; drop it before appending the
        # tone so the result reads "... (Casual tone):" instead of
        # "...: (Casual tone):".
        prompt = f"{prompt.rstrip(':')} ({tone} tone):"
    full_prompt = f"{prompt} {sentence}"

    # NOTE(review): the prompt is part of the model input, and the 60-token
    # limit (truncation=True) covers prompt and sentence together, so long
    # sentences lose their tail. Confirm the checkpoint benefits from the
    # instruction prefix at all.
    batch = paraphrase_tokenizer(
        [full_prompt],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    ).to(device)
    outputs = paraphrase_model.generate(
        **batch,
        max_length=60,
        num_beams=5,
        num_return_sequences=1,
        temperature=1.0
    )
    tgt_text = paraphrase_tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return tgt_text[0] if tgt_text else sentence

# ----------------------
# Main Paraphrasing Function
# ----------------------
def paraphrase(text, tone):
    """Paraphrase ``text`` sentence by sentence and post-process the result.

    Args:
        text: The full input text.
        tone: Tone passed through to ``paraphrase_sentence`` for each sentence.

    Returns:
        The rewritten sentences joined with single spaces, after
        ``postprocess_text`` has been applied.
    """
    rewritten_sentences = [
        paraphrase_sentence(piece, tone) for piece in split_sentences(text)
    ]
    return postprocess_text(' '.join(rewritten_sentences))

# ----------------------
# Semantic Similarity Function
# ----------------------
def semantic_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Each text is encoded separately with ``similarity_model``; the single
    similarity value is unwrapped to a plain Python number via ``.item()``.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The cosine similarity of the two embeddings.
    """
    first_emb, second_emb = (
        similarity_model.encode(passage, convert_to_tensor=True)
        for passage in (text1, text2)
    )
    return util.pytorch_cos_sim(first_emb, second_emb).item()

# ----------------------
# Local AI Detection Function
# ----------------------
def check_ai_score(text):
    """Estimate how likely ``text`` is AI-generated using the local detector.

    Args:
        text: The text to classify.

    Returns:
        A ``(probability, error)`` tuple. On success ``error`` is ``None`` and
        ``probability`` is the detector score for an AI label
        ("LABEL_1"/"Fake"), one minus the score for a human label
        ("LABEL_0"/"Real"), or 0.5 when no known label is returned. On
        failure ``probability`` is ``None`` and ``error`` is a message.
    """
    try:
        # RoBERTa-base accepts at most 512 tokens. Truncate explicitly so long
        # inputs are scored on their opening instead of failing and being
        # reported as a detection error.
        result = ai_detector(text, truncation=True, max_length=512)
        for r in result:
            # NOTE(review): assumes LABEL_1/"Fake" means AI and
            # LABEL_0/"Real" means human - confirm against the checkpoint's
            # id2label mapping.
            if r['label'] in ['LABEL_1', 'Fake']:
                return r['score'], None
            elif r['label'] in ['LABEL_0', 'Real']:
                return 1.0 - r['score'], None
        return 0.5, None  # fallback: no recognizable label in the result
    except Exception as e:
        # Best-effort boundary: report the failure to the caller rather than
        # crashing the UI callback.
        return None, f"AI detection error: {str(e)}"

# ----------------------
# Humanization Score & Rating
# ----------------------
def humanization_score(sim, ai_prob):
    """Combine similarity and AI probability into one humanization score.

    The score is the equally weighted sum of ``1 - sim`` and ``1 - ai_prob``,
    so it rises as the rewrite differs more from the original and as the
    detector's AI probability drops.

    Args:
        sim: Similarity between the original and the rewrite.
        ai_prob: Detector probability that the rewrite is AI-generated.

    Returns:
        The combined score.
    """
    dissimilarity = 1.0 - sim
    human_prob = 1.0 - ai_prob
    return dissimilarity * 0.5 + human_prob * 0.5

def humanization_rating(score):
    """Turn a humanization score into a labelled rating string.

    Scores below 0.7 are "Still AI-like", scores below 0.85 are "Acceptable",
    and everything else is "Highly Humanized". The score is appended in
    parentheses with two decimals.

    Args:
        score: The humanization score.

    Returns:
        The rating label followed by the formatted score.
    """
    # Ordered (exclusive upper bound, label) pairs; the first bound the score
    # falls under wins.
    tiers = (
        (0.7, "⚠️ Still AI-like"),
        (0.85, "👍 Acceptable"),
    )
    for upper_bound, label in tiers:
        if score < upper_bound:
            return f"{label} ({score:.2f})"
    return f"✅ Highly Humanized ({score:.2f})"

# ----------------------
# Main Processing Function
# ----------------------
def process(text, tone):
    """Run the full humanization pipeline for one request.

    Steps: score the input with the AI detector, paraphrase it, score the
    rewrite, measure similarity between input and rewrite, then derive the
    humanization score and rating.

    Args:
        text: The input text; ``None`` or blank input short-circuits.
        tone: The tone forwarded to ``paraphrase``.

    Returns:
        A 6-tuple in the order the UI outputs expect: rewritten text,
        AI-score summary, similarity, rating, humanization score in percent,
        and an empty placeholder string. On a detection or paraphrasing
        error, the message is placed in the summary or text slot and the
        numeric slots are 0.0.
    """
    # Guard against None as well as blank strings; calling .strip() on None
    # would raise AttributeError.
    if not text or not text.strip():
        return "", "", 0.0, "", 0.0, ""

    pre_ai_prob, pre_err = check_ai_score(text)
    if pre_ai_prob is None:
        return "", f"AI Detection Error: {pre_err}", 0.0, "", 0.0, ""

    try:
        paraphrased = paraphrase(text, tone)
    except Exception as e:
        # Surface the failure in the output box instead of crashing the callback.
        return f"[Paraphrasing error: {str(e)}]", "", 0.0, "", 0.0, ""

    post_ai_prob, post_err = check_ai_score(paraphrased)
    if post_ai_prob is None:
        # Still return the rewrite; only the scoring failed.
        return paraphrased, f"AI Detection Error: {post_err}", 0.0, "", 0.0, ""

    sim = semantic_similarity(text, paraphrased)
    score = humanization_score(sim, post_ai_prob)
    rating = humanization_rating(score)
    # Detector values are AI probabilities; display them as "% human".
    ai_score_str = f"Pre: {100*(1-pre_ai_prob):.1f}% human | Post: {100*(1-post_ai_prob):.1f}% human"
    return (
        paraphrased,
        ai_score_str,
        sim,
        rating,
        score * 100,
        ""
    )

# ----------------------
# Gradio UI
# ----------------------
# Base Gradio theme with blue primary/secondary hues and a slate neutral hue.
custom_theme = grthemes.Base(
    primary_hue="blue",
    secondary_hue="blue",
    neutral_hue="slate"
)

# Two-column layout: inputs (text, tone, button) on the left, results on the
# right. Components are created in display order, so statement order matters.
with gr.Blocks(theme=custom_theme, title="AI Humanizer - Made by Taha") as demo:
    # Page header.
    gr.Markdown("""
    # 🧠 AI Humanizer
    <div style='display:flex;justify-content:space-between;align-items:center;'>
        <span style='font-size:1.2em;color:#7bb1ff;'>Rewrite AI text to sound 100% human</span>
        <span style='font-weight:bold;color:#7bb1ff;'>Made by Taha</span>
    </div>
    """, elem_id="header")
    with gr.Row():
        # Left column: user input and controls.
        with gr.Column():
            text_in = gr.Textbox(label="Paste AI-generated text here", lines=8, placeholder="Paste your text...", elem_id="input-box")
            # "Stealth" is the default; paraphrase_sentence() leaves the prompt
            # unmodified for that tone.
            tone = gr.Dropdown(["Academic", "Casual", "Friendly", "Stealth"], value="Stealth", label="Tone Selector")
            btn = gr.Button("Humanize", elem_id="humanize-btn")
        # Right column: read-only results filled in by process().
        with gr.Column():
            text_out = gr.Textbox(label="Humanized Output", lines=8, interactive=False, elem_id="output-box")
            ai_scores = gr.Markdown("", elem_id="ai-scores")
            sim_score = gr.Number(label="Similarity (0=very different, 1=very similar)", interactive=False)
            rating = gr.Markdown("", elem_id="rating")
            human_score = gr.Number(label="Humanization Score (%)", interactive=False)
    # process() returns six values; the sixth (always an empty string) is
    # routed to an invisible Textbox created inline here.
    btn.click(
        process,
        inputs=[text_in, tone],
        outputs=[text_out, ai_scores, sim_score, rating, human_score, gr.Textbox(visible=False)],
        api_name="humanize"
    )
    # Page footer.
    gr.Markdown("""
    <div style='text-align:center;color:#7bb1ff;margin-top:2em;'>
        <b>Made by Taha</b> | Free for unlimited use | Optimized for students and creators
    </div>
    """, elem_id="footer")

# Starts the app at import/run time (no __main__ guard).
demo.launch()