File size: 3,278 Bytes
161408c
864f922
bd740da
864f922
161408c
 
 
1c6417a
864f922
a63b1ed
bd740da
 
 
 
 
864f922
 
 
161408c
 
864f922
 
161408c
864f922
 
 
bd740da
161408c
864f922
 
 
161408c
 
 
 
864f922
bd740da
 
 
864f922
 
bd740da
864f922
bd740da
 
 
 
 
 
161408c
bd740da
864f922
 
 
 
 
 
 
161408c
864f922
 
 
bd740da
 
 
3808edd
 
bd740da
 
 
 
 
 
 
 
 
 
 
864f922
 
 
 
 
 
4a1bdef
864f922
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os, pandas as pd, gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama          # GGUF CPU backend

# ---------- model loading (one‑time) ----------
# GGUF checkpoint to run: Hub repository plus the file inside it.
MODEL_REPO = "MaziyarPanahi/gemma-2b-it-GGUF"
MODEL_FILE = "gemma-2b-it.Q4_K_M.gguf"  # 4-bit quantisation, about 1.6 GB
CTX_SIZE = 4096  # passed to Llama as n_ctx

# Resolve the model file to a local path, then build the single shared
# Llama instance at import time so every request reuses it.
model_path = hf_hub_download(filename=MODEL_FILE, repo_id=MODEL_REPO)
llm = Llama(
    n_threads=4,  # original author's note: slightly faster on 2 vCPU
    n_ctx=CTX_SIZE,
    model_path=model_path,
)

# ---------- analysis + generation ----------
# Columns the uploaded CSV must contain, in the order they are reported.
_REQUIRED_COLUMNS = (
    "headline",
    "description",
    "impressions",
    "CTR",
    "form_opens",
    "spend",
)
# Subset of the required columns that must parse as numbers.
_NUMERIC_COLUMNS = ("impressions", "CTR", "form_opens", "spend")


def _fmt_metric(value, spec, prefix=""):
    """Format *value* with *spec*, or return "n/a" when it is NaN/NA."""
    if pd.isna(value):
        return "n/a"
    return f"{prefix}{value:{spec}}"


def _rows_to_text(sub):
    """Render every ad row of *sub* as a plain-text block for the prompt."""
    parts = []
    for row in sub.to_dict("records"):
        parts.append(
            f"Headline: {row['headline']}\n"
            f"Description: {row['description']}\n"
            f"Impressions: {int(row['impressions'])}, CTR: {row['CTR']:.3f}, "
            f"Form Opens: {int(row['form_opens'])}, "
            f"ER: {_fmt_metric(row['engagement_rate'], '.3f')}\n"
            f"Spend: ${row['spend']:.2f}, "
            f"CPC: {_fmt_metric(row['CPC'], '.2f', '$')}, "
            f"CPF: {_fmt_metric(row['cost_per_form_open'], '.2f', '$')}\n"
        )
    return "\n".join(parts)


def _build_prompt(top, worst):
    """Assemble the instruction prompt from the best and worst ad rows."""
    return (
        "You are a senior digital marketer.\n"
        "Analyse the high‑ and low‑performing ads below and deliver:\n"
        "1. Key patterns of winners.\n"
        "2. Weak points of losers.\n"
        "3. Three actionable creative improvements.\n\n"
        f"--- HIGH CTR ADS ---\n{_rows_to_text(top)}\n"
        f"--- LOW CTR ADS ---\n{_rows_to_text(worst)}"
    )


def analyze_ads(file):
    """Analyse an uploaded ad-performance CSV and return the model's advice.

    The CSV is cleaned, three derived metrics are computed (engagement rate,
    cost per click, cost per form open), and the highest- and lowest-CTR ads
    are sent to the module-level ``llm`` for commentary.

    Args:
        file: The upload handed over by the UI. Either an object exposing the
            file path as ``.name``, a path string / ``os.PathLike``, or
            ``None`` when nothing was uploaded.

    Returns:
        The generated analysis text, or a short human-readable message when
        the input cannot be analysed (no file, unreadable CSV, missing
        columns, no valid rows).
    """
    if file is None:
        return "Please upload a CSV file."

    # Accept both an upload object with ``.name`` and a bare path.
    path = file if isinstance(file, (str, os.PathLike)) else file.name
    print("DEBUG uploaded:", path, os.path.getsize(path), "bytes", flush=True)

    try:
        df = pd.read_csv(path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError) as exc:
        return f"Could not read CSV: {exc}"

    # Report missing columns in a fixed order so the message is stable.
    missing = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        return f"Missing columns: {', '.join(missing)}"

    # Coerce metrics to numbers; unparseable cells become NaN.
    for col in _NUMERIC_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    # Only required columns decide whether a row is usable; blanks in
    # unrelated extra columns must not discard otherwise valid ads.
    df = df.dropna(subset=list(_REQUIRED_COLUMNS))
    if df.empty:
        return (
            "No valid ad rows found: every row has a missing or "
            "non-numeric value in a required column."
        )

    # Mask zero denominators with NaN (keeps float dtype) so the ratios are
    # undefined rather than inf, and are rendered as "n/a" in the prompt.
    impressions = df["impressions"].where(df["impressions"] != 0)
    clicks = df["CTR"] * df["impressions"]
    clicks = clicks.where(clicks != 0)
    form_opens = df["form_opens"].where(df["form_opens"] != 0)
    df = df.assign(
        engagement_rate=df["form_opens"] / impressions,
        CPC=df["spend"] / clicks,
        cost_per_form_open=df["spend"] / form_opens,
    )

    # One best and one worst ad keep the prompt short.
    top = df.sort_values("CTR", ascending=False).head(1)
    worst = df.sort_values("CTR").head(1)
    prompt = _build_prompt(top, worst)

    # stream=True yields text chunks as they are produced, so progress is
    # visible in the logs (original author's note: reply takes ~25-30 s).
    stream = llm(
        prompt,
        max_tokens=1500,
        temperature=0.2,
        top_p=0.8,
        stream=True,
    )

    out = []
    for chunk in stream:
        tok = chunk["choices"][0]["text"]
        print(tok, end="", flush=True)  # echo progress to the logs
        out.append(tok)

    return "".join(out).strip()

# ---------- Gradio UI ----------
# Input widget: a single file upload whose label lists the expected columns.
_csv_upload = gr.File(
    label="CSV with: headline, description, impressions, CTR, form_opens, spend"
)
# Output widget: a text box that shows whatever analyze_ads returns.
_report_box = gr.Textbox(label="AI‑generated analysis & recommendations")

# Wire the upload through analyze_ads into the text box.
demo = gr.Interface(
    title="Ad Performance Analyzer (Gemma‑2b 4‑bit, CPU‑only)",
    description="Upload your ad data and get actionable insights without paid APIs.",
    fn=analyze_ads,
    inputs=_csv_upload,
    outputs=_report_box,
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()