# ad_analyzer / app.py
# Hugging Face Space by astonn (commit 3808edd, verified)
import os, pandas as pd, gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama # GGUF CPU backend
# ---------- model loading (one‑time) ----------
# Download a 4-bit quantized Gemma-2B instruct model (GGUF) from the HF Hub
# and load it with llama.cpp's Python bindings for CPU-only inference.
MODEL_REPO = "MaziyarPanahi/gemma-2b-it-GGUF"
MODEL_FILE = "gemma-2b-it.Q4_K_M.gguf" # 1.6 GB 4‑bit
CTX_SIZE = 4096  # context window (prompt + completion tokens) passed to llama.cpp
# hf_hub_download caches the file locally, so repeated startups skip the download.
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
llm = Llama(
    model_path=model_path,
    n_ctx=CTX_SIZE,
    n_threads=4  # slightly faster on 2 vCPUs
)
# ---------- analysis + generation ----------
def _fmt_money(value) -> str:
    """Format a currency cell, tolerating pandas <NA> (zero-denominator metrics)."""
    return "n/a" if pd.isna(value) else f"${value:.2f}"


def _fmt_ratio(value) -> str:
    """Format a ratio cell to 3 decimals, tolerating pandas <NA>."""
    return "n/a" if pd.isna(value) else f"{value:.3f}"


def _rows_to_text(sub) -> str:
    """Render the rows of a metrics DataFrame slice as prompt-ready text."""
    parts = []
    for _, r in sub.iterrows():
        parts.append(
            f"Headline: {r.headline}\n"
            f"Description: {r.description}\n"
            f"Impressions: {int(r.impressions)}, CTR: {r.CTR:.3f}, "
            f"Form Opens: {int(r.form_opens)}, ER: {_fmt_ratio(r.engagement_rate)}\n"
            f"Spend: ${r.spend:.2f}, CPC: {_fmt_money(r.CPC)}, "
            f"CPF: {_fmt_money(r.cost_per_form_open)}\n"
        )
    return "\n".join(parts)


# ---------- analysis + generation ----------
def analyze_ads(file):
    """Read an uploaded ads CSV, compute per-ad metrics, and ask the LLM for insights.

    Parameters
    ----------
    file : uploaded file object exposing a ``.name`` filesystem path (Gradio File).

    Returns
    -------
    str
        The model's written analysis, or a human-readable error message when the
        CSV is missing columns or contains no usable rows.
    """
    print("DEBUG uploaded:", file.name, os.path.getsize(file.name), "bytes", flush=True)
    df = pd.read_csv(file.name)
    req = {"headline", "description", "impressions", "CTR", "form_opens", "spend"}
    if not req.issubset(df.columns):
        return f"Missing columns: {', '.join(req - set(df.columns))}"
    # Coerce numerics; rows with any unparsable value are dropped wholesale below.
    for col in ["impressions", "CTR", "form_opens", "spend"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    df = df.dropna()
    if df.empty:
        # Guard: an all-invalid CSV previously fell through and built an empty prompt.
        return "No valid data rows found after cleaning; check the CSV contents."
    # Derived metrics. Zero denominators become <NA> instead of inf / a formatting
    # TypeError later; the _fmt_* helpers print "n/a" for those cells.
    df["engagement_rate"] = df["form_opens"] / df["impressions"].replace(0, pd.NA)
    clicks = (df["CTR"] * df["impressions"]).replace(0, pd.NA)
    df["CPC"] = df["spend"] / clicks
    df["cost_per_form_open"] = df["spend"] / df["form_opens"].replace(0, pd.NA)
    # One best + one worst ad keeps the prompt short (tight CPU inference budget).
    top = df.sort_values("CTR", ascending=False).head(1)
    worst = df.sort_values("CTR").head(1)
    prompt = (
        "You are a senior digital marketer.\n"
        "Analyse the high‑ and low‑performing ads below and deliver:\n"
        "1. Key patterns of winners.\n"
        "2. Weak points of losers.\n"
        "3. Three actionable creative improvements.\n\n"
        f"--- HIGH CTR ADS ---\n{_rows_to_text(top)}\n"
        f"--- LOW CTR ADS ---\n{_rows_to_text(worst)}"
    )
    # stream=True: tokens appear in the logs as they are generated (~25-30 s total).
    stream = llm(
        prompt,
        max_tokens=1500,
        temperature=0.2,
        top_p=0.8,
        stream=True,
    )
    out = []
    for chunk in stream:
        tok = chunk["choices"][0]["text"]
        print(tok, end="", flush=True)  # progress is visible in the Space logs
        out.append(tok)
    return "".join(out).strip()
# ---------- Gradio UI ----------
# Single-function app: upload a CSV of ad metrics, receive the model's analysis.
_csv_input = gr.File(
    label="CSV with: headline, description, impressions, CTR, form_opens, spend"
)
_analysis_output = gr.Textbox(label="AI‑generated analysis & recommendations")

demo = gr.Interface(
    analyze_ads,
    inputs=_csv_input,
    outputs=_analysis_output,
    title="Ad Performance Analyzer (Gemma‑2b 4‑bit, CPU‑only)",
    description="Upload your ad data and get actionable insights without paid APIs.",
)

if __name__ == "__main__":
    demo.launch()