"""Ad Performance Analyzer.

Upload a CSV of ad metrics and receive AI-generated analysis and creative
recommendations, produced locally by a 4-bit quantized Gemma-2b model running
on CPU via llama-cpp-python (no paid APIs).
"""

import os

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download
from llama_cpp import Llama  # GGUF CPU backend

# ---------- model loading (one‑time) ----------
MODEL_REPO = "MaziyarPanahi/gemma-2b-it-GGUF"
MODEL_FILE = "gemma-2b-it.Q4_K_M.gguf"  # 1.6 GB 4‑bit
CTX_SIZE = 4096

model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
llm = Llama(
    model_path=model_path,
    n_ctx=CTX_SIZE,
    n_threads=4,  # slightly faster on 2 vCPU
)


# ---------- analysis + generation ----------
def analyze_ads(file):
    """Compute per-ad KPIs from an uploaded CSV and ask the LLM for insights.

    Parameters
    ----------
    file:
        Upload object from ``gr.File``; exposes ``.name`` (a temp-file path).

    Returns
    -------
    str
        The model's streamed analysis, or a human-readable error message when
        required columns are missing or no usable rows remain after cleaning.
    """
    print("DEBUG uploaded:", file.name, os.path.getsize(file.name), "bytes", flush=True)
    df = pd.read_csv(file.name)

    req = {"headline", "description", "impressions", "CTR", "form_opens", "spend"}
    if not req.issubset(df.columns):
        return f"Missing columns: {', '.join(req - set(df.columns))}"

    # Convert numerics; rows with unparseable values are dropped below.
    for col in ["impressions", "CTR", "form_opens", "spend"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    df = df.dropna()
    if df.empty:
        # Guard: without this, the model would be prompted with no ad data.
        return "No valid data rows remain after cleaning — check the CSV values."

    df["engagement_rate"] = df["form_opens"] / df["impressions"]
    # Replace zero denominators with NA so division yields NA instead of inf.
    clicks = (df["CTR"] * df["impressions"]).replace(0, pd.NA)
    df["CPC"] = df["spend"] / clicks
    df["cost_per_form_open"] = df["spend"] / df["form_opens"].replace(0, pd.NA)

    # 1 best + 1 worst -> shorter prompt
    top = df.sort_values("CTR", ascending=False).head(1)
    worst = df.sort_values("CTR").head(1)

    def money(value):
        # CPC / cost_per_form_open may be pd.NA (zero clicks / form opens);
        # applying ":.2f" to NA raises TypeError, so render it as "N/A".
        return "N/A" if pd.isna(value) else f"${value:.2f}"

    def rows_to_text(sub):
        # Render each ad row as a compact metrics paragraph for the prompt.
        parts = []
        for _, r in sub.iterrows():
            parts.append(
                f"Headline: {r.headline}\n"
                f"Description: {r.description}\n"
                f"Impressions: {int(r.impressions)}, CTR: {r.CTR:.3f}, "
                f"Form Opens: {int(r.form_opens)}, ER: {r.engagement_rate:.3f}\n"
                f"Spend: ${r.spend:.2f}, CPC: {money(r.CPC)}, "
                f"CPF: {money(r.cost_per_form_open)}\n"
            )
        return "\n".join(parts)

    prompt = (
        "You are a senior digital marketer.\n"
        "Analyse the high‑ and low‑performing ads below and deliver:\n"
        "1. Key patterns of winners.\n"
        "2. Weak points of losers.\n"
        "3. Three actionable creative improvements.\n\n"
        f"--- HIGH CTR ADS ---\n{rows_to_text(top)}\n"
        f"--- LOW CTR ADS ---\n{rows_to_text(worst)}"
    )

    # stream=True -> tokens appear in the logs immediately; answer in ~25-30 s
    stream = llm(
        prompt,
        max_tokens=1500,
        temperature=0.2,
        top_p=0.8,
        stream=True,
    )

    out = []
    for chunk in stream:
        tok = chunk["choices"][0]["text"]
        print(tok, end="", flush=True)  # progress is visible in the logs
        out.append(tok)

    return "".join(out).strip()


# ---------- Gradio UI ----------
demo = gr.Interface(
    fn=analyze_ads,
    inputs=gr.File(label="CSV with: headline, description, impressions, CTR, form_opens, spend"),
    outputs=gr.Textbox(label="AI‑generated analysis & recommendations"),
    title="Ad Performance Analyzer (Gemma‑2b 4‑bit, CPU‑only)",
    description="Upload your ad data and get actionable insights without paid APIs.",
)

if __name__ == "__main__":
    demo.launch()