# Hugging Face Space: omarcevi/gradio-app (status at capture: Sleeping; file size: 12,763 bytes)

import gradio as gr
import joblib
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
import random
import plotly.graph_objects as go
import plotly.express as px

# Persisted estimator and the list of column names used to build its input frame.
model = joblib.load("churn_model.pkl")
model_features = joblib.load("model_features.pkl")

# Load the customer data
customer_df = pd.read_csv("Telco-Customer-Churn.csv")

# Force the numeric columns to real numbers: entries that cannot be parsed
# become NaN under errors='coerce' and are then replaced by 0.
for _numeric_column in ('MonthlyCharges', 'TotalCharges', 'tenure'):
    customer_df[_numeric_column] = pd.to_numeric(
        customer_df[_numeric_column], errors='coerce'
    ).fillna(0)
del _numeric_column  # keep the module namespace free of the loop variable

# Translation table from the Turkish labels shown in the form to the English
# category values used when building one-hot column names.
# autofill_random_customer inverts this mapping, so every value must stay unique.
value_map = {
    # Contract
    "Aydan aya": "Month-to-month",
    "1 yıllık": "One year",
    "2 yıllık": "Two year",
    # PaymentMethod
    "Elektronik çek": "Electronic check",
    "Posta çeki": "Mailed check",
    "Banka havalesi (otomatik)": "Bank transfer (automatic)",
    "Kredi kartı (otomatik)": "Credit card (automatic)",
    # Generic yes / no answers
    "Hayır": "No",
    "Evet": "Yes",
    # NOTE(review): "Yok" is also offered as an InternetService choice; for that
    # field the dataset value is presumably plain "No" — confirm against the CSV.
    "Yok": "No internet service",
    "Telefon hizmeti yok": "No phone service",
    # InternetService
    "Fiber optik": "Fiber optic",
    # gender
    "Erkek": "Male",
    "Kadın": "Female"
}

def customer_to_features(row):
    """Encode one customer record as a feature list ordered like ``model_features``.

    Follows the same feature construction as ``predict_churn`` so that dataset
    rows and form submissions can be compared in one vector space.

    Args:
        row: Mapping-like record (e.g. a ``pandas.Series``) exposing the raw
            columns read below: ``tenure``, ``MonthlyCharges``,
            ``TotalCharges``, ``PhoneService``, ``Contract`` and every name in
            ``categorical_fields``.

    Returns:
        list: One value per entry of ``model_features``, in that order.
    """
    tenure = row['tenure']
    monthly = row['MonthlyCharges']
    total = row['TotalCharges']

    input_dict = {}
    input_dict["tenure"] = tenure
    input_dict["PhoneService"] = row['PhoneService'] == "Yes"
    # Guard the divisions: zero tenure / zero monthly charge fall back to
    # neutral values instead of raising ZeroDivisionError.
    input_dict["avg_charge_per_month"] = total / tenure if tenure > 0 else 0
    input_dict["charge_ratio"] = total / (monthly * tenure) if monthly > 0 and tenure > 0 else 1

    # One-hot tenure bucket.
    tenure_label = "0-12" if tenure <= 12 else "12-24" if tenure <= 24 else "24+"
    for bin_label in ["0-12", "12-24", "24+"]:
        input_dict[f"tenure_bin_{bin_label}"] = (tenure_label == bin_label)

    input_dict["is_long_term_contract"] = row['Contract'] in ["One year", "Two year"]

    categorical_fields = [
        "gender", "SeniorCitizen", "Partner", "Dependents", "PaperlessBilling",
        "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup",
        "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies",
        "Contract", "PaymentMethod"
    ]
    for field in categorical_fields:
        raw_value = row[field]
        mapped_value = value_map.get(raw_value, raw_value)
        active_columns = {f"{field}_{mapped_value}"}
        if field == "SeniorCitizen" and raw_value in (0, 1):
            # The dataset stores SeniorCitizen as 0/1 (autofill_random_customer
            # tests ``== 1``) while predict_churn encodes the form answer as
            # Yes/No. Accept both spellings so the one-hot column named in
            # model_features is activated whichever convention it uses, and
            # dataset vectors agree with form vectors.
            active_columns.add(f"{field}_{'Yes' if raw_value == 1 else 'No'}")
        prefix = f"{field}_"
        for col in model_features:
            if col.startswith(prefix):
                input_dict[col] = col in active_columns

    # Any model column not produced above gets a neutral default.
    for col in model_features:
        if col not in input_dict:
            input_dict[col] = 0 if col == "tenure" or "charge" in col or "avg" in col else False
    return [input_dict[col] for col in model_features]

# Encode every dataset customer once at import time; the stacked matrix has
# one row per customer and is reused by every similarity lookup.
customer_feature_matrix = np.vstack(
    [customer_to_features(record) for _index, record in customer_df.iterrows()]
)

def autofill_random_customer():
    """Pick a random dataset customer and return its values for the input form.

    Returns:
        list: 19 values in the order expected by the autofill button's
        ``outputs``: tenure, monthly charge, total charge, then one Turkish
        dropdown label per categorical field.
    """
    row = customer_df.sample(1).iloc[0]
    # Map English values back to Turkish for dropdowns
    reverse_map = {v: k for k, v in value_map.items()}

    def safe(val, allowed):
        """Translate ``val`` to Turkish; fall back to the first allowed choice."""
        translated = reverse_map.get(val, val)
        return translated if translated in allowed else allowed[0]

    def yes_no(flag):
        """Turkish yes/no label for a boolean condition."""
        return 'Evet' if flag else 'Hayır'

    # A dataset value of "No" reverse-maps to "Hayır", which is not one of the
    # InternetService choices, so the generic fallback would silently show such
    # a customer as "DSL". The form's label for "no internet" is "Yok".
    internet_raw = row['InternetService']
    if internet_raw == "No":
        internet_choice = "Yok"
    else:
        internet_choice = safe(internet_raw, internet_service_options)

    return [
        float(row['tenure']),
        float(row['MonthlyCharges']),
        float(row['TotalCharges']),
        safe(row['PhoneService'], phone_service_options),
        safe(row['gender'], gender_options),
        yes_no(row['SeniorCitizen'] == 1),
        yes_no(row['Partner'] == 'Yes'),
        yes_no(row['Dependents'] == 'Yes'),
        yes_no(row['PaperlessBilling'] == 'Yes'),
        safe(row['MultipleLines'], multiple_lines_options),
        internet_choice,
        safe(row['OnlineSecurity'], online_security_options),
        safe(row['OnlineBackup'], online_backup_options),
        safe(row['DeviceProtection'], device_protection_options),
        safe(row['TechSupport'], tech_support_options),
        safe(row['StreamingTV'], streaming_tv_options),
        safe(row['StreamingMovies'], streaming_movies_options),
        safe(row['Contract'], contract_options),
        safe(row['PaymentMethod'], payment_method_options)
    ]

def find_similar_customers_vector(input_vector, n=5):
    """Return the ``n`` dataset customers nearest to ``input_vector``.

    Nearness is the Euclidean distance between ``input_vector`` and each row
    of ``customer_feature_matrix``. The chosen distances and row positions are
    printed to stdout.

    Args:
        input_vector: Array exposing ``reshape``; it is reshaped to one row.
        n: Number of nearest customers to return.

    Returns:
        A slice of ``customer_df`` restricted to the summary columns below,
        ordered from nearest to farthest.
    """
    query = input_vector.reshape(1, -1)
    dists = euclidean_distances(customer_feature_matrix, query).ravel()
    nearest = np.argsort(dists)[:n]
    print("Top distances:", dists[nearest])
    print("Top indices:", nearest)
    summary_columns = [
        'customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
        'tenure', 'Contract', 'PaymentMethod', 'MonthlyCharges',
        'TotalCharges', 'Churn',
    ]
    matches = customer_df.iloc[nearest]
    return matches[summary_columns]

def predict_churn(
    tenure, monthly, total, PhoneService, gender, SeniorCitizen, Partner, Dependents, PaperlessBilling,
    MultipleLines, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
    StreamingTV, StreamingMovies, Contract, PaymentMethod
):
    """Score one customer described by the form inputs and build the result widgets.

    Args:
        tenure, monthly, total: Numeric form values; converted with ``float``.
        PhoneService ... PaymentMethod: Turkish dropdown labels; translated to
            English through ``value_map`` before one-hot encoding.

    Returns:
        tuple: ``(result_text, gauge_figure, pie_figure, similar_customers)``
        where ``similar_customers`` comes from
        ``find_similar_customers_vector``.

    Raises:
        TypeError, ValueError: If a numeric input cannot be converted by
            ``float`` (for example when the field was left empty).
    """
    # Ensure numeric types
    tenure = float(tenure)
    monthly = float(monthly)
    total = float(total)

    input_dict = {}
    input_dict["tenure"] = tenure
    input_dict["PhoneService"] = PhoneService == "Evet"
    # Guard the divisions: zero tenure / zero monthly charge fall back to
    # neutral values instead of raising ZeroDivisionError.
    input_dict["avg_charge_per_month"] = total / tenure if tenure > 0 else 0
    input_dict["charge_ratio"] = total / (monthly * tenure) if monthly > 0 and tenure > 0 else 1

    # One-hot tenure bucket.
    tenure_label = "0-12" if tenure <= 12 else "12-24" if tenure <= 24 else "24+"
    for bin_label in ["0-12", "12-24", "24+"]:
        input_dict[f"tenure_bin_{bin_label}"] = (tenure_label == bin_label)

    contract_value = value_map.get(Contract, Contract)
    input_dict["is_long_term_contract"] = contract_value in ["One year", "Two year"]

    form = {
        "gender": gender,
        "SeniorCitizen": SeniorCitizen,
        "Partner": Partner,
        "Dependents": Dependents,
        "PaperlessBilling": PaperlessBilling,
        "MultipleLines": MultipleLines,
        "InternetService": InternetService,
        "OnlineSecurity": OnlineSecurity,
        "OnlineBackup": OnlineBackup,
        "DeviceProtection": DeviceProtection,
        "TechSupport": TechSupport,
        "StreamingTV": StreamingTV,
        "StreamingMovies": StreamingMovies,
        "Contract": Contract,
        "PaymentMethod": PaymentMethod
    }
    for field, raw_value in form.items():
        mapped_value = value_map.get(raw_value, raw_value)
        active_columns = {f"{field}_{mapped_value}"}
        if field == "InternetService" and raw_value == "Yok":
            # value_map turns "Yok" into "No internet service", which is the
            # spelling used by the add-on service fields. customer_to_features
            # encodes the dataset's own InternetService value unchanged
            # (presumably plain "No"), so also activate that column; otherwise
            # "no internet" would leave every InternetService_* column False.
            active_columns.add(f"{field}_No")
        elif field == "SeniorCitizen" and mapped_value in ("Yes", "No"):
            # customer_to_features encodes the dataset's 0/1 value as
            # SeniorCitizen_1 / SeniorCitizen_0. Accept both spellings so the
            # column named in model_features is activated either way.
            active_columns.add(f"{field}_{1 if mapped_value == 'Yes' else 0}")
        prefix = f"{field}_"
        for col in model_features:
            if col.startswith(prefix):
                input_dict[col] = col in active_columns

    # Any model column not produced above gets a neutral default.
    for col in model_features:
        if col not in input_dict:
            input_dict[col] = 0 if col == "tenure" or "charge" in col or "avg" in col else False

    input_df = pd.DataFrame([[input_dict[col] for col in model_features]], columns=model_features)
    # Column 1 of predict_proba is used as the churn probability.
    prediction = model.predict_proba(input_df)[0][1]
    score = round(prediction * 100, 2)

    # Create gauge chart for churn risk
    fig_gauge = go.Figure(go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Churn Riski"},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 30], 'color': "lightgreen"},
                {'range': [30, 70], 'color': "yellow"},
                {'range': [70, 100], 'color': "red"}
            ],
            # Marker at the same 50% cut-off used for the textual verdict below.
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 50
            }
        }
    ))

    # Create pie chart for probability distribution
    fig_pie = px.pie(
        values=[score, 100 - score],
        names=['Churn Riski', 'Kalma Olasılığı'],
        title='Müşteri Durumu Dağılımı',
        color_discrete_sequence=['red', 'green']
    )

    if score >= 50:
        comment = "Müşteri Kaybedilebilir."
    else:
        comment = "Müşteri Kayıp Riski Taşımıyor."
    result = f"Churn Riski: %{score} — {comment}"

    # Vector similarity
    similar_customers = find_similar_customers_vector(input_df.values[0], n=5)
    return result, fig_gauge, fig_pie, similar_customers

# Dropdown choices shown in the form (Turkish values). The first entry of each
# list is what autofill_random_customer falls back to for an unknown value.
_YES_NO = ("Evet", "Hayır")
_NO_YES_NO_INTERNET = ("Hayır", "Evet", "Yok")

# Each name gets its own list object, built from the shared templates above.
phone_service_options = list(_YES_NO)
gender_options = ["Erkek", "Kadın"]
senior_citizen_options = list(_YES_NO)
partner_options = list(_YES_NO)
dependents_options = list(_YES_NO)
paperless_billing_options = list(_YES_NO)

multiple_lines_options = ["Hayır", "Evet", "Telefon hizmeti yok"]
internet_service_options = ["DSL", "Fiber optik", "Yok"]

online_security_options = list(_NO_YES_NO_INTERNET)
online_backup_options = list(_NO_YES_NO_INTERNET)
device_protection_options = list(_NO_YES_NO_INTERNET)
tech_support_options = list(_NO_YES_NO_INTERNET)
streaming_tv_options = list(_NO_YES_NO_INTERNET)
streaming_movies_options = list(_NO_YES_NO_INTERNET)

contract_options = ["Aydan aya", "1 yıllık", "2 yıllık"]
payment_method_options = [
    "Elektronik çek",
    "Posta çeki",
    "Banka havalesi (otomatik)",
    "Kredi kartı (otomatik)",
]

# Build the Gradio UI. Components are declared in display order: numeric
# inputs, categorical dropdowns, the two action buttons, then the outputs.
with gr.Blocks() as demo:
    gr.Markdown("# Müşteri Churn Tahmini")
    # Numeric inputs.
    with gr.Row():
        tenure = gr.Number(label="Kullanım Süresi (tenure)", value=1)
        monthly = gr.Number(label="Aylık Ücret (MonthlyCharges)", value=1)
        total = gr.Number(label="Toplam Ücret (TotalCharges)", value=1)
    # Customer profile dropdowns (no initial value is set on any dropdown).
    with gr.Row():
        PhoneService = gr.Dropdown(phone_service_options, label="Telefon Hizmeti (PhoneService)")
        gender = gr.Dropdown(gender_options, label="Cinsiyet (gender)")
        SeniorCitizen = gr.Dropdown(senior_citizen_options, label="Kıdemli Vatandaş (SeniorCitizen)")
        Partner = gr.Dropdown(partner_options, label="Partner")
        Dependents = gr.Dropdown(dependents_options, label="Bağımlılar (Dependents)")
        PaperlessBilling = gr.Dropdown(paperless_billing_options, label="Kağıtsız Fatura (PaperlessBilling)")
    # Subscribed services.
    with gr.Row():
        MultipleLines = gr.Dropdown(multiple_lines_options, label="Çoklu Hat (MultipleLines)")
        InternetService = gr.Dropdown(internet_service_options, label="İnternet Servisi (InternetService)")
        OnlineSecurity = gr.Dropdown(online_security_options, label="Online Güvenlik (OnlineSecurity)")
        OnlineBackup = gr.Dropdown(online_backup_options, label="Online Yedekleme (OnlineBackup)")
        DeviceProtection = gr.Dropdown(device_protection_options, label="Cihaz Koruma (DeviceProtection)")
        TechSupport = gr.Dropdown(tech_support_options, label="Teknik Destek (TechSupport)")
        StreamingTV = gr.Dropdown(streaming_tv_options, label="TV Yayını (StreamingTV)")
        StreamingMovies = gr.Dropdown(streaming_movies_options, label="Film Yayını (StreamingMovies)")
    # Contract and billing.
    with gr.Row():
        Contract = gr.Dropdown(contract_options, label="Sözleşme (Contract)")
        PaymentMethod = gr.Dropdown(payment_method_options, label="Ödeme Yöntemi (PaymentMethod)")
    
    autofill_btn = gr.Button("Rastgele Müşteri ile Doldur")
    submit_btn = gr.Button("Tahmin Et")
    
    # Textual verdict.
    with gr.Row():
        output = gr.Textbox(label="Sonuç")
    
    # Charts returned by predict_churn.
    with gr.Row():
        gauge_plot = gr.Plot(label="Churn Risk Gauge")
        pie_plot = gr.Plot(label="Probability Distribution")
    
    similar_customers_table = gr.Dataframe(label="Benzer Müşteriler (İlk 5)")
    
    # The outputs list must stay in the same order as the list returned by
    # autofill_random_customer.
    autofill_btn.click(
        autofill_random_customer,
        inputs=[],
        outputs=[tenure, monthly, total, PhoneService, gender, SeniorCitizen, Partner, Dependents, PaperlessBilling,
                MultipleLines, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
                StreamingTV, StreamingMovies, Contract, PaymentMethod]
    )
    # The inputs list must stay in the same order as predict_churn's parameters,
    # and the outputs in the order of its returned tuple.
    submit_btn.click(
        predict_churn,
        inputs=[tenure, monthly, total, PhoneService, gender, SeniorCitizen, Partner, Dependents, PaperlessBilling,
                MultipleLines, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
                StreamingTV, StreamingMovies, Contract, PaymentMethod],
        outputs=[output, gauge_plot, pie_plot, similar_customers_table]
    )

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()