Spaces:
Sleeping
Sleeping
import gradio as gr | |
import joblib | |
import pandas as pd | |
import numpy as np | |
from sklearn.metrics.pairwise import euclidean_distances | |
import random | |
import plotly.graph_objects as go | |
import plotly.express as px | |
# Classifier used for predict_proba and the ordered feature columns it is fed.
# NOTE(review): joblib.load unpickles arbitrary objects; only load artifacts
# that come from a trusted source.
model = joblib.load("churn_model.pkl")
model_features = joblib.load("model_features.pkl")

# Customer table used for the random autofill and the similar-customer lookup.
customer_df = pd.read_csv("Telco-Customer-Churn.csv")

# Coerce the numeric columns; entries that cannot be parsed become 0.
for _numeric_column in ("MonthlyCharges", "TotalCharges", "tenure"):
    customer_df[_numeric_column] = pd.to_numeric(
        customer_df[_numeric_column], errors="coerce"
    ).fillna(0)
# Turkish form labels -> English category names, grouped by the kind of field
# they belong to. The groups are merged in this order into one lookup table.
_CONTRACT_LABELS = {
    "Aydan aya": "Month-to-month",
    "1 yıllık": "One year",
    "2 yıllık": "Two year",
}
_PAYMENT_LABELS = {
    "Elektronik çek": "Electronic check",
    "Posta çeki": "Mailed check",
    "Banka havalesi (otomatik)": "Bank transfer (automatic)",
    "Kredi kartı (otomatik)": "Credit card (automatic)",
}
_ANSWER_LABELS = {
    "Hayır": "No",
    "Evet": "Yes",
    "Yok": "No internet service",
    "Telefon hizmeti yok": "No phone service",
}
_OTHER_LABELS = {
    "Fiber optik": "Fiber optic",
    "Erkek": "Male",
    "Kadın": "Female",
}

value_map = {
    **_CONTRACT_LABELS,
    **_PAYMENT_LABELS,
    **_ANSWER_LABELS,
    **_OTHER_LABELS,
}
def customer_to_features(row):
    """Encode one dataset row as a feature vector ordered like ``model_features``.

    The encoding follows the same steps as ``predict_churn`` so that dataset
    customers and submitted forms can be compared in the same feature space.

    Args:
        row: Mapping-like record (for example a pandas Series) with the raw
            dataset columns ``tenure``, ``MonthlyCharges``, ``TotalCharges``,
            ``PhoneService``, ``Contract`` and the categorical fields below.

    Returns:
        A list holding one value per entry of ``model_features``, in order.
    """
    tenure = row["tenure"]
    monthly = row["MonthlyCharges"]
    total = row["TotalCharges"]

    features = {
        "tenure": tenure,
        "PhoneService": row["PhoneService"] == "Yes",
        # Guard the divisions: brand-new customers have tenure 0.
        "avg_charge_per_month": total / tenure if tenure > 0 else 0,
        "charge_ratio": total / (monthly * tenure) if monthly > 0 and tenure > 0 else 1,
        "is_long_term_contract": row["Contract"] in ("One year", "Two year"),
    }

    if tenure <= 12:
        tenure_label = "0-12"
    elif tenure <= 24:
        tenure_label = "12-24"
    else:
        tenure_label = "24+"
    for bin_label in ("0-12", "12-24", "24+"):
        features[f"tenure_bin_{bin_label}"] = tenure_label == bin_label

    categorical_fields = (
        "gender", "SeniorCitizen", "Partner", "Dependents", "PaperlessBilling",
        "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup",
        "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies",
        "Contract", "PaymentMethod",
    )
    for field in categorical_fields:
        raw_value = row[field]
        # The form path encodes SeniorCitizen as "Yes"/"No" (via "Evet"/"Hayır"),
        # while the dataset stores it as 1/0. Normalise it here so both paths
        # select the same one-hot column.
        if field == "SeniorCitizen" and raw_value in (0, 1):
            raw_value = "Yes" if raw_value == 1 else "No"
        mapped_value = value_map.get(raw_value, raw_value)
        active_column = f"{field}_{mapped_value}"
        prefix = f"{field}_"
        for col in model_features:
            if col.startswith(prefix):
                features[col] = col == active_column

    # Any model feature not produced above gets a neutral default: 0 for the
    # numeric-looking columns, False for the indicator columns.
    for col in model_features:
        if col not in features:
            is_numeric = col == "tenure" or "charge" in col or "avg" in col
            features[col] = 0 if is_numeric else False

    return [features[col] for col in model_features]
# Encode every dataset customer once at import time; the similarity lookup
# then only has to compute distances against this matrix.
_encoded_customers = [
    customer_to_features(record) for _, record in customer_df.iterrows()
]
customer_feature_matrix = np.vstack(_encoded_customers)
def autofill_random_customer():
    """Pick a random dataset customer and return its values as form inputs.

    Returns:
        A list of 19 values in the order of the form components: tenure,
        monthly charges and total charges as floats, followed by the Turkish
        dropdown labels for the remaining fields.
    """
    row = customer_df.sample(1).iloc[0]

    # English dataset value -> Turkish dropdown label.
    reverse_map = {english: turkish for turkish, english in value_map.items()}

    def to_option(value, allowed):
        """Return the dropdown label for ``value``, restricted to ``allowed``."""
        label = reverse_map.get(value, value)
        if label in allowed:
            return label
        # A plain "No" reverse-maps to "Hayır". Dropdowns that do not offer
        # "Hayır" express the same thing as "Yok", so use that instead of
        # silently falling back to the first option.
        # NOTE(review): assumes the CSV stores "No" for customers without
        # internet service, as the public Telco churn dataset does.
        if value == "No" and "Yok" in allowed:
            return "Yok"
        return allowed[0]

    def yes_no(flag):
        """Return the Turkish yes/no label for a boolean."""
        return "Evet" if flag else "Hayır"

    return [
        float(row["tenure"]),
        float(row["MonthlyCharges"]),
        float(row["TotalCharges"]),
        to_option(row["PhoneService"], phone_service_options),
        to_option(row["gender"], gender_options),
        yes_no(row["SeniorCitizen"] == 1),
        yes_no(row["Partner"] == "Yes"),
        yes_no(row["Dependents"] == "Yes"),
        yes_no(row["PaperlessBilling"] == "Yes"),
        to_option(row["MultipleLines"], multiple_lines_options),
        to_option(row["InternetService"], internet_service_options),
        to_option(row["OnlineSecurity"], online_security_options),
        to_option(row["OnlineBackup"], online_backup_options),
        to_option(row["DeviceProtection"], device_protection_options),
        to_option(row["TechSupport"], tech_support_options),
        to_option(row["StreamingTV"], streaming_tv_options),
        to_option(row["StreamingMovies"], streaming_movies_options),
        to_option(row["Contract"], contract_options),
        to_option(row["PaymentMethod"], payment_method_options),
    ]
def find_similar_customers_vector(input_vector, n=5):
    """Return the ``n`` dataset customers closest to ``input_vector``.

    Closeness is the Euclidean distance between ``input_vector`` and the rows
    of ``customer_feature_matrix``. The chosen distances and row positions are
    printed to stdout.

    Args:
        input_vector: Array with one value per model feature.
        n: Number of nearest customers to return.

    Returns:
        The matching rows of ``customer_df``, nearest first, limited to a
        fixed set of display columns.
    """
    query = input_vector.reshape(1, -1)
    distances = euclidean_distances(customer_feature_matrix, query).flatten()
    nearest = np.argsort(distances)[:n]

    print("Top distances:", distances[nearest])
    print("Top indices:", nearest)

    display_columns = [
        'customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
        'tenure', 'Contract', 'PaymentMethod', 'MonthlyCharges',
        'TotalCharges', 'Churn',
    ]
    return customer_df.iloc[nearest][display_columns]
def _form_number(value):
    """Return a numeric form value as float; an empty field (``None``) counts as 0."""
    return 0.0 if value is None else float(value)


def _model_category(field, label):
    """Translate a dropdown label into the category name used by ``model_features``."""
    category = value_map.get(label, label)
    # "Yok" always translates to "No internet service". For a field whose
    # model columns do not include that category but do include a plain "No"
    # column, the plain "No" column is the matching one.
    if (
        category == "No internet service"
        and f"{field}_{category}" not in model_features
        and f"{field}_No" in model_features
    ):
        return "No"
    return category


def _encode_form(tenure, monthly, total, phone_service, selections):
    """Build the feature-name -> value mapping for one submitted form."""
    features = {
        "tenure": tenure,
        "PhoneService": phone_service == "Evet",
        # Guard the divisions against a tenure or monthly charge of 0.
        "avg_charge_per_month": total / tenure if tenure > 0 else 0,
        "charge_ratio": total / (monthly * tenure) if monthly > 0 and tenure > 0 else 1,
    }

    tenure_label = "0-12" if tenure <= 12 else "12-24" if tenure <= 24 else "24+"
    for bin_label in ("0-12", "12-24", "24+"):
        features[f"tenure_bin_{bin_label}"] = tenure_label == bin_label

    contract_label = selections["Contract"]
    contract_value = value_map.get(contract_label, contract_label)
    features["is_long_term_contract"] = contract_value in ("One year", "Two year")

    # One-hot encode: for every model column belonging to a field, mark only
    # the column that matches the selected category.
    for field, label in selections.items():
        prefix = f"{field}_"
        active_column = f"{field}_{_model_category(field, label)}"
        for col in model_features:
            if col.startswith(prefix):
                features[col] = col == active_column

    # Neutral defaults for model features not produced above.
    for col in model_features:
        if col not in features:
            is_numeric = col == "tenure" or "charge" in col or "avg" in col
            features[col] = 0 if is_numeric else False
    return features


def _churn_gauge(score):
    """Build the gauge figure showing the churn score on a 0-100 scale."""
    return go.Figure(go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Churn Riski"},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 30], 'color': "lightgreen"},
                {'range': [30, 70], 'color': "yellow"},
                {'range': [70, 100], 'color': "red"},
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 50,
            },
        },
    ))


def _churn_pie(score):
    """Build the pie figure splitting 100 into churn score and remainder."""
    return px.pie(
        values=[score, 100 - score],
        names=['Churn Riski', 'Kalma Olasılığı'],
        title='Müşteri Durumu Dağılımı',
        color_discrete_sequence=['red', 'green'],
    )


def predict_churn(
    tenure, monthly, total, PhoneService, gender, SeniorCitizen, Partner, Dependents, PaperlessBilling,
    MultipleLines, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
    StreamingTV, StreamingMovies, Contract, PaymentMethod
):
    """Score one submitted form and return everything the UI displays.

    Args:
        tenure, monthly, total: Numeric form values; an empty field (``None``)
            is treated as 0.
        PhoneService: "Evet" when the customer has phone service.
        gender ... PaymentMethod: Dropdown labels (Turkish) for the
            categorical fields; they are translated through ``value_map``.

    Returns:
        A tuple ``(result, fig_gauge, fig_pie, similar_customers)``: the
        result sentence, the gauge figure, the pie figure, and the five
        nearest dataset customers.
    """
    # A cleared number field arrives as None; float(None) would raise.
    tenure = _form_number(tenure)
    monthly = _form_number(monthly)
    total = _form_number(total)

    form = {
        "gender": gender,
        "SeniorCitizen": SeniorCitizen,
        "Partner": Partner,
        "Dependents": Dependents,
        "PaperlessBilling": PaperlessBilling,
        "MultipleLines": MultipleLines,
        "InternetService": InternetService,
        "OnlineSecurity": OnlineSecurity,
        "OnlineBackup": OnlineBackup,
        "DeviceProtection": DeviceProtection,
        "TechSupport": TechSupport,
        "StreamingTV": StreamingTV,
        "StreamingMovies": StreamingMovies,
        "Contract": Contract,
        "PaymentMethod": PaymentMethod,
    }
    input_dict = _encode_form(tenure, monthly, total, PhoneService, form)

    input_df = pd.DataFrame(
        [[input_dict[col] for col in model_features]], columns=model_features
    )
    # Second column of predict_proba is read as the churn probability.
    prediction = model.predict_proba(input_df)[0][1]
    score = round(prediction * 100, 2)

    fig_gauge = _churn_gauge(score)
    fig_pie = _churn_pie(score)

    if score >= 50:
        comment = "Müşteri Kaybedilebilir."
    else:
        comment = "Müşteri Kayıp Riski Taşımıyor."
    result = f"Churn Riski: %{score} — {comment}"

    # Vector similarity against the precomputed dataset encodings.
    similar_customers = find_similar_customers_vector(input_df.values[0], n=5)
    return result, fig_gauge, fig_pie, similar_customers
# Dropdown choices for the form (Turkish labels). Several fields share the
# same choices, so each list is copied from a common template; every field
# still gets its own list object.
_YES_NO_CHOICES = ("Evet", "Hayır")
_ADDON_CHOICES = ("Hayır", "Evet", "Yok")

phone_service_options = list(_YES_NO_CHOICES)
gender_options = ["Erkek", "Kadın"]
senior_citizen_options = list(_YES_NO_CHOICES)
partner_options = list(_YES_NO_CHOICES)
dependents_options = list(_YES_NO_CHOICES)
paperless_billing_options = list(_YES_NO_CHOICES)

multiple_lines_options = ["Hayır", "Evet", "Telefon hizmeti yok"]
internet_service_options = ["DSL", "Fiber optik", "Yok"]

online_security_options = list(_ADDON_CHOICES)
online_backup_options = list(_ADDON_CHOICES)
device_protection_options = list(_ADDON_CHOICES)
tech_support_options = list(_ADDON_CHOICES)
streaming_tv_options = list(_ADDON_CHOICES)
streaming_movies_options = list(_ADDON_CHOICES)

contract_options = ["Aydan aya", "1 yıllık", "2 yıllık"]
payment_method_options = [
    "Elektronik çek",
    "Posta çeki",
    "Banka havalesi (otomatik)",
    "Kredi kartı (otomatik)",
]
# Form layout, result widgets and event wiring for the churn predictor.
# NOTE(review): the original indentation was lost, so the row nesting below is
# reconstructed; confirm that the buttons and the table belong outside the rows.
with gr.Blocks() as demo:
    gr.Markdown("# Müşteri Churn Tahmini")

    # Numeric inputs.
    with gr.Row():
        tenure = gr.Number(label="Kullanım Süresi (tenure)", value=1)
        monthly = gr.Number(label="Aylık Ücret (MonthlyCharges)", value=1)
        total = gr.Number(label="Toplam Ücret (TotalCharges)", value=1)

    # Customer profile.
    with gr.Row():
        PhoneService = gr.Dropdown(phone_service_options, label="Telefon Hizmeti (PhoneService)")
        gender = gr.Dropdown(gender_options, label="Cinsiyet (gender)")
        SeniorCitizen = gr.Dropdown(senior_citizen_options, label="Kıdemli Vatandaş (SeniorCitizen)")
        Partner = gr.Dropdown(partner_options, label="Partner")
        Dependents = gr.Dropdown(dependents_options, label="Bağımlılar (Dependents)")
        PaperlessBilling = gr.Dropdown(paperless_billing_options, label="Kağıtsız Fatura (PaperlessBilling)")

    # Subscribed services.
    with gr.Row():
        MultipleLines = gr.Dropdown(multiple_lines_options, label="Çoklu Hat (MultipleLines)")
        InternetService = gr.Dropdown(internet_service_options, label="İnternet Servisi (InternetService)")
        OnlineSecurity = gr.Dropdown(online_security_options, label="Online Güvenlik (OnlineSecurity)")
        OnlineBackup = gr.Dropdown(online_backup_options, label="Online Yedekleme (OnlineBackup)")
        DeviceProtection = gr.Dropdown(device_protection_options, label="Cihaz Koruma (DeviceProtection)")
        TechSupport = gr.Dropdown(tech_support_options, label="Teknik Destek (TechSupport)")
        StreamingTV = gr.Dropdown(streaming_tv_options, label="TV Yayını (StreamingTV)")
        StreamingMovies = gr.Dropdown(streaming_movies_options, label="Film Yayını (StreamingMovies)")

    # Contract and payment.
    with gr.Row():
        Contract = gr.Dropdown(contract_options, label="Sözleşme (Contract)")
        PaymentMethod = gr.Dropdown(payment_method_options, label="Ödeme Yöntemi (PaymentMethod)")

    autofill_btn = gr.Button("Rastgele Müşteri ile Doldur")
    submit_btn = gr.Button("Tahmin Et")

    # Result widgets.
    with gr.Row():
        output = gr.Textbox(label="Sonuç")
    with gr.Row():
        gauge_plot = gr.Plot(label="Churn Risk Gauge")
        pie_plot = gr.Plot(label="Probability Distribution")
    similar_customers_table = gr.Dataframe(label="Benzer Müşteriler (İlk 5)")

    # The same component order serves as the autofill outputs and as the
    # prediction inputs; it must match the parameter order of predict_churn
    # and the value order returned by autofill_random_customer.
    form_components = [
        tenure, monthly, total, PhoneService, gender, SeniorCitizen, Partner,
        Dependents, PaperlessBilling, MultipleLines, InternetService,
        OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
        StreamingTV, StreamingMovies, Contract, PaymentMethod,
    ]
    result_components = [output, gauge_plot, pie_plot, similar_customers_table]

    autofill_btn.click(
        autofill_random_customer,
        inputs=[],
        outputs=list(form_components),
    )
    submit_btn.click(
        predict_churn,
        inputs=list(form_components),
        outputs=result_components,
    )

if __name__ == "__main__":
    demo.launch()