# gradio-app / gradio_app.py
# omarcevi's picture
# Upload folder using huggingface_hub
# fc6cf5f verified
import gradio as gr
import joblib
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
import random
import plotly.graph_objects as go
import plotly.express as px
# Trained classifier, plus the column names (in order) used to build its input.
model = joblib.load("churn_model.pkl")
model_features = joblib.load("model_features.pkl")

# Customer table used for autofill and for the similar-customer lookup.
# The three numeric columns are coerced to numbers; unparseable cells become 0.
customer_df = pd.read_csv("Telco-Customer-Churn.csv")
customer_df = customer_df.assign(
    **{
        column: pd.to_numeric(customer_df[column], errors="coerce").fillna(0)
        for column in ("MonthlyCharges", "TotalCharges", "tenure")
    }
)

# Turkish UI label -> English value as it appears in the dataset.
value_map = {
    # Contract
    "Aydan aya": "Month-to-month",
    "1 yıllık": "One year",
    "2 yıllık": "Two year",
    # Payment method
    "Elektronik çek": "Electronic check",
    "Posta çeki": "Mailed check",
    "Banka havalesi (otomatik)": "Bank transfer (automatic)",
    "Kredi kartı (otomatik)": "Credit card (automatic)",
    # Yes / no and "service not available" answers
    "Hayır": "No",
    "Evet": "Yes",
    "Yok": "No internet service",
    "Telefon hizmeti yok": "No phone service",
    # Internet service
    "Fiber optik": "Fiber optic",
    # Gender
    "Erkek": "Male",
    "Kadın": "Female",
}
def customer_to_features(row, features=None, translations=None):
    """Encode one customer record as a feature vector.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with the dataset's
            column names: ``tenure``, ``MonthlyCharges``, ``TotalCharges``,
            ``PhoneService`` and every categorical field listed below.
        features: Ordered feature names to emit. Defaults to the module-level
            ``model_features``.
        translations: Optional label -> dataset value mapping applied to each
            categorical value before matching. Defaults to the module-level
            ``value_map``.

    Returns:
        A list with one value per entry of ``features``, in the same order.
    """
    features = model_features if features is None else features
    translations = value_map if translations is None else translations

    tenure = row["tenure"]
    monthly = row["MonthlyCharges"]
    total = row["TotalCharges"]
    has_usage = tenure > 0

    # SeniorCitizen may arrive as 0/1 (dataset) or Yes/No (form); compare
    # through its text form so both spellings resolve to the same encoding.
    senior_text = str(row["SeniorCitizen"])
    if senior_text in ("1", "1.0", "Yes", "True"):
        senior_suffixes = ("1", "Yes")
    elif senior_text in ("0", "0.0", "No", "False"):
        senior_suffixes = ("0", "No")
    else:
        senior_suffixes = (senior_text,)

    encoded = {
        "tenure": tenure,
        # Raw numeric columns: only used when present in `features`. Without
        # these entries they would silently fall through to the False default.
        "MonthlyCharges": monthly,
        "TotalCharges": total,
        "SeniorCitizen": int("1" in senior_suffixes),
        "PhoneService": row["PhoneService"] == "Yes",
        "avg_charge_per_month": total / tenure if has_usage else 0,
        "charge_ratio": total / (monthly * tenure) if monthly > 0 and has_usage else 1,
        "is_long_term_contract": row["Contract"] in ("One year", "Two year"),
    }

    tenure_label = "0-12" if tenure <= 12 else "12-24" if tenure <= 24 else "24+"
    for bin_label in ("0-12", "12-24", "24+"):
        encoded[f"tenure_bin_{bin_label}"] = tenure_label == bin_label

    categorical_fields = (
        "gender", "SeniorCitizen", "Partner", "Dependents", "PaperlessBilling",
        "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup",
        "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies",
        "Contract", "PaymentMethod",
    )
    for field in categorical_fields:
        if field == "SeniorCitizen":
            accepted = senior_suffixes
        else:
            raw_value = row[field]
            accepted = (str(translations.get(raw_value, raw_value)),)
        prefix = f"{field}_"
        # One-hot: every "<field>_<value>" column is set, True only on a match.
        for col in features:
            if col.startswith(prefix):
                encoded[col] = col[len(prefix):] in accepted

    # Anything still missing gets a neutral default (0 for numeric-looking
    # names, False otherwise).
    for col in features:
        if col not in encoded:
            encoded[col] = 0 if col == "tenure" or "charge" in col or "avg" in col else False

    return [encoded[col] for col in features]
# One encoded row per customer, stacked into a 2-D array so that distances to
# a query vector can be computed in a single call.
customer_feature_matrix = np.vstack(
    [customer_to_features(customer) for _index, customer in customer_df.iterrows()]
)
def autofill_random_customer():
    """Pick a random customer and return its values in form-field order.

    The returned list starts with tenure, monthly charges and total charges
    (as floats), followed by one value per dropdown. Dataset values are
    translated to their Turkish dropdown label through the inverse of
    ``value_map``; a value with no matching choice falls back to the first
    choice of that dropdown.
    """
    customer = customer_df.sample(1).iloc[0]
    english_to_turkish = {english: turkish for turkish, english in value_map.items()}

    def dropdown_choice(column, choices):
        english = customer[column]
        label = english_to_turkish.get(english, english)
        if label in choices:
            return label
        return choices[0]

    def yes_no(is_yes):
        return "Evet" if is_yes else "Hayır"

    filled = [
        float(customer[column])
        for column in ("tenure", "MonthlyCharges", "TotalCharges")
    ]
    filled.append(dropdown_choice("PhoneService", phone_service_options))
    filled.append(dropdown_choice("gender", gender_options))
    # SeniorCitizen is compared against 1, the other flags against "Yes".
    filled.append(yes_no(customer["SeniorCitizen"] == 1))
    filled.extend(
        yes_no(customer[column] == "Yes")
        for column in ("Partner", "Dependents", "PaperlessBilling")
    )
    filled.extend(
        dropdown_choice(column, choices)
        for column, choices in (
            ("MultipleLines", multiple_lines_options),
            ("InternetService", internet_service_options),
            ("OnlineSecurity", online_security_options),
            ("OnlineBackup", online_backup_options),
            ("DeviceProtection", device_protection_options),
            ("TechSupport", tech_support_options),
            ("StreamingTV", streaming_tv_options),
            ("StreamingMovies", streaming_movies_options),
            ("Contract", contract_options),
            ("PaymentMethod", payment_method_options),
        )
    )
    return filled
def find_similar_customers_vector(input_vector, n=5):
    """Return the ``n`` customers closest to ``input_vector``.

    Closeness is the Euclidean distance between ``input_vector`` and each row
    of ``customer_feature_matrix``. The distances and row positions of the
    matches are printed, and a summary slice of ``customer_df`` for those rows
    is returned, nearest first.
    """
    query = input_vector.reshape(1, -1)
    distances = euclidean_distances(customer_feature_matrix, query).ravel()
    nearest = np.argsort(distances)[:n]

    print("Top distances:", distances[nearest])
    print("Top indices:", nearest)

    summary_columns = [
        'customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
        'tenure', 'Contract', 'PaymentMethod', 'MonthlyCharges',
        'TotalCharges', 'Churn',
    ]
    return customer_df.iloc[nearest][summary_columns]
# One-hot suffixes accepted for SeniorCitizen columns: the form yields Yes/No
# while the dataset stores 1/0, so either spelling of the column may exist.
_SENIOR_SUFFIXES = {"Yes": ("Yes", "1"), "No": ("No", "0")}


def _encode_form(tenure, monthly, total, phone_service, form):
    """Encode the form values as a dict with an entry for every model feature."""
    senior = value_map.get(form["SeniorCitizen"], form["SeniorCitizen"])
    contract = value_map.get(form["Contract"], form["Contract"])
    has_usage = tenure > 0

    encoded = {
        "tenure": tenure,
        # Raw numeric columns: only read when present in model_features.
        # Without these entries they would fall through to the False default.
        "MonthlyCharges": monthly,
        "TotalCharges": total,
        "SeniorCitizen": int(senior == "Yes"),
        "PhoneService": phone_service == "Evet",
        "avg_charge_per_month": total / tenure if has_usage else 0,
        "charge_ratio": total / (monthly * tenure) if monthly > 0 and has_usage else 1,
        "is_long_term_contract": contract in ("One year", "Two year"),
    }

    tenure_label = "0-12" if tenure <= 12 else "12-24" if tenure <= 24 else "24+"
    for bin_label in ("0-12", "12-24", "24+"):
        encoded[f"tenure_bin_{bin_label}"] = tenure_label == bin_label

    for field, raw_value in form.items():
        label = value_map.get(raw_value, raw_value)
        if field == "SeniorCitizen":
            accepted = _SENIOR_SUFFIXES.get(label, (str(label),))
        else:
            accepted = (str(label),)
        prefix = f"{field}_"
        # One-hot: every "<field>_<value>" column is set, True only on a match.
        for col in model_features:
            if col.startswith(prefix):
                encoded[col] = col[len(prefix):] in accepted

    # Anything still missing gets a neutral default (0 for numeric-looking
    # names, False otherwise).
    for col in model_features:
        if col not in encoded:
            encoded[col] = 0 if col == "tenure" or "charge" in col or "avg" in col else False
    return encoded


def _risk_gauge(score):
    """Build the gauge figure showing the churn score on a 0-100 axis."""
    return go.Figure(go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Churn Riski"},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 30], 'color': "lightgreen"},
                {'range': [30, 70], 'color': "yellow"},
                {'range': [70, 100], 'color': "red"},
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 50,
            },
        },
    ))


def _risk_pie(score):
    """Build the pie figure splitting 100 into the churn score and the rest."""
    return px.pie(
        values=[score, 100 - score],
        names=['Churn Riski', 'Kalma Olasılığı'],
        title='Müşteri Durumu Dağılımı',
        color_discrete_sequence=['red', 'green'],
    )


def predict_churn(
    tenure, monthly, total, PhoneService, gender, SeniorCitizen, Partner, Dependents, PaperlessBilling,
    MultipleLines, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
    StreamingTV, StreamingMovies, Contract, PaymentMethod
):
    """Score the customer described by the form and find similar customers.

    Args:
        tenure, monthly, total: Numeric form values; converted with ``float``.
        PhoneService ... PaymentMethod: Dropdown selections (Turkish labels,
            translated through ``value_map`` before encoding).

    Returns:
        A 4-tuple ``(result_text, gauge_figure, pie_figure, similar_customers)``.
        The score is the positive-class probability from ``model`` expressed
        as a percentage rounded to two decimals; 50 or more is reported as a
        churn risk.

    Raises:
        gr.Error: If any of the three numeric fields is empty.
    """
    # A cleared number field arrives as None; report it instead of letting
    # float(None) fail with an opaque TypeError.
    if tenure is None or monthly is None or total is None:
        raise gr.Error("Kullanım süresi, aylık ücret ve toplam ücret alanları boş bırakılamaz.")

    tenure = float(tenure)
    monthly = float(monthly)
    total = float(total)

    form = {
        "gender": gender,
        "SeniorCitizen": SeniorCitizen,
        "Partner": Partner,
        "Dependents": Dependents,
        "PaperlessBilling": PaperlessBilling,
        "MultipleLines": MultipleLines,
        "InternetService": InternetService,
        "OnlineSecurity": OnlineSecurity,
        "OnlineBackup": OnlineBackup,
        "DeviceProtection": DeviceProtection,
        "TechSupport": TechSupport,
        "StreamingTV": StreamingTV,
        "StreamingMovies": StreamingMovies,
        "Contract": Contract,
        "PaymentMethod": PaymentMethod,
    }
    encoded = _encode_form(tenure, monthly, total, PhoneService, form)
    input_df = pd.DataFrame([[encoded[col] for col in model_features]], columns=model_features)

    prediction = model.predict_proba(input_df)[0][1]
    score = round(prediction * 100, 2)

    if score >= 50:
        comment = "Müşteri Kaybedilebilir."
    else:
        comment = "Müşteri Kayıp Riski Taşımıyor."
    # Separator added: the score and the comment previously ran together.
    result = f"Churn Riski: %{score} — {comment}"

    similar_customers = find_similar_customers_vector(input_df.values[0], n=5)
    return result, _risk_gauge(score), _risk_pie(score), similar_customers
# Dropdown choices shown in the form (Turkish labels). Each name gets its own
# list so that no two dropdowns share a list object.
_YES_NO = ("Evet", "Hayır")
_NO_YES_UNAVAILABLE = ("Hayır", "Evet", "Yok")

phone_service_options = list(_YES_NO)
gender_options = ["Erkek", "Kadın"]
senior_citizen_options = list(_YES_NO)
partner_options = list(_YES_NO)
dependents_options = list(_YES_NO)
paperless_billing_options = list(_YES_NO)

multiple_lines_options = ["Hayır", "Evet", "Telefon hizmeti yok"]
internet_service_options = ["DSL", "Fiber optik", "Yok"]

online_security_options = list(_NO_YES_UNAVAILABLE)
online_backup_options = list(_NO_YES_UNAVAILABLE)
device_protection_options = list(_NO_YES_UNAVAILABLE)
tech_support_options = list(_NO_YES_UNAVAILABLE)
streaming_tv_options = list(_NO_YES_UNAVAILABLE)
streaming_movies_options = list(_NO_YES_UNAVAILABLE)

contract_options = ["Aydan aya", "1 yıllık", "2 yıllık"]
payment_method_options = [
    "Elektronik çek",
    "Posta çeki",
    "Banka havalesi (otomatik)",
    "Kredi kartı (otomatik)",
]
# Form layout and event wiring.
# NOTE(review): the source lost its indentation, so the nesting below (which
# components sit inside which row) is reconstructed — confirm against the
# deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("# Müşteri Churn Tahmini")

    # Numeric inputs.
    with gr.Row():
        tenure = gr.Number(value=1, label="Kullanım Süresi (tenure)")
        monthly = gr.Number(value=1, label="Aylık Ücret (MonthlyCharges)")
        total = gr.Number(value=1, label="Toplam Ücret (TotalCharges)")

    # Customer profile.
    with gr.Row():
        PhoneService = gr.Dropdown(phone_service_options, label="Telefon Hizmeti (PhoneService)")
        gender = gr.Dropdown(gender_options, label="Cinsiyet (gender)")
        SeniorCitizen = gr.Dropdown(senior_citizen_options, label="Kıdemli Vatandaş (SeniorCitizen)")
        Partner = gr.Dropdown(partner_options, label="Partner")
        Dependents = gr.Dropdown(dependents_options, label="Bağımlılar (Dependents)")
        PaperlessBilling = gr.Dropdown(paperless_billing_options, label="Kağıtsız Fatura (PaperlessBilling)")

    # Subscribed services.
    with gr.Row():
        MultipleLines = gr.Dropdown(multiple_lines_options, label="Çoklu Hat (MultipleLines)")
        InternetService = gr.Dropdown(internet_service_options, label="İnternet Servisi (InternetService)")
        OnlineSecurity = gr.Dropdown(online_security_options, label="Online Güvenlik (OnlineSecurity)")
        OnlineBackup = gr.Dropdown(online_backup_options, label="Online Yedekleme (OnlineBackup)")
        DeviceProtection = gr.Dropdown(device_protection_options, label="Cihaz Koruma (DeviceProtection)")
        TechSupport = gr.Dropdown(tech_support_options, label="Teknik Destek (TechSupport)")
        StreamingTV = gr.Dropdown(streaming_tv_options, label="TV Yayını (StreamingTV)")
        StreamingMovies = gr.Dropdown(streaming_movies_options, label="Film Yayını (StreamingMovies)")

    # Contract and billing.
    with gr.Row():
        Contract = gr.Dropdown(contract_options, label="Sözleşme (Contract)")
        PaymentMethod = gr.Dropdown(payment_method_options, label="Ödeme Yöntemi (PaymentMethod)")

    autofill_btn = gr.Button("Rastgele Müşteri ile Doldur")
    submit_btn = gr.Button("Tahmin Et")

    # Outputs.
    with gr.Row():
        output = gr.Textbox(label="Sonuç")
    with gr.Row():
        gauge_plot = gr.Plot(label="Churn Risk Gauge")
        pie_plot = gr.Plot(label="Probability Distribution")
    similar_customers_table = gr.Dataframe(label="Benzer Müşteriler (İlk 5)")

    # The autofill handler returns values in this order and the prediction
    # handler takes its arguments in this order.
    form_fields = [
        tenure, monthly, total, PhoneService, gender, SeniorCitizen, Partner,
        Dependents, PaperlessBilling, MultipleLines, InternetService,
        OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
        StreamingTV, StreamingMovies, Contract, PaymentMethod,
    ]
    autofill_btn.click(
        autofill_random_customer,
        inputs=[],
        outputs=list(form_fields),
    )
    submit_btn.click(
        predict_churn,
        inputs=list(form_fields),
        outputs=[output, gauge_plot, pie_plot, similar_customers_table],
    )

if __name__ == "__main__":
    demo.launch()