"""Gradio app that scores Telco customer churn risk and lists similar customers.

At import time the module loads a fitted model, the ordered list of feature
columns the model expects, and the customer CSV, then pre-computes one feature
vector per customer so that similarity look-ups are a single distance call.
"""

import random

import gradio as gr
import joblib
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics.pairwise import euclidean_distances

# NOTE: joblib.load unpickles arbitrary objects; only load files from a
# trusted source.
model = joblib.load("churn_model.pkl")
model_features = joblib.load("model_features.pkl")

# Load the customer data; unparseable numeric cells are coerced to 0.
customer_df = pd.read_csv("Telco-Customer-Churn.csv")
for _numeric_col in ("MonthlyCharges", "TotalCharges", "tenure"):
    customer_df[_numeric_col] = pd.to_numeric(
        customer_df[_numeric_col], errors="coerce"
    ).fillna(0)

# Turkish UI label -> English value used in the CSV / feature column names.
value_map = {
    "Aydan aya": "Month-to-month",
    "1 yıllık": "One year",
    "2 yıllık": "Two year",
    "Elektronik çek": "Electronic check",
    "Posta çeki": "Mailed check",
    "Banka havalesi (otomatik)": "Bank transfer (automatic)",
    "Kredi kartı (otomatik)": "Credit card (automatic)",
    "Hayır": "No",
    "Evet": "Yes",
    "Yok": "No internet service",
    "Telefon hizmeti yok": "No phone service",
    "Fiber optik": "Fiber optic",
    "Erkek": "Male",
    "Kadın": "Female",
}

# English value -> Turkish UI label, built once instead of on every autofill.
_REVERSE_VALUE_MAP = {english: turkish for turkish, english in value_map.items()}

# Fields that are matched against "<field>_<value>" columns in model_features.
CATEGORICAL_FIELDS = (
    "gender", "SeniorCitizen", "Partner", "Dependents", "PaperlessBilling",
    "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup",
    "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies",
    "Contract", "PaymentMethod",
)
TENURE_BINS = ("0-12", "12-24", "24+")
LONG_TERM_CONTRACTS = ("One year", "Two year")
SIMILAR_CUSTOMER_COLUMNS = [
    "customerID", "gender", "SeniorCitizen", "Partner", "Dependents", "tenure",
    "Contract", "PaymentMethod", "MonthlyCharges", "TotalCharges", "Churn",
]

# The CSV stores SeniorCitizen as 1/0 while the form yields Yes/No, so both
# spellings must be accepted when matching "SeniorCitizen_<value>" columns.
_TRUTHY_TOKENS = {"1", "1.0", "Yes", "True"}
_FALSY_TOKENS = {"0", "0.0", "No", "False"}

# Define options for dropdowns (Turkish values)
phone_service_options = ["Evet", "Hayır"]
gender_options = ["Erkek", "Kadın"]
senior_citizen_options = ["Evet", "Hayır"]
partner_options = ["Evet", "Hayır"]
dependents_options = ["Evet", "Hayır"]
paperless_billing_options = ["Evet", "Hayır"]
multiple_lines_options = ["Hayır", "Evet", "Telefon hizmeti yok"]
internet_service_options = ["DSL", "Fiber optik", "Yok"]
online_security_options = ["Hayır", "Evet", "Yok"]
online_backup_options = ["Hayır", "Evet", "Yok"]
device_protection_options = ["Hayır", "Evet", "Yok"]
tech_support_options = ["Hayır", "Evet", "Yok"]
streaming_tv_options = ["Hayır", "Evet", "Yok"]
streaming_movies_options = ["Hayır", "Evet", "Yok"]
contract_options = ["Aydan aya", "1 yıllık", "2 yıllık"]
payment_method_options = [
    "Elektronik çek",
    "Posta çeki",
    "Banka havalesi (otomatik)",
    "Kredi kartı (otomatik)",
]


def _to_float(value):
    """Return ``value`` as a float, treating ``None`` as 0.0.

    A cleared ``gr.Number`` field arrives as ``None``; mapping it to 0 mirrors
    the ``fillna(0)`` applied to the CSV's numeric columns.
    """
    return 0.0 if value is None else float(value)


def _senior_column_suffixes(mapped_value):
    """Return every column suffix that denotes the same SeniorCitizen value."""
    token = str(mapped_value)
    if token in _TRUTHY_TOKENS:
        return ("1", "Yes", "True")
    if token in _FALSY_TOKENS:
        return ("0", "No", "False")
    return (token,)


def _default_feature_value(col):
    """Return the filler used for a model column no rule populated."""
    is_numeric = col == "tenure" or "charge" in col or "avg" in col
    return 0 if is_numeric else False


def _encode_features(tenure, monthly, total, phone_service, categorical_values):
    """Build the feature vector, ordered like ``model_features``.

    Shared by ``customer_to_features`` and ``predict_churn`` so CSV rows and
    form input are always encoded the same way.

    Args:
        tenure: Tenure value; ``None`` is treated as 0.
        monthly: Monthly charge; ``None`` is treated as 0.
        total: Total charge; ``None`` is treated as 0.
        phone_service: ``"Yes"`` or its Turkish label ``"Evet"`` for true.
        categorical_values: Mapping (dict or row) indexable by every name in
            ``CATEGORICAL_FIELDS``; values may be Turkish labels or the raw
            CSV values.

    Returns:
        A list with one entry per column in ``model_features``.
    """
    tenure = _to_float(tenure)
    monthly = _to_float(monthly)
    total = _to_float(total)

    contract = categorical_values["Contract"]
    senior = categorical_values["SeniorCitizen"]
    features = {
        "tenure": tenure,
        # The three raw columns below only take effect if the model lists
        # them; previously they silently fell through to the False filler.
        "MonthlyCharges": monthly,
        "TotalCharges": total,
        "SeniorCitizen": str(value_map.get(senior, senior)) in _TRUTHY_TOKENS,
        "PhoneService": value_map.get(phone_service, phone_service) == "Yes",
        "avg_charge_per_month": total / tenure if tenure > 0 else 0,
        "charge_ratio": (
            total / (monthly * tenure) if monthly > 0 and tenure > 0 else 1
        ),
        "is_long_term_contract": (
            value_map.get(contract, contract) in LONG_TERM_CONTRACTS
        ),
    }

    tenure_label = "0-12" if tenure <= 12 else "12-24" if tenure <= 24 else "24+"
    for bin_label in TENURE_BINS:
        features[f"tenure_bin_{bin_label}"] = tenure_label == bin_label

    for field in CATEGORICAL_FIELDS:
        raw_value = categorical_values[field]
        mapped_value = value_map.get(raw_value, raw_value)
        if field == "SeniorCitizen":
            suffixes = _senior_column_suffixes(mapped_value)
        else:
            suffixes = (mapped_value,)
        accepted = {f"{field}_{suffix}" for suffix in suffixes}
        prefix = f"{field}_"
        for col in model_features:
            if col.startswith(prefix):
                features[col] = col in accepted

    return [
        features[col] if col in features else _default_feature_value(col)
        for col in model_features
    ]


def customer_to_features(row):
    """Return the model feature vector (a list) for one customer CSV row.

    ``row`` must be indexable by the CSV column names (``tenure``,
    ``MonthlyCharges``, ``TotalCharges``, ``PhoneService`` and every name in
    ``CATEGORICAL_FIELDS``).
    """
    return _encode_features(
        row["tenure"],
        row["MonthlyCharges"],
        row["TotalCharges"],
        row["PhoneService"],
        row,
    )


# Precompute all customer feature vectors
customer_feature_matrix = np.asarray(
    [customer_to_features(record) for record in customer_df.to_dict("records")],
    dtype=float,
)


def autofill_random_customer():
    """Sample one customer and return its values for the 19 form components.

    The returned list follows the order of the form inputs wired to the
    autofill button. English CSV values are translated back to the Turkish
    dropdown labels; a value with no matching label falls back to the first
    option of that dropdown.
    """
    row = customer_df.sample(1).iloc[0]

    def label_for(value, allowed):
        # Map the English value to its Turkish label and validate it.
        label = _REVERSE_VALUE_MAP.get(value, value)
        return label if label in allowed else allowed[0]

    def yes_no(flag):
        return "Evet" if flag else "Hayır"

    return [
        float(row["tenure"]),
        float(row["MonthlyCharges"]),
        float(row["TotalCharges"]),
        label_for(row["PhoneService"], phone_service_options),
        label_for(row["gender"], gender_options),
        yes_no(row["SeniorCitizen"] == 1),
        yes_no(row["Partner"] == "Yes"),
        yes_no(row["Dependents"] == "Yes"),
        yes_no(row["PaperlessBilling"] == "Yes"),
        label_for(row["MultipleLines"], multiple_lines_options),
        label_for(row["InternetService"], internet_service_options),
        label_for(row["OnlineSecurity"], online_security_options),
        label_for(row["OnlineBackup"], online_backup_options),
        label_for(row["DeviceProtection"], device_protection_options),
        label_for(row["TechSupport"], tech_support_options),
        label_for(row["StreamingTV"], streaming_tv_options),
        label_for(row["StreamingMovies"], streaming_movies_options),
        label_for(row["Contract"], contract_options),
        label_for(row["PaymentMethod"], payment_method_options),
    ]


def find_similar_customers_vector(input_vector, n=5):
    """Return the ``n`` customers closest to ``input_vector``.

    Closeness is the Euclidean distance between ``input_vector`` and each row
    of ``customer_feature_matrix``. The distances and indices of the matches
    are printed to stdout.

    Args:
        input_vector: One feature vector ordered like ``model_features``.
        n: Number of customers to return.

    Returns:
        The matching rows of ``customer_df``, restricted to
        ``SIMILAR_CUSTOMER_COLUMNS``.
    """
    # A row taken from a mixed bool/float DataFrame is an object array, so
    # cast explicitly before handing it to scikit-learn.
    query = np.asarray(input_vector, dtype=float).reshape(1, -1)
    dists = euclidean_distances(customer_feature_matrix, query).flatten()
    top_idx = np.argsort(dists)[:n]
    print("Top distances:", dists[top_idx])
    print("Top indices:", top_idx)
    return customer_df.iloc[top_idx][SIMILAR_CUSTOMER_COLUMNS]


def predict_churn(
    tenure, monthly, total,
    PhoneService, gender, SeniorCitizen, Partner, Dependents, PaperlessBilling,
    MultipleLines, InternetService, OnlineSecurity, OnlineBackup,
    DeviceProtection, TechSupport, StreamingTV, StreamingMovies,
    Contract, PaymentMethod,
):
    """Score the form input and build the outputs shown in the UI.

    The categorical arguments are the Turkish dropdown labels; the numeric
    ones may be ``None`` (cleared field), which is treated as 0.

    Returns:
        A tuple ``(result_text, gauge_figure, pie_figure, similar_customers)``
        where ``similar_customers`` is the DataFrame returned by
        ``find_similar_customers_vector``.
    """
    form = {
        "gender": gender,
        "SeniorCitizen": SeniorCitizen,
        "Partner": Partner,
        "Dependents": Dependents,
        "PaperlessBilling": PaperlessBilling,
        "MultipleLines": MultipleLines,
        "InternetService": InternetService,
        "OnlineSecurity": OnlineSecurity,
        "OnlineBackup": OnlineBackup,
        "DeviceProtection": DeviceProtection,
        "TechSupport": TechSupport,
        "StreamingTV": StreamingTV,
        "StreamingMovies": StreamingMovies,
        "Contract": Contract,
        "PaymentMethod": PaymentMethod,
    }
    feature_vector = _encode_features(tenure, monthly, total, PhoneService, form)

    input_df = pd.DataFrame([feature_vector], columns=model_features)
    # Column 1 of predict_proba is taken as the churn class probability.
    prediction = model.predict_proba(input_df)[0][1]
    score = round(prediction * 100, 2)

    # Create gauge chart for churn risk
    fig_gauge = go.Figure(go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Churn Riski"},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 30], 'color': "lightgreen"},
                {'range': [30, 70], 'color': "yellow"},
                {'range': [70, 100], 'color': "red"},
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 50,
            },
        },
    ))

    # Create pie chart for probability distribution
    fig_pie = px.pie(
        values=[score, 100 - score],
        names=['Churn Riski', 'Kalma Olasılığı'],
        title='Müşteri Durumu Dağılımı',
        color_discrete_sequence=['red', 'green'],
    )

    if score >= 50:
        comment = "Müşteri Kaybedilebilir."
    else:
        comment = "Müşteri Kayıp Riski Taşımıyor."
    result = f"Churn Riski: %{score} — {comment}"

    # Vector similarity
    similar_customers = find_similar_customers_vector(
        np.asarray(feature_vector, dtype=float), n=5
    )

    return result, fig_gauge, fig_pie, similar_customers


with gr.Blocks() as demo:
    gr.Markdown("# Müşteri Churn Tahmini")
    with gr.Row():
        tenure = gr.Number(label="Kullanım Süresi (tenure)", value=1)
        monthly = gr.Number(label="Aylık Ücret (MonthlyCharges)", value=1)
        total = gr.Number(label="Toplam Ücret (TotalCharges)", value=1)
    with gr.Row():
        PhoneService = gr.Dropdown(phone_service_options, label="Telefon Hizmeti (PhoneService)")
        gender = gr.Dropdown(gender_options, label="Cinsiyet (gender)")
        SeniorCitizen = gr.Dropdown(senior_citizen_options, label="Kıdemli Vatandaş (SeniorCitizen)")
        Partner = gr.Dropdown(partner_options, label="Partner")
        Dependents = gr.Dropdown(dependents_options, label="Bağımlılar (Dependents)")
        PaperlessBilling = gr.Dropdown(paperless_billing_options, label="Kağıtsız Fatura (PaperlessBilling)")
    with gr.Row():
        MultipleLines = gr.Dropdown(multiple_lines_options, label="Çoklu Hat (MultipleLines)")
        InternetService = gr.Dropdown(internet_service_options, label="İnternet Servisi (InternetService)")
        OnlineSecurity = gr.Dropdown(online_security_options, label="Online Güvenlik (OnlineSecurity)")
        OnlineBackup = gr.Dropdown(online_backup_options, label="Online Yedekleme (OnlineBackup)")
        DeviceProtection = gr.Dropdown(device_protection_options, label="Cihaz Koruma (DeviceProtection)")
        TechSupport = gr.Dropdown(tech_support_options, label="Teknik Destek (TechSupport)")
        StreamingTV = gr.Dropdown(streaming_tv_options, label="TV Yayını (StreamingTV)")
        StreamingMovies = gr.Dropdown(streaming_movies_options, label="Film Yayını (StreamingMovies)")
    with gr.Row():
        Contract = gr.Dropdown(contract_options, label="Sözleşme (Contract)")
        PaymentMethod = gr.Dropdown(payment_method_options, label="Ödeme Yöntemi (PaymentMethod)")

    # NOTE(review): the original layout nesting of the buttons and the results
    # table was not recoverable; they are placed at Blocks level - confirm.
    autofill_btn = gr.Button("Rastgele Müşteri ile Doldur")
    submit_btn = gr.Button("Tahmin Et")
    with gr.Row():
        output = gr.Textbox(label="Sonuç")
    with gr.Row():
        gauge_plot = gr.Plot(label="Churn Risk Gauge")
        pie_plot = gr.Plot(label="Probability Distribution")
    similar_customers_table = gr.Dataframe(label="Benzer Müşteriler (İlk 5)")

    # Same order as predict_churn's parameters and as the list returned by
    # autofill_random_customer.
    form_inputs = [
        tenure, monthly, total, PhoneService, gender, SeniorCitizen, Partner,
        Dependents, PaperlessBilling, MultipleLines, InternetService,
        OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
        StreamingTV, StreamingMovies, Contract, PaymentMethod,
    ]

    autofill_btn.click(
        autofill_random_customer,
        inputs=[],
        outputs=form_inputs,
    )
    submit_btn.click(
        predict_churn,
        inputs=form_inputs,
        outputs=[output, gauge_plot, pie_plot, similar_customers_table],
    )

if __name__ == "__main__":
    demo.launch()