Spaces:

omarcevi
/

gradio-app

Sleeping

App Files Files Community

gradio-app / gradio_app.py

omarcevi

Upload folder using huggingface_hub

fc6cf5f verified 28 days ago

raw

history blame contribute delete

12.8 kB

	import gradio as gr
	import joblib
	import pandas as pd
	import numpy as np
	from sklearn.metrics.pairwise import euclidean_distances
	import random
	import plotly.graph_objects as go
	import plotly.express as px

	# Fitted classifier and the ordered list of input column names it is fed.
	model = joblib.load("churn_model.pkl")
	model_features = joblib.load("model_features.pkl")

	# Reference customers: source for the autofill button and the similarity search.
	customer_df = pd.read_csv("Telco-Customer-Churn.csv")

	# Coerce the numeric columns to real numbers; cells that cannot be parsed
	# become NaN first and are then replaced by 0.
	for _numeric_column in ('MonthlyCharges', 'TotalCharges', 'tenure'):
	    customer_df[_numeric_column] = pd.to_numeric(
	        customer_df[_numeric_column], errors='coerce'
	    ).fillna(0)

	# Translation table from the Turkish labels shown in the form to the English
	# values used when building one-hot column names. Values are unique, so the
	# table can be inverted safely.
	# NOTE(review): "Yok" is also a choice of the InternetService dropdown; the
	# dataset presumably spells that case "No" rather than "No internet service"
	# in that one column - confirm against the CSV.
	value_map = {
	    # Contract terms
	    "Aydan aya": "Month-to-month",
	    "1 yıllık": "One year",
	    "2 yıllık": "Two year",
	    # Payment methods
	    "Elektronik çek": "Electronic check",
	    "Posta çeki": "Mailed check",
	    "Banka havalesi (otomatik)": "Bank transfer (automatic)",
	    "Kredi kartı (otomatik)": "Credit card (automatic)",
	    # Yes / no answers and "service not available" markers
	    "Hayır": "No",
	    "Evet": "Yes",
	    "Yok": "No internet service",
	    "Telefon hizmeti yok": "No phone service",
	    # Internet technology
	    "Fiber optik": "Fiber optic",
	    # Gender
	    "Erkek": "Male",
	    "Kadın": "Female",
	}

	def customer_to_features(row, *, feature_names=None, translations=None):
	    """Build the model input vector for one customer record.

	    Mirrors the feature engineering of predict_churn, but reads the raw
	    dataset values (English strings, SeniorCitizen as 0/1) instead of the
	    Turkish form values.

	    Args:
	        row: Mapping-like record (e.g. a DataFrame row) with the dataset
	            columns tenure, MonthlyCharges, TotalCharges, PhoneService,
	            Contract and every categorical field listed below.
	        feature_names: Ordered column names to emit. Defaults to the
	            module-level model_features.
	        translations: Optional label -> dataset value table applied to each
	            categorical value. Defaults to the module-level value_map.

	    Returns:
	        A list with one value per entry of feature_names, in that order.

	    Raises:
	        KeyError: If row lacks one of the columns read here.
	    """
	    if feature_names is None:
	        feature_names = model_features
	    if translations is None:
	        translations = value_map
	    tenure = row['tenure']
	    monthly = row['MonthlyCharges']
	    total = row['TotalCharges']
	    senior_token = str(row['SeniorCitizen'])
	    input_dict = {
	        "tenure": tenure,
	        "PhoneService": row['PhoneService'] == "Yes",
	        # Only used when the model keeps SeniorCitizen as a single plain
	        # column; ignored otherwise because the result is selected by
	        # feature_names.
	        "SeniorCitizen": senior_token in ("1", "Yes"),
	        # Guard both ratios against division by zero for brand-new customers.
	        "avg_charge_per_month": total / tenure if tenure > 0 else 0,
	        "charge_ratio": total / (monthly * tenure) if monthly > 0 and tenure > 0 else 1,
	        "is_long_term_contract": row['Contract'] in ("One year", "Two year"),
	    }
	    tenure_label = "0-12" if tenure <= 12 else "12-24" if tenure <= 24 else "24+"
	    for bin_label in ("0-12", "12-24", "24+"):
	        input_dict[f"tenure_bin_{bin_label}"] = (tenure_label == bin_label)
	    categorical_fields = (
	        "gender", "SeniorCitizen", "Partner", "Dependents", "PaperlessBilling",
	        "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup",
	        "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies",
	        "Contract", "PaymentMethod",
	    )
	    # The dataset stores SeniorCitizen as 0/1 while the form path produces
	    # "Yes"/"No"; accept either spelling so both paths hit the same one-hot
	    # column whichever way the model's columns were named.
	    senior_aliases = {"1": "Yes", "0": "No", "Yes": "1", "No": "0"}
	    for field in categorical_fields:
	        raw_value = row[field]
	        mapped_value = str(translations.get(raw_value, raw_value))
	        accepted = {mapped_value}
	        if field == "SeniorCitizen" and mapped_value in senior_aliases:
	            accepted.add(senior_aliases[mapped_value])
	        prefix = f"{field}_"
	        for col in feature_names:
	            if col.startswith(prefix):
	                input_dict[col] = col[len(prefix):] in accepted
	    # Any column not produced above gets a neutral default.
	    # NOTE(review): the "charge" test is case-sensitive, so plain
	    # "MonthlyCharges"/"TotalCharges" columns would default to False here -
	    # confirm the model does not expect them.
	    for col in feature_names:
	        if col not in input_dict:
	            input_dict[col] = 0 if col == "tenure" or "charge" in col or "avg" in col else False
	    return [input_dict[col] for col in feature_names]

	# Encode every reference customer once at start-up; row i of the matrix is
	# the feature vector of row i of customer_df.
	customer_feature_matrix = np.vstack(
	    [customer_to_features(record) for _index, record in customer_df.iterrows()]
	)

	def autofill_random_customer():
	    """Pick one random reference customer and return it as form values.

	    Returns:
	        A list of 19 values in the order of the form inputs: tenure,
	        monthly charge and total charge as floats, followed by the Turkish
	        dropdown label for each categorical field. A value with no matching
	        dropdown choice falls back to the first choice of that dropdown.
	    """
	    row = customer_df.sample(1).iloc[0]
	    # Dataset (English) value -> dropdown (Turkish) label.
	    reverse_map = {v: k for k, v in value_map.items()}

	    def to_choice(val, allowed):
	        # Translate, then make sure the dropdown can actually display it.
	        label = reverse_map.get(val, val)
	        return label if label in allowed else allowed[0]

	    def yes_no(flag):
	        return 'Evet' if flag else 'Hayır'

	    # In the InternetService column the dataset spells "no internet" as a
	    # plain "No". That would translate to "Hayır", which this dropdown does
	    # not offer, and the fallback would silently show "DSL". Route it through
	    # the "No internet service" entry so it becomes the dropdown's "Yok".
	    internet_value = row['InternetService']
	    if internet_value == "No":
	        internet_value = "No internet service"

	    return [
	        float(row['tenure']),
	        float(row['MonthlyCharges']),
	        float(row['TotalCharges']),
	        to_choice(row['PhoneService'], phone_service_options),
	        to_choice(row['gender'], gender_options),
	        yes_no(row['SeniorCitizen'] == 1),
	        yes_no(row['Partner'] == 'Yes'),
	        yes_no(row['Dependents'] == 'Yes'),
	        yes_no(row['PaperlessBilling'] == 'Yes'),
	        to_choice(row['MultipleLines'], multiple_lines_options),
	        to_choice(internet_value, internet_service_options),
	        to_choice(row['OnlineSecurity'], online_security_options),
	        to_choice(row['OnlineBackup'], online_backup_options),
	        to_choice(row['DeviceProtection'], device_protection_options),
	        to_choice(row['TechSupport'], tech_support_options),
	        to_choice(row['StreamingTV'], streaming_tv_options),
	        to_choice(row['StreamingMovies'], streaming_movies_options),
	        to_choice(row['Contract'], contract_options),
	        to_choice(row['PaymentMethod'], payment_method_options),
	    ]

	def find_similar_customers_vector(input_vector, n=5):
	    """Return the n reference customers nearest to input_vector.

	    Nearness is the Euclidean distance between input_vector and each row of
	    the precomputed customer_feature_matrix. The distances and row positions
	    of the matches are printed to stdout.

	    Args:
	        input_vector: 1-D array with one value per model feature.
	        n: Number of nearest customers to return.

	    Returns:
	        The matching rows of customer_df, nearest first, limited to the
	        identifying and billing columns plus the Churn label.
	    """
	    # NOTE(review): features are compared unscaled, so large-valued columns
	    # such as tenure presumably dominate the distance - confirm intended.
	    query = input_vector.reshape(1, -1)
	    dists = euclidean_distances(customer_feature_matrix, query).flatten()
	    top_idx = np.argsort(dists)[:n]
	    print("Top distances:", dists[top_idx])
	    print("Top indices:", top_idx)
	    shown_columns = [
	        'customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
	        'tenure', 'Contract', 'PaymentMethod', 'MonthlyCharges',
	        'TotalCharges', 'Churn',
	    ]
	    nearest_rows = customer_df.iloc[top_idx]
	    return nearest_rows[shown_columns]

	def _churn_gauge(score):
	    """Return a 0-100 gauge figure for the churn score with a marker at 50."""
	    return go.Figure(go.Indicator(
	        mode="gauge+number",
	        value=score,
	        domain={'x': [0, 1], 'y': [0, 1]},
	        title={'text': "Churn Riski"},
	        gauge={
	            'axis': {'range': [0, 100]},
	            'bar': {'color': "darkblue"},
	            'steps': [
	                {'range': [0, 30], 'color': "lightgreen"},
	                {'range': [30, 70], 'color': "yellow"},
	                {'range': [70, 100], 'color': "red"},
	            ],
	            'threshold': {
	                'line': {'color': "red", 'width': 4},
	                'thickness': 0.75,
	                'value': 50,
	            },
	        },
	    ))


	def _churn_pie(score):
	    """Return a pie chart splitting 100 into the churn score and the rest."""
	    return px.pie(
	        values=[score, 100 - score],
	        names=['Churn Riski', 'Kalma Olasılığı'],
	        title='Müşteri Durumu Dağılımı',
	        color_discrete_sequence=['red', 'green'],
	    )


	def predict_churn(
	    tenure, monthly, total, PhoneService, gender, SeniorCitizen, Partner, Dependents, PaperlessBilling,
	    MultipleLines, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
	    StreamingTV, StreamingMovies, Contract, PaymentMethod
	):
	    """Score one customer described by the form and find similar customers.

	    Args:
	        tenure, monthly, total: Numeric form fields; converted with float().
	        PhoneService ... PaymentMethod: Dropdown labels (Turkish). They are
	            translated through value_map before being matched against the
	            one-hot column names in model_features. A label with no matching
	            column leaves all columns of that field False.

	    Returns:
	        A 4-tuple: the result sentence, the gauge figure, the pie figure and
	        a DataFrame with the 5 most similar reference customers.

	    Raises:
	        TypeError, ValueError: If a numeric field is empty or not a number
	            (raised by float()).
	    """
	    # Ensure numeric types
	    tenure = float(tenure)
	    monthly = float(monthly)
	    total = float(total)
	    contract_value = value_map.get(Contract, Contract)
	    input_dict = {
	        "tenure": tenure,
	        "PhoneService": PhoneService == "Evet",
	        # Only used when the model keeps SeniorCitizen as one plain column;
	        # ignored otherwise because the row is selected by model_features.
	        "SeniorCitizen": SeniorCitizen == "Evet",
	        # Guard both ratios against division by zero for brand-new customers.
	        "avg_charge_per_month": total / tenure if tenure > 0 else 0,
	        "charge_ratio": total / (monthly * tenure) if monthly > 0 and tenure > 0 else 1,
	        "is_long_term_contract": contract_value in ("One year", "Two year"),
	    }
	    tenure_label = "0-12" if tenure <= 12 else "12-24" if tenure <= 24 else "24+"
	    for bin_label in ("0-12", "12-24", "24+"):
	        input_dict[f"tenure_bin_{bin_label}"] = (tenure_label == bin_label)
	    form = {
	        "gender": gender,
	        "SeniorCitizen": SeniorCitizen,
	        "Partner": Partner,
	        "Dependents": Dependents,
	        "PaperlessBilling": PaperlessBilling,
	        "MultipleLines": MultipleLines,
	        "InternetService": InternetService,
	        "OnlineSecurity": OnlineSecurity,
	        "OnlineBackup": OnlineBackup,
	        "DeviceProtection": DeviceProtection,
	        "TechSupport": TechSupport,
	        "StreamingTV": StreamingTV,
	        "StreamingMovies": StreamingMovies,
	        "Contract": Contract,
	        "PaymentMethod": PaymentMethod,
	    }
	    # Field-specific spellings the dataset uses for the same answer:
	    #  - InternetService marks "no internet" as plain "No", whereas the
	    #    shared "Yok" label translates to "No internet service";
	    #  - SeniorCitizen is stored as 0/1 in the CSV, whereas the form yields
	    #    "Yes"/"No".
	    # Accepting both keeps the form path aligned with the vectors built from
	    # the CSV, whichever spelling the model's column names carry.
	    alternate_spellings = {
	        ("InternetService", "No internet service"): "No",
	        ("SeniorCitizen", "Yes"): "1",
	        ("SeniorCitizen", "No"): "0",
	    }
	    for field, raw_value in form.items():
	        mapped_value = value_map.get(raw_value, raw_value)
	        accepted = {str(mapped_value)}
	        alternate = alternate_spellings.get((field, mapped_value))
	        if alternate is not None:
	            accepted.add(alternate)
	        prefix = f"{field}_"
	        for col in model_features:
	            if col.startswith(prefix):
	                input_dict[col] = col[len(prefix):] in accepted
	    # Any column not produced above gets a neutral default.
	    for col in model_features:
	        if col not in input_dict:
	            input_dict[col] = 0 if col == "tenure" or "charge" in col or "avg" in col else False
	    input_df = pd.DataFrame([[input_dict[col] for col in model_features]], columns=model_features)
	    # Probability of the second class, expressed as a percentage.
	    prediction = model.predict_proba(input_df)[0][1]
	    score = round(prediction * 100, 2)

	    fig_gauge = _churn_gauge(score)
	    fig_pie = _churn_pie(score)

	    if score >= 50:
	        comment = "Müşteri Kaybedilebilir."
	    else:
	        comment = "Müşteri Kayıp Riski Taşımıyor."
	    result = f"Churn Riski: %{score} — {comment}"

	    # Vector similarity
	    similar_customers = find_similar_customers_vector(input_df.values[0], n=5)
	    return result, fig_gauge, fig_pie, similar_customers

	# Choices offered by each dropdown (Turkish labels; value_map translates them
	# to the dataset's English values). The first entry of a list doubles as the
	# fallback used by autofill_random_customer when a value has no match.
	_YES_NO_CHOICES = ("Evet", "Hayır")
	_ADD_ON_CHOICES = ("Hayır", "Evet", "Yok")

	phone_service_options = list(_YES_NO_CHOICES)
	gender_options = ["Erkek", "Kadın"]
	senior_citizen_options = list(_YES_NO_CHOICES)
	partner_options = list(_YES_NO_CHOICES)
	dependents_options = list(_YES_NO_CHOICES)
	paperless_billing_options = list(_YES_NO_CHOICES)
	multiple_lines_options = ["Hayır", "Evet", "Telefon hizmeti yok"]
	internet_service_options = ["DSL", "Fiber optik", "Yok"]
	# Internet add-ons all share the same three answers; each gets its own list.
	online_security_options = list(_ADD_ON_CHOICES)
	online_backup_options = list(_ADD_ON_CHOICES)
	device_protection_options = list(_ADD_ON_CHOICES)
	tech_support_options = list(_ADD_ON_CHOICES)
	streaming_tv_options = list(_ADD_ON_CHOICES)
	streaming_movies_options = list(_ADD_ON_CHOICES)
	contract_options = ["Aydan aya", "1 yıllık", "2 yıllık"]
	payment_method_options = [
	    "Elektronik çek",
	    "Posta çeki",
	    "Banka havalesi (otomatik)",
	    "Kredi kartı (otomatik)",
	]

	# Gradio UI: input widgets, the two action buttons, the result widgets and
	# the event wiring that connects them to the functions above.
	with gr.Blocks() as demo:
	gr.Markdown("# Müşteri Churn Tahmini")
	# Numeric inputs; each starts at 1.
	with gr.Row():
	tenure = gr.Number(label="Kullanım Süresi (tenure)", value=1)
	monthly = gr.Number(label="Aylık Ücret (MonthlyCharges)", value=1)
	total = gr.Number(label="Toplam Ücret (TotalCharges)", value=1)
	# Customer profile dropdowns. No initial value is passed to any dropdown,
	# so they start unselected.
	# NOTE(review): an unselected dropdown presumably reaches predict_churn as
	# None - confirm and decide whether that should be rejected.
	with gr.Row():
	PhoneService = gr.Dropdown(phone_service_options, label="Telefon Hizmeti (PhoneService)")
	gender = gr.Dropdown(gender_options, label="Cinsiyet (gender)")
	SeniorCitizen = gr.Dropdown(senior_citizen_options, label="Kıdemli Vatandaş (SeniorCitizen)")
	Partner = gr.Dropdown(partner_options, label="Partner")
	Dependents = gr.Dropdown(dependents_options, label="Bağımlılar (Dependents)")
	PaperlessBilling = gr.Dropdown(paperless_billing_options, label="Kağıtsız Fatura (PaperlessBilling)")
	# Phone / internet service dropdowns.
	with gr.Row():
	MultipleLines = gr.Dropdown(multiple_lines_options, label="Çoklu Hat (MultipleLines)")
	InternetService = gr.Dropdown(internet_service_options, label="İnternet Servisi (InternetService)")
	OnlineSecurity = gr.Dropdown(online_security_options, label="Online Güvenlik (OnlineSecurity)")
	OnlineBackup = gr.Dropdown(online_backup_options, label="Online Yedekleme (OnlineBackup)")
	DeviceProtection = gr.Dropdown(device_protection_options, label="Cihaz Koruma (DeviceProtection)")
	TechSupport = gr.Dropdown(tech_support_options, label="Teknik Destek (TechSupport)")
	StreamingTV = gr.Dropdown(streaming_tv_options, label="TV Yayını (StreamingTV)")
	StreamingMovies = gr.Dropdown(streaming_movies_options, label="Film Yayını (StreamingMovies)")
	# Contract and payment dropdowns.
	with gr.Row():
	Contract = gr.Dropdown(contract_options, label="Sözleşme (Contract)")
	PaymentMethod = gr.Dropdown(payment_method_options, label="Ödeme Yöntemi (PaymentMethod)")

	# Action buttons: fill the form from a random customer / run the prediction.
	autofill_btn = gr.Button("Rastgele Müşteri ile Doldur")
	submit_btn = gr.Button("Tahmin Et")

	# Result sentence returned by predict_churn.
	with gr.Row():
	output = gr.Textbox(label="Sonuç")

	# The two figures returned by predict_churn.
	with gr.Row():
	gauge_plot = gr.Plot(label="Churn Risk Gauge")
	pie_plot = gr.Plot(label="Probability Distribution")

	# Table of the nearest reference customers returned by predict_churn.
	similar_customers_table = gr.Dataframe(label="Benzer Müşteriler (İlk 5)")

	# The outputs list must stay in the same order as the list returned by
	# autofill_random_customer (19 values).
	autofill_btn.click(
	autofill_random_customer,
	inputs=[],
	outputs=[tenure, monthly, total, PhoneService, gender, SeniorCitizen, Partner, Dependents, PaperlessBilling,
	MultipleLines, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
	StreamingTV, StreamingMovies, Contract, PaymentMethod]
	)
	# The inputs list must stay in the same order as predict_churn's positional
	# parameters; the outputs match its 4-tuple return value.
	submit_btn.click(
	predict_churn,
	inputs=[tenure, monthly, total, PhoneService, gender, SeniorCitizen, Partner, Dependents, PaperlessBilling,
	MultipleLines, InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
	StreamingTV, StreamingMovies, Contract, PaymentMethod],
	outputs=[output, gauge_plot, pie_plot, similar_customers_table]
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()