"""Gradio web app for loading a Hugging Face dataset and editing it as a table."""

import os
import tempfile
from io import StringIO
from typing import Optional, Tuple

import gradio as gr
import pandas as pd
from datasets import load_dataset

# Module-level DataFrame holding the dataset currently being edited.
# Every handler below reads and/or rebinds this single object.
global_df = pd.DataFrame()


def load_hf_dataset(hf_path: str) -> Tuple[Optional[pd.DataFrame], str]:
    """Load the first split of a Hugging Face dataset into ``global_df``.

    Args:
        hf_path: Dataset identifier passed straight to ``load_dataset``.

    Returns:
        ``(dataframe, status_message)`` on success, or ``(None, error_message)``
        when loading fails for any reason. ``global_df`` is only replaced on
        success.
    """
    global global_df
    try:
        dataset = load_dataset(hf_path)
        first_split = next(iter(dataset.keys()))
        df = pd.DataFrame(dataset[first_split])
        # Keep a private copy so the frame handed to the UI is not aliased.
        global_df = df.copy()
        return df, f"✅ Dataset loaded from '{hf_path}' with {len(df)} rows."
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, f"❌ Error loading dataset:\n{e}"


def add_row() -> pd.DataFrame:
    """Append one row of empty strings to ``global_df`` and return the frame."""
    global global_df
    empty_row = {col: "" for col in global_df.columns}
    global_df = pd.concat(
        [global_df, pd.DataFrame([empty_row])],
        ignore_index=True,
    )
    return global_df


def delete_row(index) -> pd.DataFrame:
    """Delete the row at position ``index`` from ``global_df``.

    ``index`` comes from a ``gr.Number`` widget, so it may arrive as a float
    or as ``None`` when the field is empty. Values that are missing,
    non-integral, or out of range leave the frame unchanged.

    Returns:
        The (possibly unchanged) ``global_df``.
    """
    global global_df
    try:
        position = int(index)
    except (TypeError, ValueError, OverflowError):
        # None, NaN, infinity or a non-numeric value: nothing to delete.
        return global_df
    if position != index:
        # Fractional input such as 1.5 does not identify a row.
        return global_df
    if 0 <= position < len(global_df):
        # Drop by position so the result does not depend on the label dtype.
        label = global_df.index[position]
        global_df = global_df.drop(label).reset_index(drop=True)
    return global_df


def add_column(col_name: str) -> pd.DataFrame:
    """Add a column of empty strings named ``col_name`` to ``global_df``.

    Does nothing when ``col_name`` is empty or already exists.
    """
    global global_df
    if col_name and col_name not in global_df.columns:
        global_df[col_name] = ""
    return global_df


def delete_column(col_name: str) -> pd.DataFrame:
    """Remove the column ``col_name`` from ``global_df`` if it exists."""
    global global_df
    if col_name in global_df.columns:
        global_df = global_df.drop(columns=[col_name])
    return global_df


def rename_column(old_name: str, new_name: str) -> pd.DataFrame:
    """Rename column ``old_name`` to ``new_name`` in ``global_df``.

    The rename is skipped when ``old_name`` is absent, ``new_name`` is empty,
    or ``new_name`` is already used by a different column (which would
    otherwise produce duplicate column labels).
    """
    global global_df
    if old_name not in global_df.columns or not new_name:
        return global_df
    if new_name != old_name and new_name in global_df.columns:
        return global_df
    global_df = global_df.rename(columns={old_name: new_name})
    return global_df


def download_csv() -> str:
    """Return ``global_df`` serialized as CSV text, without the index column."""
    return global_df.to_csv(index=False)


def csv_download_link() -> str:
    """Write the current dataset to ``dataset.csv`` and return the file path.

    ``gr.File`` is given a filesystem path rather than an in-memory buffer.
    Each call writes into a fresh temporary directory so that successive
    exports do not overwrite one another.
    """
    csv_str = download_csv()
    export_dir = tempfile.mkdtemp(prefix="hf_dataset_editor_")
    csv_path = os.path.join(export_dir, "dataset.csv")
    # newline="" keeps the line endings produced by pandas untouched.
    with open(csv_path, "w", encoding="utf-8", newline="") as fh:
        fh.write(csv_str)
    return csv_path


# Gradio user interface
with gr.Blocks(title="HuggingFace Dataset Editor") as demo:
    gr.Markdown("## 🧬 Hugging Face Dataset Editor (Gradio Web App)")

    with gr.Row():
        dataset_input = gr.Textbox(
            label="Hugging Face Dataset Path (e.g. `codersan/Persian-Wikipedia-Corpus`)",
            value="",
        )
        load_btn = gr.Button("🔄 Load Dataset")

    status = gr.Textbox(label="Status", interactive=False)
    # NOTE: no handler takes data_table as an input, so cell edits typed
    # directly into the table are not written back to global_df.
    data_table = gr.Dataframe(label="Dataset", wrap=True, interactive=True)

    load_btn.click(
        fn=load_hf_dataset,
        inputs=dataset_input,
        outputs=[data_table, status],
    )

    with gr.Row():
        add_row_btn = gr.Button("➕ Add Row")
        del_row_idx = gr.Number(label="Row Index to Delete", value=0)
        delete_row_btn = gr.Button("🗑 Delete Row")

    add_row_btn.click(fn=add_row, outputs=data_table)
    delete_row_btn.click(fn=delete_row, inputs=del_row_idx, outputs=data_table)

    with gr.Row():
        col_name_add = gr.Textbox(label="New Column Name")
        add_col_btn = gr.Button("➕ Add Column")
        col_name_del = gr.Textbox(label="Column Name to Delete")
        del_col_btn = gr.Button("🗑 Delete Column")

    add_col_btn.click(fn=add_column, inputs=col_name_add, outputs=data_table)
    del_col_btn.click(fn=delete_column, inputs=col_name_del, outputs=data_table)

    with gr.Row():
        old_col = gr.Textbox(label="Old Column Name")
        new_col = gr.Textbox(label="New Column Name")
        rename_btn = gr.Button("✏️ Rename Column")

    rename_btn.click(
        fn=rename_column,
        inputs=[old_col, new_col],
        outputs=data_table,
    )

    gr.Markdown("### 📤 Export Dataset as CSV")
    csv_btn = gr.Button("📁 Generate CSV")
    csv_output = gr.File(label="Download CSV")

    csv_btn.click(fn=csv_download_link, outputs=csv_output)

demo.launch()