# Spaces: Sleeping
import os
import tempfile
from io import StringIO

import gradio as gr
import pandas as pd
from datasets import load_dataset
# Global DataFrame: the dataset currently being edited. The handlers in this
# file read it and rebind it through ``global global_df``.
# NOTE(review): being module-level, this state is presumably shared by every
# concurrent user of the app - confirm whether per-session state is needed.
global_df: pd.DataFrame = pd.DataFrame()
# Load a dataset from Hugging Face
def load_hf_dataset(hf_path):
    """Load a Hugging Face dataset into the shared table and report the outcome.

    Args:
        hf_path: Dataset identifier handed to ``load_dataset``. Leading and
            trailing whitespace is ignored.

    Returns:
        A ``(dataframe, status_message)`` tuple. On success the dataframe is
        built from the first split of the dataset and a copy of it becomes the
        new ``global_df``. On failure ``global_df`` is left untouched and is
        returned as-is, so the table shown to the user keeps matching the
        state the other handlers operate on.
    """
    global global_df
    path = (hf_path or "").strip()
    if not path:
        # Fail fast with a readable message instead of a library error.
        return global_df, "❌ Error loading dataset:\nDataset path is empty."
    try:
        dataset = load_dataset(path)
        # Only the first split returned by the loader is displayed.
        split = list(dataset.keys())[0]
        data = dataset[split]
        df = pd.DataFrame(data)
        global_df = df.copy()
        return df, f"✅ Dataset loaded from '{path}' with {len(df)} rows."
    except Exception as e:
        # UI boundary: any failure is turned into a status message rather than
        # crashing the handler. The current table is returned unchanged.
        return global_df, f"❌ Error loading dataset:\n{str(e)}"
# Add a row
def add_row():
    """Append one row of empty strings to ``global_df`` and return the result.

    The new row has an empty string in every existing column, and the row
    index is renumbered from zero.
    """
    global global_df
    blank_row = pd.DataFrame([dict.fromkeys(global_df.columns, "")])
    global_df = pd.concat([global_df, blank_row], ignore_index=True)
    return global_df
# Delete a row by its row number
def delete_row(index):
    """Remove the row at position ``index`` from ``global_df``.

    Args:
        index: Zero-based row position. Floats with an integral value
            (for example ``2.0``) are accepted. ``None``, non-numeric values,
            fractional values and out-of-range positions leave the table
            unchanged.

    Returns:
        The current ``global_df``, renumbered from zero after a deletion.
    """
    global global_df
    try:
        number = float(index)
        position = int(number)
    except (TypeError, ValueError, OverflowError):
        # None, text, NaN or infinity: nothing sensible to delete.
        return global_df
    if position != number:
        # A fractional value does not identify a row.
        return global_df
    if 0 <= position < len(global_df):
        # Resolve the position to its label so the right row is dropped even
        # if the index is not a plain 0..n-1 range.
        global_df = global_df.drop(global_df.index[position]).reset_index(drop=True)
    return global_df
# Add a column
def add_column(col_name):
    """Add a column of empty strings named ``col_name`` to ``global_df``.

    Nothing changes when ``col_name`` is empty or already names a column.
    The current ``global_df`` is returned either way.
    """
    global global_df
    if not col_name or col_name in global_df.columns:
        return global_df
    global_df[col_name] = ""
    return global_df
# Delete a column
def delete_column(col_name):
    """Drop the column named ``col_name`` from ``global_df`` if it exists.

    An unknown name leaves the table unchanged. The current ``global_df`` is
    returned either way.
    """
    global global_df
    if col_name not in global_df.columns:
        return global_df
    global_df = global_df.drop(columns=[col_name])
    return global_df
# Rename a column
def rename_column(old_name, new_name):
    """Rename column ``old_name`` of ``global_df`` to ``new_name``.

    The table is left unchanged when ``old_name`` is not a column, when
    ``new_name`` is empty, or when ``new_name`` already names a column.

    Returns:
        The current ``global_df``.
    """
    global global_df
    if not new_name or old_name not in global_df.columns:
        return global_df
    if new_name in global_df.columns:
        # Refuse a rename that would create two columns with the same label,
        # matching the duplicate check performed by add_column.
        return global_df
    global_df = global_df.rename(columns={old_name: new_name})
    return global_df
# Get the CSV output
def download_csv():
    """Return the contents of ``global_df`` as CSV text, without the row index."""
    return global_df.to_csv(index=False)
# Gradio user interface
# NOTE(review): no handler below takes `data_table` as an input, so cells
# edited by hand in the interactive table are not written back to `global_df`.
with gr.Blocks(title="HuggingFace Dataset Editor") as demo:
    gr.Markdown("## 🧬 Hugging Face Dataset Editor (Gradio Web App)")

    # Dataset loading
    with gr.Row():
        dataset_input = gr.Textbox(
            label="Hugging Face Dataset Path (e.g. `codersan/Persian-Wikipedia-Corpus`)",
            value="",
        )
        load_btn = gr.Button("🔄 Load Dataset")
    status = gr.Textbox(label="Status", interactive=False)
    data_table = gr.Dataframe(label="Dataset", wrap=True, interactive=True)
    load_btn.click(fn=load_hf_dataset, inputs=dataset_input, outputs=[data_table, status])

    # Row operations
    with gr.Row():
        add_row_btn = gr.Button("➕ Add Row")
        # precision=0 makes the component deliver an integer row index.
        del_row_idx = gr.Number(label="Row Index to Delete", value=0, precision=0)
        delete_row_btn = gr.Button("🗑 Delete Row")
    add_row_btn.click(fn=add_row, outputs=data_table)
    delete_row_btn.click(fn=delete_row, inputs=del_row_idx, outputs=data_table)

    # Column add / delete
    with gr.Row():
        col_name_add = gr.Textbox(label="New Column Name")
        add_col_btn = gr.Button("➕ Add Column")
        col_name_del = gr.Textbox(label="Column Name to Delete")
        del_col_btn = gr.Button("🗑 Delete Column")
    add_col_btn.click(fn=add_column, inputs=col_name_add, outputs=data_table)
    del_col_btn.click(fn=delete_column, inputs=col_name_del, outputs=data_table)

    # Column rename
    with gr.Row():
        old_col = gr.Textbox(label="Old Column Name")
        new_col = gr.Textbox(label="New Column Name")
        rename_btn = gr.Button("✏️ Rename Column")
    rename_btn.click(fn=rename_column, inputs=[old_col, new_col], outputs=data_table)

    # CSV export
    gr.Markdown("### 📤 Export Dataset as CSV")
    csv_btn = gr.Button("📁 Generate CSV")
    csv_output = gr.File(label="Download CSV")

    def csv_download_link():
        """Write the current dataset to ``dataset.csv`` in a fresh temporary
        directory and return that file's path for the File component."""
        csv_str = download_csv()
        csv_path = os.path.join(tempfile.mkdtemp(prefix="dataset_editor_"), "dataset.csv")
        # newline="" keeps the line endings produced by pandas unchanged.
        with open(csv_path, "w", encoding="utf-8", newline="") as handle:
            handle.write(csv_str)
        return csv_path

    csv_btn.click(fn=csv_download_link, outputs=csv_output)

demo.launch()