Spaces:
Sleeping
Sleeping
File size: 3,738 Bytes
142a9cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import gradio as gr
import pandas as pd
from datasets import load_dataset
from io import StringIO
# Global DataFrame: the working copy of the dataset. Every handler below
# reads it and/or rebinds it through `global global_df`. It starts empty
# (no rows, no columns) until a dataset is loaded.
global_df = pd.DataFrame()
# بارگذاری دیتاست از Hugging Face
def load_hf_dataset(hf_path):
    """Load the first split of a Hugging Face dataset into the global table.

    Args:
        hf_path: Dataset identifier passed to ``load_dataset``. Leading and
            trailing whitespace is ignored.

    Returns:
        A ``(dataframe, status_message)`` tuple. On success the dataframe
        holds the first split and a copy of it replaces ``global_df``. On any
        failure (including an empty path) the dataframe is ``None``, the
        message starts with "❌ Error loading dataset:" and ``global_df`` is
        left untouched.
    """
    global global_df

    # Reject empty / whitespace-only / None input up front instead of letting
    # the loader fail with an unrelated-looking error.
    path = (hf_path or "").strip()
    if not path:
        return None, "❌ Error loading dataset:\nDataset path is empty."

    try:
        dataset = load_dataset(path)
        split_names = list(dataset.keys())
        first_split = dataset[split_names[0]]
        df = pd.DataFrame(first_split)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any failure is
        # reported in the status box rather than crashing the handler.
        return None, f"❌ Error loading dataset:\n{str(e)}"

    # Keep a private copy so later edits to the global table cannot alias the
    # frame handed back to the caller.
    global_df = df.copy()
    return df, f"✅ Dataset loaded from '{path}' with {len(df)} rows."
# اضافه کردن ردیف
def add_row():
    """Append one blank row to the global table and return the table.

    Every existing column receives an empty string in the new row; the
    index is renumbered from zero after the append.
    """
    global global_df
    blank = pd.DataFrame([dict.fromkeys(global_df.columns, "")])
    global_df = pd.concat([global_df, blank], ignore_index=True)
    return global_df
# حذف ردیف بر اساس شماره ردیف
def delete_row(index):
    """Delete the row at position ``index`` from the global table.

    Args:
        index: Zero-based row position. Integral floats such as ``2.0`` are
            accepted. ``None``, NaN, infinities, fractional values,
            non-numeric values and out-of-range positions are ignored.

    Returns:
        The global table, re-indexed from zero if a row was removed and
        unchanged otherwise.
    """
    global global_df

    try:
        position = int(index)
    except (TypeError, ValueError, OverflowError):
        # None, NaN, infinities, or non-numeric text: nothing to delete.
        return global_df

    # int() truncates, so a fractional input such as 1.5 would silently
    # become 1; treat it as invalid instead of deleting the wrong row.
    if position != index:
        return global_df

    if 0 <= position < len(global_df):
        # Resolve the position to its label so the drop is positional even
        # if the index is not a clean 0..n-1 range.
        global_df = global_df.drop(global_df.index[position]).reset_index(drop=True)
    return global_df
# اضافه کردن ستون
def add_column(col_name):
    """Add a column filled with empty strings to the global table.

    Nothing happens when ``col_name`` is empty or already names a column.
    The global table is returned in either case.
    """
    global global_df
    if not col_name or col_name in global_df.columns:
        return global_df
    global_df[col_name] = ""
    return global_df
# حذف ستون
def delete_column(col_name):
    """Remove the column named ``col_name`` from the global table.

    An unknown name is ignored. The global table is returned in either case.
    """
    global global_df
    if col_name not in global_df.columns:
        return global_df
    global_df = global_df.drop(columns=[col_name])
    return global_df
# تغییر نام ستون
def rename_column(old_name, new_name):
    """Rename a column of the global table.

    Args:
        old_name: Name of an existing column.
        new_name: Replacement name; must be non-empty and must not already
            name a column.

    Returns:
        The global table, with the column renamed when the request is valid
        and unchanged otherwise.
    """
    global global_df
    columns = global_df.columns
    if not new_name or old_name not in columns:
        return global_df
    # Renaming onto an existing name would leave two columns with the same
    # label, which makes later lookups by name ambiguous and duplicates the
    # CSV header, so such a request is refused.
    if new_name in columns:
        return global_df
    global_df = global_df.rename(columns={old_name: new_name})
    return global_df
# گرفتن خروجی CSV
def download_csv():
    """Return the global table serialized as CSV text, without the index."""
    return global_df.to_csv(index=False)
# رابط کاربری Gradio
# Gradio user interface: one page wiring the dataset handlers to buttons.
with gr.Blocks(title="HuggingFace Dataset Editor") as demo:
    gr.Markdown("## 🧬 Hugging Face Dataset Editor (Gradio Web App)")

    # --- Load a dataset ---------------------------------------------------
    with gr.Row():
        dataset_input = gr.Textbox(label="Hugging Face Dataset Path (e.g. `codersan/Persian-Wikipedia-Corpus`)", value="")
        load_btn = gr.Button("🔄 Load Dataset")
    status = gr.Textbox(label="Status", interactive=False)
    data_table = gr.Dataframe(label="Dataset", wrap=True, interactive=True)
    load_btn.click(fn=load_hf_dataset, inputs=dataset_input, outputs=[data_table, status])

    # --- Row operations ---------------------------------------------------
    with gr.Row():
        add_row_btn = gr.Button("➕ Add Row")
        # precision=0 makes the component hand the handler an int rather
        # than a float, since the value is used as a row position.
        del_row_idx = gr.Number(label="Row Index to Delete", value=0, precision=0)
        delete_row_btn = gr.Button("🗑 Delete Row")
    add_row_btn.click(fn=add_row, outputs=data_table)
    delete_row_btn.click(fn=delete_row, inputs=del_row_idx, outputs=data_table)

    # --- Column add / delete ----------------------------------------------
    with gr.Row():
        col_name_add = gr.Textbox(label="New Column Name")
        add_col_btn = gr.Button("➕ Add Column")
        col_name_del = gr.Textbox(label="Column Name to Delete")
        del_col_btn = gr.Button("🗑 Delete Column")
    add_col_btn.click(fn=add_column, inputs=col_name_add, outputs=data_table)
    del_col_btn.click(fn=delete_column, inputs=col_name_del, outputs=data_table)

    # --- Column rename ----------------------------------------------------
    with gr.Row():
        old_col = gr.Textbox(label="Old Column Name")
        new_col = gr.Textbox(label="New Column Name")
        rename_btn = gr.Button("✏️ Rename Column")
    rename_btn.click(fn=rename_column, inputs=[old_col, new_col], outputs=data_table)

    # --- CSV export -------------------------------------------------------
    gr.Markdown("### 📤 Export Dataset as CSV")
    csv_btn = gr.Button("📁 Generate CSV")
    csv_output = gr.File(label="Download CSV")

    def csv_download_link():
        """Write the current table to a temporary ``dataset.csv`` and return its path.

        The File output component is given a path on disk, so the CSV text
        is written to a fresh temporary directory and that path is returned.
        """
        import tempfile
        from pathlib import Path

        export_path = Path(tempfile.mkdtemp()) / "dataset.csv"
        # Write bytes so the line endings produced by to_csv are kept as-is
        # instead of being translated a second time by text-mode I/O.
        export_path.write_bytes(download_csv().encode("utf-8"))
        return str(export_path)

    csv_btn.click(fn=csv_download_link, outputs=csv_output)

demo.launch()
|