File size: 3,738 Bytes
142a9cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
import pandas as pd
from datasets import load_dataset
from io import StringIO

# Module-level DataFrame holding the dataset currently being edited.
# The handler functions in this file read it and rebind it via `global`.
global_df: pd.DataFrame = pd.DataFrame()

# Load a dataset from Hugging Face
def load_hf_dataset(hf_path):
    """Load a Hugging Face dataset into the shared ``global_df``.

    Args:
        hf_path: Dataset identifier handed to ``load_dataset``. Leading and
            trailing whitespace is ignored.

    Returns:
        A ``(dataframe, status_message)`` tuple. On success the DataFrame
        holds the rows of the first split and a copy of it is stored in
        ``global_df``. When the path is blank or loading fails, the first
        element is ``None`` and the message describes the problem;
        ``global_df`` is left untouched in that case.
    """
    global global_df
    dataset_id = (hf_path or "").strip()
    if not dataset_id:
        # Fail early with a readable message instead of forwarding an empty
        # identifier to the loader.
        return None, "❌ Error loading dataset:\nPlease enter a dataset path."
    try:
        dataset = load_dataset(dataset_id)
        # Only the first split is loaded into the editor.
        split = list(dataset.keys())[0]
        df = pd.DataFrame(dataset[split])
        # Keep an independent copy so the returned frame and the stored
        # state do not alias each other.
        global_df = df.copy()
        return df, f"✅ Dataset loaded from '{dataset_id}' with {len(df)} rows."
    except Exception as e:
        # UI boundary: report any failure in the status box rather than crash.
        return None, f"❌ Error loading dataset:\n{e}"

# Add a row
def add_row():
    """Append one row of empty strings to ``global_df`` and return it.

    The new row has an empty string for every existing column, and the
    resulting frame is re-indexed from zero.
    """
    global global_df
    blank_values = dict.fromkeys(global_df.columns, "")
    blank_frame = pd.DataFrame([blank_values])
    global_df = pd.concat([global_df, blank_frame], ignore_index=True)
    return global_df

# Delete a row by its row number
def delete_row(index):
    """Remove the row at position ``index`` from ``global_df``.

    Args:
        index: Zero-based row position. The UI wires this to a numeric
            input field, so it may arrive as a whole-valued float
            (e.g. ``1.0``) or as ``None`` when the field is empty.

    Returns:
        The current ``global_df``. It is unchanged when ``index`` is
        ``None``, non-numeric, fractional, or out of range.
    """
    global global_df
    try:
        requested = float(index)
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric text, or an int too large for a float.
        return global_df
    if not requested.is_integer():
        # Fractional values (and NaN / infinity) do not name a row.
        return global_df
    position = int(requested)
    if 0 <= position < len(global_df):
        # Resolve the position to its index label so the drop stays
        # positional whatever the index looks like.
        row_label = global_df.index[position]
        global_df = global_df.drop(index=row_label).reset_index(drop=True)
    return global_df

# Add a column
def add_column(col_name):
    """Add a column of empty strings named ``col_name`` to ``global_df``.

    Nothing happens when ``col_name`` is empty or a column with that name
    already exists. The (possibly modified) ``global_df`` is returned.
    """
    global global_df
    name_missing = not col_name
    if name_missing or col_name in global_df.columns:
        return global_df
    # Assigned in place: the existing frame object gains the new column.
    global_df[col_name] = ""
    return global_df

# Delete a column
def delete_column(col_name):
    """Drop the column named ``col_name`` from ``global_df`` if it exists.

    Returns the current ``global_df``; it is unchanged when no column has
    that name.
    """
    global global_df
    if col_name not in global_df.columns:
        return global_df
    global_df = global_df.drop(labels=[col_name], axis="columns")
    return global_df

# Rename a column
def rename_column(old_name, new_name):
    """Rename column ``old_name`` of ``global_df`` to ``new_name``.

    Args:
        old_name: Name of an existing column.
        new_name: Replacement name; must be non-empty and must not collide
            with another existing column.

    Returns:
        The current ``global_df``. It is unchanged when ``old_name`` does
        not exist, ``new_name`` is empty, or ``new_name`` is already used
        by a different column.
    """
    global global_df
    existing = global_df.columns
    if not new_name or old_name not in existing:
        return global_df
    if new_name != old_name and new_name in existing:
        # Renaming onto an existing label would create duplicate column
        # names; add_column rejects existing names too, so refuse here as well.
        return global_df
    global_df = global_df.rename(columns={old_name: new_name})
    return global_df

# Get the CSV output
def download_csv():
    """Return ``global_df`` serialized as CSV text, without the index column."""
    return global_df.to_csv(index=False)

# Gradio user interface
with gr.Blocks(title="HuggingFace Dataset Editor") as demo:
    gr.Markdown("## 🧬 Hugging Face Dataset Editor (Gradio Web App)")

    # --- Dataset loading -------------------------------------------------
    with gr.Row():
        dataset_input = gr.Textbox(label="Hugging Face Dataset Path (e.g. `codersan/Persian-Wikipedia-Corpus`)", value="")
        load_btn = gr.Button("🔄 Load Dataset")

    status = gr.Textbox(label="Status", interactive=False)
    data_table = gr.Dataframe(label="Dataset", wrap=True, interactive=True)

    load_btn.click(fn=load_hf_dataset, inputs=dataset_input, outputs=[data_table, status])

    # --- Row operations --------------------------------------------------
    with gr.Row():
        add_row_btn = gr.Button("➕ Add Row")
        # precision=0 makes the field deliver whole numbers, which is what
        # a row index needs.
        del_row_idx = gr.Number(label="Row Index to Delete", value=0, precision=0)
        delete_row_btn = gr.Button("🗑 Delete Row")

    add_row_btn.click(fn=add_row, outputs=data_table)
    delete_row_btn.click(fn=delete_row, inputs=del_row_idx, outputs=data_table)

    # --- Column operations -----------------------------------------------
    with gr.Row():
        col_name_add = gr.Textbox(label="New Column Name")
        add_col_btn = gr.Button("➕ Add Column")
        col_name_del = gr.Textbox(label="Column Name to Delete")
        del_col_btn = gr.Button("🗑 Delete Column")

    add_col_btn.click(fn=add_column, inputs=col_name_add, outputs=data_table)
    del_col_btn.click(fn=delete_column, inputs=col_name_del, outputs=data_table)

    with gr.Row():
        old_col = gr.Textbox(label="Old Column Name")
        new_col = gr.Textbox(label="New Column Name")
        rename_btn = gr.Button("✏️ Rename Column")

    rename_btn.click(fn=rename_column, inputs=[old_col, new_col], outputs=data_table)

    # --- CSV export ------------------------------------------------------
    gr.Markdown("### 📤 Export Dataset as CSV")

    csv_btn = gr.Button("📁 Generate CSV")
    csv_output = gr.File(label="Download CSV")

    def csv_download_link():
        """Write the current dataset to a temporary ``dataset.csv``.

        Returns:
            The path of the written file, which is handed to the
            ``csv_output`` file component as its value.
        """
        # Imported locally so this block brings its own dependencies.
        import os
        import tempfile

        # A fresh directory per export lets the file keep the fixed name
        # "dataset.csv" without clobbering an earlier export.
        export_dir = tempfile.mkdtemp(prefix="dataset_export_")
        csv_path = os.path.join(export_dir, "dataset.csv")
        # newline="" prevents a second translation of the line endings that
        # to_csv already emitted; UTF-8 keeps non-ASCII cell text intact.
        with open(csv_path, "w", encoding="utf-8", newline="") as csv_file:
            csv_file.write(download_csv())
        return csv_path

    csv_btn.click(fn=csv_download_link, outputs=csv_output)

# Launch the Gradio app held in `demo`.
demo.launch()