geekgirl3 committed on
Commit
94571e6
·
verified ·
1 Parent(s): e49d959

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -108
app.py CHANGED
@@ -1,4 +1,12 @@
1
- #25-08-05
 
 
 
 
 
 
 
 
2
 
3
  import gradio as gr
4
  import pandas as pd
@@ -17,64 +25,72 @@ class CSVBinaryTransformer:
17
  self.df = None
18
  self.original_filename = None
19
  self.processed_df = None
20
-
21
- def load_csv(self, file_path: str) -> Tuple[pd.DataFrame, List[str]]:
22
- """Load CSV file and return dataframe and column list"""
23
  try:
24
  self.df = pd.read_csv(file_path)
25
  self.original_filename = os.path.splitext(os.path.basename(file_path))[0]
26
-
27
  # Create checkbox options for columns
28
  column_choices = [(col, col) for col in self.df.columns.tolist()]
29
-
30
- return self.df.head(10), column_choices
 
 
 
 
 
 
 
 
31
  except Exception as e:
32
  raise gr.Error(f"Error loading CSV: {str(e)}")
33
-
34
  def select_all_columns(self) -> List[str]:
35
  """Return all column names for select all functionality"""
36
  if self.df is not None:
37
  return self.df.columns.tolist()
38
  return []
39
-
40
  def deselect_all_columns(self) -> List[str]:
41
  """Return empty list for deselect all functionality"""
42
  return []
43
-
44
  def process_absa_columns(self, df: pd.DataFrame) -> pd.DataFrame:
45
  """Process ABSA prefixed columns to create sentiment and topic columns"""
46
  absa_columns = [col for col in df.columns if col.startswith('ABSA')]
47
-
48
  if not absa_columns:
49
  return df
50
-
51
  # Create verbatim sentiment columns
52
  df['Verbatim_Positive'] = 0
53
  df['Verbatim_Neutral'] = 0
54
  df['Verbatim_Negative'] = 0
55
-
56
  # Dictionary to store topic-sentiment combinations
57
  topic_sentiment_combinations = set()
58
-
59
  for col in absa_columns:
60
  for idx, value in df[col].items():
61
  if pd.isna(value):
62
  continue
63
-
64
  value_str = str(value)
65
-
66
  # Split by pipe delimiter to get individual topic::sentiment pairs
67
  pairs = [pair.strip() for pair in value_str.split('|') if pair.strip()]
68
-
69
  for pair in pairs:
70
  if '::' in pair:
71
  topic_part, sentiment_part = pair.split('::', 1)
72
  topic = topic_part.strip()
73
  sentiment = sentiment_part.strip()
74
-
75
  if topic and sentiment:
76
  topic_sentiment_combinations.add((topic, sentiment))
77
-
78
  # Update verbatim sentiment columns based on sentiment
79
  sentiment_lower = sentiment.lower()
80
  if 'positive' in sentiment_lower:
@@ -83,7 +99,7 @@ class CSVBinaryTransformer:
83
  df.at[idx, 'Verbatim_Negative'] = 1
84
  elif 'neutral' in sentiment_lower:
85
  df.at[idx, 'Verbatim_Neutral'] = 1
86
-
87
  # Create columns for topic-sentiment combinations
88
  for topic, sentiment in topic_sentiment_combinations:
89
  # Clean topic name for column naming
@@ -91,42 +107,42 @@ class CSVBinaryTransformer:
91
  col_name = f"S_{safe_topic}"
92
  if col_name not in df.columns:
93
  df[col_name] = ""
94
-
95
  # Fill the S_ columns with sentiment values
96
  for col in absa_columns:
97
  for idx, value in df[col].items():
98
  if pd.isna(value):
99
  continue
100
-
101
  value_str = str(value)
102
  # Split by pipe delimiter to get individual topic::sentiment pairs
103
  pairs = [pair.strip() for pair in value_str.split('|') if pair.strip()]
104
-
105
  for pair in pairs:
106
  if '::' in pair:
107
  topic_part, sentiment_part = pair.split('::', 1)
108
  topic = topic_part.strip()
109
  sentiment = sentiment_part.strip()
110
-
111
  if topic and sentiment:
112
  # Clean topic name for column naming
113
  safe_topic = re.sub(r'[^\w]', '_', topic).strip('_')
114
  col_name = f"S_{safe_topic}"
115
  if col_name in df.columns:
116
  df.at[idx, col_name] = sentiment
117
-
118
  return df
119
-
120
  def process_categories_columns(self, df: pd.DataFrame) -> pd.DataFrame:
121
  """Process Categories prefixed columns to create binary category columns"""
122
  category_columns = [col for col in df.columns if col.startswith('Categories')]
123
-
124
  if not category_columns:
125
  return df
126
-
127
  # Collect all unique categories
128
  all_categories = set()
129
-
130
  for col in category_columns:
131
  for value in df[col].dropna():
132
  if isinstance(value, str):
@@ -136,53 +152,73 @@ class CSVBinaryTransformer:
136
  cat = cat.strip()
137
  if cat:
138
  all_categories.add(cat)
139
-
140
  # Create binary columns for each category
 
141
  for category in all_categories:
142
  col_name = f"C_{category}"
143
  df[col_name] = 0
144
-
 
145
  for col in category_columns:
146
  for idx, value in df[col].items():
147
  if pd.isna(value):
148
  continue
149
  if isinstance(value, str) and category.lower() in value.lower():
150
  df.at[idx, col_name] = 1
151
-
 
 
 
 
 
 
 
152
  return df
153
-
154
  def process_topics_column(self, df: pd.DataFrame, topics_column: str) -> pd.DataFrame:
155
  """Process specified topics column to create binary topic columns"""
156
  if not topics_column or topics_column not in df.columns:
157
  return df
158
-
159
  # Collect all unique topics
160
  all_topics = set()
161
-
162
  for value in df[topics_column].dropna():
163
  if isinstance(value, str):
164
- # Split by common delimiters
165
- topics = re.split(r'[,;|]', value)
166
  for topic in topics:
167
- topic = topic.strip()
168
  if topic:
169
  all_topics.add(topic)
170
-
171
  # Create binary columns for each topic
 
172
  for topic in all_topics:
173
  col_name = f"T_{topic}"
174
  df[col_name] = 0
175
-
 
176
  for idx, value in df[topics_column].items():
177
  if pd.isna(value):
178
  continue
179
- if isinstance(value, str) and topic.lower() in value.lower():
180
- df.at[idx, col_name] = 1
181
-
 
 
 
 
 
 
 
 
 
 
182
  return df
183
-
184
  def save_transformed_data(self, output_format='xlsx'):
185
- """Save the transformed data and return the file path - using exact same method as working version"""
186
  if self.processed_df is None:
187
  raise ValueError("No transformed data to save")
188
 
@@ -190,11 +226,14 @@ class CSVBinaryTransformer:
190
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
191
  prefix = self.original_filename if self.original_filename else 'transformed_data'
192
 
193
- if output_format == 'xlsx':
194
  filename = f"{prefix}_BinaryTransformation_{timestamp}.xlsx"
195
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx')
196
- self.processed_df.to_excel(temp_file.name, index=False)
197
- temp_file.close()
 
 
 
198
 
199
  final_path = os.path.join(tempfile.gettempdir(), filename)
200
  if os.path.exists(final_path):
@@ -204,8 +243,9 @@ class CSVBinaryTransformer:
204
  else: # csv
205
  filename = f"{prefix}_BinaryTransformation_{timestamp}.csv"
206
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv')
 
 
207
  self.processed_df.to_csv(temp_file.name, index=False)
208
- temp_file.close()
209
 
210
  final_path = os.path.join(tempfile.gettempdir(), filename)
211
  if os.path.exists(final_path):
@@ -221,24 +261,25 @@ class CSVBinaryTransformer:
221
  transformer = CSVBinaryTransformer()
222
 
223
  def handle_file_upload(file):
224
- """Handle CSV file upload"""
225
  if file is None:
226
- return None, gr.update(choices=[], value=[]), "Please upload a CSV file"
227
-
228
  try:
229
- preview_df, column_choices = transformer.load_csv(file.name)
230
  preview_html = preview_df.to_html(classes="table table-striped", table_id="upload-preview")
231
-
232
- # Update the checkbox group with new choices and make all columns visible
233
- column_names = [col for col, _ in column_choices]
234
-
235
  return (
236
- preview_html,
237
- gr.update(choices=column_choices, value=[], visible=True),
 
238
  f"βœ… Successfully loaded CSV with {len(transformer.df)} rows and {len(transformer.df.columns)} columns"
239
  )
240
  except Exception as e:
241
- return None, gr.update(choices=[], value=[], visible=False), f"❌ Error: {str(e)}"
242
 
243
  def select_all():
244
  """Select all columns"""
@@ -251,61 +292,69 @@ def deselect_all():
251
  return gr.update(value=transformer.deselect_all_columns())
252
 
253
  def process_transformation(selected_columns, topics_column, export_format):
254
- """Process the transformation - using exact same pattern as working version"""
255
  try:
256
  if transformer.df is None:
257
  return None, None, "❌ Error: No CSV file loaded"
258
-
259
  if not selected_columns:
260
  return None, None, "❌ Error: Please select at least one column"
261
-
 
 
 
 
 
 
 
262
  # Create a copy of the dataframe with selected columns
263
  processed_df = transformer.df[selected_columns].copy()
264
-
265
  # Process ABSA columns
266
  processed_df = transformer.process_absa_columns(processed_df)
267
-
268
  # Process Categories columns
269
  processed_df = transformer.process_categories_columns(processed_df)
270
-
271
  # Process Topics column
272
  processed_df = transformer.process_topics_column(processed_df, topics_column)
273
-
274
  # Store processed data
275
  transformer.processed_df = processed_df
276
-
277
  # Generate preview
278
  preview_html = processed_df.head(20).to_html(classes="table table-striped", table_id="preview-table")
279
-
280
- # Save file using the exact same method as working version
281
- output_file = transformer.save_transformed_data(export_format.lower().replace(' (.', '').replace(')', ''))
282
-
 
283
  success_msg = f"βœ… Transformation completed! Generated file: {os.path.basename(output_file)}"
284
  success_msg += f"\nπŸ“Š Processed {len(transformer.processed_df)} rows with {len(transformer.processed_df.columns)} columns"
285
- success_msg += f"\nπŸ’Ύ File saved successfully"
286
  success_msg += f"\nπŸ“₯ File download should start automatically"
287
-
288
  return preview_html, output_file, success_msg
289
-
290
  except Exception as e:
291
  import traceback
292
  error_msg = f"❌ Error during transformation: {str(e)}\n\n{traceback.format_exc()}"
293
  return None, None, error_msg
294
 
295
- # Create Gradio interface - using similar structure to working version
296
  with gr.Blocks(title="CSV Binary Transformation Tool", theme=gr.themes.Soft()) as app:
297
  gr.Markdown("""
298
  # πŸ“Š CSV Binary Transformation Tool
299
-
300
  This tool transforms CSV files by creating binary columns for sentiment analysis, categories, and topics.
301
-
302
  ## Features:
303
  - **ABSA Processing**: Creates sentiment columns and topic-sentiment combinations
304
  - **Category Processing**: Creates binary columns for each category
305
- - **Topic Processing**: Creates binary columns for each topic
306
  - **Flexible Export**: Support for CSV and Excel formats
307
  """)
308
-
309
  with gr.Row():
310
  with gr.Column(scale=1):
311
  # File upload section
@@ -320,15 +369,16 @@ with gr.Blocks(title="CSV Binary Transformation Tool", theme=gr.themes.Soft()) a
320
  interactive=False,
321
  lines=2
322
  )
323
-
324
  # Column selection section
325
  gr.Markdown("### 2. Select Columns")
326
  gr.Markdown("*Choose which columns from your CSV to include in the output file*")
327
-
328
  with gr.Row():
329
- select_all_btn = gr.Button("βœ“ Select All", size="sm", variant="secondary")
330
- deselect_all_btn = gr.Button("βœ— Deselect All", size="sm", variant="secondary")
331
-
 
332
  column_selector = gr.CheckboxGroup(
333
  label="Choose columns to include in output",
334
  choices=[],
@@ -337,15 +387,18 @@ with gr.Blocks(title="CSV Binary Transformation Tool", theme=gr.themes.Soft()) a
337
  visible=False,
338
  info="Select the columns you want to include in the transformed output file"
339
  )
340
-
341
- # Topics column input
342
- gr.Markdown("### 3. Specify Topics Column")
 
 
343
  topics_column_input = gr.Textbox(
344
- label="Topics Column Name",
345
- placeholder="Enter the name of the column containing topics",
346
- info="This column will be used to create T_<topic> binary columns"
 
347
  )
348
-
349
  # Export options
350
  gr.Markdown("### 4. Export Settings")
351
  export_format = gr.Radio(
@@ -353,39 +406,39 @@ with gr.Blocks(title="CSV Binary Transformation Tool", theme=gr.themes.Soft()) a
353
  choices=["CSV (.csv)", "Excel (.xlsx)"],
354
  value="Excel (.xlsx)"
355
  )
356
-
357
  # Process button
358
  process_btn = gr.Button(
359
  "πŸš€ Transform Data",
360
  variant="primary",
361
  size="lg"
362
  )
363
-
364
  with gr.Column(scale=2):
365
  # Preview sections
366
  gr.Markdown("### File Preview")
367
-
368
  with gr.Tabs():
369
  with gr.Tab("Original Data"):
370
  original_preview = gr.HTML(
371
  label="Original Data Preview (First 10 rows)",
372
  value="<p>No file uploaded yet</p>"
373
  )
374
-
375
  with gr.Tab("Transformed Data"):
376
  transformed_preview = gr.HTML(
377
  label="Transformed Data Preview (First 20 rows)",
378
  value="<p>No transformation performed yet</p>"
379
  )
380
-
381
  # Status and download
382
  process_status = gr.Textbox(
383
  label="Processing Status",
384
  interactive=False,
385
  lines=6
386
  )
387
-
388
- # Download section - using exact same setup as working version
389
  gr.Markdown("### πŸ“₯ Download Status")
390
  gr.Markdown("Please click on the link inside the output file size value to download the transformed file (the number value on the right hand side below). You may need to right click and select Save Link As (or something similar)")
391
  output_file = gr.File(
@@ -393,31 +446,31 @@ with gr.Blocks(title="CSV Binary Transformation Tool", theme=gr.themes.Soft()) a
393
  interactive=False,
394
  visible=True
395
  )
396
-
397
- # Event handlers - same pattern as working version
398
  file_input.change(
399
  fn=handle_file_upload,
400
  inputs=[file_input],
401
- outputs=[original_preview, column_selector, upload_status]
402
  )
403
-
404
  select_all_btn.click(
405
  fn=select_all,
406
  outputs=[column_selector]
407
  )
408
-
409
  deselect_all_btn.click(
410
  fn=deselect_all,
411
  outputs=[column_selector]
412
  )
413
-
414
  process_btn.click(
415
  fn=process_transformation,
416
  inputs=[column_selector, topics_column_input, export_format],
417
  outputs=[transformed_preview, output_file, process_status]
418
  )
419
-
420
- # Add custom CSS for better table styling
421
  app.load(js="""
422
  function() {
423
  const style = document.createElement('style');
@@ -425,6 +478,29 @@ with gr.Blocks(title="CSV Binary Transformation Tool", theme=gr.themes.Soft()) a
425
  .table { font-size: 12px; }
426
  .table th, .table td { padding: 4px 8px; }
427
  #upload-preview, #preview-table { max-height: 400px; overflow-y: auto; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
  `;
429
  document.head.appendChild(style);
430
  }
@@ -432,5 +508,4 @@ with gr.Blocks(title="CSV Binary Transformation Tool", theme=gr.themes.Soft()) a
432
 
433
  if __name__ == "__main__":
434
  app.launch(
435
-
436
  )
 
1
+ #18-08-05
2
+ # -*- coding: utf-8 -*-
3
+ """BinaryMatrix_Production.ipynb
4
+
5
+ Automatically generated by Colab.
6
+
7
+ Original file is located at
8
+ https://colab.research.google.com/drive/14s_B8s3ANig3-51wDw2PpBXnCCmBnbRo
9
+ """
10
 
11
  import gradio as gr
12
  import pandas as pd
 
25
  self.df = None
26
  self.original_filename = None
27
  self.processed_df = None
28
+
29
+ def load_csv(self, file_path: str) -> Tuple[pd.DataFrame, List[str], str]:
30
+ """Load CSV file and return dataframe, column list, and default topics column"""
31
  try:
32
  self.df = pd.read_csv(file_path)
33
  self.original_filename = os.path.splitext(os.path.basename(file_path))[0]
34
+
35
  # Create checkbox options for columns
36
  column_choices = [(col, col) for col in self.df.columns.tolist()]
37
+
38
+ # Find default topics column (column before the one starting with "Categories")
39
+ default_topics_column = ""
40
+ columns = self.df.columns.tolist()
41
+ for i, col in enumerate(columns):
42
+ if col.startswith('Categories') and i > 0:
43
+ default_topics_column = columns[i-1]
44
+ break
45
+
46
+ return self.df.head(10), column_choices, default_topics_column
47
  except Exception as e:
48
  raise gr.Error(f"Error loading CSV: {str(e)}")
49
+
50
  def select_all_columns(self) -> List[str]:
51
  """Return all column names for select all functionality"""
52
  if self.df is not None:
53
  return self.df.columns.tolist()
54
  return []
55
+
56
  def deselect_all_columns(self) -> List[str]:
57
  """Return empty list for deselect all functionality"""
58
  return []
59
+
60
  def process_absa_columns(self, df: pd.DataFrame) -> pd.DataFrame:
61
  """Process ABSA prefixed columns to create sentiment and topic columns"""
62
  absa_columns = [col for col in df.columns if col.startswith('ABSA')]
63
+
64
  if not absa_columns:
65
  return df
66
+
67
  # Create verbatim sentiment columns
68
  df['Verbatim_Positive'] = 0
69
  df['Verbatim_Neutral'] = 0
70
  df['Verbatim_Negative'] = 0
71
+
72
  # Dictionary to store topic-sentiment combinations
73
  topic_sentiment_combinations = set()
74
+
75
  for col in absa_columns:
76
  for idx, value in df[col].items():
77
  if pd.isna(value):
78
  continue
79
+
80
  value_str = str(value)
81
+
82
  # Split by pipe delimiter to get individual topic::sentiment pairs
83
  pairs = [pair.strip() for pair in value_str.split('|') if pair.strip()]
84
+
85
  for pair in pairs:
86
  if '::' in pair:
87
  topic_part, sentiment_part = pair.split('::', 1)
88
  topic = topic_part.strip()
89
  sentiment = sentiment_part.strip()
90
+
91
  if topic and sentiment:
92
  topic_sentiment_combinations.add((topic, sentiment))
93
+
94
  # Update verbatim sentiment columns based on sentiment
95
  sentiment_lower = sentiment.lower()
96
  if 'positive' in sentiment_lower:
 
99
  df.at[idx, 'Verbatim_Negative'] = 1
100
  elif 'neutral' in sentiment_lower:
101
  df.at[idx, 'Verbatim_Neutral'] = 1
102
+
103
  # Create columns for topic-sentiment combinations
104
  for topic, sentiment in topic_sentiment_combinations:
105
  # Clean topic name for column naming
 
107
  col_name = f"S_{safe_topic}"
108
  if col_name not in df.columns:
109
  df[col_name] = ""
110
+
111
  # Fill the S_ columns with sentiment values
112
  for col in absa_columns:
113
  for idx, value in df[col].items():
114
  if pd.isna(value):
115
  continue
116
+
117
  value_str = str(value)
118
  # Split by pipe delimiter to get individual topic::sentiment pairs
119
  pairs = [pair.strip() for pair in value_str.split('|') if pair.strip()]
120
+
121
  for pair in pairs:
122
  if '::' in pair:
123
  topic_part, sentiment_part = pair.split('::', 1)
124
  topic = topic_part.strip()
125
  sentiment = sentiment_part.strip()
126
+
127
  if topic and sentiment:
128
  # Clean topic name for column naming
129
  safe_topic = re.sub(r'[^\w]', '_', topic).strip('_')
130
  col_name = f"S_{safe_topic}"
131
  if col_name in df.columns:
132
  df.at[idx, col_name] = sentiment
133
+
134
  return df
135
+
136
  def process_categories_columns(self, df: pd.DataFrame) -> pd.DataFrame:
137
  """Process Categories prefixed columns to create binary category columns"""
138
  category_columns = [col for col in df.columns if col.startswith('Categories')]
139
+
140
  if not category_columns:
141
  return df
142
+
143
  # Collect all unique categories
144
  all_categories = set()
145
+
146
  for col in category_columns:
147
  for value in df[col].dropna():
148
  if isinstance(value, str):
 
152
  cat = cat.strip()
153
  if cat:
154
  all_categories.add(cat)
155
+
156
  # Create binary columns for each category
157
+ category_columns_created = []
158
  for category in all_categories:
159
  col_name = f"C_{category}"
160
  df[col_name] = 0
161
+ category_columns_created.append(col_name)
162
+
163
  for col in category_columns:
164
  for idx, value in df[col].items():
165
  if pd.isna(value):
166
  continue
167
  if isinstance(value, str) and category.lower() in value.lower():
168
  df.at[idx, col_name] = 1
169
+
170
+ # Sort C_ columns alphabetically by the text after C_
171
+ if category_columns_created:
172
+ category_columns_created.sort(key=lambda x: x[2:]) # Sort by text after "C_"
173
+ # Reorder the columns in the dataframe
174
+ other_cols = [col for col in df.columns if not col.startswith('C_') or col not in category_columns_created]
175
+ df = df[other_cols + category_columns_created]
176
+
177
  return df
178
+
179
  def process_topics_column(self, df: pd.DataFrame, topics_column: str) -> pd.DataFrame:
180
  """Process specified topics column to create binary topic columns"""
181
  if not topics_column or topics_column not in df.columns:
182
  return df
183
+
184
  # Collect all unique topics
185
  all_topics = set()
186
+
187
  for value in df[topics_column].dropna():
188
  if isinstance(value, str):
189
+ # Split only by pipe delimiter (|) to preserve commas within topics
190
+ topics = [topic.strip() for topic in value.split('|') if topic.strip()]
191
  for topic in topics:
 
192
  if topic:
193
  all_topics.add(topic)
194
+
195
  # Create binary columns for each topic
196
+ topic_columns_created = []
197
  for topic in all_topics:
198
  col_name = f"T_{topic}"
199
  df[col_name] = 0
200
+ topic_columns_created.append(col_name)
201
+
202
  for idx, value in df[topics_column].items():
203
  if pd.isna(value):
204
  continue
205
+ if isinstance(value, str):
206
+ # Split by pipe delimiter and check for exact match
207
+ topics_in_row = [t.strip() for t in value.split('|')]
208
+ if topic in topics_in_row:
209
+ df.at[idx, col_name] = 1
210
+
211
+ # Sort T_ columns alphabetically by the text after T_
212
+ if topic_columns_created:
213
+ topic_columns_created.sort(key=lambda x: x[2:]) # Sort by text after "T_"
214
+ # Reorder the columns in the dataframe
215
+ other_cols = [col for col in df.columns if not col.startswith('T_') or col not in topic_columns_created]
216
+ df = df[other_cols + topic_columns_created]
217
+
218
  return df
219
+
220
  def save_transformed_data(self, output_format='xlsx'):
221
+ """Save the transformed data and return the file path - FIXED Excel export"""
222
  if self.processed_df is None:
223
  raise ValueError("No transformed data to save")
224
 
 
226
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
227
  prefix = self.original_filename if self.original_filename else 'transformed_data'
228
 
229
+ if output_format.lower() in ['xlsx', 'excel', 'excel (.xlsx)']:
230
  filename = f"{prefix}_BinaryTransformation_{timestamp}.xlsx"
231
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx')
232
+ temp_file.close() # Close the file handle before writing
233
+
234
+ # Use pandas to_excel with proper engine
235
+ with pd.ExcelWriter(temp_file.name, engine='openpyxl') as writer:
236
+ self.processed_df.to_excel(writer, index=False)
237
 
238
  final_path = os.path.join(tempfile.gettempdir(), filename)
239
  if os.path.exists(final_path):
 
243
  else: # csv
244
  filename = f"{prefix}_BinaryTransformation_{timestamp}.csv"
245
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv')
246
+ temp_file.close() # Close the file handle before writing
247
+
248
  self.processed_df.to_csv(temp_file.name, index=False)
 
249
 
250
  final_path = os.path.join(tempfile.gettempdir(), filename)
251
  if os.path.exists(final_path):
 
261
  transformer = CSVBinaryTransformer()
262
 
263
def handle_file_upload(file):
    """Handle a CSV upload and refresh the dependent UI components.

    Args:
        file: Upload payload from the file component, or ``None`` when the
            upload was cleared. Either an object exposing the path as
            ``.name`` or a plain path string is accepted.

    Returns:
        A 4-tuple matching the event outputs: (preview HTML or ``None``,
        update for the column selector, default topics column name, status
        message). On success all columns are preselected and the selector is
        made visible; when there is no file or loading fails the selector is
        emptied and hidden.
    """
    if file is None:
        # Hide the selector here too, consistent with the error branch below.
        return None, gr.update(choices=[], value=[], visible=False), "", "Please upload a CSV file"

    # NOTE(review): the payload type depends on how the File component is
    # configured (not visible here); fall back to the value itself when it
    # has no ``.name`` so a plain path string also works.
    file_path = getattr(file, "name", file)

    try:
        preview_df, column_choices, default_topics_column = transformer.load_csv(file_path)
        preview_html = preview_df.to_html(classes="table table-striped", table_id="upload-preview")

        # Preselect all columns
        all_columns = [col for col, _ in column_choices]

        return (
            preview_html,
            gr.update(choices=column_choices, value=all_columns, visible=True),
            default_topics_column,
            f"✅ Successfully loaded CSV with {len(transformer.df)} rows and {len(transformer.df.columns)} columns"
        )
    except Exception as e:
        return None, gr.update(choices=[], value=[], visible=False), "", f"❌ Error: {str(e)}"
283
 
284
  def select_all():
285
  """Select all columns"""
 
292
  return gr.update(value=transformer.deselect_all_columns())
293
 
294
def process_transformation(selected_columns, topics_column, export_format):
    """Validate the inputs, run the transformation pipeline and export it.

    Args:
        selected_columns: Column names to keep from the loaded CSV.
        topics_column: Name of the topics column (mandatory). Surrounding
            whitespace is ignored. It must exist in the loaded data and be
            one of ``selected_columns``, because topic processing runs on the
            selected-columns copy.
        export_format: Label chosen in the export radio, e.g.
            ``"CSV (.csv)"`` or ``"Excel (.xlsx)"``.

    Returns:
        A 3-tuple ``(preview_html, output_file, status_message)``. On any
        validation failure or exception the first two items are ``None`` and
        the message describes the problem (with a traceback for unexpected
        exceptions).
    """
    try:
        if transformer.df is None:
            return None, None, "❌ Error: No CSV file loaded"

        if not selected_columns:
            return None, None, "❌ Error: Please select at least one column"

        # Normalise once so the emptiness check and the lookups below all
        # operate on the same value.
        topics_column = (topics_column or "").strip()
        if not topics_column:
            return None, None, "❌ Error: Topics Column Name is mandatory. Please specify a topics column."

        if topics_column not in transformer.df.columns:
            available = ', '.join(str(col) for col in transformer.df.columns.tolist())
            return None, None, f"❌ Error: Topics column '{topics_column}' not found in the data. Available columns: {available}"

        # Without this check a deselected topics column would silently yield
        # an output with no T_ columns.
        if topics_column not in selected_columns:
            return None, None, f"❌ Error: Topics column '{topics_column}' is not among the selected columns. Please select it so the T_ columns can be created."

        # Create a copy of the dataframe with selected columns
        processed_df = transformer.df[selected_columns].copy()

        # Process ABSA columns
        processed_df = transformer.process_absa_columns(processed_df)

        # Process Categories columns
        processed_df = transformer.process_categories_columns(processed_df)

        # Process Topics column
        processed_df = transformer.process_topics_column(processed_df, topics_column)

        # Store processed data
        transformer.processed_df = processed_df

        # Generate preview
        preview_html = processed_df.head(20).to_html(classes="table table-striped", table_id="preview-table")

        # Map the radio label to the short format name. Stripping punctuation
        # from the label produced "excelxlsx", which is not in the list of
        # Excel names checked by save_transformed_data, so Excel exports were
        # written as CSV.
        format_clean = 'xlsx' if 'xlsx' in str(export_format).lower() else 'csv'
        output_file = transformer.save_transformed_data(format_clean)

        success_msg = f"✅ Transformation completed! Generated file: {os.path.basename(output_file)}"
        success_msg += f"\n📊 Processed {len(transformer.processed_df)} rows with {len(transformer.processed_df.columns)} columns"
        success_msg += f"\n💾 File saved successfully as {format_clean.upper()}"
        success_msg += "\n📥 File download should start automatically"

        return preview_html, output_file, success_msg

    except Exception as e:
        import traceback
        error_msg = f"❌ Error during transformation: {str(e)}\n\n{traceback.format_exc()}"
        return None, None, error_msg
343
 
344
+ # Create Gradio interface - UPDATED with colored buttons and mandatory field
345
  with gr.Blocks(title="CSV Binary Transformation Tool", theme=gr.themes.Soft()) as app:
346
  gr.Markdown("""
347
  # πŸ“Š CSV Binary Transformation Tool
348
+
349
  This tool transforms CSV files by creating binary columns for sentiment analysis, categories, and topics.
350
+
351
  ## Features:
352
  - **ABSA Processing**: Creates sentiment columns and topic-sentiment combinations
353
  - **Category Processing**: Creates binary columns for each category
354
+ - **Topic Processing**: Creates binary columns for each topic (using "|" delimiter)
355
  - **Flexible Export**: Support for CSV and Excel formats
356
  """)
357
+
358
  with gr.Row():
359
  with gr.Column(scale=1):
360
  # File upload section
 
369
  interactive=False,
370
  lines=2
371
  )
372
+
373
  # Column selection section
374
  gr.Markdown("### 2. Select Columns")
375
  gr.Markdown("*Choose which columns from your CSV to include in the output file*")
376
+
377
  with gr.Row():
378
+ # Colored buttons as requested
379
+ select_all_btn = gr.Button("βœ“ Select All", size="sm", variant="primary", elem_classes="green-button")
380
+ deselect_all_btn = gr.Button("βœ— Deselect All", size="sm", variant="secondary", elem_classes="red-button")
381
+
382
  column_selector = gr.CheckboxGroup(
383
  label="Choose columns to include in output",
384
  choices=[],
 
387
  visible=False,
388
  info="Select the columns you want to include in the transformed output file"
389
  )
390
+
391
+ # Topics column input - MADE MANDATORY
392
+ gr.Markdown("### 3. Specify Topics Column **(MANDATORY)**")
393
+ gr.Markdown("⚠️ **This field is required** - Enter the name of the column containing topics")
394
+ gr.Markdown("**Note**: Topics should be separated by '|' delimiter in your data")
395
  topics_column_input = gr.Textbox(
396
+ label="Topics Column Name (Required)",
397
+ placeholder="This field will be auto-populated when you upload a CSV",
398
+ info="This column will be used to create T_<topic> binary columns (using '|' as delimiter)",
399
+ interactive=True
400
  )
401
+
402
  # Export options
403
  gr.Markdown("### 4. Export Settings")
404
  export_format = gr.Radio(
 
406
  choices=["CSV (.csv)", "Excel (.xlsx)"],
407
  value="Excel (.xlsx)"
408
  )
409
+
410
  # Process button
411
  process_btn = gr.Button(
412
  "πŸš€ Transform Data",
413
  variant="primary",
414
  size="lg"
415
  )
416
+
417
  with gr.Column(scale=2):
418
  # Preview sections
419
  gr.Markdown("### File Preview")
420
+
421
  with gr.Tabs():
422
  with gr.Tab("Original Data"):
423
  original_preview = gr.HTML(
424
  label="Original Data Preview (First 10 rows)",
425
  value="<p>No file uploaded yet</p>"
426
  )
427
+
428
  with gr.Tab("Transformed Data"):
429
  transformed_preview = gr.HTML(
430
  label="Transformed Data Preview (First 20 rows)",
431
  value="<p>No transformation performed yet</p>"
432
  )
433
+
434
  # Status and download
435
  process_status = gr.Textbox(
436
  label="Processing Status",
437
  interactive=False,
438
  lines=6
439
  )
440
+
441
+ # Download section
442
  gr.Markdown("### πŸ“₯ Download Status")
443
  gr.Markdown("Please click on the link inside the output file size value to download the transformed file (the number value on the right hand side below). You may need to right click and select Save Link As (or something similar)")
444
  output_file = gr.File(
 
446
  interactive=False,
447
  visible=True
448
  )
449
+
450
+ # Event handlers - UPDATED to include topics column default
451
  file_input.change(
452
  fn=handle_file_upload,
453
  inputs=[file_input],
454
+ outputs=[original_preview, column_selector, topics_column_input, upload_status]
455
  )
456
+
457
  select_all_btn.click(
458
  fn=select_all,
459
  outputs=[column_selector]
460
  )
461
+
462
  deselect_all_btn.click(
463
  fn=deselect_all,
464
  outputs=[column_selector]
465
  )
466
+
467
  process_btn.click(
468
  fn=process_transformation,
469
  inputs=[column_selector, topics_column_input, export_format],
470
  outputs=[transformed_preview, output_file, process_status]
471
  )
472
+
473
+ # Add custom CSS for better styling and colored buttons
474
  app.load(js="""
475
  function() {
476
  const style = document.createElement('style');
 
478
  .table { font-size: 12px; }
479
  .table th, .table td { padding: 4px 8px; }
480
  #upload-preview, #preview-table { max-height: 400px; overflow-y: auto; }
481
+
482
+ /* Green button for Select All */
483
+ .green-button button {
484
+ background-color: #22c55e !important;
485
+ border-color: #16a34a !important;
486
+ color: white !important;
487
+ }
488
+ .green-button button:hover {
489
+ background-color: #16a34a !important;
490
+ border-color: #15803d !important;
491
+ }
492
+
493
+ /* Pale red button for Deselect All */
494
+ .red-button button {
495
+ background-color: #fca5a5 !important;
496
+ border-color: #f87171 !important;
497
+ color: #7f1d1d !important;
498
+ }
499
+ .red-button button:hover {
500
+ background-color: #f87171 !important;
501
+ border-color: #ef4444 !important;
502
+ color: #7f1d1d !important;
503
+ }
504
  `;
505
  document.head.appendChild(style);
506
  }
 
508
 
509
  if __name__ == "__main__":
510
  app.launch(
 
511
  )