fedec65 committed on
Commit
338117f
·
verified ·
1 Parent(s): 1688162

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +596 -0
app.py CHANGED
@@ -0,0 +1,596 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import os
5
+ import traceback
6
+ from typing import Tuple, Dict, Any, Optional
7
+ import tempfile
8
+ import io
9
+ import datetime
10
+
11
class FeedbackTransformer:
    """
    Transform feedback data with topic and sentiment columns
    into a binary format where each topic is a separate column.

    Typical call order: ``load_data`` -> ``set_selected_columns`` ->
    ``identify_columns`` -> ``extract_unique_topics`` -> ``transform_data``
    -> (optionally) ``analyze_data`` -> ``save_transformed_data``.
    """

    # One-pass character mapping used by create_column_name: brackets,
    # parentheses and asterisks are dropped, separators become underscores
    # and '&' is spelled out.
    _COLUMN_NAME_TRANSLATION = str.maketrans({
        '[': None, ']': None, '(': None, ')': None, '*': None,
        '.': '_', ' ': '_', ':': '_', '-': '_', '/': '_',
        '&': 'and',
    })

    def __init__(self,
                 topic_prefix="TOPIC_",
                 sentiment_prefix="SENTIMENT_",
                 category_prefix="Categories:",
                 text_column="TEXT",
                 recommendation_column="Q4_Weiterempfehlung"):
        """
        Initialize the FeedbackTransformer with column specifications.

        Args:
            topic_prefix: Substring that marks a column as a topic column.
            sentiment_prefix: Substring that marks a column as a sentiment column.
            category_prefix: Leading text that marks a column as a category column.
            text_column: Stored on the instance; not read by any method of this class.
            recommendation_column: Stored on the instance; not read by any
                method of this class.
        """
        self.topic_prefix = topic_prefix
        self.sentiment_prefix = sentiment_prefix
        self.category_prefix = category_prefix
        self.text_column = text_column
        self.recommendation_column = recommendation_column
        self.data = None
        self.transformed_data = None
        self.topic_cols = []
        self.sentiment_cols = []
        self.category_cols = []
        self.unique_topics = set()
        self.file_name = None
        self.original_filename = None
        self.selected_columns = []  # Store columns selected for inclusion

    @staticmethod
    def _rewind(file_obj) -> None:
        """Seek a file-like object back to its start; no-op for paths."""
        seek = getattr(file_obj, 'seek', None)
        if callable(seek):
            seek(0)

    @classmethod
    def _read_csv_with_tab_fallback(cls, file_obj):
        """
        Read a ``.csv`` upload as comma-delimited, falling back to tabs.

        The tab retry happens when the comma parse raises, and also when it
        "succeeds" with a single column whose header contains a tab, which is
        what pandas returns for a tab-delimited file without commas.
        """
        try:
            data = pd.read_csv(file_obj, encoding='utf-8')
        except Exception:
            data = None

        looks_tab_delimited = (
            data is not None
            and data.shape[1] == 1
            and '\t' in str(data.columns[0])
        )
        if data is None or looks_tab_delimited:
            # A file object has been (partly) consumed by the first attempt.
            cls._rewind(file_obj)
            data = pd.read_csv(file_obj, sep='\t', encoding='utf-8')
        return data

    def load_data(self, file_obj) -> Tuple[int, int]:
        """
        Load data from the uploaded file object.

        Args:
            file_obj: A file path, or an object with a ``name`` attribute that
                pandas can read from.

        Returns:
            ``(row_count, column_count)`` of the loaded data.

        Raises:
            ValueError: If no file was given or the file could not be read.
        """
        if file_obj is None:
            raise ValueError("No file uploaded")

        # Get file extension and store original filename
        if isinstance(file_obj, str):
            file_name = file_obj
        else:
            file_name = str(getattr(file_obj, 'name', 'unknown'))
        base_name, file_ext = os.path.splitext(os.path.basename(file_name))
        self.original_filename = base_name
        file_ext = file_ext.lower()

        # Read the data based on file type
        try:
            if file_ext in ('.xlsx', '.xls'):
                data = pd.read_excel(file_obj)
            elif file_ext == '.csv':
                data = self._read_csv_with_tab_fallback(file_obj)
            else:
                # Default to tab-delimited
                data = pd.read_csv(file_obj, sep='\t', encoding='utf-8')
        except Exception as e:
            raise ValueError(f"Error reading file: {str(e)}") from e

        self.data = data
        return len(self.data), len(self.data.columns)

    def identify_columns(self) -> Dict[str, Any]:
        """
        Identify topic, category, and sentiment columns in the data.

        Topic and sentiment columns are those whose name *contains* the
        respective prefix; category columns are those whose name *starts
        with* the category prefix. Column labels are compared as strings so
        non-string headers do not raise, and an empty prefix matches nothing
        (instead of matching every column).

        Returns:
            Dict with keys ``topic_cols``, ``sentiment_cols``,
            ``category_cols`` and ``all_columns``.

        Raises:
            ValueError: If no data has been loaded.
        """
        if self.data is None:
            raise ValueError("Data not loaded")

        all_cols = list(self.data.columns)

        # Extract columns based on prefixes
        self.topic_cols = [
            col for col in all_cols
            if self.topic_prefix and self.topic_prefix in str(col)
        ]
        self.sentiment_cols = [
            col for col in all_cols
            if self.sentiment_prefix and self.sentiment_prefix in str(col)
        ]
        self.category_cols = [
            col for col in all_cols
            if self.category_prefix and str(col).startswith(self.category_prefix)
        ]

        # all_columns is returned so callers can offer manual selection
        return {
            'topic_cols': self.topic_cols,
            'sentiment_cols': self.sentiment_cols,
            'category_cols': self.category_cols,
            'all_columns': all_cols
        }

    def extract_unique_topics(self) -> int:
        """
        Extract all unique topics from the topic and category columns.

        Only non-blank string values are kept.

        Returns:
            The number of unique topics found.
        """
        found = set()
        for col in list(self.topic_cols) + list(self.category_cols):
            found.update(self.data[col].dropna().unique())

        # Remove empty / non-string topics
        self.unique_topics = {t for t in found if isinstance(t, str) and t.strip()}
        return len(self.unique_topics)

    @staticmethod
    def create_column_name(topic) -> str:
        """
        Create a standardized column name from a topic string.

        Brackets, parentheses and asterisks are removed; ``. : - /`` and
        spaces become underscores; ``&`` becomes ``and``; runs of underscores
        are collapsed; the result is lower-cased and stripped of leading and
        trailing underscores.
        """
        topic_clean = str(topic).strip().translate(
            FeedbackTransformer._COLUMN_NAME_TRANSLATION
        )
        # Remove multiple underscores
        while '__' in topic_clean:
            topic_clean = topic_clean.replace('__', '_')
        return topic_clean.lower().strip('_')

    @staticmethod
    def _sentiment_score(sentiment):
        """
        Map a sentiment label to its numeric score.

        Returns 1 for labels containing "positive", 0 for "negative", 0.5 for
        "neutral" (case-insensitive, checked in that order) and ``None`` for
        anything else, including non-string values.
        """
        if not isinstance(sentiment, str):
            return None
        lowered = sentiment.lower()
        if 'positive' in lowered:
            return 1
        if 'negative' in lowered:
            return 0
        if 'neutral' in lowered:
            return 0.5
        return None

    def set_selected_columns(self, selected_columns) -> None:
        """
        Set which original columns should be included in the output.
        """
        self.selected_columns = selected_columns if selected_columns else []

    def transform_data(self) -> Tuple[int, int]:
        """
        Transform the data into binary topic columns with sentiment values.

        The output starts with a 1-based ``feedback_id``, followed by the
        selected original columns that exist in the data, followed by one
        0/1 column and one ``<topic>_sentiment`` column per unique topic
        (topics in sorted order). The i-th topic column is paired with the
        i-th sentiment column.

        Rows are handled by position, so the result is correct whatever
        index the loaded data carries, and all columns are assembled in one
        step instead of being inserted one at a time.

        Returns:
            The shape ``(rows, columns)`` of the transformed data.

        Raises:
            ValueError: If no data has been loaded.
        """
        if self.data is None:
            raise ValueError("Data not loaded")

        if not self.unique_topics:
            self.extract_unique_topics()

        row_count = len(self.data)

        # Create output columns starting with feedback_id
        columns = {'feedback_id': np.arange(1, row_count + 1, dtype=np.int64)}

        # Add selected original columns first (right after feedback_id)
        for col in self.selected_columns:
            if col in self.data.columns:
                columns[col] = self.data[col].reset_index(drop=True)

        # Initialize all topic columns to 0 and sentiments to None
        topic_names = set()
        for topic in sorted(self.unique_topics):
            topic_col = self.create_column_name(topic)
            topic_names.add(topic_col)
            columns[topic_col] = np.zeros(row_count, dtype=np.int64)
            # object dtype keeps None / 1 / 0 / 0.5 exactly as stored
            columns[f'{topic_col}_sentiment'] = np.full(row_count, None, dtype=object)

        name_cache = {}

        def column_for(value):
            """Return the generated column name for a non-blank string, else None."""
            if not (isinstance(value, str) and value.strip()):
                return None
            if value not in name_cache:
                name_cache[value] = self.create_column_name(value)
            name = name_cache[value]
            return name if name in topic_names else None

        # Fill in the data from topic columns and their paired sentiments
        sentiment_values = [self.data[col].tolist() for col in self.sentiment_cols]
        for i, t_col in enumerate(self.topic_cols):
            paired = sentiment_values[i] if i < len(sentiment_values) else None
            for pos, topic in enumerate(self.data[t_col].tolist()):
                topic_col = column_for(topic)
                if topic_col is None:
                    continue
                columns[topic_col][pos] = 1

                # Convert sentiment to numeric value; unknown labels leave
                # any previously written score untouched
                score = self._sentiment_score(paired[pos]) if paired is not None else None
                if score is not None:
                    columns[f'{topic_col}_sentiment'][pos] = score

        # Process category columns (these typically don't have sentiments)
        for c_col in self.category_cols:
            for pos, category in enumerate(self.data[c_col].tolist()):
                category_col = column_for(category)
                if category_col is not None:
                    columns[category_col][pos] = 1

        self.transformed_data = pd.DataFrame(columns)
        return self.transformed_data.shape

    def analyze_data(self) -> str:
        """
        Analyze the transformed data to provide insights.

        Produces a Markdown summary: totals, the ten most frequent topics,
        sentiment distributions for the five most frequent topics, and the
        average/maximum number of topics per feedback. The transformed data
        is not modified.

        Returns:
            The Markdown report text.

        Raises:
            ValueError: If there is no transformed data.
        """
        if self.transformed_data is None:
            raise ValueError("No transformed data to analyze")

        data = self.transformed_data

        # Identify topic columns (exclude feedback_id, selected original
        # columns, and sentiment columns)
        excluded_cols = {'feedback_id', *self.selected_columns}
        topic_cols = [col for col in data.columns
                      if col not in excluded_cols
                      and not str(col).endswith('_sentiment')]

        # Count occurrences of each topic and sort by frequency
        topic_counts = {topic: data[topic].sum() for topic in topic_cols}
        sorted_topics = sorted(topic_counts.items(), key=lambda x: x[1], reverse=True)

        # Prepare analysis summary
        parts = [
            "**Analysis Results**\n\n",
            f"Total feedbacks: {len(data)}\n",
            f"Selected original columns: {len(self.selected_columns)}\n",
            f"Unique topics: {len(topic_cols)}\n\n",
        ]

        if self.selected_columns:
            parts.append(
                f"**Included Original Columns:** {', '.join(self.selected_columns)}\n\n"
            )

        parts.append("**Top 10 Most Frequent Topics:**\n")
        for topic, count in sorted_topics[:10]:
            parts.append(f"- {topic}: {count} occurrences\n")

        # Calculate sentiment distributions for top topics
        parts.append("\n**Sentiment Distributions for Top 5 Topics:**\n")
        for topic, _ in sorted_topics[:5]:
            sentiment_col = f"{topic}_sentiment"
            if sentiment_col not in data.columns:
                continue

            # Filter rows where the topic is present
            topic_rows = data[data[topic] == 1]

            positive = (topic_rows[sentiment_col] == 1.0).sum()
            negative = (topic_rows[sentiment_col] == 0.0).sum()
            neutral = (topic_rows[sentiment_col] == 0.5).sum()
            total = positive + negative + neutral

            if total > 0:
                parts.append(f"\n{topic} ({total} occurrences):\n")
                parts.append(f" - Positive: {positive} ({positive/total*100:.1f}%)\n")
                parts.append(f" - Negative: {negative} ({negative/total*100:.1f}%)\n")
                parts.append(f" - Neutral: {neutral} ({neutral/total*100:.1f}%)\n")

        # Number of topics per feedback, computed on the side so that a real
        # topic column named 'topic_count' can never be overwritten or dropped
        topics_per_feedback = data[topic_cols].sum(axis=1)
        avg_topics = topics_per_feedback.mean()
        max_topics = topics_per_feedback.max()

        parts.append("\n**Topics per Feedback:**\n")
        parts.append(f"- Average: {avg_topics:.2f}\n")
        parts.append(f"- Maximum: {max_topics}\n")

        return "".join(parts)

    def save_transformed_data(self, output_format='xlsx') -> str:
        """
        Save the transformed data and return the file path.

        The file is written to the system temp directory as
        ``<original filename>_transformed_<timestamp>.<ext>``; any
        ``output_format`` other than ``'xlsx'`` produces a CSV. The data is
        first written to a closed scratch file and then moved into place, so
        nothing writes to a path that is still held open and an existing file
        of the same name is replaced in one step.

        Raises:
            ValueError: If there is no transformed data, or the output file
                does not exist after writing.
        """
        if self.transformed_data is None:
            raise ValueError("No transformed data to save")

        # Create filename with original filename prefix and timestamp
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

        # Use original filename as prefix, or fallback to 'transformed_feedback'
        prefix = self.original_filename if self.original_filename else 'transformed_feedback'

        extension = 'xlsx' if output_format == 'xlsx' else 'csv'
        temp_dir = tempfile.gettempdir()
        final_path = os.path.join(temp_dir, f"{prefix}_transformed_{timestamp}.{extension}")

        # Scratch file lives in the same directory so the final move is a rename
        handle, scratch_path = tempfile.mkstemp(suffix=f'.{extension}', dir=temp_dir)
        os.close(handle)
        try:
            if extension == 'xlsx':
                self.transformed_data.to_excel(scratch_path, index=False)
            else:
                self.transformed_data.to_csv(scratch_path, index=False)
            os.replace(scratch_path, final_path)
        finally:
            # Only still present when writing or moving failed
            if os.path.exists(scratch_path):
                os.remove(scratch_path)

        # Verify file was created
        if not os.path.exists(final_path):
            raise ValueError(f"Failed to create output file: {final_path}")

        return final_path
306
+
307
+
308
+ # Gradio interface functions
309
def _read_column_preview(file_obj, nrows=5):
    """
    Read the first ``nrows`` rows of an upload to discover its columns.

    Excel files are read with ``read_excel``; ``.csv`` files are tried as
    comma-delimited and re-read as tab-delimited when that fails or yields a
    single column whose header contains a tab; everything else is read as
    tab-delimited.
    """
    if isinstance(file_obj, str):
        file_name = file_obj
    else:
        file_name = str(getattr(file_obj, 'name', 'unknown'))
    file_ext = os.path.splitext(file_name)[1].lower()

    if file_ext in ('.xlsx', '.xls'):
        return pd.read_excel(file_obj, nrows=nrows)
    if file_ext != '.csv':
        return pd.read_csv(file_obj, sep='\t', nrows=nrows)

    try:
        preview = pd.read_csv(file_obj, nrows=nrows)
    except Exception:
        preview = None

    looks_tab_delimited = (
        preview is not None
        and preview.shape[1] == 1
        and '\t' in str(preview.columns[0])
    )
    if preview is None or looks_tab_delimited:
        # A file object has been (partly) consumed by the first attempt.
        seek = getattr(file_obj, 'seek', None)
        if callable(seek):
            seek(0)
        preview = pd.read_csv(file_obj, sep='\t', nrows=nrows)
    return preview


def get_column_selector(file_obj):
    """
    Get a combined column preview and selector interface.

    Returns a ``gr.CheckboxGroup`` whose choices are the file's columns,
    numbered as ``" 1. name"``. With no file, or when the file cannot be
    read, an empty group is returned whose info text explains why.
    """
    if file_obj is None:
        return gr.CheckboxGroup(
            choices=[],
            value=[],
            label="📋 Select Columns to Include",
            info="Upload a file first to see available columns"
        )

    try:
        # Read first few rows to get column names
        columns = list(_read_column_preview(file_obj).columns)
    except Exception as e:
        return gr.CheckboxGroup(
            choices=[],
            value=[],
            label="📋 Select Columns to Include",
            info=f"Error reading file: {str(e)}"
        )

    # Create column display with indices for easier reference
    column_choices = [f"{i+1:2d}. {col}" for i, col in enumerate(columns)]

    # Return updated CheckboxGroup with numbered columns
    return gr.CheckboxGroup(
        choices=column_choices,
        value=[],  # No columns selected by default
        label=f"📋 Select Columns to Include ({len(columns)} available)",
        info="Choose which original columns to include in the transformed file (in addition to feedback_id). Columns are numbered for easy reference.",
        elem_classes=["column-selector"]  # Add CSS class for styling
    )
357
+
358
+
359
def extract_column_names(selected_display_names):
    """
    Extract actual column names from the numbered display format.

    A display name such as ``" 3. Column Name"`` becomes ``"Column Name"``.
    The prefix is only removed when the text before the first ``". "`` is a
    number, so a name that merely contains ``". "`` (e.g. ``"Dr. Smith"``)
    is returned unchanged.

    Args:
        selected_display_names: Iterable of display strings, or a falsy value.

    Returns:
        List of column names in the same order; empty for falsy input.
    """
    if not selected_display_names:
        return []

    actual_names = []
    for display_name in selected_display_names:
        prefix, separator, remainder = display_name.partition('. ')
        if separator and prefix.strip().isdigit():
            actual_names.append(remainder)
        else:
            actual_names.append(display_name)

    return actual_names
376
+
377
+
378
def process_file(file_obj, topic_prefix, sentiment_prefix, category_prefix,
                 text_column, recommendation_column, output_format, analyze_data, selected_columns):
    """
    Main processing function for Gradio interface.

    Loads the upload, identifies topic/sentiment/category columns,
    transforms the data, optionally analyzes it and saves the result.

    Returns:
        ``(status_message, analysis_text, output_file_path)``. On any error
        the status message carries the error and traceback, the analysis is
        empty and the path is ``None``.
    """
    try:
        # Extract actual column names from display format
        actual_column_names = extract_column_names(selected_columns)

        # Initialize transformer
        transformer = FeedbackTransformer(
            topic_prefix=topic_prefix,
            sentiment_prefix=sentiment_prefix,
            category_prefix=category_prefix,
            text_column=text_column,
            recommendation_column=recommendation_column
        )

        # Load data
        rows, cols = transformer.load_data(file_obj)
        status_msg = f"✅ Loaded {rows} rows and {cols} columns\n"

        # Set selected columns for inclusion
        transformer.set_selected_columns(actual_column_names)
        status_msg += f"📋 Selected {len(actual_column_names)} original columns for inclusion\n"
        if actual_column_names:
            status_msg += f" Selected columns: {', '.join(actual_column_names)}\n"

        # Identify columns
        col_info = transformer.identify_columns()
        status_msg += "\n📊 Found columns:\n"
        status_msg += f"- Topic columns: {len(col_info['topic_cols'])}\n"
        status_msg += f"- Sentiment columns: {len(col_info['sentiment_cols'])}\n"
        status_msg += f"- Category columns: {len(col_info['category_cols'])}\n"

        # Extract unique topics
        num_topics = transformer.extract_unique_topics()
        status_msg += f"\n🎯 Found {num_topics} unique topics\n"

        # Transform data
        shape = transformer.transform_data()
        status_msg += f"\n✨ Transformed data shape: {shape[0]} rows × {shape[1]} columns\n"

        # Analyze if requested
        analysis_result = ""
        if analyze_data:
            analysis_result = transformer.analyze_data()

        # Save transformed data
        output_file = transformer.save_transformed_data(output_format)
        status_msg += f"\n💾 File saved successfully: {os.path.basename(output_file)}\n"

        return status_msg, analysis_result, output_file

    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising
        error_msg = f"❌ Error: {str(e)}\n\n{traceback.format_exc()}"
        return error_msg, "", None
435
+
436
+
437
+ # Create Gradio interface
438
# Build the Gradio UI: upload + column selection on the left, configuration
# on the right, then status/analysis outputs and the download component.
with gr.Blocks(title="Feedback Topic & Sentiment Transformer", css="""
.column-selector .form-check {
    display: block !important;
    margin-bottom: 8px !important;
}
.column-selector .form-check-input {
    margin-right: 8px !important;
}
""") as demo:
    gr.Markdown("""
    # 📊 Feedback Topic & Sentiment Transformer
    Transform feedback data with topic and sentiment columns into a binary matrix format.
    Each unique topic becomes a separate column with 0/1 values and associated sentiment scores.
    ### 📋 Instructions:
    1. Upload your Excel, CSV, or tab-delimited text file
    2. Select which original columns to include in the output
    3. Configure column prefixes (or use defaults)
    4. Click "Transform Data" to process
    5. Download the transformed file
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # File upload
            input_file = gr.File(
                label="Upload Input File",
                file_types=[".xlsx", ".xls", ".csv", ".txt"],
                type="filepath"
            )

            # Combined column selector (replaces both preview and checkboxes)
            gr.Markdown("### 📋 Column Selection")
            column_selector = gr.CheckboxGroup(
                choices=[],
                value=[],
                label="Select Columns to Include",
                info="Upload a file first to see available columns"
            )

        with gr.Column(scale=1):
            # Configuration parameters
            gr.Markdown("### ⚙️ Configuration")

            topic_prefix = gr.Textbox(
                label="Topic Column Prefix",
                value="[**WORKSHOP] SwissLife Taxonomy",
                info="Prefix to identify topic columns"
            )

            sentiment_prefix = gr.Textbox(
                label="Sentiment Column Prefix",
                value="ABSA:",
                info="Prefix to identify sentiment columns"
            )

            category_prefix = gr.Textbox(
                label="Category Column Prefix",
                value="Categories:",
                info="Prefix to identify category columns"
            )

            text_column = gr.Textbox(
                label="Text Column Name",
                value="TEXT",
                info="Column containing original feedback text (for reference only)"
            )

            recommendation_column = gr.Textbox(
                label="Recommendation Column Name",
                value="Q4_Weiterempfehlung",
                info="Column containing recommendation scores (for reference only)"
            )

            output_format = gr.Radio(
                label="Output Format",
                choices=["xlsx", "csv"],
                value="xlsx"
            )

            analyze_checkbox = gr.Checkbox(
                label="Analyze transformed data",
                value=True
            )

    # Transform button
    transform_btn = gr.Button("🔄 Transform Data", variant="primary", size="lg")

    # Output sections
    with gr.Row():
        with gr.Column():
            status_output = gr.Textbox(
                label="Processing Status",
                lines=10,
                interactive=False
            )

        with gr.Column():
            analysis_output = gr.Markdown(
                label="Data Analysis"
            )

    # Download section - Modified for better download functionality
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📥 Download Transformed File")
            output_file = gr.File(
                label="Transformed File",
                interactive=False,
                visible=True
            )

    # Event handlers
    # Refresh the column choices whenever a different file is uploaded
    input_file.change(
        fn=get_column_selector,
        inputs=[input_file],
        outputs=[column_selector]
    )

    # Input order must match the parameter order of process_file
    transform_btn.click(
        fn=process_file,
        inputs=[
            input_file,
            topic_prefix,
            sentiment_prefix,
            category_prefix,
            text_column,
            recommendation_column,
            output_format,
            analyze_checkbox,
            column_selector
        ],
        outputs=[status_output, analysis_output, output_file]
    )

    # Examples section
    gr.Markdown("""
    ### 📝 Example Column Formats:
    - **Topic columns**: `[**WORKSHOP] SwissLife Taxonomy(Kommentar) 1`, `[**WORKSHOP] SwissLife Taxonomy(Kommentar) 2`
    - **Category columns**: `Categories:Topic1`, `Categories:Topic2`
    - **Sentiment columns**: `ABSA:Sentiment1`, `ABSA:Sentiment2`
    ### 🎯 Output Format:
    - **feedback_id**: Unique identifier for each row
    - **Selected original columns**: Any columns you selected from the original file
    - **Topic columns**: Each unique topic becomes a column with values 0 (absent) or 1 (present)
    - **Sentiment columns**: Each topic has an associated `_sentiment` column with values:
      - 1.0 = Positive
      - 0.5 = Neutral
      - 0.0 = Negative
    - **Output filename**: `[original_filename]_transformed_[timestamp].[format]`
    ### 💡 Tips:
    - Use the numbered column list to easily identify and select columns
    - The text and recommendation column names in configuration are now for reference only
    - To include them in output, select them using the column checkboxes
    - Click on the download button that appears after processing to download the file
    """)

# Launch the app
if __name__ == "__main__":
    demo.launch()