geekgirl3 committed on
Commit
19651ed
·
verified ·
1 Parent(s): 42fdecf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -85
app.py CHANGED
@@ -42,7 +42,6 @@ class FeedbackTransformer:
42
  self.original_filename = None
43
  self.selected_columns = []
44
  self.verbatim_column = None # Store the verbatim/text column
45
- self.dynamic_topic_prefix = None # Store dynamically extracted topic prefix
46
 
47
  def load_data(self, file_obj):
48
  """
@@ -75,37 +74,6 @@ class FeedbackTransformer:
75
 
76
  return len(self.data), len(self.data.columns)
77
 
78
- def extract_topic_prefix_from_category(self):
79
- """
80
- Extract the topic prefix from a column containing "Category:"
81
- by finding text between "Category:" and "("
82
- """
83
- # Look for columns containing "Category:"
84
- category_pattern_cols = [col for col in self.data.columns if "Category:" in col]
85
-
86
- if category_pattern_cols:
87
- # Use the first matching column
88
- category_col = category_pattern_cols[0]
89
-
90
- # Try to extract from column name first
91
- match = re.search(r'Category:\s*([^(]+)\s*\(', category_col)
92
- if match:
93
- extracted_prefix = match.group(1).strip() + ":"
94
- self.dynamic_topic_prefix = extracted_prefix
95
- return extracted_prefix
96
-
97
- # If not found in column name, try to extract from column values
98
- for value in self.data[category_col].dropna():
99
- if isinstance(value, str):
100
- match = re.search(r'Category:\s*([^(]+)\s*\(', value)
101
- if match:
102
- extracted_prefix = match.group(1).strip() + ":"
103
- self.dynamic_topic_prefix = extracted_prefix
104
- return extracted_prefix
105
-
106
- # If no match found, return None
107
- return None
108
-
109
  def identify_columns(self):
110
  """
111
  Identify topic, category, and sentiment columns in the data.
@@ -113,22 +81,8 @@ class FeedbackTransformer:
113
  if self.data is None:
114
  raise ValueError("Data not loaded")
115
 
116
- # First try to extract topic prefix dynamically
117
- extracted_prefix = self.extract_topic_prefix_from_category()
118
-
119
- # Use dynamic prefix if found, otherwise use the provided topic_prefix
120
- topic_identifier = extracted_prefix if extracted_prefix else self.topic_prefix
121
-
122
- # Log the prefix being used
123
- print(f"Using topic prefix: '{topic_identifier}'")
124
-
125
  # Extract columns based on prefixes
126
- # For topic columns, use the dynamic or provided prefix
127
- if topic_identifier:
128
- self.topic_cols = [col for col in self.data.columns if topic_identifier in col]
129
- else:
130
- self.topic_cols = [col for col in self.data.columns if "Topic:" in col]
131
-
132
  self.sentiment_cols = [col for col in self.data.columns if self.sentiment_prefix in col]
133
  self.category_cols = [col for col in self.data.columns if col.startswith(self.category_prefix)]
134
 
@@ -147,8 +101,7 @@ class FeedbackTransformer:
147
  'sentiment_cols': self.sentiment_cols,
148
  'category_cols': self.category_cols,
149
  'all_columns': all_cols,
150
- 'verbatim_column': self.verbatim_column,
151
- 'dynamic_topic_prefix': self.dynamic_topic_prefix
152
  }
153
 
154
  def extract_unique_topics_and_categories(self):
@@ -359,13 +312,7 @@ class FeedbackTransformer:
359
  analysis_text += f"Topic columns (T_): {len(topic_cols)}\n"
360
  analysis_text += f"Sentiment columns (S_): {len(sentiment_cols)}\n"
361
  analysis_text += f"Category columns (C_): {len(category_cols)}\n"
362
- analysis_text += f"Verbatim column used: {self.verbatim_column}\n"
363
-
364
- # Add dynamic topic prefix info
365
- if self.dynamic_topic_prefix:
366
- analysis_text += f"Dynamic topic prefix extracted: '{self.dynamic_topic_prefix}'\n\n"
367
- else:
368
- analysis_text += f"Topic prefix used: '{self.topic_prefix}'\n\n"
369
 
370
  if self.selected_columns:
371
  analysis_text += f"**Included Original Columns:** {', '.join(self.selected_columns)}\n\n"
@@ -538,10 +485,6 @@ def process_file(file_obj, topic_prefix, sentiment_prefix, category_prefix,
538
  status_msg += f"- Category columns: {len(col_info['category_cols'])}\n"
539
  status_msg += f"- Verbatim column: {col_info['verbatim_column']}\n"
540
 
541
- # Add dynamic topic prefix info
542
- if col_info.get('dynamic_topic_prefix'):
543
- status_msg += f"- Dynamic topic prefix extracted: '{col_info['dynamic_topic_prefix']}'\n"
544
-
545
  # Extract unique topics, categories, and sentiments
546
  num_topics, num_categories, num_sentiments = transformer.extract_unique_topics_and_categories()
547
  status_msg += f"\n🎯 Found {num_topics} unique topics\n"
@@ -589,22 +532,21 @@ with gr.Blocks(title="Binary Matrix Feedback Transformer", css="""
589
  Transform feedback data with delimited topic and sentiment columns into binary matrix format.
590
 
591
  ### 🔧 Processing Logic:
592
- - **Automatic Topic Prefix Detection**: Extracts topic prefix from columns containing "Category:" by finding text between "Category:" and "("
593
  - **Verbatim_Positive/Neutral/Negative**: Set to 1 if respective sentiment is found in ABSA column, 0 otherwise
594
  - **T_ Columns**: Set to 1 if topic is present in ABSA column, 0 otherwise
595
  - **S_ Columns**: One column per topic (e.g., S_Allgemeine_Zufriedenheit) containing actual sentiment values
596
  - **C_ Columns**: Set to 1 if category is assigned, 0 otherwise
597
 
598
  ### 📋 Data Format Requirements:
599
- - **Topics**: Delimited by `|` (pipe) in columns identified by dynamic or manual prefix
600
  - **Sentiments**: Format `Topic::Sentiment|Topic2::Sentiment2` in ABSA columns
601
  - **Categories**: Delimited by `|` (pipe) in "Categories:" columns
602
 
603
- ### 🆕 Key Features:
604
- - **Dynamic Topic Prefix Extraction**: Automatically extracts topic prefix from "Category:" columns
605
  - **Verbatim_** columns detect overall sentiment presence regardless of topic
606
  - **T_** columns based on ABSA column presence (topics that have sentiment data)
607
  - **S_** columns contain actual sentiment values (not binary 1/0)
 
608
  """)
609
 
610
  with gr.Row():
@@ -631,9 +573,9 @@ with gr.Blocks(title="Binary Matrix Feedback Transformer", css="""
631
  gr.Markdown("### ⚙️ 3. Configuration")
632
 
633
  topic_prefix = gr.Textbox(
634
- label="Topic Column Identifier (Fallback)",
635
  value="Topic:",
636
- info="Fallback identifier if dynamic extraction from Category: column fails"
637
  )
638
 
639
  sentiment_prefix = gr.Textbox(
@@ -726,36 +668,37 @@ with gr.Blocks(title="Binary Matrix Feedback Transformer", css="""
726
  gr.Markdown("""
727
  ### 📝 Example Transformations:
728
 
729
- **Input Data with Dynamic Topic Extraction:**
730
  ```
731
- | Column: "Category: Service (ABC)" | ABSA: Sentiments | Categories: Issues |
732
  | 1 | Service::Negative|Quality::Positive | Issues|Support |
733
  ```
734
 
735
- **System will:**
736
- 1. Extract "Service:" from "Category: Service (ABC)" column
737
- 2. Use "Service:" to identify topic columns instead of "Topic:"
738
-
739
  **Output Binary Matrix:**
740
  ```
741
  | feedback_id | Verbatim_Positive | Verbatim_Neutral | Verbatim_Negative | T_Service | T_Quality | S_Service | S_Quality | C_Issues | C_Support |
742
  | 1 | 1 | 0 | 1 | 1 | 1 | Negative | Positive | 1 | 1 |
743
  ```
744
 
745
- ### 💡 Dynamic Topic Prefix Logic:
746
- - Searches for columns containing "Category:"
747
- - Extracts text between "Category:" and "(" (e.g., "Service" from "Category: Service (ABC)")
748
- - Adds ":" to create the topic prefix (e.g., "Service:")
749
- - Uses this prefix to identify topic columns
750
- - Falls back to manual "Topic Column Identifier" if extraction fails
751
-
752
- ### 🔍 Key Changes in This Version:
753
- - **NEW**: Automatic extraction of topic prefix from Category columns
754
- - Dynamically identifies topic columns based on extracted prefix
755
- - Maintains all other functionality (Verbatim columns, T_, S_, C_ logic)
756
- - Provides fallback to manual topic prefix if extraction fails
 
 
 
 
757
  """)
758
 
759
  # Launch the app
760
  if __name__ == "__main__":
761
- demo.launch(share=True)
 
 
42
  self.original_filename = None
43
  self.selected_columns = []
44
  self.verbatim_column = None # Store the verbatim/text column
 
45
 
46
  def load_data(self, file_obj):
47
  """
 
74
 
75
  return len(self.data), len(self.data.columns)
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  def identify_columns(self):
78
  """
79
  Identify topic, category, and sentiment columns in the data.
 
81
  if self.data is None:
82
  raise ValueError("Data not loaded")
83
 
 
 
 
 
 
 
 
 
 
84
  # Extract columns based on prefixes
85
+ self.topic_cols = [col for col in self.data.columns if "Topic:" in col]
 
 
 
 
 
86
  self.sentiment_cols = [col for col in self.data.columns if self.sentiment_prefix in col]
87
  self.category_cols = [col for col in self.data.columns if col.startswith(self.category_prefix)]
88
 
 
101
  'sentiment_cols': self.sentiment_cols,
102
  'category_cols': self.category_cols,
103
  'all_columns': all_cols,
104
+ 'verbatim_column': self.verbatim_column
 
105
  }
106
 
107
  def extract_unique_topics_and_categories(self):
 
312
  analysis_text += f"Topic columns (T_): {len(topic_cols)}\n"
313
  analysis_text += f"Sentiment columns (S_): {len(sentiment_cols)}\n"
314
  analysis_text += f"Category columns (C_): {len(category_cols)}\n"
315
+ analysis_text += f"Verbatim column used: {self.verbatim_column}\n\n"
 
 
 
 
 
 
316
 
317
  if self.selected_columns:
318
  analysis_text += f"**Included Original Columns:** {', '.join(self.selected_columns)}\n\n"
 
485
  status_msg += f"- Category columns: {len(col_info['category_cols'])}\n"
486
  status_msg += f"- Verbatim column: {col_info['verbatim_column']}\n"
487
 
 
 
 
 
488
  # Extract unique topics, categories, and sentiments
489
  num_topics, num_categories, num_sentiments = transformer.extract_unique_topics_and_categories()
490
  status_msg += f"\n🎯 Found {num_topics} unique topics\n"
 
532
  Transform feedback data with delimited topic and sentiment columns into binary matrix format.
533
 
534
  ### 🔧 Processing Logic:
 
535
  - **Verbatim_Positive/Neutral/Negative**: Set to 1 if respective sentiment is found in ABSA column, 0 otherwise
536
  - **T_ Columns**: Set to 1 if topic is present in ABSA column, 0 otherwise
537
  - **S_ Columns**: One column per topic (e.g., S_Allgemeine_Zufriedenheit) containing actual sentiment values
538
  - **C_ Columns**: Set to 1 if category is assigned, 0 otherwise
539
 
540
  ### 📋 Data Format Requirements:
541
+ - **Topics**: Delimited by `|` (pipe) in "Topic:" columns (optional)
542
  - **Sentiments**: Format `Topic::Sentiment|Topic2::Sentiment2` in ABSA columns
543
  - **Categories**: Delimited by `|` (pipe) in "Categories:" columns
544
 
545
+ ### 🆕 Key Logic:
 
546
  - **Verbatim_** columns detect overall sentiment presence regardless of topic
547
  - **T_** columns based on ABSA column presence (topics that have sentiment data)
548
  - **S_** columns contain actual sentiment values (not binary 1/0)
549
+ - No automatic column renaming for "Topic:" prefix
550
  """)
551
 
552
  with gr.Row():
 
573
  gr.Markdown("### ⚙️ 3. Configuration")
574
 
575
  topic_prefix = gr.Textbox(
576
+ label="Topic Column Identifier",
577
  value="Topic:",
578
+ info="Text to identify topic columns (for reference only)"
579
  )
580
 
581
  sentiment_prefix = gr.Textbox(
 
668
  gr.Markdown("""
669
  ### 📝 Example Transformations:
670
 
671
+ **Input Data:**
672
  ```
673
+ | feedback_id | ABSA: Sentiments | Categories: Issues |
674
  | 1 | Service::Negative|Quality::Positive | Issues|Support |
675
  ```
676
 
 
 
 
 
677
  **Output Binary Matrix:**
678
  ```
679
  | feedback_id | Verbatim_Positive | Verbatim_Neutral | Verbatim_Negative | T_Service | T_Quality | S_Service | S_Quality | C_Issues | C_Support |
680
  | 1 | 1 | 0 | 1 | 1 | 1 | Negative | Positive | 1 | 1 |
681
  ```
682
 
683
+ ### 💡 Column Logic:
684
+ - **Verbatim_Positive**: 1 if any "Positive"/"Positiv" sentiment found in ABSA
685
+ - **Verbatim_Neutral**: 1 if any "Neutral" sentiment found in ABSA
686
+ - **Verbatim_Negative**: 1 if any "Negative"/"Negativ" sentiment found in ABSA
687
+ - **T_[topic_name]**: 1 if topic exists in ABSA column, 0 otherwise
688
+ - **S_[topic_name]**: Actual sentiment value for that topic (e.g., "Positive", "Negative")
689
+ - **C_[category_name]**: 1 if category is assigned, 0 otherwise
690
+ - Safe column names (special characters replaced with underscores)
691
+
692
+ ### 🔍 Key Changes Made:
693
+ - **NEW**: Added Verbatim_Positive, Verbatim_Neutral, Verbatim_Negative columns
694
+ - These columns are set to 1 if the respective sentiment is found anywhere in the ABSA column
695
+ - Supports both English (Positive/Negative/Neutral) and German (Positiv/Negativ) sentiment detection
696
+ - Removed automatic "Topic:" column renaming logic
697
+ - T_ columns are now binary (1/0) based on topic existence in ABSA column
698
+ - Topics are extracted from ABSA sentiment data for T_ column creation
699
  """)
700
 
701
  # Launch the app
702
  if __name__ == "__main__":
703
+ demo.launch(share=True)
704
+