Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -42,7 +42,6 @@ class FeedbackTransformer:
|
|
42 |
self.original_filename = None
|
43 |
self.selected_columns = []
|
44 |
self.verbatim_column = None # Store the verbatim/text column
|
45 |
-
self.dynamic_topic_prefix = None # Store dynamically extracted topic prefix
|
46 |
|
47 |
def load_data(self, file_obj):
|
48 |
"""
|
@@ -75,37 +74,6 @@ class FeedbackTransformer:
|
|
75 |
|
76 |
return len(self.data), len(self.data.columns)
|
77 |
|
78 |
-
def extract_topic_prefix_from_category(self):
|
79 |
-
"""
|
80 |
-
Extract the topic prefix from a column containing "Category:"
|
81 |
-
by finding text between "Category:" and "("
|
82 |
-
"""
|
83 |
-
# Look for columns containing "Category:"
|
84 |
-
category_pattern_cols = [col for col in self.data.columns if "Category:" in col]
|
85 |
-
|
86 |
-
if category_pattern_cols:
|
87 |
-
# Use the first matching column
|
88 |
-
category_col = category_pattern_cols[0]
|
89 |
-
|
90 |
-
# Try to extract from column name first
|
91 |
-
match = re.search(r'Category:\s*([^(]+)\s*\(', category_col)
|
92 |
-
if match:
|
93 |
-
extracted_prefix = match.group(1).strip() + ":"
|
94 |
-
self.dynamic_topic_prefix = extracted_prefix
|
95 |
-
return extracted_prefix
|
96 |
-
|
97 |
-
# If not found in column name, try to extract from column values
|
98 |
-
for value in self.data[category_col].dropna():
|
99 |
-
if isinstance(value, str):
|
100 |
-
match = re.search(r'Category:\s*([^(]+)\s*\(', value)
|
101 |
-
if match:
|
102 |
-
extracted_prefix = match.group(1).strip() + ":"
|
103 |
-
self.dynamic_topic_prefix = extracted_prefix
|
104 |
-
return extracted_prefix
|
105 |
-
|
106 |
-
# If no match found, return None
|
107 |
-
return None
|
108 |
-
|
109 |
def identify_columns(self):
|
110 |
"""
|
111 |
Identify topic, category, and sentiment columns in the data.
|
@@ -113,22 +81,8 @@ class FeedbackTransformer:
|
|
113 |
if self.data is None:
|
114 |
raise ValueError("Data not loaded")
|
115 |
|
116 |
-
# First try to extract topic prefix dynamically
|
117 |
-
extracted_prefix = self.extract_topic_prefix_from_category()
|
118 |
-
|
119 |
-
# Use dynamic prefix if found, otherwise use the provided topic_prefix
|
120 |
-
topic_identifier = extracted_prefix if extracted_prefix else self.topic_prefix
|
121 |
-
|
122 |
-
# Log the prefix being used
|
123 |
-
print(f"Using topic prefix: '{topic_identifier}'")
|
124 |
-
|
125 |
# Extract columns based on prefixes
|
126 |
-
|
127 |
-
if topic_identifier:
|
128 |
-
self.topic_cols = [col for col in self.data.columns if topic_identifier in col]
|
129 |
-
else:
|
130 |
-
self.topic_cols = [col for col in self.data.columns if "Topic:" in col]
|
131 |
-
|
132 |
self.sentiment_cols = [col for col in self.data.columns if self.sentiment_prefix in col]
|
133 |
self.category_cols = [col for col in self.data.columns if col.startswith(self.category_prefix)]
|
134 |
|
@@ -147,8 +101,7 @@ class FeedbackTransformer:
|
|
147 |
'sentiment_cols': self.sentiment_cols,
|
148 |
'category_cols': self.category_cols,
|
149 |
'all_columns': all_cols,
|
150 |
-
'verbatim_column': self.verbatim_column
|
151 |
-
'dynamic_topic_prefix': self.dynamic_topic_prefix
|
152 |
}
|
153 |
|
154 |
def extract_unique_topics_and_categories(self):
|
@@ -359,13 +312,7 @@ class FeedbackTransformer:
|
|
359 |
analysis_text += f"Topic columns (T_): {len(topic_cols)}\n"
|
360 |
analysis_text += f"Sentiment columns (S_): {len(sentiment_cols)}\n"
|
361 |
analysis_text += f"Category columns (C_): {len(category_cols)}\n"
|
362 |
-
analysis_text += f"Verbatim column used: {self.verbatim_column}\n"
|
363 |
-
|
364 |
-
# Add dynamic topic prefix info
|
365 |
-
if self.dynamic_topic_prefix:
|
366 |
-
analysis_text += f"Dynamic topic prefix extracted: '{self.dynamic_topic_prefix}'\n\n"
|
367 |
-
else:
|
368 |
-
analysis_text += f"Topic prefix used: '{self.topic_prefix}'\n\n"
|
369 |
|
370 |
if self.selected_columns:
|
371 |
analysis_text += f"**Included Original Columns:** {', '.join(self.selected_columns)}\n\n"
|
@@ -538,10 +485,6 @@ def process_file(file_obj, topic_prefix, sentiment_prefix, category_prefix,
|
|
538 |
status_msg += f"- Category columns: {len(col_info['category_cols'])}\n"
|
539 |
status_msg += f"- Verbatim column: {col_info['verbatim_column']}\n"
|
540 |
|
541 |
-
# Add dynamic topic prefix info
|
542 |
-
if col_info.get('dynamic_topic_prefix'):
|
543 |
-
status_msg += f"- Dynamic topic prefix extracted: '{col_info['dynamic_topic_prefix']}'\n"
|
544 |
-
|
545 |
# Extract unique topics, categories, and sentiments
|
546 |
num_topics, num_categories, num_sentiments = transformer.extract_unique_topics_and_categories()
|
547 |
status_msg += f"\n🎯 Found {num_topics} unique topics\n"
|
@@ -589,22 +532,21 @@ with gr.Blocks(title="Binary Matrix Feedback Transformer", css="""
|
|
589 |
Transform feedback data with delimited topic and sentiment columns into binary matrix format.
|
590 |
|
591 |
### 🔧 Processing Logic:
|
592 |
-
- **Automatic Topic Prefix Detection**: Extracts topic prefix from columns containing "Category:" by finding text between "Category:" and "("
|
593 |
- **Verbatim_Positive/Neutral/Negative**: Set to 1 if respective sentiment is found in ABSA column, 0 otherwise
|
594 |
- **T_ Columns**: Set to 1 if topic is present in ABSA column, 0 otherwise
|
595 |
- **S_ Columns**: One column per topic (e.g., S_Allgemeine_Zufriedenheit) containing actual sentiment values
|
596 |
- **C_ Columns**: Set to 1 if category is assigned, 0 otherwise
|
597 |
|
598 |
### π Data Format Requirements:
|
599 |
-
- **Topics**: Delimited by `|` (pipe) in columns
|
600 |
- **Sentiments**: Format `Topic::Sentiment|Topic2::Sentiment2` in ABSA columns
|
601 |
- **Categories**: Delimited by `|` (pipe) in "Categories:" columns
|
602 |
|
603 |
-
### π Key
|
604 |
-
- **Dynamic Topic Prefix Extraction**: Automatically extracts topic prefix from "Category:" columns
|
605 |
- **Verbatim_** columns detect overall sentiment presence regardless of topic
|
606 |
- **T_** columns based on ABSA column presence (topics that have sentiment data)
|
607 |
- **S_** columns contain actual sentiment values (not binary 1/0)
|
|
|
608 |
""")
|
609 |
|
610 |
with gr.Row():
|
@@ -631,9 +573,9 @@ with gr.Blocks(title="Binary Matrix Feedback Transformer", css="""
|
|
631 |
gr.Markdown("### ⚙️ 3. Configuration")
|
632 |
|
633 |
topic_prefix = gr.Textbox(
|
634 |
-
label="Topic Column Identifier
|
635 |
value="Topic:",
|
636 |
-
info="
|
637 |
)
|
638 |
|
639 |
sentiment_prefix = gr.Textbox(
|
@@ -726,36 +668,37 @@ with gr.Blocks(title="Binary Matrix Feedback Transformer", css="""
|
|
726 |
gr.Markdown("""
|
727 |
### π Example Transformations:
|
728 |
|
729 |
-
**Input Data
|
730 |
```
|
731 |
-
|
|
732 |
| 1 | Service::Negative|Quality::Positive | Issues|Support |
|
733 |
```
|
734 |
|
735 |
-
**System will:**
|
736 |
-
1. Extract "Service:" from "Category: Service (ABC)" column
|
737 |
-
2. Use "Service:" to identify topic columns instead of "Topic:"
|
738 |
-
|
739 |
**Output Binary Matrix:**
|
740 |
```
|
741 |
| feedback_id | Verbatim_Positive | Verbatim_Neutral | Verbatim_Negative | T_Service | T_Quality | S_Service | S_Quality | C_Issues | C_Support |
|
742 |
| 1 | 1 | 0 | 1 | 1 | 1 | Negative | Positive | 1 | 1 |
|
743 |
```
|
744 |
|
745 |
-
### 💡
|
746 |
-
-
|
747 |
-
-
|
748 |
-
-
|
749 |
-
-
|
750 |
-
-
|
751 |
-
|
752 |
-
|
753 |
-
|
754 |
-
|
755 |
-
-
|
756 |
-
-
|
|
|
|
|
|
|
|
|
757 |
""")
|
758 |
|
759 |
# Launch the app
|
760 |
if __name__ == "__main__":
|
761 |
-
demo.launch(share=True)
|
|
|
|
42 |
self.original_filename = None
|
43 |
self.selected_columns = []
|
44 |
self.verbatim_column = None # Store the verbatim/text column
|
|
|
45 |
|
46 |
def load_data(self, file_obj):
|
47 |
"""
|
|
|
74 |
|
75 |
return len(self.data), len(self.data.columns)
|
76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
def identify_columns(self):
|
78 |
"""
|
79 |
Identify topic, category, and sentiment columns in the data.
|
|
|
81 |
if self.data is None:
|
82 |
raise ValueError("Data not loaded")
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
# Extract columns based on prefixes
|
85 |
+
self.topic_cols = [col for col in self.data.columns if "Topic:" in col]
|
|
|
|
|
|
|
|
|
|
|
86 |
self.sentiment_cols = [col for col in self.data.columns if self.sentiment_prefix in col]
|
87 |
self.category_cols = [col for col in self.data.columns if col.startswith(self.category_prefix)]
|
88 |
|
|
|
101 |
'sentiment_cols': self.sentiment_cols,
|
102 |
'category_cols': self.category_cols,
|
103 |
'all_columns': all_cols,
|
104 |
+
'verbatim_column': self.verbatim_column
|
|
|
105 |
}
|
106 |
|
107 |
def extract_unique_topics_and_categories(self):
|
|
|
312 |
analysis_text += f"Topic columns (T_): {len(topic_cols)}\n"
|
313 |
analysis_text += f"Sentiment columns (S_): {len(sentiment_cols)}\n"
|
314 |
analysis_text += f"Category columns (C_): {len(category_cols)}\n"
|
315 |
+
analysis_text += f"Verbatim column used: {self.verbatim_column}\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
316 |
|
317 |
if self.selected_columns:
|
318 |
analysis_text += f"**Included Original Columns:** {', '.join(self.selected_columns)}\n\n"
|
|
|
485 |
status_msg += f"- Category columns: {len(col_info['category_cols'])}\n"
|
486 |
status_msg += f"- Verbatim column: {col_info['verbatim_column']}\n"
|
487 |
|
|
|
|
|
|
|
|
|
488 |
# Extract unique topics, categories, and sentiments
|
489 |
num_topics, num_categories, num_sentiments = transformer.extract_unique_topics_and_categories()
|
490 |
status_msg += f"\n🎯 Found {num_topics} unique topics\n"
|
|
|
532 |
Transform feedback data with delimited topic and sentiment columns into binary matrix format.
|
533 |
|
534 |
### 🔧 Processing Logic:
|
|
|
535 |
- **Verbatim_Positive/Neutral/Negative**: Set to 1 if respective sentiment is found in ABSA column, 0 otherwise
|
536 |
- **T_ Columns**: Set to 1 if topic is present in ABSA column, 0 otherwise
|
537 |
- **S_ Columns**: One column per topic (e.g., S_Allgemeine_Zufriedenheit) containing actual sentiment values
|
538 |
- **C_ Columns**: Set to 1 if category is assigned, 0 otherwise
|
539 |
|
540 |
### π Data Format Requirements:
|
541 |
+
- **Topics**: Delimited by `|` (pipe) in "Topics:" columns (optional)
|
542 |
- **Sentiments**: Format `Topic::Sentiment|Topic2::Sentiment2` in ABSA columns
|
543 |
- **Categories**: Delimited by `|` (pipe) in "Categories:" columns
|
544 |
|
545 |
+
### π Key Logic:
|
|
|
546 |
- **Verbatim_** columns detect overall sentiment presence regardless of topic
|
547 |
- **T_** columns based on ABSA column presence (topics that have sentiment data)
|
548 |
- **S_** columns contain actual sentiment values (not binary 1/0)
|
549 |
+
- No automatic column renaming for "Topic:" prefix
|
550 |
""")
|
551 |
|
552 |
with gr.Row():
|
|
|
573 |
gr.Markdown("### ⚙️ 3. Configuration")
|
574 |
|
575 |
topic_prefix = gr.Textbox(
|
576 |
+
label="Topic Column Identifier",
|
577 |
value="Topic:",
|
578 |
+
info="Text to identify topic columns (for reference only)"
|
579 |
)
|
580 |
|
581 |
sentiment_prefix = gr.Textbox(
|
|
|
668 |
gr.Markdown("""
|
669 |
### π Example Transformations:
|
670 |
|
671 |
+
**Input Data:**
|
672 |
```
|
673 |
+
| feedback_id | ABSA: Sentiments | Categories: Issues |
|
674 |
| 1 | Service::Negative|Quality::Positive | Issues|Support |
|
675 |
```
|
676 |
|
|
|
|
|
|
|
|
|
677 |
**Output Binary Matrix:**
|
678 |
```
|
679 |
| feedback_id | Verbatim_Positive | Verbatim_Neutral | Verbatim_Negative | T_Service | T_Quality | S_Service | S_Quality | C_Issues | C_Support |
|
680 |
| 1 | 1 | 0 | 1 | 1 | 1 | Negative | Positive | 1 | 1 |
|
681 |
```
|
682 |
|
683 |
+
### 💡 Column Logic:
|
684 |
+
- **Verbatim_Positive**: 1 if any "Positive"/"Positiv" sentiment found in ABSA
|
685 |
+
- **Verbatim_Neutral**: 1 if any "Neutral" sentiment found in ABSA
|
686 |
+
- **Verbatim_Negative**: 1 if any "Negative"/"Negativ" sentiment found in ABSA
|
687 |
+
- **T_[topic_name]**: 1 if topic exists in ABSA column, 0 otherwise
|
688 |
+
- **S_[topic_name]**: Actual sentiment value for that topic (e.g., "Positive", "Negative")
|
689 |
+
- **C_[category_name]**: 1 if category is assigned, 0 otherwise
|
690 |
+
- Safe column names (special characters replaced with underscores)
|
691 |
+
|
692 |
+
### π Key Changes Made:
|
693 |
+
- **NEW**: Added Verbatim_Positive, Verbatim_Neutral, Verbatim_Negative columns
|
694 |
+
- These columns are set to 1 if the respective sentiment is found anywhere in the ABSA column
|
695 |
+
- Supports both English (Positive/Negative/Neutral) and German (Positiv/Negativ) sentiment detection
|
696 |
+
- Removed automatic "Topic:" column renaming logic
|
697 |
+
- T_ columns are now binary (1/0) based on topic existence in ABSA column
|
698 |
+
- Topics are extracted from ABSA sentiment data for T_ column creation
|
699 |
""")
|
700 |
|
701 |
# Launch the app
|
702 |
if __name__ == "__main__":
|
703 |
+
demo.launch(share=True)
|
704 |
+
|