Spaces:
Sleeping
Sleeping
ChatExplorer
/
dataset_adapters
/7a329ccea52693be98470e74ada5768849ba2523454c19d1f2d84b60221c156e.py
def transform_data(data):
    """Convert one flat dataset record into a ``{'conversations': [...]}`` dict.

    The record is read through the keys ``role_1``, ``instruction``,
    ``message_1`` and ``message_2`` (in that order); ``instruction`` is only
    considered when the key is present in ``data``.  Missing or falsy fields
    are skipped.  Each emitted turn is ``{'from': <speaker>, 'value': <text>}``
    where ``message_2`` is attributed to ``'gpt'`` and every other field to
    ``'human'``.

    Args:
        data: Mapping holding the record.  Field values are expected to be
            strings (they are concatenated and stripped).

    Returns:
        A dict with a single ``'conversations'`` key containing the list of
        turns.  The list is empty when none of the known fields carry data.
    """
    # Speaker attributed to each supported field.
    role_mapping = {
        'role_1': 'human',
        'instruction': 'human',
        'message_1': 'human',
        'message_2': 'gpt',
    }

    # Emission order; 'instruction' is slotted in right before 'message_1'.
    conv_order = ['role_1', 'message_1', 'message_2']
    if 'instruction' in data:
        conv_order.insert(conv_order.index('message_1'), 'instruction')

    conversation = []
    for key in conv_order:
        raw_value = data.get(key)
        if not raw_value:
            continue

        if key.startswith('role'):
            # Role labels are reduced to a readable name: keep the part after
            # the last '.', turn underscores into spaces, capitalise.
            # NOTE(review): presumably labels look like 'X_RoleType.ASSISTANT'
            # -- confirm against the dataset.
            msg_value = raw_value.split('.')[-1].replace('_', ' ').capitalize()
        else:
            # Free text (instruction / messages) must be kept verbatim;
            # running the label normalisation on it would drop everything
            # before the last '.' and lowercase the remainder.
            msg_value = raw_value

        # An 'instruction' with an accompanying non-empty 'input' becomes one
        # turn, with the input appended after a single space.
        if key == 'instruction' and data.get('input'):
            msg_value += ' ' + data['input']

        conversation.append({
            'from': role_mapping[key],
            'value': msg_value.strip(),
        })

    return {'conversations': conversation}