Spaces:

thomasgauthier
/

ChatExplorer

Sleeping

ChatExplorer / dataset_adapters /7a329ccea52693be98470e74ada5768849ba2523454c19d1f2d84b60221c156e.py

will this work?

10c1f9c almost 2 years ago

1.28 kB

	def transform_data(data):
	    """Convert one flat record into a ``{'conversations': [...]}`` dict.

	    Fields are emitted as turns in a fixed order: ``role_1``,
	    ``instruction`` (only when that key is present), ``message_1``,
	    ``message_2``. A field that is missing or falsy produces no turn.

	    * ``role_1`` is reduced to a readable label: the text after the last
	      ``'.'``, with underscores turned into spaces and then capitalized.
	      NOTE(review): presumably the field holds an enum-like string such
	      as ``'Physicist_RoleType.ASSISTANT'`` -- confirm against the dataset.
	    * ``instruction`` is passed through verbatim; a non-empty ``input``
	      is appended to it after a single space.
	    * ``message_1`` and ``message_2`` are passed through verbatim.

	    Every emitted value has surrounding whitespace stripped.

	    Args:
	        data: Mapping of field names to string values.

	    Returns:
	        ``{'conversations': [{'from': role, 'value': text}, ...]}`` where
	        ``role`` is ``'human'`` for every field except ``message_2``,
	        which is ``'gpt'``.
	    """
	    # 'from' role for each field that can become a turn.
	    role_mapping = {
	        'role_1': 'human',
	        'instruction': 'human',
	        'message_1': 'human',
	        'message_2': 'gpt',
	    }

	    conv_order = ['role_1', 'message_1', 'message_2']
	    if 'instruction' in data:
	        # The instruction must precede the first message.
	        conv_order.insert(conv_order.index('message_1'), 'instruction')

	    conversation = []
	    for key in conv_order:
	        if key not in data or not data[key]:
	            continue  # missing or empty field: no turn

	        if key == 'role_1':
	            # Only the role label gets the enum-style clean-up. Applying it
	            # to free text would drop everything before the last period.
	            msg_value = data[key].split('.')[-1].replace('_', ' ').capitalize()
	        else:
	            msg_value = data[key]
	            if key == 'instruction' and 'input' in data and data['input']:
	                msg_value += ' ' + data['input']

	        conversation.append({
	            'from': role_mapping[key],
	            'value': msg_value.strip(),
	        })

	    return {'conversations': conversation}