Orensomekh committed on
Commit
5881f5e
·
verified ·
1 Parent(s): c4cae1c

Upload create-submission-result.ipynb

Browse files
Operational_Instructions/create-submission-result.ipynb ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "a5e875f5",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n",
11
+ "\n",
12
+ "# Use this schema: \n",
13
+ "\"\"\"\n",
14
+ "{ \n",
15
+ "\n",
16
+ "\"$schema\": \"http://json-schema.org/draft-07/schema#\", \n",
17
+ "\n",
18
+ " \"title\": \"Answer file schema\", \n",
19
+ " \"type\": \"object\", \n",
20
+ " \"properties\": { \n",
21
+ " \"id\": { \n",
22
+ " \"type\": \"integer\", \n",
23
+ " \"description\": \"Question ID\" \n",
24
+ " }, \n",
25
+ " \"question\": { \n",
26
+ " \"type\": \"string\", \n",
27
+ " \"description\": \"The question\" \n",
28
+ " }, \n",
29
+ " \"passages\": { \n",
30
+ " \"type\": \"array\", \n",
31
+ " \"description\": \"Passages used and related FineWeb doc IDs, ordered by decreasing importance\", \n",
32
+ " \"items\": { \n",
33
+ " \"type\": \"object\", \n",
34
+ " \"properties\": {\n",
35
+ " \"passage\": { \n",
36
+ " \"type\": \"string\", \n",
37
+ " \"description\": \"Passage text\" \n",
38
+ " }, \n",
39
+ " \"doc_IDs\": {\n",
40
+ " \"type\": \"array\", \n",
41
+ " \"description\": \"Passage related FineWeb doc IDs, ordered by decreasing importance\", \n",
42
+ " \"items\": { \n",
43
+ " \"type\": \"string\", \n",
44
+ " \"description\": \"FineWeb doc ID, e.g., <urn:uuid:d69cbebc-133a-4ebe-9378-68235ec9f091>\"\n",
45
+ " } \n",
46
+ " } \n",
47
+ " },\n",
48
+ " \"required\": [\"passage\", \"doc_IDs\"]\n",
49
+ " }\n",
50
+ " }, \n",
51
+ " \"final_prompt\": {\n",
52
+ " \"type\": \"string\",\n",
53
+ " \"description\": \"Final prompt, as submitted to Falcon LLM\"\n",
54
+ " },\n",
55
+ " \"answer\": {\n",
56
+ " \"type\": \"string\",\n",
57
+ " \"description\": \"Your answer\"\n",
58
+ " }\n",
59
+ " },\n",
60
+ " \"required\": [\"id\", \"question\", \"passages\", \"final_prompt\", \"answer\"]\n",
61
+ "} \n",
62
+ "\"\"\"\n",
63
+ "\n",
64
+ "# Example output:\n",
65
+ "\"\"\"\n",
66
+ "{\n",
67
+ " \"id\": 17,\n",
68
+ " \"question\": \"What is the capital of France?\",\n",
69
+ " \"passages\": [\n",
70
+ " {\n",
71
+ " \"passage\": \"Paris is the capital and most populous city of France.\",\n",
72
+ " \"doc_IDs\": [\"<urn:uuid:1234abcd-5678-efgh-9101-ijklmnopqrst>\", \"<urn:uuid:1234abcd-5678-efgh-9202-ijklmnopqrst>\"]\n",
73
+ " },\n",
74
+ " {\n",
75
+ " \"passage\": \"France is located in Western Europe.\",\n",
76
+ " \"doc_IDs\": [\"<urn:uuid:1234abcd-5678-efgh-9101-ijklmnopqrst>\"]\n",
77
+ " }\n",
78
+ " ],\n",
79
+ " \"final_prompt\": \"Using the following - Paris is the capital and most populous city of France - and - France is located in Western Europe - answer the question: What is the capital of France?\",\n",
80
+ " \"answer\": \"Paris\" \n",
81
+ "} \n",
82
+ "\"\"\"\n",
83
+ "\n",
84
+ "# Code that generates the output\n",
85
+ "answers = pd.DataFrame({\n",
86
+ " \"id\": [1, 2],\n",
87
+ " \"question\": [\"What is the capital of France?\", \"What is the capital of Germany?\"],\n",
88
+ " \"passages\": [\n",
89
+ " [\n",
90
+ " {\"passage\": \"Paris is the capital and most populous city of France.\", \n",
91
+ " \"doc_IDs\": [\"<urn:uuid:1234abcd-5678-efgh-9101-ijklmnopqrst>\", \"<urn:uuid:1234abcd-5678-efgh-9202-ijklmnopqrst>\"]},\n",
92
+ " {\"passage\": \"France is located in Western Europe.\", \n",
93
+ " \"doc_IDs\": [\"<urn:uuid:1234abcd-5678-efgh-9101-ijklmnopqrst>\"]}\n",
94
+ " ],\n",
95
+ " [\n",
96
+ " {\"passage\": \"Berlin is the capital of Germany.\", \n",
97
+ " \"doc_IDs\": [\"<urn:uuid:1234abcd-5678-efgh-9101-ijklmnopqrst>\"]}\n",
98
+ " ]\n",
99
+ " ],\n",
100
+ " \"final_prompt\": [\n",
101
+ " \"Using the following - Paris is the capital and most populous city of France - and - France is located in Western Europe - answer the question: What is the capital of France?\",\n",
102
+ " \"Using the following - Berlin is the capital of Germany - answer the question: What is the capital of Germany?\"\n",
103
+ " ],\n",
104
+ " \"answer\": [\"Paris\", \"Berlin\"]\n",
105
+ "})\n",
106
+ "# Convert to JSON Lines format (one JSON object per line)\n",
107
+ "answers_json = answers.to_json(orient='records', lines=True, force_ascii=False)\n",
108
+ "\n",
109
+ "# Or just save to a file\n",
110
+ "answers.to_json(\"answers.jsonl\", orient='records', lines=True, force_ascii=False)\n",
111
+ "\n",
112
+ "# Load the file to make sure it is correct\n",
113
+ "loaded_answers = pd.read_json(\"answers.jsonl\", lines=True)\n",
114
+ "# Print the loaded answers\n",
115
+ "print(loaded_answers)\n"
116
+ ]
117
+ }
118
+ ],
119
+ "metadata": {
120
+ "kernelspec": {
121
+ "display_name": ".venv",
122
+ "language": "python",
123
+ "name": "python3"
124
+ },
125
+ "language_info": {
126
+ "codemirror_mode": {
127
+ "name": "ipython",
128
+ "version": 3
129
+ },
130
+ "file_extension": ".py",
131
+ "mimetype": "text/x-python",
132
+ "name": "python",
133
+ "nbconvert_exporter": "python",
134
+ "pygments_lexer": "ipython3",
135
+ "version": "3.12.8"
136
+ }
137
+ },
138
+ "nbformat": 4,
139
+ "nbformat_minor": 5
140
+ }