zkava01 committed on
Commit
49d4292
·
verified ·
1 Parent(s): 94feb59

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ library_name: transformers
4
+ tags:
5
+ - autotrain
6
+ - text-classification
7
+ base_model: cardiffnlp/twitter-roberta-base
8
+ widget:
9
+ - text: "I love AutoTrain"
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Text Classification
15
+
16
+ ## Validation Metrics
17
+ loss: 0.007734560873359442
18
+
19
+ f1_macro: 0.9991256596070146
20
+
21
+ f1_micro: 0.9989545216936748
22
+
23
+ f1_weighted: 0.9989547969603347
24
+
25
+ precision_macro: 0.9989517819706499
26
+
27
+ precision_micro: 0.9989545216936748
28
+
29
+ precision_weighted: 0.9989578093613047
30
+
31
+ recall_macro: 0.9993019197207679
32
+
33
+ recall_micro: 0.9989545216936748
34
+
35
+ recall_weighted: 0.9989545216936748
36
+
37
+ accuracy: 0.9989545216936748
checkpoint-2871/config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cardiffnlp/twitter-roberta-base",
3
+ "_num_labels": 3,
4
+ "architectures": [
5
+ "RobertaForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "bos_token_id": 0,
9
+ "classifier_dropout": null,
10
+ "eos_token_id": 2,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "0",
17
+ "1": "1",
18
+ "2": "2"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 3072,
22
+ "label2id": {
23
+ "0": 0,
24
+ "1": 1,
25
+ "2": 2
26
+ },
27
+ "layer_norm_eps": 1e-05,
28
+ "max_position_embeddings": 514,
29
+ "model_type": "roberta",
30
+ "num_attention_heads": 12,
31
+ "num_hidden_layers": 12,
32
+ "pad_token_id": 1,
33
+ "position_embedding_type": "absolute",
34
+ "problem_type": "single_label_classification",
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.48.0",
37
+ "type_vocab_size": 1,
38
+ "use_cache": true,
39
+ "vocab_size": 50265
40
+ }
checkpoint-2871/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77fa83f78f8efced14d895c74a0a60d8ecad30afe7fd6883bb7021fbc7b2a264
3
+ size 498615900
checkpoint-2871/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27c865c6a1a3f3e8d2bff18171784335968cac1981439b08e9bec482de42d332
3
+ size 997346042
checkpoint-2871/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00436bb592d234f4404aef3208939a5b2c931d620283a1d576a4fa20302ad73b
3
+ size 13990
checkpoint-2871/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01983363ef66428eb10f82a416330e9bd118392e8a184a1ab2a4ec7174e89cec
3
+ size 1064
checkpoint-2871/trainer_state.json ADDED
@@ -0,0 +1,894 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.007734560873359442,
3
+ "best_model_checkpoint": "autotrain-5zkp2-pa5ot/checkpoint-2871",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2871,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02612330198537095,
13
+ "grad_norm": 3.338355302810669,
14
+ "learning_rate": 4.340277777777778e-06,
15
+ "loss": 1.1122,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.0522466039707419,
20
+ "grad_norm": 1.7776278257369995,
21
+ "learning_rate": 8.680555555555556e-06,
22
+ "loss": 1.0796,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.07836990595611286,
27
+ "grad_norm": 3.9048593044281006,
28
+ "learning_rate": 1.3020833333333334e-05,
29
+ "loss": 1.0438,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.1044932079414838,
34
+ "grad_norm": 4.985559940338135,
35
+ "learning_rate": 1.736111111111111e-05,
36
+ "loss": 0.944,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.13061650992685475,
41
+ "grad_norm": 6.691493988037109,
42
+ "learning_rate": 2.170138888888889e-05,
43
+ "loss": 0.8765,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.15673981191222572,
48
+ "grad_norm": 15.063920974731445,
49
+ "learning_rate": 2.604166666666667e-05,
50
+ "loss": 0.5913,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.18286311389759666,
55
+ "grad_norm": 55.3206901550293,
56
+ "learning_rate": 3.0381944444444444e-05,
57
+ "loss": 0.3799,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.2089864158829676,
62
+ "grad_norm": 8.026313781738281,
63
+ "learning_rate": 3.472222222222222e-05,
64
+ "loss": 0.3776,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.23510971786833856,
69
+ "grad_norm": 3.5273993015289307,
70
+ "learning_rate": 3.90625e-05,
71
+ "loss": 0.2117,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.2612330198537095,
76
+ "grad_norm": 0.12634535133838654,
77
+ "learning_rate": 4.340277777777778e-05,
78
+ "loss": 0.2089,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.28735632183908044,
83
+ "grad_norm": 0.2070770412683487,
84
+ "learning_rate": 4.774305555555556e-05,
85
+ "loss": 0.1633,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.31347962382445144,
90
+ "grad_norm": 62.55227279663086,
91
+ "learning_rate": 4.9767711962833916e-05,
92
+ "loss": 0.1345,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.3396029258098224,
97
+ "grad_norm": 0.017617134377360344,
98
+ "learning_rate": 4.928377855207124e-05,
99
+ "loss": 0.0022,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.3657262277951933,
104
+ "grad_norm": 0.02495918981730938,
105
+ "learning_rate": 4.879984514130856e-05,
106
+ "loss": 0.0762,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.39184952978056425,
111
+ "grad_norm": 23.060834884643555,
112
+ "learning_rate": 4.831591173054588e-05,
113
+ "loss": 0.0773,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.4179728317659352,
118
+ "grad_norm": 0.018387485295534134,
119
+ "learning_rate": 4.78319783197832e-05,
120
+ "loss": 0.0877,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.4440961337513062,
125
+ "grad_norm": 0.08322528004646301,
126
+ "learning_rate": 4.734804490902052e-05,
127
+ "loss": 0.1489,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.4702194357366771,
132
+ "grad_norm": 0.018301822245121002,
133
+ "learning_rate": 4.686411149825784e-05,
134
+ "loss": 0.1955,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.49634273772204807,
139
+ "grad_norm": 159.80633544921875,
140
+ "learning_rate": 4.638017808749516e-05,
141
+ "loss": 0.1085,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.522466039707419,
146
+ "grad_norm": 0.010405668057501316,
147
+ "learning_rate": 4.5896244676732484e-05,
148
+ "loss": 0.0424,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.54858934169279,
153
+ "grad_norm": 0.016250956803560257,
154
+ "learning_rate": 4.5412311265969805e-05,
155
+ "loss": 0.0995,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.5747126436781609,
160
+ "grad_norm": 6.8752760887146,
161
+ "learning_rate": 4.4928377855207126e-05,
162
+ "loss": 0.1536,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.6008359456635318,
167
+ "grad_norm": 0.012155416421592236,
168
+ "learning_rate": 4.4444444444444447e-05,
169
+ "loss": 0.0141,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.6269592476489029,
174
+ "grad_norm": 0.008480357006192207,
175
+ "learning_rate": 4.396051103368177e-05,
176
+ "loss": 0.002,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.6530825496342738,
181
+ "grad_norm": 0.010472940281033516,
182
+ "learning_rate": 4.347657762291909e-05,
183
+ "loss": 0.0437,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.6792058516196448,
188
+ "grad_norm": 0.021664993837475777,
189
+ "learning_rate": 4.299264421215641e-05,
190
+ "loss": 0.0272,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.7053291536050157,
195
+ "grad_norm": 0.006474316120147705,
196
+ "learning_rate": 4.250871080139373e-05,
197
+ "loss": 0.0324,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.7314524555903866,
202
+ "grad_norm": 0.0071400972083210945,
203
+ "learning_rate": 4.202477739063105e-05,
204
+ "loss": 0.0674,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.7575757575757576,
209
+ "grad_norm": 0.05537139251828194,
210
+ "learning_rate": 4.154084397986837e-05,
211
+ "loss": 0.1479,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.7836990595611285,
216
+ "grad_norm": 0.03653930872678757,
217
+ "learning_rate": 4.105691056910569e-05,
218
+ "loss": 0.0746,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.8098223615464994,
223
+ "grad_norm": 0.030840527266263962,
224
+ "learning_rate": 4.0572977158343014e-05,
225
+ "loss": 0.0381,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.8359456635318704,
230
+ "grad_norm": 0.02931591309607029,
231
+ "learning_rate": 4.0089043747580335e-05,
232
+ "loss": 0.0491,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.8620689655172413,
237
+ "grad_norm": 0.01176014170050621,
238
+ "learning_rate": 3.9605110336817656e-05,
239
+ "loss": 0.0348,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.8881922675026124,
244
+ "grad_norm": 0.03229213505983353,
245
+ "learning_rate": 3.912117692605498e-05,
246
+ "loss": 0.0834,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.9143155694879833,
251
+ "grad_norm": 0.015275160782039165,
252
+ "learning_rate": 3.86372435152923e-05,
253
+ "loss": 0.05,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.9404388714733543,
258
+ "grad_norm": 0.013074109330773354,
259
+ "learning_rate": 3.815331010452962e-05,
260
+ "loss": 0.0012,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.9665621734587252,
265
+ "grad_norm": 0.007205578964203596,
266
+ "learning_rate": 3.766937669376694e-05,
267
+ "loss": 0.0006,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 0.9926854754440961,
272
+ "grad_norm": 0.023728761821985245,
273
+ "learning_rate": 3.718544328300426e-05,
274
+ "loss": 0.0409,
275
+ "step": 950
276
+ },
277
+ {
278
+ "epoch": 1.0,
279
+ "eval_accuracy": 0.9973863042341872,
280
+ "eval_f1_macro": 0.9973126862743199,
281
+ "eval_f1_micro": 0.9973863042341872,
282
+ "eval_f1_weighted": 0.9973863333834622,
283
+ "eval_loss": 0.020341886207461357,
284
+ "eval_precision_macro": 0.9963851515122331,
285
+ "eval_precision_micro": 0.9973863042341872,
286
+ "eval_precision_weighted": 0.9974001038510532,
287
+ "eval_recall_macro": 0.9982547993019196,
288
+ "eval_recall_micro": 0.9973863042341872,
289
+ "eval_recall_weighted": 0.9973863042341872,
290
+ "eval_runtime": 209.7986,
291
+ "eval_samples_per_second": 9.118,
292
+ "eval_steps_per_second": 0.572,
293
+ "step": 957
294
+ },
295
+ {
296
+ "epoch": 1.0188087774294672,
297
+ "grad_norm": 0.01665070652961731,
298
+ "learning_rate": 3.670150987224158e-05,
299
+ "loss": 0.0121,
300
+ "step": 975
301
+ },
302
+ {
303
+ "epoch": 1.044932079414838,
304
+ "grad_norm": 0.00787193700671196,
305
+ "learning_rate": 3.62175764614789e-05,
306
+ "loss": 0.0374,
307
+ "step": 1000
308
+ },
309
+ {
310
+ "epoch": 1.071055381400209,
311
+ "grad_norm": 0.004602901637554169,
312
+ "learning_rate": 3.5733643050716224e-05,
313
+ "loss": 0.0029,
314
+ "step": 1025
315
+ },
316
+ {
317
+ "epoch": 1.09717868338558,
318
+ "grad_norm": 0.0053739468567073345,
319
+ "learning_rate": 3.5249709639953545e-05,
320
+ "loss": 0.0004,
321
+ "step": 1050
322
+ },
323
+ {
324
+ "epoch": 1.123301985370951,
325
+ "grad_norm": 0.0077377124689519405,
326
+ "learning_rate": 3.4765776229190865e-05,
327
+ "loss": 0.0003,
328
+ "step": 1075
329
+ },
330
+ {
331
+ "epoch": 1.1494252873563218,
332
+ "grad_norm": 0.005279259290546179,
333
+ "learning_rate": 3.4281842818428186e-05,
334
+ "loss": 0.0798,
335
+ "step": 1100
336
+ },
337
+ {
338
+ "epoch": 1.1755485893416928,
339
+ "grad_norm": 0.005335587542504072,
340
+ "learning_rate": 3.379790940766551e-05,
341
+ "loss": 0.0008,
342
+ "step": 1125
343
+ },
344
+ {
345
+ "epoch": 1.2016718913270636,
346
+ "grad_norm": 0.013302076607942581,
347
+ "learning_rate": 3.331397599690283e-05,
348
+ "loss": 0.0007,
349
+ "step": 1150
350
+ },
351
+ {
352
+ "epoch": 1.2277951933124347,
353
+ "grad_norm": 0.060083452612161636,
354
+ "learning_rate": 3.283004258614015e-05,
355
+ "loss": 0.1174,
356
+ "step": 1175
357
+ },
358
+ {
359
+ "epoch": 1.2539184952978055,
360
+ "grad_norm": 0.005654782988131046,
361
+ "learning_rate": 3.234610917537747e-05,
362
+ "loss": 0.0008,
363
+ "step": 1200
364
+ },
365
+ {
366
+ "epoch": 1.2800417972831766,
367
+ "grad_norm": 0.004458857234567404,
368
+ "learning_rate": 3.186217576461479e-05,
369
+ "loss": 0.0004,
370
+ "step": 1225
371
+ },
372
+ {
373
+ "epoch": 1.3061650992685476,
374
+ "grad_norm": 0.005284965503960848,
375
+ "learning_rate": 3.137824235385211e-05,
376
+ "loss": 0.0396,
377
+ "step": 1250
378
+ },
379
+ {
380
+ "epoch": 1.3322884012539185,
381
+ "grad_norm": 0.008645354770123959,
382
+ "learning_rate": 3.089430894308943e-05,
383
+ "loss": 0.0007,
384
+ "step": 1275
385
+ },
386
+ {
387
+ "epoch": 1.3584117032392895,
388
+ "grad_norm": 0.0050517115741968155,
389
+ "learning_rate": 3.041037553232675e-05,
390
+ "loss": 0.0126,
391
+ "step": 1300
392
+ },
393
+ {
394
+ "epoch": 1.3845350052246603,
395
+ "grad_norm": 0.013066472485661507,
396
+ "learning_rate": 2.9926442121564075e-05,
397
+ "loss": 0.0831,
398
+ "step": 1325
399
+ },
400
+ {
401
+ "epoch": 1.4106583072100314,
402
+ "grad_norm": 0.017604809254407883,
403
+ "learning_rate": 2.9442508710801396e-05,
404
+ "loss": 0.0012,
405
+ "step": 1350
406
+ },
407
+ {
408
+ "epoch": 1.4367816091954024,
409
+ "grad_norm": 0.006513866595923901,
410
+ "learning_rate": 2.8958575300038713e-05,
411
+ "loss": 0.0416,
412
+ "step": 1375
413
+ },
414
+ {
415
+ "epoch": 1.4629049111807733,
416
+ "grad_norm": 0.012942259199917316,
417
+ "learning_rate": 2.8474641889276038e-05,
418
+ "loss": 0.0007,
419
+ "step": 1400
420
+ },
421
+ {
422
+ "epoch": 1.489028213166144,
423
+ "grad_norm": 344.988037109375,
424
+ "learning_rate": 2.7990708478513355e-05,
425
+ "loss": 0.0425,
426
+ "step": 1425
427
+ },
428
+ {
429
+ "epoch": 1.5151515151515151,
430
+ "grad_norm": 0.03896530717611313,
431
+ "learning_rate": 2.750677506775068e-05,
432
+ "loss": 0.064,
433
+ "step": 1450
434
+ },
435
+ {
436
+ "epoch": 1.5412748171368862,
437
+ "grad_norm": 0.009162936359643936,
438
+ "learning_rate": 2.7022841656988e-05,
439
+ "loss": 0.001,
440
+ "step": 1475
441
+ },
442
+ {
443
+ "epoch": 1.567398119122257,
444
+ "grad_norm": 0.010370401665568352,
445
+ "learning_rate": 2.6538908246225318e-05,
446
+ "loss": 0.0365,
447
+ "step": 1500
448
+ },
449
+ {
450
+ "epoch": 1.5935214211076278,
451
+ "grad_norm": 0.009396117180585861,
452
+ "learning_rate": 2.6054974835462642e-05,
453
+ "loss": 0.0466,
454
+ "step": 1525
455
+ },
456
+ {
457
+ "epoch": 1.619644723092999,
458
+ "grad_norm": 0.016944007948040962,
459
+ "learning_rate": 2.5571041424699967e-05,
460
+ "loss": 0.0008,
461
+ "step": 1550
462
+ },
463
+ {
464
+ "epoch": 1.64576802507837,
465
+ "grad_norm": 0.008113248273730278,
466
+ "learning_rate": 2.5087108013937284e-05,
467
+ "loss": 0.034,
468
+ "step": 1575
469
+ },
470
+ {
471
+ "epoch": 1.671891327063741,
472
+ "grad_norm": 0.008707555942237377,
473
+ "learning_rate": 2.4603174603174602e-05,
474
+ "loss": 0.0007,
475
+ "step": 1600
476
+ },
477
+ {
478
+ "epoch": 1.6980146290491118,
479
+ "grad_norm": 0.06956545263528824,
480
+ "learning_rate": 2.4119241192411926e-05,
481
+ "loss": 0.0005,
482
+ "step": 1625
483
+ },
484
+ {
485
+ "epoch": 1.7241379310344827,
486
+ "grad_norm": 0.010159909725189209,
487
+ "learning_rate": 2.3635307781649247e-05,
488
+ "loss": 0.0822,
489
+ "step": 1650
490
+ },
491
+ {
492
+ "epoch": 1.7502612330198537,
493
+ "grad_norm": 0.007952134124934673,
494
+ "learning_rate": 2.3151374370886568e-05,
495
+ "loss": 0.0768,
496
+ "step": 1675
497
+ },
498
+ {
499
+ "epoch": 1.7763845350052248,
500
+ "grad_norm": 0.022700520232319832,
501
+ "learning_rate": 2.2667440960123886e-05,
502
+ "loss": 0.0325,
503
+ "step": 1700
504
+ },
505
+ {
506
+ "epoch": 1.8025078369905956,
507
+ "grad_norm": 0.04925369843840599,
508
+ "learning_rate": 2.218350754936121e-05,
509
+ "loss": 0.0832,
510
+ "step": 1725
511
+ },
512
+ {
513
+ "epoch": 1.8286311389759664,
514
+ "grad_norm": 0.010277110151946545,
515
+ "learning_rate": 2.169957413859853e-05,
516
+ "loss": 0.0007,
517
+ "step": 1750
518
+ },
519
+ {
520
+ "epoch": 1.8547544409613375,
521
+ "grad_norm": 0.009189656004309654,
522
+ "learning_rate": 2.1215640727835852e-05,
523
+ "loss": 0.0004,
524
+ "step": 1775
525
+ },
526
+ {
527
+ "epoch": 1.8808777429467085,
528
+ "grad_norm": 0.00968814454972744,
529
+ "learning_rate": 2.073170731707317e-05,
530
+ "loss": 0.0429,
531
+ "step": 1800
532
+ },
533
+ {
534
+ "epoch": 1.9070010449320796,
535
+ "grad_norm": 0.009455765597522259,
536
+ "learning_rate": 2.0247773906310494e-05,
537
+ "loss": 0.0874,
538
+ "step": 1825
539
+ },
540
+ {
541
+ "epoch": 1.9331243469174504,
542
+ "grad_norm": 0.00541004678234458,
543
+ "learning_rate": 1.9763840495547815e-05,
544
+ "loss": 0.0019,
545
+ "step": 1850
546
+ },
547
+ {
548
+ "epoch": 1.9592476489028212,
549
+ "grad_norm": 0.027987977489829063,
550
+ "learning_rate": 1.9279907084785136e-05,
551
+ "loss": 0.0375,
552
+ "step": 1875
553
+ },
554
+ {
555
+ "epoch": 1.9853709508881923,
556
+ "grad_norm": 0.012382814660668373,
557
+ "learning_rate": 1.8795973674022453e-05,
558
+ "loss": 0.0006,
559
+ "step": 1900
560
+ },
561
+ {
562
+ "epoch": 2.0,
563
+ "eval_accuracy": 0.9952953476215368,
564
+ "eval_f1_macro": 0.9958184083774263,
565
+ "eval_f1_micro": 0.9952953476215368,
566
+ "eval_f1_weighted": 0.995297723057706,
567
+ "eval_loss": 0.03401019424200058,
568
+ "eval_precision_macro": 0.9948206725776819,
569
+ "eval_precision_micro": 0.9952953476215368,
570
+ "eval_precision_weighted": 0.9953490671179285,
571
+ "eval_recall_macro": 0.9968586387434555,
572
+ "eval_recall_micro": 0.9952953476215368,
573
+ "eval_recall_weighted": 0.9952953476215368,
574
+ "eval_runtime": 204.9069,
575
+ "eval_samples_per_second": 9.336,
576
+ "eval_steps_per_second": 0.586,
577
+ "step": 1914
578
+ },
579
+ {
580
+ "epoch": 2.0114942528735633,
581
+ "grad_norm": 0.01057450845837593,
582
+ "learning_rate": 1.8312040263259778e-05,
583
+ "loss": 0.0421,
584
+ "step": 1925
585
+ },
586
+ {
587
+ "epoch": 2.0376175548589344,
588
+ "grad_norm": 0.005483025684952736,
589
+ "learning_rate": 1.78281068524971e-05,
590
+ "loss": 0.0004,
591
+ "step": 1950
592
+ },
593
+ {
594
+ "epoch": 2.063740856844305,
595
+ "grad_norm": 0.0031655074562877417,
596
+ "learning_rate": 1.734417344173442e-05,
597
+ "loss": 0.0065,
598
+ "step": 1975
599
+ },
600
+ {
601
+ "epoch": 2.089864158829676,
602
+ "grad_norm": 0.0039079682901501656,
603
+ "learning_rate": 1.6860240030971737e-05,
604
+ "loss": 0.0003,
605
+ "step": 2000
606
+ },
607
+ {
608
+ "epoch": 2.115987460815047,
609
+ "grad_norm": 6.877926826477051,
610
+ "learning_rate": 1.6376306620209058e-05,
611
+ "loss": 0.0447,
612
+ "step": 2025
613
+ },
614
+ {
615
+ "epoch": 2.142110762800418,
616
+ "grad_norm": 0.006742693949490786,
617
+ "learning_rate": 1.5892373209446382e-05,
618
+ "loss": 0.0387,
619
+ "step": 2050
620
+ },
621
+ {
622
+ "epoch": 2.1682340647857887,
623
+ "grad_norm": 0.005568367429077625,
624
+ "learning_rate": 1.5408439798683703e-05,
625
+ "loss": 0.0004,
626
+ "step": 2075
627
+ },
628
+ {
629
+ "epoch": 2.19435736677116,
630
+ "grad_norm": 0.010062599554657936,
631
+ "learning_rate": 1.4924506387921023e-05,
632
+ "loss": 0.0003,
633
+ "step": 2100
634
+ },
635
+ {
636
+ "epoch": 2.220480668756531,
637
+ "grad_norm": 0.0033714643213897943,
638
+ "learning_rate": 1.4440572977158342e-05,
639
+ "loss": 0.0002,
640
+ "step": 2125
641
+ },
642
+ {
643
+ "epoch": 2.246603970741902,
644
+ "grad_norm": 0.0053630974143743515,
645
+ "learning_rate": 1.3956639566395666e-05,
646
+ "loss": 0.0002,
647
+ "step": 2150
648
+ },
649
+ {
650
+ "epoch": 2.2727272727272725,
651
+ "grad_norm": 0.012313129380345345,
652
+ "learning_rate": 1.3472706155632985e-05,
653
+ "loss": 0.0803,
654
+ "step": 2175
655
+ },
656
+ {
657
+ "epoch": 2.2988505747126435,
658
+ "grad_norm": 0.010815066285431385,
659
+ "learning_rate": 1.2988772744870306e-05,
660
+ "loss": 0.0014,
661
+ "step": 2200
662
+ },
663
+ {
664
+ "epoch": 2.3249738766980146,
665
+ "grad_norm": 0.002910745795816183,
666
+ "learning_rate": 1.2504839334107627e-05,
667
+ "loss": 0.0014,
668
+ "step": 2225
669
+ },
670
+ {
671
+ "epoch": 2.3510971786833856,
672
+ "grad_norm": 0.003235210431739688,
673
+ "learning_rate": 1.2020905923344948e-05,
674
+ "loss": 0.0004,
675
+ "step": 2250
676
+ },
677
+ {
678
+ "epoch": 2.3772204806687567,
679
+ "grad_norm": 0.002846105257049203,
680
+ "learning_rate": 1.1536972512582269e-05,
681
+ "loss": 0.0002,
682
+ "step": 2275
683
+ },
684
+ {
685
+ "epoch": 2.4033437826541273,
686
+ "grad_norm": 0.0029369164258241653,
687
+ "learning_rate": 1.105303910181959e-05,
688
+ "loss": 0.0326,
689
+ "step": 2300
690
+ },
691
+ {
692
+ "epoch": 2.4294670846394983,
693
+ "grad_norm": 0.002867381554096937,
694
+ "learning_rate": 1.0569105691056911e-05,
695
+ "loss": 0.0547,
696
+ "step": 2325
697
+ },
698
+ {
699
+ "epoch": 2.4555903866248694,
700
+ "grad_norm": 0.0030547629576176405,
701
+ "learning_rate": 1.0085172280294232e-05,
702
+ "loss": 0.0002,
703
+ "step": 2350
704
+ },
705
+ {
706
+ "epoch": 2.4817136886102404,
707
+ "grad_norm": 0.003066838486120105,
708
+ "learning_rate": 9.601238869531553e-06,
709
+ "loss": 0.0002,
710
+ "step": 2375
711
+ },
712
+ {
713
+ "epoch": 2.507836990595611,
714
+ "grad_norm": 0.003202056046575308,
715
+ "learning_rate": 9.117305458768874e-06,
716
+ "loss": 0.0002,
717
+ "step": 2400
718
+ },
719
+ {
720
+ "epoch": 2.533960292580982,
721
+ "grad_norm": 0.002831035992130637,
722
+ "learning_rate": 8.633372048006195e-06,
723
+ "loss": 0.0002,
724
+ "step": 2425
725
+ },
726
+ {
727
+ "epoch": 2.560083594566353,
728
+ "grad_norm": 0.0025005133356899023,
729
+ "learning_rate": 8.149438637243516e-06,
730
+ "loss": 0.0002,
731
+ "step": 2450
732
+ },
733
+ {
734
+ "epoch": 2.586206896551724,
735
+ "grad_norm": 0.0023419370409101248,
736
+ "learning_rate": 7.665505226480837e-06,
737
+ "loss": 0.0002,
738
+ "step": 2475
739
+ },
740
+ {
741
+ "epoch": 2.6123301985370952,
742
+ "grad_norm": 0.004286649636924267,
743
+ "learning_rate": 7.181571815718158e-06,
744
+ "loss": 0.022,
745
+ "step": 2500
746
+ },
747
+ {
748
+ "epoch": 2.6384535005224663,
749
+ "grad_norm": 0.0027705898974090815,
750
+ "learning_rate": 6.697638404955478e-06,
751
+ "loss": 0.0002,
752
+ "step": 2525
753
+ },
754
+ {
755
+ "epoch": 2.664576802507837,
756
+ "grad_norm": 0.002411492168903351,
757
+ "learning_rate": 6.2137049941927995e-06,
758
+ "loss": 0.0301,
759
+ "step": 2550
760
+ },
761
+ {
762
+ "epoch": 2.690700104493208,
763
+ "grad_norm": 0.003571214620023966,
764
+ "learning_rate": 5.7297715834301205e-06,
765
+ "loss": 0.0002,
766
+ "step": 2575
767
+ },
768
+ {
769
+ "epoch": 2.716823406478579,
770
+ "grad_norm": 0.002616529120132327,
771
+ "learning_rate": 5.245838172667441e-06,
772
+ "loss": 0.0002,
773
+ "step": 2600
774
+ },
775
+ {
776
+ "epoch": 2.7429467084639496,
777
+ "grad_norm": 0.002348940121009946,
778
+ "learning_rate": 4.7619047619047615e-06,
779
+ "loss": 0.0454,
780
+ "step": 2625
781
+ },
782
+ {
783
+ "epoch": 2.7690700104493207,
784
+ "grad_norm": 0.0026359122712165117,
785
+ "learning_rate": 4.2779713511420825e-06,
786
+ "loss": 0.0195,
787
+ "step": 2650
788
+ },
789
+ {
790
+ "epoch": 2.7951933124346917,
791
+ "grad_norm": 0.002935645403340459,
792
+ "learning_rate": 3.794037940379404e-06,
793
+ "loss": 0.0508,
794
+ "step": 2675
795
+ },
796
+ {
797
+ "epoch": 2.8213166144200628,
798
+ "grad_norm": 0.009833462536334991,
799
+ "learning_rate": 3.3101045296167248e-06,
800
+ "loss": 0.0002,
801
+ "step": 2700
802
+ },
803
+ {
804
+ "epoch": 2.847439916405434,
805
+ "grad_norm": 6.756618976593018,
806
+ "learning_rate": 2.8261711188540457e-06,
807
+ "loss": 0.1122,
808
+ "step": 2725
809
+ },
810
+ {
811
+ "epoch": 2.873563218390805,
812
+ "grad_norm": 0.005804854445159435,
813
+ "learning_rate": 2.3422377080913667e-06,
814
+ "loss": 0.0002,
815
+ "step": 2750
816
+ },
817
+ {
818
+ "epoch": 2.8996865203761755,
819
+ "grad_norm": 0.005187211558222771,
820
+ "learning_rate": 1.8583042973286876e-06,
821
+ "loss": 0.0003,
822
+ "step": 2775
823
+ },
824
+ {
825
+ "epoch": 2.9258098223615465,
826
+ "grad_norm": 0.0601598359644413,
827
+ "learning_rate": 1.3743708865660086e-06,
828
+ "loss": 0.0002,
829
+ "step": 2800
830
+ },
831
+ {
832
+ "epoch": 2.9519331243469176,
833
+ "grad_norm": 0.0032336723525077105,
834
+ "learning_rate": 8.904374758033296e-07,
835
+ "loss": 0.0318,
836
+ "step": 2825
837
+ },
838
+ {
839
+ "epoch": 2.978056426332288,
840
+ "grad_norm": 0.003166941227391362,
841
+ "learning_rate": 4.0650406504065046e-07,
842
+ "loss": 0.0004,
843
+ "step": 2850
844
+ },
845
+ {
846
+ "epoch": 3.0,
847
+ "eval_accuracy": 0.9989545216936748,
848
+ "eval_f1_macro": 0.9991256596070146,
849
+ "eval_f1_micro": 0.9989545216936748,
850
+ "eval_f1_weighted": 0.9989547969603347,
851
+ "eval_loss": 0.007734560873359442,
852
+ "eval_precision_macro": 0.9989517819706499,
853
+ "eval_precision_micro": 0.9989545216936748,
854
+ "eval_precision_weighted": 0.9989578093613047,
855
+ "eval_recall_macro": 0.9993019197207679,
856
+ "eval_recall_micro": 0.9989545216936748,
857
+ "eval_recall_weighted": 0.9989545216936748,
858
+ "eval_runtime": 176.5223,
859
+ "eval_samples_per_second": 10.837,
860
+ "eval_steps_per_second": 0.68,
861
+ "step": 2871
862
+ }
863
+ ],
864
+ "logging_steps": 25,
865
+ "max_steps": 2871,
866
+ "num_input_tokens_seen": 0,
867
+ "num_train_epochs": 3,
868
+ "save_steps": 500,
869
+ "stateful_callbacks": {
870
+ "EarlyStoppingCallback": {
871
+ "args": {
872
+ "early_stopping_patience": 5,
873
+ "early_stopping_threshold": 0.01
874
+ },
875
+ "attributes": {
876
+ "early_stopping_patience_counter": 0
877
+ }
878
+ },
879
+ "TrainerControl": {
880
+ "args": {
881
+ "should_epoch_stop": false,
882
+ "should_evaluate": false,
883
+ "should_log": false,
884
+ "should_save": true,
885
+ "should_training_stop": true
886
+ },
887
+ "attributes": {}
888
+ }
889
+ },
890
+ "total_flos": 1509810569277696.0,
891
+ "train_batch_size": 8,
892
+ "trial_name": null,
893
+ "trial_params": null
894
+ }
checkpoint-2871/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b77a17d847c78ac8f470104c0b00fb117b1100fd9badf4a978157ffb4d832be3
3
+ size 5368
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cardiffnlp/twitter-roberta-base",
3
+ "_num_labels": 3,
4
+ "architectures": [
5
+ "RobertaForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "bos_token_id": 0,
9
+ "classifier_dropout": null,
10
+ "eos_token_id": 2,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "0",
17
+ "1": "1",
18
+ "2": "2"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 3072,
22
+ "label2id": {
23
+ "0": 0,
24
+ "1": 1,
25
+ "2": 2
26
+ },
27
+ "layer_norm_eps": 1e-05,
28
+ "max_position_embeddings": 514,
29
+ "model_type": "roberta",
30
+ "num_attention_heads": 12,
31
+ "num_hidden_layers": 12,
32
+ "pad_token_id": 1,
33
+ "position_embedding_type": "absolute",
34
+ "problem_type": "single_label_classification",
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.48.0",
37
+ "type_vocab_size": 1,
38
+ "use_cache": true,
39
+ "vocab_size": 50265
40
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77fa83f78f8efced14d895c74a0a60d8ecad30afe7fd6883bb7021fbc7b2a264
3
+ size 498615900
runs/Oct29_16-34-51_r-zkava01-pj-ee7luc7v-2b90d-t5zcp/events.out.tfevents.1761755692.r-zkava01-pj-ee7luc7v-2b90d-t5zcp.65.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e78e803c8a6563a2f15acbae40d0239612336b465911106c1267c40d5087db5e
3
- size 30755
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d9b89b35a19dd8e614f706634d1f7291da3a6b89d57c42f5826930277999e82
3
+ size 32153
runs/Oct29_16-34-51_r-zkava01-pj-ee7luc7v-2b90d-t5zcp/events.out.tfevents.1761766876.r-zkava01-pj-ee7luc7v-2b90d-t5zcp.65.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:928d93d2d13da65ca5aafd2efd1f4c70509bd5bd99e6c2e0d38bb11246d640c1
3
+ size 921
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 1000000000000000019884624838656,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b77a17d847c78ac8f470104c0b00fb117b1100fd9badf4a978157ffb4d832be3
3
+ size 5368
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "autotrain-5zkp2-pa5ot/autotrain-data",
3
+ "model": "cardiffnlp/twitter-roberta-base",
4
+ "lr": 5e-05,
5
+ "epochs": 3,
6
+ "max_seq_length": 128,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "autotrain-5zkp2-pa5ot",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "eval_strategy": "epoch",
26
+ "username": "zkava01",
27
+ "log": "tensorboard",
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff