Upload folder using huggingface_hub

Browse files

Files changed (14) hide show

config.json +173 -0
generation_config.json +9 -0
merges.txt +0 -0
model.safetensors +3 -0
optimizer.pt +3 -0
preprocessor_config.json +23 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +51 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
trainer_state.json +663 -0
training_args.bin +3 -0
vocab.json +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,173 @@

+{
+  "_name_or_path": "microsoft/trocr-base-handwritten",
+  "architectures": [
+    "VisionEncoderDecoderModel"
+  ],
+  "decoder": {
+    "_attn_implementation_autoset": false,
+    "_name_or_path": "",
+    "activation_dropout": 0.0,
+    "activation_function": "gelu",
+    "add_cross_attention": true,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "classifier_dropout": 0.0,
+    "cross_attention_hidden_size": 768,
+    "d_model": 1024,
+    "decoder_attention_heads": 16,
+    "decoder_ffn_dim": 4096,
+    "decoder_layerdrop": 0.0,
+    "decoder_layers": 12,
+    "decoder_start_token_id": 2,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "init_std": 0.02,
+    "is_decoder": true,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layernorm_embedding": true,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 512,
+    "min_length": 0,
+    "model_type": "trocr",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "scale_embedding": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": false,
+    "use_learned_position_embeddings": true,
+    "vocab_size": 50265
+  },
+  "decoder_start_token_id": 0,
+  "encoder": {
+    "_attn_implementation_autoset": false,
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_probs_dropout_prob": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "encoder_stride": 16,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.0,
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 384,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-12,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "vit",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 12,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 16,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "qkv_bias": false,
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false
+  },
+  "is_encoder_decoder": true,
+  "model_type": "vision-encoder-decoder",
+  "pad_token_id": 1,
+  "processor_class": "TrOCRProcessor",
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.3"
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "pad_token_id": 1,
+  "transformers_version": "4.46.3",
+  "use_cache": false
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d0864b5cf3951805142afc7e4bd12aa596343c1c0f2e0ba9bab7f4488024797
+size 1335747032

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e6f539a989eff32b0042a7125b35451e93cefdcff01b5b4b5b6d9fc0e8f849d1
+size 2666915017

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "ViTImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "processor_class": "TrOCRProcessor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 384,
+    "width": 384
+  }
+}

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0de23271066ca64748bcdffdff0925f8c4f7312284e7a166681ab5394cd92665
+size 13547

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d31953a7317079e12954d04c4974cd6b9586e0ed412382f3df798c0960b96d72
+size 623

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "processor_class": "TrOCRProcessor",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,663 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.272984441301273,
+  "eval_steps": 500,
+  "global_step": 900,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.014144271570014143,
+      "grad_norm": 85.7519302368164,
+      "learning_rate": 4.976426214049977e-05,
+      "loss": 9.3898,
+      "step": 10
+    },
+    {
+      "epoch": 0.028288543140028287,
+      "grad_norm": 33.235904693603516,
+      "learning_rate": 4.952852428099953e-05,
+      "loss": 5.3413,
+      "step": 20
+    },
+    {
+      "epoch": 0.042432814710042434,
+      "grad_norm": 5.113602161407471,
+      "learning_rate": 4.9292786421499294e-05,
+      "loss": 4.854,
+      "step": 30
+    },
+    {
+      "epoch": 0.056577086280056574,
+      "grad_norm": 36.069705963134766,
+      "learning_rate": 4.9057048561999055e-05,
+      "loss": 5.0425,
+      "step": 40
+    },
+    {
+      "epoch": 0.07072135785007072,
+      "grad_norm": 33.71372604370117,
+      "learning_rate": 4.882131070249882e-05,
+      "loss": 5.04,
+      "step": 50
+    },
+    {
+      "epoch": 0.08486562942008487,
+      "grad_norm": 10.95967960357666,
+      "learning_rate": 4.858557284299859e-05,
+      "loss": 4.5402,
+      "step": 60
+    },
+    {
+      "epoch": 0.09900990099009901,
+      "grad_norm": 57.9084358215332,
+      "learning_rate": 4.834983498349835e-05,
+      "loss": 4.8505,
+      "step": 70
+    },
+    {
+      "epoch": 0.11315417256011315,
+      "grad_norm": 34.007869720458984,
+      "learning_rate": 4.8114097123998114e-05,
+      "loss": 4.3305,
+      "step": 80
+    },
+    {
+      "epoch": 0.1272984441301273,
+      "grad_norm": 15.245634078979492,
+      "learning_rate": 4.787835926449788e-05,
+      "loss": 4.4707,
+      "step": 90
+    },
+    {
+      "epoch": 0.14144271570014144,
+      "grad_norm": 24.47016143798828,
+      "learning_rate": 4.7642621404997644e-05,
+      "loss": 4.5379,
+      "step": 100
+    },
+    {
+      "epoch": 0.15558698727015557,
+      "grad_norm": 14.591358184814453,
+      "learning_rate": 4.740688354549741e-05,
+      "loss": 4.5205,
+      "step": 110
+    },
+    {
+      "epoch": 0.16973125884016974,
+      "grad_norm": 18.776493072509766,
+      "learning_rate": 4.7171145685997174e-05,
+      "loss": 4.6578,
+      "step": 120
+    },
+    {
+      "epoch": 0.18387553041018387,
+      "grad_norm": 4.018069267272949,
+      "learning_rate": 4.6935407826496935e-05,
+      "loss": 4.1864,
+      "step": 130
+    },
+    {
+      "epoch": 0.19801980198019803,
+      "grad_norm": 8.038480758666992,
+      "learning_rate": 4.6699669966996704e-05,
+      "loss": 4.5046,
+      "step": 140
+    },
+    {
+      "epoch": 0.21216407355021216,
+      "grad_norm": 2.9573426246643066,
+      "learning_rate": 4.6463932107496465e-05,
+      "loss": 4.2317,
+      "step": 150
+    },
+    {
+      "epoch": 0.2263083451202263,
+      "grad_norm": 3.0037927627563477,
+      "learning_rate": 4.622819424799623e-05,
+      "loss": 4.0951,
+      "step": 160
+    },
+    {
+      "epoch": 0.24045261669024046,
+      "grad_norm": 9.886232376098633,
+      "learning_rate": 4.5992456388495995e-05,
+      "loss": 4.0168,
+      "step": 170
+    },
+    {
+      "epoch": 0.2545968882602546,
+      "grad_norm": 2.463179588317871,
+      "learning_rate": 4.5756718528995756e-05,
+      "loss": 4.0372,
+      "step": 180
+    },
+    {
+      "epoch": 0.26874115983026875,
+      "grad_norm": 13.546555519104004,
+      "learning_rate": 4.5520980669495525e-05,
+      "loss": 4.1929,
+      "step": 190
+    },
+    {
+      "epoch": 0.2828854314002829,
+      "grad_norm": 2.4467103481292725,
+      "learning_rate": 4.5285242809995286e-05,
+      "loss": 4.2146,
+      "step": 200
+    },
+    {
+      "epoch": 0.297029702970297,
+      "grad_norm": 5.936313152313232,
+      "learning_rate": 4.5049504950495054e-05,
+      "loss": 4.6069,
+      "step": 210
+    },
+    {
+      "epoch": 0.31117397454031115,
+      "grad_norm": 6.5272536277771,
+      "learning_rate": 4.4813767090994816e-05,
+      "loss": 4.3263,
+      "step": 220
+    },
+    {
+      "epoch": 0.32531824611032534,
+      "grad_norm": 4.2881598472595215,
+      "learning_rate": 4.457802923149458e-05,
+      "loss": 3.8218,
+      "step": 230
+    },
+    {
+      "epoch": 0.33946251768033947,
+      "grad_norm": 3.9945058822631836,
+      "learning_rate": 4.4342291371994345e-05,
+      "loss": 4.1532,
+      "step": 240
+    },
+    {
+      "epoch": 0.3536067892503536,
+      "grad_norm": 4.577730655670166,
+      "learning_rate": 4.410655351249411e-05,
+      "loss": 4.0727,
+      "step": 250
+    },
+    {
+      "epoch": 0.36775106082036774,
+      "grad_norm": 2.6052353382110596,
+      "learning_rate": 4.3870815652993875e-05,
+      "loss": 3.9074,
+      "step": 260
+    },
+    {
+      "epoch": 0.38189533239038187,
+      "grad_norm": 15.787618637084961,
+      "learning_rate": 4.363507779349364e-05,
+      "loss": 4.2402,
+      "step": 270
+    },
+    {
+      "epoch": 0.39603960396039606,
+      "grad_norm": 23.970670700073242,
+      "learning_rate": 4.33993399339934e-05,
+      "loss": 4.3765,
+      "step": 280
+    },
+    {
+      "epoch": 0.4101838755304102,
+      "grad_norm": 2.5313973426818848,
+      "learning_rate": 4.3163602074493166e-05,
+      "loss": 3.9863,
+      "step": 290
+    },
+    {
+      "epoch": 0.4243281471004243,
+      "grad_norm": 21.020267486572266,
+      "learning_rate": 4.292786421499293e-05,
+      "loss": 4.0341,
+      "step": 300
+    },
+    {
+      "epoch": 0.43847241867043846,
+      "grad_norm": 9.731268882751465,
+      "learning_rate": 4.2692126355492696e-05,
+      "loss": 4.1089,
+      "step": 310
+    },
+    {
+      "epoch": 0.4526166902404526,
+      "grad_norm": 4.240326881408691,
+      "learning_rate": 4.245638849599246e-05,
+      "loss": 3.9737,
+      "step": 320
+    },
+    {
+      "epoch": 0.4667609618104668,
+      "grad_norm": 15.72867202758789,
+      "learning_rate": 4.222065063649222e-05,
+      "loss": 4.454,
+      "step": 330
+    },
+    {
+      "epoch": 0.4809052333804809,
+      "grad_norm": 10.669405937194824,
+      "learning_rate": 4.198491277699199e-05,
+      "loss": 4.1596,
+      "step": 340
+    },
+    {
+      "epoch": 0.49504950495049505,
+      "grad_norm": 11.927492141723633,
+      "learning_rate": 4.174917491749175e-05,
+      "loss": 4.0485,
+      "step": 350
+    },
+    {
+      "epoch": 0.5091937765205092,
+      "grad_norm": 7.629958629608154,
+      "learning_rate": 4.151343705799152e-05,
+      "loss": 4.1567,
+      "step": 360
+    },
+    {
+      "epoch": 0.5233380480905233,
+      "grad_norm": 32.22209930419922,
+      "learning_rate": 4.1277699198491285e-05,
+      "loss": 4.9187,
+      "step": 370
+    },
+    {
+      "epoch": 0.5374823196605375,
+      "grad_norm": 7.841526985168457,
+      "learning_rate": 4.104196133899104e-05,
+      "loss": 4.0989,
+      "step": 380
+    },
+    {
+      "epoch": 0.5516265912305516,
+      "grad_norm": 3.8099868297576904,
+      "learning_rate": 4.080622347949081e-05,
+      "loss": 4.4586,
+      "step": 390
+    },
+    {
+      "epoch": 0.5657708628005658,
+      "grad_norm": 11.720135688781738,
+      "learning_rate": 4.057048561999057e-05,
+      "loss": 4.1915,
+      "step": 400
+    },
+    {
+      "epoch": 0.57991513437058,
+      "grad_norm": 5.8960280418396,
+      "learning_rate": 4.033474776049034e-05,
+      "loss": 4.3458,
+      "step": 410
+    },
+    {
+      "epoch": 0.594059405940594,
+      "grad_norm": 3.532780885696411,
+      "learning_rate": 4.0099009900990106e-05,
+      "loss": 4.2094,
+      "step": 420
+    },
+    {
+      "epoch": 0.6082036775106082,
+      "grad_norm": 6.77498722076416,
+      "learning_rate": 3.986327204148986e-05,
+      "loss": 4.3139,
+      "step": 430
+    },
+    {
+      "epoch": 0.6223479490806223,
+      "grad_norm": 23.035005569458008,
+      "learning_rate": 3.962753418198963e-05,
+      "loss": 4.0213,
+      "step": 440
+    },
+    {
+      "epoch": 0.6364922206506365,
+      "grad_norm": 3.033621311187744,
+      "learning_rate": 3.939179632248939e-05,
+      "loss": 4.0612,
+      "step": 450
+    },
+    {
+      "epoch": 0.6506364922206507,
+      "grad_norm": 32.6967887878418,
+      "learning_rate": 3.915605846298916e-05,
+      "loss": 4.1847,
+      "step": 460
+    },
+    {
+      "epoch": 0.6647807637906648,
+      "grad_norm": 9.779464721679688,
+      "learning_rate": 3.892032060348893e-05,
+      "loss": 4.1741,
+      "step": 470
+    },
+    {
+      "epoch": 0.6789250353606789,
+      "grad_norm": 14.904414176940918,
+      "learning_rate": 3.868458274398868e-05,
+      "loss": 4.1909,
+      "step": 480
+    },
+    {
+      "epoch": 0.693069306930693,
+      "grad_norm": 34.94367218017578,
+      "learning_rate": 3.844884488448845e-05,
+      "loss": 4.9592,
+      "step": 490
+    },
+    {
+      "epoch": 0.7072135785007072,
+      "grad_norm": 6.339000701904297,
+      "learning_rate": 3.821310702498822e-05,
+      "loss": 3.7726,
+      "step": 500
+    },
+    {
+      "epoch": 0.7213578500707214,
+      "grad_norm": 17.672000885009766,
+      "learning_rate": 3.797736916548798e-05,
+      "loss": 3.9599,
+      "step": 510
+    },
+    {
+      "epoch": 0.7355021216407355,
+      "grad_norm": 13.348356246948242,
+      "learning_rate": 3.774163130598775e-05,
+      "loss": 3.8342,
+      "step": 520
+    },
+    {
+      "epoch": 0.7496463932107497,
+      "grad_norm": 1.8930085897445679,
+      "learning_rate": 3.75058934464875e-05,
+      "loss": 4.0049,
+      "step": 530
+    },
+    {
+      "epoch": 0.7637906647807637,
+      "grad_norm": 35.62409210205078,
+      "learning_rate": 3.727015558698727e-05,
+      "loss": 3.9412,
+      "step": 540
+    },
+    {
+      "epoch": 0.7779349363507779,
+      "grad_norm": 2.246541738510132,
+      "learning_rate": 3.703441772748704e-05,
+      "loss": 3.893,
+      "step": 550
+    },
+    {
+      "epoch": 0.7920792079207921,
+      "grad_norm": 57.89748001098633,
+      "learning_rate": 3.67986798679868e-05,
+      "loss": 4.2004,
+      "step": 560
+    },
+    {
+      "epoch": 0.8062234794908062,
+      "grad_norm": 13.958605766296387,
+      "learning_rate": 3.656294200848657e-05,
+      "loss": 4.5987,
+      "step": 570
+    },
+    {
+      "epoch": 0.8203677510608204,
+      "grad_norm": 7.963130950927734,
+      "learning_rate": 3.6327204148986324e-05,
+      "loss": 3.8688,
+      "step": 580
+    },
+    {
+      "epoch": 0.8345120226308345,
+      "grad_norm": 12.124194145202637,
+      "learning_rate": 3.609146628948609e-05,
+      "loss": 3.8787,
+      "step": 590
+    },
+    {
+      "epoch": 0.8486562942008486,
+      "grad_norm": 19.39701271057129,
+      "learning_rate": 3.585572842998586e-05,
+      "loss": 3.8274,
+      "step": 600
+    },
+    {
+      "epoch": 0.8628005657708628,
+      "grad_norm": 7.561882495880127,
+      "learning_rate": 3.561999057048562e-05,
+      "loss": 3.917,
+      "step": 610
+    },
+    {
+      "epoch": 0.8769448373408769,
+      "grad_norm": 8.699311256408691,
+      "learning_rate": 3.538425271098539e-05,
+      "loss": 3.8819,
+      "step": 620
+    },
+    {
+      "epoch": 0.8910891089108911,
+      "grad_norm": 10.60632038116455,
+      "learning_rate": 3.514851485148515e-05,
+      "loss": 4.3288,
+      "step": 630
+    },
+    {
+      "epoch": 0.9052333804809052,
+      "grad_norm": 5.851240634918213,
+      "learning_rate": 3.491277699198491e-05,
+      "loss": 3.7157,
+      "step": 640
+    },
+    {
+      "epoch": 0.9193776520509194,
+      "grad_norm": 12.624049186706543,
+      "learning_rate": 3.467703913248468e-05,
+      "loss": 4.0151,
+      "step": 650
+    },
+    {
+      "epoch": 0.9335219236209336,
+      "grad_norm": 10.379075050354004,
+      "learning_rate": 3.444130127298444e-05,
+      "loss": 4.1419,
+      "step": 660
+    },
+    {
+      "epoch": 0.9476661951909476,
+      "grad_norm": 11.247940063476562,
+      "learning_rate": 3.420556341348421e-05,
+      "loss": 3.5958,
+      "step": 670
+    },
+    {
+      "epoch": 0.9618104667609618,
+      "grad_norm": 10.014704704284668,
+      "learning_rate": 3.396982555398397e-05,
+      "loss": 3.7822,
+      "step": 680
+    },
+    {
+      "epoch": 0.9759547383309759,
+      "grad_norm": 8.791955947875977,
+      "learning_rate": 3.3734087694483734e-05,
+      "loss": 3.9809,
+      "step": 690
+    },
+    {
+      "epoch": 0.9900990099009901,
+      "grad_norm": 30.620357513427734,
+      "learning_rate": 3.34983498349835e-05,
+      "loss": 3.9702,
+      "step": 700
+    },
+    {
+      "epoch": 1.0042432814710043,
+      "grad_norm": 23.29230499267578,
+      "learning_rate": 3.326261197548326e-05,
+      "loss": 3.8758,
+      "step": 710
+    },
+    {
+      "epoch": 1.0183875530410185,
+      "grad_norm": 6.364682674407959,
+      "learning_rate": 3.302687411598303e-05,
+      "loss": 3.6476,
+      "step": 720
+    },
+    {
+      "epoch": 1.0325318246110324,
+      "grad_norm": 22.594091415405273,
+      "learning_rate": 3.279113625648279e-05,
+      "loss": 4.2427,
+      "step": 730
+    },
+    {
+      "epoch": 1.0466760961810466,
+      "grad_norm": 31.865617752075195,
+      "learning_rate": 3.2555398396982555e-05,
+      "loss": 3.8927,
+      "step": 740
+    },
+    {
+      "epoch": 1.0608203677510608,
+      "grad_norm": 2.553858757019043,
+      "learning_rate": 3.231966053748232e-05,
+      "loss": 3.6406,
+      "step": 750
+    },
+    {
+      "epoch": 1.074964639321075,
+      "grad_norm": 24.558555603027344,
+      "learning_rate": 3.2083922677982084e-05,
+      "loss": 3.9391,
+      "step": 760
+    },
+    {
+      "epoch": 1.0891089108910892,
+      "grad_norm": 3.9098362922668457,
+      "learning_rate": 3.184818481848185e-05,
+      "loss": 4.2241,
+      "step": 770
+    },
+    {
+      "epoch": 1.1032531824611032,
+      "grad_norm": 14.435652732849121,
+      "learning_rate": 3.1612446958981614e-05,
+      "loss": 3.5212,
+      "step": 780
+    },
+    {
+      "epoch": 1.1173974540311173,
+      "grad_norm": 4.891509056091309,
+      "learning_rate": 3.1376709099481375e-05,
+      "loss": 3.3049,
+      "step": 790
+    },
+    {
+      "epoch": 1.1315417256011315,
+      "grad_norm": 10.893304824829102,
+      "learning_rate": 3.1140971239981144e-05,
+      "loss": 3.7428,
+      "step": 800
+    },
+    {
+      "epoch": 1.1456859971711457,
+      "grad_norm": 3.631542921066284,
+      "learning_rate": 3.0905233380480905e-05,
+      "loss": 3.7109,
+      "step": 810
+    },
+    {
+      "epoch": 1.15983026874116,
+      "grad_norm": 17.292734146118164,
+      "learning_rate": 3.0669495520980673e-05,
+      "loss": 3.748,
+      "step": 820
+    },
+    {
+      "epoch": 1.1739745403111739,
+      "grad_norm": 12.438305854797363,
+      "learning_rate": 3.043375766148043e-05,
+      "loss": 4.5263,
+      "step": 830
+    },
+    {
+      "epoch": 1.188118811881188,
+      "grad_norm": 7.694697380065918,
+      "learning_rate": 3.01980198019802e-05,
+      "loss": 3.9575,
+      "step": 840
+    },
+    {
+      "epoch": 1.2022630834512023,
+      "grad_norm": 9.88021469116211,
+      "learning_rate": 2.9962281942479965e-05,
+      "loss": 3.5793,
+      "step": 850
+    },
+    {
+      "epoch": 1.2164073550212164,
+      "grad_norm": 18.16057586669922,
+      "learning_rate": 2.972654408297973e-05,
+      "loss": 3.7951,
+      "step": 860
+    },
+    {
+      "epoch": 1.2305516265912306,
+      "grad_norm": 3.5214946269989014,
+      "learning_rate": 2.9490806223479494e-05,
+      "loss": 4.0263,
+      "step": 870
+    },
+    {
+      "epoch": 1.2446958981612446,
+      "grad_norm": 20.135046005249023,
+      "learning_rate": 2.9255068363979256e-05,
+      "loss": 3.678,
+      "step": 880
+    },
+    {
+      "epoch": 1.2588401697312588,
+      "grad_norm": 35.220733642578125,
+      "learning_rate": 2.901933050447902e-05,
+      "loss": 4.1981,
+      "step": 890
+    },
+    {
+      "epoch": 1.272984441301273,
+      "grad_norm": 39.2838134765625,
+      "learning_rate": 2.8783592644978786e-05,
+      "loss": 3.9173,
+      "step": 900
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 2121,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.3469133437927424e+18,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b769e33f404ccffb79db1890a0a15a1d95b91ad4d06a151aebffd1ff7dca0621
+size 4783

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff