prithivMLmods commited on
Commit
94b7583
·
verified ·
1 Parent(s): 5f4bcc6

Upload folder using huggingface_hub

Browse files
checkpoint-1876/config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "airplane",
8
+ "1": "automobile",
9
+ "2": "bird",
10
+ "3": "cat",
11
+ "4": "deer",
12
+ "5": "dog",
13
+ "6": "frog",
14
+ "7": "horse",
15
+ "8": "ship",
16
+ "9": "truck"
17
+ },
18
+ "initializer_factor": 1.0,
19
+ "label2id": {
20
+ "airplane": 0,
21
+ "automobile": 1,
22
+ "bird": 2,
23
+ "cat": 3,
24
+ "deer": 4,
25
+ "dog": 5,
26
+ "frog": 6,
27
+ "horse": 7,
28
+ "ship": 8,
29
+ "truck": 9
30
+ },
31
+ "logit_scale_init_value": 2.6592,
32
+ "model_type": "metaclip_2",
33
+ "problem_type": "single_label_classification",
34
+ "projection_dim": 384,
35
+ "text_config": {
36
+ "attention_dropout": 0.0,
37
+ "dtype": "float32",
38
+ "eos_token_id": 2,
39
+ "hidden_act": "gelu",
40
+ "hidden_size": 384,
41
+ "initializer_factor": 1.0,
42
+ "initializer_range": 0.02,
43
+ "intermediate_size": 1536,
44
+ "layer_norm_eps": 1e-05,
45
+ "max_position_embeddings": 77,
46
+ "model_type": "metaclip_2_text_model",
47
+ "num_attention_heads": 6,
48
+ "num_hidden_layers": 12,
49
+ "projection_dim": 384,
50
+ "vocab_size": 901629
51
+ },
52
+ "transformers_version": "4.57.1",
53
+ "vision_config": {
54
+ "attention_dropout": 0.0,
55
+ "dtype": "float32",
56
+ "hidden_act": "gelu",
57
+ "hidden_size": 384,
58
+ "image_size": 224,
59
+ "initializer_factor": 1.0,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 1536,
62
+ "layer_norm_eps": 1e-05,
63
+ "model_type": "metaclip_2_vision_model",
64
+ "num_attention_heads": 6,
65
+ "num_channels": 3,
66
+ "num_hidden_layers": 12,
67
+ "patch_size": 16,
68
+ "projection_dim": 384
69
+ }
70
+ }
checkpoint-1876/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06b29c5dd252f58b51133ed5893e0ce3794acf8ca2892fbcdb63d27d01824aa9
3
+ size 86703248
checkpoint-1876/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b73e8d9c5816fbb85ade28f395aa076ca26cf79d703e45097e7364d569f7d6c
3
+ size 173522699
checkpoint-1876/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-1876/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac3be64a16bd10f2df355cd871b61f0d0e89cc9247ee2c99aa3286c961b5850f
3
+ size 14645
checkpoint-1876/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:821339f9e61aa962297cc39e7f2385be146465c890ae7b9814d27262ff2e33b9
3
+ size 1465
checkpoint-1876/trainer_state.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1876,
3
+ "best_metric": 0.15963459014892578,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-1876",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1876,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.5330490405117271,
14
+ "grad_norm": 104.10272979736328,
15
+ "learning_rate": 1.7574284170718533e-05,
16
+ "loss": 0.8655,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.92635,
22
+ "eval_loss": 0.2205629199743271,
23
+ "eval_model_preparation_time": 0.0039,
24
+ "eval_runtime": 90.292,
25
+ "eval_samples_per_second": 221.503,
26
+ "eval_steps_per_second": 27.688,
27
+ "step": 938
28
+ },
29
+ {
30
+ "epoch": 1.0660980810234542,
31
+ "grad_norm": 53.17558670043945,
32
+ "learning_rate": 1.4873041599135603e-05,
33
+ "loss": 0.4234,
34
+ "step": 1000
35
+ },
36
+ {
37
+ "epoch": 1.5991471215351813,
38
+ "grad_norm": 17.705827713012695,
39
+ "learning_rate": 1.2171799027552675e-05,
40
+ "loss": 0.3154,
41
+ "step": 1500
42
+ },
43
+ {
44
+ "epoch": 2.0,
45
+ "eval_accuracy": 0.9459,
46
+ "eval_loss": 0.15963459014892578,
47
+ "eval_model_preparation_time": 0.0039,
48
+ "eval_runtime": 87.0564,
49
+ "eval_samples_per_second": 229.736,
50
+ "eval_steps_per_second": 28.717,
51
+ "step": 1876
52
+ }
53
+ ],
54
+ "logging_steps": 500,
55
+ "max_steps": 3752,
56
+ "num_input_tokens_seen": 0,
57
+ "num_train_epochs": 4,
58
+ "save_steps": 500,
59
+ "stateful_callbacks": {
60
+ "TrainerControl": {
61
+ "args": {
62
+ "should_epoch_stop": false,
63
+ "should_evaluate": false,
64
+ "should_log": false,
65
+ "should_save": true,
66
+ "should_training_stop": false
67
+ },
68
+ "attributes": {}
69
+ }
70
+ },
71
+ "total_flos": 1.17019413504e+18,
72
+ "train_batch_size": 32,
73
+ "trial_name": null,
74
+ "trial_params": null
75
+ }
checkpoint-1876/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
3
+ size 5777
checkpoint-2814/config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "airplane",
8
+ "1": "automobile",
9
+ "2": "bird",
10
+ "3": "cat",
11
+ "4": "deer",
12
+ "5": "dog",
13
+ "6": "frog",
14
+ "7": "horse",
15
+ "8": "ship",
16
+ "9": "truck"
17
+ },
18
+ "initializer_factor": 1.0,
19
+ "label2id": {
20
+ "airplane": 0,
21
+ "automobile": 1,
22
+ "bird": 2,
23
+ "cat": 3,
24
+ "deer": 4,
25
+ "dog": 5,
26
+ "frog": 6,
27
+ "horse": 7,
28
+ "ship": 8,
29
+ "truck": 9
30
+ },
31
+ "logit_scale_init_value": 2.6592,
32
+ "model_type": "metaclip_2",
33
+ "problem_type": "single_label_classification",
34
+ "projection_dim": 384,
35
+ "text_config": {
36
+ "attention_dropout": 0.0,
37
+ "dtype": "float32",
38
+ "eos_token_id": 2,
39
+ "hidden_act": "gelu",
40
+ "hidden_size": 384,
41
+ "initializer_factor": 1.0,
42
+ "initializer_range": 0.02,
43
+ "intermediate_size": 1536,
44
+ "layer_norm_eps": 1e-05,
45
+ "max_position_embeddings": 77,
46
+ "model_type": "metaclip_2_text_model",
47
+ "num_attention_heads": 6,
48
+ "num_hidden_layers": 12,
49
+ "projection_dim": 384,
50
+ "vocab_size": 901629
51
+ },
52
+ "transformers_version": "4.57.1",
53
+ "vision_config": {
54
+ "attention_dropout": 0.0,
55
+ "dtype": "float32",
56
+ "hidden_act": "gelu",
57
+ "hidden_size": 384,
58
+ "image_size": 224,
59
+ "initializer_factor": 1.0,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 1536,
62
+ "layer_norm_eps": 1e-05,
63
+ "model_type": "metaclip_2_vision_model",
64
+ "num_attention_heads": 6,
65
+ "num_channels": 3,
66
+ "num_hidden_layers": 12,
67
+ "patch_size": 16,
68
+ "projection_dim": 384
69
+ }
70
+ }
checkpoint-2814/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6733ace664b06b6495f14143722f5303503b062297a2906427fbcb2e35795579
3
+ size 86703248
checkpoint-2814/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c569dbc843448df31a15a4d0df29d70fd7c9b18fda6bc5272dbbfa1a2b3027e5
3
+ size 173522699
checkpoint-2814/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-2814/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a29ae404595fc10a4b5a1063aa0986a935eb92411ef6f000cf138daa8193145e
3
+ size 14645
checkpoint-2814/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe60dc1e32dfc7b25fd9e07949a92d68be512c8d17eae53c27689946c0114383
3
+ size 1465
checkpoint-2814/trainer_state.json ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2814,
3
+ "best_metric": 0.1238662451505661,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-2814",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2814,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.5330490405117271,
14
+ "grad_norm": 104.10272979736328,
15
+ "learning_rate": 1.7574284170718533e-05,
16
+ "loss": 0.8655,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.92635,
22
+ "eval_loss": 0.2205629199743271,
23
+ "eval_model_preparation_time": 0.0039,
24
+ "eval_runtime": 90.292,
25
+ "eval_samples_per_second": 221.503,
26
+ "eval_steps_per_second": 27.688,
27
+ "step": 938
28
+ },
29
+ {
30
+ "epoch": 1.0660980810234542,
31
+ "grad_norm": 53.17558670043945,
32
+ "learning_rate": 1.4873041599135603e-05,
33
+ "loss": 0.4234,
34
+ "step": 1000
35
+ },
36
+ {
37
+ "epoch": 1.5991471215351813,
38
+ "grad_norm": 17.705827713012695,
39
+ "learning_rate": 1.2171799027552675e-05,
40
+ "loss": 0.3154,
41
+ "step": 1500
42
+ },
43
+ {
44
+ "epoch": 2.0,
45
+ "eval_accuracy": 0.9459,
46
+ "eval_loss": 0.15963459014892578,
47
+ "eval_model_preparation_time": 0.0039,
48
+ "eval_runtime": 87.0564,
49
+ "eval_samples_per_second": 229.736,
50
+ "eval_steps_per_second": 28.717,
51
+ "step": 1876
52
+ },
53
+ {
54
+ "epoch": 2.1321961620469083,
55
+ "grad_norm": 21.37250518798828,
56
+ "learning_rate": 9.470556455969747e-06,
57
+ "loss": 0.2481,
58
+ "step": 2000
59
+ },
60
+ {
61
+ "epoch": 2.6652452025586353,
62
+ "grad_norm": 32.63026809692383,
63
+ "learning_rate": 6.769313884386819e-06,
64
+ "loss": 0.2002,
65
+ "step": 2500
66
+ },
67
+ {
68
+ "epoch": 3.0,
69
+ "eval_accuracy": 0.9582,
70
+ "eval_loss": 0.1238662451505661,
71
+ "eval_model_preparation_time": 0.0039,
72
+ "eval_runtime": 92.3095,
73
+ "eval_samples_per_second": 216.662,
74
+ "eval_steps_per_second": 27.083,
75
+ "step": 2814
76
+ }
77
+ ],
78
+ "logging_steps": 500,
79
+ "max_steps": 3752,
80
+ "num_input_tokens_seen": 0,
81
+ "num_train_epochs": 4,
82
+ "save_steps": 500,
83
+ "stateful_callbacks": {
84
+ "TrainerControl": {
85
+ "args": {
86
+ "should_epoch_stop": false,
87
+ "should_evaluate": false,
88
+ "should_log": false,
89
+ "should_save": true,
90
+ "should_training_stop": false
91
+ },
92
+ "attributes": {}
93
+ }
94
+ },
95
+ "total_flos": 1.75529120256e+18,
96
+ "train_batch_size": 32,
97
+ "trial_name": null,
98
+ "trial_params": null
99
+ }
checkpoint-2814/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
3
+ size 5777
checkpoint-3752/config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "airplane",
8
+ "1": "automobile",
9
+ "2": "bird",
10
+ "3": "cat",
11
+ "4": "deer",
12
+ "5": "dog",
13
+ "6": "frog",
14
+ "7": "horse",
15
+ "8": "ship",
16
+ "9": "truck"
17
+ },
18
+ "initializer_factor": 1.0,
19
+ "label2id": {
20
+ "airplane": 0,
21
+ "automobile": 1,
22
+ "bird": 2,
23
+ "cat": 3,
24
+ "deer": 4,
25
+ "dog": 5,
26
+ "frog": 6,
27
+ "horse": 7,
28
+ "ship": 8,
29
+ "truck": 9
30
+ },
31
+ "logit_scale_init_value": 2.6592,
32
+ "model_type": "metaclip_2",
33
+ "problem_type": "single_label_classification",
34
+ "projection_dim": 384,
35
+ "text_config": {
36
+ "attention_dropout": 0.0,
37
+ "dtype": "float32",
38
+ "eos_token_id": 2,
39
+ "hidden_act": "gelu",
40
+ "hidden_size": 384,
41
+ "initializer_factor": 1.0,
42
+ "initializer_range": 0.02,
43
+ "intermediate_size": 1536,
44
+ "layer_norm_eps": 1e-05,
45
+ "max_position_embeddings": 77,
46
+ "model_type": "metaclip_2_text_model",
47
+ "num_attention_heads": 6,
48
+ "num_hidden_layers": 12,
49
+ "projection_dim": 384,
50
+ "vocab_size": 901629
51
+ },
52
+ "transformers_version": "4.57.1",
53
+ "vision_config": {
54
+ "attention_dropout": 0.0,
55
+ "dtype": "float32",
56
+ "hidden_act": "gelu",
57
+ "hidden_size": 384,
58
+ "image_size": 224,
59
+ "initializer_factor": 1.0,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 1536,
62
+ "layer_norm_eps": 1e-05,
63
+ "model_type": "metaclip_2_vision_model",
64
+ "num_attention_heads": 6,
65
+ "num_channels": 3,
66
+ "num_hidden_layers": 12,
67
+ "patch_size": 16,
68
+ "projection_dim": 384
69
+ }
70
+ }
checkpoint-3752/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d7590d179c1aad8be76d019c64b7308bc08954b64eebed7398a2907829697dd
3
+ size 86703248
checkpoint-3752/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af42edc2144b4e24fca72977978dd6e390ea064412f9ecef8125eb94d23c9955
3
+ size 173522699
checkpoint-3752/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-3752/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:483efe6d84d7c8cbfde1761fe938811764b708d5d65656ea49f25587593bbe02
3
+ size 14645
checkpoint-3752/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba38110e0f7eb61e76afd1da99c98c0105de9e1d2f7c221704071b570a151b3c
3
+ size 1465
checkpoint-3752/trainer_state.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 3752,
3
+ "best_metric": 0.10899118334054947,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-3752",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 3752,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.5330490405117271,
14
+ "grad_norm": 104.10272979736328,
15
+ "learning_rate": 1.7574284170718533e-05,
16
+ "loss": 0.8655,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.92635,
22
+ "eval_loss": 0.2205629199743271,
23
+ "eval_model_preparation_time": 0.0039,
24
+ "eval_runtime": 90.292,
25
+ "eval_samples_per_second": 221.503,
26
+ "eval_steps_per_second": 27.688,
27
+ "step": 938
28
+ },
29
+ {
30
+ "epoch": 1.0660980810234542,
31
+ "grad_norm": 53.17558670043945,
32
+ "learning_rate": 1.4873041599135603e-05,
33
+ "loss": 0.4234,
34
+ "step": 1000
35
+ },
36
+ {
37
+ "epoch": 1.5991471215351813,
38
+ "grad_norm": 17.705827713012695,
39
+ "learning_rate": 1.2171799027552675e-05,
40
+ "loss": 0.3154,
41
+ "step": 1500
42
+ },
43
+ {
44
+ "epoch": 2.0,
45
+ "eval_accuracy": 0.9459,
46
+ "eval_loss": 0.15963459014892578,
47
+ "eval_model_preparation_time": 0.0039,
48
+ "eval_runtime": 87.0564,
49
+ "eval_samples_per_second": 229.736,
50
+ "eval_steps_per_second": 28.717,
51
+ "step": 1876
52
+ },
53
+ {
54
+ "epoch": 2.1321961620469083,
55
+ "grad_norm": 21.37250518798828,
56
+ "learning_rate": 9.470556455969747e-06,
57
+ "loss": 0.2481,
58
+ "step": 2000
59
+ },
60
+ {
61
+ "epoch": 2.6652452025586353,
62
+ "grad_norm": 32.63026809692383,
63
+ "learning_rate": 6.769313884386819e-06,
64
+ "loss": 0.2002,
65
+ "step": 2500
66
+ },
67
+ {
68
+ "epoch": 3.0,
69
+ "eval_accuracy": 0.9582,
70
+ "eval_loss": 0.1238662451505661,
71
+ "eval_model_preparation_time": 0.0039,
72
+ "eval_runtime": 92.3095,
73
+ "eval_samples_per_second": 216.662,
74
+ "eval_steps_per_second": 27.083,
75
+ "step": 2814
76
+ },
77
+ {
78
+ "epoch": 3.1982942430703627,
79
+ "grad_norm": 37.74256896972656,
80
+ "learning_rate": 4.06807131280389e-06,
81
+ "loss": 0.1637,
82
+ "step": 3000
83
+ },
84
+ {
85
+ "epoch": 3.7313432835820897,
86
+ "grad_norm": 6.652950763702393,
87
+ "learning_rate": 1.3668287412209618e-06,
88
+ "loss": 0.1253,
89
+ "step": 3500
90
+ },
91
+ {
92
+ "epoch": 4.0,
93
+ "eval_accuracy": 0.96315,
94
+ "eval_loss": 0.10899118334054947,
95
+ "eval_model_preparation_time": 0.0039,
96
+ "eval_runtime": 87.3412,
97
+ "eval_samples_per_second": 228.987,
98
+ "eval_steps_per_second": 28.623,
99
+ "step": 3752
100
+ }
101
+ ],
102
+ "logging_steps": 500,
103
+ "max_steps": 3752,
104
+ "num_input_tokens_seen": 0,
105
+ "num_train_epochs": 4,
106
+ "save_steps": 500,
107
+ "stateful_callbacks": {
108
+ "TrainerControl": {
109
+ "args": {
110
+ "should_epoch_stop": false,
111
+ "should_evaluate": false,
112
+ "should_log": false,
113
+ "should_save": true,
114
+ "should_training_stop": true
115
+ },
116
+ "attributes": {}
117
+ }
118
+ },
119
+ "total_flos": 2.34038827008e+18,
120
+ "train_batch_size": 32,
121
+ "trial_name": null,
122
+ "trial_params": null
123
+ }
checkpoint-3752/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
3
+ size 5777
checkpoint-938/config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "airplane",
8
+ "1": "automobile",
9
+ "2": "bird",
10
+ "3": "cat",
11
+ "4": "deer",
12
+ "5": "dog",
13
+ "6": "frog",
14
+ "7": "horse",
15
+ "8": "ship",
16
+ "9": "truck"
17
+ },
18
+ "initializer_factor": 1.0,
19
+ "label2id": {
20
+ "airplane": 0,
21
+ "automobile": 1,
22
+ "bird": 2,
23
+ "cat": 3,
24
+ "deer": 4,
25
+ "dog": 5,
26
+ "frog": 6,
27
+ "horse": 7,
28
+ "ship": 8,
29
+ "truck": 9
30
+ },
31
+ "logit_scale_init_value": 2.6592,
32
+ "model_type": "metaclip_2",
33
+ "problem_type": "single_label_classification",
34
+ "projection_dim": 384,
35
+ "text_config": {
36
+ "attention_dropout": 0.0,
37
+ "dtype": "float32",
38
+ "eos_token_id": 2,
39
+ "hidden_act": "gelu",
40
+ "hidden_size": 384,
41
+ "initializer_factor": 1.0,
42
+ "initializer_range": 0.02,
43
+ "intermediate_size": 1536,
44
+ "layer_norm_eps": 1e-05,
45
+ "max_position_embeddings": 77,
46
+ "model_type": "metaclip_2_text_model",
47
+ "num_attention_heads": 6,
48
+ "num_hidden_layers": 12,
49
+ "projection_dim": 384,
50
+ "vocab_size": 901629
51
+ },
52
+ "transformers_version": "4.57.1",
53
+ "vision_config": {
54
+ "attention_dropout": 0.0,
55
+ "dtype": "float32",
56
+ "hidden_act": "gelu",
57
+ "hidden_size": 384,
58
+ "image_size": 224,
59
+ "initializer_factor": 1.0,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 1536,
62
+ "layer_norm_eps": 1e-05,
63
+ "model_type": "metaclip_2_vision_model",
64
+ "num_attention_heads": 6,
65
+ "num_channels": 3,
66
+ "num_hidden_layers": 12,
67
+ "patch_size": 16,
68
+ "projection_dim": 384
69
+ }
70
+ }
checkpoint-938/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0295970f5c5254383eea7f2873f3d585654e1367b2fd6b267f29ed9f9b3f26c4
3
+ size 86703248
checkpoint-938/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08e83a46af7737bff566b2f7f8b4fd87001b4bea72c17045891ca059b70fc464
3
+ size 173522699
checkpoint-938/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-938/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:081c357ef46dc87b932a2d21b2bd7241e2c976740daddae5a5457a7579ee9392
3
+ size 14645
checkpoint-938/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3ff0afff6239fce33f43c7b9ade60dd4c0cca24a3033a0c5718c09cb7abd681
3
+ size 1465
checkpoint-938/trainer_state.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 938,
3
+ "best_metric": 0.2205629199743271,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-938",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 938,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.5330490405117271,
14
+ "grad_norm": 104.10272979736328,
15
+ "learning_rate": 1.7574284170718533e-05,
16
+ "loss": 0.8655,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.92635,
22
+ "eval_loss": 0.2205629199743271,
23
+ "eval_model_preparation_time": 0.0039,
24
+ "eval_runtime": 90.292,
25
+ "eval_samples_per_second": 221.503,
26
+ "eval_steps_per_second": 27.688,
27
+ "step": 938
28
+ }
29
+ ],
30
+ "logging_steps": 500,
31
+ "max_steps": 3752,
32
+ "num_input_tokens_seen": 0,
33
+ "num_train_epochs": 4,
34
+ "save_steps": 500,
35
+ "stateful_callbacks": {
36
+ "TrainerControl": {
37
+ "args": {
38
+ "should_epoch_stop": false,
39
+ "should_evaluate": false,
40
+ "should_log": false,
41
+ "should_save": true,
42
+ "should_training_stop": false
43
+ },
44
+ "attributes": {}
45
+ }
46
+ },
47
+ "total_flos": 5.8509706752e+17,
48
+ "train_batch_size": 32,
49
+ "trial_name": null,
50
+ "trial_params": null
51
+ }
checkpoint-938/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
3
+ size 5777
config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "airplane",
8
+ "1": "automobile",
9
+ "2": "bird",
10
+ "3": "cat",
11
+ "4": "deer",
12
+ "5": "dog",
13
+ "6": "frog",
14
+ "7": "horse",
15
+ "8": "ship",
16
+ "9": "truck"
17
+ },
18
+ "initializer_factor": 1.0,
19
+ "label2id": {
20
+ "airplane": 0,
21
+ "automobile": 1,
22
+ "bird": 2,
23
+ "cat": 3,
24
+ "deer": 4,
25
+ "dog": 5,
26
+ "frog": 6,
27
+ "horse": 7,
28
+ "ship": 8,
29
+ "truck": 9
30
+ },
31
+ "logit_scale_init_value": 2.6592,
32
+ "model_type": "metaclip_2",
33
+ "problem_type": "single_label_classification",
34
+ "projection_dim": 384,
35
+ "text_config": {
36
+ "attention_dropout": 0.0,
37
+ "dtype": "float32",
38
+ "eos_token_id": 2,
39
+ "hidden_act": "gelu",
40
+ "hidden_size": 384,
41
+ "initializer_factor": 1.0,
42
+ "initializer_range": 0.02,
43
+ "intermediate_size": 1536,
44
+ "layer_norm_eps": 1e-05,
45
+ "max_position_embeddings": 77,
46
+ "model_type": "metaclip_2_text_model",
47
+ "num_attention_heads": 6,
48
+ "num_hidden_layers": 12,
49
+ "projection_dim": 384,
50
+ "vocab_size": 901629
51
+ },
52
+ "transformers_version": "4.57.1",
53
+ "vision_config": {
54
+ "attention_dropout": 0.0,
55
+ "dtype": "float32",
56
+ "hidden_act": "gelu",
57
+ "hidden_size": 384,
58
+ "image_size": 224,
59
+ "initializer_factor": 1.0,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 1536,
62
+ "layer_norm_eps": 1e-05,
63
+ "model_type": "metaclip_2_vision_model",
64
+ "num_attention_heads": 6,
65
+ "num_channels": 3,
66
+ "num_hidden_layers": 12,
67
+ "patch_size": 16,
68
+ "projection_dim": 384
69
+ }
70
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d7590d179c1aad8be76d019c64b7308bc08954b64eebed7398a2907829697dd
3
+ size 86703248
preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a90bc66716cb082ec5c00af9d059cdce153b87ee7290bee045c716ff787c4e
3
+ size 5777