Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

README.md +95 -38
chat_template.jinja +4 -0
config.json +10 -7
generation_config.json +1 -1
model.safetensors +2 -2
tokenizer.json +2 -2
tokenizer_config.json +257 -1

README.md CHANGED Viewed

@@ -1,50 +1,107 @@
 ---
 library_name: transformers
-pipeline_tag: text-generation
-inference: true
-widget:
-- text: Hello!
-  example_title: Hello world
-  group: Python
 ---
-This model is randomly initialized, using the config from [google/gemma-2-27b-it](https://huggingface.co/google/gemma-2-27b-it) but with smaller size.
-Codes:
 ```python
 from transformers import pipeline
-from huggingface_hub import create_repo, upload_folder
 import torch
-import transformers
-import os
-model_id = 'google/gemma-2-27b-it'
-save_path = '/tmp/yujiepan/gemma-2-tiny-random'
-repo_id = 'yujiepan/gemma-2-tiny-random'
-config = transformers.AutoConfig.from_pretrained(model_id)
-config.hidden_size = 8
-config.head_dim = 2
-config.intermediate_size = 16
-config.num_attention_heads = 4
-config.num_hidden_layers = 2
-config.num_key_value_heads = 2
-tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
-tokenizer.save_pretrained(save_path)
-model = transformers.AutoModelForCausalLM.from_config(config, torch_dtype=torch.bfloat16)
-model.generation_config = transformers.GenerationConfig.from_pretrained(model_id)
-with torch.no_grad():
-    for p in model.parameters():
-        torch.nn.init.uniform_(p, -0.1, 0.1)
-pipe = pipeline('text-generation', model=model, tokenizer=tokenizer, do_sample=False, device='cuda')
-print(pipe('Hello World!'))
-model.save_pretrained(save_path)
-os.system(f'ls -alh {save_path}')
-create_repo(repo_id, exist_ok=True)
-upload_folder(repo_id=repo_id, folder_path=save_path)
 ```

 ---
 library_name: transformers
+base_model:
+- google/gemma-2-27b-it
 ---
+This tiny model is intended for debugging. It is randomly initialized, using a configuration adapted from [google/gemma-2-27b-it](https://huggingface.co/google/gemma-2-27b-it) and scaled down to a much smaller size.
+### Example usage:
 ```python
 from transformers import pipeline
+model_id = "yujiepan/gemma-2-tiny-random"
+pipe = pipeline('text-generation', model=model_id, device='cuda', dtype="bfloat16")
+print(pipe('Hello World!'))
+```
+### Code to create this repo:
+```python
+import json
+from pathlib import Path
+import accelerate
 import torch
+from huggingface_hub import file_exists, hf_hub_download
+from transformers import (
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoProcessor,
+    GenerationConfig,
+    set_seed,
+)
+source_model_id = "google/gemma-2-27b-it"
+save_folder = "/tmp/yujiepan/gemma-2-tiny-random"
+processor = AutoProcessor.from_pretrained(
+    source_model_id, trust_remote_code=True)
+processor.save_pretrained(save_folder)
+with open(hf_hub_download(source_model_id, filename='config.json', repo_type='model'), 'r', encoding='utf-8') as f:
+    config_json = json.load(f)
+config_json['hidden_size'] = 8
+config_json['intermediate_size'] = 64
+config_json['num_attention_heads'] = 8
+config_json['num_hidden_layers'] = 2
+config_json['num_key_value_heads'] = 4
+config_json['head_dim'] = 32
+config_json['tie_word_embeddings'] = True
+with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
+    json.dump(config_json, f, indent=2)
+config = AutoConfig.from_pretrained(
+    save_folder,
+    trust_remote_code=True,
+)
+print(config)
+torch.set_default_dtype(torch.bfloat16)
+model = AutoModelForCausalLM.from_config(config)
+torch.set_default_dtype(torch.float32)
+if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):
+    model.generation_config = GenerationConfig.from_pretrained(
+        source_model_id, trust_remote_code=True,
+    )
+set_seed(42)
+model = model.cpu()
+with torch.no_grad():
+    for name, p in sorted(model.named_parameters()):
+        torch.nn.init.normal_(p, 0, 0.1)
+        print(name, p.shape)
+model.save_pretrained(save_folder)
+print(model)
+```
+### Printed model architecture:
+```text
+Gemma2ForCausalLM(
+  (model): Gemma2Model(
+    (embed_tokens): Embedding(256000, 8, padding_idx=0)
+    (layers): ModuleList(
+      (0-1): 2 x Gemma2DecoderLayer(
+        (self_attn): Gemma2Attention(
+          (q_proj): Linear(in_features=8, out_features=256, bias=False)
+          (k_proj): Linear(in_features=8, out_features=128, bias=False)
+          (v_proj): Linear(in_features=8, out_features=128, bias=False)
+          (o_proj): Linear(in_features=256, out_features=8, bias=False)
+        )
+        (mlp): Gemma2MLP(
+          (gate_proj): Linear(in_features=8, out_features=64, bias=False)
+          (up_proj): Linear(in_features=8, out_features=64, bias=False)
+          (down_proj): Linear(in_features=64, out_features=8, bias=False)
+          (act_fn): GELUTanh()
+        )
+        (input_layernorm): Gemma2RMSNorm((8,), eps=1e-06)
+        (post_attention_layernorm): Gemma2RMSNorm((8,), eps=1e-06)
+        (pre_feedforward_layernorm): Gemma2RMSNorm((8,), eps=1e-06)
+        (post_feedforward_layernorm): Gemma2RMSNorm((8,), eps=1e-06)
+      )
+    )
+    (norm): Gemma2RMSNorm((8,), eps=1e-06)
+    (rotary_emb): Gemma2RotaryEmbedding()
+  )
+  (lm_head): Linear(in_features=8, out_features=256000, bias=False)
+)
 ```

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,4 @@

+{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
+' + message['content'] | trim + '<end_of_turn>
+' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
+'}}{% endif %}

config.json CHANGED Viewed

@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "google/gemma-2-27b-it",
   "architectures": [
     "Gemma2ForCausalLM"
   ],
@@ -8,27 +7,31 @@
   "attn_logit_softcapping": 50.0,
   "bos_token_id": 2,
   "cache_implementation": "hybrid",
   "eos_token_id": 1,
   "final_logit_softcapping": 30.0,
-  "head_dim": 2,
   "hidden_act": "gelu_pytorch_tanh",
   "hidden_activation": "gelu_pytorch_tanh",
   "hidden_size": 8,
   "initializer_range": 0.02,
-  "intermediate_size": 16,
   "max_position_embeddings": 8192,
   "model_type": "gemma2",
-  "num_attention_heads": 4,
   "num_hidden_layers": 2,
-  "num_key_value_heads": 2,
   "pad_token_id": 0,
   "query_pre_attn_scalar": 144,
   "rms_norm_eps": 1e-06,
   "rope_theta": 10000.0,
   "sliding_window": 4096,
   "sliding_window_size": 4096,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.42.1",
   "use_cache": true,
   "vocab_size": 256000
 }

 {
   "architectures": [
     "Gemma2ForCausalLM"
   ],
   "attn_logit_softcapping": 50.0,
   "bos_token_id": 2,
   "cache_implementation": "hybrid",
+  "dtype": "bfloat16",
   "eos_token_id": 1,
   "final_logit_softcapping": 30.0,
+  "head_dim": 32,
   "hidden_act": "gelu_pytorch_tanh",
   "hidden_activation": "gelu_pytorch_tanh",
   "hidden_size": 8,
   "initializer_range": 0.02,
+  "intermediate_size": 64,
+  "layer_types": [
+    "sliding_attention",
+    "full_attention"
+  ],
   "max_position_embeddings": 8192,
   "model_type": "gemma2",
+  "num_attention_heads": 8,
   "num_hidden_layers": 2,
+  "num_key_value_heads": 4,
   "pad_token_id": 0,
   "query_pre_attn_scalar": 144,
   "rms_norm_eps": 1e-06,
   "rope_theta": 10000.0,
   "sliding_window": 4096,
   "sliding_window_size": 4096,
+  "transformers_version": "4.57.1",
   "use_cache": true,
   "vocab_size": 256000
 }

generation_config.json CHANGED Viewed

@@ -4,5 +4,5 @@
   "cache_implementation": "hybrid",
   "eos_token_id": 1,
   "pad_token_id": 0,
-  "transformers_version": "4.42.1"
 }

   "cache_implementation": "hybrid",
   "eos_token_id": 1,
   "pad_token_id": 0,
+  "transformers_version": "4.57.1"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:438bdf38f16ee5b8697e4432dd369211ae56181d440ef494cb8f8f18b050c363
-size 4100992

 version https://git-lfs.github.com/spec/v1
+oid sha256:b822fb4bd70ba7c075fd4ce0c4c16fa31946a1f52f95df3e6868c70fbf915f30
+size 4129424

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79
-size 17518525

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060
+size 34362873

tokenizer_config.json CHANGED Viewed

@@ -1737,6 +1737,262 @@
       "rstrip": false,
       "single_word": false,
       "special": false
     }
   },
   "additional_special_tokens": [
@@ -1744,9 +2000,9 @@
     "<end_of_turn>"
   ],
   "bos_token": "<bos>",
-  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<eos>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "sp_model_kwargs": {},

       "rstrip": false,
       "single_word": false,
       "special": false
+    },
+    "255968": {
+      "content": "[toxicity=0]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255969": {
+      "content": "\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255970": {
+      "content": "\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255971": {
+      "content": "\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255972": {
+      "content": "\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255973": {
+      "content": "\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255974": {
+      "content": "\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255975": {
+      "content": "\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255976": {
+      "content": "\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255977": {
+      "content": "\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255978": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255979": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255980": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255981": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255982": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255983": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255984": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255985": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255986": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255987": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255988": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255989": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255990": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255991": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255992": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255993": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255994": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255995": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255996": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255997": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255998": {
+      "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "255999": {
+      "content": "<unused99>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
     }
   },
   "additional_special_tokens": [
     "<end_of_turn>"
   ],
   "bos_token": "<bos>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<eos>",
+  "extra_special_tokens": {},
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "sp_model_kwargs": {},