yujiepan
/

qwen3-tiny-random-tp

Text Generation

text-generation-inference

Model card | Files and versions

yujiepan committed on Aug 14

Commit

fc24fc8

·

verified ·

1 Parent(s): 526c9f2

Upload folder using huggingface_hub

Files changed (2) hide show

README.md +1 -0
config.json +0 -62

README.md CHANGED Viewed

@@ -93,6 +93,7 @@ config.num_attention_heads = 8
 config.num_hidden_layers = 2
 config.max_window_layers = 1
 config.tie_word_embeddings = False
 model = AutoModelForCausalLM.from_config(
     config,
     torch_dtype=torch.bfloat16,

 config.num_hidden_layers = 2
 config.max_window_layers = 1
 config.tie_word_embeddings = False
+config.layer_types = ['full_attention'] * 2
 model = AutoModelForCausalLM.from_config(
     config,
     torch_dtype=torch.bfloat16,

config.json CHANGED Viewed

@@ -12,68 +12,6 @@
   "initializer_range": 0.02,
   "intermediate_size": 32,
   "layer_types": [
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
     "full_attention",
     "full_attention"
   ],

   "initializer_range": 0.02,
   "intermediate_size": 32,
   "layer_types": [
     "full_attention",
     "full_attention"
   ],