Error when using this in Ollama
#1 opened by mahmoudimus
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = seed_oss
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = Seed OSS 36B Base woSyn
llama_model_loader: - kv 3: general.finetune str = Base-woSyn
llama_model_loader: - kv 4: general.basename str = Seed-OSS
llama_model_loader: - kv 5: general.size_label str = 36B
llama_model_loader: - kv 6: general.license str = apache-2.0
llama_model_loader: - kv 7: general.tags arr[str,2] = ["vllm", "text-generation"]
llama_model_loader: - kv 8: seed_oss.block_count u32 = 64
llama_model_loader: - kv 9: seed_oss.context_length u32 = 524288
llama_model_loader: - kv 10: seed_oss.embedding_length u32 = 5120
llama_model_loader: - kv 11: seed_oss.feed_forward_length u32 = 27648
llama_model_loader: - kv 12: seed_oss.attention.head_count u32 = 80
llama_model_loader: - kv 13: seed_oss.attention.head_count_kv u32 = 8
llama_model_loader: - kv 14: seed_oss.rope.freq_base f32 = 10000000.000000
llama_model_loader: - kv 15: seed_oss.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 16: seed_oss.attention.key_length u32 = 128
llama_model_loader: - kv 17: seed_oss.attention.value_length u32 = 128
llama_model_loader: - kv 18: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 19: tokenizer.ggml.pre str = seed-coder
llama_model_loader: - kv 20: tokenizer.ggml.tokens arr[str,155136] = ["<seed:bos>", "<seed:pad>", "<seed:e...
llama_model_loader: - kv 21: tokenizer.ggml.token_type arr[i32,155136] = [3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, ...
llama_model_loader: - kv 22: tokenizer.ggml.merges arr[str,154737] = ["Ġ Ġ", "Ġ t", "i n", "Ġ a", "e r...
llama_model_loader: - kv 23: tokenizer.ggml.bos_token_id u32 = 0
llama_model_loader: - kv 24: tokenizer.ggml.eos_token_id u32 = 2
llama_model_loader: - kv 25: tokenizer.ggml.padding_token_id u32 = 1
llama_model_loader: - kv 26: general.quantization_version u32 = 2
llama_model_loader: - kv 27: general.file_type u32 = 15
llama_model_loader: - type f32: 321 tensors
llama_model_loader: - type q4_K: 385 tensors
llama_model_loader: - type q6_K: 65 tensors
print_info: file format = GGUF V3 (latest)
print_info: file type = Q4_K - Medium
print_info: file size = 20.26 GiB (4.81 BPW)
llama_model_load: error loading model: error loading model architecture: unknown model architecture: 'seed_oss'
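For reference, the architecture string really is baked into the GGUF header (kv 0 above), so the failure comes from the llama.cpp build bundled with this Ollama install not recognizing `seed_oss`, not from a corrupt download. A minimal sketch of how to double-check the header with the `gguf` Python package (the filename below is hypothetical):

```python
# Minimal sketch, assuming `pip install gguf` and a local copy of the
# quantized model file (the path here is hypothetical).
from gguf import GGUFReader

reader = GGUFReader("Seed-OSS-36B-Base-woSyn-Q4_K_M.gguf")  # hypothetical path

# String KV values keep their raw bytes in the field's last part.
field = reader.get_field("general.architecture")
print(bytes(field.parts[-1]).decode("utf-8"))  # expected to print "seed_oss"
```

If that prints `seed_oss`, the file itself is presumably fine and the error should go away once the runtime ships a llama.cpp version that includes `seed_oss` architecture support.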