Arcana-Qwen3-2.4B-A0.6B / qwen3moe_configuration.py
suayptalha's picture
Upload folder using huggingface_hub
a96bc78 verified
raw
history blame contribute delete
575 Bytes
from transformers import PretrainedConfig
class Qwen3MoEConfig(PretrainedConfig):
    """Configuration object registered under the ``qwen3moe`` model type.

    The class only records the values it is given as instance attributes;
    it performs no loading or validation of its own.

    Args:
        router_model_path: Stored unchanged as ``router_model_path``.
            Presumably identifies the router model -- confirm against the
            modeling code.
        expert_model_paths: Stored as ``expert_model_paths``. Any falsy
            value (``None``, an empty container, ...) is replaced by a new
            empty dict. Presumably maps a label to an expert model
            location -- confirm against the modeling code.
        labels: Stored as ``labels``. Any falsy value is replaced by a new
            empty list.
        torch_dtype: Stored unchanged as ``torch_dtype``; defaults to the
            string ``"auto"``.
        tokenizer_path: Stored unchanged as ``tokenizer_path``.
        **kwargs: Every other keyword argument is forwarded to
            ``PretrainedConfig.__init__``.
    """

    model_type = "qwen3moe"

    def __init__(
        self,
        router_model_path=None,
        expert_model_paths=None,
        labels=None,
        torch_dtype="auto",
        tokenizer_path=None,
        **kwargs,
    ):
        # Only the extra keyword arguments reach the base initializer; the
        # named parameters above are captured here and never passed on.
        super().__init__(**kwargs)

        self.router_model_path = router_model_path
        # Falsy inputs collapse to fresh empty containers so these two
        # attributes are never None.
        self.expert_model_paths = expert_model_paths if expert_model_paths else {}
        self.labels = labels if labels else []
        # Assigned after the base initializer has run, so this is the value
        # that remains on the instance.
        self.torch_dtype = torch_dtype
        self.tokenizer_path = tokenizer_path