{
  "model_config": {
    "model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit",
    "use_cache": false,
    "rope_scaling": {
      "type": "dynamic",
      "factor": 2.0
    }
  },
  "training_config": {
    "num_train_epochs": 3,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "learning_rate": 2e-5,
    "lr_scheduler_type": "cosine",
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
    "optim": "adamw_torch",
    "max_grad_norm": 0.3,
    "max_seq_length": 2048,
    "logging_steps": 10,
    "save_steps": 200,
    "save_total_limit": 3,
    "evaluation_strategy": "steps",
    "eval_steps": 200,
    "load_best_model_at_end": true,
    "output_dir": "fine_tuned_model",
    "disable_tqdm": false,
    "report_to": ["tensorboard"],
    "logging_first_step": true
  },
  "hardware_config": {
    "fp16": true,
    "bf16": false,
    "gradient_checkpointing": true,
    "device_map": "auto",
    "use_flash_attention": true
  },
  "quantization_config": {
    "load_in_4bit": true,
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true
  },
  "lora_config": {
    "r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "bias": "none",
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ]
  },
  "dataset_config": {
    "sort_by_field": "prompt_number",
    "sort_direction": "ascending",
    "max_tokens": 2048,
    "text_field": "conversations",
    "shuffle_seed": 42,
    "training_phase_only": true,
    "pre_tokenized": true,
    "input_ids_field": "input_ids",
    "skip_tokenization": true
  }
}