{
  "model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
  "dataset_name": "George-API/cognitive-data",
  "output_dir": "./results",
  "seed": 42,
  "# Tokenization settings": "These settings ensure we preserve existing tokenization",
  "trust_remote_code": true,
  "use_fast_tokenizer": true,
  "skip_tokenization": true,
  "max_seq_length": 2048,
  "chat_template": "chatml",
  "# Quantization settings": "4-bit quantization for memory efficiency",
  "load_in_4bit": true,
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_compute_dtype": "float16",
  "bnb_4bit_use_double_quant": true,
  "# PEFT settings": "LoRA configuration for efficient fine-tuning",
  "use_peft": true,
  "lora_r": 16,
  "lora_alpha": 32,
  "lora_dropout": 0.05,
  "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
  "# Training parameters": "Optimized for cognitive science fine-tuning",
  "num_train_epochs": 5,
  "per_device_train_batch_size": 4,
  "gradient_accumulation_steps": 8,
  "learning_rate": 3e-5,
  "weight_decay": 0.01,
  "warmup_ratio": 0.1,
  "lr_scheduler_type": "linear",
  "logging_steps": 10,
  "save_strategy": "steps",
  "save_steps": 100,
  "save_total_limit": 3,
  "fp16": true,
  "bf16": false,
  "max_grad_norm": 0.5,
  "# Hugging Face Hub settings": "For saving and sharing the model",
  "push_to_hub": true,
  "hub_model_id": "DeepSeek-Cognitive-Science",
  "hub_private_repo": true
}
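For reference, here is a minimal sketch of how the tokenization, quantization, and PEFT blocks of this config could be consumed with the transformers, bitsandbytes, and peft libraries. The file name "config.json" is an assumption for illustration; only keys that appear in the config above are read.

```python
# Minimal sketch: load the base model in 4-bit NF4 and attach LoRA adapters,
# driven by the keys in this config. "config.json" is an assumed file name.
import json

import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

with open("config.json") as f:  # assumed location of the config above
    cfg = json.load(f)

# "# Quantization settings": 4-bit NF4 with double quantization, fp16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["load_in_4bit"],
    bnb_4bit_quant_type=cfg["bnb_4bit_quant_type"],
    bnb_4bit_compute_dtype=torch.float16,  # from "bnb_4bit_compute_dtype"
    bnb_4bit_use_double_quant=cfg["bnb_4bit_use_double_quant"],
)

# "# Tokenization settings": reuse the base model's own fast tokenizer,
# so the existing tokenization is preserved as the config intends.
tokenizer = AutoTokenizer.from_pretrained(
    cfg["model_name"],
    use_fast=cfg["use_fast_tokenizer"],
    trust_remote_code=cfg["trust_remote_code"],
)

model = AutoModelForCausalLM.from_pretrained(
    cfg["model_name"],
    quantization_config=bnb_config,
    trust_remote_code=cfg["trust_remote_code"],
)

# "# PEFT settings": LoRA on every attention and MLP projection.
lora_config = LoraConfig(
    r=cfg["lora_r"],
    lora_alpha=cfg["lora_alpha"],
    lora_dropout=cfg["lora_dropout"],
    target_modules=cfg["target_modules"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # sanity check: only the adapters train
```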
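The training and Hub blocks map almost one-to-one onto transformers.TrainingArguments. This is again a sketch under the same assumptions; the dataset wiring and trainer choice (for example a TRL SFT trainer, suggested by "skip_tokenization" and "chat_template") are left out because the config alone does not pin them down.

```python
# Sketch: map the "# Training parameters" and Hub keys onto TrainingArguments.
# Continues from the previous snippet, which defined `cfg`.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir=cfg["output_dir"],
    seed=cfg["seed"],
    num_train_epochs=cfg["num_train_epochs"],
    per_device_train_batch_size=cfg["per_device_train_batch_size"],
    gradient_accumulation_steps=cfg["gradient_accumulation_steps"],
    learning_rate=cfg["learning_rate"],
    weight_decay=cfg["weight_decay"],
    warmup_ratio=cfg["warmup_ratio"],
    lr_scheduler_type=cfg["lr_scheduler_type"],
    logging_steps=cfg["logging_steps"],
    save_strategy=cfg["save_strategy"],
    save_steps=cfg["save_steps"],
    save_total_limit=cfg["save_total_limit"],
    fp16=cfg["fp16"],
    bf16=cfg["bf16"],
    max_grad_norm=cfg["max_grad_norm"],
    push_to_hub=cfg["push_to_hub"],
    hub_model_id=cfg["hub_model_id"],
    hub_private_repo=cfg["hub_private_repo"],
)
```

With "per_device_train_batch_size" of 4 and "gradient_accumulation_steps" of 8, the effective batch size is 32 sequences per device per optimizer step.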