"""Train the model returned by ``get_model()`` on the handwritten dataset.

Running this file builds an ``Im2LatexDataset`` from ``handwritten_dataset``,
wraps the model and tokenizer in a ``Trainer`` configured by ``config``, and
calls ``trainer.train(dataset)``.
"""

import os

from pix2tex.dataset.dataset import Im2LatexDataset
from pix2tex.trainer import Trainer

from model import get_model

# Training parameters; the whole dict is handed to ``Trainer`` unchanged.
config = {
    "batch_size": 4,
    "epochs": 1,
    "max_seq_len": 150,
    "warmup_steps": 10,
    "lr": 1e-4,
    "device": "cpu",
    "save_dir": "trained_model",
    "resume": False,
}

# Create the output directory from the same setting the trainer receives, so
# the directory on disk and ``config["save_dir"]`` cannot drift apart.
os.makedirs(config["save_dir"], exist_ok=True)

# Dataset path.
# NOTE(review): confirm that the installed pix2tex version exposes
# ``pix2tex.trainer.Trainer`` and that ``Im2LatexDataset`` accepts the
# ``data_root`` / ``transform`` / ``max_length`` keywords used here.
dataset = Im2LatexDataset(
    data_root='handwritten_dataset',
    transform=None,
    max_length=config["max_seq_len"],
)

# Initialize model and trainer.
model, tokenizer = get_model()
trainer = Trainer(model, tokenizer, config)

print("🧠 Starting training...")
trainer.train(dataset)
# Report the configured directory rather than a second hard-coded copy of it.
print(f"✅ Training complete. Model saved to '{config['save_dir']}/'")