#!/usr/bin/env python

"""
Script to create or update your Hugging Face Space for R1-Distill-Llama-8B training.
"""

import os
import sys
import json
import argparse
import logging
from pathlib import Path
from huggingface_hub import HfApi, login

# Route INFO-and-above log records to stdout with a timestamped format.
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    handlers=[_stdout_handler],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)

def load_env_variables():
    """Load Hugging Face settings from the environment and a local .env file.

    If a ``.env`` file sits next to this script, every ``KEY=VALUE`` line in
    it is copied into ``os.environ``, overriding any value already set.
    Blank lines and ``#`` comment lines are ignored; lines that have no
    ``=`` or an empty key are skipped with a warning. One pair of matching
    surrounding quotes is removed from a value. ``HF_SPACE_NAME`` falls back
    to ``"r1training"`` when it is not set at all.

    Returns:
        dict: The values of ``HF_TOKEN``, ``HF_USERNAME`` and
        ``HF_SPACE_NAME``.

    Raises:
        ValueError: If any of those three variables is missing or empty.
    """
    # The file is parsed by hand, so no third-party package is required.
    env_path = Path(__file__).parent / ".env"
    if env_path.exists():
        # utf-8-sig also accepts plain UTF-8 and drops a leading BOM, which
        # would otherwise become part of the first key.
        with open(env_path, encoding="utf-8-sig") as f:
            for line_number, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue

                key, separator, value = line.partition("=")
                key = key.strip()
                if not separator or not key:
                    # Report only the position: the line may hold a secret.
                    logger.warning(
                        f"Skipping malformed line {line_number} in {env_path}"
                    )
                    continue

                value = value.strip()
                # Accept KEY="value" / KEY='value' as written in many .env files.
                if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                    value = value[1:-1]
                os.environ[key] = value
        logger.info(f"Loaded environment variables from {env_path}")
    else:
        logger.warning(f"No .env file found at {env_path}")

    # Set default space name if not provided
    os.environ.setdefault("HF_SPACE_NAME", "r1training")

    # Verify required variables
    required_vars = {
        name: os.environ.get(name)
        for name in ("HF_TOKEN", "HF_USERNAME", "HF_SPACE_NAME")
    }

    missing_vars = [name for name, value in required_vars.items() if not value]
    if missing_vars:
        raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")

    # The token is deliberately left out of the log line.
    logger.info(f"Using environment variables: USERNAME={required_vars['HF_USERNAME']}, SPACE_NAME={required_vars['HF_SPACE_NAME']}")
    return required_vars

def verify_configs():
    """Check that the files the Space needs exist and that the JSON configs parse.

    The files are looked up in the directory that contains this script.

    Raises:
        FileNotFoundError: If any required file is absent; the message lists
            every missing file.
        ValueError: If a ``.json`` config is not valid UTF-8 encoded JSON.
            The underlying error is chained as ``__cause__``.
    """
    current_dir = Path(__file__).parent
    required_files = [
        "transformers_config.json",
        "dataset_config.json",
        "README.md",
        "run_transformers_training.py",
    ]

    missing_files = [
        name for name in required_files if not (current_dir / name).exists()
    ]
    if missing_files:
        raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")

    # Verify JSON configs
    for json_file in required_files:
        if not json_file.endswith(".json"):
            continue
        try:
            # Read as UTF-8 explicitly so the result does not depend on the
            # platform's default locale encoding.
            with open(current_dir / json_file, encoding="utf-8") as f:
                json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            raise ValueError(f"Invalid JSON in {json_file}: {e}") from e
        logger.info(f"Verified {json_file} is valid JSON")

def create_space(username, space_name):
    """Return info for the Space ``username/space_name``, creating it if absent.

    Args:
        username: Owner part of the Space id.
        space_name: Name part of the Space id.

    Returns:
        The object returned by ``HfApi.space_info`` for the Space.

    Raises:
        RuntimeError: If the Space can be neither looked up nor created. The
            original exception is chained as ``__cause__``.
    """
    # Imported here so the file's top-level import block stays untouched.
    from huggingface_hub.utils import RepositoryNotFoundError

    # Built before the try block so the error message below can always use it,
    # even when constructing the API client itself fails.
    space_id = f"{username}/{space_name}"
    try:
        api = HfApi()
        logger.info(f"Checking Space {space_id}...")

        # First try to get the space
        try:
            space_info = api.space_info(repo_id=space_id)
        except RepositoryNotFoundError:
            # Only a "not found" answer means the Space should be created;
            # any other failure falls through to the outer handler.
            logger.info(f"Space {space_id} does not exist, creating new space...")
        else:
            logger.info(f"Space {space_id} already exists")
            return space_info

        # Create new space
        try:
            api.create_repo(
                repo_id=space_id,
                private=False,
                repo_type="space",
                space_sdk="gradio",
            )
            logger.info(f"Created new space: {space_id}")
            return api.space_info(repo_id=space_id)
        except Exception as e:
            logger.error(f"Failed to create space: {str(e)}")
            raise
    except Exception as e:
        raise RuntimeError(f"Error with Space {space_id}: {str(e)}") from e

def main():
    """Parse CLI arguments and upload this script's directory to the Space.

    Unless ``--force`` is given, the user must type ``update`` at an
    interactive prompt before anything is changed. The steps are: load the
    environment variables, verify the config files, log in, create or fetch
    the Space, write a ``.gitignore`` and upload the folder.

    Returns:
        bool: True when the upload completed; False when the user declined,
        no confirmation input was available, or any step raised.
    """
    parser = argparse.ArgumentParser(description='Update Hugging Face Space for R1-Distill-LLama-8b training')
    parser.add_argument('--space_name', type=str, help='Space name (default: from env)')
    parser.add_argument('--force', action='store_true', help='Skip confirmation')
    args = parser.parse_args()

    if not args.force:
        print("\n" + "!"*80)
        print("WARNING: Updating the Space will INTERRUPT any ongoing training!")
        print("Make sure all checkpoints are saved before proceeding.")
        print("!"*80 + "\n")

        try:
            confirm = input("Type 'update' to confirm: ")
        except EOFError:
            # stdin is closed (e.g. CI or a pipe): treat it as "not
            # confirmed" rather than crashing with a traceback.
            logger.error("No input available for confirmation; use --force to skip the prompt")
            return False
        if confirm.strip().lower() != 'update':
            logger.info("Update cancelled")
            return False

    try:
        # Load environment variables
        env_vars = load_env_variables()

        # Verify configurations
        verify_configs()
        logger.info("All configuration files verified successfully")

        # Get space name from args or env, prioritize args
        space_name = args.space_name if args.space_name else env_vars["HF_SPACE_NAME"]
        logger.info(f"Using space name: {space_name}")
        repo_id = f"{env_vars['HF_USERNAME']}/{space_name}"

        # Login to Hugging Face
        logger.info("Logging in to Hugging Face...")
        login(token=env_vars["HF_TOKEN"])
        logger.info("Successfully logged in to Hugging Face")

        # Create/get space
        space_info = create_space(env_vars["HF_USERNAME"], space_name)
        logger.info(f"Space info: {space_info}")

        # Upload files
        current_dir = Path(__file__).parent
        logger.info(f"Uploading files from {current_dir} to Space {repo_id}...")

        # Create .gitignore
        # NOTE: mode "w" replaces any .gitignore already present in this directory.
        with open(current_dir / ".gitignore", "w") as f:
            f.write(".env\n*.pyc\n__pycache__\n")
        logger.info("Created .gitignore file")

        api = HfApi()
        api.upload_folder(
            folder_path=str(current_dir),
            repo_id=repo_id,
            repo_type="space",
            # Keep secrets, bytecode and the local lock file out of the upload.
            ignore_patterns=[".env", "*.pyc", "__pycache__", "TRAINING_IN_PROGRESS.lock"]
        )

        logger.info("Files uploaded successfully")
        space_url = f"https://huggingface.co/spaces/{repo_id}"
        logger.info(f"Space URL: {space_url}")
        print(f"\nSpace created successfully! You can view it at:\n{space_url}")
        return True

    except Exception as e:
        # Top-level boundary: report the failure and signal it through the
        # return value so the caller can pick the exit status.
        logger.error(f"Error updating Space: {str(e)}")
        # The traceback is only emitted when DEBUG logging is enabled.
        logger.debug("Traceback for the error above:", exc_info=True)
        return False

if __name__ == "__main__":
    # Map main()'s truthy/falsy result to exit status 0 (success) or 1 (failure).
    sys.exit(0 if main() else 1)