import subprocess
import tempfile
import shutil
import os
from typing import Optional, Union, Dict, List, Any

# URL schemes that mark ``repo_source`` as a remote repository.
_REMOTE_PREFIXES = ('http://', 'https://', 'git://', 'ssh://')


def _build_tree(file_paths: List[str]) -> Dict[str, Any]:
    """Fold a flat list of '/'-separated file paths into a nested dictionary.

    Args:
        file_paths: Repository-relative file paths. Empty entries are ignored.

    Returns:
        A nested dictionary in which directories map to dictionaries and
        files map to None.
    """
    root: Dict[str, Any] = {}
    for file_path in file_paths:
        if not file_path:  # Skip empty paths (e.g. after a trailing separator)
            continue
        *directories, file_name = file_path.split('/')
        current = root
        for directory in directories:
            # Create the directory dict on first sight, then descend into it.
            current = current.setdefault(directory, {})
        current[file_name] = None  # Files have None value
    return root


def get_repo_tree_structure(repo_source: str, commit_depth: int = 1) -> Union[Dict[str, Any], str]:
    """
    Returns the tree structure of a git repository at a specific commit.

    Args:
        repo_source: Path to the git repository or URL to a remote repository.
            The source is treated as remote (and cloned into a temporary
            directory) when it starts with http://, https://, git:// or
            ssh://, or when it ends with '.git'; otherwise it is used as a
            local path.
        commit_depth: How many commits back to check (default=1 for the latest
            commit). For example, 2 would return the tree for the
            second-to-last commit.

    Returns:
        A dictionary representing the repository structure tree (directories
        map to nested dictionaries, files map to None), or an error string if
        something fails. Failures are reported through the return value
        rather than raised.
    """
    # Validate before doing any work so that an invalid depth never triggers
    # a (potentially slow) clone of a remote repository.
    if not isinstance(commit_depth, int) or commit_depth < 1:
        return "Error: commit_depth must be a positive integer"

    temp_dir = None
    try:
        # Determine if repo_source is a remote URL or local path
        is_remote = repo_source.startswith(_REMOTE_PREFIXES) or repo_source.endswith('.git')

        # If it's a remote repository, clone it to a temporary directory
        if is_remote:
            temp_dir = tempfile.mkdtemp()
            print(f"Cloning remote repository: {repo_source}")
            # --no-checkout: only the object database is needed for ls-tree.
            # "--" stops option parsing so a source starting with "-" cannot
            # be interpreted as a git option.
            clone_cmd = ["git", "clone", "--no-checkout", "--", repo_source, temp_dir]
            subprocess.check_output(clone_cmd)
            repo_path = temp_dir
        else:
            repo_path = repo_source

        # Depth 1 is HEAD itself; depth N is N-1 first-parent steps before it
        # (HEAD~1, HEAD~2, ...), hence the subtraction.
        commit_ref = "HEAD" if commit_depth == 1 else f"HEAD~{commit_depth - 1}"

        # Get the commit hash
        commit_cmd = ["git", "-C", repo_path, "rev-parse", commit_ref]
        try:
            commit_hash = subprocess.check_output(commit_cmd).decode('utf-8').strip()
        except subprocess.CalledProcessError:
            return f"Error: Could not find commit at depth {commit_depth}"

        # Use git ls-tree recursively to get the repository structure.
        # -z emits NUL-terminated, unquoted paths, so names containing
        # non-ASCII characters, quotes or newlines are reported verbatim.
        ls_tree_cmd = ["git", "-C", repo_path, "ls-tree", "-r", "-z", "--name-only", commit_hash]
        file_list = subprocess.check_output(ls_tree_cmd).decode('utf-8').split('\0')

        # Build a tree structure from the file paths
        return _build_tree(file_list)

    except subprocess.CalledProcessError as e:
        return f"Error executing git command: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"
    finally:
        # Clean up temporary directory if it was created. Cleanup is
        # best-effort: a failure here must not replace the function's result.
        if temp_dir and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir, ignore_errors=True)


# Helper function to print the tree structure in a more readable format
def print_tree(tree, indent=""):
    """
    Print the repository tree structure in a readable format

    Entries are printed in sorted key order; directories are suffixed with
    '/' and their contents are printed recursively with a deeper indent.

    Args:
        tree: Dictionary representing the repository structure
        indent: Current indentation level string (used in recursion)
    """
    for key, value in sorted(tree.items()):
        if value is None:  # It's a file
            print(f"{indent}├── {key}")
        else:  # It's a directory
            print(f"{indent}├── {key}/")
            print_tree(value, indent + "│ ")


def main() -> None:
    """Command-line entry point: print the tree of the given repository.

    Reads the repository path or URL from the first argument and an optional
    commit depth from the second. Exits with status 1 on missing or invalid
    arguments.
    """
    import sys

    if len(sys.argv) < 2:
        print("Usage: python git_tree_utils.py <repo_path_or_url> [commit_depth]")
        print(" repo_path_or_url: Path to local repository or URL to remote repository")
        print(" commit_depth: Optional - How many commits back to check (default=1 for latest commit)")
        sys.exit(1)

    repo_source = sys.argv[1]
    commit_depth = 1  # Default to latest commit

    # Check if commit_depth was provided
    if len(sys.argv) > 2:
        try:
            commit_depth = int(sys.argv[2])
        except ValueError:
            print("Error: commit_depth must be an integer")
            sys.exit(1)
        if commit_depth < 1:
            print("Error: commit_depth must be a positive integer")
            sys.exit(1)

    print(f"Getting repository structure for: {repo_source} (commit depth: {commit_depth})")
    tree = get_repo_tree_structure(repo_source, commit_depth)

    if isinstance(tree, dict):
        print("\nRepository Structure:")
        print_tree(tree)
    else:
        print(tree)  # Error message


# Example usage
if __name__ == "__main__":
    main()