import gradio as gr
from transformers import AutoModel, AutoConfig
from collections import defaultdict


def analyze_model_parameters(model_path, hf_token=None, show_layer_details=False):
    try:
        # Prepare token parameter
        token_kwargs = {}
        if hf_token and hf_token.strip():
            token_kwargs['token'] = hf_token.strip()

        # Load model configuration first
        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, **token_kwargs)

        # Load model on CPU
        model = AutoModel.from_pretrained(model_path, device_map="cpu", trust_remote_code=True, **token_kwargs)
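        # Note: device_map="cpu" keeps all weights on the CPU; from_pretrained
        # routes any device_map through the accelerate package, which is
        # assumed to be installed alongside transformers here.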

        # Initialize counters
        total_params = 0
        trainable_params = 0
        embedding_params = 0
        non_embedding_params = 0

        # Track unique parameters to handle weight tying
        unique_params = {}
        param_details = []
        layer_breakdown = defaultdict(lambda: {'total': 0, 'trainable': 0, 'params': []})

        # Embedding layer patterns (common names for embedding layers)
        embedding_patterns = [
            'embeddings', 'embed', 'wte', 'wpe', 'word_embedding',
            'position_embedding', 'token_embedding', 'embed_tokens',
            'embed_positions', 'embed_layer_norm'
        ]
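        # Note: matching below is by substring, so this is a heuristic -- any
        # parameter whose name merely contains "embed" counts as an embedding.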

        def is_embedding_param(name):
            name_lower = name.lower()
            return any(pattern in name_lower for pattern in embedding_patterns)

        def get_layer_name(param_name):
            """Extract layer information from parameter name"""
            parts = param_name.split('.')
            if len(parts) >= 2:
                # Handle common transformer architectures
                if 'layer' in parts or 'layers' in parts:
                    for i, part in enumerate(parts):
                        if part in ['layer', 'layers'] and i + 1 < len(parts):
                            try:
                                layer_num = int(parts[i + 1])
                                return f"Layer {layer_num}"
                            except ValueError:
                                pass

            # Handle other patterns
            if 'encoder' in parts:
                return "Encoder"
            elif 'decoder' in parts:
                return "Decoder"
            elif any(emb in param_name.lower() for emb in embedding_patterns):
                return "Embeddings"
            elif 'classifier' in param_name.lower() or 'head' in param_name.lower():
                return "Classification Head"
            elif 'pooler' in param_name.lower():
                return "Pooler"
            elif 'ln' in param_name.lower() or 'norm' in param_name.lower():
                return "Layer Norm"
            return "Other"

        # Analyze all parameters
        for name, param in model.named_parameters():
            param_size = param.numel()
            is_trainable = param.requires_grad
            is_embedding = is_embedding_param(name)
            layer_name = get_layer_name(name)

            # Handle weight tying by using the data pointer: parameters that
            # share storage are only counted once in the totals
            ptr = param.data_ptr()
            is_shared = ptr in unique_params
            if not is_shared:
                unique_params[ptr] = {
                    'name': name,
                    'size': param_size,
                    'trainable': is_trainable,
                    'embedding': is_embedding,
                    'layer': layer_name,
                    'shape': list(param.shape)
                }

                # Add to totals
                total_params += param_size
                if is_trainable:
                    trainable_params += param_size
                if is_embedding:
                    embedding_params += param_size
                else:
                    non_embedding_params += param_size

                # Add to layer breakdown
                layer_breakdown[layer_name]['total'] += param_size
                if is_trainable:
                    layer_breakdown[layer_name]['trainable'] += param_size

            # Add parameter details
            param_details.append({
                'name': name,
                'shape': list(param.shape),
                'size': param_size,
                'trainable': is_trainable,
                'embedding': is_embedding,
                'layer': layer_name,
                'shared': is_shared
            })

            # Add to layer breakdown details
            layer_breakdown[layer_name]['params'].append({
                'name': name,
                'shape': list(param.shape),
                'size': param_size,
                'trainable': is_trainable
            })
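        # Note: named_parameters() already skips duplicate Parameter objects by
        # default; the data-pointer check above additionally catches distinct
        # Parameter objects that share the same underlying storage (tied weights).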

        # Format the summary
        summary = f"""
🔍 **MODEL ANALYSIS: {model_path}**

📊 **PARAMETER SUMMARY**
├── Total Parameters: {total_params:,}
├── Trainable Parameters: {trainable_params:,}
├── Non-trainable Parameters: {total_params - trainable_params:,}
└── Trainable Percentage: {(trainable_params / total_params * 100):.1f}%

🧠 **PARAMETER BREAKDOWN**
├── Embedding Parameters: {embedding_params:,} ({embedding_params / total_params * 100:.1f}%)
└── Non-embedding Parameters: {non_embedding_params:,} ({non_embedding_params / total_params * 100:.1f}%)

📋 **MODEL INFO**
├── Model Type: {config.model_type if hasattr(config, 'model_type') else 'Unknown'}
├── Architecture: {config.architectures[0] if hasattr(config, 'architectures') and config.architectures else 'Unknown'}
└── Hidden Size: {getattr(config, 'hidden_size', 'Unknown')}
"""
        # Add layer breakdown summary ("Layer Norm" is checked before the
        # startswith("Layer") test so it is not lumped in with numbered layers)
        sorted_layers = []
        if layer_breakdown:
            summary += "\n🏗️ **LAYER BREAKDOWN SUMMARY**\n"
            sorted_layers = sorted(layer_breakdown.items(), key=lambda x: (
                0 if x[0] == "Embeddings" else
                2 if x[0] == "Layer Norm" else
                1 if x[0].startswith("Layer") else
                3 if x[0] == "Pooler" else
                4 if x[0] == "Classification Head" else 5
            ))
            for layer_name, info in sorted_layers:
                percentage = info['total'] / total_params * 100
                summary += f"├── {layer_name}: {info['total']:,} params ({percentage:.1f}%)\n"

        # Detailed layer breakdown if requested
        layer_details = ""
        if show_layer_details:
            layer_details = "\n" + "=" * 60 + "\n"
            layer_details += "📊 **DETAILED LAYER-BY-LAYER BREAKDOWN**\n"
            layer_details += "=" * 60 + "\n"
            for layer_name, info in sorted_layers:
                layer_details += f"\n📦 **{layer_name.upper()}**\n"
                layer_details += f"   Total: {info['total']:,} | Trainable: {info['trainable']:,}\n"
                layer_details += "   Parameters:\n"
                for param_info in info['params']:
                    trainable_mark = "✅" if param_info['trainable'] else "❌"
                    layer_details += f"      {trainable_mark} {param_info['name']}: {param_info['shape']} → {param_info['size']:,}\n"

        return summary + layer_details

    except Exception as e:
        error_msg = str(e)
        if "401" in error_msg or "authentication" in error_msg.lower():
            return f"🔒 **Authentication Error:** This model requires a valid HuggingFace token.\n\nPlease provide your HuggingFace token in the token field above.\n\nOriginal error: {error_msg}"
        elif "404" in error_msg or "not found" in error_msg.lower():
            return f"🔍 **Model Not Found:** The model '{model_path}' was not found.\n\nPlease check:\n- Model path is correct\n- Model exists on HuggingFace Hub\n- You have access to the model (use token if private)\n\nOriginal error: {error_msg}"
        else:
            return f"❌ **Error loading model:** {error_msg}\n\nPlease check that the model path is correct and accessible."


def count_parameters_basic(model_path, hf_token=None):
    """Basic parameter counting without layer details"""
    return analyze_model_parameters(model_path, hf_token, show_layer_details=False)


def count_parameters_detailed(model_path, hf_token=None):
    """Detailed parameter counting with layer-by-layer breakdown"""
    return analyze_model_parameters(model_path, hf_token, show_layer_details=True)


# Create Gradio interface with multiple outputs
with gr.Blocks(title="🤗 Advanced HuggingFace Model Parameter Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🤗 Advanced HuggingFace Model Parameter Analyzer

    Enter any HuggingFace model path to get detailed parameter analysis including:
    - **Total & trainable parameter counts**
    - **Embedding vs non-embedding breakdown**
    - **Layer-by-layer analysis**
    - **Weight sharing detection**
    - **Private model access** with HuggingFace token
    """)

    with gr.Row():
        with gr.Column(scale=2):
            model_input = gr.Textbox(
                label="📁 HuggingFace Model Path",
                placeholder="e.g., bert-base-uncased, gpt2, microsoft/DialoGPT-medium",
                value="bert-base-uncased"
            )
        with gr.Column(scale=1):
            hf_token_input = gr.Textbox(
                label="🔑 HuggingFace Token (Optional)",
                placeholder="hf_...",
                type="password",
                info="Required for private models or gated models"
            )

    with gr.Row():
        analyze_btn = gr.Button("🔍 Analyze Model", variant="primary")
        detailed_btn = gr.Button("📊 Detailed Analysis", variant="secondary")

    output_text = gr.Textbox(
        label="📋 Analysis Results",
        lines=20,
        max_lines=50,
        show_copy_button=True
    )

    # Event handlers
    analyze_btn.click(
        fn=count_parameters_basic,
        inputs=[model_input, hf_token_input],
        outputs=output_text
    )
    detailed_btn.click(
        fn=count_parameters_detailed,
        inputs=[model_input, hf_token_input],
        outputs=output_text
    )

    # Example models
    gr.Examples(
        examples=[
            ["bert-base-uncased"],
            ["gpt2"],
            ["roberta-base"],
            ["distilbert-base-uncased"],
            ["microsoft/DialoGPT-medium"],
            ["facebook/bart-base"],
            ["t5-small"],
            ["google/flan-t5-small"]
        ],
        inputs=model_input,
        label="🎯 Example Models"
    )
gr.Markdown(""" | |
### π Notes: | |
- **Weight tying detection**: Automatically handles shared parameters (e.g., input/output embeddings) | |
- **Layer categorization**: Groups parameters by transformer layers, embeddings, etc. | |
- **Detailed analysis**: Click "Detailed Analysis" for parameter-by-parameter breakdown | |
- **Private models**: Use your HuggingFace token to access private or gated models | |
- **Token security**: Token is only used for this session and not stored | |
- **Model compatibility**: Works with most HuggingFace transformer models | |
""") | |

if __name__ == "__main__":
    demo.launch()
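
# --- Usage sketch (not part of the app) ---
# A minimal example of driving this app programmatically with gradio_client,
# assuming it is running locally on Gradio's default port (7860). The fn_index
# values follow the order in which the .click() handlers are registered above
# (0 = basic analysis, 1 = detailed analysis).
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   # Arguments mirror the inputs list: model path, then (optional) HF token
#   report = client.predict("bert-base-uncased", "", fn_index=0)
#   print(report)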