# HuggingFace Space: parameter-count / app.py
# (web-UI residue preserved: author orionweller, commit "Update app.py", 70aaa0d verified)
import gradio as gr
from transformers import AutoModel, AutoConfig
import torch
import json
from collections import defaultdict, OrderedDict
def analyze_model_parameters(model_path, hf_token=None, show_layer_details=False):
    """Load a HuggingFace model on CPU and report a parameter-count analysis.

    Args:
        model_path: Model id or local path accepted by ``from_pretrained``.
        hf_token: Optional HuggingFace access token for private/gated models.
        show_layer_details: When True, append a parameter-by-parameter
            breakdown for every layer group.

    Returns:
        A human-readable report string; on failure, a formatted error
        message string (no exception escapes this function).
    """
    try:
        # Only forward the token when one was actually supplied.
        token_kwargs = {}
        if hf_token and hf_token.strip():
            token_kwargs['token'] = hf_token.strip()

        # Load the configuration first (cheap), then the weights on CPU.
        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True, **token_kwargs)
        model = AutoModel.from_pretrained(model_path, device_map="cpu", trust_remote_code=True, **token_kwargs)

        # Common substrings that identify embedding parameters by name.
        embedding_patterns = [
            'embeddings', 'embed', 'wte', 'wpe', 'word_embedding',
            'position_embedding', 'token_embedding', 'embed_tokens',
            'embed_positions', 'embed_layer_norm'
        ]

        def is_embedding_param(name):
            """True if the parameter name matches a known embedding pattern."""
            name_lower = name.lower()
            return any(pattern in name_lower for pattern in embedding_patterns)

        def get_layer_name(param_name):
            """Map a dotted parameter name onto a coarse layer category."""
            parts = param_name.split('.')
            if len(parts) >= 2:
                # Numbered transformer layers: "...layers.<n>. ..."
                if 'layer' in parts or 'layers' in parts:
                    for i, part in enumerate(parts):
                        if part in ['layer', 'layers'] and i + 1 < len(parts):
                            try:
                                return f"Layer {int(parts[i + 1])}"
                            except ValueError:
                                pass
                # Other common architecture groupings.
                if 'encoder' in parts:
                    return "Encoder"
                elif 'decoder' in parts:
                    return "Decoder"
                elif any(emb in param_name.lower() for emb in embedding_patterns):
                    return "Embeddings"
                elif 'classifier' in param_name.lower() or 'head' in param_name.lower():
                    return "Classification Head"
                elif 'pooler' in param_name.lower():
                    return "Pooler"
                elif 'ln' in param_name.lower() or 'norm' in param_name.lower():
                    return "Layer Norm"
            return "Other"

        total_params = 0
        trainable_params = 0
        embedding_params = 0
        non_embedding_params = 0

        # Tied weights share the same underlying storage, so a repeated data
        # pointer means the tensor was already counted (weight tying).
        seen_ptrs = set()
        param_details = []
        layer_breakdown = defaultdict(lambda: {'total': 0, 'trainable': 0, 'params': []})

        for name, param in model.named_parameters():
            param_size = param.numel()
            is_trainable = param.requires_grad
            is_embedding = is_embedding_param(name)
            layer_name = get_layer_name(name)

            ptr = param.data_ptr()
            # O(1) set lookup instead of rescanning param_details each time.
            is_shared = ptr in seen_ptrs
            if not is_shared:
                seen_ptrs.add(ptr)
                # Count each underlying tensor exactly once in the totals.
                total_params += param_size
                if is_trainable:
                    trainable_params += param_size
                if is_embedding:
                    embedding_params += param_size
                else:
                    non_embedding_params += param_size
                layer_breakdown[layer_name]['total'] += param_size
                if is_trainable:
                    layer_breakdown[layer_name]['trainable'] += param_size

            param_details.append({
                'name': name,
                'shape': list(param.shape),
                'size': param_size,
                'trainable': is_trainable,
                'embedding': is_embedding,
                'layer': layer_name,
                'shared': is_shared,
                'ptr': ptr
            })

            # Detail list keeps every named parameter, shared or not.
            layer_breakdown[layer_name]['params'].append({
                'name': name,
                'shape': list(param.shape),
                'size': param_size,
                'trainable': is_trainable
            })

        # Guard against division by zero for a parameterless model.
        denom = total_params if total_params else 1

        summary = f"""
🔍 **MODEL ANALYSIS: {model_path}**
📊 **PARAMETER SUMMARY**
├── Total Parameters: {total_params:,}
├── Trainable Parameters: {trainable_params:,}
├── Non-trainable Parameters: {total_params - trainable_params:,}
└── Trainable Percentage: {(trainable_params/denom*100):.1f}%
🧠 **PARAMETER BREAKDOWN**
├── Embedding Parameters: {embedding_params:,} ({embedding_params/denom*100:.1f}%)
└── Non-embedding Parameters: {non_embedding_params:,} ({non_embedding_params/denom*100:.1f}%)
📋 **MODEL INFO**
├── Model Type: {config.model_type if hasattr(config, 'model_type') else 'Unknown'}
├── Architecture: {config.architectures[0] if hasattr(config, 'architectures') and config.architectures else 'Unknown'}
└── Hidden Size: {getattr(config, 'hidden_size', 'Unknown')}
"""

        # Compute unconditionally so the detailed section below can always
        # reference it (previously a NameError when layer_breakdown was empty).
        sorted_layers = sorted(layer_breakdown.items(), key=lambda x: (
            0 if x[0] == "Embeddings" else
            1 if x[0].startswith("Layer") else
            2 if x[0] == "Layer Norm" else
            3 if x[0] == "Pooler" else
            4 if x[0] == "Classification Head" else 5
        ))
        if sorted_layers:
            summary += "\n🏗️ **LAYER BREAKDOWN SUMMARY**\n"
            for layer_name, info in sorted_layers:
                percentage = info['total'] / denom * 100
                summary += f"├── {layer_name}: {info['total']:,} params ({percentage:.1f}%)\n"

        layer_details = ""
        if show_layer_details:
            layer_details = "\n" + "=" * 60 + "\n"
            layer_details += "🔍 **DETAILED LAYER-BY-LAYER BREAKDOWN**\n"
            layer_details += "=" * 60 + "\n"
            for layer_name, info in sorted_layers:
                layer_details += f"\n📁 **{layer_name.upper()}**\n"
                layer_details += f" Total: {info['total']:,} | Trainable: {info['trainable']:,}\n"
                layer_details += f" Parameters:\n"
                for param_info in info['params']:
                    trainable_mark = "✓" if param_info['trainable'] else "✗"
                    layer_details += f" {trainable_mark} {param_info['name']}: {param_info['shape']} → {param_info['size']:,}\n"

        return summary + layer_details

    except Exception as e:
        # Map the most common HF Hub failures onto actionable messages.
        error_msg = str(e)
        if "401" in error_msg or "authentication" in error_msg.lower():
            return f"🔒 **Authentication Error:** This model requires a valid HuggingFace token.\n\nPlease provide your HuggingFace token in the token field above.\n\nOriginal error: {error_msg}"
        elif "404" in error_msg or "not found" in error_msg.lower():
            return f"🔍 **Model Not Found:** The model '{model_path}' was not found.\n\nPlease check:\n- Model path is correct\n- Model exists on HuggingFace Hub\n- You have access to the model (use token if private)\n\nOriginal error: {error_msg}"
        else:
            return f"❌ **Error loading model:** {error_msg}\n\nPlease check that the model path is correct and accessible."
def count_parameters_basic(model_path, hf_token=None):
    """Convenience wrapper: run the analyzer with layer details disabled."""
    return analyze_model_parameters(model_path, hf_token=hf_token, show_layer_details=False)
def count_parameters_detailed(model_path, hf_token=None):
    """Convenience wrapper: run the analyzer with the full per-layer breakdown."""
    return analyze_model_parameters(model_path, hf_token=hf_token, show_layer_details=True)
# ---------------------------------------------------------------------------
# Gradio UI: a model-id text box, an optional token field, and two buttons
# that render either the basic summary or the detailed per-layer analysis.
# `demo` is the conventional top-level object name for HuggingFace Spaces.
# ---------------------------------------------------------------------------
with gr.Blocks(title="🤗 Advanced HuggingFace Model Parameter Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🤗 Advanced HuggingFace Model Parameter Analyzer
Enter any HuggingFace model path to get detailed parameter analysis including:
- **Total & trainable parameter counts**
- **Embedding vs non-embedding breakdown**
- **Layer-by-layer analysis**
- **Weight sharing detection**
- **Private model access** with HuggingFace token
""")

    # Input row: model path (wide) next to the optional token (narrow).
    with gr.Row():
        with gr.Column(scale=2):
            model_path_box = gr.Textbox(
                label="🔍 HuggingFace Model Path",
                placeholder="e.g., bert-base-uncased, gpt2, microsoft/DialoGPT-medium",
                value="bert-base-uncased"
            )
        with gr.Column(scale=1):
            token_box = gr.Textbox(
                label="🔑 HuggingFace Token (Optional)",
                placeholder="hf_...",
                type="password",
                info="Required for private models or gated models"
            )

    # Two analysis modes: quick summary vs. full per-layer breakdown.
    with gr.Row():
        basic_btn = gr.Button("📊 Analyze Model", variant="primary")
        details_btn = gr.Button("🔍 Detailed Analysis", variant="secondary")

    results_box = gr.Textbox(
        label="📋 Analysis Results",
        lines=20,
        max_lines=50,
        show_copy_button=True
    )

    # Wire each button to its analysis function.
    basic_btn.click(
        fn=count_parameters_basic,
        inputs=[model_path_box, token_box],
        outputs=results_box
    )
    details_btn.click(
        fn=count_parameters_detailed,
        inputs=[model_path_box, token_box],
        outputs=results_box
    )

    # One-click example model ids.
    gr.Examples(
        examples=[
            ["bert-base-uncased"],
            ["gpt2"],
            ["roberta-base"],
            ["distilbert-base-uncased"],
            ["microsoft/DialoGPT-medium"],
            ["facebook/bart-base"],
            ["t5-small"],
            ["google/flan-t5-small"]
        ],
        inputs=model_path_box,
        label="🎯 Example Models"
    )

    gr.Markdown("""
### 📝 Notes:
- **Weight tying detection**: Automatically handles shared parameters (e.g., input/output embeddings)
- **Layer categorization**: Groups parameters by transformer layers, embeddings, etc.
- **Detailed analysis**: Click "Detailed Analysis" for parameter-by-parameter breakdown
- **Private models**: Use your HuggingFace token to access private or gated models
- **Token security**: Token is only used for this session and not stored
- **Model compatibility**: Works with most HuggingFace transformer models
""")

if __name__ == "__main__":
    demo.launch()