Girinath11 committed
Commit 77daf9a · verified · 1 Parent(s): 8afa1cf

Create model_investigation.py

Files changed (1):
  1. model_investigation.py +124 -0
model_investigation.py ADDED
# model_investigation.py - Run this to investigate your model
# Add this as a separate file to debug your model loading

from transformers import AutoConfig, AutoTokenizer
import logging
import traceback

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def investigate_model(model_name="Girinath11/aiml_code_debug_model"):
    """Investigate the model to understand its structure."""

    print(f"🔍 Investigating model: {model_name}")
    print("=" * 60)

    tokenizer = None  # set in STEP 2; guards the generation test in STEP 3

    try:
        # 1. Check config
        print("📋 STEP 1: Checking model configuration...")
        config = AutoConfig.from_pretrained(model_name)

        print(f"  ✓ Config type: {type(config).__name__}")
        print(f"  ✓ Model type: {getattr(config, 'model_type', 'Unknown')}")
        print(f"  ✓ Architectures: {getattr(config, 'architectures', 'Unknown')}")

        # Print all public, non-callable config attributes
        print("\n  📄 Full config attributes:")
        for attr in sorted(dir(config)):
            if not attr.startswith('_') and hasattr(config, attr):
                try:
                    value = getattr(config, attr)
                    if not callable(value):
                        print(f"    {attr}: {value}")
                except Exception:
                    pass

        # 2. Check tokenizer
        print("\n🔤 STEP 2: Checking tokenizer...")
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            print(f"  ✓ Tokenizer type: {type(tokenizer).__name__}")
            print(f"  ✓ Vocab size: {len(tokenizer)}")
            print("  ✓ Special tokens:")
            print(f"    pad_token: {tokenizer.pad_token}")
            print(f"    eos_token: {tokenizer.eos_token}")
            print(f"    unk_token: {tokenizer.unk_token}")
            print(f"    bos_token: {getattr(tokenizer, 'bos_token', 'None')}")
        except Exception as e:
            print(f"  ❌ Tokenizer error: {e}")

        # 3. Try different loading approaches
        print("\n🤖 STEP 3: Testing model loading approaches...")

        from transformers import (
            AutoModel,
            AutoModelForSeq2SeqLM,
            AutoModelForCausalLM,
            pipeline,
        )

        approaches = [
            ("AutoModel", lambda: AutoModel.from_pretrained(model_name, trust_remote_code=True)),
            ("AutoModelForCausalLM", lambda: AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)),
            ("AutoModelForSeq2SeqLM", lambda: AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)),
            ("Pipeline text-generation", lambda: pipeline("text-generation", model=model_name, trust_remote_code=True)),
            ("Pipeline text2text-generation", lambda: pipeline("text2text-generation", model=model_name, trust_remote_code=True)),
        ]

        for approach_name, approach_func in approaches:
            try:
                print(f"  🔄 Trying {approach_name}...")
                model = approach_func()
                print(f"  ✅ SUCCESS with {approach_name}!")
                print(f"    Model type: {type(model)}")

                # Test a simple generation if possible
                if hasattr(model, 'generate') or 'pipeline' in approach_name.lower():
                    print("    ✓ Supports text generation")

                    # Quick test
                    try:
                        test_input = "def hello(): print('world')"
                        if 'pipeline' in approach_name.lower():
                            result = model(f"Fix this code: {test_input}", max_length=100)
                            print(f"    ✓ Test generation successful: {str(result)[:100]}...")
                        elif tokenizer is not None:
                            inputs = tokenizer(f"Fix: {test_input}", return_tensors="pt", max_length=100, truncation=True)
                            outputs = model.generate(**inputs, max_new_tokens=50)
                            result = tokenizer.decode(outputs[0], skip_special_tokens=True)
                            print(f"    ✓ Test generation successful: {result[:100]}...")
                    except Exception as gen_error:
                        print(f"    ⚠️ Generation test failed: {str(gen_error)[:100]}...")

                # We found a working approach, so stop trying others
                print(f"\n🎉 RECOMMENDATION: Use {approach_name} for loading this model!")
                break

            except Exception as e:
                print(f"  ❌ {approach_name} failed: {str(e)[:100]}...")

        # 4. Model files inspection
        print("\n📁 STEP 4: Model files information...")
        try:
            from huggingface_hub import list_repo_files
            files = list_repo_files(model_name)

            model_files = [f for f in files if f.endswith(('.bin', '.safetensors', '.json'))]
            print("  📄 Key model files found:")
            for file in sorted(model_files):
                print(f"    {file}")

        except Exception as e:
            print(f"  ⚠️ Could not list files: {e}")

    except Exception as main_error:
        print(f"\n❌ CRITICAL ERROR: {main_error}")
        print(f"Full traceback:\n{traceback.format_exc()}")

if __name__ == "__main__":
    investigate_model()

    print("\n" + "=" * 60)
    print("🏁 Investigation complete!")
    print("Copy the successful loading approach to your model_wrapper.py")