Spaces:

Girinath11
/

jina-code-debugger

Running

App Files Files Community

Girinath11 committed 4 days ago

Commit

77daf9a

verified ·

1 Parent(s): 8afa1cf

Create model_investigation.py

Browse files

Files changed (1) hide show

model_investigation.py +124 -0

model_investigation.py ADDED Viewed

	@@ -0,0 +1,124 @@

+# model_investigation.py - Run this to investigate your model
+# Add this as a separate file to debug your model loading
+from transformers import AutoConfig, AutoTokenizer
+import logging
+import traceback
+# Configure the root logger to emit records at INFO level and above.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger named after this module.
+# NOTE(review): not referenced in the visible code, which reports via print() — confirm it is needed.
+logger = logging.getLogger(__name__)
+def investigate_model(model_name="Girinath11/aiml_code_debug_model"):
+    """Investigate the model to understand its structure."""
+    print(f"🔍 Investigating model: {model_name}")
+    print("=" * 60)
+    try:
+        # 1. Check config
+        print("📋 STEP 1: Checking model configuration...")
+        config = AutoConfig.from_pretrained(model_name)
+        print(f"   ✓ Config type: {type(config).__name__}")
+        print(f"   ✓ Model type: {getattr(config, 'model_type', 'Unknown')}")
+        print(f"   ✓ Architectures: {getattr(config, 'architectures', 'Unknown')}")
+        # Print all config attributes
+        print("\n   📄 Full config attributes:")
+        for attr in sorted(dir(config)):
+            if not attr.startswith('_') and hasattr(config, attr):
+                try:
+                    value = getattr(config, attr)
+                    if not callable(value):
+                        print(f"      {attr}: {value}")
+                except:
+                    pass
+        # 2. Check tokenizer
+        print(f"\n🔤 STEP 2: Checking tokenizer...")
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            print(f"   ✓ Tokenizer type: {type(tokenizer).__name__}")
+            print(f"   ✓ Vocab size: {len(tokenizer)}")
+            print(f"   ✓ Special tokens:")
+            print(f"      pad_token: {tokenizer.pad_token}")
+            print(f"      eos_token: {tokenizer.eos_token}")
+            print(f"      unk_token: {tokenizer.unk_token}")
+            print(f"      bos_token: {getattr(tokenizer, 'bos_token', 'None')}")
+        except Exception as e:
+            print(f"   ❌ Tokenizer error: {e}")
+        # 3. Try different loading approaches
+        print(f"\n🤖 STEP 3: Testing model loading approaches...")
+        from transformers import (
+            AutoModel,
+            AutoModelForSeq2SeqLM,
+            AutoModelForCausalLM,
+            pipeline
+        )
+        approaches = [
+            ("AutoModel", lambda: AutoModel.from_pretrained(model_name, trust_remote_code=True)),
+            ("AutoModelForCausalLM", lambda: AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)),
+            ("AutoModelForSeq2SeqLM", lambda: AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)),
+            ("Pipeline text-generation", lambda: pipeline("text-generation", model=model_name, trust_remote_code=True)),
+            ("Pipeline text2text-generation", lambda: pipeline("text2text-generation", model=model_name, trust_remote_code=True)),
+        ]
+        for approach_name, approach_func in approaches:
+            try:
+                print(f"   🔄 Trying {approach_name}...")
+                model = approach_func()
+                print(f"   ✅ SUCCESS with {approach_name}!")
+                print(f"      Model type: {type(model)}")
+                # Test a simple generation if possible
+                if hasattr(model, 'generate') or 'pipeline' in approach_name.lower():
+                    print(f"      ✓ Supports text generation")
+                    # Quick test
+                    try:
+                        test_input = "def hello(): print('world')"
+                        if 'pipeline' in approach_name.lower():
+                            result = model(f"Fix this code: {test_input}", max_length=100)
+                            print(f"      ✓ Test generation successful: {str(result)[:100]}...")
+                        else:
+                            inputs = tokenizer(f"Fix: {test_input}", return_tensors="pt", max_length=100, truncation=True)
+                            outputs = model.generate(**inputs, max_new_tokens=50)
+                            result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+                            print(f"      ✓ Test generation successful: {result[:100]}...")
+                    except Exception as gen_error:
+                        print(f"      ⚠️ Generation test failed: {str(gen_error)[:100]}...")
+                # We found a working approach, let's break
+                print(f"\n🎉 RECOMMENDATION: Use {approach_name} for loading this model!")
+                break
+            except Exception as e:
+                print(f"   ❌ {approach_name} failed: {str(e)[:100]}...")
+        # 4. Model files inspection
+        print(f"\n📁 STEP 4: Model files information...")
+        try:
+            from huggingface_hub import list_repo_files
+            files = list_repo_files(model_name)
+            model_files = [f for f in files if f.endswith(('.bin', '.safetensors', '.json'))]
+            print(f"   📄 Key model files found:")
+            for file in sorted(model_files):
+                print(f"      {file}")
+        except Exception as e:
+            print(f"   ⚠️ Could not list files: {e}")
+    except Exception as main_error:
+        print(f"\n❌ CRITICAL ERROR: {main_error}")
+        print(f"Full traceback:\n{traceback.format_exc()}")
+if __name__ == "__main__":
+    investigate_model()
+    print("\n" + "="*60)
+    print("🏁 Investigation complete!")
+    print("Copy the successful loading approach to your model_wrapper.py")