Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -115,26 +115,33 @@ def load_model():
 
 # Then, modify your extract_info function to load the model on first use
 @spaces.GPU
+@spaces.GPU
 def extract_info(template, text):
     global tokenizer, model
 
     if tokenizer is None:
-        return "❌ Tokenizer nicht geladen", "Bitte zuerst
+        return "❌ Tokenizer nicht geladen", "Bitte zuerst auf 'Modell laden' klicken"
 
     try:
         # Load model if not loaded yet
         if model is None:
+            print("Model not loaded yet, loading now...")
             try:
                 model = AutoModelForCausalLM.from_pretrained(
                     MODEL_NAME,
                     torch_dtype=TORCH_DTYPE,
                     trust_remote_code=True,
-                    revision="main"
-
-
+                    revision="main",
+                    device_map="auto"  # Let the model decide CUDA placement
+                ).eval()
+                print(f"✅ Model loaded successfully")
             except Exception as e:
+                trace = traceback.format_exc()
+                print(f"Error loading model: {e}\n{trace}")
                 return f"❌ Fehler beim Laden des Modells: {str(e)}", "{}"
 
+        print("Using model for inference...")
+
         # Format the template as proper JSON with indentation
         template_formatted = json.dumps(json.loads(template), indent=4)
 
@@ -148,7 +155,7 @@ def extract_info(template, text):
             truncation=True,
             padding=True,
             max_length=MAX_INPUT_LENGTH
-        ).to(DEVICE)
+        ).to(model.device)  # Use model's device
 
         # Generate output with torch.no_grad() for efficiency
         with torch.no_grad():
@@ -180,59 +187,6 @@ def extract_info(template, text):
         trace = traceback.format_exc()
         print(f"Error in extract_info: {e}\n{trace}")
         return f"❌ Fehler: {str(e)}", "{}"
-@spaces.GPU
-def extract_info(template, text):
-    global tokenizer, model
-    if model is None:
-        return "❌ Modell nicht geladen", "Bitte zuerst das Modell laden"
-
-    try:
-        # Format the template as proper JSON with indentation as per usage example
-        template_formatted = json.dumps(json.loads(template), indent=4)
-
-        # Create prompt exactly as shown in the usage example
-        prompt = f"<|input|>\n### Template:\n{template_formatted}\n### Text:\n{text}\n\n<|output|>"
-
-        # Tokenize with proper settings
-        inputs = tokenizer(
-            [prompt],
-            return_tensors="pt",
-            truncation=True,
-            padding=True,
-            max_length=MAX_INPUT_LENGTH
-        ).to(DEVICE)
-
-        # Generate output with torch.no_grad() for efficiency
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=MAX_NEW_TOKENS,
-                temperature=0.0,
-                do_sample=False
-            )
-
-        # Decode the result
-        result_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        # Extract the output part
-        if "<|output|>" in result_text:
-            json_text = result_text.split("<|output|>")[1].strip()
-        else:
-            json_text = result_text
-
-        # Try to parse as JSON
-        try:
-            extracted = json.loads(json_text)
-            return "✅ Erfolgreich extrahiert", json.dumps(extracted, indent=2)
-        except json.JSONDecodeError:
-            return "❌ JSON Parsing Fehler", json_text
-
-    except Exception as e:
-        import traceback
-        trace = traceback.format_exc()
-        print(f"Error in extract_info: {e}\n{trace}")
-        return f"❌ Fehler: {str(e)}", "{}"
-
 def create_map(df, location_col):
     m = folium.Map(
         location=[20, 0],
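
The removed duplicate also documents how the prompt is built from the JSON template and how the model output is parsed back into JSON; the same logic lives on in the kept function. Below is a small sketch of that round trip, with an invented template and a fake model output so it runs without loading any model.

```python
# Prompt construction and output parsing as used by extract_info above.
# The template and model output in the usage example are invented for illustration.
import json

def build_prompt(template: str, text: str) -> str:
    # Re-serialize the template with indent=4, matching the model's usage example.
    template_formatted = json.dumps(json.loads(template), indent=4)
    return f"<|input|>\n### Template:\n{template_formatted}\n### Text:\n{text}\n\n<|output|>"

def parse_output(result_text: str):
    # Everything after the <|output|> marker is treated as the extraction result.
    if "<|output|>" in result_text:
        json_text = result_text.split("<|output|>")[1].strip()
    else:
        json_text = result_text
    try:
        extracted = json.loads(json_text)
        return "✅ Erfolgreich extrahiert", json.dumps(extracted, indent=2)
    except json.JSONDecodeError:
        return "❌ JSON Parsing Fehler", json_text

# Hypothetical round trip (no model involved):
template = '{"name": "", "location": ""}'
prompt = build_prompt(template, "Anna lives in Berlin.")
fake_model_output = prompt + '\n{"name": "Anna", "location": "Berlin"}'
status, payload = parse_output(fake_model_output)
print(status)   # ✅ Erfolgreich extrahiert
print(payload)  # pretty-printed extracted JSON
```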