Spaces:

oberbics
/

HistorySpace

Running on Zero

App Files Files Community

oberbics committed on Apr 13

Commit

a89d538

verified ·

1 Parent(s): 7041fb3

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -47

app.py CHANGED Viewed

@@ -1,60 +1,27 @@
-import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-import json
-# Initialize model with error handling
-try:
-    tokenizer = AutoTokenizer.from_pretrained("numind/NuExtract-1.5")
-    model = AutoModelForCausalLM.from_pretrained(
-        "numind/NuExtract-1.5",
-        device_map="auto",
-        torch_dtype=torch.float16
-    )
-    MODEL_LOADED = True
-except Exception as e:
-    MODEL_LOADED = False
-    print(f"Model loading failed: {e}")
 def extract_structure(template, text):
     if not MODEL_LOADED:
         return "❌ Model not loaded", {}, "<p style='color:red'>Model failed to initialize</p>"
-    prompt = f"""Extract from text:
-Template: {template}
-Text: {text}
-JSON Output:"""
     try:
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
         outputs = model.generate(**inputs, max_new_tokens=512)
         result = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract JSON portion
-        json_start = result.find("{")
-        json_end = result.rfind("}") + 1
-        extracted = json.loads(result[json_start:json_end])
         return "✅ Success", extracted, f"<pre>{json.dumps(extracted, indent=2)}</pre>"
     except Exception as e:
-        return f"❌ Error: {str(e)}", {}, f"<p style='color:red'>{str(e)}</p>"
-# Gradio interface (properly indented block)
-with gr.Blocks() as demo:
-    gr.Markdown("# NuExtract-1.5 Structured Data Extractor")
-    with gr.Row():
-        with gr.Column():
-            template = gr.Textbox(label="Template (JSON)", value='{"fields": ["name", "email"]}')
-            text = gr.TextArea(label="Input Text")
-            btn = gr.Button("Extract")
-        with gr.Column():
-            status = gr.Textbox(label="Status")
-            json_out = gr.JSON(label="Output")
-            html_out = gr.HTML()
-    btn.click(extract_structure, [template, text], [status, json_out, html_out])
-if __name__ == "__main__":
-    demo.launch()

 def extract_structure(template, text):
     if not MODEL_LOADED:
         return "❌ Model not loaded", {}, "<p style='color:red'>Model failed to initialize</p>"
+    # Using the correct format for NuExtract-1.5
+    prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"
     try:
+        print(f"Generating with prompt: {prompt[:100]}...")
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
         outputs = model.generate(**inputs, max_new_tokens=512)
         result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        print(f"Raw result: {result[:100]}...")
+        # Extract result after the output marker
+        if "<|output|>" in result:
+            json_text = result.split("<|output|>")[1].strip()
+        else:
+            json_text = result
+        # Try to parse as JSON
+        extracted = json.loads(json_text)
         return "✅ Success", extracted, f"<pre>{json.dumps(extracted, indent=2)}</pre>"
     except Exception as e:
+        print(f"Error in extraction: {str(e)}")
+        return f"❌ Error: {str(e)}", {}, f"<p style='color:red'>{str(e)}</p>"