Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,60 +1,27 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
3 |
-
import torch
|
4 |
-
import json
|
5 |
-
|
6 |
-
# Initialize model with error handling
|
7 |
-
try:
|
8 |
-
tokenizer = AutoTokenizer.from_pretrained("numind/NuExtract-1.5")
|
9 |
-
model = AutoModelForCausalLM.from_pretrained(
|
10 |
-
"numind/NuExtract-1.5",
|
11 |
-
device_map="auto",
|
12 |
-
torch_dtype=torch.float16
|
13 |
-
)
|
14 |
-
MODEL_LOADED = True
|
15 |
-
except Exception as e:
|
16 |
-
MODEL_LOADED = False
|
17 |
-
print(f"Model loading failed: {e}")
|
18 |
-
|
19 |
def extract_structure(template, text):
|
20 |
if not MODEL_LOADED:
|
21 |
return "❌ Model not loaded", {}, "<p style='color:red'>Model failed to initialize</p>"
|
22 |
|
23 |
-
|
24 |
-
Template
|
25 |
-
Text: {text}
|
26 |
-
JSON Output:"""
|
27 |
|
28 |
try:
|
|
|
29 |
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
30 |
outputs = model.generate(**inputs, max_new_tokens=512)
|
31 |
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
32 |
|
33 |
-
# Extract
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
37 |
|
38 |
return "✅ Success", extracted, f"<pre>{json.dumps(extracted, indent=2)}</pre>"
|
39 |
except Exception as e:
|
40 |
-
|
41 |
-
|
42 |
-
# Gradio interface (properly indented block)
|
43 |
-
with gr.Blocks() as demo:
|
44 |
-
gr.Markdown("# NuExtract-1.5 Structured Data Extractor")
|
45 |
-
|
46 |
-
with gr.Row():
|
47 |
-
with gr.Column():
|
48 |
-
template = gr.Textbox(label="Template (JSON)", value='{"fields": ["name", "email"]}')
|
49 |
-
text = gr.TextArea(label="Input Text")
|
50 |
-
btn = gr.Button("Extract")
|
51 |
-
|
52 |
-
with gr.Column():
|
53 |
-
status = gr.Textbox(label="Status")
|
54 |
-
json_out = gr.JSON(label="Output")
|
55 |
-
html_out = gr.HTML()
|
56 |
-
|
57 |
-
btn.click(extract_structure, [template, text], [status, json_out, html_out])
|
58 |
-
|
59 |
-
if __name__ == "__main__":
|
60 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
def extract_structure(template, text):
    """Run the extraction model on ``text`` and format the result for the UI.

    Builds the ``<|input|>`` / ``<|output|>`` prompt from ``template`` and
    ``text``, generates up to 512 new tokens, and parses whatever follows the
    ``<|output|>`` marker in the decoded result as JSON.

    Relies on the module-level names ``MODEL_LOADED``, ``tokenizer`` and
    ``model`` being defined elsewhere in the file.

    Args:
        template: Template text placed under the ``### Template:`` heading.
        text: Input text placed under the ``### Text:`` heading.

    Returns:
        A ``(status, extracted, html)`` tuple: a status string, the parsed
        JSON value (``{}`` on failure), and an HTML snippet. Failures during
        generation or parsing are reported through the tuple, not raised.
    """
    # Imported locally so the function does not depend on file-level imports
    # that may have been dropped from the module.
    import html
    import json

    if not MODEL_LOADED:
        return (
            "❌ Model not loaded",
            {},
            "<p style='color:red'>Model failed to initialize</p>",
        )

    # Using the correct format for NuExtract-1.5
    prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>"

    try:
        print(f"Generating with prompt: {prompt[:100]}...")
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=512)
        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Raw result: {result[:100]}...")

        # Extract result after the output marker; fall back to the whole
        # decoded string when the marker is absent.
        if "<|output|>" in result:
            json_text = result.split("<|output|>")[1].strip()
        else:
            json_text = result

        # Try to parse as JSON
        extracted = json.loads(json_text)

        # The JSON originates from user-supplied text, so escape it before
        # embedding it in markup.
        pretty = html.escape(json.dumps(extracted, indent=2))
        return "✅ Success", extracted, f"<pre>{pretty}</pre>"
    except Exception as e:
        # Boundary handler: report any failure in the UI instead of crashing.
        print(f"Error in extraction: {e}")
        # Exception text can echo model output or user input; escape it too.
        return f"❌ Error: {e}", {}, f"<p style='color:red'>{html.escape(str(e))}</p>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|