oberbics committed on
Commit
a89d538
·
verified ·
1 Parent(s): 7041fb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -47
app.py CHANGED
@@ -1,60 +1,27 @@
1
- import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
- import torch
4
- import json
5
-
6
- # Initialize model with error handling
7
- try:
8
- tokenizer = AutoTokenizer.from_pretrained("numind/NuExtract-1.5")
9
- model = AutoModelForCausalLM.from_pretrained(
10
- "numind/NuExtract-1.5",
11
- device_map="auto",
12
- torch_dtype=torch.float16
13
- )
14
- MODEL_LOADED = True
15
- except Exception as e:
16
- MODEL_LOADED = False
17
- print(f"Model loading failed: {e}")
18
-
19
  def extract_structure(template, text):
20
  if not MODEL_LOADED:
21
  return "❌ Model not loaded", {}, "<p style='color:red'>Model failed to initialize</p>"
22
 
23
- prompt = f"""Extract from text:
24
- Template: {template}
25
- Text: {text}
26
- JSON Output:"""
27
 
28
  try:
 
29
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
30
  outputs = model.generate(**inputs, max_new_tokens=512)
31
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
32
 
33
- # Extract JSON portion
34
- json_start = result.find("{")
35
- json_end = result.rfind("}") + 1
36
- extracted = json.loads(result[json_start:json_end])
 
 
 
 
37
 
38
  return "βœ… Success", extracted, f"<pre>{json.dumps(extracted, indent=2)}</pre>"
39
  except Exception as e:
40
- return f"❌ Error: {str(e)}", {}, f"<p style='color:red'>{str(e)}</p>"
41
-
42
- # Gradio interface (properly indented block)
43
- with gr.Blocks() as demo:
44
- gr.Markdown("# NuExtract-1.5 Structured Data Extractor")
45
-
46
- with gr.Row():
47
- with gr.Column():
48
- template = gr.Textbox(label="Template (JSON)", value='{"fields": ["name", "email"]}')
49
- text = gr.TextArea(label="Input Text")
50
- btn = gr.Button("Extract")
51
-
52
- with gr.Column():
53
- status = gr.Textbox(label="Status")
54
- json_out = gr.JSON(label="Output")
55
- html_out = gr.HTML()
56
-
57
- btn.click(extract_structure, [template, text], [status, json_out, html_out])
58
-
59
- if __name__ == "__main__":
60
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def extract_structure(template, text):
    """Run NuExtract-1.5 on ``text`` and return the JSON extracted for ``template``.

    Args:
        template: Extraction template; interpolated verbatim into the prompt.
        text: Source text to extract from; interpolated verbatim into the prompt.

    Returns:
        A ``(status, extracted, html)`` tuple:

        * ``status`` -- human-readable status line (success or error message).
        * ``extracted`` -- the parsed JSON value, or ``{}`` on any failure.
        * ``html`` -- an HTML fragment showing the pretty-printed JSON, or the
          error message in red. All dynamic text in it is HTML-escaped.

    Exceptions raised during generation or parsing are caught and reported
    through the returned tuple rather than propagated.
    """
    # Imported locally so the function does not depend on module-level imports
    # that may not be present in the file.
    import html
    import json

    # Look the flag up defensively: if the model-loading code is absent the
    # name does not exist, which means the same thing as "not loaded".
    if not globals().get("MODEL_LOADED", False):
        return "❌ Model not loaded", {}, "<p style='color:red'>Model failed to initialize</p>"

    output_marker = "<|output|>"
    # Prompt layout used for NuExtract-1.5: template and text between the
    # input marker and the output marker.
    prompt = f"<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n{output_marker}"

    try:
        print(f"Generating with prompt: {prompt[:100]}...")
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=512)
        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Raw result: {result[:100]}...")

        if output_marker in result:
            # Take what follows the LAST marker: the prompt ends with the
            # marker, and the user text may itself contain the marker string.
            json_text = result.rpartition(output_marker)[2]
        else:
            # Marker did not survive decoding. The full decode starts with the
            # prompt and can never parse as JSON, so decode only the tokens
            # generated after the prompt.
            prompt_length = inputs["input_ids"].shape[-1]
            json_text = tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            )

        # raw_decode parses the first JSON value and ignores anything the
        # model emitted after it; it does not skip leading whitespace itself.
        extracted, _ = json.JSONDecoder().raw_decode(json_text.strip())

        # The JSON comes from model output over user text: escape it before
        # embedding it in HTML.
        rendered = html.escape(json.dumps(extracted, indent=2))
        return "✅ Success", extracted, f"<pre>{rendered}</pre>"
    except Exception as e:
        # Top-level boundary for the UI callback: report the failure instead
        # of raising. The message may echo input text, so escape it for HTML.
        print(f"Error in extraction: {e}")
        return f"❌ Error: {e}", {}, f"<p style='color:red'>{html.escape(str(e))}</p>"