oberbics committed
Commit f3ef5a8 · verified · 1 Parent(s): 0284ff4

Update app.py

Files changed (1)
  1. app.py +12 -58
app.py CHANGED
@@ -115,26 +115,33 @@ def load_model():
 
 # Then, modify your extract_info function to load the model on first use
 @spaces.GPU
+@spaces.GPU
 def extract_info(template, text):
     global tokenizer, model
 
     if tokenizer is None:
-        return "❌ Tokenizer nicht geladen", "Bitte zuerst den Tokenizer laden"
+        return "❌ Tokenizer nicht geladen", "Bitte zuerst auf 'Modell laden' klicken"
 
     try:
         # Load model if not loaded yet
         if model is None:
+            print("Model not loaded yet, loading now...")
             try:
                 model = AutoModelForCausalLM.from_pretrained(
                     MODEL_NAME,
                     torch_dtype=TORCH_DTYPE,
                     trust_remote_code=True,
-                    revision="main"
-                ).to(DEVICE).eval()
-                print(f"✅ Model loaded successfully on {DEVICE}")
+                    revision="main",
+                    device_map="auto"  # Let the model decide CUDA placement
+                ).eval()
+                print(f"✅ Model loaded successfully")
             except Exception as e:
+                trace = traceback.format_exc()
+                print(f"Error loading model: {e}\n{trace}")
                 return f"❌ Fehler beim Laden des Modells: {str(e)}", "{}"
 
+        print("Using model for inference...")
+
         # Format the template as proper JSON with indentation
         template_formatted = json.dumps(json.loads(template), indent=4)
 
@@ -148,7 +155,7 @@ def extract_info(template, text):
             truncation=True,
             padding=True,
             max_length=MAX_INPUT_LENGTH
-        ).to(DEVICE)
+        ).to(model.device)  # Use model's device
 
         # Generate output with torch.no_grad() for efficiency
         with torch.no_grad():
@@ -180,59 +187,6 @@ def extract_info(template, text):
         trace = traceback.format_exc()
         print(f"Error in extract_info: {e}\n{trace}")
         return f"❌ Fehler: {str(e)}", "{}"
-@spaces.GPU
-def extract_info(template, text):
-    global tokenizer, model
-    if model is None:
-        return "❌ Modell nicht geladen", "Bitte zuerst das Modell laden"
-
-    try:
-        # Format the template as proper JSON with indentation as per usage example
-        template_formatted = json.dumps(json.loads(template), indent=4)
-
-        # Create prompt exactly as shown in the usage example
-        prompt = f"<|input|>\n### Template:\n{template_formatted}\n### Text:\n{text}\n\n<|output|>"
-
-        # Tokenize with proper settings
-        inputs = tokenizer(
-            [prompt],
-            return_tensors="pt",
-            truncation=True,
-            padding=True,
-            max_length=MAX_INPUT_LENGTH
-        ).to(DEVICE)
-
-        # Generate output with torch.no_grad() for efficiency
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=MAX_NEW_TOKENS,
-                temperature=0.0,
-                do_sample=False
-            )
-
-        # Decode the result
-        result_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        # Extract the output part
-        if "<|output|>" in result_text:
-            json_text = result_text.split("<|output|>")[1].strip()
-        else:
-            json_text = result_text
-
-        # Try to parse as JSON
-        try:
-            extracted = json.loads(json_text)
-            return "✅ Erfolgreich extrahiert", json.dumps(extracted, indent=2)
-        except json.JSONDecodeError:
-            return "❌ JSON Parsing Fehler", json_text
-
-    except Exception as e:
-        import traceback
-        trace = traceback.format_exc()
-        print(f"Error in extract_info: {e}\n{trace}")
-        return f"❌ Fehler: {str(e)}", "{}"
-
 def create_map(df, location_col):
     m = folium.Map(
         location=[20, 0],
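For reference, the pattern this commit converges on — loading the model lazily inside the GPU-decorated function with device_map="auto", then moving tokenized inputs to model.device instead of a hard-coded DEVICE — looks roughly like this as a self-contained sketch. The constant values and model name below are illustrative assumptions; the real MODEL_NAME, TORCH_DTYPE, MAX_INPUT_LENGTH, and MAX_NEW_TOKENS are defined elsewhere in app.py and are not visible in this diff:

import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative values -- the actual constants live at module level in app.py.
MODEL_NAME = "numind/NuExtract"
TORCH_DTYPE = torch.bfloat16
MAX_INPUT_LENGTH = 4000
MAX_NEW_TOKENS = 1000

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = None  # loaded lazily on first request, as in the commit

def extract(template: str, text: str) -> str:
    global model
    if model is None:
        # device_map="auto" lets accelerate place the weights; no manual .to(DEVICE)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=TORCH_DTYPE,
            trust_remote_code=True,
            revision="main",
            device_map="auto",
        ).eval()

    template_formatted = json.dumps(json.loads(template), indent=4)
    prompt = f"<|input|>\n### Template:\n{template_formatted}\n### Text:\n{text}\n\n<|output|>"

    # With device_map="auto" the model chooses its own placement, so follow
    # model.device instead of a module-level DEVICE constant.
    inputs = tokenizer([prompt], return_tensors="pt", truncation=True,
                       max_length=MAX_INPUT_LENGTH).to(model.device)
    with torch.no_grad():
        # do_sample=False already gives greedy decoding; temperature is then ignored
        outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS, do_sample=False)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return result.split("<|output|>")[1].strip() if "<|output|>" in result else result

With device_map="auto" there is no single device to hard-code — accelerate may place (or shard) the weights as it sees fit — so model.device is the reliable target for the input tensors.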
 
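The 53 removed lines were a stale duplicate of extract_info left over from an earlier edit; the surviving function keeps the same post-processing contract: everything after the <|output|> marker is expected to be valid JSON. Factored out on its own, that step would look roughly like this (a sketch; parse_model_output is a hypothetical helper name, not a function in app.py):

import json

def parse_model_output(result_text: str) -> tuple[str, str]:
    # Everything after the <|output|> marker should be the extracted JSON.
    json_text = (result_text.split("<|output|>")[1].strip()
                 if "<|output|>" in result_text else result_text)
    try:
        extracted = json.loads(json_text)
        return "✅ Erfolgreich extrahiert", json.dumps(extracted, indent=2)
    except json.JSONDecodeError:
        return "❌ JSON Parsing Fehler", json_text

extract_info could then end with return parse_model_output(result_text), keeping the generation and parsing concerns separate.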