namanpenguin committed on
Commit
4864b8e
·
verified ·
1 Parent(s): 248e31b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -14
app.py CHANGED
@@ -76,7 +76,7 @@ training_status = {
76
  }
77
 
78
  # Load the model and tokenizer for prediction
79
- model_path = MODEL_SAVE_DIR / "BERT_model_model.pth"
80
  tokenizer = get_tokenizer('bert-base-uncased')
81
 
82
  # Initialize model and label encoders with error handling
@@ -260,7 +260,7 @@ async def validate_model(
260
 
261
  data_df, label_encoders = load_and_preprocess_data(str(file_path))
262
 
263
- model_path = MODEL_SAVE_DIR / f"{model_name}_model.pth"
264
  if not model_path.exists():
265
  raise HTTPException(status_code=404, detail="BERT model file not found")
266
 
@@ -349,7 +349,7 @@ async def predict(
349
  """
350
  try:
351
  # Load the model
352
- model_path = MODEL_SAVE_DIR / f"{model_name}_model.pth"
353
  if not model_path.exists():
354
  raise HTTPException(status_code=404, detail=f"Model {model_name} not found")
355
 
@@ -390,17 +390,31 @@ async def predict(
390
  for i, row in data_df.iterrows():
391
  transaction_pred = {}
392
  for j, (col, probs) in enumerate(zip(LABEL_COLUMNS, all_probabilities)):
393
- pred = np.argmax(probs[i])
394
- decoded_pred = label_encoders[col].inverse_transform([pred])[0]
395
-
396
  class_probs = {
397
  label: float(probs[i][j])
398
  for j, label in enumerate(label_encoders[col].classes_)
399
  }
400
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  transaction_pred[col] = {
402
- "prediction": decoded_pred,
403
- "probabilities": class_probs
 
 
 
 
404
  }
405
 
406
  predictions.append({
@@ -466,17 +480,31 @@ async def predict(
466
 
467
  response = {}
468
  for i, (col, probs) in enumerate(zip(LABEL_COLUMNS, all_probabilities)):
469
- pred = np.argmax(probs[0])
470
- decoded_pred = label_encoders[col].inverse_transform([pred])[0]
471
-
472
  class_probs = {
473
  label: float(probs[0][j])
474
  for j, label in enumerate(label_encoders[col].classes_)
475
  }
476
 
 
 
 
 
 
 
 
 
 
 
 
 
477
  response[col] = {
478
- "prediction": decoded_pred,
479
- "probabilities": class_probs
 
 
 
 
480
  }
481
 
482
  return response
@@ -565,5 +593,5 @@ async def train_model_task(config: TrainingConfig, file_path: str, training_id:
565
  })
566
 
567
  if __name__ == "__main__":
568
- port = int(os.environ.get("PORT", 7860))
569
  uvicorn.run(app, host="0.0.0.0", port=port)
 
76
  }
77
 
78
  # Load the model and tokenizer for prediction
79
+ model_path = MODEL_SAVE_DIR / "BERT_model.pth"
80
  tokenizer = get_tokenizer('bert-base-uncased')
81
 
82
  # Initialize model and label encoders with error handling
 
260
 
261
  data_df, label_encoders = load_and_preprocess_data(str(file_path))
262
 
263
+ model_path = MODEL_SAVE_DIR / f"{model_name}.pth"
264
  if not model_path.exists():
265
  raise HTTPException(status_code=404, detail="BERT model file not found")
266
 
 
349
  """
350
  try:
351
  # Load the model
352
+ model_path = MODEL_SAVE_DIR / f"{model_name}.pth"
353
  if not model_path.exists():
354
  raise HTTPException(status_code=404, detail=f"Model {model_name} not found")
355
 
 
390
  for i, row in data_df.iterrows():
391
  transaction_pred = {}
392
  for j, (col, probs) in enumerate(zip(LABEL_COLUMNS, all_probabilities)):
393
+ # Get probabilities for each class
 
 
394
  class_probs = {
395
  label: float(probs[i][j])
396
  for j, label in enumerate(label_encoders[col].classes_)
397
  }
398
 
399
+ # Sort probabilities in descending order
400
+ sorted_probs = sorted(class_probs.items(), key=lambda x: x[1], reverse=True)
401
+
402
+ # Get top prediction and its probability
403
+ top_pred, top_prob = sorted_probs[0]
404
+
405
+ # Get top 3 predictions with probabilities
406
+ top_3_predictions = [
407
+ {"label": label, "probability": prob}
408
+ for label, prob in sorted_probs[:3]
409
+ ]
410
+
411
  transaction_pred[col] = {
412
+ "top_prediction": {
413
+ "label": top_pred,
414
+ "probability": top_prob
415
+ },
416
+ "alternative_predictions": top_3_predictions[1:], # Exclude the top prediction
417
+ "all_probabilities": class_probs # Keep all probabilities for reference
418
  }
419
 
420
  predictions.append({
 
480
 
481
  response = {}
482
  for i, (col, probs) in enumerate(zip(LABEL_COLUMNS, all_probabilities)):
483
+ # Get probabilities for each class
 
 
484
  class_probs = {
485
  label: float(probs[0][j])
486
  for j, label in enumerate(label_encoders[col].classes_)
487
  }
488
 
489
+ # Sort probabilities in descending order
490
+ sorted_probs = sorted(class_probs.items(), key=lambda x: x[1], reverse=True)
491
+
492
+ # Get top prediction and its probability
493
+ top_pred, top_prob = sorted_probs[0]
494
+
495
+ # Get top 3 predictions with probabilities
496
+ top_3_predictions = [
497
+ {"label": label, "probability": prob}
498
+ for label, prob in sorted_probs[:3]
499
+ ]
500
+
501
  response[col] = {
502
+ "top_prediction": {
503
+ "label": top_pred,
504
+ "probability": top_prob
505
+ },
506
+ "alternative_predictions": top_3_predictions[1:], # Exclude the top prediction
507
+ "all_probabilities": class_probs # Keep all probabilities for reference
508
  }
509
 
510
  return response
 
593
  })
594
 
595
  if __name__ == "__main__":
596
+ port = int(os.environ.get("PORT", 7861))
597
  uvicorn.run(app, host="0.0.0.0", port=port)