Spaces:

point9
/

bert

Running

App Files Community

namanpenguin committed 9 days ago

Commit

248e31b

verified ·

1 Parent(s): 2bff341

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -10

app.py CHANGED Viewed

@@ -76,12 +76,22 @@ training_status = {
 }
 # Load the model and tokenizer for prediction
-model_path = "BERT_model.pth"
 tokenizer = get_tokenizer('bert-base-uncased')
-model = BertMultiOutputModel([len(load_label_encoders()[col].classes_) for col in LABEL_COLUMNS]).to(DEVICE)
-if os.path.exists(model_path):
-    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
-    model.eval()
 class TrainingConfig(BaseModel):
     model_name: str = "bert-base-uncased"
@@ -250,7 +260,7 @@ async def validate_model(
         data_df, label_encoders = load_and_preprocess_data(str(file_path))
-        model_path = MODEL_SAVE_DIR / f"{model_name}.pth"
         if not model_path.exists():
             raise HTTPException(status_code=404, detail="BERT model file not found")
@@ -339,7 +349,7 @@ async def predict(
     """
     try:
         # Load the model
-        model_path = MODEL_SAVE_DIR / f"{model_name}.pth"
         if not model_path.exists():
             raise HTTPException(status_code=404, detail=f"Model {model_name} not found")
@@ -527,17 +537,17 @@ async def train_model_task(config: TrainingConfig, file_path: str, training_id:
         train_loader = DataLoader(dataset, batch_size=config.batch_size, shuffle=True)
-        criterions = initialize_criterions(num_labels_list)
         optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)
         for epoch in range(config.num_epochs):
             training_status["current_epoch"] = epoch + 1
-            train_loss = train_model(model, train_loader, criterions, optimizer)
             training_status["current_loss"] = train_loss
             # Save model after each epoch
-            save_model(model, training_id)
         training_status.update({
             "is_training": False,

 }
 # Load the model and tokenizer for prediction
+model_path = MODEL_SAVE_DIR / "BERT_model_model.pth"
 tokenizer = get_tokenizer('bert-base-uncased')
+# Initialize model and label encoders with error handling
+try:
+    label_encoders = load_label_encoders()
+    model = BertMultiOutputModel([len(label_encoders[col].classes_) for col in LABEL_COLUMNS]).to(DEVICE)
+    if model_path.exists():
+        model.load_state_dict(torch.load(model_path, map_location=DEVICE))
+        model.eval()
+    else:
+        print(f"Warning: Model file {model_path} not found. Model will be initialized but not loaded.")
+except Exception as e:
+    print(f"Warning: Could not load label encoders or model: {str(e)}")
+    print("Model will be initialized when training starts.")
+    model = None
 class TrainingConfig(BaseModel):
     model_name: str = "bert-base-uncased"
         data_df, label_encoders = load_and_preprocess_data(str(file_path))
+        model_path = MODEL_SAVE_DIR / f"{model_name}_model.pth"
         if not model_path.exists():
             raise HTTPException(status_code=404, detail="BERT model file not found")
     """
     try:
         # Load the model
+        model_path = MODEL_SAVE_DIR / f"{model_name}_model.pth"
         if not model_path.exists():
             raise HTTPException(status_code=404, detail=f"Model {model_name} not found")
         train_loader = DataLoader(dataset, batch_size=config.batch_size, shuffle=True)
+        criterions = initialize_criterions(data_df_original, label_encoders)
         optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)
         for epoch in range(config.num_epochs):
             training_status["current_epoch"] = epoch + 1
+            train_loss = train_model(model, train_loader, optimizer, criterions, epoch)
             training_status["current_loss"] = train_loss
             # Save model after each epoch
+            save_model(model, training_id, 'pth')
         training_status.update({
             "is_training": False,