Update app.py
Browse files
app.py
CHANGED
@@ -76,12 +76,22 @@ training_status = {
|
|
76 |
}
|
77 |
|
78 |
# Load the model and tokenizer for prediction
|
79 |
-
model_path = "
|
80 |
tokenizer = get_tokenizer('bert-base-uncased')
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
class TrainingConfig(BaseModel):
|
87 |
model_name: str = "bert-base-uncased"
|
@@ -250,7 +260,7 @@ async def validate_model(
|
|
250 |
|
251 |
data_df, label_encoders = load_and_preprocess_data(str(file_path))
|
252 |
|
253 |
-
model_path = MODEL_SAVE_DIR / f"{model_name}.pth"
|
254 |
if not model_path.exists():
|
255 |
raise HTTPException(status_code=404, detail="BERT model file not found")
|
256 |
|
@@ -339,7 +349,7 @@ async def predict(
|
|
339 |
"""
|
340 |
try:
|
341 |
# Load the model
|
342 |
-
model_path = MODEL_SAVE_DIR / f"{model_name}.pth"
|
343 |
if not model_path.exists():
|
344 |
raise HTTPException(status_code=404, detail=f"Model {model_name} not found")
|
345 |
|
@@ -527,17 +537,17 @@ async def train_model_task(config: TrainingConfig, file_path: str, training_id:
|
|
527 |
|
528 |
train_loader = DataLoader(dataset, batch_size=config.batch_size, shuffle=True)
|
529 |
|
530 |
-
criterions = initialize_criterions(
|
531 |
optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)
|
532 |
|
533 |
for epoch in range(config.num_epochs):
|
534 |
training_status["current_epoch"] = epoch + 1
|
535 |
|
536 |
-
train_loss = train_model(model, train_loader, criterions,
|
537 |
training_status["current_loss"] = train_loss
|
538 |
|
539 |
# Save model after each epoch
|
540 |
-
save_model(model, training_id)
|
541 |
|
542 |
training_status.update({
|
543 |
"is_training": False,
|
|
|
76 |
}
|
77 |
|
78 |
# Load the model and tokenizer for prediction
|
79 |
+
model_path = MODEL_SAVE_DIR / "BERT_model_model.pth"
|
80 |
tokenizer = get_tokenizer('bert-base-uncased')
|
81 |
+
|
82 |
+
# Initialize model and label encoders with error handling
|
83 |
+
try:
|
84 |
+
label_encoders = load_label_encoders()
|
85 |
+
model = BertMultiOutputModel([len(label_encoders[col].classes_) for col in LABEL_COLUMNS]).to(DEVICE)
|
86 |
+
if model_path.exists():
|
87 |
+
model.load_state_dict(torch.load(model_path, map_location=DEVICE))
|
88 |
+
model.eval()
|
89 |
+
else:
|
90 |
+
print(f"Warning: Model file {model_path} not found. Model will be initialized but not loaded.")
|
91 |
+
except Exception as e:
|
92 |
+
print(f"Warning: Could not load label encoders or model: {str(e)}")
|
93 |
+
print("Model will be initialized when training starts.")
|
94 |
+
model = None
|
95 |
|
96 |
class TrainingConfig(BaseModel):
|
97 |
model_name: str = "bert-base-uncased"
|
|
|
260 |
|
261 |
data_df, label_encoders = load_and_preprocess_data(str(file_path))
|
262 |
|
263 |
+
model_path = MODEL_SAVE_DIR / f"{model_name}_model.pth"
|
264 |
if not model_path.exists():
|
265 |
raise HTTPException(status_code=404, detail="BERT model file not found")
|
266 |
|
|
|
349 |
"""
|
350 |
try:
|
351 |
# Load the model
|
352 |
+
model_path = MODEL_SAVE_DIR / f"{model_name}_model.pth"
|
353 |
if not model_path.exists():
|
354 |
raise HTTPException(status_code=404, detail=f"Model {model_name} not found")
|
355 |
|
|
|
537 |
|
538 |
train_loader = DataLoader(dataset, batch_size=config.batch_size, shuffle=True)
|
539 |
|
540 |
+
criterions = initialize_criterions(data_df_original, label_encoders)
|
541 |
optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)
|
542 |
|
543 |
for epoch in range(config.num_epochs):
|
544 |
training_status["current_epoch"] = epoch + 1
|
545 |
|
546 |
+
train_loss = train_model(model, train_loader, optimizer, criterions, epoch)
|
547 |
training_status["current_loss"] = train_loss
|
548 |
|
549 |
# Save model after each epoch
|
550 |
+
save_model(model, training_id, 'pth')
|
551 |
|
552 |
training_status.update({
|
553 |
"is_training": False,
|