"""Model loading and single-sentence inference helpers for the Streamlit app."""

import traceback

import numpy as np
import requests
import streamlit as st
import torch
from sklearn.preprocessing import LabelEncoder
from transformers import AutoTokenizer

from utils.model import BiLSTMAttentionBERT

# Class names in the index order used to decode the model's predictions.
_LABEL_CLASSES = (
    'Addition',
    'Causal',
    'Cause and Effect',
    'Clarification',
    'Comparison',
    'Concession',
    'Conditional',
    'Contrast',
    'Contrastive Emphasis',
    'Definition',
    'Elaboration',
    'Emphasis',
    'Enumeration',
    'Explanation',
    'Generalization',
    'Illustration',
    'Inference',
    'Problem Solution',
    'Purpose',
    'Sequential',
    'Summary',
    'Temporal Sequence',
)

# Upper bound (seconds) for the connectivity probe so the app cannot hang
# forever on an unresponsive network.
_HUB_TIMEOUT_SECONDS = 10


def load_model_for_prediction():
    """Load the classifier, its label encoder and its tokenizer.

    Progress is reported through ``st.write`` and failures through
    ``st.error``. The Hugging Face model page is probed first; a non-200
    response aborts loading.

    Returns:
        A ``(model, label_encoder, tokenizer)`` tuple on success, or
        ``(None, None, None)`` if the connectivity probe fails or any
        exception is raised while loading.
    """
    try:
        st.write("Starting model loading...")

        # Test Hugging Face connectivity
        st.write("Testing connection to Hugging Face...")
        response = requests.get(
            "https://huggingface.co/joko333/BiLSTM_v01",
            timeout=_HUB_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            st.error(f"Cannot connect to Hugging Face. Status code: {response.status_code}")
            return None, None, None

        # Load model with logging
        st.write("Loading BiLSTM model...")
        model = BiLSTMAttentionBERT.from_pretrained(
            "joko333/BiLSTM_v01",
            hidden_dim=128,
            # Derived from the label list so the two cannot drift apart.
            num_classes=len(_LABEL_CLASSES),
            num_layers=2,
            dropout=0.5,
        )
        st.write("Model loaded successfully")

        # Initialize label encoder
        st.write("Initializing label encoder...")
        label_encoder = LabelEncoder()
        # Assign classes_ directly instead of fitting: the label set is fixed.
        label_encoder.classes_ = np.array(list(_LABEL_CLASSES))
        st.write("Label encoder initialized")

        # Load tokenizer
        st.write("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained('dmis-lab/biobert-base-cased-v1.2')
        st.write("Tokenizer loaded successfully")

        return model, label_encoder, tokenizer

    except Exception as e:
        # Top-level boundary for the UI: surface full details instead of crashing.
        st.error(f"Detailed error: {str(e)}")
        st.error(f"Error type: {type(e).__name__}")
        st.error(f"Traceback: {traceback.format_exc()}")
        return None, None, None


def predict_sentence(model, sentence, tokenizer, label_encoder):
    """Predict the label of a single sentence.

    Args:
        model: Classifier called as ``model(input_ids, attention_mask)``.
            Its output is passed through a softmax over dimension 1, so it
            is presumably a ``(batch, num_classes)`` score tensor (confirm
            against the model definition).
        sentence: Text to classify.
        tokenizer: Tokenizer used to encode ``sentence``.
        label_encoder: Object whose ``classes_`` maps a predicted index to
            a label.

    Returns:
        ``(predicted_label, probability)`` on success. If any component is
        ``None`` or an exception occurs during tokenization/inference, an
        ``("Error: ...", 0.0)`` tuple is returned instead of raising.
    """
    # Validation checks
    required_components = (
        (model, "Model"),
        (tokenizer, "Tokenizer"),
        (label_encoder, "Label encoder"),
    )
    for component, display_name in required_components:
        if component is None:
            message = f"Error: {display_name} not loaded"
            print(message)
            return message, 0.0

    # Force CPU device
    device = torch.device('cpu')
    model = model.to(device)
    model.eval()

    try:
        # Tokenize; every input is padded/truncated to exactly 512 tokens.
        encoding = tokenizer(
            sentence,
            add_special_tokens=True,
            max_length=512,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        ).to(device)

        with torch.no_grad():
            outputs = model(encoding['input_ids'], encoding['attention_mask'])
            probabilities = torch.softmax(outputs, dim=1)
            prob, pred_idx = torch.max(probabilities, dim=1)
            predicted_label = label_encoder.classes_[pred_idx.item()]
            return predicted_label, prob.item()

    except Exception as e:
        print(f"Prediction error: {str(e)}")
        return f"Error: {str(e)}", 0.0


def print_labels(label_encoder, show_counts=False):
    """Print all labels and their corresponding indices.

    Args:
        label_encoder: Object exposing the labels as ``classes_``.
        show_counts: Accepted for interface compatibility; this function
            does not currently use it.
    """
    separator = "-" * 40
    print("\nAvailable labels:")
    print(separator)
    for idx, label in enumerate(label_encoder.classes_):
        print(f"Index {idx}: {label}")
    print(separator)
    print(f"Total number of classes: {len(label_encoder.classes_)}\n")