# models/roberta_model.py
import torch
import torch.nn as nn
from transformers import RobertaModel

from config import DROPOUT_RATE, ROBERTA_MODEL_NAME  # Import ROBERTA_MODEL_NAME


class RobertaMultiOutputModel(nn.Module):
    """
    RoBERTa-based model for multi-output classification.
    Uses a pre-trained RoBERTa model as its backbone. RoBERTa is an optimized
    version of BERT that often performs better on downstream tasks.
    """
    # Statically set tokenizer name for easy access in main.py
    tokenizer_name = ROBERTA_MODEL_NAME

    def __init__(self, num_labels):
        """
        Initializes the RobertaMultiOutputModel.

        Args:
            num_labels (list): A list where each element is the number of
                classes for a corresponding label column.
        """
        super(RobertaMultiOutputModel, self).__init__()

        # Load the pre-trained RoBERTa model.
        # RoBERTa's pooler_output corresponds to the hidden state of the
        # first token (<s>), which is commonly used for sequence classification.
        self.roberta = RobertaModel.from_pretrained(ROBERTA_MODEL_NAME)
        self.dropout = nn.Dropout(DROPOUT_RATE)  # Dropout layer for regularization

        # Create one classification head per label column.
        self.classifiers = nn.ModuleList([
            nn.Linear(self.roberta.config.hidden_size, n_classes)
            for n_classes in num_labels
        ])

    def forward(self, input_ids, attention_mask):
        """
        Performs the forward pass of the model.

        Args:
            input_ids (torch.Tensor): Tensor of token IDs.
            attention_mask (torch.Tensor): Tensor indicating which tokens to attend to.

        Returns:
            list: A list of logit tensors, one for each classification head.
        """
        # Pass input_ids and attention_mask through RoBERTa;
        # pooler_output is used as the sequence representation for classification.
        pooled_output = self.roberta(
            input_ids=input_ids,
            attention_mask=attention_mask
        ).pooler_output

        # Apply dropout for regularization.
        pooled_output = self.dropout(pooled_output)

        # Pass the pooled output through each classification head.
        return [classifier(pooled_output) for classifier in self.classifiers]
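

# --- Illustrative usage sketch (not part of the model definition) ---
# A minimal, hedged example of how this model might be exercised end to end.
# It assumes ROBERTA_MODEL_NAME points at a standard RoBERTa checkpoint
# (e.g. "roberta-base") and uses hypothetical label sizes of 3 and 5 classes;
# adjust both to match your actual config and dataset.
if __name__ == "__main__":
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(RobertaMultiOutputModel.tokenizer_name)
    model = RobertaMultiOutputModel(num_labels=[3, 5])  # hypothetical label sizes
    model.eval()

    # Tokenize a toy batch of two sentences.
    encoded = tokenizer(
        ["An example sentence.", "Another example."],
        padding=True,
        truncation=True,
        return_tensors="pt",
    )

    with torch.no_grad():
        logits_per_head = model(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
        )

    # One logits tensor per label column: shapes (batch_size, 3) and (batch_size, 5).
    predictions = [logits.argmax(dim=-1) for logits in logits_per_head]
    print(predictions)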