Spaces:

point9
/

bert

Running

App Files Files Community

bert / models /bert_model.py

namanpenguin

Upload 15 files

ad944b3 verified 18 days ago

raw

history blame

2.63 kB

	# models/bert_model.py

	import torch
	import torch.nn as nn
	from transformers import BertModel
	from config import DROPOUT_RATE, BERT_MODEL_NAME # Import BERT_MODEL_NAME from config

	class BertMultiOutputModel(nn.Module):
	    """BERT-based model for multi-output classification.

	    A pre-trained BERT encoder is used as the backbone. Its pooled output is
	    passed through a dropout layer and then through one independent linear
	    classification head per target label column.
	    """

	    # Exposed at class level so callers can look up the matching tokenizer
	    # without instantiating the model.
	    tokenizer_name = BERT_MODEL_NAME

	    def __init__(self, num_labels):
	        """Build the encoder, the dropout layer and one head per label column.

	        Args:
	            num_labels (list): One entry per label column; each entry is the
	                number of classes for that column. The heads are created in
	                the same order as this list.
	        """
	        super().__init__()

	        # Pre-trained backbone; provides contextual embeddings and a pooled
	        # output that is used for classification.
	        self.bert = BertModel.from_pretrained(BERT_MODEL_NAME)
	        # Dropout layer for regularization.
	        self.dropout = nn.Dropout(DROPOUT_RATE)

	        # nn.ModuleList (rather than a plain list) registers every head as a
	        # sub-module, so their parameters are tracked by the parent module.
	        hidden_size = self.bert.config.hidden_size
	        self.classifiers = nn.ModuleList(
	            [nn.Linear(hidden_size, n_classes) for n_classes in num_labels]
	        )

	    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> list:
	        """Run the forward pass and return the logits of every head.

	        Args:
	            input_ids (torch.Tensor): Tensor of token IDs (from the tokenizer).
	            attention_mask (torch.Tensor): Tensor indicating which positions
	                to attend to (from the tokenizer).

	        Returns:
	            list: One logit tensor per classification head, in the order the
	            heads were created. Each tensor is expected to have shape
	            (batch_size, num_classes_for_that_label).
	        """
	        # pooler_output typically represents the hidden state of the [CLS]
	        # token after a linear layer and tanh activation.
	        encoder_output = self.bert(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	        )
	        pooled_output = self.dropout(encoder_output.pooler_output)

	        # Raw scores (before softmax/sigmoid); every head sees the same
	        # dropped-out pooled representation.
	        return [classifier(pooled_output) for classifier in self.classifiers]