import torch
from torch import nn
from transformers import AutoModel
class BiLSTMAttentionBERT(nn.Module):
    def __init__(self,
                 hidden_dim=256,
                 num_classes=22,   # based on the label distribution
                 num_layers=2,     # stacked BiLSTM layers
                 dropout=0.1):
        super().__init__()
        # Load BioBERT instead of vanilla BERT
        self.bert_model = AutoModel.from_pretrained('dmis-lab/biobert-base-cased-v1.2')
        bert_dim = self.bert_model.config.hidden_size  # still 768 for BioBERT base
        # Dropout applied to the BERT token embeddings
        self.dropout_bert = nn.Dropout(dropout)

        # Multi-layer bidirectional LSTM over the BERT sequence output
        self.lstm = nn.LSTM(
            input_size=bert_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0  # inter-layer dropout only
        )
        # Self-attention over the BiLSTM states (a single head here)
        self.attention = nn.MultiheadAttention(
            embed_dim=hidden_dim * 2,  # *2 for the bidirectional LSTM
            num_heads=1,
            dropout=dropout,
            batch_first=True
        )
        # Regularization layers
        self.dropout2 = nn.Dropout(dropout + 0.1)  # slightly stronger dropout before the classifier
        self.layer_norm = nn.LayerNorm(hidden_dim * 2)
        self.batch_norm = nn.BatchNorm1d(hidden_dim * 2)
        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.BatchNorm1d(hidden_dim),
            nn.Linear(hidden_dim, num_classes)
        )
    def forward(self, input_ids, attention_mask):
        # BERT encoding: contextual embeddings for every token
        bert_output = self.bert_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True
        )
        sequence_output = self.dropout_bert(bert_output.last_hidden_state)

        # BiLSTM processing: (batch, seq_len, hidden_dim * 2)
        lstm_out, _ = self.lstm(sequence_output)
        lstm_out = self.layer_norm(lstm_out)
        # Self-attention; mask padding positions so they cannot be attended to
        attn_out, _ = self.attention(
            query=lstm_out,
            key=lstm_out,
            value=lstm_out,
            key_padding_mask=(attention_mask == 0),
            need_weights=False
        )

        # Mean-pool over real tokens only (padding excluded), then normalize
        mask = attention_mask.unsqueeze(-1).type_as(attn_out)
        pooled = (attn_out * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
        pooled = self.batch_norm(pooled)
        pooled = self.dropout2(pooled)

        # Classification logits: (batch, num_classes)
        return self.classifier(pooled)
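

# --- Minimal usage sketch (illustrative addition, not part of the original file) ---
# Assumes the matching BioBERT tokenizer; the sample sentence and max_length
# below are placeholders chosen for the example.
if __name__ == "__main__":
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained('dmis-lab/biobert-base-cased-v1.2')
    model = BiLSTMAttentionBERT(num_classes=22)
    model.eval()  # eval mode so dropout is disabled and BatchNorm uses running stats

    batch = tokenizer(
        ["Patient presents with acute shortness of breath and chest pain."],
        padding='max_length',
        truncation=True,
        max_length=128,
        return_tensors='pt'
    )
    with torch.no_grad():
        logits = model(batch['input_ids'], batch['attention_mask'])
    print(logits.shape)  # expected: torch.Size([1, 22])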