import re
import math
import base64
from collections import defaultdict
from io import BytesIO

import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import numpy as np
import torch
import torch.nn as nn
from sklearn.linear_model import LinearRegression
from transformers import ElectraModel, AutoTokenizer

# Font setup: register the font file at font_path and make it the default
# family so the Korean labels used below can be rendered in plots.
font_path = './NanumGothic.ttf'
fm.fontManager.addfont(font_path)
plt.rcParams['font.family'] = fm.FontProperties(fname=font_path).get_name()
plt.rcParams['axes.unicode_minus'] = False

# Label definitions. The order matters: predictions are paired with these
# names positionally (zip(LABELS, preds)) by the prediction code.
LABELS = [
    '불평/불만', '환영/호의', '감동/감탄', '지긋지긋', '고마움', '슬픔',
    '화남/분노', '존경', '기대감', '우쭐댐/무시함', '안타까움/실망', '비장함',
    '의심/불신', '뿌듯함', '편안/쾌적', '신기함/관심', '아껴주는', '부끄러움',
    '공포/무서움', '절망', '한심함', '역겨움/징그러움', '짜증', '어이없음',
    '없음', '패배/자기혐오', '귀찮음', '힘듦/지침', '즐거움/신남', '깨달음',
    '죄책감', '증오/혐오', '흐뭇함(귀여움/예쁨)', '당황/난처', '경악',
    '부담/안_내킴', '서러움', '재미없음', '불쌍함/연민', '놀람', '행복',
    '불안/걱정', '기쁨', '안심/신뢰',
]

# Subset of LABELS that is tracked and plotted as "negative" emotions.
NEGATIVE_EMOTIONS = [
    '불평/불만', '지긋지긋', '슬픔', '화남/분노', '의심/불신', '공포/무서움',
    '절망', '한심함', '역겨움/징그러움', '짜증', '어이없음', '패배/자기혐오',
    '귀찮음', '힘듦/지침', '죄책감', '증오/혐오', '당황/난처', '부담/안_내킴',
    '서러움', '재미없음',
]

# Device: use the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"


# Model definition
class KOTEtagger(nn.Module):
    """KcELECTRA encoder followed by a linear head with one output per label.

    The tokenizer is owned by the module, so ``forward`` accepts raw text
    rather than pre-tokenized tensors.
    """

    def __init__(self):
        super().__init__()
        self.electra = ElectraModel.from_pretrained("beomi/KcELECTRA-base", revision='v2021').to(device)
        self.tokenizer = AutoTokenizer.from_pretrained("beomi/KcELECTRA-base", revision='v2021')
        # One logit per entry of LABELS (44), kept in sync with the label list.
        self.classifier = nn.Linear(self.electra.config.hidden_size, len(LABELS)).to(device)

    def forward(self, text):
        """Tokenize ``text``, encode it and return sigmoid-activated label scores.

        Args:
            text: Raw input text handed to the tokenizer.

        Returns:
            Tensor of per-label scores in (0, 1), computed from the hidden
            state of the first token of each sequence.
        """
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=512,
            # Without truncation, text longer than max_length is passed to the
            # encoder unshortened; cut it to the 512-token window instead.
            truncation=True,
            return_token_type_ids=False,
            padding="max_length",
            return_attention_mask=True,
            return_tensors='pt',
        ).to(device)
        output = self.electra(encoding["input_ids"], attention_mask=encoding["attention_mask"])
        # Use the first token's hidden state as the sequence representation.
        output = output.last_hidden_state[:, 0, :]
        output = self.classifier(output)
        return torch.sigmoid(output)


# Model instantiation (weights are loaded right after this).
trained_model = KOTEtagger()
# Load the fine-tuned weights into the model and switch to inference mode.
# NOTE(review): strict=False silently ignores missing/unexpected keys, so a
# checkpoint whose key names do not match leaves layers at their initial
# values -- confirm the checkpoint's key layout.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
trained_model.load_state_dict(torch.load("kote_pytorch_lightning.bin", map_location=device), strict=False)
trained_model.eval()


# Helper functions
def parse_dialogue(text):
    """Split ``text`` into ``(speaker, sentence)`` pairs.

    Each line is matched against ``speaker:sentence``; the split happens at
    the first colon. Lines that do not match are skipped. Both parts are
    stripped of surrounding whitespace.
    """
    lines = text.strip().split('\n')
    return [
        (match.group(1).strip(), match.group(2).strip())
        for line in lines
        if (match := re.match(r"([^:]+):(.+)", line.strip()))
    ]


def adjusted_score(raw_score, k=5):
    """Map ``raw_score`` onto a 0-100 scale with a logistic curve centred at 0.5.

    Args:
        raw_score: Score to rescale.
        k: Steepness of the logistic curve.

    Returns:
        ``100 / (1 + exp(-k * (raw_score - 0.5)))``.
    """
    try:
        return 100 / (1 + math.exp(-k * (raw_score - 0.5)))
    except OverflowError:
        # exp() overflowed, i.e. the denominator is effectively infinite.
        return 0.0


def apply_ema(scores, alpha=0.4):
    """Return the exponential moving average of ``scores``.

    The first value is kept as-is; each later value is
    ``alpha * s + (1 - alpha) * previous_smoothed``. An empty input yields
    an empty list.
    """
    if not scores:
        return []
    smoothed = [scores[0]]
    for s in scores[1:]:
        smoothed.append(alpha * s + (1 - alpha) * smoothed[-1])
    return smoothed


def _collect_negative_scores(dialogue):
    """Run the model on every utterance and group adjusted negative-emotion scores by speaker and label."""
    negative = frozenset(NEGATIVE_EMOTIONS)
    emotion_scores = defaultdict(lambda: defaultdict(list))
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for speaker, sentence in dialogue:
            preds = trained_model(sentence)[0]
            for label, score in zip(LABELS, preds):
                if label in negative:
                    emotion_scores[speaker][label].append(adjusted_score(score.item()))
    return emotion_scores


# Main processing function
def predict_and_plot(raw_text):
    """Predict negative-emotion trends per speaker and render them as HTML.

    For every speaker found in ``raw_text`` the per-utterance scores of each
    negative emotion are smoothed with ``apply_ema``. Emotions with at least
    two data points and a smoothed peak of 40 or more are plotted, and a
    linear regression over the smoothed series extrapolates the next value.
    Predicted values of 80 or more are flagged with a warning marker.

    Args:
        raw_text: Dialogue text with one ``speaker: sentence`` entry per line.

    Returns:
        A string containing, per speaker, the textual summary followed by the
        plot embedded as a base64 PNG, or a notice when nothing qualified for
        plotting.
    """
    from html import escape  # local import keeps this block self-contained

    dialogue = parse_dialogue(raw_text)
    emotion_scores = _collect_negative_scores(dialogue)

    parts = []
    for speaker, label_scores in emotion_scores.items():
        # The speaker name comes from user-supplied text; escape it before it
        # goes into the HTML output.
        parts.append(f"\n\n{escape(speaker)} 감정 예측 결과:\n\n")

        fig, ax = plt.subplots(figsize=(10, 4))
        try:
            max_y = 0
            plotted = False

            for label in NEGATIVE_EMOTIONS:
                scores = apply_ema(label_scores.get(label, []))
                if len(scores) < 2 or max(scores) < 40:
                    continue

                # Fit score ~ utterance index and extrapolate one step ahead.
                X = np.arange(len(scores)).reshape(-1, 1)
                y = np.array(scores)
                predicted = LinearRegression().fit(X, y).predict([[len(scores)]])[0]

                line, = ax.plot(scores, label=label)
                # Dashed segment from the last observed point to the forecast.
                ax.plot(
                    [len(scores) - 1, len(scores)],
                    [scores[-1], predicted],
                    linestyle='--',
                    color=line.get_color(),
                )
                plotted = True
                max_y = max(max_y, predicted, *scores)

                parts.append(f"\n\n- {label}: 예측 점수 {predicted:.2f}")
                if predicted >= 80:
                    parts.append(" ⚠️ 경고!")
                parts.append("\n\n")

            if plotted:
                ax.set_title(f"{speaker}의 부정 감정 변화 및 예측")
                ax.set_xlabel("발화 순서")
                ax.set_ylabel("감정 점수")
                ax.set_ylim(0, max(100, max_y + 10))
                ax.legend()
                ax.grid(True)

                buf = BytesIO()
                # Operate on this figure explicitly rather than on pyplot's
                # implicit "current figure".
                fig.tight_layout()
                fig.savefig(buf, format='png')
                img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
                # NOTE(review): the encoded image was previously computed but
                # never emitted; embedding it as a data-URI <img> is the
                # presumed intent -- confirm the exact markup expected by the
                # consumer of this HTML.
                parts.append(f"<img src='data:image/png;base64,{img_base64}'/>\n")
            else:
                parts.append("\n\n⚠️ 시각화할 수 있는 감정이 없습니다.\n\n\n")
        finally:
            # Always release the figure, including when nothing was plotted.
            plt.close(fig)

    return "".join(parts)