Spaces:

leewatson
/

kshs33_emotion_predict

Sleeping

App Files Files Community

kshs33_emotion_predict / emotion_predictor.py

leewatson

Update emotion_predictor.py

f8d9884 verified about 2 months ago

raw

history blame contribute delete

5.77 kB

	import re
	import math
	import matplotlib.pyplot as plt
	import matplotlib.font_manager as fm
	import torch
	import torch.nn as nn
	from transformers import ElectraModel, AutoTokenizer
	import numpy as np
	from sklearn.linear_model import LinearRegression
	from collections import defaultdict
	import base64
	from io import BytesIO

	# Font setup: register the bundled NanumGothic file with matplotlib and make
	# it the default family (presumably so the Korean plot labels render).
	font_path = './NanumGothic.ttf'
	fm.fontManager.addfont(font_path)
	plt.rcParams.update({
	    # The family name is read back from the font file itself.
	    'font.family': fm.FontProperties(fname=font_path).get_name(),
	    # Use the ASCII hyphen for negative tick labels instead of U+2212.
	    'axes.unicode_minus': False,
	})

	# Label definitions.
	# LABELS is zipped position-by-position with the model's output scores in
	# predict_and_plot, so the order of the entries matters.
	LABELS = [
	    '불평/불만', '환영/호의', '감동/감탄', '지긋지긋',
	    '고마움', '슬픔', '화남/분노', '존경',
	    '기대감', '우쭐댐/무시함', '안타까움/실망', '비장함',
	    '의심/불신', '뿌듯함', '편안/쾌적', '신기함/관심',
	    '아껴주는', '부끄러움', '공포/무서움', '절망',
	    '한심함', '역겨움/징그러움', '짜증', '어이없음',
	    '없음', '패배/자기혐오', '귀찮음', '힘듦/지침',
	    '즐거움/신남', '깨달음', '죄책감', '증오/혐오',
	    '흐뭇함(귀여움/예쁨)', '당황/난처', '경악', '부담/안_내킴',
	    '서러움', '재미없음', '불쌍함/연민', '놀람',
	    '행복', '불안/걱정', '기쁨', '안심/신뢰',
	]

	# Subset of LABELS that predict_and_plot tracks and plots; the iteration
	# order here is the order in which emotions appear in the report.
	NEGATIVE_EMOTIONS = [
	    '불평/불만', '지긋지긋', '슬픔', '화남/분노',
	    '의심/불신', '공포/무서움', '절망', '한심함',
	    '역겨움/징그러움', '짜증', '어이없음', '패배/자기혐오',
	    '귀찮음', '힘듦/지침', '죄책감', '증오/혐오',
	    '당황/난처', '부담/안_내킴', '서러움', '재미없음',
	]

	# Device selection: run on the GPU when CUDA is available, otherwise on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Model definition
	class KOTEtagger(nn.Module):
	    """KcELECTRA encoder followed by a linear head that scores 44 labels.

	    The module owns its tokenizer, so ``forward`` takes raw text rather than
	    token ids.
	    """

	    def __init__(self):
	        """Load the pretrained encoder and tokenizer and create the 44-way head."""
	        super().__init__()
	        self.electra = ElectraModel.from_pretrained("beomi/KcELECTRA-base", revision='v2021').to(device)
	        self.tokenizer = AutoTokenizer.from_pretrained("beomi/KcELECTRA-base", revision='v2021')
	        self.classifier = nn.Linear(self.electra.config.hidden_size, 44).to(device)

	    def forward(self, text):
	        """Tokenize ``text`` and return one sigmoid score per label.

	        Args:
	            text: The utterance to score (callers in this file pass one string).

	        Returns:
	            A tensor of sigmoid outputs whose last dimension has 44 entries.
	            Callers in this file index ``[0]`` to get the scores of the
	            single input text.
	        """
	        encoding = self.tokenizer.encode_plus(
	            text,
	            add_special_tokens=True,
	            max_length=512,
	            # Without truncation an over-long text is NOT cut to max_length,
	            # so the encoder would receive more than 512 tokens.
	            truncation=True,
	            return_token_type_ids=False,
	            padding="max_length",
	            return_attention_mask=True,
	            return_tensors='pt',
	        ).to(device)
	        output = self.electra(encoding["input_ids"], attention_mask=encoding["attention_mask"])
	        # Use the hidden state of the first token as the sequence representation.
	        output = output.last_hidden_state[:, 0, :]
	        output = self.classifier(output)
	        # Independent sigmoid per label rather than a softmax across labels.
	        return torch.sigmoid(output)

	# Load the model
	# NOTE(review): torch.load unpickles the checkpoint file, so the file must
	# come from a trusted source.
	# NOTE(review): strict=False tells load_state_dict not to raise on missing or
	# unexpected keys, so a checkpoint whose key names do not line up with this
	# module would go unnoticed — confirm that this is intended.
	trained_model = KOTEtagger()
	trained_model.load_state_dict(torch.load("kote_pytorch_lightning.bin", map_location=device), strict=False)
	# Switch the module to evaluation mode for inference.
	trained_model.eval()

	# Helper functions
	def parse_dialogue(text):
	    """Split a transcript into ``(speaker, utterance)`` pairs.

	    Each line is trimmed and matched against ``speaker:utterance``; the
	    split happens at the first colon, so later colons stay in the
	    utterance. Lines that do not match the pattern are dropped.

	    Args:
	        text: Transcript with one turn per line.

	    Returns:
	        A list of ``(speaker, utterance)`` tuples in input order, both
	        parts stripped of surrounding whitespace.
	    """
	    turns = []
	    for raw_line in text.strip().split('\n'):
	        found = re.match(r"([^:]+):(.+)", raw_line.strip())
	        if not found:
	            continue
	        who, said = found.groups()
	        turns.append((who.strip(), said.strip()))
	    return turns

	def adjusted_score(raw_score, k=5):
	    """Rescale a raw score to the 0-100 range with a logistic curve.

	    The curve is centred on 0.5 (which maps to 50) and ``k`` controls its
	    steepness.

	    Args:
	        raw_score: The score to rescale.
	        k: Steepness of the logistic curve.

	    Returns:
	        ``100 / (1 + exp(-k * (raw_score - 0.5)))`` as a float.
	    """
	    try:
	        return 100 / (1 + math.exp(-k * (raw_score - 0.5)))
	    except OverflowError:
	        # math.exp raises once its argument is too large for a float, which
	        # happens for a steep k or a far-out-of-range score. The logistic
	        # value in that limit is 0.
	        return 0.0

	def apply_ema(scores, alpha=0.4):
	    """Smooth a sequence with an exponential moving average.

	    The first value is kept as-is; every later value becomes
	    ``alpha * value + (1 - alpha) * previous_smoothed``.

	    Args:
	        scores: Sequence of numbers to smooth; it is not modified.
	        alpha: Weight given to the newest value.

	    Returns:
	        A new list of the same length as ``scores`` (empty for empty input).
	    """
	    if not scores:
	        return []
	    carry = 1 - alpha
	    current = scores[0]
	    trail = [current]
	    for sample in scores[1:]:
	        current = alpha * sample + carry * current
	        trail.append(current)
	    return trail

	# Main processing function
	def predict_and_plot(raw_text):
	    """Score negative emotions per speaker and render their trend as HTML.

	    Each utterance from ``parse_dialogue`` is run through ``trained_model``.
	    Scores for labels in ``NEGATIVE_EMOTIONS`` are rescaled with
	    ``adjusted_score`` and collected per speaker. Per speaker and emotion,
	    the series is smoothed with ``apply_ema``. An emotion is reported only
	    if it has at least two points and its smoothed peak reaches 40. A
	    linear regression over the smoothed series extrapolates the next turn,
	    and a forecast of 80 or more gets a warning marker.

	    Args:
	        raw_text: Transcript with one ``speaker: utterance`` turn per line.

	    Returns:
	        An HTML fragment with one section per speaker: a heading, one line
	        per reported emotion, and either an inline base64 PNG chart or a
	        notice that there was nothing to plot. Empty when no line parses.
	    """
	    # Function-scope import keeps this block self-contained.
	    from html import escape

	    dialogue = parse_dialogue(raw_text)
	    negative_labels = set(NEGATIVE_EMOTIONS)  # constant-time membership below
	    emotion_scores = defaultdict(lambda: defaultdict(list))

	    # Prediction. Inference only, so skip autograd bookkeeping.
	    with torch.no_grad():
	        for speaker, sentence in dialogue:
	            preds = trained_model(sentence)[0].tolist()
	            for label, score in zip(LABELS, preds):
	                if label in negative_labels:
	                    emotion_scores[speaker][label].append(adjusted_score(score))

	    html_parts = []
	    for speaker, label_scores in emotion_scores.items():
	        # The speaker name comes straight from user input: escape it for HTML.
	        html_parts.append(f"<h3>{escape(speaker)} 감정 예측 결과:</h3>")
	        fig, ax = plt.subplots(figsize=(10, 4))
	        try:
	            max_y = 0
	            plotted = False

	            for label in NEGATIVE_EMOTIONS:
	                scores = apply_ema(label_scores.get(label, []))
	                # Need two points to fit a trend; skip emotions that stay weak.
	                if len(scores) < 2 or max(scores) < 40:
	                    continue

	                X = np.arange(len(scores)).reshape(-1, 1)
	                y = np.array(scores)
	                model = LinearRegression().fit(X, y)
	                # Extrapolate one step past the last observed turn.
	                predicted = model.predict([[len(scores)]])[0]

	                line, = ax.plot(scores, label=label)
	                # Dashed segment from the last point to the forecast, same colour.
	                ax.plot(
	                    [len(scores) - 1, len(scores)],
	                    [scores[-1], predicted],
	                    linestyle='--',
	                    color=line.get_color(),
	                )
	                plotted = True
	                max_y = max(max_y, predicted, *scores)

	                warning = " <b style='color:red'>⚠️ 경고!</b>" if predicted >= 80 else ""
	                html_parts.append(f"<p>- {label}: 예측 점수 {predicted:.2f}{warning}</p>")

	            if plotted:
	                ax.set_title(f"{speaker}의 부정 감정 변화 및 예측")
	                ax.set_xlabel("발화 순서")
	                ax.set_ylabel("감정 점수")
	                ax.set_ylim(0, max(100, max_y + 10))
	                ax.legend()
	                ax.grid(True)
	                buf = BytesIO()
	                # Act on this figure explicitly, not on pyplot's current figure.
	                fig.tight_layout()
	                fig.savefig(buf, format='png')
	                img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
	                html_parts.append(f"<img src='data:image/png;base64,{img_base64}'/><hr/>")
	            else:
	                html_parts.append("<p>⚠️ 시각화할 수 있는 감정이 없습니다.</p><hr/>")
	        finally:
	            # Close on every path; otherwise speakers with nothing to plot
	            # (or an exception mid-way) leave the figure open in pyplot.
	            plt.close(fig)

	    return "".join(html_parts)