Spaces:

JeongHyunsung
/

Humanity

Sleeping

App Files Files Community

Humanity / app.py

JeongHyunsung

Update app.py

8d4d16e verified 3 months ago

raw

history blame contribute delete

8.04 kB

	import torch
	import torch.nn as nn
	from transformers import AutoTokenizer, AutoModel
	import streamlit as st
	import os
	import numpy as np

	# Page-wide stylesheet injected once at the top of the script. It defines the
	# `.big-title`, `.sub-info` and `.card` classes used by the HTML fragments
	# rendered in the tabs below.
	_PAGE_CSS = """
	<style>
	.big-title {
	font-size: 1.8em;
	font-weight: 800;
	margin-bottom: 0.2em;
	}
	.sub-info {
	font-size: 1.1em;
	color: #666;
	margin-bottom: 1.2em;
	}
	.card {
	background-color: #f1f3f6;
	padding: 1.2em;
	border-left: 5px solid #3366cc;
	border-radius: 6px;
	margin-bottom: 1em;
	}
	</style>
	"""
	# unsafe_allow_html is required so the <style> element is not escaped.
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)


	# Emotion label names, 44 in total. The order matters: predict_emotion zips
	# this list against the classifier's output vector, so index i here names
	# output unit i. Do not reorder or deduplicate.
	KOTE_LABELS = [
		"불평/불만",
		"환영/호의",
		"감동/감탄",
		"지긋지긋",
		"고마움",
		"슬픔",
		"화남/분노",
		"존경",
		"기대감",
		"우쭐댐/무시함",
		"안타까움/실망",
		"비장함",
		"의심/불신",
		"뿌듯함",
		"편안/쾌적",
		"신기함/관심",
		"아껴주는",
		"부끄러움",
		"공포/무서움",
		"절망",
		"한심함",
		"역겨움/징그러움",
		"짜증",
		"어이없음",
		"없음",
		"패배/자기혐오",
		"귀찮음",
		"힘듦/지침",
		"즐거움/신남",
		"깨달음",
		"죄책감",
		"증오/혐오",
		"흐뭇함(귀여움/예쁨)",
		"당황/난처",
		"경악",
		"부담/안_내킴",
		"서러움",
		"재미없음",
		"불쌍함/연민",
		"놀람",
		"행복",
		"불안/걱정",
		"기쁨",
		"안심/신뢰",
	]

	class MLPClassifier(nn.Module):
		"""Three-layer MLP head mapping a sentence embedding to per-label logits.

		Layout: Linear -> BatchNorm1d -> ReLU -> Dropout(0.3), twice
		(widths 512 then 256), followed by a final Linear to ``num_labels``.
		The output is raw logits; no activation is applied here.

		Args:
			input_dim: Size of the input feature vector (default 1024).
			num_labels: Number of output logits (default 44).
		"""

		def __init__(self, input_dim=1024, num_labels=44):
			super().__init__()
			wide, narrow, drop_p = 512, 256, 0.3
			layers = [
				nn.Linear(input_dim, wide),
				nn.BatchNorm1d(wide),
				nn.ReLU(),
				nn.Dropout(drop_p),
				nn.Linear(wide, narrow),
				nn.BatchNorm1d(narrow),
				nn.ReLU(),
				nn.Dropout(drop_p),
				nn.Linear(narrow, num_labels),
			]
			# The attribute name `mlp` and the layer positions define the
			# state_dict keys (mlp.0.weight, mlp.1.running_mean, ...), so they
			# must stay as-is for saved checkpoints to load.
			self.mlp = nn.Sequential(*layers)

		def forward(self, x):
			"""Return logits of shape (batch, num_labels) for input (batch, input_dim)."""
			return self.mlp(x)

	@st.cache_resource
	def load_model():
		"""Load the tokenizer, the RoBERTa backbone and the trained MLP head.

		Decorated with ``st.cache_resource`` so Streamlit keeps the returned
		objects and does not rebuild them on every script rerun.

		Returns:
			A ``(tokenizer, base_model, mlp_model)`` tuple. Both models are in
			eval mode and live on the CPU.

		Raises:
			FileNotFoundError: If ``checkpoints/mlp_model.pth`` does not exist
				(path is relative to the current working directory).
		"""
		device = torch.device("cpu")
		backbone_name = "klue/roberta-large"

		tokenizer = AutoTokenizer.from_pretrained(backbone_name)
		base_model = AutoModel.from_pretrained(backbone_name).eval()

		ckpt_path = os.path.join("checkpoints", "mlp_model.pth")
		# The checkpoint is fed straight into load_state_dict, i.e. it is a plain
		# mapping of tensors. weights_only=True makes torch.load refuse arbitrary
		# pickled objects instead of executing them.
		state_dict = torch.load(ckpt_path, map_location=device, weights_only=True)

		mlp_model = MLPClassifier()
		mlp_model.load_state_dict(state_dict)
		# Eval mode: BatchNorm uses its running statistics and Dropout is disabled.
		mlp_model.eval()
		return tokenizer, base_model, mlp_model

	# Module-level handles read by predict_emotion. load_model is wrapped in
	# st.cache_resource, so this line is cheap on reruns after the first load.
	tokenizer, base_model, mlp_model = load_model()

	def predict_emotion(text, top_k=5):
		"""Score a sentence against every label in KOTE_LABELS.

		The sentence is tokenized, passed through the backbone, and the hidden
		state at position 0 is fed to the MLP head. A sigmoid is applied to each
		logit independently, so the scores do not sum to 1.

		Args:
			text: The input sentence.
			top_k: How many of the highest-scoring labels to return.

		Returns:
			A tuple ``(top, probs)`` where ``top`` is a list of up to ``top_k``
			``(label, score)`` pairs sorted by descending score, and ``probs`` is
			the full NumPy score vector in KOTE_LABELS order.
		"""
		# One sentence per call, so there is nothing to pad against. Padding to
		# max_length=512 only made the backbone process hundreds of masked pad
		# tokens on CPU. Truncation still caps long inputs at 512 tokens.
		encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

		# no_grad covers the whole forward pass; .numpy() would fail on a tensor
		# that still requires grad.
		with torch.no_grad():
			outputs = base_model(**encoded)
			cls_emb = outputs.last_hidden_state[:, 0, :]
			logits = mlp_model(cls_emb)
			probs = torch.sigmoid(logits).squeeze(0).numpy()

		ranked = sorted(zip(KOTE_LABELS, probs), key=lambda pair: pair[1], reverse=True)
		return ranked[:top_k], probs

	# Sentence inserted into the text area by the "load example" button.
	_EXAMPLE_SENTENCE = "그걸 이제 말해줘? 친절하네 정말"


	def _load_example_sentence():
		"""Button callback: write the example sentence into the text area's state.

		Callbacks run before the script reruns, so the keyed text area picks up
		the new value when it is created.
		"""
		st.session_state.text_input = _EXAMPLE_SENTENCE


	# One tab per page section; the `with tabs[i]:` blocks below index this list.
	tabs = st.tabs(["감정 분석 체험", "AI는 어떻게 감정을 이해할까?", "Few-shot Fine-tuning이란?", "활용과 의의", "기타 자료"])

	# Tab 0: interactive demo (enter a sentence, run the model, show top scores).
	with tabs[0]:
		st.markdown('<div class="big-title">🎭 한국어 감정 분석 AI 체험</div>', unsafe_allow_html=True)
		st.markdown('<div class="sub-info">2025년 1학기 디지털 인문학 입문 (SLA23501) · <b>Team 샐러드볼</b><br>강수현 · 김동우 · 정예은 · 정현성 · 최종윤</div>', unsafe_allow_html=True)

		st.markdown("""
	<div class="card">
	인공지능은 입력된 문장을 분석해 감정이 어떻게 표현되었는지를 예측합니다.<br>
	아래에 문장을 입력하거나 예시 문장을 불러온 후, 감정 예측 버튼을 눌러 체험해보세요.
	</div>
	""", unsafe_allow_html=True)

		# Seed the widget state once so the text area starts empty.
		if "text_input" not in st.session_state:
			st.session_state.text_input = ""

		col1, col2 = st.columns([1, 1])
		with col1:
			st.button("📌 예시 문장 불러오기", on_click=_load_example_sentence)
		with col2:
			predict_clicked = st.button("🔍 감정 예측하기")

		# The widget is bound to session state through `key`. Passing a changing
		# `value=` and writing the result back afterwards gives the widget a new
		# identity on every edit, which makes Streamlit drop every second edit.
		text = st.text_area(
			"✍️ 문장을 입력하세요:",
			key="text_input",
			height=120,
			placeholder="예: 오늘 하루 정말 행복했어요.",
		)

		if predict_clicked:
			if text.strip():
				with st.spinner("AI가 감정을 분석 중입니다..."):
					results, _ = predict_emotion(text)

				top_emotion, top_prob = results[0]
				# Each st.markdown call is rendered as its own element, so the
				# card <div> must be opened and closed within a single call.
				st.markdown(
					f'<div class="card"><div class="highlight">✅ 가장 강하게 표현된 감정: <b>{top_emotion}</b> ({top_prob:.2f})</div></div>',
					unsafe_allow_html=True
				)

				st.subheader("📊 상위 감정 결과")
				for label, prob in results:
					st.markdown(f"- {label}: `{prob:.3f}`")

				st.subheader("📈 확률 분포 (Top 5)")
				st.bar_chart(dict(results))
			else:
				# Empty or whitespace-only input: ask for a sentence instead of
				# running the model.
				st.warning("문장을 먼저 입력해주세요.")


	# Tab 1: static explainer describing the backbone -> MLP -> score pipeline.
	_HOW_TITLE_HTML = '<div class="big-title">🤖 인공지능은 감정을 어떻게 이해할까요?</div>'
	_HOW_CARD_HTML = """
	<div class="card">
	인공지능은 문장을 '숫자의 벡터'로 바꾸어 이해합니다.<br><br>
	이 과정은 다음과 같은 단계로 이루어집니다:
	<ol>
	<li><b>사전학습 언어 모델</b>(KLUE-RoBERTa)이 문장을 읽고 핵심 의미를 추출합니다.</li>
	<li>이 결과를 바탕으로 <b>감정 분류기</b>(MLP)가 감정을 예측합니다.</li>
	<li>각 감정에 대한 가능성을 <b>확률로</b> 보여줍니다.</li>
	</ol>
	</div>
	"""
	with tabs[1]:
		# Title first, then the card, each as its own markdown element.
		for _fragment in (_HOW_TITLE_HTML, _HOW_CARD_HTML):
			st.markdown(_fragment, unsafe_allow_html=True)

	# Tab 2: static explainer about training only the classifier head.
	_FEWSHOT_TITLE_HTML = '<div class="big-title">🧠 Few-shot Fine-tuning이란?</div>'
	_FEWSHOT_CARD_HTML = """
	<div class="card">
	우리가 사용하는 KLUE-RoBERTa 모델은 이미 수많은 문장을 학습한 거대한 언어 모델입니다.<br><br>
	하지만 감정 분석이라는 특정한 작업에 맞게 조금 더 학습시키는 과정이 필요합니다.<br><br>
	이때 전체 모델을 다시 학습하지 않고, 마지막 분류기(MLP)만 학습하는 방식이 바로
	<b>Few-shot Fine-tuning</b>입니다.<br><br>
	이 방법을 통해 적은 양의 감정 데이터만으로도 높은 성능을 달성할 수 있습니다.
	</div>
	"""
	with tabs[2]:
		# Title first, then the card, each as its own markdown element.
		for _fragment in (_FEWSHOT_TITLE_HTML, _FEWSHOT_CARD_HTML):
			st.markdown(_fragment, unsafe_allow_html=True)

	# Tab 3: static text listing possible applications of the technique.
	_USES_TITLE_HTML = '<div class="big-title">📌 이 기술은 어디에 쓰일 수 있을까요?</div>'
	_USES_CARD_HTML = """
	<div class="card">
	이 감정 분석 기술은 단순히 문장의 감정을 분류하는 데 그치지 않고,
	<b>디지털 사회에서의 감정 흐름</b>과 <b>공론장의 정서적 구조</b>를 이해하는 데까지 확장될 수 있습니다.<br><br>

	특히 다음과 같은 분야에 활용될 수 있습니다:
	<ul>
	<li>😡 <b>악성 댓글 탐지</b>: 유해 표현, 혐오 표현을 조기에 감지하고 필터링</li>
	<li>📈 <b>댓글 감정 흐름 시각화</b>: 유튜브나 뉴스 댓글에서 감정 전파 구조 분석</li>
	<li>📰 <b>사회 이슈 공감/혐오 반응 추적</b>: 특정 사건에 대한 감정 반응 모니터링</li>
	<li>💬 <b>온라인 공론장 감정 전염 연구</b>: 감정이 댓글-대댓글로 어떻게 확산되는지 정량적 분석</li>
	</ul>

	나아가 이 기술은 <b>디지털 인문학의 새로운 분석 도구</b>로 활용되어,
	텍스트 기반 여론의 정서적 구조를 보다 깊이 있게 이해하는 기반이 될 수 있습니다.
	</div>
	"""
	with tabs[3]:
		# Title first, then the card, each as its own markdown element.
		for _fragment in (_USES_TITLE_HTML, _USES_CARD_HTML):
			st.markdown(_fragment, unsafe_allow_html=True)

	# Tab 4: supplementary figures, shown top to bottom in this order.
	# Paths are relative to the working directory the app is started from.
	_FIGURE_PATHS = (
		"image/clustering-2.png",
		"image/clustering-10.png",
		"image/clustering-plutchiks.png",
		"image/clustering-plutchiks-bert.png",
	)
	with tabs[4]:
		# NOTE(review): `use_column_width` is reported as deprecated in newer
		# Streamlit releases; confirm the pinned version before replacing it.
		for _figure in _FIGURE_PATHS:
			st.image(_figure, use_column_width=True)