# Hugging Face Space "Chatbot_ver01" by Manggomee (commit 2e6852b)
import os
import yaml
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import torch
import shutil
import tempfile
# ํŒŒ์ผ ๊ฒฝ๋กœ
KNOWLEDGE_FILE = "company_knowledge.md"
PERSONA_FILE = "persona.yaml"
CHITCHAT_FILE = "chitchat.yaml"
KEYWORD_MAP_FILE = "keyword_map.yaml"
CEO_VIDEO_FILE = "ceo_video.mp4"
CEO_IMG_FILE = "ceo.jpg" # ํ•„์š”์‹œ ์‚ฌ์šฉ
def load_yaml(file_path, default_data=None):
    """Load a YAML document, falling back to *default_data* on failure.

    Args:
        file_path: Path to the YAML file.
        default_data: Value returned when the file is missing/unreadable or
            contains invalid YAML; defaults to an empty list when None.

    Returns:
        The parsed YAML content, or the fallback value.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return yaml.safe_load(f)
    # Narrowed from a blanket `except Exception`: only I/O and YAML parse
    # errors are expected here; anything else should surface as a bug.
    except OSError:
        return default_data if default_data is not None else []
    except yaml.YAMLError:
        return default_data if default_data is not None else []
def parse_knowledge_base(file_path):
    """Parse "Q: ... / A: ..." blocks out of the markdown knowledge file.

    Returns:
        A list of {"question": ..., "answer": ...} dicts (whitespace-stripped);
        an empty list when the file does not exist.
    """
    import re

    if not os.path.exists(file_path):
        return []
    with open(file_path, encoding="utf-8") as f:
        text = f.read()
    # An answer runs (lazily) until a blank-line-separated "Q:" or end of file.
    pattern = re.compile(r"Q:\s*(.*?)\nA:\s*(.*?)(?=(\n{2,}Q:|\Z))", re.DOTALL)
    return [
        {"question": m.group(1).strip(), "answer": m.group(2).strip()}
        for m in pattern.finditer(text)
    ]
# ๋ฐ์ดํ„ฐ ๋กœ๋“œ
persona = load_yaml(PERSONA_FILE, {})
chitchat_map = load_yaml(CHITCHAT_FILE, [])
keyword_map = load_yaml(KEYWORD_MAP_FILE, [])
knowledge_base = parse_knowledge_base(KNOWLEDGE_FILE)
kb_questions = [item['question'] for item in knowledge_base]
kb_answers = [item['answer'] for item in knowledge_base]
# ๋ฌด๋ฃŒ ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ
model = SentenceTransformer('distilbert-base-multilingual-cased')
if kb_questions:
kb_embeddings = model.encode(kb_questions, convert_to_tensor=True)
else:
kb_embeddings = None
# ์‚ญ์ œ์„ (์ทจ์†Œ์„ ) ์ ์šฉ ํ•จ์ˆ˜
def apply_strike(text, del_section="6000~6500๋งŒ์›, ์„ฑ๊ณผ๊ธ‰ 1800~2400๋งŒ์›"):
# ๊ธ‰์—ฌ ์ •๋ณด๊ฐ€ ํฌํ•จ๋œ ๋‹ต๋ณ€์ผ ๋•Œ๋งŒ strike-through
if del_section in text:
return text.replace(
del_section,
f"<s>{del_section}</s>"
)
return text
# Chitchat(์ธ์‚ฌ ๋“ฑ) ๋งค์นญ
def find_chitchat(user_question):
uq = user_question.lower()
for chat in chitchat_map:
if any(kw in uq for kw in chat.get('keywords', [])):
return chat['answer']
return None
# ํ‚ค์›Œ๋“œ ๊ธฐ๋ฐ˜ Q ๋งคํ•‘ (๋ณต์ง€: ํœด๊ฐ€ ์ œ๋„, ๊ต์œก, ๋ณต๋ฆฌํ›„์ƒ ๋“ฑ ๊ฐ•ํ™”)
def map_user_question_to_knowledge(user_question):
uq = user_question.lower()
for item in keyword_map:
for kw in item.get('keywords', []):
if kw in uq:
return item['question']
return None
def find_answer_by_question(q):
    """Return the knowledge-base answer whose question exactly equals *q*, else None."""
    hits = (entry['answer'] for entry in knowledge_base if entry['question'] == q)
    return next(hits, None)
def find_answer_by_keywords(user_question):
    """Return the first knowledge-base answer whose 'keywords' hit the question.

    NOTE(review): parse_knowledge_base() only produces 'question'/'answer'
    keys, so 'keywords' appears to always be absent unless entries are
    enriched elsewhere — confirm whether this path ever fires.
    """
    lowered = user_question.lower()
    for entry in knowledge_base:
        matched = any(kw in lowered for kw in entry.get('keywords', []))
        if matched:
            return entry['answer']
    return None
def best_faq_answer(user_question):
    """Route a user question to the best available answer.

    Resolution order:
      1. chitchat (greetings / small talk)
      2. keyword-map lookup -> canonical knowledge-base question
      3. direct keyword match against knowledge-base entries
      4. embedding cosine-similarity soft match
      5. persona fallback message
    Salary-related answers get HTML strike-through via apply_strike().
    """
    uq = user_question.strip()
    if not uq:
        return "무엇이 궁금하신지 말씀해 주세요!"
    chit = find_chitchat(uq)
    if chit:
        return chit
    # (1) Keyword-map lookup first (welfare vs. salary kept separate).
    mapped_q = map_user_question_to_knowledge(uq)
    if mapped_q:
        answer = find_answer_by_question(mapped_q)
        if answer:
            # Strike-through only applies to salary ("연봉") answers.
            if "연봉" in mapped_q:
                return apply_strike(answer)
            return answer
    # (2) Direct keyword match on knowledge-base entries.
    answer = find_answer_by_keywords(uq)
    if answer:
        return answer
    # (3) Embedding-similarity soft matching (was `len(kb_answers) > 0`).
    if kb_embeddings is not None and kb_answers:
        q_emb = model.encode([uq], convert_to_tensor=True)
        scores = util.cos_sim(q_emb, kb_embeddings)[0]
        best_idx = int(torch.argmax(scores))
        best_question = kb_questions[best_idx]
        # Prefer the soft-matched answer outright when the user asked about
        # welfare and the top match is not a salary question.
        welfare_terms = ["복지", "휴가", "교육", "행사", "동호회", "복리후생", "제도"]
        salary_terms = ["연봉", "급여", "월급", "임금", "보상", "봉급", "처우"]
        if any(w in uq for w in welfare_terms) and not any(w in best_question for w in salary_terms):
            return kb_answers[best_idx]
        # Strike-through only on salary-related answers.
        if "연봉" in best_question or "급여" in best_question:
            return apply_strike(kb_answers[best_idx])
        return kb_answers[best_idx]
    # (4) Fallback from the persona config.
    return persona.get('style', {}).get('unknown_answer', "아직 준비되지 않은 질문입니다. 다른 질문도 해주세요!")
# ์งˆ๋ฌธ ๋ฐ›์„ ๋•Œ๋งˆ๋‹ค CEO ์˜์ƒ ๋ณต์‚ฌ๋ณธ ์ž„์‹œํŒŒ์ผ๋กœ ์ƒ์„ฑ โ†’ autoplay ํ™•์‹ค
def get_temp_video_copy():
temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
temp_filepath = temp_file.name
temp_file.close()
shutil.copyfile(CEO_VIDEO_FILE, temp_filepath)
return temp_filepath
def chat_interface(message, history):
    """Gradio handler: answer *message*, extend history, refresh the CEO video.

    Returns:
        (updated history, "" to clear the textbox, video component update).
        Answers may contain HTML (<s> strike-through), rendered by the Chatbot.
    """
    reply = best_faq_answer(message)
    history.append((message, reply))
    video_update = gr.update(
        value=get_temp_video_copy(),
        autoplay=True,
        interactive=False,
        elem_id="ceo-video-panel",
    )
    return history, "", video_update
# ---- Gradio UI layout ----
# NOTE(review): indentation was reconstructed from a whitespace-mangled paste;
# the input row and examples are assumed to sit at the top Blocks level,
# below the main video/chat row — confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft(), css="style.css") as demo:
    with gr.Row(elem_id="main-row"):
        with gr.Column(scale=1, min_width=350):
            # Left panel: CEO video, replayed via chat_interface on each answer.
            video_player = gr.Video(
                value=CEO_VIDEO_FILE,
                autoplay=False, loop=False, interactive=False,
                height=350, elem_id="ceo-video-panel"
            )
        with gr.Column(scale=2):
            # Right panel: chat transcript.
            chatbot = gr.Chatbot(
                label="",
                height=350,
                elem_id="chatbot-box",
                show_copy_button=True
            )
    with gr.Row():
        msg_input = gr.Textbox(placeholder="무엇이든 물어보세요.", scale=4, show_label=False)
        send_btn = gr.Button("전송", scale=1, min_width=80)
    gr.Examples(
        examples=["복지 뭐 있어?", "휴가 제도 설명해줘", "연봉 알려줘", "동호회 행사?", "식사제공?", "주력제품", "조직문화"],
        inputs=msg_input
    )
    # Wiring: both Enter in the textbox and the send button run the same handler.
    outputs_list = [chatbot, msg_input, video_player]
    msg_input.submit(chat_interface, [msg_input, chatbot], outputs_list)
    send_btn.click(chat_interface, [msg_input, chatbot], outputs_list)
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()