import os
import re
import shutil
import tempfile

import yaml
import gradio as gr
import torch
from sentence_transformers import SentenceTransformer, util

# File paths
KNOWLEDGE_FILE = "company_knowledge.md"
PERSONA_FILE = "persona.yaml"
CHITCHAT_FILE = "chitchat.yaml"
KEYWORD_MAP_FILE = "keyword_map.yaml"
CEO_VIDEO_FILE = "ceo_video.mp4"
CEO_IMG_FILE = "ceo.jpg"  # used when needed


def load_yaml(file_path, default_data=None):
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception:
        data = None
    if data is None:  # missing, unreadable, or empty file
        return default_data if default_data is not None else []
    return data


def parse_knowledge_base(file_path):
    faqs = []
    if not os.path.exists(file_path):
        return []
    with open(file_path, encoding="utf-8") as f:
        content = f.read()
    # Match "Q: ...\nA: ..." blocks separated by blank lines (or ending at EOF)
    blocks = re.findall(r"Q:\s*(.*?)\nA:\s*(.*?)(?=(\n{2,}Q:|\Z))", content, re.DOTALL)
    for q, a, _ in blocks:
        faqs.append({"question": q.strip(), "answer": a.strip()})
    return faqs


# Load data
persona = load_yaml(PERSONA_FILE, {})
chitchat_map = load_yaml(CHITCHAT_FILE, [])
keyword_map = load_yaml(KEYWORD_MAP_FILE, [])
knowledge_base = parse_knowledge_base(KNOWLEDGE_FILE)
kb_questions = [item['question'] for item in knowledge_base]
kb_answers = [item['answer'] for item in knowledge_base]

# Free embedding model (a plain multilingual DistilBERT; sentence-transformers
# wraps it with mean pooling since it is not a dedicated sentence model)
model = SentenceTransformer('distilbert-base-multilingual-cased')
if kb_questions:
    kb_embeddings = model.encode(kb_questions, convert_to_tensor=True)
else:
    kb_embeddings = None


# Apply strikethrough to the salary figures
def apply_strike(text, del_section="6000~6500만원, 성과급 1800~2400만원"):
    # Only strike through answers that actually contain the salary figures
    if del_section in text:
        return text.replace(
            del_section,
            f"<del>{del_section}</del>"
        )
    return text


# Chitchat matching (greetings, etc.)
def find_chitchat(user_question):
    uq = user_question.lower()
    for chat in chitchat_map:
        if any(kw in uq for kw in chat.get('keywords', [])):
            return chat['answer']
    return None


# Keyword-based question mapping (strengthened for welfare: vacation policy, training, benefits, etc.)
def map_user_question_to_knowledge(user_question):
    uq = user_question.lower()
    for item in keyword_map:
        for kw in item.get('keywords', []):
            if kw in uq:
                return item['question']
    return None


def find_answer_by_question(q):
    for item in knowledge_base:
        if item['question'] == q:
            return item['answer']
    return None


def find_answer_by_keywords(user_question):
    # Note: entries parsed from the markdown file have no 'keywords' key,
    # so this only matches if keywords are added to knowledge_base entries.
    uq = user_question.lower()
    for item in knowledge_base:
        for kw in item.get('keywords', []):
            if kw in uq:
                return item['answer']
    return None


def best_faq_answer(user_question):
    uq = user_question.strip()
    if not uq:
        return "무엇이 궁금하신지 말씀해 주세요!"
    chit = find_chitchat(uq)
    if chit:
        return chit
    # (1) Keyword-map lookup first (welfare and salary handled separately)
    mapped_q = map_user_question_to_knowledge(uq)
    if mapped_q:
        answer = find_answer_by_question(mapped_q)
        if answer:
            # Welfare topics get no strikethrough; only salary ('연봉') answers do
            if "연봉" in mapped_q:
                return apply_strike(answer)
            return answer
    # (2) Direct keyword matching against the knowledge base (welfare-related keywords should be strengthened!)
    answer = find_answer_by_keywords(uq)
    if answer:
        return answer
    # (3) Soft matching via embedding cosine similarity
    if kb_embeddings is not None and len(kb_answers) > 0:
        q_emb = model.encode([uq], convert_to_tensor=True)
        scores = util.cos_sim(q_emb, kb_embeddings)[0]
        best_idx = int(torch.argmax(scores))
        best_question = kb_questions[best_idx]
        # If a welfare question lands on a salary answer, prefer the welfare answer;
        # the check below implements that preference
        복지가능 = ["복지", "휴가", "교육", "행사", "동호회", "복리후생", "제도"]
        연봉가능 = ["연봉", "급여", "월급", "임금", "보상", "봉급", "처우"]
        if any(w in uq for w in 복지가능) and not any(w in best_question for w in 연봉가능):
            return kb_answers[best_idx]
        # Strikethrough only for salary answers
        if "연봉" in best_question or "급여" in best_question:
            return apply_strike(kb_answers[best_idx])
        return kb_answers[best_idx]
    # (4) Fallback
    return persona.get('style', {}).get('unknown_answer', "아직 준비되지 않은 질문입니다. 다른 질문도 해주세요!")


# Copy the CEO video to a fresh temp file for every question so autoplay reliably triggers
def get_temp_video_copy():
    temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    temp_filepath = temp_file.name
    temp_file.close()
    shutil.copyfile(CEO_VIDEO_FILE, temp_filepath)
    return temp_filepath


def chat_interface(message, history):
    bot_response = best_faq_answer(message)
    history.append((message, bot_response))
    temp_video_path = get_temp_video_copy()
    # The answer may contain an HTML <del> strikethrough, which the Chatbot can render
    return history, "", gr.update(value=temp_video_path, autoplay=True, interactive=False, elem_id="ceo-video-panel")


with gr.Blocks(theme=gr.themes.Soft(), css="style.css") as demo:
    with gr.Row(elem_id="main-row"):
        with gr.Column(scale=1, min_width=350):
            video_player = gr.Video(
                value=CEO_VIDEO_FILE,
                autoplay=False,
                loop=False,
                interactive=False,
                height=350,
                elem_id="ceo-video-panel"
            )
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="",
                height=350,
                elem_id="chatbot-box",
                show_copy_button=True
            )
            with gr.Row():
                msg_input = gr.Textbox(placeholder="무엇이든 물어보세요.", scale=4, show_label=False)
                send_btn = gr.Button("전송", scale=1, min_width=80)
            gr.Examples(
                examples=["복지 뭐 있어?", "휴가 제도 설명해줘", "연봉 알려줘", "동호회 행사?", "식사제공?", "주력제품", "조직문화"],
                inputs=msg_input
            )

    # Wire up events
    outputs_list = [chatbot, msg_input, video_player]
    msg_input.submit(chat_interface, [msg_input, chatbot], outputs_list)
    send_btn.click(chat_interface, [msg_input, chatbot], outputs_list)

if __name__ == "__main__":
    demo.launch()
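
# ---------------------------------------------------------------------------
# Illustrative sketch of the expected data-file layouts, inferred from the
# parsing and lookup code above (assumed, not copied from the real files;
# the actual questions, keywords, and answers will differ):
#
# company_knowledge.md
#   Q: <question text>
#   A: <answer text>
#
#   Q: <next question>
#   A: <next answer>
#
# keyword_map.yaml
#   - question: "<exact question text from company_knowledge.md>"
#     keywords: ["복지", "휴가"]
#
# chitchat.yaml
#   - keywords: ["안녕", "하이"]
#     answer: "안녕하세요! 무엇이 궁금하신가요?"
#
# persona.yaml
#   style:
#     unknown_answer: "<fallback answer text>"
# ---------------------------------------------------------------------------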