Manggomee committed on
Commit
cefd5b3
·
1 Parent(s): bfe5511

add: app .py 파일 추가

Browse files
Files changed (1) hide show
  1. chatbot_ver1/app.py +175 -0
chatbot_ver1/app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import yaml
3
+ import gradio as gr
4
+ from sentence_transformers import SentenceTransformer, util
5
+ import torch
6
+ import shutil
7
+ import tempfile
8
+
9
# File paths (relative names, resolved against the working directory).
KNOWLEDGE_FILE = "company_knowledge.md"  # Q:/A: knowledge base text
PERSONA_FILE = "persona.yaml"  # persona settings
CHITCHAT_FILE = "chitchat.yaml"  # chit-chat keyword -> answer entries
KEYWORD_MAP_FILE = "keyword_map.yaml"  # keyword -> knowledge-base question
CEO_VIDEO_FILE = "ceo_video.mp4"  # video shown next to the chat
CEO_IMG_FILE = "ceo.jpg"  # use when needed
16
+
17
def load_yaml(file_path, default_data=None):
    """Load a YAML file, falling back to a default when it cannot be used.

    Args:
        file_path: Path of the YAML file to read (UTF-8).
        default_data: Value returned when the file is missing, unreadable,
            not valid YAML, or empty. ``None`` means "use an empty list".

    Returns:
        The parsed YAML content, or the fallback value.
    """
    fallback = default_data if default_data is not None else []

    # Read first so that I/O problems are handled separately from YAML ones.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
    except (OSError, UnicodeDecodeError):
        return fallback

    try:
        data = yaml.safe_load(text)
    except yaml.YAMLError:
        return fallback

    # safe_load yields None for an empty document; callers iterate over the
    # result or call .get() on it, so hand back the fallback instead.
    return fallback if data is None else data
23
+
24
def parse_knowledge_base(file_path):
    """Parse a ``Q: ... / A: ...`` text file into a list of FAQ dicts.

    Each entry starts with ``Q:``, continues with ``A:`` on a following
    line, and ends at a blank-line-separated next ``Q:`` or at end of file.

    Args:
        file_path: Path of the knowledge-base file (UTF-8).

    Returns:
        A list of ``{"question": ..., "answer": ...}`` dicts with surrounding
        whitespace stripped; an empty list when the file does not exist.
    """
    import re

    if os.path.exists(file_path):
        with open(file_path, encoding="utf-8") as kb_file:
            raw_text = kb_file.read()
        # Q:\s*(...) \nA:\s*(...) up to \n{2,}Q: or end of text
        entry_pattern = re.compile(
            r"Q:\s*(.*?)\nA:\s*(.*?)(?=(\n{2,}Q:|\Z))", re.DOTALL
        )
        return [
            {"question": m.group(1).strip(), "answer": m.group(2).strip()}
            for m in entry_pattern.finditer(raw_text)
        ]
    return []
36
+
37
# Load data: persona / chit-chat / keyword-map YAML plus the Q/A knowledge base.
persona = load_yaml(PERSONA_FILE, {})
chitchat_map = load_yaml(CHITCHAT_FILE, [])
keyword_map = load_yaml(KEYWORD_MAP_FILE, [])
knowledge_base = parse_knowledge_base(KNOWLEDGE_FILE)
kb_questions = [entry["question"] for entry in knowledge_base]
kb_answers = [entry["answer"] for entry in knowledge_base]

# Free embedding model; the knowledge-base questions are embedded once at
# import time, and kb_embeddings stays None when there are no questions.
model = SentenceTransformer('distilbert-base-multilingual-cased')
kb_embeddings = (
    model.encode(kb_questions, convert_to_tensor=True) if kb_questions else None
)
51
+
52
def apply_strike(text, del_section="6000~6500만원, 성과급 1800~2400만원"):
    """Wrap every occurrence of ``del_section`` in ``text`` with ``<s>`` tags.

    Used to render the salary figures in an answer as struck-through HTML.

    Args:
        text: The answer text.
        del_section: The exact substring to strike through.

    Returns:
        ``text`` with each occurrence of ``del_section`` wrapped in
        ``<s>...</s>``; ``text`` unchanged when the substring is absent or
        ``del_section`` is empty.
    """
    # str.replace with an empty needle would insert the tags between every
    # character, so treat an empty section as "nothing to strike".
    if not del_section:
        return text
    return text.replace(del_section, f"<s>{del_section}</s>")
61
+
62
def find_chitchat(user_question):
    """Return the chit-chat (greetings etc.) answer matching the question.

    Matching is a case-insensitive substring test of each entry's keywords
    against the question; the first matching entry in ``chitchat_map`` wins.

    Args:
        user_question: The user's message.

    Returns:
        The matching entry's ``'answer'``, or ``None`` when nothing matches.
    """
    uq = user_question.lower()
    for chat in chitchat_map or []:
        # The question is lowercased, so keywords must be lowercased too or
        # any keyword containing an uppercase letter could never match.
        # Empty/null keywords are skipped: "" is a substring of everything.
        needles = [
            str(kw).lower()
            for kw in (chat.get('keywords') or [])
            if kw is not None and kw != ""
        ]
        if any(needle in uq for needle in needles):
            return chat['answer']
    return None
69
+
70
def map_user_question_to_knowledge(user_question):
    """Map a user question to a knowledge-base question via ``keyword_map``.

    Matching is a case-insensitive substring test of each entry's keywords
    against the question; the first matching entry in ``keyword_map`` wins.

    Args:
        user_question: The user's message.

    Returns:
        The matching entry's ``'question'``, or ``None`` when nothing matches.
    """
    uq = user_question.lower()
    for item in keyword_map or []:
        for kw in item.get('keywords') or []:
            # Empty/null keywords would match every question; skip them.
            if kw is None or kw == "":
                continue
            # Lowercase the keyword as well: the question already is.
            if str(kw).lower() in uq:
                return item['question']
    return None
78
+
79
def find_answer_by_question(q):
    """Return the answer of the first knowledge-base entry whose question equals ``q``.

    The comparison is an exact string match. Returns ``None`` when no entry
    in ``knowledge_base`` has that question.
    """
    return next(
        (entry['answer'] for entry in knowledge_base if entry['question'] == q),
        None,
    )
84
+
85
def find_answer_by_keywords(user_question):
    """Return the answer of the first knowledge-base entry with a matching keyword.

    Matching is a case-insensitive substring test of an entry's optional
    ``'keywords'`` list against the question.

    NOTE(review): entries built by ``parse_knowledge_base`` only carry
    ``'question'`` and ``'answer'``, so with that loader this never matches;
    it only takes effect once entries are given a ``'keywords'`` list.

    Args:
        user_question: The user's message.

    Returns:
        The matching entry's ``'answer'``, or ``None`` when nothing matches.
    """
    uq = user_question.lower()
    for item in knowledge_base or []:
        for kw in item.get('keywords') or []:
            # Empty/null keywords would match every question; skip them.
            if kw is None or kw == "":
                continue
            # Lowercase the keyword as well: the question already is.
            if str(kw).lower() in uq:
                return item['answer']
    return None
92
+
93
def best_faq_answer(user_question):
    """Pick the best reply for a user message.

    Resolution order:
      1. Empty input -> a prompt asking what the user wants to know.
      2. Chit-chat keyword match.
      3. ``keyword_map`` match resolved to a knowledge-base answer
         (salary answers get the strike-through applied).
      4. Keyword match directly on knowledge-base entries.
      5. Embedding similarity against the knowledge-base questions.
      6. The persona's ``unknown_answer`` (or a built-in default).

    Args:
        user_question: The user's message; ``None`` is treated as empty.

    Returns:
        The reply text, possibly containing ``<s>`` HTML tags.
    """
    uq = (user_question or "").strip()
    if not uq:
        return "무엇이 궁금하신지 말씀해 주세요!"

    chit = find_chitchat(uq)
    if chit:
        return chit

    # (1) Keyword map takes priority (welfare and salary are kept separate).
    mapped_q = map_user_question_to_knowledge(uq)
    if mapped_q:
        answer = find_answer_by_question(mapped_q)
        if answer:
            # Only the salary-level answer is struck through.
            if "연봉" in mapped_q:
                return apply_strike(answer)
            return answer

    # (2) Direct keyword match on the knowledge base.
    answer = find_answer_by_keywords(uq)
    if answer:
        return answer

    # (3) Soft matching by embedding similarity.
    if kb_embeddings is not None and len(kb_answers) > 0:
        q_emb = model.encode([uq], convert_to_tensor=True)
        scores = util.cos_sim(q_emb, kb_embeddings)[0]
        best_idx = int(torch.argmax(scores))

        welfare_terms = ("복지", "휴가", "교육", "행사", "동호회", "복리후생", "제도")
        salary_terms = ("연봉", "급여", "월급", "임금", "보상", "봉급", "처우")

        def mentions_salary(text):
            return any(term in text for term in salary_terms)

        # Welfare priority: when the user asks about welfare (and not about
        # salary) but the top hit is a salary question, take the best-scoring
        # non-salary question instead. If every question is about salary,
        # the top hit is kept.
        asks_welfare = any(term in uq for term in welfare_terms)
        if (
            asks_welfare
            and not mentions_salary(uq)
            and mentions_salary(kb_questions[best_idx])
        ):
            ranked = torch.argsort(scores, descending=True).tolist()
            best_idx = next(
                (i for i in ranked if not mentions_salary(kb_questions[i])),
                best_idx,
            )

        best_question = kb_questions[best_idx]
        # Strike-through applies to salary answers only.
        if "연봉" in best_question or "급여" in best_question:
            return apply_strike(kb_answers[best_idx])
        return kb_answers[best_idx]

    # (4) Fallback; tolerate a missing or null 'style' mapping.
    style = persona.get('style') or {}
    return style.get('unknown_answer', "아직 준비되지 않은 질문입니다. 다른 질문도 해주세요!")
131
+
132
def get_temp_video_copy():
    """Copy the CEO video to a new temporary ``.mp4`` file and return its path.

    A new copy is made on every call; per the original author's note, giving
    the player a fresh path each time is what makes autoplay trigger reliably.
    Nothing here deletes successful copies, so the caller owns the returned
    file.

    Returns:
        Path of the newly created temporary copy.

    Raises:
        OSError: If ``CEO_VIDEO_FILE`` cannot be copied. The temporary file
            is removed first, so failures do not leave empty files behind.
    """
    fd, temp_filepath = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)  # only the path is needed; copyfile reopens it
    try:
        shutil.copyfile(CEO_VIDEO_FILE, temp_filepath)
    except OSError:
        try:
            os.remove(temp_filepath)
        except OSError:
            pass  # best-effort cleanup; report the original failure
        raise
    return temp_filepath
139
+
140
def chat_interface(message, history):
    """Gradio handler: answer ``message`` and refresh the video player.

    Appends the ``(message, reply)`` pair to ``history`` in place.

    Returns:
        A 3-tuple of the updated history, an empty string (written to the
        input textbox output), and a ``gr.update`` pointing the video player
        at a fresh temporary copy of the video with autoplay on.
    """
    reply = best_faq_answer(message)
    history.append((message, reply))
    # The reply may carry HTML (the <s> strike-through), so it is passed
    # along unmodified.
    video_update = gr.update(
        value=get_temp_video_copy(),
        autoplay=True,
        interactive=False,
        elem_id="ceo-video-panel",
    )
    return history, "", video_update
146
+
147
# UI layout: video panel on the left, chat on the right.
# NOTE(review): the nesting below is reconstructed from source whose
# indentation was lost -- confirm the input row and examples belong inside
# the chat column.
with gr.Blocks(theme=gr.themes.Soft(), css="style.css") as demo:
    with gr.Row(elem_id="main-row"):
        with gr.Column(scale=1, min_width=350):
            video_player = gr.Video(
                value=CEO_VIDEO_FILE,
                autoplay=False,
                loop=False,
                interactive=False,
                height=350,
                elem_id="ceo-video-panel",
            )
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="",
                height=350,
                elem_id="chatbot-box",
                show_copy_button=True,
            )
            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="무엇이든 물어보세요.",
                    scale=4,
                    show_label=False,
                )
                send_btn = gr.Button("전송", scale=1, min_width=80)
            gr.Examples(
                examples=[
                    "복지 뭐 있어?",
                    "휴가 제도 설명해줘",
                    "연봉 알려줘",
                    "동호회 행사?",
                    "식사제공?",
                    "주력제품",
                    "조직문화",
                ],
                inputs=msg_input,
            )

    # Wiring: Enter in the textbox and the send button run the same handler.
    outputs_list = [chatbot, msg_input, video_player]
    msg_input.submit(
        fn=chat_interface,
        inputs=[msg_input, chatbot],
        outputs=outputs_list,
    )
    send_btn.click(
        fn=chat_interface,
        inputs=[msg_input, chatbot],
        outputs=outputs_list,
    )

if __name__ == "__main__":
    demo.launch()