Spaces:
Sleeping
Sleeping
File size: 6,772 Bytes
2e6852b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
import os
import yaml
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import torch
import shutil
import tempfile
# File paths for the app's data and media assets.
KNOWLEDGE_FILE = "company_knowledge.md"  # markdown FAQ parsed by parse_knowledge_base (Q:/A: blocks)
PERSONA_FILE = "persona.yaml"            # persona config; supplies the 'unknown_answer' fallback
CHITCHAT_FILE = "chitchat.yaml"          # small-talk entries: keywords -> canned answer
KEYWORD_MAP_FILE = "keyword_map.yaml"    # keyword -> canonical knowledge-base question map
CEO_VIDEO_FILE = "ceo_video.mp4"         # CEO video re-copied and replayed on each answer
CEO_IMG_FILE = "ceo.jpg" # original note: "use when needed" — not referenced in this chunk
def load_yaml(file_path, default_data=None):
    """Load a YAML file, returning *default_data* when it cannot be read.

    Args:
        file_path: Path to the YAML document.
        default_data: Value returned on failure; an empty list when omitted.

    Returns:
        The parsed YAML document (may be ``None`` for an empty file), or the
        fallback value if the file is missing or malformed.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return yaml.safe_load(f)
    except (OSError, yaml.YAMLError):
        # Narrowed from a bare `except Exception`: only I/O and YAML parse
        # failures are expected here; anything else should surface as a bug
        # instead of being silently swallowed.
        return default_data if default_data is not None else []
def parse_knowledge_base(file_path):
    """Parse a markdown FAQ file of ``Q: .../A: ...`` blocks.

    Returns a list of ``{"question": ..., "answer": ...}`` dicts, or an empty
    list when the file does not exist.
    """
    import re

    if not os.path.exists(file_path):
        return []

    with open(file_path, encoding="utf-8") as f:
        text = f.read()

    # Each block: a Q: line, an A: body, terminated by a blank line before
    # the next Q: or by end of file.
    pattern = r"Q:\s*(.*?)\nA:\s*(.*?)(?=(\n{2,}Q:|\Z))"
    return [
        {"question": question.strip(), "answer": answer.strip()}
        for question, answer, _ in re.findall(pattern, text, re.DOTALL)
    ]
# Load data files at import time (each falls back to its default on failure).
persona = load_yaml(PERSONA_FILE, {})
chitchat_map = load_yaml(CHITCHAT_FILE, [])
keyword_map = load_yaml(KEYWORD_MAP_FILE, [])
knowledge_base = parse_knowledge_base(KNOWLEDGE_FILE)
kb_questions = [item['question'] for item in knowledge_base]
kb_answers = [item['answer'] for item in knowledge_base]
# Free multilingual embedding model, used for soft question matching.
model = SentenceTransformer('distilbert-base-multilingual-cased')
# Pre-compute question embeddings once; None when the knowledge base is empty.
if kb_questions:
    kb_embeddings = model.encode(kb_questions, convert_to_tensor=True)
else:
    kb_embeddings = None
def apply_strike(text, del_section="6000~6500๋ง์, ์ฑ๊ณผ๊ธ 1800~2400๋ง์"):
    """Wrap every occurrence of *del_section* in HTML strike-through tags.

    Only answers that actually contain the salary section are modified;
    all other text passes through unchanged.
    """
    if del_section not in text:
        return text
    return text.replace(del_section, f"<s>{del_section}</s>")
def find_chitchat(user_question):
    """Return a canned small-talk answer whose keywords appear in the question.

    Matching is case-insensitive on the question side; returns None when no
    chitchat entry matches.
    """
    lowered = user_question.lower()
    for entry in chitchat_map:
        for keyword in entry.get('keywords', []):
            if keyword in lowered:
                return entry['answer']
    return None
def map_user_question_to_knowledge(user_question):
    """Map a free-form question onto a canonical knowledge-base question.

    Scans the keyword map in order and returns the first entry whose keyword
    list hits the (lower-cased) question; None when nothing matches.
    """
    lowered = user_question.lower()
    for entry in keyword_map:
        if any(kw in lowered for kw in entry.get('keywords', [])):
            return entry['question']
    return None
def find_answer_by_question(q):
    """Return the stored answer for an exact question match, else None."""
    hits = (entry['answer'] for entry in knowledge_base if entry['question'] == q)
    return next(hits, None)
def find_answer_by_keywords(user_question):
    """Return the first knowledge-base answer whose keywords hit the question.

    NOTE(review): entries produced by parse_knowledge_base carry only
    'question'/'answer' keys, so `.get('keywords', [])` is always empty unless
    entries gain a 'keywords' field elsewhere — confirm the data actually
    provides keywords.
    """
    lowered = user_question.lower()
    for entry in knowledge_base:
        if any(kw in lowered for kw in entry.get('keywords', [])):
            return entry['answer']
    return None
def best_faq_answer(user_question):
    """Return the best answer for *user_question*.

    Resolution order:
      1. small-talk (chitchat) keyword match
      2. keyword-map lookup to a canonical knowledge-base question
      3. direct keyword match against knowledge-base entries
      4. embedding cosine-similarity soft match
      5. persona-configured fallback message

    Fixes vs. original: the fallback string literal was broken across two
    lines by extraction (syntax error) and has been rejoined; the two local
    keyword lists used identifiers containing replacement characters (invalid
    in Python identifiers) and are renamed to English locals.
    """
    uq = user_question.strip()
    if not uq:
        return "๋ฌด์์ด ๊ถ๊ธํ์ ์ง ๋ง์ํด ์ฃผ์ธ์!"
    # Small talk (greetings etc.) takes priority over FAQ lookup.
    chit = find_chitchat(uq)
    if chit:
        return chit
    # (1) keyword-map lookup first (keeps welfare vs. salary answers separate)
    mapped_q = map_user_question_to_knowledge(uq)
    if mapped_q:
        answer = find_answer_by_question(mapped_q)
        if answer:
            # Strike-through is applied only to salary-type answers.
            if "์ฐ๋ด" in mapped_q:
                return apply_strike(answer)
            return answer
    # (2) direct keyword matching against knowledge_base entries
    answer = find_answer_by_keywords(uq)
    if answer:
        return answer
    # (3) embedding-similarity soft matching
    if kb_embeddings is not None and len(kb_answers) > 0:
        q_emb = model.encode([uq], convert_to_tensor=True)
        scores = util.cos_sim(q_emb, kb_embeddings)[0]
        best_idx = int(torch.argmax(scores))
        best_question = kb_questions[best_idx]
        # When a welfare-flavored question accidentally soft-matches a salary
        # entry, prefer returning the matched answer without strike-through.
        welfare_words = ["๋ณต์ง", "ํด๊ฐ", "๊ต์ก", "ํ์ฌ", "๋ํธํ", "๋ณต๋ฆฌํ์", "์ ๋"]
        salary_words = ["์ฐ๋ด", "๊ธ์ฌ", "์๊ธ", "์๊ธ", "๋ณด์", "๋ด๊ธ", "์ฒ์ฐ"]
        if any(w in uq for w in welfare_words) and not any(w in best_question for w in salary_words):
            return kb_answers[best_idx]
        # Strike-through only on salary answers.
        if "์ฐ๋ด" in best_question or "๊ธ์ฌ" in best_question:
            return apply_strike(kb_answers[best_idx])
        return kb_answers[best_idx]
    # (4) fallback — default literal rejoined from the extraction-broken line.
    return persona.get('style', {}).get('unknown_answer', "์์ง ์ค๋น๋์ง ์์ ์ง๋ฌธ์๋๋ค. ๋ค๋ฅธ ์ง๋ฌธ๋ ํด์ฃผ์ธ์!")
def get_temp_video_copy():
    """Copy the CEO video into a fresh temporary .mp4 and return its path.

    Per the original note, a new file per question is what re-triggers
    autoplay in the Gradio video component — presumably it ignores updates
    with an unchanged path (confirm against the Gradio version in use).
    The file is intentionally not auto-deleted; Gradio serves it afterwards.
    """
    fd, tmp_path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)  # we only need the path; copyfile reopens it
    shutil.copyfile(CEO_VIDEO_FILE, tmp_path)
    return tmp_path
def chat_interface(message, history):
    """Gradio event handler: answer *message* and restart the CEO video.

    Args:
        message: The user's question from the textbox.
        history: The chatbot's (user, bot) tuple list; mutated in place.

    Returns:
        (updated history, "" to clear the textbox, video component update).

    Fix vs. original: an inline comment was split across two lines by
    extraction, leaving a bare non-comment line (syntax error); it is
    rejoined here. The original note said: if the chat box can render
    HTML, keep the strike-through HTML in the answer.
    """
    bot_response = best_faq_answer(message)
    history.append((message, bot_response))
    # Fresh temp copy so the video component re-triggers autoplay.
    temp_video_path = get_temp_video_copy()
    return history, "", gr.update(value=temp_video_path, autoplay=True, interactive=False, elem_id="ceo-video-panel")
# UI layout: CEO video on the left, chatbot + input on the right.
# Fix vs. original: the second Examples string was split across two lines by
# extraction (unterminated literal, syntax error) and has been rejoined.
with gr.Blocks(theme=gr.themes.Soft(), css="style.css") as demo:
    with gr.Row(elem_id="main-row"):
        with gr.Column(scale=1, min_width=350):
            video_player = gr.Video(
                value=CEO_VIDEO_FILE,
                autoplay=False, loop=False, interactive=False,
                height=350, elem_id="ceo-video-panel"
            )
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="",
                height=350,
                elem_id="chatbot-box",
                show_copy_button=True
            )
            with gr.Row():
                msg_input = gr.Textbox(placeholder="๋ฌด์์ด๋ ๋ฌผ์ด๋ณด์ธ์.", scale=4, show_label=False)
                send_btn = gr.Button("์ ์ก", scale=1, min_width=80)
            gr.Examples(
                examples=["๋ณต์ง ๋ญ ์์ด?", "ํด๊ฐ ์ ๋ ์ค๋ชํด์ค", "์ฐ๋ด ์๋ ค์ค", "๋ํธํ ํ์ฌ?", "์์ฌ์ ๊ณต?", "์ฃผ๋ ฅ์ ํ", "์กฐ์ง๋ฌธํ"],
                inputs=msg_input
            )

    # Wire both Enter-to-submit and the send button to the same handler.
    outputs_list = [chatbot, msg_input, video_player]
    msg_input.submit(chat_interface, [msg_input, chatbot], outputs_list)
    send_btn.click(chat_interface, [msg_input, chatbot], outputs_list)
# Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()
|