File size: 5,985 Bytes
bf5988d
43fc1d0
 
 
3708980
7979e55
10453d5
3708980
d9b0f38
 
43fc1d0
3708980
a6d830f
3708980
 
a6d830f
3708980
a6d830f
 
3708980
a6d830f
43fc1d0
10453d5
 
3708980
d9b0f38
d995c6e
43fc1d0
d9b0f38
 
0e9051c
43fc1d0
 
 
 
 
 
 
 
 
d3a7619
 
 
 
 
 
 
43fc1d0
 
 
 
 
 
 
 
 
3708980
43fc1d0
b412fdb
43fc1d0
 
3708980
 
43fc1d0
 
3708980
 
43fc1d0
 
 
 
 
 
dfe0c2c
3708980
d3a7619
 
b412fdb
d3a7619
 
 
b412fdb
d3a7619
 
 
 
 
b412fdb
c90c7b6
 
 
 
d3a7619
 
43fc1d0
 
 
 
 
 
 
 
 
3708980
43fc1d0
 
 
 
 
3708980
43fc1d0
 
3708980
b412fdb
3708980
 
43fc1d0
3708980
d3a7619
 
3708980
43fc1d0
3708980
 
 
 
 
 
43fc1d0
3708980
 
 
43fc1d0
 
3708980
43fc1d0
3708980
5ed04a5
 
 
c90c7b6
5ed04a5
0e9051c
d995c6e
3708980
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import os
import asyncio
from typing import List, Dict

# Use the pure-Python protobuf implementation instead of the C extension
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# Load the .env file and Space Secrets
from dotenv import load_dotenv
load_dotenv()

# Work around a Gradio client bug (OpenAPI schema parsing)
import gradio_client.utils as client_utils
orig = client_utils.json_schema_to_python_type
def safe_json_type(schema, defs=None):
    """Convert a JSON schema to a Python type string without ever raising.

    Delegates to the original ``json_schema_to_python_type`` captured in
    ``orig``; if that call fails for any reason, the generic type name
    ``"Any"`` is returned instead.
    """
    try:
        py_type = orig(schema, defs)
    except Exception:
        # Best-effort fallback: a schema the converter cannot handle is
        # reported as an untyped value rather than aborting.
        py_type = "Any"
    return py_type
client_utils.json_schema_to_python_type = safe_json_type

# Validate the Google API key
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise EnvironmentError("GOOGLE_API_KEY๋ฅผ Settingsโ†’Secrets์— ์ถ”๊ฐ€ํ•ด์ฃผ์„ธ์š”.")
os.environ["GOOGLE_API_KEY"] = api_key

# Configure the ChromaDB path
db_dir = os.path.join(os.getcwd(), "chromadb_KH_media")
os.environ["CHROMA_DB_DIR"] = db_dir

# === Library imports ===
import chromadb
import gradio as gr
from sentence_transformers import SentenceTransformer
from google.adk.agents import Agent
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner
from google.genai import types

# === Current faculty list ===
PROFESSORS = [
    "์ด์ธํฌ", "๊น€ํƒœ์šฉ", "๋ฐ•์ข…๋ฏผ", "ํ™์ง€์•„", "์ด์ •๊ต",
    "์ด๊ธฐํ˜•", "์ด์„ ์˜", "์กฐ์ˆ˜์˜", "์ด์ข…ํ˜", "์ด๋‘ํ™ฉ",
    "์ด์ƒ์›", "์ดํ›ˆ", "์ตœ์ˆ˜์ง„", "์ตœ๋ฏผ์•„", "๊น€๊ด€ํ˜ธ"
]

# === Simple RAG system ===
class SimpleRAGSystem:
    """Retrieval helper backed by a persistent ChromaDB collection.

    Queries are embedded with a SentenceTransformer model and matched
    against the documents stored in the collection.
    """

    def __init__(self, db_path: str = None, collection: str = "KH_media_docs"):
        """Load the embedding model and open the named collection.

        Args:
            db_path: Directory of the persistent ChromaDB store. A falsy
                value falls back to the ``CHROMA_DB_DIR`` environment variable.
            collection: Name of the collection to open.

        Raises:
            RuntimeError: If the collection reports zero entries.
        """
        store_dir = db_path if db_path else os.getenv("CHROMA_DB_DIR")
        self.embedding_model = SentenceTransformer("snunlp/KR-SBERT-V40K-klueNLI-augSTS")
        self.client = chromadb.PersistentClient(path=store_dir)
        self.collection = self.client.get_collection(name=collection)
        if self.collection.count() == 0:
            raise RuntimeError("ChromaDB๊ฐ€ ๋น„์–ด์žˆ์Šต๋‹ˆ๋‹ค.")

    def search_similar_docs(self, query: str, top_k: int = 20) -> List[Dict]:
        """Return the stored documents closest to *query* as system messages.

        Args:
            query: Text to embed and search for.
            top_k: Number of results requested from the collection.

        Returns:
            One ``{"role": "system", "content": <document>}`` dict per hit,
            in the order the collection returned them.
        """
        query_vector = self.embedding_model.encode(query).tolist()
        hits = self.collection.query(
            query_embeddings=[query_vector],
            n_results=top_k,
            include=["documents", "metadatas"],
        )
        # Metadata is requested alongside the documents, but only the
        # document text ends up in the returned messages.
        paired = zip(hits["documents"][0], hits["metadatas"][0])
        return [{"role": "system", "content": text} for text, _meta in paired]

rag_system = SimpleRAGSystem()

# === Google ADK configuration ===
session_svc = InMemorySessionService()
agent = Agent(model="gemini-2.0-flash-lite", #"gemini-2.0-flash"
              name="khu_media_advisor",
              instruction="""๋‹น์‹ ์€ ๊ฒฝํฌ๋Œ€ํ•™๊ต ๋ฏธ๋””์–ดํ•™๊ณผ ์ „๋ฌธ ์ƒ๋‹ด AI์ž…๋‹ˆ๋‹ค.
# ์ฃผ์š” ์—ญํ• :
- ์ œ๊ณต๋œ ๋ฌธ์„œ ์ •๋ณด๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๋‹ต๋ณ€ ์ œ๊ณต
- ๋ฏธ๋””์–ดํ•™๊ณผ ๊ด€๋ จ ์งˆ๋ฌธ์— ์นœ์ ˆํ•˜๊ณ  ๊ตฌ์ฒด์ ์œผ๋กœ ์‘๋‹ต
- ๋ฌธ์„œ์— ์—†๋Š” ๋‚ด์šฉ์€ ์ผ๋ฐ˜ ์ง€์‹์œผ๋กœ ๋ณด์™„ (๋‹จ, ๋ช…์‹œ)
# ๋‹ต๋ณ€ ์Šคํƒ€์ผ:
- ์ž์„ธํ•˜๊ณ  ํ’๋ถ€ํ•œ ์„ค๋ช…์„ ํฌํ•จํ•˜์—ฌ ์ƒ์„ธํ•˜๊ณ  ๊ธธ๊ฒŒ ๋‹ต๋ณ€ ์ œ๊ณต
- ์นœ๊ทผํ•˜๊ณ  ๋„์›€์ด ๋˜๋Š” ์ƒ๋‹ด์‚ฌ ํ†ค
- ํ•ต์‹ฌ ์ •๋ณด๋ฅผ ๋ช…ํ™•ํ•˜๊ฒŒ ์ „๋‹ฌ
- ์ถ”๊ฐ€ ๊ถ๊ธˆํ•œ ์ ์ด ์žˆ์œผ๋ฉด ์–ธ์ œ๋“  ๋ฌผ์–ด๋ณด๋ผ๊ณ  ์•ˆ๋‚ด
# ์ฐธ๊ณ  ๋ฌธ์„œ ํ™œ์šฉ:
- ๋ฌธ์„œ ๋‚ด์šฉ์ด ์žˆ์œผ๋ฉด ๊ตฌ์ฒด์ ์œผ๋กœ ์ธ์šฉ
- ์—ฌ๋Ÿฌ ๋ฌธ์„œ์˜ ์ •๋ณด๋ฅผ ์ข…ํ•ฉํ•˜์—ฌ ๋‹ต๋ณ€ ์ž‘์„ฑ
- ์ •ํ™•ํ•˜์ง€ ์•Š์€ ์ •๋ณด๋Š” ์ถ”์ธกํ•˜์ง€ ๋ง๊ณ  ์†”์งํ•˜๊ฒŒ ๋ชจ๋ฅธ๋‹ค๊ณ  ๋‹ต๋ณ€

# ํ˜„์žฌ ๊ฒฝํฌ๋Œ€ํ•™๊ต ๋ฏธ๋””์–ดํ•™๊ณผ ๊ต์ˆ˜์ง„์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค:
์ด์ธํฌ, ๊น€ํƒœ์šฉ, ๋ฐ•์ข…๋ฏผ, ํ™์ง€์•„, ์ด์ •๊ต, ์ด๊ธฐํ˜•, ์ด์„ ์˜, ์กฐ์ˆ˜์˜, ์ด์ข…ํ˜, ์ด๋‘ํ™ฉ, ์ด์ƒ์›, ์ดํ›ˆ, ์ตœ์ˆ˜์ง„, ์ตœ๋ฏผ์•„, ๊น€๊ด€ํ˜ธ"""             
)

runner = Runner(agent=agent, app_name="khu_media_chatbot", session_service=session_svc)
session_id = None

async def get_response(prompt: str) -> str:
    """Send *prompt* to the ADK agent and return the text of its final reply.

    A session is created lazily on the first call and its id is cached in the
    module-level ``session_id``, so every later call reuses that session.

    Args:
        prompt: Prompt text forwarded to the agent as a single user message.

    Returns:
        The text of the first part of the last final-response event that
        carried text, or an empty string when no such event was produced.
    """
    global session_id
    if session_id is None:
        sess = await session_svc.create_session(app_name="khu_media_chatbot", user_id="user")
        session_id = sess.id
    content = types.Content(role="user", parts=[types.Part(text=prompt)])
    response = ""
    # Consume the runner's async generator so this coroutine awaits events
    # instead of blocking inside the synchronous `run` wrapper.
    async for ev in runner.run_async(user_id="user", session_id=session_id, new_message=content):
        if not ev.is_final_response():
            continue
        # A final event may arrive without content, parts, or text; keep the
        # previous value rather than failing or returning None.
        parts = ev.content.parts if ev.content else None
        if parts and parts[0].text:
            response = parts[0].text
    return response

# === Gradio UI ===
with gr.Blocks(title="๊ฒฝํฌ๋Œ€ ๋ฏธ๋””์–ดํ•™๊ณผ AI ์ƒ๋‹ด์‚ฌ", theme="soft") as app:
    gr.Markdown("# ๐ŸŽฌ ๊ฒฝํฌ๋Œ€ ๋ฏธ๋””์–ดํ•™๊ณผ AI ์ƒ๋‹ด์‚ฌ")
    chatbot = gr.Chatbot(type="messages", height=400)
    msg = gr.Textbox(show_label=False, placeholder="์ด ๊ณณ์— ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”...")
    send = gr.Button("์ „์†ก")

    def chat_fn(user_input, history):
        """Answer one user message with retrieved context and update the chat.

        Args:
            user_input: Text submitted from the textbox.
            history: Current chatbot messages as role/content dicts, or a
                falsy value when the conversation is empty.

        Returns:
            A ``(history, "")`` tuple: the updated message list for the
            chatbot and an empty string that clears the textbox.
        """
        history = history or []
        # Nothing to answer: skip retrieval and the model call entirely.
        if not user_input or not user_input.strip():
            return history, ""
        # Input preprocessing: prevent misuse of terms.
        # The last replacement's target begins with its own search string, so
        # already-expanded occurrences are collapsed first; otherwise they
        # would end up with a duplicated suffix.
        user_input = (
            user_input.replace("์ „๊ณต", "๋ถ„์•ผ")
            .replace("๊ต์ˆ˜์ง„", "๊ต์ˆ˜")
            .replace("๊ต์ˆ˜", "๊ต์ˆ˜์ง„")
        )
        # RAG context
        docs = rag_system.search_similar_docs(user_input)
        # Combine existing history (dicts) with the new user message
        new_history = history + [{"role": "user", "content": user_input}]
        # Retrieved documents belong only in the prompt. Keeping them out of
        # the returned history stops them from being shown in the chatbot and
        # from accumulating in the prompt on later turns.
        prompt_messages = new_history + docs
        prompt = "\n".join(f"{m['role']}: {m['content']}" for m in prompt_messages)
        resp = asyncio.run(get_response(prompt))
        new_history.append({"role": "assistant", "content": resp})
        return new_history, ""

    send.click(chat_fn, inputs=[msg, chatbot], outputs=[chatbot, msg])
    msg.submit(chat_fn, inputs=[msg, chatbot], outputs=[chatbot, msg])

    gr.Markdown(f"""
    ---
    ### โš™๏ธ ์‹œ์Šคํ…œ ์ •๋ณด\n
    **ChromaDB ๋ฌธ์„œ ์ˆ˜**: {rag_system.collection.count()}๊ฐœ\n
    **์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ**: snunlp/KR-SBERT-V40K-klueNLI-augSTS (ํ•œ๊ตญ์–ด ํŠนํ™”)\n    
    **์–ธ์–ด ๋ชจ๋ธ**: Google Gemini 2.0 Flash (๋ฌด๋ฃŒ)  
    """)

if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT",7860)), share=False, show_api=False)