Spaces:
Sleeping
Sleeping
File size: 5,985 Bytes
bf5988d 43fc1d0 3708980 7979e55 10453d5 3708980 d9b0f38 43fc1d0 3708980 a6d830f 3708980 a6d830f 3708980 a6d830f 3708980 a6d830f 43fc1d0 10453d5 3708980 d9b0f38 d995c6e 43fc1d0 d9b0f38 0e9051c 43fc1d0 d3a7619 43fc1d0 3708980 43fc1d0 b412fdb 43fc1d0 3708980 43fc1d0 3708980 43fc1d0 dfe0c2c 3708980 d3a7619 b412fdb d3a7619 b412fdb d3a7619 b412fdb c90c7b6 d3a7619 43fc1d0 3708980 43fc1d0 3708980 43fc1d0 3708980 b412fdb 3708980 43fc1d0 3708980 d3a7619 3708980 43fc1d0 3708980 43fc1d0 3708980 43fc1d0 3708980 43fc1d0 3708980 5ed04a5 c90c7b6 5ed04a5 0e9051c d995c6e 3708980 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import os
import asyncio
from typing import List, Dict
# Protobuf C-extension ๋์ pure-Python ๊ตฌํ ์ฌ์ฉ
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
# .env ํ์ผ ๋ฐ Space Secrets ๋ก๋
from dotenv import load_dotenv
load_dotenv()
# Gradio client ๋ฒ๊ทธ ์ฐํ (OpenAPI ํ์ฑ)
import gradio_client.utils as client_utils
orig = client_utils.json_schema_to_python_type
def safe_json_type(schema, defs=None):
try:
return orig(schema, defs)
except Exception:
return "Any"
client_utils.json_schema_to_python_type = safe_json_type
# Google API Key ๊ฒ์ฆ
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
raise EnvironmentError("GOOGLE_API_KEY๋ฅผ SettingsโSecrets์ ์ถ๊ฐํด์ฃผ์ธ์.")
os.environ["GOOGLE_API_KEY"] = api_key
# ChromaDB ๊ฒฝ๋ก ์ค์
db_dir = os.path.join(os.getcwd(), "chromadb_KH_media")
os.environ["CHROMA_DB_DIR"] = db_dir
# === ๋ผ์ด๋ธ๋ฌ๋ฆฌ ์ํฌํธ ===
import chromadb
import gradio as gr
from sentence_transformers import SentenceTransformer
from google.adk.agents import Agent
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner
from google.genai import types
# === ํ์ฌ ๊ต์์ง ๋ชฉ๋ก ===
PROFESSORS = [
"์ด์ธํฌ", "๊นํ์ฉ", "๋ฐ์ข
๋ฏผ", "ํ์ง์", "์ด์ ๊ต",
"์ด๊ธฐํ", "์ด์ ์", "์กฐ์์", "์ด์ข
ํ", "์ด๋ํฉ",
"์ด์์", "์ดํ", "์ต์์ง", "์ต๋ฏผ์", "๊น๊ดํธ"
]
# === Simple RAG ์์คํ
===
class SimpleRAGSystem:
def __init__(self, db_path: str = None, collection: str = "KH_media_docs"):
db_path = db_path or os.getenv("CHROMA_DB_DIR")
self.embedding_model = SentenceTransformer("snunlp/KR-SBERT-V40K-klueNLI-augSTS")
self.client = chromadb.PersistentClient(path=db_path)
self.collection = self.client.get_collection(name=collection)
count = self.collection.count()
if count == 0:
raise RuntimeError("ChromaDB๊ฐ ๋น์ด์์ต๋๋ค.")
def search_similar_docs(self, query: str, top_k: int = 20) -> List[Dict]:
emb = self.embedding_model.encode(query).tolist()
res = self.collection.query(
query_embeddings=[emb], n_results=top_k,
include=["documents", "metadatas"]
)
docs = []
for doc, meta in zip(res["documents"][0], res["metadatas"][0]):
docs.append({"role": "system", "content": doc})
return docs
rag_system = SimpleRAGSystem()
# === Google ADK ์ค์ ===
session_svc = InMemorySessionService()
agent = Agent(model="gemini-2.0-flash-lite", #"gemini-2.0-flash"
name="khu_media_advisor",
instruction="""๋น์ ์ ๊ฒฝํฌ๋ํ๊ต ๋ฏธ๋์ดํ๊ณผ ์ ๋ฌธ ์๋ด AI์
๋๋ค.
# ์ฃผ์ ์ญํ :
- ์ ๊ณต๋ ๋ฌธ์ ์ ๋ณด๋ฅผ ๋ฐํ์ผ๋ก ๋ต๋ณ ์ ๊ณต
- ๋ฏธ๋์ดํ๊ณผ ๊ด๋ จ ์ง๋ฌธ์ ์น์ ํ๊ณ ๊ตฌ์ฒด์ ์ผ๋ก ์๋ต
- ๋ฌธ์์ ์๋ ๋ด์ฉ์ ์ผ๋ฐ ์ง์์ผ๋ก ๋ณด์ (๋จ, ๋ช
์)
# ๋ต๋ณ ์คํ์ผ:
- ์์ธํ๊ณ ํ๋ถํ ์ค๋ช
์ ํฌํจํ์ฌ ์์ธํ๊ณ ๊ธธ๊ฒ ๋ต๋ณ ์ ๊ณต
- ์น๊ทผํ๊ณ ๋์์ด ๋๋ ์๋ด์ฌ ํค
- ํต์ฌ ์ ๋ณด๋ฅผ ๋ช
ํํ๊ฒ ์ ๋ฌ
- ์ถ๊ฐ ๊ถ๊ธํ ์ ์ด ์์ผ๋ฉด ์ธ์ ๋ ๋ฌผ์ด๋ณด๋ผ๊ณ ์๋ด
# ์ฐธ๊ณ ๋ฌธ์ ํ์ฉ:
- ๋ฌธ์ ๋ด์ฉ์ด ์์ผ๋ฉด ๊ตฌ์ฒด์ ์ผ๋ก ์ธ์ฉ
- ์ฌ๋ฌ ๋ฌธ์์ ์ ๋ณด๋ฅผ ์ข
ํฉํ์ฌ ๋ต๋ณ ์์ฑ
- ์ ํํ์ง ์์ ์ ๋ณด๋ ์ถ์ธกํ์ง ๋ง๊ณ ์์งํ๊ฒ ๋ชจ๋ฅธ๋ค๊ณ ๋ต๋ณ
# ํ์ฌ ๊ฒฝํฌ๋ํ๊ต ๋ฏธ๋์ดํ๊ณผ ๊ต์์ง์ ๋ค์๊ณผ ๊ฐ์ต๋๋ค:
์ด์ธํฌ, ๊นํ์ฉ, ๋ฐ์ข
๋ฏผ, ํ์ง์, ์ด์ ๊ต, ์ด๊ธฐํ, ์ด์ ์, ์กฐ์์, ์ด์ข
ํ, ์ด๋ํฉ, ์ด์์, ์ดํ, ์ต์์ง, ์ต๋ฏผ์, ๊น๊ดํธ"""
)
runner = Runner(agent=agent, app_name="khu_media_chatbot", session_service=session_svc)
session_id = None
async def get_response(prompt: str) -> str:
global session_id
if session_id is None:
sess = await session_svc.create_session(app_name="khu_media_chatbot", user_id="user")
session_id = sess.id
content = types.Content(role="user", parts=[types.Part(text=prompt)])
response = ""
for ev in runner.run(user_id="user", session_id=session_id, new_message=content):
if ev.is_final_response():
response = ev.content.parts[0].text
return response
# === Gradio UI ===
with gr.Blocks(title="๊ฒฝํฌ๋ ๋ฏธ๋์ดํ๊ณผ AI ์๋ด์ฌ", theme="soft") as app:
gr.Markdown("# ๐ฌ ๊ฒฝํฌ๋ ๋ฏธ๋์ดํ๊ณผ AI ์๋ด์ฌ")
chatbot = gr.Chatbot(type="messages", height=400)
msg = gr.Textbox(show_label=False, placeholder="์ด ๊ณณ์ ์ง๋ฌธ์ ์
๋ ฅํ์ธ์...")
send = gr.Button("์ ์ก")
def chat_fn(user_input, history):
history = history or []
# ์
๋ ฅ ์ ์ฒ๋ฆฌ: ์ค์ฉ ๋ฐฉ์ง
user_input = user_input.replace("์ ๊ณต", "๋ถ์ผ").replace("๊ต์", "๊ต์์ง")
# RAG ์ปจํ
์คํธ
docs = rag_system.search_similar_docs(user_input)
# Combine existing history (dicts) with new user message
new_history = history + [{"role": "user", "content": user_input}]
# Insert docs as system messages
new_history += docs
# Build prompt text from history
prompt = "\n".join([f"{m['role']}: {m['content']}" for m in new_history])
resp = asyncio.run(get_response(prompt))
new_history.append({"role": "assistant", "content": resp})
return new_history, ""
send.click(chat_fn, inputs=[msg, chatbot], outputs=[chatbot, msg])
msg.submit(chat_fn, inputs=[msg, chatbot], outputs=[chatbot, msg])
gr.Markdown(f"""
---
### โ๏ธ ์์คํ
์ ๋ณด\n
**ChromaDB ๋ฌธ์ ์**: {rag_system.collection.count()}๊ฐ\n
**์๋ฒ ๋ฉ ๋ชจ๋ธ**: snunlp/KR-SBERT-V40K-klueNLI-augSTS (ํ๊ตญ์ด ํนํ)\n
**์ธ์ด ๋ชจ๋ธ**: Google Gemini 2.0 Flash (๋ฌด๋ฃ)
""")
if __name__ == "__main__":
app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT",7860)), share=False, show_api=False)
|