suhyun1 commited on
Commit
ac96ad4
ยท
verified ยท
1 Parent(s): ffc1d66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +273 -136
app.py CHANGED
@@ -1,137 +1,274 @@
1
- import os
2
- import gradio as gr
3
- import bs4
4
-
5
- from langchain_community.document_loaders import WebBaseLoader
6
- from langchain.text_splitter import CharacterTextSplitter
7
- from langchain_community.embeddings import HuggingFaceEmbeddings
8
- from langchain.vectorstores import FAISS
9
- from langchain.chains import RetrievalQA
10
- from langchain_groq import ChatGroq
11
- from langchain_community.document_loaders import UnstructuredExcelLoader
12
-
13
- # ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ๋ถ€ํ„ฐ Groq API Key ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
14
- groq_api_key = os.environ.get("GROQ_API_KEY", "")
15
-
16
- # ๊ตญ๊ฐ€๊ธฐ๋ก์› ์›น ๋ฌธ์„œ ๋ชฉ๋ก
17
- urls = [
18
- "https://archives.go.kr/next/newsearch/listSubjectContent.do?subjectFieldId=000011",
19
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003140&pageFlag=A&sitePage=1-2-1",
20
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003288&pageFlag=A&sitePage=1-2-1",
21
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003290&pageFlag=A&sitePage=1-2-1",
22
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003292&pageFlag=A&sitePage=1-2-1",
23
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008757&pageFlag=A&sitePage=1-2-1",
24
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003293&pageFlag=A&sitePage=1-2-1",
25
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003294&pageFlag=A&sitePage=1-2-1",
26
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003295&pageFlag=A&sitePage=1-2-1",
27
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003289&pageFlag=A&sitePage=1-2-1",
28
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010816&pageFlag=A&sitePage=1-2-1",
29
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010817&pageFlag=A&sitePage=1-2-1",
30
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=009154&pageFlag=A&sitePage=1-2-1",
31
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003260&pageFlag=A&sitePage=1-2-1",
32
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003278&pageFlag=A&sitePage=1-2-1",
33
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003281&pageFlag=A&sitePage=1-2-1",
34
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003283&pageFlag=A&sitePage=1-2-1",
35
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003284&pageFlag=A&sitePage=1-2-1",
36
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003280&pageFlag=A&sitePage=1-2-1",
37
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003282&pageFlag=A&sitePage=1-2-1",
38
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003287&pageFlag=A&sitePage=1-2-1",
39
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003286&pageFlag=A&sitePage=1-2-1",
40
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003285&pageFlag=A&sitePage=1-2-1",
41
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003279&pageFlag=A&sitePage=1-2-1",
42
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003141&pageFlag=A&sitePage=1-2-1",
43
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003143&pageFlag=A&sitePage=1-2-1",
44
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003144&pageFlag=A&sitePage=1-2-1",
45
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003142&pageFlag=A&sitePage=1-2-1",
46
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008653&pageFlag=A&sitePage=1-2-1",
47
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010827&pageFlag=A&sitePage=1-2-1",
48
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008582&pageFlag=A&sitePage=1-2-1",
49
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008663&pageFlag=A&sitePage=1-2-1",
50
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008581&pageFlag=A&sitePage=1-2-1",
51
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010828&pageFlag=A&sitePage=1-2-1",
52
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010830&pageFlag=A&sitePage=1-2-1",
53
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010831&pageFlag=A&sitePage=1-2-1",
54
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003145&pageFlag=A&sitePage=1-2-1",
55
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=009425&pageFlag=A&sitePage=1-2-1",
56
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003146&pageFlag=A&sitePage=1-2-1",
57
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010821&pageFlag=A&sitePage=1-2-1",
58
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003151&pageFlag=A&sitePage=1-2-1",
59
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003149&pageFlag=A&sitePage=1-2-1",
60
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003148&pageFlag=A&sitePage=1-2-1",
61
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008655&pageFlag=A&sitePage=1-2-1",
62
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008654&pageFlag=A&sitePage=1-2-1",
63
- "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003150&pageFlag=A&sitePage=1-2-1",
64
- "https://archives.go.kr/next/newmanager/recodeRegister.do",
65
- "https://archives.go.kr/next/newtour/tourCourse.do",
66
- "https://archives.go.kr/next/newrecordsMngPro/recordsDonateInfo.do",
67
- "https://archives.go.kr/next/newdata/pepoleRecodPresentIntro.do",
68
- "https://archives.go.kr/next/newsearch/searchGuideList.do",
69
- "https://archives.go.kr/next/newsearch/searchGuideList.do?page=2",
70
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=441",
71
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=381",
72
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=341",
73
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=261",
74
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=227",
75
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=59",
76
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=30",
77
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=64",
78
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=321",
79
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=124",
80
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=267",
81
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=141",
82
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=149",
83
- "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=22"
84
- ]
85
-
86
- # ์›น๋ฌธ์„œ ๋กœ๋”ฉ
87
- loader = WebBaseLoader(web_paths=urls, bs_kwargs=dict(parse_only=bs4.SoupStrainer()))
88
- docs = loader.load()
89
-
90
- # ๊ธฐ๋ก๋ฌผ ๋ชฉ๋ก ์—‘์…€ ํŒŒ์ผ
91
- excel_files = [
92
- "๊ต์œก ์ „๋ฐ˜ ๊ด€๋ จ ๊ธฐ๋ก๋ฌผ ๋ชฉ๋ก1.xls",
93
- "๊ต์œก ์ „๋ฐ˜ ๊ด€๋ จ ๊ธฐ๋ก๋ฌผ ๋ชฉ๋ก2.xls",
94
- "๊ต์œก ์ „๋ฐ˜ ๊ด€๋ จ ๊ธฐ๋ก๋ฌผ ๋ชฉ๋ก3.xls"
95
- ]
96
-
97
- # ์—‘์…€ ๋ฌธ์„œ ๋กœ๋”ฉ
98
- excel_docs = []
99
- for file in excel_files:
100
- loader = UnstructuredExcelLoader(file)
101
- excel_docs.extend(loader.load())
102
-
103
- # ์›น๋ฌธ์„œ + ์—‘์…€๋ฌธ์„œ ๊ฒฐํ•ฉ
104
- docs.extend(excel_docs)
105
-
106
- # ๋ฌธ์„œ ๋ถ„ํ• 
107
- splitter = CharacterTextSplitter(separator="\n", chunk_size=500, chunk_overlap=50)
108
- split_docs = splitter.split_documents(docs)
109
-
110
- # ์ž„๋ฒ ๋”ฉ ๋ฐ ๋ฒกํ„ฐ ์ €์žฅ ๋ฐ ๋ฆฌํŠธ๋ฆฌ๋ฒ„ ์„ค์ •
111
- embedding_model = HuggingFaceEmbeddings(model_name="snunlp/KR-SBERT-V40K-klueNLI-augSTS")
112
- vectorstore = FAISS.from_documents(split_docs, embedding_model)
113
- retriever = vectorstore.as_retriever()
114
-
115
- # LLM + QA ์ฒด์ธ
116
- llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama3-70b-8192")
117
- qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
118
-
119
- # Gradio ์ฑ„ํŒ… ํ•จ์ˆ˜
120
- def chat_with_history(user_input, history):
121
- if history is None:
122
- history = []
123
- query = user_input.strip() + " ํ•œ๊ตญ์–ด๋กœ ๋‹ตํ•ด์ฃผ์„ธ์š”."
124
- result = qa_chain({"query": query})
125
- answer = result.get("result", "๋‹ต๋ณ€์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
126
- history.append((user_input, answer))
127
- return "", history, history
128
-
129
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ
130
- with gr.Blocks() as demo:
131
- gr.Markdown("## ๐Ÿ“š ๊ตญ๊ฐ€๊ธฐ๋ก์› ์ •๋ณด ์ฑ—๋ด‡")
132
- chatbot = gr.Chatbot(label="๊ธฐ๋ก์› ์ฑ—๋ด‡")
133
- msg = gr.Textbox(placeholder="์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”", label="๐Ÿ’ฌ ์งˆ๋ฌธ ์ž…๋ ฅ")
134
- state = gr.State([])
135
- msg.submit(chat_with_history, inputs=[msg, state], outputs=[msg, chatbot, state])
136
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  demo.launch()
 
1
+ import os
2
+ import gradio as gr
3
+ import bs4
4
+
5
+ from langchain_community.document_loaders import WebBaseLoader
6
+ from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ from langchain.vectorstores import FAISS
9
+ from langchain.chains import RetrievalQA
10
+ from langchain_groq import ChatGroq
11
+ from langchain_community.document_loaders import UnstructuredExcelLoader
12
+
13
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ๋ถ€ํ„ฐ Groq API Key ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
14
+ groq_api_key = os.environ.get("GROQ_API_KEY", "")
15
+
16
+ # ๊ตญ๊ฐ€๊ธฐ๋ก์› ์›น ๋ฌธ์„œ ๋ชฉ๋ก
17
+ urls = [
18
+ "https://archives.go.kr/next/newsearch/listSubjectContent.do?subjectFieldId=000011",
19
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003140&pageFlag=A&sitePage=1-2-1",
20
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003288&pageFlag=A&sitePage=1-2-1",
21
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003290&pageFlag=A&sitePage=1-2-1",
22
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003292&pageFlag=A&sitePage=1-2-1",
23
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008757&pageFlag=A&sitePage=1-2-1",
24
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003293&pageFlag=A&sitePage=1-2-1",
25
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003294&pageFlag=A&sitePage=1-2-1",
26
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003295&pageFlag=A&sitePage=1-2-1",
27
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003289&pageFlag=A&sitePage=1-2-1",
28
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010816&pageFlag=A&sitePage=1-2-1",
29
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010817&pageFlag=A&sitePage=1-2-1",
30
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=009154&pageFlag=A&sitePage=1-2-1",
31
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003260&pageFlag=A&sitePage=1-2-1",
32
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003278&pageFlag=A&sitePage=1-2-1",
33
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003281&pageFlag=A&sitePage=1-2-1",
34
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003283&pageFlag=A&sitePage=1-2-1",
35
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003284&pageFlag=A&sitePage=1-2-1",
36
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003280&pageFlag=A&sitePage=1-2-1",
37
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003282&pageFlag=A&sitePage=1-2-1",
38
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003287&pageFlag=A&sitePage=1-2-1",
39
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003286&pageFlag=A&sitePage=1-2-1",
40
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003285&pageFlag=A&sitePage=1-2-1",
41
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003279&pageFlag=A&sitePage=1-2-1",
42
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003141&pageFlag=A&sitePage=1-2-1",
43
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003143&pageFlag=A&sitePage=1-2-1",
44
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003144&pageFlag=A&sitePage=1-2-1",
45
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003142&pageFlag=A&sitePage=1-2-1",
46
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008653&pageFlag=A&sitePage=1-2-1",
47
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010827&pageFlag=A&sitePage=1-2-1",
48
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008582&pageFlag=A&sitePage=1-2-1",
49
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008663&pageFlag=A&sitePage=1-2-1",
50
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008581&pageFlag=A&sitePage=1-2-1",
51
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010828&pageFlag=A&sitePage=1-2-1",
52
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010830&pageFlag=A&sitePage=1-2-1",
53
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010831&pageFlag=A&sitePage=1-2-1",
54
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003145&pageFlag=A&sitePage=1-2-1",
55
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=009425&pageFlag=A&sitePage=1-2-1",
56
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003146&pageFlag=A&sitePage=1-2-1",
57
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010821&pageFlag=A&sitePage=1-2-1",
58
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003151&pageFlag=A&sitePage=1-2-1",
59
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003149&pageFlag=A&sitePage=1-2-1",
60
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003148&pageFlag=A&sitePage=1-2-1",
61
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008655&pageFlag=A&sitePage=1-2-1",
62
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008654&pageFlag=A&sitePage=1-2-1",
63
+ "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003150&pageFlag=A&sitePage=1-2-1",
64
+ "https://archives.go.kr/next/newmanager/recodeRegister.do",
65
+ "https://archives.go.kr/next/newtour/tourCourse.do",
66
+ "https://archives.go.kr/next/newrecordsMngPro/recordsDonateInfo.do",
67
+ "https://archives.go.kr/next/newdata/pepoleRecodPresentIntro.do",
68
+ "https://archives.go.kr/next/newsearch/searchGuideList.do",
69
+ "https://archives.go.kr/next/newsearch/searchGuideList.do?page=2",
70
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=441",
71
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=381",
72
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=341",
73
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=261",
74
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=227",
75
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=59",
76
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=30",
77
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=64",
78
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=321",
79
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=124",
80
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=267",
81
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=141",
82
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=149",
83
+ "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=22"
84
+ ]
85
+
86
+ # ์›น๋ฌธ์„œ ๋กœ๋”ฉ
87
+ loader = WebBaseLoader(web_paths=urls, bs_kwargs=dict(parse_only=bs4.SoupStrainer()))
88
+ docs = loader.load()
89
+
90
+ # ๊ธฐ๋ก๋ฌผ ๋ชฉ๋ก ์—‘์…€ ํŒŒ์ผ
91
+ excel_files = [
92
+ "๊ต์œก ์ „๋ฐ˜ ๊ด€๋ จ ๊ธฐ๋ก๋ฌผ ๋ชฉ๋ก1.xls",
93
+ "๊ต์œก ์ „๋ฐ˜ ๊ด€๋ จ ๊ธฐ๋ก๋ฌผ ๋ชฉ๋ก2.xls",
94
+ "๊ต์œก ์ „๋ฐ˜ ๊ด€๋ จ ๊ธฐ๋ก๋ฌผ ๋ชฉ๋ก3.xls"
95
+ ]
96
+
97
+ # ์—‘์…€ ๋ฌธ์„œ ๋กœ๋”ฉ
98
+ excel_docs = []
99
+ for file in excel_files:
100
+ loader = UnstructuredExcelLoader(file)
101
+ excel_docs.extend(loader.load())
102
+
103
+ # ์›น๋ฌธ์„œ + ์—‘์…€๋ฌธ์„œ ๊ฒฐํ•ฉ
104
+ docs.extend(excel_docs)
105
+
106
+ # ๋ฌธ์„œ ๋ถ„ํ• 
107
+ splitter = CharacterTextSplitter(separator="\n", chunk_size=500, chunk_overlap=50)
108
+ split_docs = splitter.split_documents(docs)
109
+
110
+ # ์ž„๋ฒ ๋”ฉ ๋ฐ ๋ฒกํ„ฐ ์ €์žฅ ๋ฐ ๋ฆฌํŠธ๋ฆฌ๋ฒ„ ์„ค์ •
111
+ embedding_model = HuggingFaceEmbeddings(model_name="snunlp/KR-SBERT-V40K-klueNLI-augSTS")
112
+ vectorstore = FAISS.from_documents(split_docs, embedding_model)
113
+ retriever = vectorstore.as_retriever()
114
+
115
+ # LLM + QA ์ฒด์ธ
116
+ llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama3-70b-8192")
117
+ qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
118
+
119
+ # ์˜ˆ์‹œ ์งˆ๋ฌธ
120
+ example_questions = [
121
+ "๊ธฐ๋ก๋ฌผ ์—ด๋žŒ ๋ฐฉ๋ฒ•์€ ์–ด๋–ป๊ฒŒ ๋˜๋‚˜์š”?",
122
+ "๊ฒฌํ•™์‹ ์ฒญ์— ๋Œ€ํ•ด ์•Œ ์ˆ˜ ์žˆ๋‚˜์š”?",
123
+ "๊ธฐ๋ก๋ฌผ ๊ธฐ์ฆ ๋ฐฉ๋ฒ•์€ ๋ฌด์—‡์ธ๊ฐ€์š”?",
124
+ "๊ธฐ๋ก๋ฌผ ๊ฒ€์ƒ‰ ๊ธธ์žก์ด๊ฐ€ ๋ฌด์—‡์ธ๊ฐ€์š”?",
125
+ "ใ…‡ใ…‡ใ…‡์˜ ์ฃผ์ œ ์œ ํ˜•์€ ๋ฌด์—‡์ธ๊ฐ€์š”?",
126
+ "ใ…‡ใ…‡ใ…‡์˜ ํ•˜์œ„์ฃผ์ œ๋Š” ๋ฌด์—‡์ธ๊ฐ€์š”?",
127
+ "ใ…‡ใ…‡ใ…‡์˜ ๋ฐฐ๊ฒฝ์€ ๋ฌด์—‡์ธ๊ฐ€์š”?",
128
+ "ใ…‡ใ…‡ใ…‡์˜ ์—ญ์‚ฌ์  ์˜์˜๋Š” ๋ฌด์—‡์ธ๊ฐ€์š”?",
129
+ "ใ…‡ใ…‡ใ…‡์˜ ์ง‘ํ•„์ž๋Š” ๋ˆ„๊ตฌ์ธ๊ฐ€์š”?",
130
+ "ใ…‡ใ…‡ใ…‡์˜ ์‚ฌ์ง„ ๊ธฐ๋ก๋ฌผ์ด ์žˆ๋‚˜์š”?",
131
+ "ใ…‡ใ…‡ใ…‡์˜ ๊ธฐ๋ก๋ฌผ ์ค‘์— ์ •๋ถ€๊ฐ„ํ–‰๋ฌผ์€ ๋ช‡๊ฐœ์ธ๊ฐ€์š”?",
132
+ "ใ…‡ใ…‡ใ…‡์˜ ๊ธฐ๋ก๋ฌผ์€ ๋ช‡๊ฑด์ธ๊ฐ€์š”?",
133
+ "ใ…‡ใ…‡ใ…‡์˜ ๊ธฐ๋ก๋ฌผ๋“ค์˜ ์ œ๋ชฉ์„ ์•Œ ์ˆ˜ ์žˆ๋‚˜์š”?"
134
+ ]
135
+
136
+ # ํ‚ค์›Œ๋“œ ๊ณ„์ธต ๊ตฌ์กฐ
137
+ keyword_tree = {
138
+ "๊ต์œก ์ „๋ฐ˜": {
139
+ "๊ต์œก ๋ฏผ์ฃผํ™”์šด๋™": {
140
+ "๊ต์›์šด๋™": {
141
+ "๊ต์œก ๋ฏผ์ฃผํ™”์„ ์–ธ": {},
142
+ "๋ฏผ์ค‘๊ต์œก์ง€ ์‚ฌ๊ฑด": {},
143
+ "์ฐธ๊ต์œก ์šด๋™": {}
144
+ },
145
+ "ํ•™๋ถ€๋ชจ ์šด๋™": {
146
+ "๊ต์œก๊ฐœํ˜์‹œ๋ฏผ์šด๋™์—ฐ๋Œ€": {}
147
+ },
148
+ "ํ•™์ƒ์šด๋™": {}
149
+ },
150
+ "๊ต์œก ์ •๋ณดํ™” ์ •์ฑ…": {
151
+ "AI ๋””์ง€ํ„ธ ๊ต๊ณผ์„œ": {},
152
+ "e๋Ÿฌ๋‹ํ™œ์„ฑํ™”": {},
153
+ "๊ต์œกํ–‰์ •์ •๋ณด์‹œ์Šคํ…œ(NEIS)": {}
154
+ },
155
+ "๊ต์œก๊ฐœํ˜": {
156
+ "๊ต์œก๊ฐœํ˜์‹ฌ์˜ํšŒ": {},
157
+ "๊ต์œก๊ฐœํ˜์œ„์›ํšŒ": {
158
+ "5ยท31 ๊ต์œก๊ฐœํ˜": {}
159
+ },
160
+ "๊ต์œก์ •์ฑ…์‹ฌ์˜ํšŒ": {},
161
+ "๊ต์œก์ •์ฑ…์ž๋ฌธํšŒ์˜": {},
162
+ "๊ต์œกํ˜์‹ ์œ„์›ํšŒ": {},
163
+ "์ƒˆ๊ต์œก๊ณต๋™์ฒด์œ„์›ํšŒ": {},
164
+ "์ธ๋ ฅ์ž์›๊ฐœ๋ฐœํšŒ์˜": {},
165
+ "์žฅ๊ธฐ์ข…ํ•ฉ๊ณ„ํš์‹ฌ์˜ํšŒ": {}
166
+ },
167
+ "๊ต์œก์ด๋…": {
168
+ "๊ตญ๋ฏผ๊ต์œกํ—Œ์žฅ": {
169
+ "ํ•™๋„ํ˜ธ๊ตญ๋‹จ": {}
170
+ },
171
+ "ํ™์ต์ธ๊ฐ„ ๊ต์œก์ด๋…": {
172
+ "์ผ๋ฏผ์ฃผ์˜": {}
173
+ }
174
+ },
175
+ "๊ต์œก์ •์ฑ… ๊ด€๋ จ ๊ธฐ๊ด€": {
176
+ "ํ•œ๊ตญ๊ต์œก๊ฐœ๋ฐœ์›": {},
177
+ "ํ•œ๊ตญ๊ต์œก๊ณผ์ •ํ‰๊ฐ€์›": {},
178
+ "ํ•œ๊ตญ๊ต์œก๋ฐฉ์†ก๊ณต์‚ฌ": {}
179
+ },
180
+ "ํ•™์ˆ ์ง„ํฅ ์ •์ฑ…": {
181
+ "KERIS": {},
182
+ "๋Œ€ํ•œ๋ฏผ๊ตญํ•™์ˆ ์›": {}
183
+ },
184
+ "ํ•™์ œ": {
185
+ "ํ•™๋ น์ธ๊ตฌ ๊ฐ์†Œ": {},
186
+ "ํ•™์ œ ํ™•์ •": {}
187
+ },
188
+ "ํ—Œ๋ฒ•์˜ ๊ต์œก์กฐํ•ญ๊ณผ ๋ณ€์ฒœ": {
189
+ "๊ณ ๋“ฑ๊ต์œก๋ฒ•": {},
190
+ "๊ต์œก๊ธฐ๋ณธ๋ฒ•": {},
191
+ "๊ต์œก๋ฒ• ์ œ์ •": {},
192
+ "๊ต์œก์— ๊ด€ํ•œ ์ž„์‹œํŠน๋ก€๋ฒ•": {},
193
+ "์‚ฌ๋ฆฝํ•™๊ต๋ฒ•": {},
194
+ "์ดˆยท์ค‘๋“ฑ๊ต์œก๋ฒ•": {}
195
+ }
196
+ }
197
+ }
198
+
199
+ # ๊ฒฝ๋กœ์—์„œ ํ•˜์œ„ ํ‚ค์›Œ๋“œ ๋ฐ˜ํ™˜
200
+ def get_keywords(path):
201
+ node = keyword_tree
202
+ for key in path:
203
+ node = node.get(key, {})
204
+ return list(node.keys())
205
+
206
+ def format_path(path):
207
+ return " > ".join(path) if path else "๊ต์œก ์ „๋ฐ˜"
208
+
209
+ def on_keyword_select(selected, path):
210
+ new_path = path + [selected]
211
+ next_keywords = get_keywords(new_path)
212
+ formatted = format_path(new_path)
213
+ return formatted, new_path, gr.update(choices=next_keywords)
214
+
215
+ # Gradio ์ฑ„ํŒ… ํ•จ์ˆ˜
216
+ def chat_with_history(user_input, history):
217
+ if history is None:
218
+ history = []
219
+ query = user_input.strip() + " ํ•œ๊ตญ์–ด๋กœ ๋‹ตํ•ด์ฃผ์„ธ์š”."
220
+ result = qa_chain({"query": query})
221
+ answer = result.get("result", "๋‹ต๋ณ€์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
222
+ history.append((user_input, answer))
223
+ return "", history, history
224
+
225
+ # Gradio ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ
226
+ with gr.Blocks() as demo:
227
+ gr.Markdown("## ๐Ÿ“š ๊ตญ๊ฐ€๊ธฐ๋ก์› ์ฑ—๋ด‡")
228
+ gr.Markdown(
229
+ """### **๊ตญ๊ฐ€๊ธฐ๋ก์› ์ •๋ณด ์ฑ—๋ด‡์— ์˜ค์‹  ๊ฒƒ์„ ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค!**
230
+
231
+ ์ด ์ฑ—๋ด‡์€ ๊ตญ๊ฐ€๊ธฐ๋ก์›์— ๋ณด๊ด€๋œ ๋‹ค์–‘ํ•œ ๊ธฐ๋ก๋ฌผ์„ ๋ฐ”ํƒ•์œผ๋กœ ์—ฌ๋Ÿฌ๋ถ„์˜ ๊ถ๊ธˆ์ฆ์„ ์‰ฝ๊ณ  ๋น ๋ฅด๊ฒŒ ํ•ด๊ฒฐํ•ด ๋“œ๋ฆฝ๋‹ˆ๋‹ค.
232
+
233
+ ๊ตญ๊ฐ€๊ธฐ๋ก์›์˜ ์—ญํ• , ๊ธฐ๋ก๋ฌผ ์—ด๋žŒ ๋ฐฉ๋ฒ•, ๊ฒฌํ•™ ์‹ ์ฒญ, ๊ธฐ์ฆ ์ ˆ์ฐจ ๋“ฑ์˜ ๊ณต์‹ ์ •๋ณด๋ฅผ ํ™•์ธํ•  ์ˆ˜ ์žˆ์œผ๋ฉฐ, ๊ต์œก ์ „๋ฐ˜ ๋ถ„์•ผ์— ๊ด€ํ•œ ์ •๋ณด๋ฅผ ์•ˆ๋‚ดํ•ด ๋“œ๋ฆฝ๋‹ˆ๋‹ค.
234
+
235
+ ์•„๋ž˜ ์ž…๋ ฅ์ฐฝ์— ๊ถ๊ธˆํ•œ ๋‚ด์šฉ์„ ์ž์œ ๋กญ๊ฒŒ ์ž…๋ ฅํ•ด ๋ณด์„ธ์š”.
236
+
237
+ ๐Ÿ’ก ์งˆ๋ฌธ์„ ์–ด๋–ป๊ฒŒ ์‹œ์ž‘ํ• ์ง€ ๊ณ ๋ฏผ ์ค‘์ด์‹ ๊ฐ€์š”?
238
+ - **์˜ˆ์‹œ ์งˆ๋ฌธ ๋ณด๊ธฐ**์—์„œ ์งˆ๋ฌธ์„ ๊ณจ๋ผ๋ณด์„ธ์š”. (์˜ˆ์‹œ ์งˆ๋ฌธ์˜ ใ…‡ใ…‡ใ…‡์— ๊ด€์‹ฌ ์žˆ๋Š” ์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•ด๋ณด์„ธ์š”.)
239
+ - ๋˜๋Š” ์•„๋ž˜์˜ **๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ ํƒ์ƒ‰** ๊ธฐ๋Šฅ์„ ํ™œ์šฉํ•ด ์ฃผ์ œ๋ณ„ ํ‚ค์›Œ๋“œ๋ฅผ ์ฐพ์•„๋ณด์„ธ์š”.
240
+ """
241
+ )
242
+ chatbot = gr.Chatbot(label="๊ธฐ๋ก์› ์ฑ—๋ด‡", type="messages")
243
+ with gr.Row():
244
+ dropdown = gr.Dropdown(choices=example_questions, label="๐Ÿ“ ์˜ˆ์‹œ ์งˆ๋ฌธ ๋ณด๊ธฐ")
245
+ msg = gr.Textbox(placeholder="์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”", label="๐Ÿ’ฌ ์งˆ๋ฌธ ์ž…๋ ฅ", lines=1)
246
+
247
+ state = gr.State([]) # ์ฑ„ํŒ… ๊ธฐ๋ก
248
+ path_state = gr.State([]) # ํ‚ค์›Œ๋“œ ๊ฒฝ๋กœ ์ƒํƒœ
249
+
250
+ dropdown.change(lambda q: q, inputs=dropdown, outputs=msg)
251
+ msg.submit(chat_with_history, inputs=[msg, state], outputs=[msg, chatbot, state])
252
+
253
+ with gr.Column():
254
+ gr.Markdown("### ๐Ÿ” ๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ ํƒ์ƒ‰")
255
+ gr.Markdown(
256
+ """ ๊ต์œก ์ „๋ฐ˜์˜ ํ‚ค์›Œ๋“œ๋ฅผ ๋ณด์‹ค ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
257
+
258
+ ์ฃผ์ œ๋ณ„๋กœ ์ •๋ฆฌ๋œ **๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ ๊ณ„์ธต ๊ตฌ์กฐ**๋ฅผ ๋”ฐ๋ผ๊ฐ€๋ฉฐ ๊ด€์‹ฌ ์žˆ๋Š” ๋ถ„์•ผ๋ฅผ ๋‹จ๊ณ„์ ์œผ๋กœ ํƒ์ƒ‰ํ•  ์ˆ˜ ์žˆ๋Š” ๊ธฐ๋Šฅ์ž…๋‹ˆ๋‹ค.
259
+
260
+ - ๋จผ์ € ์ƒ์œ„ ์ฃผ์ œ๋ถ€ํ„ฐ ์‹œ์ž‘ํ•ด ๋ณด์„ธ์š”.
261
+ - ์›ํ•˜๋Š” ์ฃผ์ œ๋ฅผ ํด๋ฆญํ•˜๋ฉด ๊ทธ ์•„๋ž˜์˜ ์„ธ๋ถ€ ํ•ญ๋ชฉ์ด ํŽผ์ณ์ง‘๋‹ˆ๋‹ค.
262
+ - ํด๋ฆญ์„ ๊ณ„์†ํ•ด ๋“ค์–ด๊ฐ€๋ฉด, ์ ์  ๋” ๊ตฌ์ฒด์ ์ธ ํ‚ค์›Œ๋“œ์— ๋„๋‹ฌํ•  ์ˆ˜ ์žˆ์–ด์š”.
263
+ """
264
+ )
265
+
266
+ keyword_selector = gr.Radio(choices=get_keywords([]), label="ํ‚ค์›Œ๋“œ ์„ ํƒ", value=None)
267
+
268
+ keyword_selector.change(
269
+ fn=on_keyword_select,
270
+ inputs=[keyword_selector, path_state],
271
+ outputs=[keyword_path_display, path_state, keyword_selector]
272
+ )
273
+
274
  demo.launch()