AIRider committed on
Commit 7ebe30e · verified · 1 Parent(s): c305495

Create app.py

Files changed (1)
app.py +281 -0
app.py ADDED
@@ -0,0 +1,281 @@
import gradio as gr
import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from urllib.parse import urlencode
import re
import time
import random
import os
from huggingface_hub import InferenceClient

def setup_session():
    try:
        session = requests.Session()
        retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
        session.mount('https://', HTTPAdapter(max_retries=retries))
        return session
    except Exception as e:
        return None

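# setup_session() above returns a requests.Session that retries transient
# 502/503/504 responses (up to 5 attempts with exponential backoff) and
# returns None if session creation fails. A minimal standalone sketch
# (hypothetical URL, network access assumed):
#
#   session = setup_session()
#   if session is not None:
#       print(session.get("https://example.com").status_code)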
def generate_naver_search_url(query):
    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    url = base_url + urlencode(params)
    return url

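# For a query like "august travel", generate_naver_search_url() is expected to
# produce roughly:
#   https://search.naver.com/search.naver?ssc=tab.blog.all&sm=tab_jum&query=august+travel
# (urlencode percent-encodes spaces and non-ASCII characters and keeps the
# dict's insertion order).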
def crawl_blog_content(url, session):
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Referer": "https://search.naver.com/search.naver",
        }

        # Add a random delay between requests
        delay = random.uniform(1, 2)
        time.sleep(delay)

        response = session.get(url, headers=headers)
        if response.status_code != 200:
            return ""

        soup = BeautifulSoup(response.content, "html.parser")
        content = soup.find("div", attrs={'class': 'se-main-container'})

        if content:
            return clean_text(content.get_text())
        else:
            return ""
    except Exception as e:
        return ""

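# crawl_blog_content() above only extracts the 'se-main-container' div, which
# appears to be the main content wrapper of newer (SmartEditor ONE) Naver blog
# posts; anything else comes back as an empty string. The 1-2 second random
# delay is a light politeness/rate-limiting measure.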
def crawl_naver_search_results(url, session):
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Referer": "https://search.naver.com/search.naver",
        }
        response = session.get(url, headers=headers)
        if response.status_code != 200:
            return []

        soup = BeautifulSoup(response.content, "html.parser")
        results = []
        count = 0
        for li in soup.find_all("li", class_=re.compile("bx.*")):
            if count >= 10:
                break
            for div in li.find_all("div", class_="detail_box"):
                for div2 in div.find_all("div", class_="title_area"):
                    title = div2.text.strip()
                    for a in div2.find_all("a", href=True):
                        link = a["href"]
                        if "blog.naver" in link:
                            link = link.replace("https://", "https://m.")
                            results.append({"title": title, "link": link})
                            count += 1
                            if count >= 10:
                                break
                    if count >= 10:
                        break
                if count >= 10:
                    break

        return results
    except Exception as e:
        return []

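# crawl_naver_search_results() returns at most 10 entries shaped like
#   {"title": "<post title>", "link": "https://m.blog.naver.com/..."}
# blog.naver.com links are rewritten to the mobile host, presumably because the
# mobile pages are simpler for crawl_blog_content() to parse.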
def clean_text(text):
    text = re.sub(r'\s+', ' ', text).strip()
    return text

def create_client(model_name):
    return InferenceClient(model_name, token=os.getenv("HF_TOKEN"))

client = create_client("CohereForAI/c4ai-command-r-plus")

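# The client reads its access token from the HF_TOKEN environment variable
# (e.g. a Space secret). Without a valid token, calls to the hosted
# CohereForAI/c4ai-command-r-plus endpoint may be rejected or rate-limited.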
def call_api(content, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}, {"role": "user", "content": content}]
    random_seed = random.randint(0, 1000000)
    response = client.chat_completion(messages=messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p, seed=random_seed)
    modified_text = response.choices[0].message.content
    input_tokens = response.usage.prompt_tokens
    output_tokens = response.usage.completion_tokens
    total_tokens = response.usage.total_tokens
    return modified_text, input_tokens, output_tokens, total_tokens

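# A minimal sketch of calling call_api() in isolation (hypothetical arguments;
# assumes HF_TOKEN is set and the model endpoint is reachable):
#
#   text, n_in, n_out, n_total = call_api(
#       content="Blog topic: domestic travel destinations for August",
#       system_message="You are a helpful blog-writing assistant.",
#       max_tokens=1000,
#       temperature=0.8,
#       top_p=0.95,
#   )
#   print(text, n_total)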
def analyze_info(category, topic, references1, references2, references3):
    return f"Selected category: {category}\nBlog topic: {topic}\nReference 1: {references1}\nReference 2: {references2}\nReference 3: {references3}"

def suggest_title(category, topic, references1, references2, references3, system_message, max_tokens, temperature, top_p):
    full_content = analyze_info(category, topic, references1, references2, references3)
    modified_text, input_tokens, output_tokens, total_tokens = call_api(full_content, system_message, max_tokens, temperature, top_p)
    token_usage_message = f"[Input tokens: {input_tokens}]\n[Output tokens: {output_tokens}]\n[Total tokens: {total_tokens}]"
    return modified_text, token_usage_message

def generate_outline(category, topic, references1, references2, references3, title, system_message, max_tokens, temperature, top_p):
    full_content = analyze_info(category, topic, references1, references2, references3)
    content = f"{full_content}\nTitle: {title}"
    modified_text, input_tokens, output_tokens, total_tokens = call_api(content, system_message, max_tokens, temperature, top_p)
    token_usage_message = f"[Input tokens: {input_tokens}]\n[Output tokens: {output_tokens}]\n[Total tokens: {total_tokens}]"
    return modified_text, token_usage_message

def generate_blog_post(category, topic, references1, references2, references3, title, outline, system_message, max_tokens, temperature, top_p):
    full_content = analyze_info(category, topic, references1, references2, references3)
    content = f"{full_content}\nTitle: {title}\nOutline: {outline}"
    modified_text, input_tokens, output_tokens, total_tokens = call_api(content, system_message, max_tokens, temperature, top_p)
    formatted_text = modified_text.replace('\n', '\n\n')
    token_usage_message = f"[Input tokens: {input_tokens}]\n[Output tokens: {output_tokens}]\n[Total tokens: {total_tokens}]"
    return formatted_text, token_usage_message

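# suggest_title(), generate_outline() and generate_blog_post() all funnel the
# same analyze_info() summary through call_api(); they differ only in how much
# prior context (nothing, the title, or the title plus outline) is appended and
# in the system message supplied from the UI.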
def fetch_references(topic):
    search_url = generate_naver_search_url(topic)
    session = setup_session()
    if session is None:
        return "Failed to set up session.", "", "", ""
    results = crawl_naver_search_results(search_url, session)
    if len(results) < 3:
        return "Not enough search results found.", "", "", ""

    selected_results = random.sample(results, 3)
    references1_content = f"Title: {selected_results[0]['title']}\nContent: {crawl_blog_content(selected_results[0]['link'], session)}"
    references2_content = f"Title: {selected_results[1]['title']}\nContent: {crawl_blog_content(selected_results[1]['link'], session)}"
    references3_content = f"Title: {selected_results[2]['title']}\nContent: {crawl_blog_content(selected_results[2]['link'], session)}"

    return "Reference articles generated", references1_content, references2_content, references3_content

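# fetch_references() is a manual helper that is not wired into the UI below; it
# returns a status message plus three "Title/Content" reference strings and can
# be called directly, e.g. (hypothetical topic, network access assumed):
#
#   status, ref1, ref2, ref3 = fetch_references("domestic travel destinations")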
def fetch_references_and_generate_all_steps(category, topic, blog_title, system_message_outline, max_tokens_outline, temperature_outline, top_p_outline, system_message_blog_post, max_tokens_blog_post, temperature_blog_post, top_p_blog_post):
    search_url = generate_naver_search_url(topic)
    session = setup_session()
    if session is None:
        return "", "", "", "", "", "", ""

    results = crawl_naver_search_results(search_url, session)
    if len(results) < 3:
        return "", "", "", "", "", "", ""

    selected_results = random.sample(results, 3)
    references1_content = f"Title: {selected_results[0]['title']}\nContent: {crawl_blog_content(selected_results[0]['link'], session)}"
    references2_content = f"Title: {selected_results[1]['title']}\nContent: {crawl_blog_content(selected_results[1]['link'], session)}"
    references3_content = f"Title: {selected_results[2]['title']}\nContent: {crawl_blog_content(selected_results[2]['link'], session)}"

    # Generate the outline
    outline_result, outline_token_usage = generate_outline(category, topic, references1_content, references2_content, references3_content, blog_title, system_message_outline, max_tokens_outline, temperature_outline, top_p_outline)

    # Generate the blog post
    blog_post_result, blog_post_token_usage = generate_blog_post(category, topic, references1_content, references2_content, references3_content, blog_title, outline_result, system_message_blog_post, max_tokens_blog_post, temperature_blog_post, top_p_blog_post)

    return references1_content, references2_content, references3_content, outline_result, outline_token_usage, blog_post_result, blog_post_token_usage

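# The 7-tuple returned above must stay aligned with the outputs list of
# generate_post_btn.click() in the UI below: three reference texts, the
# outline, its token usage, the blog post, and its token usage.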
def get_title_prompt(category):
    if category == "General":
        return """
# Blog title generation rules (General)
"""
    elif category == "Health info":
        return """
# Blog title generation rules (Health info)
"""

def get_outline_prompt(category):
    if category == "General":
        return """
# Blog subtopic generation rules (General)
"""
    elif category == "Health info":
        return """
# Blog subtopic generation rules (Health info)
"""

def get_blog_post_prompt(category):
    if category == "General":
        return """
# Blog text generation rules (General)
"""
    elif category == "Health info":
        return """
# Blog text generation rules (Health info)
"""

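# The rule prompts above are placeholders in this commit: each returns only a
# heading, so the actual writing constraints are presumably meant to be edited
# in the "System message" textboxes exposed in the UI.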
# Build the Gradio interface
title = "Informational Post Auto-Generator (automatic after title suggestion)"

def update_prompts(category):
    title_prompt = get_title_prompt(category)
    outline_prompt = get_outline_prompt(category)
    blog_post_prompt = get_blog_post_prompt(category)
    return title_prompt, outline_prompt, blog_post_prompt

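# update_prompts() backs the category.change() handler at the bottom of the UI:
# switching the radio button swaps all three system-message textboxes to the
# rule set for the selected category.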
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")

    # Step 1
    gr.Markdown("### Step 1: Choose a posting category")
    category = gr.Radio(choices=["General", "Health info"], label="Posting category", value="General")

    # Step 2
    gr.Markdown("### Step 2: Enter the blog topic or keywords in detail")
    topic = gr.Textbox(label="Blog topic (e.g., spicy squid salad (X), spicy squid salad recipe (O))", placeholder="e.g., travel destinations (X), recommended domestic destinations for August (O)")

    # Step 3: predefine the (hidden) reference-article textboxes
    references1 = gr.Textbox(label="Reference 1", placeholder="Copy and paste a blog post to use as a reference", lines=10, visible=False)
    references2 = gr.Textbox(label="Reference 2", placeholder="Copy and paste a blog post to use as a reference", lines=10, visible=False)
    references3 = gr.Textbox(label="Reference 3", placeholder="Copy and paste a blog post to use as a reference", lines=10, visible=False)

    # Title suggestions
    gr.Markdown("### Step 4: Get title suggestions")

    with gr.Accordion("Title settings", open=True):
        title_system_message = gr.Textbox(label="System message", value=get_title_prompt("General"), lines=15)
        title_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=5000, step=1000)
        title_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        title_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)

    title_suggestions = gr.Textbox(label="Title suggestions", lines=10)
    title_token_output = gr.Markdown(label="Tokens used")

    # Title suggestion button
    title_btn = gr.Button("Suggest titles")
    title_btn.click(fn=suggest_title, inputs=[category, topic, references1, references2, references3, title_system_message, title_max_tokens, title_temperature, title_top_p], outputs=[title_suggestions, title_token_output])

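    # references1/2/3 are hidden and empty at this point, so title suggestions
    # are driven by the category and topic alone; the reference boxes are only
    # filled in by the "Generate blog post" step below.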
    blog_title = gr.Textbox(label="Blog title", placeholder="Enter the blog title")

    # Blog post generation
    gr.Markdown("### Step 5: Generate the blog post")
    gr.HTML("<span style='color: grey;'>[Clicking the 'Generate blog post' button automatically creates the outline and writes the blog post.]</span>")

    with gr.Accordion("Blog post settings", open=True):
        outline_system_message = gr.Textbox(label="System message", value=get_outline_prompt("General"), lines=20)
        outline_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=6000, step=1000)
        outline_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        outline_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)

        blog_system_message = gr.Textbox(label="System message", value=get_blog_post_prompt("General"), lines=20)
        blog_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=12000, value=8000, step=1000)
        blog_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        blog_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)

    outline_result = gr.Textbox(label="Outline result", lines=15, visible=False)
    outline_token_output = gr.Markdown(label="Tokens used", visible=False)
    output = gr.Textbox(label="Generated blog post", lines=30)
    token_output = gr.Markdown(label="Tokens used")

    # Blog post generation button
    generate_post_btn = gr.Button("Generate blog post")
    generate_post_btn.click(
        fn=fetch_references_and_generate_all_steps,
        inputs=[category, topic, blog_title, outline_system_message, outline_max_tokens, outline_temperature, outline_top_p, blog_system_message, blog_max_tokens, blog_temperature, blog_top_p],
        outputs=[references1, references2, references3, outline_result, outline_token_output, output, token_output]
    )

    category.change(fn=update_prompts, inputs=category, outputs=[title_system_message, outline_system_message, blog_system_message])

demo.launch()
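# demo.launch() starts the app with Gradio's defaults; demo.queue() before
# launching, or demo.launch(share=True), are the usual options if request
# queueing or a public link is needed, but they are not used here.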