import streamlit as st
import tensorflow as tf
import numpy as np
import pandas as pd
from transformers import BertTokenizer, TFAutoModel
from tqdm import tqdm
from tensorflow.python.client import device_lib
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
PATH = './checkpoint-7500/'
SEQ_LEN = 128
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
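# NOTE (assumption): PATH is expected to point to a locally saved, fine-tuned
# multilingual BERT checkpoint, and SEQ_LEN is the padded input length used at
# inference, assumed to match the value used during fine-tuning.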
def create_sentiment_bert():
    # Load the pretrained BERT model from the local checkpoint
    model = TFAutoModel.from_pretrained(PATH, local_files_only=True)
    # Define the token, mask, and segment inputs
    token_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_word_ids')
    mask_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_masks')
    segment_inputs = tf.keras.layers.Input((SEQ_LEN,), dtype=tf.int32, name='input_segment')
    # Define a model whose inputs are [tokens, masks, segments]
    bert_outputs = model([token_inputs, mask_inputs, segment_inputs])
    # Use the pooled output for classification
    bert_outputs = bert_outputs[1]
    sentiment_first = tf.keras.layers.Dense(1, activation='sigmoid', kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02))(bert_outputs)
    sentiment_model = tf.keras.Model([token_inputs, mask_inputs, segment_inputs], sentiment_first)
    sentiment_model.compile(loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])
    return sentiment_model
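# Illustrative sketch (not part of the original flow): the model built above
# takes three int32 arrays of shape (batch, SEQ_LEN) and returns one sigmoid
# probability per example, e.g.:
#     m = create_sentiment_bert()
#     dummy = [np.zeros((1, SEQ_LEN), dtype=np.int32) for _ in range(3)]
#     m.predict(dummy)  # -> array of shape (1, 1)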
def sentence_convert_data(data):
    global tokenizer
    tokens, masks, segments = [], [], []
    # Tokenize and pad/truncate to SEQ_LEN
    token = tokenizer.encode(data, max_length=SEQ_LEN, truncation=True, padding='max_length')
    # Attention mask: 1 for real tokens, 0 for padding
    num_zeros = token.count(0)
    mask = [1] * (SEQ_LEN - num_zeros) + [0] * num_zeros
    # Single-sentence input, so all segment ids are 0
    segment = [0] * SEQ_LEN
    tokens.append(token)
    segments.append(segment)
    masks.append(mask)
    tokens = np.array(tokens)
    masks = np.array(masks)
    segments = np.array(segments)
    return [tokens, masks, segments]
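# Example (illustrative): sentence_convert_data("Great movie!") returns
# [tokens, masks, segments], each a NumPy array of shape (1, SEQ_LEN), which
# is exactly the input format expected by the Keras model above.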
def movie_evaluation_predict(sentence, model):
    data_x = sentence_convert_data(sentence)
    predict = model.predict(data_x)
    predict_value = float(np.ravel(predict)[0])
    predict_answer = round(predict_value)
    print(predict_value)
    if predict_answer == 0:
        st.write("(Negative probability: %.2f) This is a negative movie review." % (1.0 - predict_value))
    elif predict_answer == 1:
        st.write("(Positive probability: %.2f) This is a positive movie review." % predict_value)
def setup_driver():
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # run Chrome in the background
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=chrome_options)
    return driver
def scrape_content(url):
    driver = setup_driver()
    try:
        driver.get(url)
        # Wait for the page to load
        time.sleep(3)
        # Extract the article body
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        content = soup.find('article')  # adjust to match the site's article tag
        # Extract the comments
        comments = soup.find_all('span', class_='u_cbox_contents')  # adjust to match the site's comment tag
        return {
            'content': content.text if content else "Could not find the article body.",
            'comments': [comment.text for comment in comments]
        }
    finally:
        driver.quit()
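# NOTE (assumption): the 'article' and 'u_cbox_contents' selectors appear to
# target Naver-style news pages; other sites will need different selectors.
# Recent Selenium releases (4.6+) can resolve a chromedriver automatically,
# but Chrome itself must be installed in the runtime environment.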
def main():
    sentiment_model = create_sentiment_bert()
    url = st.text_input("Enter a URL")
    if st.button("Start crawling"):
        if url:
            with st.spinner("Crawling..."):
                result = scrape_content(url)
            st.subheader("Article")
            st.write(result['content'])
            st.subheader("Comments")
            for idx, comment in enumerate(result['comments'], 1):
                st.write(f"{idx}. {comment}")
        else:
            st.error("Please enter a URL")
    '''
    test = st.form('test')
    sentence = test.text_input("Your sentence")
    submit = test.form_submit_button("Submit")
    if submit:
        movie_evaluation_predict(sentence, sentiment_model)
    '''
    return 0


if __name__ == "__main__":
    main()
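# Hedged usage note: assuming this file is saved as app.py and the checkpoint
# directory, Chrome, and the Python dependencies above are installed, the app
# can be started with:
#     streamlit run app.py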