|
from selenium import webdriver |
|
from selenium.common.exceptions import WebDriverException |
|
from PIL import Image |
|
from io import BytesIO |
|
|
|
import time |
|
|
|
from selenium.webdriver.common.by import By |
|
from selenium.webdriver.support.ui import WebDriverWait |
|
from selenium.webdriver.support import expected_conditions as EC |
|
from bs4 import BeautifulSoup |
|
|
|
def take_webdata(url: str):
    """Open *url* in headless Chrome and capture a screenshot and the title.

    Args:
        url: Address of the page to load.

    Returns:
        An ``(image, title)`` tuple. ``image`` is a PIL image decoded from the
        PNG screenshot of a 1080x720 browser window and ``title`` is the page
        title. If Selenium raises ``WebDriverException`` at any point, a 1x1
        RGB placeholder image is returned together with whatever title had
        been read so far (an empty string if none).
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bound before the ``try`` so the ``except`` and ``finally`` clauses can
    # safely refer to them even when the browser fails to start or the page
    # fails to load.
    wd = None
    page_title = ""

    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)
        wd.get(url)
        # NOTE(review): this only configures the element-lookup timeout; it
        # does not pause for the page to render. Confirm whether an explicit
        # wait was intended here.
        wd.implicitly_wait(5)

        page_title = wd.title
        screenshot = wd.get_screenshot_as_png()
    except WebDriverException:
        return Image.new('RGB', (1, 1)), page_title
    finally:
        # Only quit a browser that was actually started.
        if wd is not None:
            wd.quit()

    return Image.open(BytesIO(screenshot)), page_title
|
|
|
|
|
def scrape_vehicle(page_source):
    """Extract vehicle data and the payment breakdown from a result page.

    Args:
        page_source: HTML markup of the result page.

    Returns:
        A ``(data_kendaraan, rincians)`` tuple.

        ``data_kendaraan`` is a dict built from the first ``<table>`` in the
        document: for every row with at least three ``<td>`` cells, the first
        cell (lower-cased, dots removed, spaces replaced by underscores) is
        the key and the third cell is the value. It is empty when the page
        contains no table.

        ``rincians`` is a list of dicts built from the ``div.row`` elements
        inside ``div#det_pkb``, with the keys ``pokok``, ``denda``, ``total``
        and ``jenis`` (upper-cased). Keys whose text is empty are omitted.
    """
    soup = BeautifulSoup(page_source, "html.parser")

    data_kendaraan = {}
    table = soup.find("table")
    # ``find`` returns None when there is no table (e.g. an error page);
    # treat that as "no vehicle data" instead of raising AttributeError.
    table_rows = table.find_all("tr") if table is not None else []
    for row in table_rows:
        cells = row.find_all("td")
        if len(cells) < 3:
            continue
        # The second cell is skipped -- presumably a ":" separator column;
        # verify against the live markup.
        key = (
            cells[0]
            .get_text(strip=True)
            .lower()
            .replace(".", "")
            .replace(" ", "_")
        )
        data_kendaraan[key] = cells[2].get_text(strip=True)

    rincians = []
    rincian_div = soup.find("div", id="det_pkb")
    if rincian_div:
        rows = rincian_div.find_all("div", class_="row")
        # The first row is skipped -- presumably the column headers.
        for row in rows[1:]:
            cols = row.find_all("p")
            if len(cols) < 3:
                continue
            jenis = cols[3].get_text(strip=True) if len(cols) > 3 else ""
            rincian = {
                "pokok": cols[0].get_text(strip=True),
                "denda": cols[1].get_text(strip=True),
                "total": cols[2].get_text(strip=True),
                "jenis": jenis.upper(),
            }
            # Drop empty fields; a row with no text at all is not recorded.
            rincian = {k: v for k, v in rincian.items() if v}
            if rincian:
                rincians.append(rincian)

    return data_kendaraan, rincians
|
|
|
|
|
def get_vehicle_info(plate_number: str):
    """Look up *plate_number* on the Jambi Samsat site and screenshot the result.

    The function opens the lookup form in headless Chrome, submits the plate
    number, waits for the result page, resizes the window to the document
    height plus a margin, and takes a screenshot. The parsed vehicle data is
    printed but not returned.

    Args:
        plate_number: Plate number typed into the ``no_polisi`` form field.

    Returns:
        An ``(image, title)`` tuple. ``image`` is a PIL image decoded from the
        PNG screenshot and ``title`` is the result page title. If Selenium
        raises ``WebDriverException`` after the browser has started (this
        includes wait timeouts), a 1x1 RGB placeholder image is returned
        together with an empty title.

    Raises:
        WebDriverException: If the Chrome driver itself cannot be started;
            driver creation happens outside the guarded section.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")

    driver = webdriver.Chrome(options=options)

    # Bound before the ``try`` so the ``except`` branch can return it even
    # when the failure happens before the title has been read.
    page_title = ""

    try:
        driver.get("https://www.jambisamsat.net/infopkb.html")
        time.sleep(1)

        # ``until`` returns the located element, so no second lookup is needed.
        input_field = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "no_polisi"))
        )
        input_field.clear()
        input_field.send_keys(plate_number)

        submit_button = driver.find_element(
            By.CSS_SELECTOR, 'button.btn.btn-primary[type="submit"]'
        )
        submit_button.click()

        # The form submission navigates to the result page.
        WebDriverWait(driver, 10).until(EC.url_contains("infopkb.php"))

        # NOTE(review): this only configures the element-lookup timeout and
        # no element lookups follow; confirm whether a pause was intended.
        driver.implicitly_wait(3)

        # Grow the window to the document height plus a margin -- presumably
        # so the screenshot captures the whole page.
        scroll_height = driver.execute_script("return document.body.scrollHeight")
        driver.set_window_size(1920, scroll_height + 200)
        time.sleep(1)

        data_kendaraan, rincian = scrape_vehicle(driver.page_source)

        # Parsed data is only echoed to stdout; it is not part of the return.
        print(data_kendaraan, rincian)

        page_title = driver.title
        screenshot = driver.get_screenshot_as_png()

        return Image.open(BytesIO(screenshot)), page_title

    except WebDriverException:
        return Image.new('RGB', (1, 1)), page_title

    finally:
        driver.quit()