|
from selenium import webdriver |
|
from selenium.common.exceptions import WebDriverException |
|
from PIL import Image |
|
from io import BytesIO |
|
|
|
import time |
|
|
|
from selenium.webdriver.common.by import By |
|
from selenium.webdriver.support.ui import WebDriverWait |
|
from selenium.webdriver.support import expected_conditions as EC |
|
|
|
def take_webdata(url):
    """Open *url* in headless Chrome and capture a screenshot and the title.

    Args:
        url: Address passed to ``webdriver.get``.

    Returns:
        A ``(image, page_title)`` tuple. ``image`` is a PIL image decoded from
        the PNG screenshot of a 1080x720 window. If a ``WebDriverException``
        is raised at any step, ``image`` is a 1x1 RGB placeholder and
        ``page_title`` is whatever had been read so far (an empty string when
        the failure happened before the title was read).
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bind both names before the try block: the except/finally clauses read
    # them, and Chrome start-up or page load may fail before they would
    # otherwise be assigned (which used to raise UnboundLocalError).
    wd = None
    page_title = ""

    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)
        wd.get(url)
        # NOTE(review): an implicit wait only affects element lookups, and
        # none follow here; kept to preserve the original command sequence.
        wd.implicitly_wait(5)

        page_title = wd.title
        screenshot = wd.get_screenshot_as_png()
    except WebDriverException:
        return Image.new('RGB', (1, 1)), page_title
    finally:
        # Only quit a browser that actually started.
        if wd is not None:
            wd.quit()

    return Image.open(BytesIO(screenshot)), page_title
|
|
|
|
|
def _scrape_vehicle_table(driver):
    """Collect label/value pairs from every ``table tr`` row on the page."""
    data_kendaraan = {}
    try:
        for row in driver.find_elements(By.CSS_SELECTOR, "table tr"):
            cols = row.find_elements(By.TAG_NAME, "td")
            # The label is read from the first cell and the value from the
            # third; the second cell is skipped (presumably a separator
            # column -- confirm against the page markup).
            if len(cols) >= 3:
                key = cols[0].text.strip().lower().replace(".", "").replace(" ", "_")
                data_kendaraan[key] = cols[2].text.strip()
    except Exception as e:
        # Best effort: report the problem and keep whatever was parsed so far.
        print("Gagal parsing tabel:", e)
    return data_kendaraan


def _scrape_payment_details(driver):
    """Parse the ``det_pkb`` container into total / PKB / SWDKLLJ breakdowns."""
    rincian_total = {}
    rincian_pkb = []
    rincian_swdkllj = []

    try:
        container = driver.find_element(By.ID, "det_pkb")

        current_section = None
        for row in container.find_elements(By.CLASS_NAME, "row"):
            # Read each element's text once; every ``.text`` access is a
            # separate WebDriver call.
            stripped = (col.text.strip() for col in row.find_elements(By.TAG_NAME, "p"))
            texts = [text for text in stripped if text]
            if not texts:
                continue

            label = texts[0].lower()

            # A heading row only switches the section that the following
            # rows are filed under; it carries no values itself.
            if "total" in label:
                current_section = "total"
                continue
            elif "rincian pkb" in label:
                current_section = "pkb"
                continue
            elif "rincian swdkllj" in label:
                current_section = "swdkllj"
                continue

            if len(texts) >= 3:
                rincian = {
                    "pokok": texts[0],
                    "denda": texts[1],
                    "total": texts[2],
                }
                if len(texts) > 3:
                    rincian["jenis"] = texts[3].upper()

                if current_section == "total":
                    # Only the most recent row of the total section is kept.
                    rincian_total = rincian
                elif current_section == "pkb":
                    rincian_pkb.append(rincian)
                elif current_section == "swdkllj":
                    rincian_swdkllj.append(rincian)

    except Exception as e:
        # Best effort: report the problem and keep whatever was parsed so far.
        print("Gagal parsing det_pkb:", e)

    return [rincian_total, rincian_pkb, rincian_swdkllj]


def scrape_vehicle(driver):
    """Extract vehicle data and the payment breakdown from the loaded page.

    Args:
        driver: Object exposing Selenium's ``find_elements`` / ``find_element``
            lookups, already positioned on the result page.

    Returns:
        A ``(data_kendaraan, rincians)`` tuple. ``data_kendaraan`` maps a
        normalised label (lower-cased, dots removed, spaces replaced with
        underscores) to its value text. ``rincians`` is the list
        ``[rincian_total, rincian_pkb, rincian_swdkllj]``: a dict followed by
        two lists of dicts, each dict holding ``"pokok"``, ``"denda"``,
        ``"total"`` and, when a fourth text is present, ``"jenis"``.

    Parsing errors in either part are printed and do not propagate; the
    affected part is returned with whatever was collected before the error.
    """
    data_kendaraan = _scrape_vehicle_table(driver)
    rincians = _scrape_payment_details(driver)
    return data_kendaraan, rincians
|
|
|
|
|
def get_vehicle_info(plate_number: str):
    """Submit *plate_number* to the Jambi Samsat PKB form and capture the result.

    The scraped vehicle data and payment breakdown are printed, not returned.

    Args:
        plate_number: Text typed into the ``no_polisi`` input field.

    Returns:
        A ``(image, page_title)`` tuple. ``image`` is a PIL image decoded from
        the PNG screenshot of the result page. If a ``WebDriverException`` is
        raised while driving the page, ``image`` is a 1x1 RGB placeholder and
        ``page_title`` is an empty string.

    Raises:
        WebDriverException: If Chrome itself cannot be started; the driver is
            created outside the guarded section, so that failure propagates.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")

    driver = webdriver.Chrome(options=options)

    # Bound before the try block so the except handler can always return it;
    # previously a failure before the title was read raised UnboundLocalError.
    page_title = ""

    try:
        driver.get("https://www.jambisamsat.net/infopkb.html")
        time.sleep(1)

        # ``until`` returns the located element, so no second lookup is needed.
        input_field = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "no_polisi"))
        )
        input_field.clear()
        input_field.send_keys(plate_number)

        submit_button = driver.find_element(
            By.CSS_SELECTOR, 'button.btn.btn-primary[type="submit"]'
        )
        submit_button.click()

        # Wait for the navigation to the result page triggered by the click.
        WebDriverWait(driver, 10).until(
            EC.url_contains("infopkb.php")
        )

        # Applies to the element lookups performed by scrape_vehicle below.
        driver.implicitly_wait(3)

        # Resize the window to the document height (plus a margin) so the
        # screenshot covers the whole page, not only the initial viewport.
        scroll_height = driver.execute_script("return document.body.scrollHeight")
        driver.set_window_size(1920, scroll_height + 200)
        time.sleep(1)

        data_kendaraan, rincian = scrape_vehicle(driver)

        print(data_kendaraan, rincian)

        page_title = driver.title
        screenshot = driver.get_screenshot_as_png()

        return Image.open(BytesIO(screenshot)), page_title

    except WebDriverException:
        return Image.new('RGB', (1, 1)), page_title

    finally:
        driver.quit()