import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# (substring looked for in a row's first text, section name), checked in order.
_SECTION_MARKERS = (
    ("total", "total"),
    ("rincian pkb", "pkb"),
    ("rincian swdkllj", "swdkllj"),
)


def take_webdata(url):
    """Open *url* in headless Chrome and capture a screenshot and the title.

    Args:
        url: Address of the page to load.

    Returns:
        A ``(image, page_title)`` tuple. ``image`` is a PIL image of the
        1080x720 browser window. If Selenium raises a
        ``WebDriverException`` at any point (including while starting the
        browser), a 1x1 placeholder image is returned together with
        whatever title had been read so far (``""`` if none).
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    # Pre-bind everything the except/finally clauses read, so a failure
    # early in the try block cannot turn into an UnboundLocalError.
    wd = None
    page_title = ""
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)
        wd.get(url)
        wd.implicitly_wait(5)
        page_title = wd.title
        screenshot = wd.get_screenshot_as_png()
    except WebDriverException:
        return Image.new("RGB", (1, 1)), page_title
    finally:
        if wd is not None:
            wd.quit()

    return Image.open(BytesIO(screenshot)), page_title


def scrape_vehicle(driver):
    """Extract the vehicle table and the ``det_pkb`` breakdown from the page.

    Both parsing stages are best-effort: any exception is printed and the
    data collected up to that point is still returned.

    Args:
        driver: A Selenium WebDriver already showing the result page.

    Returns:
        A ``(data_kendaraan, rincians)`` tuple.

        ``data_kendaraan`` maps a normalised label to a value for every
        ``table tr`` row with at least three ``td`` cells. The label is
        the first cell lower-cased, with dots removed and spaces replaced
        by underscores; the value is the third cell. (The second cell is
        ignored -- presumably a separator column; confirm against the
        page markup.)

        ``rincians`` is ``[rincian_total, rincian_pkb, rincian_swdkllj]``:
        a dict for the "total" section (the last matching row wins) and
        a list of dicts for each of the other two sections. Each dict has
        the keys ``pokok``, ``denda``, ``total`` and, when a fourth text
        is present, ``jenis`` (upper-cased).
    """
    data_kendaraan = {}
    try:
        for row in driver.find_elements(By.CSS_SELECTOR, "table tr"):
            cols = row.find_elements(By.TAG_NAME, "td")
            if len(cols) >= 3:
                key = (
                    cols[0].text.strip().lower()
                    .replace(".", "")
                    .replace(" ", "_")
                )
                data_kendaraan[key] = cols[2].text.strip()
    except Exception as e:  # best-effort: report and keep what we have
        print("Gagal parsing tabel:", e)

    rincian_total = {}
    rincian_pkb = []
    rincian_swdkllj = []
    try:
        container = driver.find_element(By.ID, "det_pkb")
        current_section = None
        for row in container.find_elements(By.CLASS_NAME, "row"):
            cols = row.find_elements(By.TAG_NAME, "p")
            print("[COLUMNS]", cols)
            # Read each element's text once; every .text access is a
            # separate WebDriver call.
            stripped = (col.text.strip() for col in cols)
            texts = [text for text in stripped if text]
            print("[TEXTS]", texts)
            if not texts:
                continue

            label = texts[0].lower()
            print("[LABEL]", label)

            # A row whose first text names a section only switches the
            # current section; it carries no data itself.
            section = next(
                (name for marker, name in _SECTION_MARKERS if marker in label),
                None,
            )
            if section is not None:
                current_section = section
                continue

            if len(texts) < 3:
                continue

            rincian = {
                "pokok": texts[0],
                "denda": texts[1],
                "total": texts[2],
            }
            if len(texts) > 3:
                rincian["jenis"] = texts[3].upper()

            # Rows seen before any section header are dropped.
            if current_section == "total":
                rincian_total = rincian
            elif current_section == "pkb":
                rincian_pkb.append(rincian)
            elif current_section == "swdkllj":
                rincian_swdkllj.append(rincian)
    except Exception as e:  # best-effort: report and keep what we have
        print("Gagal parsing det_pkb:", e)

    rincians = [rincian_total, rincian_pkb, rincian_swdkllj]
    return data_kendaraan, rincians


def get_vehicle_info(plate_number: str):
    """Submit *plate_number* to the infopkb form and screenshot the result.

    Loads ``https://www.jambisamsat.net/infopkb.html``, fills the
    ``no_polisi`` field, submits the form, waits for a URL containing
    ``infopkb.php``, resizes the window to the full page height, prints
    the data returned by ``scrape_vehicle`` and takes a screenshot.

    Args:
        plate_number: Text typed into the ``no_polisi`` input.

    Returns:
        A ``(image, page_title)`` tuple. If Selenium raises a
        ``WebDriverException`` after the browser has started (wait
        timeouts included), a 1x1 placeholder image is returned together
        with ``""`` as the title.

    Raises:
        WebDriverException: If the Chrome driver itself cannot be started.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")

    driver = webdriver.Chrome(options=options)
    # Bound up front so the except clause can always return it.
    page_title = ""
    try:
        driver.get("https://www.jambisamsat.net/infopkb.html")
        time.sleep(1)

        # until() hands back the located element, so no second lookup.
        input_field = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "no_polisi"))
        )
        input_field.clear()
        input_field.send_keys(plate_number)

        submit_button = driver.find_element(
            By.CSS_SELECTOR, 'button.btn.btn-primary[type="submit"]'
        )
        submit_button.click()

        # Wait for the result page to load.
        WebDriverWait(driver, 10).until(EC.url_contains("infopkb.php"))
        driver.implicitly_wait(3)

        # Grow the window to the document height so the screenshot covers
        # the whole page rather than just the initial viewport.
        scroll_height = driver.execute_script(
            "return document.body.scrollHeight"
        )
        driver.set_window_size(1920, scroll_height + 200)
        time.sleep(1)

        data_kendaraan, rincian = scrape_vehicle(driver)
        print(data_kendaraan, rincian)

        page_title = driver.title
        screenshot = driver.get_screenshot_as_png()
        return Image.open(BytesIO(screenshot)), page_title
    except WebDriverException:
        return Image.new("RGB", (1, 1)), page_title
    finally:
        driver.quit()