jonathanjordan21 committed on
Commit
006a96f
·
verified ·
1 Parent(s): 1fe400e

Update extract.py

Browse files
Files changed (1) hide show
  1. extract.py +43 -46
extract.py CHANGED
@@ -64,58 +64,55 @@ def scrape_vehicle(driver):
64
  # except Exception as e:
65
  # print("Gagal parsing det_pkb:", e)
66
 
67
-
68
- rincian_total = {}
69
- rincian_pkb = []
70
- rincian_swdkllj = []
 
 
 
 
 
 
 
 
 
 
 
71
 
 
 
72
  try:
73
- container = driver.find_element(By.ID, "det_pkb")
74
- rows = container.find_elements(By.CLASS_NAME, "row")
75
-
76
- current_section = None
77
- for row in rows:
78
  cols = row.find_elements(By.TAG_NAME, "p")
79
- print("[COLUMNS]", cols)
80
- texts = [col.text.strip() for col in cols if col.text.strip()]
81
- print("[TEXTS]", texts)
82
- if not texts:
83
- continue
84
-
85
- label = texts[0].lower()
86
-
87
- print("[LABEL]", label)
88
-
89
- if "total" in label:
90
- current_section = "total"
91
- continue
92
- elif "rincian pkb" in label:
93
- current_section = "pkb"
94
- continue
95
- elif "rincian swdkllj" in label:
96
- current_section = "swdkllj"
97
- continue
98
-
99
- if len(texts) >= 3:
100
- rincian = {
101
- "pokok": texts[0],
102
- "denda": texts[1],
103
- "total": texts[2]
104
- }
105
- if len(texts) > 3:
106
- rincian["jenis"] = texts[3].upper()
107
-
108
- if current_section == "total":
109
- rincian_total = rincian
110
- elif current_section == "pkb":
111
- rincian_pkb.append(rincian)
112
- elif current_section == "swdkllj":
113
- rincian_swdkllj.append(rincian)
114
-
115
  except Exception as e:
116
  print("Gagal parsing det_pkb:", e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
- rincians = [rincian_total, rincian_pkb, rincian_swdkllj]
119
  return data_kendaraan, rincians
120
 
121
 
 
64
  # except Exception as e:
65
  # print("Gagal parsing det_pkb:", e)
66
 
67
+ total_tagihan = []
68
+ try:
69
+ all_rows = driver.find_elements(By.CSS_SELECTOR, "div.row")
70
+ for row in all_rows:
71
+ if "Pokok" in row.text and "Denda" in row.text and "Total" in row.text:
72
+ cols = row.find_elements(By.TAG_NAME, "p")
73
+ print(["COLS TOTAL]", cols)
74
+ if len(cols) >= 3:
75
+ total_tagihan.append({
76
+ "pokok": cols[0].text.strip(),
77
+ "denda": cols[1].text.strip(),
78
+ "total": cols[2].text.strip(),
79
+ })
80
+ except Exception as e:
81
+ print("Gagal parsing total tagihan:", e)
82
 
83
+ # ==== 3. Ambil det_pkb ====
84
+ rincians_pkb = []
85
  try:
86
+ pkb_rows = driver.find_elements(By.CSS_SELECTOR, "#det_pkb .row")[1:] # skip header
87
+ for row in pkb_rows:
 
 
 
88
  cols = row.find_elements(By.TAG_NAME, "p")
89
+ print(["COLS PKB]", cols)
90
+ if len(cols) >= 3:
91
+ rincians_pkb.append({
92
+ "pokok": cols[0].text.strip(),
93
+ "denda": cols[1].text.strip(),
94
+ "total": cols[2].text.strip()
95
+ })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  except Exception as e:
97
  print("Gagal parsing det_pkb:", e)
98
+
99
+ # ==== 4. Ambil det_swd ====
100
+ rincians_swd = []
101
+ try:
102
+ swd_rows = driver.find_elements(By.CSS_SELECTOR, "#det_swd .row")[1:] # skip header
103
+ for row in swd_rows:
104
+ cols = row.find_elements(By.TAG_NAME, "p")
105
+ print(["COLS SWD]", cols)
106
+ if len(cols) >= 3:
107
+ rincians_swd.append({
108
+ "pokok": cols[0].text.strip(),
109
+ "denda": cols[1].text.strip(),
110
+ "total": cols[2].text.strip()
111
+ })
112
+ except Exception as e:
113
+ print("Gagal parsing det_swd:", e)
114
 
115
+ rincians = [total_tagihan, rincian_pkb, rincian_swd]
116
  return data_kendaraan, rincians
117
 
118