jonathanjordan21 committed on
Commit
0997ad9
·
verified ·
1 Parent(s): be21b28

Update extract.py

Browse files
Files changed (1) hide show
  1. extract.py +58 -10
extract.py CHANGED
@@ -46,24 +46,72 @@ def scrape_vehicle(driver):
46
  except Exception as e:
47
  print("Gagal parsing tabel:", e)
48
 
49
- rincians = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  try:
51
  container = driver.find_element(By.ID, "det_pkb")
52
  rows = container.find_elements(By.CLASS_NAME, "row")
53
- for row in rows[1:]: # skip header
 
 
54
  cols = row.find_elements(By.TAG_NAME, "p")
55
- if len(cols) >= 3:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  rincian = {
57
- "pokok": cols[0].text.strip(),
58
- "denda": cols[1].text.strip(),
59
- "total": cols[2].text.strip(),
60
  }
61
- if len(cols) > 3:
62
- rincian["jenis"] = cols[3].text.strip().upper()
63
- rincians.append(rincian)
 
 
 
 
 
 
 
64
  except Exception as e:
65
- print("Gagal parsing det_pkb:", e)
66
 
 
67
  return data_kendaraan, rincians
68
 
69
 
 
46
  except Exception as e:
47
  print("Gagal parsing tabel:", e)
48
 
49
+ # rincians = []
50
+ # try:
51
+ # container = driver.find_element(By.ID, "det_pkb")
52
+ # rows = container.find_elements(By.CLASS_NAME, "row")
53
+ # for row in rows[1:]: # skip header
54
+ # cols = row.find_elements(By.TAG_NAME, "p")
55
+ # if len(cols) >= 3:
56
+ # rincian = {
57
+ # "pokok": cols[0].text.strip(),
58
+ # "denda": cols[1].text.strip(),
59
+ # "total": cols[2].text.strip(),
60
+ # }
61
+ # if len(cols) > 3:
62
+ # rincian["jenis"] = cols[3].text.strip().upper()
63
+ # rincians.append(rincian)
64
+ # except Exception as e:
65
+ # print("Gagal parsing det_pkb:", e)
66
+
67
+
68
+ rincian_total = {}
69
+ rincian_pkb = []
70
+ rincian_swdkllj = []
71
+
72
  try:
73
  container = driver.find_element(By.ID, "det_pkb")
74
  rows = container.find_elements(By.CLASS_NAME, "row")
75
+
76
+ current_section = None
77
+ for row in rows:
78
  cols = row.find_elements(By.TAG_NAME, "p")
79
+ texts = [col.text.strip() for col in cols if col.text.strip()]
80
+ if not texts:
81
+ continue
82
+
83
+ label = texts[0].lower()
84
+
85
+ if "total" in label:
86
+ current_section = "total"
87
+ continue
88
+ elif "rincian pkb" in label:
89
+ current_section = "pkb"
90
+ continue
91
+ elif "rincian swdkllj" in label:
92
+ current_section = "swdkllj"
93
+ continue
94
+
95
+ if len(texts) >= 3:
96
  rincian = {
97
+ "pokok": texts[0],
98
+ "denda": texts[1],
99
+ "total": texts[2]
100
  }
101
+ if len(texts) > 3:
102
+ rincian["jenis"] = texts[3].upper()
103
+
104
+ if current_section == "total":
105
+ rincian_total = rincian
106
+ elif current_section == "pkb":
107
+ rincian_pkb.append(rincian)
108
+ elif current_section == "swdkllj":
109
+ rincian_swdkllj.append(rincian)
110
+
111
  except Exception as e:
112
+ print("Gagal parsing det_pkb:", e
113
 
114
+ rincians = [rincian_total, rincian_pkb, rincian_swdkllj]
115
  return data_kendaraan, rincians
116
 
117