Update app.py
app.py CHANGED
@@ -48,17 +48,16 @@ def random_headers(referer):
 def try_download(url, headers, proxies):
     """
     Attempt a simple GET through a proxy dict {"http":..., "https":...}.
-    Returns the response if status_code==200, else None.
+    Returns the response if status_code==200 and content is image, else None.
     """
     try:
-        resp = session.get(url, headers=headers, proxies=proxies, stream=True, timeout=
+        resp = session.get(url, headers=headers, proxies=proxies, stream=True, timeout=15)
         if resp.status_code == 200 and "image" in resp.headers.get("Content-Type", ""):
             return resp
     except Exception as e:
         print(f"⚠️ Proxy download error ({proxies}): {e}")
     return None
-
 session = requests.Session()
 # --- Utility Functions ---
 def process_url_images(data, fmt, w, h):
@@ -85,55 +84,54 @@ handle_process = lambda mode, sd, ups, pu, fmt, w, h: (
     process_single_url_image(pu, fmt, w, h) if pu.strip() else
     ([], None, "⚠️ No valid input provided", None)
 )
-import
+from PIL import ImageFile
+ImageFile.LOAD_TRUNCATED_IMAGES = True
+
+def fetch_with_retries(url, headers, timeout=15, retries=2, backoff=1):
+    """
+    Try up to retries+1 times to GET url with timeout seconds.
+    Returns a requests.Response or None.
+    """
+    for attempt in range(retries + 1):
+        try:
+            resp = session.get(url, headers=headers, stream=True, timeout=timeout)
+            if resp.status_code == 200 and "image" in resp.headers.get("Content-Type", ""):
+                return resp
+        except Exception as e:
+            print(f"⚠️ Attempt {attempt+1} failed for {url}: {e}")
+        time.sleep(backoff * (attempt + 1))
+    return None
 
 def download_image(url, save_path):
+    # Skip cached
     if os.path.exists(save_path) and os.path.getsize(save_path) > 1000:
-        print(f"✅ Skipping cached: {save_path}")
         return save_path
 
+    # Skip blocked
+    if any(b in url for b in BLOCKED_SITES):
         log_failure(url, "Skipped known slow site")
         return None
 
     referer = f"{urlparse(url).scheme}://{urlparse(url).netloc}/"
     headers = random_headers(referer)
 
-    # 1
-    if PROXY_LIST:
-        headers = random_headers(referer)
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = {
-                executor.submit(try_download, url, headers, {"http": p, "https": p}): p
-                for p in random.sample(PROXY_LIST, min(5, len(PROXY_LIST)))
-            }
-            for future in as_completed(futures):
-                result = future.result()
-                if result:
-                    with open(save_path, 'wb') as f:
-                        for chunk in result.iter_content(8192):
-                            f.write(chunk)
-                    print("✅ Proxy worked:", futures[future])
-                    return save_path
-
-    # 3. cloudscraper fallback
+    # 1) Direct with retries
+    print(f"🚀 Fetching with retries: {url}")
+    resp = fetch_with_retries(url, headers, timeout=15, retries=2)
+    if resp:
+        with open(save_path, 'wb') as f:
+            for chunk in resp.iter_content(8192):
+                f.write(chunk)
+        print("✅ Direct fetch succeeded")
+        return save_path
+    else:
+        print("❌ Direct fetch failed after retries")
+
+    # 2) cloudscraper fallback
+    print("🟠 cloudscraper fallback...")
     try:
-        print("🟠 cloudscraper fallback...")
         scraper = cloudscraper.create_scraper(sess=session)
-        resp = scraper.get(url, headers=headers, stream=True, timeout=
+        resp = scraper.get(url, headers=headers, stream=True, timeout=15)
         if resp.status_code == 200 and "image" in resp.headers.get("Content-Type", ""):
             with open(save_path, 'wb') as f:
                 for chunk in resp.iter_content(8192):
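
For orientation, these hunks replace the old proxy stage with a direct attempt that retries with backoff before falling through to cloudscraper and Selenium. A minimal self-contained sketch of that retry pattern (plain `requests`, placeholder URL and filename, not the app's exact helper):

    import time
    import requests

    def get_image_with_retries(url, timeout=15, retries=2, backoff=1):
        # Same retry/backoff shape as fetch_with_retries above, without the shared session.
        for attempt in range(retries + 1):
            try:
                resp = requests.get(url, stream=True, timeout=timeout)
                if resp.status_code == 200 and "image" in resp.headers.get("Content-Type", ""):
                    return resp
            except requests.RequestException as e:
                print(f"attempt {attempt + 1} failed: {e}")
            time.sleep(backoff * (attempt + 1))
        return None

    resp = get_image_with_retries("https://example.com/photo.jpg")
    if resp is not None:
        with open("photo.jpg", "wb") as f:
            for chunk in resp.iter_content(8192):
                f.write(chunk)
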
@@ -143,7 +141,7 @@ def download_image(url, save_path):
     except Exception as e:
         print(f"❌ cloudscraper failed: {e}")
 
-    #
+    # 3) Selenium fallback
     try:
         print("🧪 Headless browser fallback...")
         chrome_opts = Options()
@@ -153,7 +151,10 @@ def download_image(url, save_path):
         chrome_opts.add_argument("--disable-dev-shm-usage")
         chrome_opts.add_argument(f"user-agent={headers['User-Agent']}")
 
-        driver = webdriver.Chrome(
+        driver = webdriver.Chrome(
+            service=Service(ChromeDriverManager().install()),
+            options=chrome_opts
+        )
         driver.get(url)
         time.sleep(4)
         final_url = driver.current_url
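
The new `webdriver.Chrome(service=..., options=...)` call assumes `Service` and `ChromeDriverManager` are imported elsewhere in app.py (the imports are not part of this diff). The usual imports for this Selenium 4 + webdriver-manager pattern are:

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.service import Service
    from webdriver_manager.chrome import ChromeDriverManager
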
@@ -164,21 +165,20 @@ def download_image(url, save_path):
         for k, v in cookies.items():
             session.cookies.set(k, v)
         r = session.get(final_url, headers=headers, stream=True, timeout=15)
-        if r.status_code == 200 and
+        if r.status_code == 200 and "image" in r.headers.get("Content-Type", ""):
             with open(save_path, 'wb') as f:
                 for chunk in r.iter_content(8192):
                     f.write(chunk)
             print("✅ Selenium + cookies worked")
             return save_path
-        else:
-            log_failure(url, f"Selenium bad response: {r.status_code}")
     except Exception as e:
         print(f"❌ Selenium failed: {e}")
-        log_failure(url, f"Selenium exception: {e}")
 
-    #
+    # 4) Give up
     log_failure(url, "All methods failed")
     return None
+
+
 lamp_html = """
 <style>
 * {
@@ -344,68 +344,103 @@ def threaded_download_and_open(item, temp_dir):
 def resize_and_save_worker(args):
     name, img, fmt, w, h, out_dir = args
     try:
+        # 1) Composite RGBA over white
         img = img.convert('RGBA')
-        bg
+        bg = Image.new('RGBA', img.size, (255,255,255,255))
         img = Image.alpha_composite(bg, img)
+
+        # 2) Determine save format & extension
+        fmt_upper = fmt.upper()
+        save_fmt = "JPEG" if fmt_upper in ("JPEG", "JPG") else fmt_upper
+        ext = "jpg" if fmt_upper in ("JPEG", "JPG") else fmt.lower()
+
+        # 3) Convert to RGB if needed
+        if save_fmt == "JPEG":
+            img = img.convert("RGB")
+
+        # 4) Resize with padding
         img = resize_with_padding(img, w, h)
+
+        # 5) Build filename & save
+        out_name = f"{name}.{ext}"
         out_path = os.path.join(out_dir, out_name)
-        img.save(out_path, format=
+        img.save(out_path, format=save_fmt, quality=90)
+
         return out_path, None
+
     except Exception as e:
         return None, f"{name}: {e}"
 
+from multiprocessing import cpu_count
+import zipfile
+
 def process_and_zip(items, fmt, w, h):
-    tmp
+    tmp = tempfile.mkdtemp()
     proc = os.path.join(tmp, 'out')
     os.makedirs(proc, exist_ok=True)
-    files, fails = [], []
 
+    files, fails = [], []
+    # 1) Download & open in parallel
+    workers = cpu_count() * 2
+    with ThreadPoolExecutor(max_workers=workers) as ex:
+        results = list(ex.map(lambda it: threaded_download_and_open(it, tmp), items))
 
+    # 2) Process each downloaded image/list of frames
     for name, imgs, err in results:
-        if
-            print("🛑 Stopped before processing.")
-            break
-
-        if err or imgs is None:
+        if err or not imgs:
             fails.append(f"{name}: {err}")
             continue
 
         for i, img in enumerate(imgs):
             try:
+                # Composite RGBA over white
+                img = img.convert("RGBA")
+                bg = Image.new("RGBA", img.size, (255,255,255,255))
                 img = Image.alpha_composite(bg, img)
 
+                # Determine save format & extension
+                fmt_upper = fmt.upper()
+                save_fmt = "JPEG" if fmt_upper in ("JPEG","JPG") else fmt_upper
+                ext = "jpg" if fmt_upper in ("JPEG","JPG") else fmt.lower()
 
+                # Convert to RGB if JPEG
+                if save_fmt == "JPEG":
+                    img = img.convert("RGB")
+
+                # Resize with padding
                 img = resize_with_padding(img, w, h)
 
+                # Build filename
+                fname = (
+                    f"{name}_frame{i+1}.{ext}"
+                    if len(imgs) > 1
+                    else f"{name}.{ext}"
+                )
+                out_path = os.path.join(proc, fname)
+
+                # Save & record
+                img.save(out_path, format=save_fmt, quality=90)
+                files.append(out_path)
+
             except Exception as e:
+                print(f"⚠️ Save failed for {name}: {e}")
                 fails.append(f"{name}_frame{i+1}: {e}")
 
+    # 3) If nothing succeeded, clean up and report
     if not files:
         stop_event.clear()
         shutil.rmtree(tmp)
         return None, None, "No images processed.", None
 
+    # 4) Create ZIP in “store” mode (no compression)
     date_str = datetime.now().strftime("%Y-%m-%d")
     zip_name = f"{date_str}.zip"
     zip_path = os.path.join(tmp, zip_name)
-
-    with zipfile.ZipFile(zip_path, 'w') as zf:
+    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_STORED) as zf:
         for f in files:
             zf.write(f, os.path.basename(f))
 
+    # 5) Build status message
     msg_lines = [f"✅ Processed {len(files)} image(s)."]
     if fails:
         msg_lines.append(f"❌ Failed: {len(fails)} image(s)")
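
`resize_with_padding` is called here but not defined anywhere in this diff. A hypothetical Pillow letterbox implementation, only to illustrate what the call presumably does (an assumption, not the app's actual code):

    from PIL import Image

    def resize_with_padding_sketch(img, w, h, fill=(255, 255, 255)):
        # Scale to fit inside w x h, then paste centered on a solid canvas.
        img = img.copy()
        img.thumbnail((w, h), Image.LANCZOS)
        canvas = Image.new("RGB", (w, h), fill)
        offset = ((w - img.width) // 2, (h - img.height) // 2)
        canvas.paste(img, offset)
        return canvas
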
@@ -421,50 +456,31 @@ def read_uploaded_workbook(file):
         # read all sheets except "Cleared Data"
         xls = pd.ExcelFile(file.name)
         sheets = [s for s in xls.sheet_names if s.lower() != "cleared data"]
-
-        # load each sheet and drop ones that are totally empty
-        df_list = []
-        for s in sheets:
-            df_s = pd.read_excel(file.name, sheet_name=s, engine="openpyxl")
-            # drop rows and cols that are all-NA, then check if anything remains
-            if not df_s.dropna(how="all").empty:
-                df_list.append(df_s)
-
-        if not df_list:
-            return [], "❌ No data in any sheet"
-
-        # now safely concatenate only non-empty sheets
+        df_list = [pd.read_excel(file.name, sheet_name=s, engine="openpyxl") for s in sheets]
         df = pd.concat(df_list, ignore_index=True)
         df.columns = [c.strip() for c in df.columns]
 
-        # find the ItemCode column
         item_col = next((c for c in df.columns if c.lower() == 'itemcode'), None)
         if not item_col:
             return [], "❌ Missing 'ItemCode' column"
 
-        # collect all URL/image/link columns
         url_cols = [c for c in df.columns if any(k in c.lower() for k in ["url", "image", "link"])]
         data = []
         for _, row in df.iterrows():
             raw = row[item_col]
             if pd.isna(raw):
                 continue
-            key =
-                if str(raw).strip().replace('.', '', 1).isdigit()
-                else str(raw).strip())
+            key = str(raw).strip().split('.')[0] if str(raw).strip().replace('.', '', 1).isdigit() else str(raw).strip()
             idx = 0
             for col in url_cols:
                 if pd.notna(row[col]):
                     name = f"{key}" if idx == 0 else f"{key}_{idx}"
                     data.append({"url": str(row[col]).strip(), "name": name})
                     idx += 1
-
         return data, f"✅ Fetched {len(data)} image link(s)"
-
     except Exception as e:
         return [], f"❌ Error: {e}"
 
-
 def clear_all(tmp_dir):
     # wipe out the temp folder if it exists
     if tmp_dir and os.path.exists(tmp_dir):
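
The new one-line `key = ...` expression keeps the intent of the removed multi-line version: item codes that Excel hands back as floats are stripped of their trailing `.0`, while non-numeric codes pass through unchanged. For example:

    raw = 12345.0                      # how openpyxl often returns a numeric ItemCode
    s = str(raw).strip()               # "12345.0"
    s.replace('.', '', 1).isdigit()    # True, so take the part before the dot
    key = s.split('.')[0]              # "12345"
    # A non-numeric code such as "AB-123" is kept verbatim.
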
@@ -620,6 +636,27 @@ def process_all(mode, workbook_file, ups, pu, fmt, w, h):
 
     # 4️⃣ Nothing valid
     return [], None, "⚠️ No valid input provided", None
+
+if os.path.exists(FAILED_LOG_PATH):
+    with open(FAILED_LOG_PATH) as f:
+        failed = [line.split("\t")[1] for line in f if line.strip()]
+    if failed:
+        print(f"🔄 Retrying {len(failed)} failed URLs…")
+        # reuse your ThreadPoolExecutor to download them to a “retries” folder
+        from concurrent.futures import ThreadPoolExecutor, as_completed
+        os.makedirs("retries", exist_ok=True)
+        def retry_one(url):
+            fname = os.path.join("retries", url.split("/")[-1].split("?")[0])
+            r = fetch_with_retries(url, random_headers(f"{urlparse(url).scheme}://{urlparse(url).netloc}/"), timeout=15, retries=2)
+            if r:
+                with open(fname, 'wb') as f:
+                    for c in r.iter_content(8192): f.write(c)
+                return True
+            return False
+
+        with ThreadPoolExecutor(max_workers=8) as ex:
+            results = list(ex.map(retry_one, failed))
+        print(f"✅ Retried successes: {sum(results)} / {len(failed)}")
 
 
 with gr.Blocks(css=css) as demo:
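
The retry pass added here reads FAILED_LOG_PATH and takes field 2 of each tab-separated line as the URL. `log_failure` is not shown in this diff; a writer consistent with that parsing would look roughly like this (an assumption, with a placeholder filename):

    from datetime import datetime

    def log_failure_sketch(url, reason, path="failed_urls.log"):
        # One tab-separated record per failure: timestamp, url, reason.
        # (The real app would pass FAILED_LOG_PATH here.)
        with open(path, "a", encoding="utf-8") as f:
            f.write(f"{datetime.now().isoformat()}\t{url}\t{reason}\n")
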
@@ -661,9 +698,9 @@ with gr.Blocks(css=css) as demo:
         with gr.Group(elem_classes="panel"):
             with gr.Row():
                 format_choice = gr.Dropdown(
-                    ["
+                    ["JPG","PNG","WEBP","TIFF","GIF","JFIF","AVIF"],
                     label="🖼️ Format",
-                    value="
+                    value="JPG"
                 )
                 width = gr.Number(label="Width (px)", value=1000, precision=0)
                 height = gr.Number(label="Height (px)", value=1000, precision=0)
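
One caveat on the expanded dropdown: the save code above passes most choices straight to Pillow as the format name, but Pillow has no "JFIF" format (JFIF files are ordinary JPEGs) and "AVIF" needs pillow-avif-plugin or an AVIF-enabled Pillow build, so the format mapping would presumably need a small extension, e.g.:

    # Hypothetical lookup from the dropdown label to a Pillow save format.
    PIL_FORMATS = {
        "JPG": "JPEG", "JFIF": "JPEG",   # JFIF files are plain JPEGs to Pillow
        "PNG": "PNG", "WEBP": "WEBP", "TIFF": "TIFF", "GIF": "GIF",
        "AVIF": "AVIF",                  # needs pillow-avif-plugin or an AVIF-enabled Pillow build
    }

    def to_pil_format(fmt):
        return PIL_FORMATS.get(fmt.upper(), "JPEG")
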
@@ -715,20 +752,7 @@ with gr.Blocks(css=css) as demo:
         inputs=[upload_box],
         outputs=[status]
     )
-        fn=lambda txt: txt,  # a no‑op so we can attach JS
-        inputs=[status],
-        outputs=[status],
-        js="""
-        (txt) => {
-            const el = document.getElementById('status-box');
-            if (el) {
-                el.scrollTop = 0;
-            }
-            return txt;
-        }
-        """
-    )
+
     process_btn.click(
         fn=process_all,
         inputs=[
@@ -755,40 +779,14 @@ with gr.Blocks(css=css) as demo:
     )
 
     clear_btn.click(
-            status,
-            temp_dir_state,
-            image_url_input
-        ],
-        js="""() => {
-            // 1) Clear all browser storage
-            localStorage.clear();
-            sessionStorage.clear();
-
-            // 2) Purge any ServiceWorker caches
-            if ('caches' in window) {
-                caches.keys().then(keys => {
-                    keys.forEach(key => caches.delete(key));
-                });
-            }
-
-            // 3) Scroll the status box back to the top
-            const el = document.getElementById('status-box');
-            if (el) el.scrollTop = 0;
-
-            // 4) Return placeholders for each Python output
-            return [null, [], [], {visible: false, value: null}, [], "Cleared.", null, ""];
-        }"""
-    )
-
-
+        fn=clear_all,
+        inputs=[temp_dir_state],
+        outputs=[
+            workbook_upload, image_data_state, upload_box,
+            zip_file_hidden, single_downloads, status,
+            temp_dir_state, image_url_input
+        ]
+    )
 
     stop_btn.click(
         fn=lambda: "🛑 Stop signal sent",
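
With the JS-side reset removed, `clear_btn.click` now relies on `clear_all` alone, so it has to return one value per listed output (eight, in this order). A sketch of a compatible return shape (an assumption; only the temp-dir cleanup is visible in this diff):

    import os
    import shutil

    def clear_all_sketch(tmp_dir):
        # Wipe the temp folder, then return one reset value per wired output, in order:
        # workbook_upload, image_data_state, upload_box, zip_file_hidden,
        # single_downloads, status, temp_dir_state, image_url_input
        if tmp_dir and os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir, ignore_errors=True)
        return None, [], None, None, [], "Cleared.", None, ""
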
@@ -797,4 +795,3 @@ with gr.Blocks(css=css) as demo:
 
 if __name__ == "__main__":
     demo.queue().launch(debug=True)
-