Update app.py
app.py CHANGED
@@ -48,17 +48,16 @@ def random_headers(referer):
 def try_download(url, headers, proxies):
     """
     Attempt a simple GET through a proxy dict {"http":..., "https":...}.
-    Returns the response if status_code==200, else None.
+    Returns the response if status_code==200 and content is image, else None.
     """
     try:
-        resp = session.get(url, headers=headers, proxies=proxies, stream=True, timeout=
+        resp = session.get(url, headers=headers, proxies=proxies, stream=True, timeout=15)
         if resp.status_code == 200 and "image" in resp.headers.get("Content-Type", ""):
             return resp
     except Exception as e:
         print(f"⚠️ Proxy download error ({proxies}): {e}")
     return None
-
 session = requests.Session()
 # --- Utility Functions ---
 def process_url_images(data, fmt, w, h):
@@ -85,55 +84,54 @@ handle_process = lambda mode, sd, ups, pu, fmt, w, h: (
     process_single_url_image(pu, fmt, w, h) if pu.strip() else
     ([], None, "⚠️ No valid input provided", None)
 )
-import
+from PIL import ImageFile
+ImageFile.LOAD_TRUNCATED_IMAGES = True
+
+def fetch_with_retries(url, headers, timeout=15, retries=2, backoff=1):
+    """
+    Try up to retries+1 times to GET url with timeout seconds.
+    Returns a requests.Response or None.
+    """
+    for attempt in range(retries + 1):
+        try:
+            resp = session.get(url, headers=headers, stream=True, timeout=timeout)
+            if resp.status_code == 200 and "image" in resp.headers.get("Content-Type", ""):
+                return resp
+        except Exception as e:
+            print(f"⚠️ Attempt {attempt+1} failed for {url}: {e}")
+        time.sleep(backoff * (attempt + 1))
+    return None
 
 def download_image(url, save_path):
+    # Skip cached
     if os.path.exists(save_path) and os.path.getsize(save_path) > 1000:
-        print(f"✅ Skipping cached: {save_path}")
         return save_path
 
+    # Skip blocked
+    if any(b in url for b in BLOCKED_SITES):
         log_failure(url, "Skipped known slow site")
         return None
 
     referer = f"{urlparse(url).scheme}://{urlparse(url).netloc}/"
     headers = random_headers(referer)
 
-    # 1
-    if PROXY_LIST:
-        headers = random_headers(referer)
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = {
-                executor.submit(try_download, url, headers, {"http": p, "https": p}): p
-                for p in random.sample(PROXY_LIST, min(5, len(PROXY_LIST)))
-            }
-            for future in as_completed(futures):
-                result = future.result()
-                if result:
-                    with open(save_path, 'wb') as f:
-                        for chunk in result.iter_content(8192):
-                            f.write(chunk)
-                    print("✅ Proxy worked:", futures[future])
-                    return save_path
-
-    # 3. cloudscraper fallback
+    # 1) Direct with retries
+    print(f"🚀 Fetching with retries: {url}")
+    resp = fetch_with_retries(url, headers, timeout=15, retries=2)
+    if resp:
+        with open(save_path, 'wb') as f:
+            for chunk in resp.iter_content(8192):
+                f.write(chunk)
+        print("✅ Direct fetch succeeded")
+        return save_path
+    else:
+        print("❌ Direct fetch failed after retries")
+
+    # 2) cloudscraper fallback
+    print("🟠 cloudscraper fallback...")
     try:
-        print("🟠 cloudscraper fallback...")
         scraper = cloudscraper.create_scraper(sess=session)
-        resp = scraper.get(url, headers=headers, stream=True, timeout=
+        resp = scraper.get(url, headers=headers, stream=True, timeout=15)
         if resp.status_code == 200 and "image" in resp.headers.get("Content-Type", ""):
             with open(save_path, 'wb') as f:
                 for chunk in resp.iter_content(8192):
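
For orientation, these hunks replace the old proxy stage with a direct attempt that retries with backoff before falling through to cloudscraper and Selenium. A minimal self-contained sketch of that retry pattern (plain `requests`, placeholder URL and filename, not the app's exact helper):

    import time
    import requests

    def get_image_with_retries(url, timeout=15, retries=2, backoff=1):
        # Same retry/backoff shape as fetch_with_retries above, without the shared session.
        for attempt in range(retries + 1):
            try:
                resp = requests.get(url, stream=True, timeout=timeout)
                if resp.status_code == 200 and "image" in resp.headers.get("Content-Type", ""):
                    return resp
            except requests.RequestException as e:
                print(f"attempt {attempt + 1} failed: {e}")
            time.sleep(backoff * (attempt + 1))
        return None

    resp = get_image_with_retries("https://example.com/photo.jpg")
    if resp is not None:
        with open("photo.jpg", "wb") as f:
            for chunk in resp.iter_content(8192):
                f.write(chunk)
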
@@ -143,7 +141,7 @@ def download_image(url, save_path):
     except Exception as e:
         print(f"❌ cloudscraper failed: {e}")
 
-    #
+    # 3) Selenium fallback
     try:
         print("🧪 Headless browser fallback...")
         chrome_opts = Options()
@@ -153,7 +151,10 @@ def download_image(url, save_path):
         chrome_opts.add_argument("--disable-dev-shm-usage")
         chrome_opts.add_argument(f"user-agent={headers['User-Agent']}")
 
-        driver = webdriver.Chrome(
+        driver = webdriver.Chrome(
+            service=Service(ChromeDriverManager().install()),
+            options=chrome_opts
+        )
         driver.get(url)
         time.sleep(4)
         final_url = driver.current_url
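
The new `webdriver.Chrome(service=..., options=...)` call assumes `Service` and `ChromeDriverManager` are imported elsewhere in app.py (the imports are not part of this diff). The usual imports for this Selenium 4 + webdriver-manager pattern are:

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.service import Service
    from webdriver_manager.chrome import ChromeDriverManager
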
@@ -164,21 +165,20 @@ def download_image(url, save_path):
         for k, v in cookies.items():
             session.cookies.set(k, v)
         r = session.get(final_url, headers=headers, stream=True, timeout=15)
-        if r.status_code == 200 and
+        if r.status_code == 200 and "image" in r.headers.get("Content-Type", ""):
             with open(save_path, 'wb') as f:
                 for chunk in r.iter_content(8192):
                     f.write(chunk)
             print("✅ Selenium + cookies worked")
             return save_path
-        else:
-            log_failure(url, f"Selenium bad response: {r.status_code}")
     except Exception as e:
         print(f"❌ Selenium failed: {e}")
-        log_failure(url, f"Selenium exception: {e}")
 
-    #
+    # 4) Give up
     log_failure(url, "All methods failed")
     return None
+
+
 lamp_html = """
 <style>
 * {
@@ -344,68 +344,103 @@ def threaded_download_and_open(item, temp_dir):
 def resize_and_save_worker(args):
     name, img, fmt, w, h, out_dir = args
     try:
+        # 1) Composite RGBA over white
         img = img.convert('RGBA')
-        bg
+        bg = Image.new('RGBA', img.size, (255,255,255,255))
         img = Image.alpha_composite(bg, img)
+
+        # 2) Determine save format & extension
+        fmt_upper = fmt.upper()
+        save_fmt = "JPEG" if fmt_upper in ("JPEG", "JPG") else fmt_upper
+        ext = "jpg" if fmt_upper in ("JPEG", "JPG") else fmt.lower()
+
+        # 3) Convert to RGB if needed
+        if save_fmt == "JPEG":
+            img = img.convert("RGB")
+
+        # 4) Resize with padding
         img = resize_with_padding(img, w, h)
+
+        # 5) Build filename & save
+        out_name = f"{name}.{ext}"
         out_path = os.path.join(out_dir, out_name)
-        img.save(out_path, format=
+        img.save(out_path, format=save_fmt, quality=90)
+
         return out_path, None
+
     except Exception as e:
         return None, f"{name}: {e}"
 
+from multiprocessing import cpu_count
+import zipfile
+
 def process_and_zip(items, fmt, w, h):
-    tmp
+    tmp = tempfile.mkdtemp()
     proc = os.path.join(tmp, 'out')
     os.makedirs(proc, exist_ok=True)
-    files, fails = [], []
 
+    files, fails = [], []
+    # 1) Download & open in parallel
+    workers = cpu_count() * 2
+    with ThreadPoolExecutor(max_workers=workers) as ex:
+        results = list(ex.map(lambda it: threaded_download_and_open(it, tmp), items))
 
+    # 2) Process each downloaded image/list of frames
     for name, imgs, err in results:
-        if
-            print("🛑 Stopped before processing.")
-            break
-
-        if err or imgs is None:
+        if err or not imgs:
             fails.append(f"{name}: {err}")
             continue
 
         for i, img in enumerate(imgs):
             try:
+                # Composite RGBA over white
+                img = img.convert("RGBA")
+                bg = Image.new("RGBA", img.size, (255,255,255,255))
                 img = Image.alpha_composite(bg, img)
 
+                # Determine save format & extension
+                fmt_upper = fmt.upper()
+                save_fmt = "JPEG" if fmt_upper in ("JPEG","JPG") else fmt_upper
+                ext = "jpg" if fmt_upper in ("JPEG","JPG") else fmt.lower()
 
+                # Convert to RGB if JPEG
+                if save_fmt == "JPEG":
+                    img = img.convert("RGB")
+
+                # Resize with padding
                 img = resize_with_padding(img, w, h)
 
+                # Build filename
+                fname = (
+                    f"{name}_frame{i+1}.{ext}"
+                    if len(imgs) > 1
+                    else f"{name}.{ext}"
+                )
+                out_path = os.path.join(proc, fname)
+
+                # Save & record
+                img.save(out_path, format=save_fmt, quality=90)
+                files.append(out_path)
+
             except Exception as e:
+                print(f"⚠️ Save failed for {name}: {e}")
                 fails.append(f"{name}_frame{i+1}: {e}")
 
+    # 3) If nothing succeeded, clean up and report
     if not files:
         stop_event.clear()
         shutil.rmtree(tmp)
         return None, None, "No images processed.", None
 
+    # 4) Create ZIP in “store” mode (no compression)
     date_str = datetime.now().strftime("%Y-%m-%d")
     zip_name = f"{date_str}.zip"
     zip_path = os.path.join(tmp, zip_name)
-
-    with zipfile.ZipFile(zip_path, 'w') as zf:
+    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_STORED) as zf:
         for f in files:
             zf.write(f, os.path.basename(f))
 
+    # 5) Build status message
     msg_lines = [f"✅ Processed {len(files)} image(s)."]
     if fails:
         msg_lines.append(f"❌ Failed: {len(fails)} image(s)")
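
`resize_with_padding` is called here but not defined anywhere in this diff. A hypothetical Pillow letterbox implementation, only to illustrate what the call presumably does (an assumption, not the app's actual code):

    from PIL import Image

    def resize_with_padding_sketch(img, w, h, fill=(255, 255, 255)):
        # Scale to fit inside w x h, then paste centered on a solid canvas.
        img = img.copy()
        img.thumbnail((w, h), Image.LANCZOS)
        canvas = Image.new("RGB", (w, h), fill)
        offset = ((w - img.width) // 2, (h - img.height) // 2)
        canvas.paste(img, offset)
        return canvas
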
@@ -421,50 +456,31 @@ def read_uploaded_workbook(file):
         # read all sheets except "Cleared Data"
         xls = pd.ExcelFile(file.name)
         sheets = [s for s in xls.sheet_names if s.lower() != "cleared data"]
-
-        # load each sheet and drop ones that are totally empty
-        df_list = []
-        for s in sheets:
-            df_s = pd.read_excel(file.name, sheet_name=s, engine="openpyxl")
-            # drop rows and cols that are all-NA, then check if anything remains
-            if not df_s.dropna(how="all").empty:
-                df_list.append(df_s)
-
-        if not df_list:
-            return [], "❌ No data in any sheet"
-
-        # now safely concatenate only non-empty sheets
+        df_list = [pd.read_excel(file.name, sheet_name=s, engine="openpyxl") for s in sheets]
         df = pd.concat(df_list, ignore_index=True)
         df.columns = [c.strip() for c in df.columns]
 
-        # find the ItemCode column
         item_col = next((c for c in df.columns if c.lower() == 'itemcode'), None)
         if not item_col:
             return [], "❌ Missing 'ItemCode' column"
 
-        # collect all URL/image/link columns
         url_cols = [c for c in df.columns if any(k in c.lower() for k in ["url", "image", "link"])]
         data = []
         for _, row in df.iterrows():
             raw = row[item_col]
             if pd.isna(raw):
                 continue
-            key =
-                if str(raw).strip().replace('.', '', 1).isdigit()
-                else str(raw).strip())
+            key = str(raw).strip().split('.')[0] if str(raw).strip().replace('.', '', 1).isdigit() else str(raw).strip()
             idx = 0
             for col in url_cols:
                 if pd.notna(row[col]):
                     name = f"{key}" if idx == 0 else f"{key}_{idx}"
                     data.append({"url": str(row[col]).strip(), "name": name})
                     idx += 1
-
         return data, f"✅ Fetched {len(data)} image link(s)"
-
     except Exception as e:
         return [], f"❌ Error: {e}"
 
-
 def clear_all(tmp_dir):
     # wipe out the temp folder if it exists
     if tmp_dir and os.path.exists(tmp_dir):
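
The new one-line `key = ...` expression keeps the intent of the removed multi-line version: item codes that Excel hands back as floats are stripped of their trailing `.0`, while non-numeric codes pass through unchanged. For example:

    raw = 12345.0                      # how openpyxl often returns a numeric ItemCode
    s = str(raw).strip()               # "12345.0"
    s.replace('.', '', 1).isdigit()    # True, so take the part before the dot
    key = s.split('.')[0]              # "12345"
    # A non-numeric code such as "AB-123" is kept verbatim.
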
@@ -620,6 +636,27 @@ def process_all(mode, workbook_file, ups, pu, fmt, w, h):
 
     # 4️⃣ Nothing valid
     return [], None, "⚠️ No valid input provided", None
+
+if os.path.exists(FAILED_LOG_PATH):
+    with open(FAILED_LOG_PATH) as f:
+        failed = [line.split("\t")[1] for line in f if line.strip()]
+    if failed:
+        print(f"🔄 Retrying {len(failed)} failed URLs…")
+        # reuse your ThreadPoolExecutor to download them to a “retries” folder
+        from concurrent.futures import ThreadPoolExecutor, as_completed
+        os.makedirs("retries", exist_ok=True)
+        def retry_one(url):
+            fname = os.path.join("retries", url.split("/")[-1].split("?")[0])
+            r = fetch_with_retries(url, random_headers(f"{urlparse(url).scheme}://{urlparse(url).netloc}/"), timeout=15, retries=2)
+            if r:
+                with open(fname, 'wb') as f:
+                    for c in r.iter_content(8192): f.write(c)
+                return True
+            return False
+
+        with ThreadPoolExecutor(max_workers=8) as ex:
+            results = list(ex.map(retry_one, failed))
+        print(f"✅ Retried successes: {sum(results)} / {len(failed)}")
 
 
 with gr.Blocks(css=css) as demo:
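
The retry pass added here reads FAILED_LOG_PATH and takes field 2 of each tab-separated line as the URL. `log_failure` is not shown in this diff; a writer consistent with that parsing would look roughly like this (an assumption, with a placeholder filename):

    from datetime import datetime

    def log_failure_sketch(url, reason, path="failed_urls.log"):
        # One tab-separated record per failure: timestamp, url, reason.
        # (The real app would pass FAILED_LOG_PATH here.)
        with open(path, "a", encoding="utf-8") as f:
            f.write(f"{datetime.now().isoformat()}\t{url}\t{reason}\n")
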
@@ -661,9 +698,9 @@ with gr.Blocks(css=css) as demo:
         with gr.Group(elem_classes="panel"):
             with gr.Row():
                 format_choice = gr.Dropdown(
-                    ["
+                    ["JPG","PNG","WEBP","TIFF","GIF","JFIF","AVIF"],
                     label="🖼️ Format",
-                    value="
+                    value="JPG"
                 )
                 width = gr.Number(label="Width (px)", value=1000, precision=0)
                 height = gr.Number(label="Height (px)", value=1000, precision=0)
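
One caveat on the expanded dropdown: the save code above passes most choices straight to Pillow as the format name, but Pillow has no "JFIF" format (JFIF files are ordinary JPEGs) and "AVIF" needs pillow-avif-plugin or an AVIF-enabled Pillow build, so the format mapping would presumably need a small extension, e.g.:

    # Hypothetical lookup from the dropdown label to a Pillow save format.
    PIL_FORMATS = {
        "JPG": "JPEG", "JFIF": "JPEG",   # JFIF files are plain JPEGs to Pillow
        "PNG": "PNG", "WEBP": "WEBP", "TIFF": "TIFF", "GIF": "GIF",
        "AVIF": "AVIF",                  # needs pillow-avif-plugin or an AVIF-enabled Pillow build
    }

    def to_pil_format(fmt):
        return PIL_FORMATS.get(fmt.upper(), "JPEG")
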
@@ -715,20 +752,7 @@ with gr.Blocks(css=css) as demo:
         inputs=[upload_box],
         outputs=[status]
     )
-        fn=lambda txt: txt,  # a no‑op so we can attach JS
-        inputs=[status],
-        outputs=[status],
-        js="""
-        (txt) => {
-            const el = document.getElementById('status-box');
-            if (el) {
-                el.scrollTop = 0;
-            }
-            return txt;
-        }
-        """
-    )
+
     process_btn.click(
         fn=process_all,
         inputs=[
@@ -755,40 +779,14 @@ with gr.Blocks(css=css) as demo:
     )
 
     clear_btn.click(
-            status,
-            temp_dir_state,
-            image_url_input
-        ],
-        js="""() => {
-            // 1) Clear all browser storage
-            localStorage.clear();
-            sessionStorage.clear();
-
-            // 2) Purge any ServiceWorker caches
-            if ('caches' in window) {
-                caches.keys().then(keys => {
-                    keys.forEach(key => caches.delete(key));
-                });
-            }
-
-            // 3) Scroll the status box back to the top
-            const el = document.getElementById('status-box');
-            if (el) el.scrollTop = 0;
-
-            // 4) Return placeholders for each Python output
-            return [null, [], [], {visible: false, value: null}, [], "Cleared.", null, ""];
-        }"""
-    )
-
-
+        fn=clear_all,
+        inputs=[temp_dir_state],
+        outputs=[
+            workbook_upload, image_data_state, upload_box,
+            zip_file_hidden, single_downloads, status,
+            temp_dir_state, image_url_input
+        ]
+    )
 
     stop_btn.click(
         fn=lambda: "🛑 Stop signal sent",
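
With the JS-side reset removed, `clear_btn.click` now relies on `clear_all` alone, so it has to return one value per listed output (eight, in this order). A sketch of a compatible return shape (an assumption; only the temp-dir cleanup is visible in this diff):

    import os
    import shutil

    def clear_all_sketch(tmp_dir):
        # Wipe the temp folder, then return one reset value per wired output, in order:
        # workbook_upload, image_data_state, upload_box, zip_file_hidden,
        # single_downloads, status, temp_dir_state, image_url_input
        if tmp_dir and os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir, ignore_errors=True)
        return None, [], None, None, [], "Cleared.", None, ""
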
@@ -797,4 +795,3 @@ with gr.Blocks(css=css) as demo:
 
 if __name__ == "__main__":
     demo.queue().launch(debug=True)
-