attempts to fix mcp

Files changed:
- __pycache__/app.cpython-313.pyc (+0 -0)
- __pycache__/globe.cpython-313.pyc (+0 -0)
- app.py (+182 -27)
__pycache__/app.cpython-313.pyc
ADDED: binary file (24.1 kB)

__pycache__/globe.cpython-313.pyc
ADDED: binary file (2.84 kB)
app.py
CHANGED
@@ -16,17 +16,85 @@ import time
 import shutil
 import cv2
 import re
-[one deleted line; its text is not recoverable from this view]
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
-[five deleted lines, the old one-shot tokenizer/model setup; their text is not recoverable from this view]
+import warnings
+
+# Try to import spaces module for ZeroGPU compatibility
+try:
+    import spaces
+    SPACES_AVAILABLE = True
+except ImportError:
+    SPACES_AVAILABLE = False
+    # Create a dummy decorator for local development
+    def dummy_gpu_decorator(func):
+        return func
+    spaces = type('spaces', (), {'GPU': dummy_gpu_decorator})()
+
+# Suppress specific warnings that are known issues with GOT-OCR
+warnings.filterwarnings("ignore", message="The attention mask and the pad token id were not set")
+warnings.filterwarnings("ignore", message="Setting `pad_token_id` to `eos_token_id`")
+warnings.filterwarnings("ignore", message="The attention mask is not set and cannot be inferred")
+warnings.filterwarnings("ignore", message="The `seen_tokens` attribute is deprecated")
+
+def initialize_model_safely():
+    """
+    Safely initialize the GOT-OCR model with proper error handling for ZeroGPU
+    """
+    model_name = 'ucaslcl/GOT-OCR2_0'
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+    try:
+        # Initialize tokenizer with proper settings
+        tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+
+        # Set pad token properly
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+
+        config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
+
+        model = AutoModel.from_pretrained(
+            'ucaslcl/GOT-OCR2_0',
+            trust_remote_code=True,
+            low_cpu_mem_usage=True,
+            device_map=device,
+            use_safetensors=True,
+            pad_token_id=tokenizer.eos_token_id,
+            use_cache=True,
+            torch_dtype=torch.float16 if device == 'cuda' else torch.float32
+        )
+
+        model = model.eval().to(device)
+        model.config.pad_token_id = tokenizer.eos_token_id
+
+        # Ensure the model has proper tokenizer settings
+        if hasattr(model, 'config'):
+            model.config.pad_token_id = tokenizer.eos_token_id
+            model.config.eos_token_id = tokenizer.eos_token_id
+
+        return model, tokenizer
+
+    except Exception as e:
+        print(f"Error initializing model: {str(e)}")
+        # Fallback initialization
+        try:
+            tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token
+
+            model = AutoModel.from_pretrained(
+                'ucaslcl/GOT-OCR2_0',
+                trust_remote_code=True,
+                low_cpu_mem_usage=True,
+                device_map=device,
+                use_safetensors=True
+            )
+            model = model.eval().to(device)
+            return model, tokenizer
+        except Exception as fallback_error:
+            raise Exception(f"Failed to initialize model: {str(e)}. Fallback also failed: {str(fallback_error)}")
+
+model, tokenizer = initialize_model_safely()
 
 UPLOAD_FOLDER = "./uploads"
 RESULTS_FOLDER = "./results"
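A note on the fallback shim in this hunk: app.py later applies the decorator with a call, `@spaces.GPU()`, but `type('spaces', (), {'GPU': dummy_gpu_decorator})()` stores the function as a class attribute, so `spaces.GPU` resolves to a bound method and `spaces.GPU()` returns the shim instance instead of a decorator, which would fail at decoration time on a machine without the spaces package. Below is a hedged sketch, not part of the commit, of a fallback that survives the `@spaces.GPU()` call form; `example_task` is an illustrative name only.

# Hedged sketch: a local no-op fallback whose GPU() call returns a real decorator.
try:
    import spaces  # available on Hugging Face ZeroGPU Spaces
except ImportError:
    class _DummySpaces:
        @staticmethod
        def GPU(*args, **kwargs):
            def decorator(func):
                return func  # no-op: just run on whatever device is present
            return decorator
    spaces = _DummySpaces()

@spaces.GPU()
def example_task():
    # stand-in for process_image; illustrative only
    return "ok"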
@@ -40,6 +108,84 @@ def image_to_base64(image):
     image.save(buffered, format="PNG")
     return base64.b64encode(buffered.getvalue()).decode()
 
+def safe_model_chat(model, tokenizer, image_path, **kwargs):
+    """
+    Safe wrapper for model.chat to handle DynamicCache and other compatibility issues
+    Optimized for ZeroGPU environments
+    """
+    try:
+        # First attempt: normal call
+        return model.chat(tokenizer, image_path, **kwargs)
+    except AttributeError as e:
+        if "get_max_length" in str(e):
+            # Try to fix the cache issue by clearing it
+            try:
+                if hasattr(model, 'clear_cache'):
+                    model.clear_cache()
+                # Retry the call
+                return model.chat(tokenizer, image_path, **kwargs)
+            except:
+                # If still failing, try with different parameters
+                try:
+                    # Remove any cache-related parameters
+                    kwargs_copy = kwargs.copy()
+                    if 'use_cache' in kwargs_copy:
+                        del kwargs_copy['use_cache']
+                    return model.chat(tokenizer, image_path, **kwargs_copy)
+                except:
+                    raise Exception("Model compatibility issue: DynamicCache error. Please try again.")
+        else:
+            raise e
+    except Exception as e:
+        # Handle other potential issues
+        if "attention_mask" in str(e).lower():
+            # Try to handle attention mask issues
+            try:
+                return model.chat(tokenizer, image_path, **kwargs)
+            except:
+                raise Exception(f"Attention mask error: {str(e)}")
+        else:
+            raise e
+
+def safe_model_chat_crop(model, tokenizer, image_path, **kwargs):
+    """
+    Safe wrapper for model.chat_crop to handle DynamicCache and other compatibility issues
+    Optimized for ZeroGPU environments
+    """
+    try:
+        # First attempt: normal call
+        return model.chat_crop(tokenizer, image_path, **kwargs)
+    except AttributeError as e:
+        if "get_max_length" in str(e):
+            # Try to fix the cache issue by clearing it
+            try:
+                if hasattr(model, 'clear_cache'):
+                    model.clear_cache()
+                # Retry the call
+                return model.chat_crop(tokenizer, image_path, **kwargs)
+            except:
+                # If still failing, try with different parameters
+                try:
+                    # Remove any cache-related parameters
+                    kwargs_copy = kwargs.copy()
+                    if 'use_cache' in kwargs_copy:
+                        del kwargs_copy['use_cache']
+                    return model.chat_crop(tokenizer, image_path, **kwargs_copy)
+                except:
+                    raise Exception("Model compatibility issue: DynamicCache error. Please try again.")
+        else:
+            raise e
+    except Exception as e:
+        # Handle other potential issues
+        if "attention_mask" in str(e).lower():
+            # Try to handle attention mask issues
+            try:
+                return model.chat_crop(tokenizer, image_path, **kwargs)
+            except:
+                raise Exception(f"Attention mask error: {str(e)}")
+        else:
+            raise e
+
 
 @spaces.GPU()
 def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
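The two wrappers in this hunk are line-for-line identical except for the bound method they call (`model.chat` vs `model.chat_crop`), and the attention-mask branch retries the exact same call that just failed. A hedged refactor sketch, not part of the commit, of how the shared retry ladder could be factored into one helper; `safe_call` is a hypothetical name.

# Hypothetical refactor sketch: one helper for both model.chat and model.chat_crop.
def safe_call(method, tokenizer, image_path, **kwargs):
    """Run a bound OCR method with the same retry ladder as the wrappers above."""
    try:
        return method(tokenizer, image_path, **kwargs)
    except AttributeError as e:
        if "get_max_length" not in str(e):
            raise
        model = getattr(method, '__self__', None)  # the model the method is bound to
        if model is not None and hasattr(model, 'clear_cache'):
            model.clear_cache()  # drop the stale DynamicCache before retrying
        kwargs.pop('use_cache', None)  # strip cache-related kwargs on the retry
        return method(tokenizer, image_path, **kwargs)

# Usage mirrors the wrappers above:
#   safe_call(model.chat, tokenizer, image_path, ocr_type='ocr')
#   safe_call(model.chat_crop, tokenizer, image_path, ocr_type='format')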
@@ -69,27 +215,36 @@ def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
         else:
             return "Error: Unsupported image format", None, None
 
-[four deleted lines, likely the old plain-text dispatch; their text is not recoverable from this view]
-        if task == "Format Text OCR":
-            res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
-        elif task == "Fine-grained OCR (Box)":
-            res = model.chat(tokenizer, image_path, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=result_path)
-        elif task == "Fine-grained OCR (Color)":
-            res = model.chat(tokenizer, image_path, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=result_path)
-        elif task == "Multi-crop OCR":
-            res = model.chat_crop(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
-        elif task == "Render Formatted OCR":
-            res = model.chat(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
-
-        if os.path.exists(result_path):
-            with open(result_path, 'r') as f:
-                html_content = f.read()
-            return res, html_content, unique_id
-        else:
+        # Wrap model calls in try-except to handle DynamicCache errors
+        try:
+            if task == "Plain Text OCR":
+                res = safe_model_chat(model, tokenizer, image_path, ocr_type='ocr')
                 return res, None, unique_id
+            else:
+                if task == "Format Text OCR":
+                    res = safe_model_chat(model, tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
+                elif task == "Fine-grained OCR (Box)":
+                    res = safe_model_chat(model, tokenizer, image_path, ocr_type=ocr_type, ocr_box=ocr_box, render=True, save_render_file=result_path)
+                elif task == "Fine-grained OCR (Color)":
+                    res = safe_model_chat(model, tokenizer, image_path, ocr_type=ocr_type, ocr_color=ocr_color, render=True, save_render_file=result_path)
+                elif task == "Multi-crop OCR":
+                    res = safe_model_chat_crop(model, tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
+                elif task == "Render Formatted OCR":
+                    res = safe_model_chat(model, tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
+
+                if os.path.exists(result_path):
+                    with open(result_path, 'r') as f:
+                        html_content = f.read()
+                    return res, html_content, unique_id
+                else:
+                    return res, None, unique_id
+        except AttributeError as e:
+            if "get_max_length" in str(e):
+                # Handle DynamicCache compatibility issue
+                return "Error: Model compatibility issue detected. Please try again or contact support.", None, None
+            else:
+                raise e
+
     except Exception as e:
         return f"Error: {str(e)}", None, None
     finally:
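The `get_max_length` AttributeError this commit keeps guarding against typically appears when GOT-OCR2_0's remote code runs on a newer transformers release, where `DynamicCache.get_max_length` was deprecated and later removed in favor of `get_max_cache_shape`. If that is indeed the cause here, two alternatives to the retry ladder would be pinning transformers to the version the model card was tested with, or aliasing the missing method once at startup. A hedged shim, not part of the commit, and only valid if the installed transformers ships `get_max_cache_shape` as the successor:

# Hedged compatibility shim (assumption: the AttributeError comes from the
# renamed DynamicCache method in newer transformers releases).
from transformers.cache_utils import DynamicCache

if not hasattr(DynamicCache, "get_max_length") and hasattr(DynamicCache, "get_max_cache_shape"):
    DynamicCache.get_max_length = DynamicCache.get_max_cache_shape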
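For reference, a hedged sketch of exercising the updated entry point directly, assuming process_image accepts an image plus a task name as shown in the hunks above; sample.png and the box literal are made-up test inputs, not part of the commit.

# Illustrative smoke test for the new dispatch path.
from PIL import Image

image = Image.open("sample.png")  # any local test image

text, html, run_id = process_image(image, "Plain Text OCR")
print(text)

# Fine-grained tasks forward their extra arguments to the safe wrappers:
text, html, run_id = process_image(
    image, "Fine-grained OCR (Box)",
    ocr_type='format', ocr_box='[100,100,300,300]'
)
if html:
    print(f"Rendered output saved for run {run_id}")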