AnseMin committed on
Commit
610b772
·
1 Parent(s): 62f9c09

handling zerogpu usage

Browse files
Files changed (4) hide show
  1. app.py +8 -0
  2. requirements.txt +1 -0
  3. setup.sh +5 -0
  4. src/parsers/got_ocr_parser.py +93 -82
app.py CHANGED
@@ -47,6 +47,14 @@ except Exception:
47
  print("WARNING: Hugging Face CLI not found. Installing...")
48
  subprocess.run([sys.executable, "-m", "pip", "install", "-q", "huggingface_hub[cli]"], check=False)
49
 
 
 
 
 
 
 
 
 
50
  # Try to load environment variables from .env file
51
  try:
52
  from dotenv import load_dotenv
 
47
  print("WARNING: Hugging Face CLI not found. Installing...")
48
  subprocess.run([sys.executable, "-m", "pip", "install", "-q", "huggingface_hub[cli]"], check=False)
49
 
50
+ # Check if spaces module is installed (needed for ZeroGPU)
51
+ try:
52
+ import spaces
53
+ print("Spaces module found for ZeroGPU support")
54
+ except ImportError:
55
+ print("WARNING: Spaces module not found. Installing...")
56
+ subprocess.run([sys.executable, "-m", "pip", "install", "-q", "spaces"], check=False)
57
+
58
  # Try to load environment variables from .env file
59
  try:
60
  from dotenv import load_dotenv
requirements.txt CHANGED
@@ -3,6 +3,7 @@ gradio==5.14.0
3
  markdown==3.7
4
  Pillow>=9.0.0,<11.0.0
5
  numpy<2.0.0
 
6
 
7
  # Image processing
8
  opencv-python-headless>=4.5.0 # Headless version for server environments
 
3
  markdown==3.7
4
  Pillow>=9.0.0,<11.0.0
5
  numpy<2.0.0
6
+ spaces # For ZeroGPU support
7
 
8
  # Image processing
9
  opencv-python-headless>=4.5.0 # Headless version for server environments
setup.sh CHANGED
@@ -45,6 +45,11 @@ echo "Installing Hugging Face CLI..."
45
  pip install -q -U "huggingface_hub[cli]"
46
  echo "Hugging Face CLI installed successfully"
47
 
 
 
 
 
 
48
  # Add debug section for GOT-OCR repo
49
  echo "===== GOT-OCR Repository Debugging ====="
50
 
 
45
  pip install -q -U "huggingface_hub[cli]"
46
  echo "Hugging Face CLI installed successfully"
47
 
48
+ # Install spaces module for ZeroGPU support
49
+ echo "Installing spaces module for ZeroGPU support..."
50
+ pip install -q -U spaces
51
+ echo "Spaces module installed successfully"
52
+
53
  # Add debug section for GOT-OCR repo
54
  echo "===== GOT-OCR Repository Debugging ====="
55
 
src/parsers/got_ocr_parser.py CHANGED
@@ -7,7 +7,13 @@ import tempfile
7
  import shutil
8
  from typing import Dict, List, Optional, Any, Union
9
 
10
- import spaces # Import spaces module for ZeroGPU support
 
 
 
 
 
 
11
  from src.parsers.parser_interface import DocumentParser
12
  from src.parsers.parser_registry import ParserRegistry
13
 
@@ -72,8 +78,9 @@ class GotOcrParser(DocumentParser):
72
  import transformers
73
  import tiktoken
74
 
75
- # For ZeroGPU, we don't need to check CUDA availability here
76
- # as the GPU will be allocated when needed
 
77
 
78
  # Check for latex2markdown
79
  try:
@@ -195,13 +202,9 @@ class GotOcrParser(DocumentParser):
195
  logger.error(f"Failed to set up GOT-OCR2.0 repository: {str(e)}")
196
  return False
197
 
198
- @spaces.GPU(duration=120) # Set duration to 120 seconds for OCR processing
199
  def parse(self, file_path: Union[str, Path], ocr_method: Optional[str] = None, **kwargs) -> str:
200
  """Parse a document using GOT-OCR 2.0.
201
 
202
- This method is decorated with @spaces.GPU to enable ZeroGPU support.
203
- When called, it will request a GPU from the ZeroGPU pool.
204
-
205
  Args:
206
  file_path: Path to the image file
207
  ocr_method: OCR method to use ('plain' or 'format')
@@ -284,95 +287,67 @@ class GotOcrParser(DocumentParser):
284
  f.write(f"cd {parent_dir}\n") # Change to parent directory
285
  f.write("export PYTHONPATH=$PYTHONPATH:$(pwd)\n") # Add current directory to PYTHONPATH
286
 
287
- # Add environment variables for ZeroGPU support
288
- f.write("export SPACES_ZERO_GPU=1\n") # Enable ZeroGPU
289
- f.write("export CUDA_VISIBLE_DEVICES=0\n") # Use first available GPU
290
-
291
  # Add a Python script to patch torch.bfloat16
292
  patch_script = os.path.join(tempfile.gettempdir(), "patch_torch.py")
293
  with open(patch_script, 'w') as patch_f:
294
  patch_f.write("""
295
  import sys
296
  import torch
297
- import spaces
298
-
299
- @spaces.GPU(duration=120)
300
- def patch_torch():
301
- # Patch torch.bfloat16 to use torch.float16 instead
302
- if hasattr(torch, 'bfloat16'):
303
- # Save reference to original bfloat16
304
- original_bfloat16 = torch.bfloat16
305
- # Replace with float16
306
- torch.bfloat16 = torch.float16
307
- print("Successfully patched torch.bfloat16 to use torch.float16")
308
 
309
- # Also patch torch.autocast context manager for CUDA
310
- original_autocast = torch.autocast
311
- def patched_autocast(*args, **kwargs):
312
- # Force dtype to float16 when CUDA is involved
313
- if args and args[0] == "cuda" and kwargs.get("dtype") == torch.bfloat16:
314
- kwargs["dtype"] = torch.float16
315
- print(f"Autocast: Changed bfloat16 to float16 for {args}")
316
- return original_autocast(*args, **kwargs)
317
 
318
- torch.autocast = patched_autocast
319
- print("Successfully patched torch.autocast to ensure float16 is used instead of bfloat16")
 
 
 
 
 
 
320
 
321
- patch_torch() # Execute the patching
 
322
  """)
323
-
324
- # Build the command with the patch included and ZeroGPU support
325
- py_cmd = [
326
- sys.executable,
327
- "-c",
328
- f"""
329
- import sys
330
- import spaces
331
- sys.path.insert(0, '{parent_dir}')
332
- exec(open('{patch_script}').read())
333
-
334
- @spaces.GPU(duration=120)
335
- def run_got_ocr():
336
- import runpy
337
- runpy.run_path('{script_path}', run_name='__main__')
338
-
339
- run_got_ocr()
340
- """
341
- ]
342
-
343
- # Add the arguments
344
- py_cmd.extend(["--model-name", self._weights_path])
345
- py_cmd.extend(["--image-file", str(file_path)])
346
- py_cmd.extend(["--type", ocr_type])
347
-
348
- # Add render flag if required
349
- if render:
350
- py_cmd.append("--render")
351
-
352
- # Check if box or color is specified in kwargs
353
- if 'box' in kwargs and kwargs['box']:
354
- py_cmd.extend(["--box", str(kwargs['box'])])
355
-
356
- if 'color' in kwargs and kwargs['color']:
357
- py_cmd.extend(["--color", kwargs['color']])
358
-
359
- # Add the command to the script
360
- f.write(" ".join(py_cmd) + "\n")
361
 
362
  # Make the script executable
363
  os.chmod(temp_script, 0o755)
364
 
365
- # Run the script
366
- logger.info(f"Running command through wrapper script: {temp_script}")
367
- process = subprocess.run(
368
- [temp_script],
369
- check=True,
370
- capture_output=True,
371
- text=True
372
- )
373
-
374
- # Process the output
375
- result = process.stdout.strip()
376
 
377
  # If render was requested, find and return the path to the HTML file
378
  if render:
@@ -417,6 +392,42 @@ run_got_ocr()
417
 
418
  # Generic error
419
  raise RuntimeError(f"Error processing document with GOT-OCR: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
 
421
  @classmethod
422
  def release_model(cls):
 
7
  import shutil
8
  from typing import Dict, List, Optional, Any, Union
9
 
10
+ # Import spaces module for ZeroGPU support
11
+ try:
12
+ import spaces
13
+ HAS_SPACES = True
14
+ except ImportError:
15
+ HAS_SPACES = False
16
+
17
  from src.parsers.parser_interface import DocumentParser
18
  from src.parsers.parser_registry import ParserRegistry
19
 
 
78
  import transformers
79
  import tiktoken
80
 
81
+ # Check CUDA availability if using torch
82
+ if hasattr(torch, 'cuda') and not torch.cuda.is_available():
83
+ logger.warning("CUDA is not available. GOT-OCR performs best with GPU acceleration.")
84
 
85
  # Check for latex2markdown
86
  try:
 
202
  logger.error(f"Failed to set up GOT-OCR2.0 repository: {str(e)}")
203
  return False
204
 
 
205
  def parse(self, file_path: Union[str, Path], ocr_method: Optional[str] = None, **kwargs) -> str:
206
  """Parse a document using GOT-OCR 2.0.
207
 
 
 
 
208
  Args:
209
  file_path: Path to the image file
210
  ocr_method: OCR method to use ('plain' or 'format')
 
287
  f.write(f"cd {parent_dir}\n") # Change to parent directory
288
  f.write("export PYTHONPATH=$PYTHONPATH:$(pwd)\n") # Add current directory to PYTHONPATH
289
 
 
 
 
 
290
  # Add a Python script to patch torch.bfloat16
291
  patch_script = os.path.join(tempfile.gettempdir(), "patch_torch.py")
292
  with open(patch_script, 'w') as patch_f:
293
  patch_f.write("""
294
  import sys
295
  import torch
 
 
 
 
 
 
 
 
 
 
 
296
 
297
+ # Patch torch.bfloat16 to use torch.float16 instead
298
+ if hasattr(torch, 'bfloat16'):
299
+ # Save reference to original bfloat16
300
+ original_bfloat16 = torch.bfloat16
301
+ # Replace with float16
302
+ torch.bfloat16 = torch.float16
303
+ print("Successfully patched torch.bfloat16 to use torch.float16")
 
304
 
305
+ # Also patch torch.autocast context manager for CUDA
306
+ original_autocast = torch.autocast
307
+ def patched_autocast(*args, **kwargs):
308
+ # Force dtype to float16 when CUDA is involved
309
+ if args and args[0] == "cuda" and kwargs.get("dtype") == torch.bfloat16:
310
+ kwargs["dtype"] = torch.float16
311
+ print(f"Autocast: Changed bfloat16 to float16 for {args}")
312
+ return original_autocast(*args, **kwargs)
313
 
314
+ torch.autocast = patched_autocast
315
+ print("Successfully patched torch.autocast to ensure float16 is used instead of bfloat16")
316
  """)
317
+
318
+ # Build the command with the patch included
319
+ py_cmd = [
320
+ sys.executable,
321
+ "-c",
322
+ f"import sys; sys.path.insert(0, '{parent_dir}'); "
323
+ f"exec(open('{patch_script}').read()); "
324
+ f"import runpy; runpy.run_path('{script_path}', run_name='__main__')"
325
+ ]
326
+
327
+ # Add the arguments
328
+ py_cmd.extend(["--model-name", self._weights_path])
329
+ py_cmd.extend(["--image-file", str(file_path)])
330
+ py_cmd.extend(["--type", ocr_type])
331
+
332
+ # Add render flag if required
333
+ if render:
334
+ py_cmd.append("--render")
335
+
336
+ # Check if box or color is specified in kwargs
337
+ if 'box' in kwargs and kwargs['box']:
338
+ py_cmd.extend(["--box", str(kwargs['box'])])
339
+
340
+ if 'color' in kwargs and kwargs['color']:
341
+ py_cmd.extend(["--color", kwargs['color']])
342
+
343
+ # Add the command to the script
344
+ f.write(" ".join(py_cmd) + "\n")
 
 
 
 
 
 
 
 
 
 
345
 
346
  # Make the script executable
347
  os.chmod(temp_script, 0o755)
348
 
349
+ # Run the script with GPU access if available
350
+ result = self._run_with_gpu(temp_script)
 
 
 
 
 
 
 
 
 
351
 
352
  # If render was requested, find and return the path to the HTML file
353
  if render:
 
392
 
393
  # Generic error
394
  raise RuntimeError(f"Error processing document with GOT-OCR: {str(e)}")
395
+
396
+ # Define a method that will be decorated with spaces.GPU to ensure GPU access
397
+ def _run_with_gpu(self, script_path):
398
+ """Run a script with GPU access using the spaces.GPU decorator if available."""
399
+ if HAS_SPACES:
400
+ # Use the spaces.GPU decorator to ensure GPU access
401
+ return self._run_script_with_gpu_allocation(script_path)
402
+ else:
403
+ # Fall back to regular execution if spaces module is not available
404
+ logger.info(f"Running command through wrapper script without ZeroGPU: {script_path}")
405
+ process = subprocess.run(
406
+ [script_path],
407
+ check=True,
408
+ capture_output=True,
409
+ text=True
410
+ )
411
+ return process.stdout.strip()
412
+
413
+ # This method will be decorated with spaces.GPU
414
+ if HAS_SPACES:
415
+ @spaces.GPU(duration=180) # Allocate up to 3 minutes for OCR processing
416
+ def _run_script_with_gpu_allocation(self, script_path):
417
+ """Run a script with GPU access using the spaces.GPU decorator."""
418
+ logger.info(f"Running command through wrapper script with ZeroGPU allocation: {script_path}")
419
+ process = subprocess.run(
420
+ [script_path],
421
+ check=True,
422
+ capture_output=True,
423
+ text=True
424
+ )
425
+ return process.stdout.strip()
426
+ else:
427
+ # Define a dummy method if spaces is not available
428
+ def _run_script_with_gpu_allocation(self, script_path):
429
+ # This should never be called if HAS_SPACES is False
430
+ raise NotImplementedError("spaces module is not available")
431
 
432
  @classmethod
433
  def release_model(cls):