Spaces:

Ansemin101
/

Markit_v2

Running

App Files Files Community

AnseMin committed on Mar 17

Commit

66d2b1b

1 Parent(s): 9b25e42

Adding ZeroGPU for GOT-OCR

Browse files

Files changed (4) hide show

build.sh +0 -34
requirements.txt +4 -1
setup.sh +37 -2
src/parsers/got_ocr_parser.py +44 -5

build.sh DELETED Viewed

@@ -1,34 +0,0 @@
-#!/bin/bash
-# Exit on error
-set -e
-echo "Starting build process..."
-# Install system dependencies
-echo "Installing system dependencies..."
-apt-get update && apt-get install -y \
-    wget \
-    pkg-config
-# Install Google Gemini API client
-echo "Installing Google Gemini API client..."
-pip install -q -U google-genai
-echo "Google Gemini API client installed successfully"
-# Install GOT-OCR dependencies
-echo "Installing GOT-OCR dependencies..."
-pip install -q -U torch==2.0.1 torchvision==0.15.2 transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.3
-echo "GOT-OCR dependencies installed successfully"
-# Install Python dependencies
-echo "Installing Python dependencies..."
-pip install -e .
-# Create .env file if it doesn't exist
-if [ ! -f .env ]; then
-    echo "Creating .env file..."
-    cp .env.example .env || echo "Warning: .env.example not found"
-fi
-echo "Build process completed successfully!"

requirements.txt CHANGED Viewed

@@ -21,4 +21,7 @@ transformers==4.37.2  # Pin to a specific version that works with safetensors 0.
 tiktoken==0.6.0
 verovio==4.3.1
 accelerate==0.28.0
-safetensors==0.4.3  # Updated to meet minimum version required by accelerate

 tiktoken==0.6.0
 verovio==4.3.1
 accelerate==0.28.0
+safetensors==0.4.3  # Updated to meet minimum version required by accelerate
+# ZeroGPU support for HuggingFace Spaces
+spaces>=0.19.1

setup.sh CHANGED Viewed

@@ -3,7 +3,20 @@
 # Exit on error
 set -e
-echo "Setting up environment..."
 # Install Python dependencies
 echo "Installing Python dependencies..."
@@ -16,4 +29,26 @@ echo "Installing GOT-OCR dependencies..."
 pip install -q -U torch==2.0.1 torchvision==0.15.2 transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.3
 echo "GOT-OCR dependencies installed successfully"
-echo "Setup completed"

 # Exit on error
 set -e
+echo "Starting setup process..."
+# Check if running with sudo/root permissions for system dependencies
+if [ "$EUID" -eq 0 ]; then
+    # Install system dependencies
+    echo "Installing system dependencies..."
+    apt-get update && apt-get install -y \
+        wget \
+        pkg-config
+    echo "System dependencies installed successfully"
+else
+    echo "Not running as root. Skipping system dependencies installation."
+    echo "If system dependencies are needed, please run this script with sudo."
+fi
 # Install Python dependencies
 echo "Installing Python dependencies..."
 pip install -q -U torch==2.0.1 torchvision==0.15.2 transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.3
 echo "GOT-OCR dependencies installed successfully"
+# Install ZeroGPU support
+# The requirement specifier must be quoted: unquoted, the shell treats
+# ">=0.19.1" as an output redirection, so pip would install an unconstrained
+# "spaces" and a stray file named "=0.19.1" would be created.
+echo "Installing ZeroGPU support..."
+pip install -q -U "spaces>=0.19.1"
+echo "ZeroGPU support installed successfully"
+# Install the project in development mode
+echo "Installing project in development mode..."
+pip install -e .
+echo "Project installed successfully"
+# Create .env file if it doesn't exist
+if [ ! -f .env ]; then
+    echo "Creating .env file..."
+    if [ -f .env.example ]; then
+        cp .env.example .env
+        echo ".env file created from .env.example"
+    else
+        echo "Warning: .env.example not found. Creating empty .env file."
+        touch .env
+    fi
+fi
+echo "Setup process completed successfully!"

src/parsers/got_ocr_parser.py CHANGED Viewed

@@ -25,9 +25,19 @@ try:
             "Consider downgrading to version <4.48.0"
         )
     GOT_AVAILABLE = True
 except ImportError:
     GOT_AVAILABLE = False
     logger.warning("GOT-OCR dependencies not installed. The parser will not be available.")
 class GotOcrParser(DocumentParser):
@@ -65,15 +75,35 @@ class GotOcrParser(DocumentParser):
                     'stepfun-ai/GOT-OCR2_0',
                     trust_remote_code=True
                 )
                 cls._model = AutoModel.from_pretrained(
                     'stepfun-ai/GOT-OCR2_0',
                     trust_remote_code=True,
                     low_cpu_mem_usage=True,
-                    device_map='cuda',
                     use_safetensors=True,
                     pad_token_id=cls._tokenizer.eos_token_id
                 )
-                cls._model = cls._model.eval().cuda()
                 logger.info("GOT-OCR model loaded successfully")
             except Exception as e:
                 cls._model = None
@@ -92,6 +122,15 @@ class GotOcrParser(DocumentParser):
             cls._tokenizer = None
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
         logger.info("GOT-OCR model released from memory")
     def parse(self, file_path: Union[str, Path], ocr_method: Optional[str] = None, **kwargs) -> str:
@@ -102,9 +141,9 @@ class GotOcrParser(DocumentParser):
                 "torch, transformers, tiktoken, verovio, accelerate"
             )
-        # Check if CUDA is available
-        if not torch.cuda.is_available():
-            raise RuntimeError("GOT-OCR requires CUDA. CPU-only mode is not supported.")
         # Check file extension
         file_path = Path(file_path)

             "Consider downgrading to version <4.48.0"
         )
+    # Import spaces for ZeroGPU support
+    try:
+        import spaces
+        ZEROGPU_AVAILABLE = True
+        logger.info("ZeroGPU support is available")
+    except ImportError:
+        ZEROGPU_AVAILABLE = False
+        logger.info("ZeroGPU not available, will use standard GPU if available")
     GOT_AVAILABLE = True
 except ImportError:
     GOT_AVAILABLE = False
+    ZEROGPU_AVAILABLE = False
     logger.warning("GOT-OCR dependencies not installed. The parser will not be available.")
 class GotOcrParser(DocumentParser):
                     'stepfun-ai/GOT-OCR2_0',
                     trust_remote_code=True
                 )
+                # Determine device mapping based on ZeroGPU availability
+                if ZEROGPU_AVAILABLE:
+                    logger.info("Using ZeroGPU for model loading")
+                    # Request GPU resources through ZeroGPU
+                    spaces.enable_gpu()
+                    device_map = 'cuda'
+                elif torch.cuda.is_available():
+                    logger.info("Using local CUDA device for model loading")
+                    device_map = 'cuda'
+                else:
+                    logger.warning("No GPU available, falling back to CPU (not recommended)")
+                    device_map = 'auto'
                 cls._model = AutoModel.from_pretrained(
                     'stepfun-ai/GOT-OCR2_0',
                     trust_remote_code=True,
                     low_cpu_mem_usage=True,
+                    device_map=device_map,
                     use_safetensors=True,
                     pad_token_id=cls._tokenizer.eos_token_id
                 )
+                # Set model to evaluation mode
+                if device_map == 'cuda':
+                    cls._model = cls._model.eval().cuda()
+                else:
+                    cls._model = cls._model.eval()
                 logger.info("GOT-OCR model loaded successfully")
             except Exception as e:
                 cls._model = None
             cls._tokenizer = None
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
+        # Release ZeroGPU resources if available
+        if ZEROGPU_AVAILABLE:
+            try:
+                spaces.disable_gpu()
+                logger.info("ZeroGPU resources released")
+            except Exception as e:
+                logger.warning(f"Error releasing ZeroGPU resources: {str(e)}")
         logger.info("GOT-OCR model released from memory")
     def parse(self, file_path: Union[str, Path], ocr_method: Optional[str] = None, **kwargs) -> str:
                 "torch, transformers, tiktoken, verovio, accelerate"
             )
+        # Check if CUDA is available (either directly or through ZeroGPU)
+        if not torch.cuda.is_available() and not ZEROGPU_AVAILABLE:
+            logger.warning("No GPU available. GOT-OCR performance may be severely degraded.")
         # Check file extension
         file_path = Path(file_path)