#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Command-line example: run the NemoRetrieverOCR pipeline on a single image."""

import argparse
from typing import Optional

from nemo_retriever_ocr.inference.pipeline import NemoRetrieverOCR


def main(
    image_path: str,
    merge_level: str,
    no_visualize: bool,
    model_dir: Optional[str] = None,
) -> None:
    """Run OCR on one image and print how many predictions were returned.

    Args:
        image_path: Path to the input image, passed straight to the pipeline.
        merge_level: Forwarded as the pipeline's ``merge_level`` keyword; the
            CLI restricts it to "word", "sentence" or "paragraph".
        no_visualize: When true, the pipeline is called with
            ``visualize=False``; otherwise with ``visualize=True``.
        model_dir: Optional path to the model checkpoints. When ``None`` the
            pipeline is constructed with no arguments and uses its own default.
    """
    # Previously ``model_dir`` was accepted but never used, so ``--model-dir``
    # had no effect. Forward it only when explicitly provided so that the
    # no-flag invocation keeps constructing the pipeline exactly as before.
    # NOTE(review): assumes NemoRetrieverOCR accepts a ``model_dir`` keyword —
    # confirm against the pipeline's constructor.
    pipeline_kwargs = {} if model_dir is None else {"model_dir": model_dir}
    ocr_pipeline = NemoRetrieverOCR(**pipeline_kwargs)

    predictions = ocr_pipeline(
        image_path,
        merge_level=merge_level,
        visualize=not no_visualize,
    )

    print(f"Found {len(predictions)} text regions.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run OCR inference and annotate image.")
    parser.add_argument("image_path", type=str, help="Path to the input image.")
    parser.add_argument(
        "--merge-level",
        type=str,
        choices=["word", "sentence", "paragraph"],
        default="paragraph",
        help="Merge level for OCR output (word, sentence, paragraph).",
    )
    parser.add_argument("--no-visualize", action="store_true", help="Do not save the annotated image.")
    parser.add_argument(
        "--model-dir",
        type=str,
        help="Path to the model checkpoints.",
        # None means "not specified": main() then lets the pipeline pick its
        # own default location instead of forcing a path on it.
        default=None,
    )

    args = parser.parse_args()

    main(
        args.image_path,
        merge_level=args.merge_level,
        no_visualize=args.no_visualize,
        model_dir=args.model_dir,
    )