import atexit import functools from queue import Queue from threading import Event, Thread from paddleocr import PaddleOCR import documentos import herramientas LANG_CONFIG = { "ch": {"num_workers": 2}, "en": {"num_workers": 2}, "fr": {"num_workers": 1}, "german": {"num_workers": 1}, "korean": {"num_workers": 1}, "japan": {"num_workers": 1}, } CONCURRENCY_LIMIT = 8 class PaddleOCRModelManager(object): def __init__(self, num_workers, model_factory): super().__init__() self._model_factory = model_factory self._queue = Queue() self._workers = [] self._model_initialized_event = Event() for _ in range(num_workers): worker = Thread(target=self._worker, daemon=False) worker.start() self._model_initialized_event.wait() self._model_initialized_event.clear() self._workers.append(worker) def infer(self, *args, **kwargs): # XXX: Should I use a more lightweight data structure, say, a future? result_queue = Queue(maxsize=1) self._queue.put((args, kwargs, result_queue)) success, payload = result_queue.get() if success: return payload else: raise payload def close(self): for _ in self._workers: self._queue.put(None) for worker in self._workers: worker.join() def _worker(self): model = self._model_factory() self._model_initialized_event.set() while True: item = self._queue.get() if item is None: break args, kwargs, result_queue = item try: result = model.ocr(*args, **kwargs) result_queue.put((True, result)) except Exception as e: result_queue.put((False, e)) finally: self._queue.task_done() def create_model(lang): return PaddleOCR(lang=lang, use_angle_cls=True, use_gpu=False) model_managers = {} for lang, config in LANG_CONFIG.items(): model_manager = PaddleOCRModelManager(config["num_workers"], functools.partial(create_model, lang=lang)) model_managers[lang] = model_manager def close_model_managers(): for manager in model_managers.values(): manager.close() # XXX: Not sure if gradio allows adding custom teardown logic atexit.register(close_model_managers) def inference(img, lang): ocr = model_managers[lang] result = ocr.infer(img, cls=True)[0] textos_extraidos = herramientas.listaTextosExtraidos(result) #Campos DNI Panamá. nombre, apellido, identificacion = documentos.dni(textos_extraidos) print(f"Hola: {nombre}, {apellido} con identificación: {identificacion}") return { "nombre": nombre, "apellido": apellido, "identificacion": identificacion }