Feat: add multi-GPU and parallel processing support for OCR (#5972)

### What problem does this PR solve?

Add multi-GPU and parallel processing support for OCR.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

@yuzhichang I've tried to address the comments in #5697. OCR jobs can now run on both CPU and GPU. (By the way, I've encountered a "Generate embedding error" issue, #5954, that might be due to my outdated GPUs, though I'm not sure.) Please review it and give me suggestions.

GPU:

![gpu_ocr](https://github.com/user-attachments/assets/0ee2ecfb-a665-4e50-8bc7-15941b9cd80e)

![smi](https://github.com/user-attachments/assets/a2312f8c-cf24-443d-bf89-bec50503546d)

CPU:

![cpu_ocr](https://github.com/user-attachments/assets/1ba6bb0b-94df-41ea-be79-790096da4bf1)
2025-12-08 20:42:30 +08:00 · 2025-03-17 11:58:40 +08:00
parent 8495036ff9
commit 3e19044dee
5 changed files with 157 additions and 48 deletions
--- a/deepdoc/vision/t_ocr.py
+++ b/deepdoc/vision/t_ocr.py
@ -28,14 +28,24 @@ from deepdoc.vision.seeit import draw_box
 from deepdoc.vision import OCR, init_in_out
 import argparse
 import numpy as np
+import trio

+# os.environ['CUDA_VISIBLE_DEVICES'] = '0,2' # 2 GPUs, non-contiguous device IDs
+os.environ['CUDA_VISIBLE_DEVICES'] = '0' #1 gpu
+# os.environ['CUDA_VISIBLE_DEVICES'] = '' #cpu

 def main(args):
-    ocr = OCR()
+    import torch.cuda
+
+    cuda_devices = torch.cuda.device_count()
+    limiter = [trio.CapacityLimiter(1) for _ in range(cuda_devices)] if cuda_devices > 1 else None
+    ocr = OCR(parallel_devices = cuda_devices)
    images, outputs = init_in_out(args)

-    for i, img in enumerate(images):
-        bxs = ocr(np.array(img))
+
+    def __ocr(i, id, img):
+        print("Task {} start".format(i))
+        bxs = ocr(np.array(img), id)
        bxs = [(line[0], line[1][0]) for line in bxs]
        bxs = [{
            "text": t,
@ -47,6 +57,30 @@ def main(args):
        with open(outputs[i] + ".txt", "w+", encoding='utf-8') as f:
            f.write("\n".join([o["text"] for o in bxs]))

+        print("Task {} done".format(i))
+
+    async def __ocr_thread(i, id, img, limiter = None):
+        if limiter:
+            async with limiter:
+                print("Task {} use device {}".format(i, id))
+                await trio.to_thread.run_sync(lambda: __ocr(i, id, img))
+        else:
+            __ocr(i, id, img)
+
+    async def __ocr_launcher():
+        if cuda_devices > 1:
+            async with trio.open_nursery() as nursery:
+                for i, img in enumerate(images):
+                    nursery.start_soon(__ocr_thread, i, i % cuda_devices, img, limiter[i % cuda_devices])
+                    await trio.sleep(0.1)
+        else:
+            for i, img in enumerate(images):
+                await __ocr_thread(i, 0, img)
+
+    trio.run(__ocr_launcher)
+
+    print("OCR tasks are all done")
+

 if __name__ == "__main__":
    parser = argparse.ArgumentParser()