Fix: remove garbage filtering rules (#11567)

### What problem does this PR solve?
change:

remove garbage filtering rules

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
buua436
2025-11-27 17:54:49 +08:00
committed by GitHub
parent 89d82ff031
commit a674338c21

View File

@ -17,7 +17,7 @@
import logging
import math
import os
import re
# import re
from collections import Counter
from copy import deepcopy
@ -62,8 +62,9 @@ class LayoutRecognizer(Recognizer):
def __call__(self, image_list, ocr_res, scale_factor=3, thr=0.2, batch_size=16, drop=True):
def __is_garbage(b):
patt = [r"^•+$", "^[0-9]{1,2} / ?[0-9]{1,2}$", r"^[0-9]{1,2} of [0-9]{1,2}$", "^http://[^ ]{12,}", "\\(cid *: *[0-9]+ *\\)"]
return any([re.search(p, b["text"]) for p in patt])
return False
# patt = [r"^•+$", "^[0-9]{1,2} / ?[0-9]{1,2}$", r"^[0-9]{1,2} of [0-9]{1,2}$", "^http://[^ ]{12,}", "\\(cid *: *[0-9]+ *\\)"]
# return any([re.search(p, b["text"]) for p in patt])
if self.client:
layouts = self.client.predict(image_list)