refine manual parser (#131)

This commit is contained in:
KevinHuSh
2024-03-19 12:26:04 +08:00
committed by GitHub
parent d56c9e7630
commit 9da671b951
13 changed files with 145 additions and 52 deletions

View File

@ -247,7 +247,7 @@ class HuParser:
b["SP"] = ii
def __ocr(self, pagenum, img, chars, ZM=3):
bxs = self.ocr(np.array(img))
bxs = self.ocr.detect(np.array(img))
if not bxs:
self.boxes.append([])
return
@ -278,8 +278,10 @@ class HuParser:
for b in bxs:
if not b["text"]:
b["text"] = b["txt"]
left, right, top, bott = b["x0"]*ZM, b["x1"]*ZM, b["top"]*ZM, b["bottom"]*ZM
b["text"] = self.ocr.recognize(np.array(img), np.array([[left, top], [right, top], [right, bott], [left, bott]], dtype=np.float32))
del b["txt"]
bxs = [b for b in bxs if b["text"]]
if self.mean_height[-1] == 0:
self.mean_height[-1] = np.median([b["bottom"] - b["top"]
for b in bxs])