mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
refine manual parser (#131)
This commit is contained in:
@ -247,7 +247,7 @@ class HuParser:
|
||||
b["SP"] = ii
|
||||
|
||||
def __ocr(self, pagenum, img, chars, ZM=3):
|
||||
bxs = self.ocr(np.array(img))
|
||||
bxs = self.ocr.detect(np.array(img))
|
||||
if not bxs:
|
||||
self.boxes.append([])
|
||||
return
|
||||
@ -278,8 +278,10 @@ class HuParser:
|
||||
|
||||
for b in bxs:
|
||||
if not b["text"]:
|
||||
b["text"] = b["txt"]
|
||||
left, right, top, bott = b["x0"]*ZM, b["x1"]*ZM, b["top"]*ZM, b["bottom"]*ZM
|
||||
b["text"] = self.ocr.recognize(np.array(img), np.array([[left, top], [right, top], [right, bott], [left, bott]], dtype=np.float32))
|
||||
del b["txt"]
|
||||
bxs = [b for b in bxs if b["text"]]
|
||||
if self.mean_height[-1] == 0:
|
||||
self.mean_height[-1] = np.median([b["bottom"] - b["top"]
|
||||
for b in bxs])
|
||||
|
||||
Reference in New Issue
Block a user