refine pdf parser, add time zone to userinfo (#112)

This commit is contained in:
KevinHuSh
2024-03-08 11:24:24 +08:00
committed by GitHub
parent 63e498ac79
commit 8f86ab9f7f
7 changed files with 28 additions and 23 deletions

View File

@@ -96,7 +96,7 @@ class LayoutRecognizer(Recognizer):
continue
bxs[i]["layoutno"] = f"{ty}-{ii}"
-bxs[i]["layout_type"] = lts_[ii]["type"]
+bxs[i]["layout_type"] = lts_[ii]["type"] if lts_[ii]["type"]!="equation" else "figure"
i += 1
for lt in ["footer", "header", "reference", "figure caption",
@@ -105,7 +105,7 @@ class LayoutRecognizer(Recognizer):
# add box to figure layouts which has not text box
for i, lt in enumerate(
-[lt for lt in lts if lt["type"] == "figure"]):
+[lt for lt in lts if lt["type"] in ["figure","equation"]]):
if lt.get("visited"):
continue
lt = deepcopy(lt)