mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
refine pdf parser, add time zone to userinfo (#112)
This commit is contained in:
@ -101,7 +101,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
|
||||
d = copy.deepcopy(doc)
|
||||
if pdf_parser:
|
||||
d["image"], poss = pdf_parser.crop(ck, need_position=True)
|
||||
add_positions(d, poss)
|
||||
add_positions(d, poss, from_page)
|
||||
ck = pdf_parser.remove_tag(ck)
|
||||
tokenize(d, ck, eng)
|
||||
res.append(d)
|
||||
@ -112,7 +112,7 @@ if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
|
||||
def dummy(a, b):
|
||||
def dummy(prog=None, msg=""):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user