refactor retieval_test, add SQl retrieval methods (#61)

This commit is contained in:
KevinHuSh
2024-02-08 17:01:01 +08:00
committed by GitHub
parent 0a903c7714
commit 5e0a689c43
16 changed files with 238 additions and 74 deletions

View File

@ -57,7 +57,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **k
callback(0.8, "Finish parsing.")
else: raise NotImplementedError("file type not supported yet(docx, pdf, txt supported)")
cks = naive_merge(sections, kwargs.get("chunk_token_num", 128), kwargs.get("delimer", "\n。;!?"))
parser_config = kwargs.get("parser_config", {"chunk_token_num": 128, "delimer": "\n。;!?"})
cks = naive_merge(sections, parser_config["chunk_token_num"], parser_config["delimer"])
eng = is_english(cks)
res = []
# wrap up to es documents