fix position extraction bug (#93)

* fix position extraction bug

* remove delimiter for naive parser
This commit is contained in:
KevinHuSh
2024-03-04 17:08:35 +08:00
committed by GitHub
parent fae00827e6
commit 7bfaf0df29
11 changed files with 34 additions and 22 deletions

View File

@@ -246,6 +246,8 @@ def naive_merge(sections, chunk_token_num=128, delimiter="\n。"):
tk_nums[-1] += tnum
for sec, pos in sections:
add_chunk(sec, pos)
continue
s, e = 0, 1
while e < len(sec):
if sec[e] in delimiter: