mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: use the same logic to handle pos in tokenize_chunks_with_images (#8732)
### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/8719

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -559,9 +559,6 @@ def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?"):
|
||||
def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。;!?"):
|
||||
if not texts or len(texts) != len(images):
|
||||
return [], []
|
||||
# Ensure texts are str, not tuple; if a tuple is given, convert it to str (take the first item)
|
||||
if isinstance(texts[0], tuple):
|
||||
texts = [t[0] for t in texts]
|
||||
cks = [""]
|
||||
result_images = [None]
|
||||
tk_nums = [0]
|
||||
@ -596,6 +593,12 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。
|
||||
for sub_sec in splited_sec:
|
||||
if re.match(f"^{dels}$", sub_sec):
|
||||
continue
|
||||
# if text is tuple, unpack it
|
||||
if isinstance(text, tuple):
|
||||
text_str = text[0]
|
||||
text_pos = text[1] if len(text) > 1 else ""
|
||||
add_chunk(text_str, image, text_pos)
|
||||
else:
|
||||
add_chunk(text, image)
|
||||
|
||||
return cks, result_images
|
||||
|
||||
Reference in New Issue
Block a user