diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index 78f73ece7..9a24d48b2 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -559,9 +559,6 @@ def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?"): def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。;!?"): if not texts or len(texts) != len(images): return [], [] - # Enuser texts is str not tuple, if it is tuple, convert to str (get the first item) - if isinstance(texts[0], tuple): - texts = [t[0] for t in texts] cks = [""] result_images = [None] tk_nums = [0] @@ -596,6 +593,12 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。 for sub_sec in splited_sec: if re.match(f"^{dels}$", sub_sec): continue + # if text is tuple, unpack it + if isinstance(text, tuple): + text_str = text[0] + text_pos = text[1] if len(text) > 1 else "" + add_chunk(text_str, image, text_pos) + else: add_chunk(text, image) return cks, result_images