From 1a5608d0f80c3625edc44535dde66bcc580a9842 Mon Sep 17 00:00:00 2001 From: Stephen Hu Date: Mon, 28 Apr 2025 13:35:34 +0800 Subject: [PATCH] Fix: Add title_tks for Pictures (#7365) ### What problem does this PR solve? https://github.com/infiniflow/ragflow/issues/7362 Append `title_tks` (the file name with its trailing alphabetic extension removed, passed through `rag_tokenizer.tokenize`) to the `doc` dict built by `chunk()` in `rag/app/picture.py`. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/app/picture.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rag/app/picture.py b/rag/app/picture.py index 97a954c9c..f5fb3ae72 100644 --- a/rag/app/picture.py +++ b/rag/app/picture.py @@ -15,6 +15,7 @@ # import io +import re import numpy as np from PIL import Image @@ -24,6 +25,8 @@ from api.db.services.llm_service import LLMBundle from deepdoc.vision import OCR from rag.nlp import tokenize from rag.utils import clean_markdown_block +from rag.nlp import rag_tokenizer + ocr = OCR() @@ -32,6 +35,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs): img = Image.open(io.BytesIO(binary)).convert('RGB') doc = { "docnm_kwd": filename, + "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)), "image": img } bxs = ocr(np.array(img))