From cec06bfb5daf7719656771259740ffdceefa602c Mon Sep 17 00:00:00 2001
From: Kevin Hu
Date: Thu, 15 Jan 2026 17:46:21 +0800
Subject: [PATCH] Fix: empty chunk issue. (#12638)

#12570

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 api/apps/llm_app.py           | 7 ++++---
 rag/flow/splitter/splitter.py | 4 ++++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py
index 3272a36ad..695f4f13e 100644
--- a/api/apps/llm_app.py
+++ b/api/apps/llm_app.py
@@ -373,13 +373,14 @@ def my_llms():
 
 @manager.route("/list", methods=["GET"])  # noqa: F821
 @login_required
-def list_app():
+async def list_app():
     self_deployed = ["FastEmbed", "Ollama", "Xinference", "LocalAI", "LM-Studio", "GPUStack"]
     weighted = []
     model_type = request.args.get("model_type")
+    tenant_id = current_user.id
     try:
-        TenantLLMService.ensure_mineru_from_env(current_user.id)
-        objs = TenantLLMService.query(tenant_id=current_user.id)
+        TenantLLMService.ensure_mineru_from_env(tenant_id)
+        objs = TenantLLMService.query(tenant_id=tenant_id)
         facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key and o.status == StatusEnum.VALID.value])
         status = {(o.llm_name + "@" + o.llm_factory) for o in objs if o.status == StatusEnum.VALID.value}
         llms = LLMService.get_all()
diff --git a/rag/flow/splitter/splitter.py b/rag/flow/splitter/splitter.py
index 343241ab3..4fc4e544c 100644
--- a/rag/flow/splitter/splitter.py
+++ b/rag/flow/splitter/splitter.py
@@ -93,6 +93,8 @@ class Splitter(ProcessBase):
                 split_sec = re.split(r"(%s)" % custom_pattern, c, flags=re.DOTALL)
                 if split_sec:
                     for j in range(0, len(split_sec), 2):
+                        if not split_sec[j].strip():
+                            continue
                         docs.append({
                             "text": split_sec[j],
                             "mom": c
@@ -156,6 +158,8 @@ class Splitter(ProcessBase):
                 if split_sec:
                     c["mom"] = c["text"]
                     for j in range(0, len(split_sec), 2):
+                        if not split_sec[j].strip():
+                            continue
                         cc = deepcopy(c)
                         cc["text"] = split_sec[j]
                         docs.append(cc)
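
For context, here is a minimal sketch (not part of the patch; the `custom_pattern` and input text are hypothetical) of why the added `strip()` guard is needed: `re.split` with a capturing delimiter returns the split segments at even indices and the delimiters at odd ones, and it yields empty strings when the pattern matches at the very start or end of the text or when two matches are adjacent. Without the guard, those whitespace-only segments were appended as empty chunks.

```python
import re

# Hypothetical delimiter and input, for illustration only.
custom_pattern = r"\n#+"          # e.g. markdown heading markers
text = "\n# Title\nbody\n## Sub"  # the delimiter matches at position 0

split_sec = re.split(r"(%s)" % custom_pattern, text, flags=re.DOTALL)
# -> ['', '\n#', ' Title\nbody', '\n##', ' Sub']
#     ^ empty segment at an even index

docs = []
for j in range(0, len(split_sec), 2):
    if not split_sec[j].strip():  # the added guard: skip whitespace-only segments
        continue
    docs.append({"text": split_sec[j]})

print(docs)  # [{'text': ' Title\nbody'}, {'text': ' Sub'}]
```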