From 65c3f0406c101cd836570d8842e5c4ea978213a5 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Mon, 13 Oct 2025 11:53:48 +0800 Subject: [PATCH] Fix: maintain backward compatibility for KB tasks (#10508) ### What problem does this PR solve? Maintain backward compatibility for KB tasks ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/apps/kb_app.py | 16 ++++++++++++---- rag/svr/task_executor.py | 41 +++++++++++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py index bca28fb6f..141f1f5d4 100644 --- a/api/apps/kb_app.py +++ b/api/apps/kb_app.py @@ -36,6 +36,7 @@ from api import settings from rag.nlp import search from api.constants import DATASET_NAME_LIMIT from rag.settings import PAGERANK_FLD +from rag.utils.redis_conn import REDIS_CONN from rag.utils.storage_factory import STORAGE_IMPL @@ -760,18 +761,25 @@ def delete_kb_task(): match pipeline_task_type: case PipelineTaskType.GRAPH_RAG: settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), kb_id) - kb_task_id = "graphrag_task_id" + kb_task_id_field = "graphrag_task_id" + task_id = kb.graphrag_task_id kb_task_finish_at = "graphrag_task_finish_at" case PipelineTaskType.RAPTOR: - kb_task_id = "raptor_task_id" + kb_task_id_field = "raptor_task_id" + task_id = kb.raptor_task_id kb_task_finish_at = "raptor_task_finish_at" case PipelineTaskType.MINDMAP: - kb_task_id = "mindmap_task_id" + kb_task_id_field = "mindmap_task_id" + task_id = kb.mindmap_task_id kb_task_finish_at = "mindmap_task_finish_at" case _: return get_error_data_result(message="Internal Error: Invalid task type") - ok = KnowledgebaseService.update_by_id(kb_id, {kb_task_id: "", kb_task_finish_at: None}) + def cancel_task(task_id): + REDIS_CONN.set(f"{task_id}-cancel", "x") + cancel_task(task_id) + + ok = KnowledgebaseService.update_by_id(kb_id, {kb_task_id_field: "", kb_task_finish_at: None}) if not ok: return server_error_response(f"Internal error: cannot delete task {pipeline_task_type}") diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index c0b1d2c51..9801b53dd 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -691,7 +691,7 @@ async def run_raptor_for_kb(row, kb_parser_config, chat_mdl, embd_mdl, vector_si raptor_config["threshold"], ) original_length = len(chunks) - chunks = await raptor(chunks, row["kb_parser_config"]["raptor"]["random_seed"], callback) + chunks = await raptor(chunks, kb_parser_config["raptor"]["random_seed"], callback) doc = { "doc_id": fake_doc_id, "kb_id": [str(row["kb_id"])], @@ -814,8 +814,22 @@ async def do_handle_task(task): kb_parser_config = kb.parser_config if not kb_parser_config.get("raptor", {}).get("use_raptor", False): - progress_callback(prog=-1.0, msg="Internal error: Invalid RAPTOR configuration") - return + kb_parser_config.update( + { + "raptor": { + "use_raptor": True, + "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.", + "max_token": 256, + "threshold": 0.1, + "max_cluster": 64, + "random_seed": 0, + }, + } + ) + if not KnowledgebaseService.update_by_id(kb.id, {"parser_config":kb_parser_config}): + progress_callback(prog=-1.0, msg="Internal error: Invalid RAPTOR configuration") + return + # bind LLM for raptor chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language) # run RAPTOR @@ -838,8 +852,25 @@ async def do_handle_task(task): kb_parser_config = kb.parser_config if not kb_parser_config.get("graphrag", {}).get("use_graphrag", False): - progress_callback(prog=-1.0, msg="Internal error: Invalid GraphRAG configuration") - return + kb_parser_config.update( + { + "graphrag": { + "use_graphrag": True, + "entity_types": [ + "organization", + "person", + "geo", + "event", + "category", + ], + "method": "light", + } + } + ) + if not KnowledgebaseService.update_by_id(kb.id, {"parser_config":kb_parser_config}): + progress_callback(prog=-1.0, msg="Internal error: Invalid GraphRAG configuration") + return + graphrag_conf = kb_parser_config.get("graphrag", {}) start_ts = timer()