Fix: maintain backward compatibility for KB tasks (#10508)

### What problem does this PR solve?

Maintain backward compatibility for KB tasks

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Yongteng Lei
2025-10-13 11:53:48 +08:00
committed by GitHub
parent 7fb8b30cc2
commit 65c3f0406c
2 changed files with 48 additions and 9 deletions

View File

@ -36,6 +36,7 @@ from api import settings
from rag.nlp import search
from api.constants import DATASET_NAME_LIMIT
from rag.settings import PAGERANK_FLD
from rag.utils.redis_conn import REDIS_CONN
from rag.utils.storage_factory import STORAGE_IMPL
@ -760,18 +761,25 @@ def delete_kb_task():
match pipeline_task_type:
case PipelineTaskType.GRAPH_RAG:
settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), kb_id)
kb_task_id = "graphrag_task_id"
kb_task_id_field = "graphrag_task_id"
task_id = kb.graphrag_task_id
kb_task_finish_at = "graphrag_task_finish_at"
case PipelineTaskType.RAPTOR:
kb_task_id = "raptor_task_id"
kb_task_id_field = "raptor_task_id"
task_id = kb.raptor_task_id
kb_task_finish_at = "raptor_task_finish_at"
case PipelineTaskType.MINDMAP:
kb_task_id = "mindmap_task_id"
kb_task_id_field = "mindmap_task_id"
task_id = kb.mindmap_task_id
kb_task_finish_at = "mindmap_task_finish_at"
case _:
return get_error_data_result(message="Internal Error: Invalid task type")
ok = KnowledgebaseService.update_by_id(kb_id, {kb_task_id: "", kb_task_finish_at: None})
def cancel_task(task_id):
REDIS_CONN.set(f"{task_id}-cancel", "x")
cancel_task(task_id)
ok = KnowledgebaseService.update_by_id(kb_id, {kb_task_id_field: "", kb_task_finish_at: None})
if not ok:
return server_error_response(f"Internal error: cannot delete task {pipeline_task_type}")

View File

@ -691,7 +691,7 @@ async def run_raptor_for_kb(row, kb_parser_config, chat_mdl, embd_mdl, vector_si
raptor_config["threshold"],
)
original_length = len(chunks)
chunks = await raptor(chunks, row["kb_parser_config"]["raptor"]["random_seed"], callback)
chunks = await raptor(chunks, kb_parser_config["raptor"]["random_seed"], callback)
doc = {
"doc_id": fake_doc_id,
"kb_id": [str(row["kb_id"])],
@ -814,8 +814,22 @@ async def do_handle_task(task):
kb_parser_config = kb.parser_config
if not kb_parser_config.get("raptor", {}).get("use_raptor", False):
progress_callback(prog=-1.0, msg="Internal error: Invalid RAPTOR configuration")
return
kb_parser_config.update(
{
"raptor": {
"use_raptor": True,
"prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
"max_token": 256,
"threshold": 0.1,
"max_cluster": 64,
"random_seed": 0,
},
}
)
if not KnowledgebaseService.update_by_id(kb.id, {"parser_config":kb_parser_config}):
progress_callback(prog=-1.0, msg="Internal error: Invalid RAPTOR configuration")
return
# bind LLM for raptor
chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language)
# run RAPTOR
@ -838,8 +852,25 @@ async def do_handle_task(task):
kb_parser_config = kb.parser_config
if not kb_parser_config.get("graphrag", {}).get("use_graphrag", False):
progress_callback(prog=-1.0, msg="Internal error: Invalid GraphRAG configuration")
return
kb_parser_config.update(
{
"graphrag": {
"use_graphrag": True,
"entity_types": [
"organization",
"person",
"geo",
"event",
"category",
],
"method": "light",
}
}
)
if not KnowledgebaseService.update_by_id(kb.id, {"parser_config":kb_parser_config}):
progress_callback(prog=-1.0, msg="Internal error: Invalid GraphRAG configuration")
return
graphrag_conf = kb_parser_config.get("graphrag", {})
start_ts = timer()