diff --git a/api/db/services/task_service.py b/api/db/services/task_service.py index 5fd0eefc3..1fdfed350 100644 --- a/api/db/services/task_service.py +++ b/api/db/services/task_service.py @@ -387,12 +387,12 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int): for task in parse_task_array: ck_num += reuse_prev_task_chunks(task, prev_tasks, chunking_config) TaskService.filter_delete([Task.doc_id == doc["id"]]) - chunk_ids = [] - for task in prev_tasks: - if task["chunk_ids"]: - chunk_ids.extend(task["chunk_ids"].split()) - if chunk_ids: - settings.docStoreConn.delete({"id": chunk_ids}, search.index_name(chunking_config["tenant_id"]), + pre_chunk_ids = [] + for pre_task in prev_tasks: + if pre_task["chunk_ids"]: + pre_chunk_ids.extend(pre_task["chunk_ids"].split()) + if pre_chunk_ids: + settings.docStoreConn.delete({"id": pre_chunk_ids}, search.index_name(chunking_config["tenant_id"]), chunking_config["kb_id"]) DocumentService.update_by_id(doc["id"], {"chunk_num": ck_num}) diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index b028f7125..0377c72c3 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -543,6 +543,7 @@ async def do_handle_task(task): # Either using graphrag or Standard chunking methods elif task.get("task_type", "") == "graphrag": if not task_parser_config.get("graphrag", {}).get("use_graphrag", False): + progress_callback(prog=-1.0, msg="Internal configuration error.") return graphrag_conf = task["kb_parser_config"].get("graphrag", {}) start_ts = timer() @@ -558,8 +559,6 @@ async def do_handle_task(task): start_ts = timer() chunks = await build_chunks(task, progress_callback) logging.info("Build document {}: {:.2f}s".format(task_document_name, timer() - start_ts)) - if chunks is None: - return if not chunks: progress_callback(1., msg=f"No chunk built from {task_document_name}") return @@ -614,6 +613,7 @@ async def do_handle_task(task): async with trio.open_nursery() as nursery: for chunk_id in chunk_ids: nursery.start_soon(delete_image, task_dataset_id, chunk_id) + progress_callback(-1, msg=f"Chunk updates failed since task {task['id']} is unknown.") return logging.info("Indexing doc({}), page({}-{}), chunks({}), elapsed: {:.2f}".format(task_document_name, task_from_page,