Feat: add foundational support for RAPTOR dataset pipeline logs (#10277)

### What problem does this PR solve?

Add foundational support for RAPTOR dataset pipeline logs.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-31 15:45:08 +08:00 · 2025-09-25 16:46:24 +08:00
parent a1147ce609
commit c1151519a0
4 changed files with 161 additions and 19 deletions
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -342,8 +342,7 @@ class DocumentService(CommonService):
                               process_duration=cls.model.process_duration + duration).where(
            cls.model.id == doc_id).execute()
        if num == 0:
-            raise LookupError(
-                "Document not found which is supposed to be there")
+            logging.warning("Document not found which is supposed to be there")
        num = Knowledgebase.update(
            token_num=Knowledgebase.token_num +
                      token_num,
@@ -781,8 +780,9 @@ def queue_raptor_o_graphrag_tasks(doc, ty, priority, fake_doc_id="", doc_ids=[])
    task["digest"] = hasher.hexdigest()
    bulk_insert_into_db(Task, [task], True)

-    if ty == "graphrag":
+    if ty in ["graphrag", "raptor"]:
        task["doc_ids"] = doc_ids
+        DocumentService.begin2parse(doc["id"])
    assert REDIS_CONN.queue_product(get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
    return task["id"]