Feat: add foundational support for RAPTOR dataset pipeline logs (#10277)

### What problem does this PR solve?

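Add foundational support for RAPTOR dataset pipeline logs, including a new `raptor_task_id` column on the `knowledgebase` table (with a matching migration) and attaching `doc_ids` to queued RAPTOR tasks in the same way as GraphRAG tasks.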

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2025-09-25 16:46:24 +08:00
committed by GitHub
parent a1147ce609
commit c1151519a0
4 changed files with 161 additions and 19 deletions

View File

@ -651,6 +651,7 @@ class Knowledgebase(DataBaseModel):
pagerank = IntegerField(default=0, index=False)
graphrag_task_id = CharField(max_length=32, null=True, help_text="Graph RAG task ID", index=True)
raptor_task_id = CharField(max_length=32, null=True, help_text="RAPTOR task ID", index=True)
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True)
@ -1079,4 +1080,8 @@ def migrate_db():
migrate(migrator.add_column("knowledgebase", "graphrag_task_id", CharField(max_length=32, null=True, help_text="Gragh RAG task ID", index=True)))
except Exception:
pass
try:
migrate(migrator.add_column("knowledgebase", "raptor_task_id", CharField(max_length=32, null=True, help_text="RAPTOR task ID", index=True)))
except Exception:
pass
logging.disable(logging.NOTSET)

View File

@ -342,8 +342,7 @@ class DocumentService(CommonService):
process_duration=cls.model.process_duration + duration).where(
cls.model.id == doc_id).execute()
if num == 0:
raise LookupError(
"Document not found which is supposed to be there")
logging.warning("Document not found which is supposed to be there")
num = Knowledgebase.update(
token_num=Knowledgebase.token_num +
token_num,
@ -781,8 +780,9 @@ def queue_raptor_o_graphrag_tasks(doc, ty, priority, fake_doc_id="", doc_ids=[])
task["digest"] = hasher.hexdigest()
bulk_insert_into_db(Task, [task], True)
if ty == "graphrag":
if ty in ["graphrag", "raptor"]:
task["doc_ids"] = doc_ids
DocumentService.begin2parse(doc["id"])
assert REDIS_CONN.queue_product(get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
return task["id"]