Cache the result from llm for graphrag and raptor (#4051)

### What problem does this PR solve?

#4045

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
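
Only the task-reuse helper appears in the diff excerpt below; the LLM-result caching that gives this PR its title lives in files not shown in this excerpt. As a rough sketch of the general technique (all names here are hypothetical, not the PR's actual API), the idea is to key each completion on a hash of everything that influences the output, so repeated GraphRAG/RAPTOR runs over the same text can skip the LLM call:

```python
import hashlib
import json

# Stand-in for a shared store such as Redis; purely illustrative.
_cache: dict[str, str] = {}

def _cache_key(model: str, prompt: str, gen_conf: dict) -> str:
    # Key on everything that affects the completion, so a changed model,
    # prompt, or generation config misses the cache instead of returning
    # a stale answer.
    payload = json.dumps(
        {"model": model, "prompt": prompt, "gen_conf": gen_conf},
        sort_keys=True,
    )
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()

def cached_chat(model: str, prompt: str, gen_conf: dict, chat_fn) -> str:
    """Return a cached completion if one exists; otherwise call the model and cache it."""
    key = _cache_key(model, prompt, gen_conf)
    if key in _cache:
        return _cache[key]
    answer = chat_fn(model, prompt, gen_conf)
    _cache[key] = answer
    return answer

# Example: the second identical call returns the cached answer
# without invoking the model again.
calls = []
def fake_chat(model, prompt, gen_conf):
    calls.append(prompt)
    return f"summary of: {prompt}"

a = cached_chat("gpt-x", "Summarize node A", {"temperature": 0.0}, fake_chat)
b = cached_chat("gpt-x", "Summarize node A", {"temperature": 0.0}, fake_chat)
assert a == b and len(calls) == 1
```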
Author: Kevin Hu
Date: 2024-12-17 09:48:03 +08:00
Committed by: GitHub
Parent: 8ea631a2a0
Commit: cb6e9ce164
12 changed files with 161 additions and 38 deletions

@@ -271,7 +271,7 @@ def queue_tasks(doc: dict, bucket: str, name: str):
 def reuse_prev_task_chunks(task: dict, prev_tasks: list[dict], chunking_config: dict):
-    idx = bisect.bisect_left(prev_tasks, task["from_page"], key=lambda x: x["from_page"])
+    idx = bisect.bisect_left(prev_tasks, task.get("from_page", 0), key=lambda x: x.get("from_page",0))
     if idx >= len(prev_tasks):
         return 0
     prev_task = prev_tasks[idx]
@@ -279,7 +279,11 @@ def reuse_prev_task_chunks(task: dict, prev_tasks: list[dict], chunking_config:
         return 0
     task["chunk_ids"] = prev_task["chunk_ids"]
     task["progress"] = 1.0
-    task["progress_msg"] = f"Page({task['from_page']}~{task['to_page']}): reused previous task's chunks"
+    if "from_page" in task and "to_page" in task:
+        task["progress_msg"] = f"Page({task['from_page']}~{task['to_page']}): "
+    else:
+        task["progress_msg"] = ""
+    task["progress_msg"] += "reused previous task's chunks."
     prev_task["chunk_ids"] = ""
     return len(task["chunk_ids"].split())
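
For context, here is a minimal, runnable sketch of the patched helper. The eligibility checks the diff elides between the two hunks are replaced by a placeholder (an assumption, not the repo's actual code). It shows both fixes at once: the lookup uses `task.get("from_page", 0)` so page-less tasks no longer raise `KeyError`, and the `Page(...)` prefix is added to the progress message only when the task actually has a page range. Note that the `key=` parameter of `bisect.bisect_left` requires Python 3.10+.

```python
import bisect

def reuse_prev_task_chunks(task: dict, prev_tasks: list[dict], chunking_config: dict) -> int:
    """Return the number of chunk ids reused from a matching previous task."""
    # prev_tasks is assumed sorted by "from_page"; bisect_left with key=
    # finds the first previous task starting at or after our start page.
    # .get(..., 0) keeps tasks without a page range from raising KeyError.
    idx = bisect.bisect_left(prev_tasks, task.get("from_page", 0),
                             key=lambda x: x.get("from_page", 0))
    if idx >= len(prev_tasks):
        return 0
    prev_task = prev_tasks[idx]
    # Placeholder for the checks the diff elides: the real code also
    # validates the previous task against chunking_config before reusing.
    if not prev_task.get("chunk_ids"):
        return 0
    task["chunk_ids"] = prev_task["chunk_ids"]
    task["progress"] = 1.0
    # Only prepend the page range when the task actually has one.
    if "from_page" in task and "to_page" in task:
        task["progress_msg"] = f"Page({task['from_page']}~{task['to_page']}): "
    else:
        task["progress_msg"] = ""
    task["progress_msg"] += "reused previous task's chunks."
    prev_task["chunk_ids"] = ""
    return len(task["chunk_ids"].split())

# Example: a RAPTOR-style task with no page range reuses three chunk ids
# without tripping over the missing "from_page"/"to_page" keys.
prev = [{"from_page": 0, "to_page": 12, "chunk_ids": "c1 c2 c3"}]
task = {"task_type": "raptor"}
assert reuse_prev_task_chunks(task, prev, {}) == 3
assert task["progress_msg"] == "reused previous task's chunks."
```

The demo at the bottom mirrors the case this PR targets: a GraphRAG/RAPTOR-style task with no page range can still reuse a previous task's chunks, where the old code would have failed on `task["from_page"]`.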