Fix: toc no chunk found issue. (#12197)

### What problem does this PR solve?

#12170

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu
2025-12-25 14:06:20 +08:00
committed by GitHub
parent 5ebabf5bed
commit 8cbfb5aef6
3 changed files with 9 additions and 2 deletions

View File

@@ -619,6 +619,8 @@ class Dealer:
                 chunks[id2idx[cid]]["similarity"] += sim
                 continue
             chunk = self.dataStore.get(cid, idx_nms, kb_ids)
+            if not chunk:
+                continue
             d = {
                 "chunk_id": cid,
                 "content_ltks": chunk["content_ltks"],

View File

@@ -827,6 +827,11 @@ async def relevant_chunks_with_toc(query: str, toc: list[dict], chat_mdl, topn: int
 META_DATA = load_prompt("meta_data")
 async def gen_metadata(chat_mdl, schema: dict, content: str):
     template = PROMPT_JINJA_ENV.from_string(META_DATA)
+    for k, desc in schema.items():
+        if "enum" in desc and not desc.get("enum"):
+            del desc["enum"]
+        if desc.get("enum"):
+            desc["description"] += "\n** Extracted values must strictly match the given list specified by `enum`. **"
     system_prompt = template.render(content=content, schema=schema)
     user_prompt = "Output: "
     _, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)

View File

@@ -374,13 +374,13 @@ async def build_chunks(task, progress_callback):
         chat_mdl = LLMBundle(task["tenant_id"], LLMType.CHAT, llm_name=task["llm_id"], lang=task["language"])
         async def gen_metadata_task(chat_mdl, d):
-            cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "metadata", {})
+            cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "metadata", task["parser_config"]["metadata"])
             if not cached:
                 async with chat_limiter:
                     cached = await gen_metadata(chat_mdl,
                                                 metadata_schema(task["parser_config"]["metadata"]),
                                                 d["content_with_weight"])
-            set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "metadata", {})
+            set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "metadata", task["parser_config"]["metadata"])
             if cached:
                 d["metadata_obj"] = cached
tasks = [] tasks = []