Mirror of https://github.com/infiniflow/ragflow.git, synced 2025-12-26 08:56:47 +08:00
Fix: toc no chunk found issue. (#12197)
### What problem does this PR solve?

#12170

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
@@ -619,6 +619,8 @@ class Dealer:
                 chunks[id2idx[cid]]["similarity"] += sim
                 continue
             chunk = self.dataStore.get(cid, idx_nms, kb_ids)
+            if not chunk:
+                continue
             d = {
                 "chunk_id": cid,
                 "content_ltks": chunk["content_ltks"],
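Note: a minimal sketch (not ragflow code) of why the new `if not chunk: continue` guard matters. `fake_store_get` is a hypothetical stand-in for `self.dataStore.get()`, which can return nothing when a chunk id referenced by the TOC is no longer present in the index.

```python
def fake_store_get(cid, store):
    # Returns None when the chunk id is missing, mimicking a store miss.
    return store.get(cid)

def collect_chunks(chunk_ids, store):
    chunks = []
    for cid in chunk_ids:
        chunk = fake_store_get(cid, store)
        if not chunk:          # without this guard, chunk["content_ltks"] below
            continue           # raises TypeError: 'NoneType' object is not subscriptable
        chunks.append({
            "chunk_id": cid,
            "content_ltks": chunk["content_ltks"],
        })
    return chunks

if __name__ == "__main__":
    store = {"c1": {"content_ltks": "hello world"}}
    # "c2" is referenced by the TOC but no longer exists in the store.
    print(collect_chunks(["c1", "c2"], store))   # -> only the "c1" entry survives
```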
@@ -827,6 +827,11 @@ async def relevant_chunks_with_toc(query: str, toc:list[dict], chat_mdl, topn: i
 META_DATA = load_prompt("meta_data")
 async def gen_metadata(chat_mdl, schema:dict, content:str):
     template = PROMPT_JINJA_ENV.from_string(META_DATA)
+    for k, desc in schema.items():
+        if "enum" in desc and not desc.get("enum"):
+            del desc["enum"]
+        if desc.get("enum"):
+            desc["description"] += "\n** Extracted values must strictly match the given list specified by `enum`. **"
     system_prompt = template.render(content=content, schema=schema)
     user_prompt = "Output: "
     _, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
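Note: a minimal sketch (not ragflow code) of the schema clean-up the added lines perform before the metadata prompt is rendered: empty `enum` lists are dropped, and non-empty ones get a strict-match instruction appended to the field description. The `prepare_schema` name and the example schema are illustrative only.

```python
def prepare_schema(schema: dict) -> dict:
    for k, desc in schema.items():
        if "enum" in desc and not desc.get("enum"):
            del desc["enum"]      # an empty enum would only confuse the extraction prompt
        if desc.get("enum"):
            desc["description"] += (
                "\n** Extracted values must strictly match the given list specified by `enum`. **"
            )
    return schema

if __name__ == "__main__":
    schema = {
        "language": {"description": "Document language", "enum": ["en", "zh"]},
        "author":   {"description": "Document author",   "enum": []},
    }
    print(prepare_schema(schema))
    # 'author' loses its empty enum; 'language' gains the strict-match instruction.
```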
@@ -374,13 +374,13 @@ async def build_chunks(task, progress_callback):
         chat_mdl = LLMBundle(task["tenant_id"], LLMType.CHAT, llm_name=task["llm_id"], lang=task["language"])

         async def gen_metadata_task(chat_mdl, d):
-            cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "metadata", {})
+            cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "metadata", task["parser_config"]["metadata"])
             if not cached:
                 async with chat_limiter:
                     cached = await gen_metadata(chat_mdl,
                                                 metadata_schema(task["parser_config"]["metadata"]),
                                                 d["content_with_weight"])
-            set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "metadata", {})
+            set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "metadata", task["parser_config"]["metadata"])
             if cached:
                 d["metadata_obj"] = cached
         tasks = []
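Note: a minimal sketch (not ragflow's cache implementation) of why the metadata schema belongs in the cache key rather than an empty `{}`. Keying the cache on the config as well as the content means that changing `parser_config["metadata"]` no longer returns metadata cached under the old schema. All names below (`cache_key`, `get_cached`, `set_cached`) are hypothetical.

```python
import hashlib
import json

_cache: dict[str, str] = {}

def cache_key(llm_name: str, content: str, tag: str, config: dict) -> str:
    # Hash every input that should invalidate the cache, including the schema/config.
    payload = json.dumps([llm_name, content, tag, config], sort_keys=True)
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()

def get_cached(llm_name, content, tag, config):
    return _cache.get(cache_key(llm_name, content, tag, config))

def set_cached(llm_name, content, tag, config, value):
    _cache[cache_key(llm_name, content, tag, config)] = value

if __name__ == "__main__":
    old_schema = {"author": {"description": "Document author"}}
    new_schema = {"author": {"description": "Document author"},
                  "year":   {"description": "Publish year"}}
    set_cached("gpt", "some chunk text", "metadata", old_schema, '{"author": "Alice"}')
    # A different schema misses the cache instead of returning the stale result.
    print(get_cached("gpt", "some chunk text", "metadata", new_schema))  # -> None
```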