Refine: image/table context. (#12336)

### What problem does this PR solve?

Addresses issue #12303.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu
2025-12-30 20:24:27 +08:00
committed by GitHub
parent 348265afc1
commit 52f91c2388
5 changed files with 116 additions and 11 deletions

View File

@@ -332,6 +332,9 @@ async def build_chunks(task, progress_callback):
async def doc_keyword_extraction(chat_mdl, d, topn):
cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "keywords", {"topn": topn})
if not cached:
if has_canceled(task["id"]):
progress_callback(-1, msg="Task has been canceled.")
return
async with chat_limiter:
cached = await keyword_extraction(chat_mdl, d["content_with_weight"], topn)
set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "keywords", {"topn": topn})
@@ -362,6 +365,9 @@ async def build_chunks(task, progress_callback):
async def doc_question_proposal(chat_mdl, d, topn):
cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "question", {"topn": topn})
if not cached:
if has_canceled(task["id"]):
progress_callback(-1, msg="Task has been canceled.")
return
async with chat_limiter:
cached = await question_proposal(chat_mdl, d["content_with_weight"], topn)
set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "question", {"topn": topn})
@@ -392,6 +398,9 @@ async def build_chunks(task, progress_callback):
cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "metadata",
task["parser_config"]["metadata"])
if not cached:
if has_canceled(task["id"]):
progress_callback(-1, msg="Task has been canceled.")
return
async with chat_limiter:
cached = await gen_metadata(chat_mdl,
metadata_schema(task["parser_config"]["metadata"]),
@@ -457,6 +466,9 @@ async def build_chunks(task, progress_callback):
async def doc_content_tagging(chat_mdl, d, topn_tags):
cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], all_tags, {"topn": topn_tags})
if not cached:
if has_canceled(task["id"]):
progress_callback(-1, msg="Task has been canceled.")
return
picked_examples = random.choices(examples, k=2) if len(examples) > 2 else examples
if not picked_examples:
picked_examples.append({"content": "This is an example", TAG_FLD: {'example': 1}})