Feat: PDF vision figure parser supports reading context (#12416)

### What problem does this PR solve?

The PDF vision figure parser can now take a figure's surrounding text (the context above and below it) into account when describing the figure.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2026-01-05 09:55:43 +08:00
committed by GitHub
parent cc8a10376a
commit 4cd4526492
8 changed files with 263 additions and 41 deletions

View File

@ -158,6 +158,7 @@ KEYWORD_PROMPT_TEMPLATE = load_prompt("keyword_prompt")
QUESTION_PROMPT_TEMPLATE = load_prompt("question_prompt")
VISION_LLM_DESCRIBE_PROMPT = load_prompt("vision_llm_describe_prompt")
VISION_LLM_FIGURE_DESCRIBE_PROMPT = load_prompt("vision_llm_figure_describe_prompt")
# Figure-description prompt variant; rendered with `context_above` / `context_below`.
VISION_LLM_FIGURE_DESCRIBE_PROMPT_WITH_CONTEXT = load_prompt("vision_llm_figure_describe_prompt_with_context")
STRUCTURED_OUTPUT_PROMPT = load_prompt("structured_output_prompt")
ANALYZE_TASK_SYSTEM = load_prompt("analyze_task_system")
@ -321,6 +322,11 @@ def vision_llm_figure_describe_prompt() -> str:
return template.render()
def vision_llm_figure_describe_prompt_with_context(context_above: str, context_below: str) -> str:
    """Render the context-aware figure description prompt.

    Args:
        context_above: Value bound to the template's ``context_above`` variable.
        context_below: Value bound to the template's ``context_below`` variable.

    Returns:
        The prompt text produced by rendering
        ``VISION_LLM_FIGURE_DESCRIBE_PROMPT_WITH_CONTEXT`` with both values.
    """
    # NOTE(review): PROMPT_JINJA_ENV is presumably a Jinja2 Environment defined
    # elsewhere in this module -- confirm.
    render_vars = {
        "context_above": context_above,
        "context_below": context_below,
    }
    prompt_template = PROMPT_JINJA_ENV.from_string(VISION_LLM_FIGURE_DESCRIBE_PROMPT_WITH_CONTEXT)
    return prompt_template.render(**render_vars)
def tool_schema(tools_description: list[dict], complete_task=False):
if not tools_description:
return ""