Feat: parsing hyperlinks in docx and pdf & Fix: default parser config of toc extraction (#10877)

### What problem does this PR solve? Feat: parsing hyperlinks in docx and pdf #10848 Fix: default parser config of toc extraction ### Type of change - [x] New Feature (non-breaking change which adds functionality)
2026-01-30 23:26:36 +08:00 · 2025-11-03 09:34:12 +08:00
parent 360f5c1179
commit fa210e7c58
3 changed files with 181 additions and 11 deletions
--- a/api/db/services/task_service.py
+++ b/api/db/services/task_service.py
@ -366,7 +366,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
        page_size = doc["parser_config"].get("task_page_size") or 12
        if doc["parser_id"] == "paper":
            page_size = doc["parser_config"].get("task_page_size") or 22
-        if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC" or doc["parser_config"].get("toc", True):
+        if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC" or doc["parser_config"].get("toc_extraction", False):
            page_size = 10 ** 9
        page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
        for s, e in page_ranges: