mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: parsing hyperlinks in docx and pdf & Fix: default parser config of toc extraction (#10877)
### What problem does this PR solve? Feat: parsing hyperlinks in docx and pdf #10848 Fix: default parser config of toc extraction ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -366,7 +366,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
|
||||
page_size = doc["parser_config"].get("task_page_size") or 12
|
||||
if doc["parser_id"] == "paper":
|
||||
page_size = doc["parser_config"].get("task_page_size") or 22
|
||||
if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC" or doc["parser_config"].get("toc", True):
|
||||
if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC" or doc["parser_config"].get("toc_extraction", False):
|
||||
page_size = 10 ** 9
|
||||
page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
|
||||
for s, e in page_ranges:
|
||||
|
||||
Reference in New Issue
Block a user