From 83c8af1b59350dd6b74cd86e559001bddae7c3c7 Mon Sep 17 00:00:00 2001 From: Can Wang Date: Wed, 2 Jul 2025 18:38:48 +0800 Subject: [PATCH] Fix: page_size can be None error (#8603) ### What problem does this PR solve? Issue #8602 `parser_config.task_page_size` can default to `None` when a dataset is created via the API. This was not handled by the `task_executor.py` code, so `page_size` could sometimes be `None`, which causes an error at line 351. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/db/services/task_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/db/services/task_service.py b/api/db/services/task_service.py index 1fdfed350..401489a69 100644 --- a/api/db/services/task_service.py +++ b/api/db/services/task_service.py @@ -338,9 +338,9 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int): file_bin = STORAGE_IMPL.get(bucket, name) do_layout = doc["parser_config"].get("layout_recognize", "DeepDOC") pages = PdfParser.total_page_number(doc["name"], file_bin) - page_size = doc["parser_config"].get("task_page_size", 12) + page_size = doc["parser_config"].get("task_page_size") or 12 if doc["parser_id"] == "paper": - page_size = doc["parser_config"].get("task_page_size", 22) + page_size = doc["parser_config"].get("task_page_size") or 22 if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC": page_size = 10 ** 9 page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]