mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 12:32:30 +08:00
Fix: page_size can be None error (#8603)
### What problem does this PR solve? Issue #8602 `parser_config.task_page_size` can be defaults to `None` when dataset is created by API. This was not handled by the `task_executor.py` code thus `page_size` could sometimes be `None` which will cause issue in line 351. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -338,9 +338,9 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
|
||||
file_bin = STORAGE_IMPL.get(bucket, name)
|
||||
do_layout = doc["parser_config"].get("layout_recognize", "DeepDOC")
|
||||
pages = PdfParser.total_page_number(doc["name"], file_bin)
|
||||
page_size = doc["parser_config"].get("task_page_size", 12)
|
||||
page_size = doc["parser_config"].get("task_page_size") or 12
|
||||
if doc["parser_id"] == "paper":
|
||||
page_size = doc["parser_config"].get("task_page_size", 22)
|
||||
page_size = doc["parser_config"].get("task_page_size") or 22
|
||||
if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC":
|
||||
page_size = 10 ** 9
|
||||
page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
|
||||
|
||||
Reference in New Issue
Block a user