Fix: Increase default chunk_token_num from 128 to 512 in parser config (#8753)

### What problem does this PR solve?

Updated the default `chunk_token_num` value in `api_utils.py` and `validation_utils.py` from 128 to 512 to accommodate larger text chunks. Adjusted the corresponding test cases in the HTTP and SDK API tests to reflect this change.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-29 22:56:36 +08:00 · 2025-07-10 09:34:03 +08:00
parent aae9fbb9de
commit f8524462b0
8 changed files with 16 additions and 16 deletions
--- a/api/utils/api_utils.py
+++ b/api/utils/api_utils.py
@@ -348,7 +348,7 @@ def get_parser_config(chunk_method, parser_config):
    if not chunk_method:
        chunk_method = "naive"
    key_mapping = {
-        "naive": {"chunk_token_num": 128, "delimiter": r"\n", "html4excel": False, "layout_recognize": "DeepDOC", "raptor": {"use_raptor": False}},
+        "naive": {"chunk_token_num": 512, "delimiter": r"\n", "html4excel": False, "layout_recognize": "DeepDOC", "raptor": {"use_raptor": False}},
        "qa": {"raptor": {"use_raptor": False}},
        "tag": None,
        "resume": None,
--- a/api/utils/validation_utils.py
+++ b/api/utils/validation_utils.py
@@ -363,7 +363,7 @@ class GraphragConfig(Base):
 class ParserConfig(Base):
    auto_keywords: int = Field(default=0, ge=0, le=32)
    auto_questions: int = Field(default=0, ge=0, le=10)
-    chunk_token_num: int = Field(default=128, ge=1, le=2048)
+    chunk_token_num: int = Field(default=512, ge=1, le=2048)
    delimiter: str = Field(default=r"\n", min_length=1)
    graphrag: GraphragConfig | None = None
    html4excel: bool = False