Fix: Increase default chunk_token_num from 128 to 512 in parser config (#8753)

### What problem does this PR solve?

This PR raises the default `chunk_token_num` from 128 to 512 in
`api_utils.py` and `validation_utils.py` so that the default parser config
accommodates larger text chunks. The expected values in the corresponding
HTTP and SDK API test cases are updated to match the new default.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Liu An
2025-07-10 09:34:03 +08:00
committed by Zhichang Yu
parent aae9fbb9de
commit f8524462b0
8 changed files with 16 additions and 16 deletions

View File

@ -588,7 +588,7 @@ class TestDatasetCreate:
excepted_value = DataSet.ParserConfig(
client,
{
"chunk_token_num": 128,
"chunk_token_num": 512,
"delimiter": r"\n",
"html4excel": False,
"layout_recognize": "DeepDOC",
@ -605,7 +605,7 @@ class TestDatasetCreate:
excepted_value = DataSet.ParserConfig(
client,
{
"chunk_token_num": 128,
"chunk_token_num": 512,
"delimiter": r"\n",
"html4excel": False,
"layout_recognize": "DeepDOC",
@ -621,7 +621,7 @@ class TestDatasetCreate:
excepted_value = DataSet.ParserConfig(
client,
{
"chunk_token_num": 128,
"chunk_token_num": 512,
"delimiter": r"\n",
"html4excel": False,
"layout_recognize": "DeepDOC",

View File

@ -636,7 +636,7 @@ class TestDatasetUpdate:
expected_config = DataSet.ParserConfig(
client,
{
"chunk_token_num": 128,
"chunk_token_num": 512,
"delimiter": r"\n",
"html4excel": False,
"layout_recognize": "DeepDOC",
@ -655,7 +655,7 @@ class TestDatasetUpdate:
expected_config = DataSet.ParserConfig(
client,
{
"chunk_token_num": 128,
"chunk_token_num": 512,
"delimiter": r"\n",
"html4excel": False,
"layout_recognize": "DeepDOC",

View File

@ -207,7 +207,7 @@ class TestUpdateDocumentParserConfig:
(
"naive",
{
"chunk_token_num": 128,
"chunk_token_num": 512,
"layout_recognize": "DeepDOC",
"html4excel": False,
"delimiter": r"\n",
@ -401,7 +401,7 @@ class TestUpdateDocumentParserConfig:
expected_config = DataSet.ParserConfig(
client,
{
"chunk_token_num": 128,
"chunk_token_num": 512,
"delimiter": r"\n",
"html4excel": False,
"layout_recognize": "DeepDOC",