diff --git a/deepdoc/parser/html_parser.py b/deepdoc/parser/html_parser.py index 71bbb706a..44ff10389 100644 --- a/deepdoc/parser/html_parser.py +++ b/deepdoc/parser/html_parser.py @@ -37,7 +37,7 @@ TITLE_TAGS = {"h1": "#", "h2": "##", "h3": "###", "h4": "#####", "h5": "#####", class RAGFlowHtmlParser: - def __call__(self, fnm, binary=None, chunk_token_num=None): + def __call__(self, fnm, binary=None, chunk_token_num=512): if binary: encoding = find_codec(binary) txt = binary.decode(encoding, errors="ignore")