From c9ea22ef69467a84984773f2e1d348c4b872a282 Mon Sep 17 00:00:00 2001 From: buua436 <66937541+buua436@users.noreply.github.com> Date: Wed, 17 Sep 2025 09:36:31 +0800 Subject: [PATCH] Fix: set default chunk_token_num in html_parser (#10118) ### What problem does this PR solve? Issue: [Bug]: Agent component (HTTP Request) "'>' not supported between instances of 'int' and 'NoneType'" [#10096](https://github.com/infiniflow/ragflow/issues/10096) Change: When the Invoke class calls HtmlParser without passing the chunk_token_num parameter, the parameter defaults to None, and comparing it with block_token_count raises the error quoted above. This change sets the default chunk_token_num to 512 so that callers omitting the argument no longer hit this error. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) Co-authored-by: BadwomanCraZY <511528396@qq.com> --- deepdoc/parser/html_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepdoc/parser/html_parser.py b/deepdoc/parser/html_parser.py index 71bbb706a..44ff10389 100644 --- a/deepdoc/parser/html_parser.py +++ b/deepdoc/parser/html_parser.py @@ -37,7 +37,7 @@ TITLE_TAGS = {"h1": "#", "h2": "##", "h3": "###", "h4": "#####", "h5": "#####", class RAGFlowHtmlParser: - def __call__(self, fnm, binary=None, chunk_token_num=None): + def __call__(self, fnm, binary=None, chunk_token_num=512): if binary: encoding = find_codec(binary) txt = binary.decode(encoding, errors="ignore")