Fix: Refactor parser config handling and add GraphRAG defaults (#8778)

### What problem does this PR solve?

- Update `get_parser_config` to merge provided configs with defaults
- Add GraphRAG configuration defaults for all chunk methods
- Make raptor and graphrag fields non-nullable in ParserConfig schema
- Update related test cases to reflect config changes
- Ensure backward compatibility while adding new GraphRAG support
- Related issue: #8396

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Liu An
2025-07-23 09:29:37 +08:00
committed by GitHub
parent c3b8d8b4ba
commit 0020c50000
8 changed files with 179 additions and 28 deletions

View File

@@ -365,10 +365,10 @@ class ParserConfig(Base):
auto_questions: int = Field(default=0, ge=0, le=10)
chunk_token_num: int = Field(default=512, ge=1, le=2048)
delimiter: str = Field(default=r"\n", min_length=1)
- graphrag: GraphragConfig | None = None
+ graphrag: GraphragConfig = Field(default_factory=lambda: GraphragConfig(use_graphrag=False))
html4excel: bool = False
layout_recognize: str = "DeepDOC"
- raptor: RaptorConfig | None = None
+ raptor: RaptorConfig = Field(default_factory=lambda: RaptorConfig(use_raptor=False))
tag_kb_ids: list[str] = Field(default_factory=list)
topn_tags: int = Field(default=1, ge=1, le=10)
filename_embd_weight: float | None = Field(default=0.1, ge=0.0, le=1.0)