mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: Create dataset performance unmatched between HTTP api and web ui (#10960)
### What problem does this PR solve? Fix: Create dataset performance unmatched between HTTP api and web ui #10925 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -298,8 +298,34 @@ def get_parser_config(chunk_method, parser_config):
|
||||
|
||||
# Define default configurations for each chunking method
|
||||
key_mapping = {
|
||||
"naive": {"chunk_token_num": 512, "delimiter": r"\n", "html4excel": False, "layout_recognize": "DeepDOC",
|
||||
"raptor": {"use_raptor": False}, "graphrag": {"use_graphrag": False}},
|
||||
"naive": {
|
||||
"layout_recognize": "DeepDOC",
|
||||
"chunk_token_num": 512,
|
||||
"delimiter": "\n",
|
||||
"auto_keywords": 0,
|
||||
"auto_questions": 0,
|
||||
"html4excel": False,
|
||||
"topn_tags": 3,
|
||||
"raptor": {
|
||||
"use_raptor": True,
|
||||
"prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
|
||||
"max_token": 256,
|
||||
"threshold": 0.1,
|
||||
"max_cluster": 64,
|
||||
"random_seed": 0,
|
||||
},
|
||||
"graphrag": {
|
||||
"use_graphrag": True,
|
||||
"entity_types": [
|
||||
"organization",
|
||||
"person",
|
||||
"geo",
|
||||
"event",
|
||||
"category",
|
||||
],
|
||||
"method": "light",
|
||||
},
|
||||
},
|
||||
"qa": {"raptor": {"use_raptor": False}, "graphrag": {"use_graphrag": False}},
|
||||
"tag": None,
|
||||
"resume": None,
|
||||
|
||||
Reference in New Issue
Block a user