Change knowledge base to dataset (#11976)

### What problem does this PR solve?

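As the title says: rename "knowledge base" / "knowledgebase" to "dataset" in user-facing text (CLI help text, task progress messages, and log messages). Only string wording changes; identifiers such as `kb_id` and `KnowledgebaseService` are left as they are.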

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2025-12-17 10:03:33 +08:00
committed by GitHub
parent 4d46726eb7
commit 30019dab9f
35 changed files with 92 additions and 92 deletions

View File

@ -283,7 +283,7 @@ if __name__ == '__main__':
print('*****************RAGFlow Benchmark*****************')
parser = argparse.ArgumentParser(usage="benchmark.py <max_docs> <kb_id> <dataset> <dataset_path> [<miracl_corpus_path>])", description='RAGFlow Benchmark')
parser.add_argument('max_docs', metavar='max_docs', type=int, help='max docs to evaluate')
parser.add_argument('kb_id', metavar='kb_id', help='knowledgebase id')
parser.add_argument('kb_id', metavar='kb_id', help='dataset id')
parser.add_argument('dataset', metavar='dataset', help='dataset name, shall be one of ms_marco_v1.1(https://huggingface.co/datasets/microsoft/ms_marco), trivia_qa(https://huggingface.co/datasets/mandarjoshi/trivia_qa>), miracl(https://huggingface.co/datasets/miracl/miracl')
parser.add_argument('dataset_path', metavar='dataset_path', help='dataset path')
parser.add_argument('miracl_corpus_path', metavar='miracl_corpus_path', nargs='?', default="", help='miracl corpus path. Only needed when dataset is miracl')

View File

@ -888,7 +888,7 @@ async def do_handle_task(task):
if task_type == "raptor":
ok, kb = KnowledgebaseService.get_by_id(task_dataset_id)
if not ok:
progress_callback(prog=-1.0, msg="Cannot found valid knowledgebase for RAPTOR task")
progress_callback(prog=-1.0, msg="Cannot found valid dataset for RAPTOR task")
return
kb_parser_config = kb.parser_config
@ -940,7 +940,7 @@ async def do_handle_task(task):
elif task_type == "graphrag":
ok, kb = KnowledgebaseService.get_by_id(task_dataset_id)
if not ok:
progress_callback(prog=-1.0, msg="Cannot found valid knowledgebase for GraphRAG task")
progress_callback(prog=-1.0, msg="Cannot found valid dataset for GraphRAG task")
return
kb_parser_config = kb.parser_config

View File

@ -521,7 +521,7 @@ class InfinityConnection(DocStoreConnection):
try:
table_instance = db_instance.get_table(table_name)
except Exception:
logger.warning(f"Table not found: {table_name}, this knowledge base isn't created in Infinity. Maybe it is created in other document engine.")
logger.warning(f"Table not found: {table_name}, this dataset isn't created in Infinity. Maybe it is created in other document engine.")
continue
kb_res, _ = table_instance.output(["*"]).filter(f"id = '{chunkId}'").to_df()
logger.debug(f"INFINITY get table: {str(table_list)}, result: {str(kb_res)}")