mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-03 11:05:30 +08:00
Change knowledge base to dataset (#11976)
### What problem does this PR solve?

As title

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@ -283,7 +283,7 @@ if __name__ == '__main__':
|
||||
print('*****************RAGFlow Benchmark*****************')
|
||||
parser = argparse.ArgumentParser(usage="benchmark.py <max_docs> <kb_id> <dataset> <dataset_path> [<miracl_corpus_path>])", description='RAGFlow Benchmark')
|
||||
parser.add_argument('max_docs', metavar='max_docs', type=int, help='max docs to evaluate')
|
||||
parser.add_argument('kb_id', metavar='kb_id', help='knowledgebase id')
|
||||
parser.add_argument('kb_id', metavar='kb_id', help='dataset id')
|
||||
parser.add_argument('dataset', metavar='dataset', help='dataset name, shall be one of ms_marco_v1.1(https://huggingface.co/datasets/microsoft/ms_marco), trivia_qa(https://huggingface.co/datasets/mandarjoshi/trivia_qa>), miracl(https://huggingface.co/datasets/miracl/miracl')
|
||||
parser.add_argument('dataset_path', metavar='dataset_path', help='dataset path')
|
||||
parser.add_argument('miracl_corpus_path', metavar='miracl_corpus_path', nargs='?', default="", help='miracl corpus path. Only needed when dataset is miracl')
|
||||
|
||||
@ -888,7 +888,7 @@ async def do_handle_task(task):
|
||||
if task_type == "raptor":
|
||||
ok, kb = KnowledgebaseService.get_by_id(task_dataset_id)
|
||||
if not ok:
|
||||
progress_callback(prog=-1.0, msg="Cannot found valid knowledgebase for RAPTOR task")
|
||||
progress_callback(prog=-1.0, msg="Cannot found valid dataset for RAPTOR task")
|
||||
return
|
||||
|
||||
kb_parser_config = kb.parser_config
|
||||
@ -940,7 +940,7 @@ async def do_handle_task(task):
|
||||
elif task_type == "graphrag":
|
||||
ok, kb = KnowledgebaseService.get_by_id(task_dataset_id)
|
||||
if not ok:
|
||||
progress_callback(prog=-1.0, msg="Cannot found valid knowledgebase for GraphRAG task")
|
||||
progress_callback(prog=-1.0, msg="Cannot found valid dataset for GraphRAG task")
|
||||
return
|
||||
|
||||
kb_parser_config = kb.parser_config
|
||||
|
||||
@ -521,7 +521,7 @@ class InfinityConnection(DocStoreConnection):
|
||||
try:
|
||||
table_instance = db_instance.get_table(table_name)
|
||||
except Exception:
|
||||
logger.warning(f"Table not found: {table_name}, this knowledge base isn't created in Infinity. Maybe it is created in other document engine.")
|
||||
logger.warning(f"Table not found: {table_name}, this dataset isn't created in Infinity. Maybe it is created in other document engine.")
|
||||
continue
|
||||
kb_res, _ = table_instance.output(["*"]).filter(f"id = '{chunkId}'").to_df()
|
||||
logger.debug(f"INFINITY get table: {str(table_list)}, result: {str(kb_res)}")
|
||||
|
||||
Reference in New Issue
Block a user