mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-19 20:16:49 +08:00
Change knowledge base to dataset (#11976)
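### What problem does this PR solve?

As the title says: replace the term "knowledge base" / "knowledgebase" with "dataset" in user-facing messages, prompt templates, and code comments.

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>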
This commit is contained in:
@ -578,7 +578,7 @@
|
|||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"form": {
|
"form": {
|
||||||
"text": "Searches for relevant database creation statements.\n\nIt should label with a knowledgebase to which the schema is dumped in. You could use \" General \" as parsing method, \" 2 \" as chunk size and \" ; \" as delimiter."
|
"text": "Searches for relevant database creation statements.\n\nIt should label with a dataset to which the schema is dumped in. You could use \" General \" as parsing method, \" 2 \" as chunk size and \" ; \" as delimiter."
|
||||||
},
|
},
|
||||||
"label": "Note",
|
"label": "Note",
|
||||||
"name": "Note Schema"
|
"name": "Note Schema"
|
||||||
|
|||||||
@ -65,7 +65,7 @@
|
|||||||
"component_name": "Agent",
|
"component_name": "Agent",
|
||||||
"params": {
|
"params": {
|
||||||
"llm_id": "deepseek-chat",
|
"llm_id": "deepseek-chat",
|
||||||
"sys_prompt": "You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\n Here is the knowledge base:\n {retrieval:0@formalized_content}\n The above is the knowledge base.",
|
"sys_prompt": "You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\n Here is the knowledge base:\n {retrieval:0@formalized_content}\n The above is the knowledge base.",
|
||||||
"temperature": 0.2
|
"temperature": 0.2
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@ -348,7 +348,7 @@ async def retrieval_test():
|
|||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
return get_json_result(
|
return get_json_result(
|
||||||
data=False, message='Only owner of knowledgebase authorized for this operation.',
|
data=False, message='Only owner of dataset authorized for this operation.',
|
||||||
code=RetCode.OPERATING_ERROR)
|
code=RetCode.OPERATING_ERROR)
|
||||||
|
|
||||||
e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
|
e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
|
||||||
|
|||||||
@ -65,7 +65,7 @@ async def set_dialog():
|
|||||||
|
|
||||||
if not is_create:
|
if not is_create:
|
||||||
if not req.get("kb_ids", []) and not prompt_config.get("tavily_api_key") and "{knowledge}" in prompt_config['system']:
|
if not req.get("kb_ids", []) and not prompt_config.get("tavily_api_key") and "{knowledge}" in prompt_config['system']:
|
||||||
return get_data_error_result(message="Please remove `{knowledge}` in system prompt since no knowledge base / Tavily used here.")
|
return get_data_error_result(message="Please remove `{knowledge}` in system prompt since no dataset / Tavily used here.")
|
||||||
|
|
||||||
for p in prompt_config["parameters"]:
|
for p in prompt_config["parameters"]:
|
||||||
if p["optional"]:
|
if p["optional"]:
|
||||||
|
|||||||
@ -70,7 +70,7 @@ async def upload():
|
|||||||
|
|
||||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||||
if not e:
|
if not e:
|
||||||
raise LookupError("Can't find this knowledgebase!")
|
raise LookupError("Can't find this dataset!")
|
||||||
if not check_kb_team_permission(kb, current_user.id):
|
if not check_kb_team_permission(kb, current_user.id):
|
||||||
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
|
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
|
||||||
|
|
||||||
@ -99,7 +99,7 @@ async def web_crawl():
|
|||||||
return get_json_result(data=False, message="The URL format is invalid", code=RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message="The URL format is invalid", code=RetCode.ARGUMENT_ERROR)
|
||||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||||
if not e:
|
if not e:
|
||||||
raise LookupError("Can't find this knowledgebase!")
|
raise LookupError("Can't find this dataset!")
|
||||||
if check_kb_team_permission(kb, current_user.id):
|
if check_kb_team_permission(kb, current_user.id):
|
||||||
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
|
return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
|
||||||
|
|
||||||
@ -169,10 +169,10 @@ async def create():
|
|||||||
try:
|
try:
|
||||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||||
if not e:
|
if not e:
|
||||||
return get_data_error_result(message="Can't find this knowledgebase!")
|
return get_data_error_result(message="Can't find this dataset!")
|
||||||
|
|
||||||
if DocumentService.query(name=req["name"], kb_id=kb_id):
|
if DocumentService.query(name=req["name"], kb_id=kb_id):
|
||||||
return get_data_error_result(message="Duplicated document name in the same knowledgebase.")
|
return get_data_error_result(message="Duplicated document name in the same dataset.")
|
||||||
|
|
||||||
kb_root_folder = FileService.get_kb_folder(kb.tenant_id)
|
kb_root_folder = FileService.get_kb_folder(kb.tenant_id)
|
||||||
if not kb_root_folder:
|
if not kb_root_folder:
|
||||||
@ -219,7 +219,7 @@ async def list_docs():
|
|||||||
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
|
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
return get_json_result(data=False, message="Only owner of dataset authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
||||||
keywords = request.args.get("keywords", "")
|
keywords = request.args.get("keywords", "")
|
||||||
|
|
||||||
page_number = int(request.args.get("page", 0))
|
page_number = int(request.args.get("page", 0))
|
||||||
@ -293,7 +293,7 @@ async def get_filter():
|
|||||||
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
|
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
return get_json_result(data=False, message="Only owner of dataset authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
||||||
|
|
||||||
keywords = req.get("keywords", "")
|
keywords = req.get("keywords", "")
|
||||||
|
|
||||||
@ -343,7 +343,7 @@ async def metadata_summary():
|
|||||||
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
|
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
return get_json_result(data=False, message="Only owner of dataset authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
summary = DocumentService.get_metadata_summary(kb_id)
|
summary = DocumentService.get_metadata_summary(kb_id)
|
||||||
@ -365,7 +365,7 @@ async def metadata_update():
|
|||||||
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
|
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
return get_json_result(data=False, message="Only owner of dataset authorized for this operation.", code=RetCode.OPERATING_ERROR)
|
||||||
|
|
||||||
selector = req.get("selector", {}) or {}
|
selector = req.get("selector", {}) or {}
|
||||||
updates = req.get("updates", []) or []
|
updates = req.get("updates", []) or []
|
||||||
@ -454,7 +454,7 @@ async def change_status():
|
|||||||
continue
|
continue
|
||||||
e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
|
e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
|
||||||
if not e:
|
if not e:
|
||||||
result[doc_id] = {"error": "Can't find this knowledgebase!"}
|
result[doc_id] = {"error": "Can't find this dataset!"}
|
||||||
continue
|
continue
|
||||||
if not DocumentService.update_by_id(doc_id, {"status": str(status)}):
|
if not DocumentService.update_by_id(doc_id, {"status": str(status)}):
|
||||||
result[doc_id] = {"error": "Database error (Document update)!"}
|
result[doc_id] = {"error": "Database error (Document update)!"}
|
||||||
@ -562,7 +562,7 @@ async def rename():
|
|||||||
|
|
||||||
for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
|
for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
|
||||||
if d.name == req["name"]:
|
if d.name == req["name"]:
|
||||||
return get_data_error_result(message="Duplicated document name in the same knowledgebase.")
|
return get_data_error_result(message="Duplicated document name in the same dataset.")
|
||||||
|
|
||||||
if not DocumentService.update_by_id(req["doc_id"], {"name": req["name"]}):
|
if not DocumentService.update_by_id(req["doc_id"], {"name": req["name"]}):
|
||||||
return get_data_error_result(message="Database error (Document rename)!")
|
return get_data_error_result(message="Database error (Document rename)!")
|
||||||
|
|||||||
@ -68,7 +68,7 @@ async def convert():
|
|||||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||||
if not e:
|
if not e:
|
||||||
return get_data_error_result(
|
return get_data_error_result(
|
||||||
message="Can't find this knowledgebase!")
|
message="Can't find this dataset!")
|
||||||
e, file = FileService.get_by_id(id)
|
e, file = FileService.get_by_id(id)
|
||||||
if not e:
|
if not e:
|
||||||
return get_data_error_result(
|
return get_data_error_result(
|
||||||
|
|||||||
@ -93,19 +93,19 @@ async def update():
|
|||||||
if not KnowledgebaseService.query(
|
if not KnowledgebaseService.query(
|
||||||
created_by=current_user.id, id=req["kb_id"]):
|
created_by=current_user.id, id=req["kb_id"]):
|
||||||
return get_json_result(
|
return get_json_result(
|
||||||
data=False, message='Only owner of knowledgebase authorized for this operation.',
|
data=False, message='Only owner of dataset authorized for this operation.',
|
||||||
code=RetCode.OPERATING_ERROR)
|
code=RetCode.OPERATING_ERROR)
|
||||||
|
|
||||||
e, kb = KnowledgebaseService.get_by_id(req["kb_id"])
|
e, kb = KnowledgebaseService.get_by_id(req["kb_id"])
|
||||||
if not e:
|
if not e:
|
||||||
return get_data_error_result(
|
return get_data_error_result(
|
||||||
message="Can't find this knowledgebase!")
|
message="Can't find this dataset!")
|
||||||
|
|
||||||
if req["name"].lower() != kb.name.lower() \
|
if req["name"].lower() != kb.name.lower() \
|
||||||
and len(
|
and len(
|
||||||
KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) >= 1:
|
KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) >= 1:
|
||||||
return get_data_error_result(
|
return get_data_error_result(
|
||||||
message="Duplicated knowledgebase name.")
|
message="Duplicated dataset name.")
|
||||||
|
|
||||||
del req["kb_id"]
|
del req["kb_id"]
|
||||||
connectors = []
|
connectors = []
|
||||||
@ -162,12 +162,12 @@ def detail():
|
|||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
return get_json_result(
|
return get_json_result(
|
||||||
data=False, message='Only owner of knowledgebase authorized for this operation.',
|
data=False, message='Only owner of dataset authorized for this operation.',
|
||||||
code=RetCode.OPERATING_ERROR)
|
code=RetCode.OPERATING_ERROR)
|
||||||
kb = KnowledgebaseService.get_detail(kb_id)
|
kb = KnowledgebaseService.get_detail(kb_id)
|
||||||
if not kb:
|
if not kb:
|
||||||
return get_data_error_result(
|
return get_data_error_result(
|
||||||
message="Can't find this knowledgebase!")
|
message="Can't find this dataset!")
|
||||||
kb["size"] = DocumentService.get_total_size_by_kb_id(kb_id=kb["id"],keywords="", run_status=[], types=[])
|
kb["size"] = DocumentService.get_total_size_by_kb_id(kb_id=kb["id"],keywords="", run_status=[], types=[])
|
||||||
kb["connectors"] = Connector2KbService.list_connectors(kb_id)
|
kb["connectors"] = Connector2KbService.list_connectors(kb_id)
|
||||||
|
|
||||||
@ -232,7 +232,7 @@ async def rm():
|
|||||||
created_by=current_user.id, id=req["kb_id"])
|
created_by=current_user.id, id=req["kb_id"])
|
||||||
if not kbs:
|
if not kbs:
|
||||||
return get_json_result(
|
return get_json_result(
|
||||||
data=False, message='Only owner of knowledgebase authorized for this operation.',
|
data=False, message='Only owner of dataset authorized for this operation.',
|
||||||
code=RetCode.OPERATING_ERROR)
|
code=RetCode.OPERATING_ERROR)
|
||||||
|
|
||||||
def _rm_sync():
|
def _rm_sync():
|
||||||
|
|||||||
@ -92,7 +92,7 @@ async def create(tenant_id):
|
|||||||
req["tenant_id"] = tenant_id
|
req["tenant_id"] = tenant_id
|
||||||
# prompt more parameter
|
# prompt more parameter
|
||||||
default_prompt = {
|
default_prompt = {
|
||||||
"system": """You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.
|
"system": """You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the dataset and answer in detail. When all dataset content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the dataset!" Answers need to consider chat history.
|
||||||
Here is the knowledge base:
|
Here is the knowledge base:
|
||||||
{knowledge}
|
{knowledge}
|
||||||
The above is the knowledge base.""",
|
The above is the knowledge base.""",
|
||||||
|
|||||||
@ -237,7 +237,7 @@ async def update_doc(tenant_id, dataset_id, document_id):
|
|||||||
return get_error_data_result(message="You don't own the dataset.")
|
return get_error_data_result(message="You don't own the dataset.")
|
||||||
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||||
if not e:
|
if not e:
|
||||||
return get_error_data_result(message="Can't find this knowledgebase!")
|
return get_error_data_result(message="Can't find this dataset!")
|
||||||
doc = DocumentService.query(kb_id=dataset_id, id=document_id)
|
doc = DocumentService.query(kb_id=dataset_id, id=document_id)
|
||||||
if not doc:
|
if not doc:
|
||||||
return get_error_data_result(message="The dataset doesn't own the document.")
|
return get_error_data_result(message="The dataset doesn't own the document.")
|
||||||
|
|||||||
@ -744,7 +744,7 @@ async def convert(tenant_id):
|
|||||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||||
if not e:
|
if not e:
|
||||||
return get_json_result(
|
return get_json_result(
|
||||||
message="Can't find this knowledgebase!", code=RetCode.NOT_FOUND)
|
message="Can't find this dataset!", code=RetCode.NOT_FOUND)
|
||||||
e, file = FileService.get_by_id(id)
|
e, file = FileService.get_by_id(id)
|
||||||
if not e:
|
if not e:
|
||||||
return get_json_result(
|
return get_json_result(
|
||||||
|
|||||||
@ -998,7 +998,7 @@ async def retrieval_test_embedded():
|
|||||||
tenant_ids.append(tenant.tenant_id)
|
tenant_ids.append(tenant.tenant_id)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.",
|
return get_json_result(data=False, message="Only owner of dataset authorized for this operation.",
|
||||||
code=RetCode.OPERATING_ERROR)
|
code=RetCode.OPERATING_ERROR)
|
||||||
|
|
||||||
e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
|
e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
|
||||||
|
|||||||
@ -153,7 +153,7 @@ def delete_user_data(user_id: str) -> dict:
|
|||||||
done_msg += "Start to delete owned tenant.\n"
|
done_msg += "Start to delete owned tenant.\n"
|
||||||
tenant_id = owned_tenant[0]["tenant_id"]
|
tenant_id = owned_tenant[0]["tenant_id"]
|
||||||
kb_ids = KnowledgebaseService.get_kb_ids(usr.id)
|
kb_ids = KnowledgebaseService.get_kb_ids(usr.id)
|
||||||
# step1.1 delete knowledgebase related file and info
|
# step1.1 delete dataset related file and info
|
||||||
if kb_ids:
|
if kb_ids:
|
||||||
# step1.1.1 delete files in storage, remove bucket
|
# step1.1.1 delete files in storage, remove bucket
|
||||||
for kb_id in kb_ids:
|
for kb_id in kb_ids:
|
||||||
@ -182,7 +182,7 @@ def delete_user_data(user_id: str) -> dict:
|
|||||||
search.index_name(tenant_id), kb_ids)
|
search.index_name(tenant_id), kb_ids)
|
||||||
done_msg += f"- Deleted {r} chunk records.\n"
|
done_msg += f"- Deleted {r} chunk records.\n"
|
||||||
kb_delete_res = KnowledgebaseService.delete_by_ids(kb_ids)
|
kb_delete_res = KnowledgebaseService.delete_by_ids(kb_ids)
|
||||||
done_msg += f"- Deleted {kb_delete_res} knowledgebase records.\n"
|
done_msg += f"- Deleted {kb_delete_res} dataset records.\n"
|
||||||
# step1.1.4 delete agents
|
# step1.1.4 delete agents
|
||||||
agent_delete_res = delete_user_agents(usr.id)
|
agent_delete_res = delete_user_agents(usr.id)
|
||||||
done_msg += f"- Deleted {agent_delete_res['agents_deleted_count']} agent, {agent_delete_res['version_deleted_count']} versions records.\n"
|
done_msg += f"- Deleted {agent_delete_res['agents_deleted_count']} agent, {agent_delete_res['version_deleted_count']} versions records.\n"
|
||||||
@ -258,7 +258,7 @@ def delete_user_data(user_id: str) -> dict:
|
|||||||
# step2.1.5 delete document record
|
# step2.1.5 delete document record
|
||||||
doc_delete_res = DocumentService.delete_by_ids([d['id'] for d in created_documents])
|
doc_delete_res = DocumentService.delete_by_ids([d['id'] for d in created_documents])
|
||||||
done_msg += f"- Deleted {doc_delete_res} documents.\n"
|
done_msg += f"- Deleted {doc_delete_res} documents.\n"
|
||||||
# step2.1.6 update knowledge base doc&chunk&token cnt
|
# step2.1.6 update dataset doc&chunk&token cnt
|
||||||
for kb_id, doc_num in kb_doc_info.items():
|
for kb_id, doc_num in kb_doc_info.items():
|
||||||
KnowledgebaseService.decrease_document_num_in_delete(kb_id, doc_num)
|
KnowledgebaseService.decrease_document_num_in_delete(kb_id, doc_num)
|
||||||
|
|
||||||
|
|||||||
@ -1082,12 +1082,12 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
|||||||
|
|
||||||
e, dia = DialogService.get_by_id(conv.dialog_id)
|
e, dia = DialogService.get_by_id(conv.dialog_id)
|
||||||
if not dia.kb_ids:
|
if not dia.kb_ids:
|
||||||
raise LookupError("No knowledge base associated with this conversation. "
|
raise LookupError("No dataset associated with this conversation. "
|
||||||
"Please add a knowledge base before uploading documents")
|
"Please add a dataset before uploading documents")
|
||||||
kb_id = dia.kb_ids[0]
|
kb_id = dia.kb_ids[0]
|
||||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||||
if not e:
|
if not e:
|
||||||
raise LookupError("Can't find this knowledgebase!")
|
raise LookupError("Can't find this dataset!")
|
||||||
|
|
||||||
embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING, llm_name=kb.embd_id, lang=kb.language)
|
embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING, llm_name=kb.embd_id, lang=kb.language)
|
||||||
|
|
||||||
|
|||||||
@ -94,11 +94,11 @@ class FileService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def get_kb_id_by_file_id(cls, file_id):
|
def get_kb_id_by_file_id(cls, file_id):
|
||||||
# Get knowledge base IDs associated with a file
|
# Get dataset IDs associated with a file
|
||||||
# Args:
|
# Args:
|
||||||
# file_id: File ID
|
# file_id: File ID
|
||||||
# Returns:
|
# Returns:
|
||||||
# List of dictionaries containing knowledge base IDs and names
|
# List of dictionaries containing dataset IDs and names
|
||||||
kbs = (
|
kbs = (
|
||||||
cls.model.select(*[Knowledgebase.id, Knowledgebase.name])
|
cls.model.select(*[Knowledgebase.id, Knowledgebase.name])
|
||||||
.join(File2Document, on=(File2Document.file_id == file_id))
|
.join(File2Document, on=(File2Document.file_id == file_id))
|
||||||
@ -247,7 +247,7 @@ class FileService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def get_kb_folder(cls, tenant_id):
|
def get_kb_folder(cls, tenant_id):
|
||||||
# Get knowledge base folder for tenant
|
# Get dataset folder for tenant
|
||||||
# Args:
|
# Args:
|
||||||
# tenant_id: Tenant ID
|
# tenant_id: Tenant ID
|
||||||
# Returns:
|
# Returns:
|
||||||
@ -263,7 +263,7 @@ class FileService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def new_a_file_from_kb(cls, tenant_id, name, parent_id, ty=FileType.FOLDER.value, size=0, location=""):
|
def new_a_file_from_kb(cls, tenant_id, name, parent_id, ty=FileType.FOLDER.value, size=0, location=""):
|
||||||
# Create a new file from knowledge base
|
# Create a new file from dataset
|
||||||
# Args:
|
# Args:
|
||||||
# tenant_id: Tenant ID
|
# tenant_id: Tenant ID
|
||||||
# name: File name
|
# name: File name
|
||||||
@ -292,7 +292,7 @@ class FileService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def init_knowledgebase_docs(cls, root_id, tenant_id):
|
def init_knowledgebase_docs(cls, root_id, tenant_id):
|
||||||
# Initialize knowledge base documents
|
# Initialize dataset documents
|
||||||
# Args:
|
# Args:
|
||||||
# root_id: Root folder ID
|
# root_id: Root folder ID
|
||||||
# tenant_id: Tenant ID
|
# tenant_id: Tenant ID
|
||||||
|
|||||||
@ -30,9 +30,9 @@ from api.utils.api_utils import get_parser_config, get_data_error_result
|
|||||||
|
|
||||||
|
|
||||||
class KnowledgebaseService(CommonService):
|
class KnowledgebaseService(CommonService):
|
||||||
"""Service class for managing knowledge base operations.
|
"""Service class for managing dataset operations.
|
||||||
|
|
||||||
This class extends CommonService to provide specialized functionality for knowledge base
|
This class extends CommonService to provide specialized functionality for dataset
|
||||||
management, including document parsing status tracking, access control, and configuration
|
management, including document parsing status tracking, access control, and configuration
|
||||||
management. It handles operations such as listing, creating, updating, and deleting
|
management. It handles operations such as listing, creating, updating, and deleting
|
||||||
knowledge bases, as well as managing their associated documents and permissions.
|
knowledge bases, as well as managing their associated documents and permissions.
|
||||||
@ -41,7 +41,7 @@ class KnowledgebaseService(CommonService):
|
|||||||
- Document parsing status verification
|
- Document parsing status verification
|
||||||
- Knowledge base access control
|
- Knowledge base access control
|
||||||
- Parser configuration management
|
- Parser configuration management
|
||||||
- Tenant-based knowledge base organization
|
- Tenant-based dataset organization
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
model: The Knowledgebase model class for database operations.
|
model: The Knowledgebase model class for database operations.
|
||||||
@ -51,18 +51,18 @@ class KnowledgebaseService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def accessible4deletion(cls, kb_id, user_id):
|
def accessible4deletion(cls, kb_id, user_id):
|
||||||
"""Check if a knowledge base can be deleted by a specific user.
|
"""Check if a dataset can be deleted by a specific user.
|
||||||
|
|
||||||
This method verifies whether a user has permission to delete a knowledge base
|
This method verifies whether a user has permission to delete a dataset
|
||||||
by checking if they are the creator of that knowledge base.
|
by checking if they are the creator of that dataset.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
kb_id (str): The unique identifier of the knowledge base to check.
|
kb_id (str): The unique identifier of the dataset to check.
|
||||||
user_id (str): The unique identifier of the user attempting the deletion.
|
user_id (str): The unique identifier of the user attempting the deletion.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
bool: True if the user has permission to delete the knowledge base,
|
bool: True if the user has permission to delete the dataset,
|
||||||
False if the user doesn't have permission or the knowledge base doesn't exist.
|
False if the user doesn't have permission or the dataset doesn't exist.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> KnowledgebaseService.accessible4deletion("kb123", "user456")
|
>>> KnowledgebaseService.accessible4deletion("kb123", "user456")
|
||||||
@ -71,10 +71,10 @@ class KnowledgebaseService(CommonService):
|
|||||||
Note:
|
Note:
|
||||||
- This method only checks creator permissions
|
- This method only checks creator permissions
|
||||||
- A return value of False can mean either:
|
- A return value of False can mean either:
|
||||||
1. The knowledge base doesn't exist
|
1. The dataset doesn't exist
|
||||||
2. The user is not the creator of the knowledge base
|
2. The user is not the creator of the dataset
|
||||||
"""
|
"""
|
||||||
# Check if a knowledge base can be deleted by a user
|
# Check if a dataset can be deleted by a user
|
||||||
docs = cls.model.select(
|
docs = cls.model.select(
|
||||||
cls.model.id).where(cls.model.id == kb_id, cls.model.created_by == user_id).paginate(0, 1)
|
cls.model.id).where(cls.model.id == kb_id, cls.model.created_by == user_id).paginate(0, 1)
|
||||||
docs = docs.dicts()
|
docs = docs.dicts()
|
||||||
@ -85,7 +85,7 @@ class KnowledgebaseService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def is_parsed_done(cls, kb_id):
|
def is_parsed_done(cls, kb_id):
|
||||||
# Check if all documents in the knowledge base have completed parsing
|
# Check if all documents in the dataset have completed parsing
|
||||||
#
|
#
|
||||||
# Args:
|
# Args:
|
||||||
# kb_id: Knowledge base ID
|
# kb_id: Knowledge base ID
|
||||||
@ -96,13 +96,13 @@ class KnowledgebaseService(CommonService):
|
|||||||
from common.constants import TaskStatus
|
from common.constants import TaskStatus
|
||||||
from api.db.services.document_service import DocumentService
|
from api.db.services.document_service import DocumentService
|
||||||
|
|
||||||
# Get knowledge base information
|
# Get dataset information
|
||||||
kbs = cls.query(id=kb_id)
|
kbs = cls.query(id=kb_id)
|
||||||
if not kbs:
|
if not kbs:
|
||||||
return False, "Knowledge base not found"
|
return False, "Knowledge base not found"
|
||||||
kb = kbs[0]
|
kb = kbs[0]
|
||||||
|
|
||||||
# Get all documents in the knowledge base
|
# Get all documents in the dataset
|
||||||
docs, _ = DocumentService.get_by_kb_id(kb_id, 1, 1000, "create_time", True, "", [], [])
|
docs, _ = DocumentService.get_by_kb_id(kb_id, 1, 1000, "create_time", True, "", [], [])
|
||||||
|
|
||||||
# Check parsing status of each document
|
# Check parsing status of each document
|
||||||
@ -119,9 +119,9 @@ class KnowledgebaseService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def list_documents_by_ids(cls, kb_ids):
|
def list_documents_by_ids(cls, kb_ids):
|
||||||
# Get document IDs associated with given knowledge base IDs
|
# Get document IDs associated with given dataset IDs
|
||||||
# Args:
|
# Args:
|
||||||
# kb_ids: List of knowledge base IDs
|
# kb_ids: List of dataset IDs
|
||||||
# Returns:
|
# Returns:
|
||||||
# List of document IDs
|
# List of document IDs
|
||||||
doc_ids = cls.model.select(Document.id.alias("document_id")).join(Document, on=(cls.model.id == Document.kb_id)).where(
|
doc_ids = cls.model.select(Document.id.alias("document_id")).join(Document, on=(cls.model.id == Document.kb_id)).where(
|
||||||
@ -235,11 +235,11 @@ class KnowledgebaseService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def get_kb_ids(cls, tenant_id):
|
def get_kb_ids(cls, tenant_id):
|
||||||
# Get all knowledge base IDs for a tenant
|
# Get all dataset IDs for a tenant
|
||||||
# Args:
|
# Args:
|
||||||
# tenant_id: Tenant ID
|
# tenant_id: Tenant ID
|
||||||
# Returns:
|
# Returns:
|
||||||
# List of knowledge base IDs
|
# List of dataset IDs
|
||||||
fields = [
|
fields = [
|
||||||
cls.model.id,
|
cls.model.id,
|
||||||
]
|
]
|
||||||
@ -250,11 +250,11 @@ class KnowledgebaseService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def get_detail(cls, kb_id):
|
def get_detail(cls, kb_id):
|
||||||
# Get detailed information about a knowledge base
|
# Get detailed information about a dataset
|
||||||
# Args:
|
# Args:
|
||||||
# kb_id: Knowledge base ID
|
# kb_id: Knowledge base ID
|
||||||
# Returns:
|
# Returns:
|
||||||
# Dictionary containing knowledge base details
|
# Dictionary containing dataset details
|
||||||
fields = [
|
fields = [
|
||||||
cls.model.id,
|
cls.model.id,
|
||||||
cls.model.embd_id,
|
cls.model.embd_id,
|
||||||
@ -294,13 +294,13 @@ class KnowledgebaseService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def update_parser_config(cls, id, config):
|
def update_parser_config(cls, id, config):
|
||||||
# Update parser configuration for a knowledge base
|
# Update parser configuration for a dataset
|
||||||
# Args:
|
# Args:
|
||||||
# id: Knowledge base ID
|
# id: Knowledge base ID
|
||||||
# config: New parser configuration
|
# config: New parser configuration
|
||||||
e, m = cls.get_by_id(id)
|
e, m = cls.get_by_id(id)
|
||||||
if not e:
|
if not e:
|
||||||
raise LookupError(f"knowledgebase({id}) not found.")
|
raise LookupError(f"dataset({id}) not found.")
|
||||||
|
|
||||||
def dfs_update(old, new):
|
def dfs_update(old, new):
|
||||||
# Deep update of nested configuration
|
# Deep update of nested configuration
|
||||||
@ -325,7 +325,7 @@ class KnowledgebaseService(CommonService):
|
|||||||
def delete_field_map(cls, id):
|
def delete_field_map(cls, id):
|
||||||
e, m = cls.get_by_id(id)
|
e, m = cls.get_by_id(id)
|
||||||
if not e:
|
if not e:
|
||||||
raise LookupError(f"knowledgebase({id}) not found.")
|
raise LookupError(f"dataset({id}) not found.")
|
||||||
|
|
||||||
m.parser_config.pop("field_map", None)
|
m.parser_config.pop("field_map", None)
|
||||||
cls.update_by_id(id, {"parser_config": m.parser_config})
|
cls.update_by_id(id, {"parser_config": m.parser_config})
|
||||||
@ -335,7 +335,7 @@ class KnowledgebaseService(CommonService):
|
|||||||
def get_field_map(cls, ids):
|
def get_field_map(cls, ids):
|
||||||
# Get field mappings for knowledge bases
|
# Get field mappings for knowledge bases
|
||||||
# Args:
|
# Args:
|
||||||
# ids: List of knowledge base IDs
|
# ids: List of dataset IDs
|
||||||
# Returns:
|
# Returns:
|
||||||
# Dictionary of field mappings
|
# Dictionary of field mappings
|
||||||
conf = {}
|
conf = {}
|
||||||
@ -347,7 +347,7 @@ class KnowledgebaseService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def get_by_name(cls, kb_name, tenant_id):
|
def get_by_name(cls, kb_name, tenant_id):
|
||||||
# Get knowledge base by name and tenant ID
|
# Get dataset by name and tenant ID
|
||||||
# Args:
|
# Args:
|
||||||
# kb_name: Knowledge base name
|
# kb_name: Knowledge base name
|
||||||
# tenant_id: Tenant ID
|
# tenant_id: Tenant ID
|
||||||
@ -365,9 +365,9 @@ class KnowledgebaseService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def get_all_ids(cls):
|
def get_all_ids(cls):
|
||||||
# Get all knowledge base IDs
|
# Get all dataset IDs
|
||||||
# Returns:
|
# Returns:
|
||||||
# List of all knowledge base IDs
|
# List of all dataset IDs
|
||||||
return [m["id"] for m in cls.model.select(cls.model.id).dicts()]
|
return [m["id"] for m in cls.model.select(cls.model.id).dicts()]
|
||||||
|
|
||||||
|
|
||||||
@ -471,7 +471,7 @@ class KnowledgebaseService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def accessible(cls, kb_id, user_id):
|
def accessible(cls, kb_id, user_id):
|
||||||
# Check if a knowledge base is accessible by a user
|
# Check if a dataset is accessible by a user
|
||||||
# Args:
|
# Args:
|
||||||
# kb_id: Knowledge base ID
|
# kb_id: Knowledge base ID
|
||||||
# user_id: User ID
|
# user_id: User ID
|
||||||
@ -488,12 +488,12 @@ class KnowledgebaseService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def get_kb_by_id(cls, kb_id, user_id):
|
def get_kb_by_id(cls, kb_id, user_id):
|
||||||
# Get knowledge base by ID and user ID
|
# Get dataset by ID and user ID
|
||||||
# Args:
|
# Args:
|
||||||
# kb_id: Knowledge base ID
|
# kb_id: Knowledge base ID
|
||||||
# user_id: User ID
|
# user_id: User ID
|
||||||
# Returns:
|
# Returns:
|
||||||
# List containing knowledge base information
|
# List containing dataset information
|
||||||
kbs = cls.model.select().join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id)
|
kbs = cls.model.select().join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id)
|
||||||
).where(cls.model.id == kb_id, UserTenant.user_id == user_id).paginate(0, 1)
|
).where(cls.model.id == kb_id, UserTenant.user_id == user_id).paginate(0, 1)
|
||||||
kbs = kbs.dicts()
|
kbs = kbs.dicts()
|
||||||
@ -502,12 +502,12 @@ class KnowledgebaseService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def get_kb_by_name(cls, kb_name, user_id):
|
def get_kb_by_name(cls, kb_name, user_id):
|
||||||
# Get knowledge base by name and user ID
|
# Get dataset by name and user ID
|
||||||
# Args:
|
# Args:
|
||||||
# kb_name: Knowledge base name
|
# kb_name: Knowledge base name
|
||||||
# user_id: User ID
|
# user_id: User ID
|
||||||
# Returns:
|
# Returns:
|
||||||
# List containing knowledge base information
|
# List containing dataset information
|
||||||
kbs = cls.model.select().join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id)
|
kbs = cls.model.select().join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id)
|
||||||
).where(cls.model.name == kb_name, UserTenant.user_id == user_id).paginate(0, 1)
|
).where(cls.model.name == kb_name, UserTenant.user_id == user_id).paginate(0, 1)
|
||||||
kbs = kbs.dicts()
|
kbs = kbs.dicts()
|
||||||
|
|||||||
@ -121,7 +121,7 @@ class PipelineOperationLogService(CommonService):
|
|||||||
else:
|
else:
|
||||||
ok, kb_info = KnowledgebaseService.get_by_id(document.kb_id)
|
ok, kb_info = KnowledgebaseService.get_by_id(document.kb_id)
|
||||||
if not ok:
|
if not ok:
|
||||||
raise RuntimeError(f"Cannot find knowledge base {document.kb_id} for referred_document {referred_document_id}")
|
raise RuntimeError(f"Cannot find dataset {document.kb_id} for referred_document {referred_document_id}")
|
||||||
|
|
||||||
tenant_id = kb_info.tenant_id
|
tenant_id = kb_info.tenant_id
|
||||||
title = document.parser_id
|
title = document.parser_id
|
||||||
|
|||||||
@ -76,7 +76,7 @@ class TaskService(CommonService):
|
|||||||
"""Retrieve detailed task information by task ID.
|
"""Retrieve detailed task information by task ID.
|
||||||
|
|
||||||
This method fetches comprehensive task details including associated document,
|
This method fetches comprehensive task details including associated document,
|
||||||
knowledge base, and tenant information. It also handles task retry logic and
|
dataset, and tenant information. It also handles task retry logic and
|
||||||
progress updates.
|
progress updates.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|||||||
@ -51,9 +51,9 @@ Your answers should follow a professional and {style} style.
|
|||||||
|
|
||||||
...
|
...
|
||||||
|
|
||||||
Here is the dataset:
|
Here is the knowledge base:
|
||||||
{knowledge}
|
{knowledge}
|
||||||
The above is the dataset.
|
The above is the knowledge base.
|
||||||
```
|
```
|
||||||
|
|
||||||
:::tip NOTE
|
:::tip NOTE
|
||||||
|
|||||||
@ -5355,6 +5355,6 @@ or
|
|||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"code": 404,
|
"code": 404,
|
||||||
"message": "Can't find this knowledgebase!"
|
"message": "Can't find this dataset!"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|||||||
@ -283,7 +283,7 @@ if __name__ == '__main__':
|
|||||||
print('*****************RAGFlow Benchmark*****************')
|
print('*****************RAGFlow Benchmark*****************')
|
||||||
parser = argparse.ArgumentParser(usage="benchmark.py <max_docs> <kb_id> <dataset> <dataset_path> [<miracl_corpus_path>])", description='RAGFlow Benchmark')
|
parser = argparse.ArgumentParser(usage="benchmark.py <max_docs> <kb_id> <dataset> <dataset_path> [<miracl_corpus_path>])", description='RAGFlow Benchmark')
|
||||||
parser.add_argument('max_docs', metavar='max_docs', type=int, help='max docs to evaluate')
|
parser.add_argument('max_docs', metavar='max_docs', type=int, help='max docs to evaluate')
|
||||||
parser.add_argument('kb_id', metavar='kb_id', help='knowledgebase id')
|
parser.add_argument('kb_id', metavar='kb_id', help='dataset id')
|
||||||
parser.add_argument('dataset', metavar='dataset', help='dataset name, shall be one of ms_marco_v1.1(https://huggingface.co/datasets/microsoft/ms_marco), trivia_qa(https://huggingface.co/datasets/mandarjoshi/trivia_qa>), miracl(https://huggingface.co/datasets/miracl/miracl')
|
parser.add_argument('dataset', metavar='dataset', help='dataset name, shall be one of ms_marco_v1.1(https://huggingface.co/datasets/microsoft/ms_marco), trivia_qa(https://huggingface.co/datasets/mandarjoshi/trivia_qa>), miracl(https://huggingface.co/datasets/miracl/miracl')
|
||||||
parser.add_argument('dataset_path', metavar='dataset_path', help='dataset path')
|
parser.add_argument('dataset_path', metavar='dataset_path', help='dataset path')
|
||||||
parser.add_argument('miracl_corpus_path', metavar='miracl_corpus_path', nargs='?', default="", help='miracl corpus path. Only needed when dataset is miracl')
|
parser.add_argument('miracl_corpus_path', metavar='miracl_corpus_path', nargs='?', default="", help='miracl corpus path. Only needed when dataset is miracl')
|
||||||
|
|||||||
@ -888,7 +888,7 @@ async def do_handle_task(task):
|
|||||||
if task_type == "raptor":
|
if task_type == "raptor":
|
||||||
ok, kb = KnowledgebaseService.get_by_id(task_dataset_id)
|
ok, kb = KnowledgebaseService.get_by_id(task_dataset_id)
|
||||||
if not ok:
|
if not ok:
|
||||||
progress_callback(prog=-1.0, msg="Cannot found valid knowledgebase for RAPTOR task")
|
progress_callback(prog=-1.0, msg="Cannot found valid dataset for RAPTOR task")
|
||||||
return
|
return
|
||||||
|
|
||||||
kb_parser_config = kb.parser_config
|
kb_parser_config = kb.parser_config
|
||||||
@ -940,7 +940,7 @@ async def do_handle_task(task):
|
|||||||
elif task_type == "graphrag":
|
elif task_type == "graphrag":
|
||||||
ok, kb = KnowledgebaseService.get_by_id(task_dataset_id)
|
ok, kb = KnowledgebaseService.get_by_id(task_dataset_id)
|
||||||
if not ok:
|
if not ok:
|
||||||
progress_callback(prog=-1.0, msg="Cannot found valid knowledgebase for GraphRAG task")
|
progress_callback(prog=-1.0, msg="Cannot found valid dataset for GraphRAG task")
|
||||||
return
|
return
|
||||||
|
|
||||||
kb_parser_config = kb.parser_config
|
kb_parser_config = kb.parser_config
|
||||||
|
|||||||
@ -521,7 +521,7 @@ class InfinityConnection(DocStoreConnection):
|
|||||||
try:
|
try:
|
||||||
table_instance = db_instance.get_table(table_name)
|
table_instance = db_instance.get_table(table_name)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning(f"Table not found: {table_name}, this knowledge base isn't created in Infinity. Maybe it is created in other document engine.")
|
logger.warning(f"Table not found: {table_name}, this dataset isn't created in Infinity. Maybe it is created in other document engine.")
|
||||||
continue
|
continue
|
||||||
kb_res, _ = table_instance.output(["*"]).filter(f"id = '{chunkId}'").to_df()
|
kb_res, _ = table_instance.output(["*"]).filter(f"id = '{chunkId}'").to_df()
|
||||||
logger.debug(f"INFINITY get table: {str(table_list)}, result: {str(kb_res)}")
|
logger.debug(f"INFINITY get table: {str(table_list)}, result: {str(kb_res)}")
|
||||||
|
|||||||
@ -225,7 +225,7 @@ class TestChatAssistantCreate:
|
|||||||
assert res["data"]["prompt"]["show_quote"] is True
|
assert res["data"]["prompt"]["show_quote"] is True
|
||||||
assert (
|
assert (
|
||||||
res["data"]["prompt"]["prompt"]
|
res["data"]["prompt"]["prompt"]
|
||||||
== 'You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.'
|
== 'You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the dataset and answer in detail. When all dataset content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the dataset!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
assert res["message"] == expected_message
|
assert res["message"] == expected_message
|
||||||
|
|||||||
@ -222,7 +222,7 @@ class TestChatAssistantUpdate:
|
|||||||
assert res["data"]["prompt"][0]["show_quote"] is True
|
assert res["data"]["prompt"][0]["show_quote"] is True
|
||||||
assert (
|
assert (
|
||||||
res["data"]["prompt"][0]["prompt"]
|
res["data"]["prompt"][0]["prompt"]
|
||||||
== 'You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.'
|
== 'You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the dataset and answer in detail. When all dataset content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the dataset!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
assert expected_message in res["message"]
|
assert expected_message in res["message"]
|
||||||
|
|||||||
@ -226,7 +226,7 @@ class TestChatAssistantCreate:
|
|||||||
assert res["data"]["prompt"]["show_quote"] is True
|
assert res["data"]["prompt"]["show_quote"] is True
|
||||||
assert (
|
assert (
|
||||||
res["data"]["prompt"]["prompt"]
|
res["data"]["prompt"]["prompt"]
|
||||||
== 'You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.'
|
== 'You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the dataset and answer in detail. When all dataset content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the dataset!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
assert res["message"] == expected_message
|
assert res["message"] == expected_message
|
||||||
|
|||||||
@ -223,7 +223,7 @@ class TestChatAssistantUpdate:
|
|||||||
assert res["data"]["prompt"][0]["show_quote"] is True
|
assert res["data"]["prompt"][0]["show_quote"] is True
|
||||||
assert (
|
assert (
|
||||||
res["data"]["prompt"][0]["prompt"]
|
res["data"]["prompt"][0]["prompt"]
|
||||||
== 'You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.'
|
== 'You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the dataset and answer in detail. When all dataset content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the dataset!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
assert expected_message in res["message"]
|
assert expected_message in res["message"]
|
||||||
|
|||||||
@ -211,7 +211,7 @@ class TestChatAssistantCreate:
|
|||||||
assert attrgetter("show_quote")(chat_assistant.prompt) is True
|
assert attrgetter("show_quote")(chat_assistant.prompt) is True
|
||||||
assert (
|
assert (
|
||||||
attrgetter("prompt")(chat_assistant.prompt)
|
attrgetter("prompt")(chat_assistant.prompt)
|
||||||
== 'You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.'
|
== 'You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the dataset and answer in detail. When all dataset content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the dataset!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -202,7 +202,7 @@ class TestChatAssistantUpdate:
|
|||||||
"empty_response": "Sorry! No relevant content was found in the knowledge base!",
|
"empty_response": "Sorry! No relevant content was found in the knowledge base!",
|
||||||
"opener": "Hi! I'm your assistant. What can I do for you?",
|
"opener": "Hi! I'm your assistant. What can I do for you?",
|
||||||
"show_quote": True,
|
"show_quote": True,
|
||||||
"prompt": 'You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.',
|
"prompt": 'You are an intelligent assistant. Please summarize the content of the dataset to answer the question. Please list the data in the dataset and answer in detail. When all dataset content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the dataset!" Answers need to consider chat history.\n Here is the knowledge base:\n {knowledge}\n The above is the knowledge base.',
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
assert str(updated_chat.prompt) == str(excepted_value), str(updated_chat)
|
assert str(updated_chat.prompt) == str(excepted_value), str(updated_chat)
|
||||||
|
|||||||
@ -21,7 +21,7 @@ from common import create_dialog, delete_dialog, get_dialog, update_dialog
|
|||||||
class TestDialogEdgeCases:
|
class TestDialogEdgeCases:
|
||||||
@pytest.mark.p2
|
@pytest.mark.p2
|
||||||
def test_create_dialog_with_tavily_api_key(self, WebApiAuth):
|
def test_create_dialog_with_tavily_api_key(self, WebApiAuth):
|
||||||
"""Test creating dialog with Tavily API key instead of knowledge base"""
|
"""Test creating dialog with Tavily API key instead of dataset"""
|
||||||
payload = {
|
payload = {
|
||||||
"name": "tavily_dialog",
|
"name": "tavily_dialog",
|
||||||
"prompt_config": {"system": "You are a helpful assistant. Use this knowledge: {knowledge}", "parameters": [{"key": "knowledge", "optional": True}], "tavily_api_key": "test_tavily_key"},
|
"prompt_config": {"system": "You are a helpful assistant. Use this knowledge: {knowledge}", "parameters": [{"key": "knowledge", "optional": True}], "tavily_api_key": "test_tavily_key"},
|
||||||
|
|||||||
@ -60,7 +60,7 @@ class TestDocumentCreate:
|
|||||||
def test_invalid_kb_id(self, WebApiAuth):
|
def test_invalid_kb_id(self, WebApiAuth):
|
||||||
res = create_document(WebApiAuth, {"name": "ragflow_test.txt", "kb_id": "invalid_kb_id"})
|
res = create_document(WebApiAuth, {"name": "ragflow_test.txt", "kb_id": "invalid_kb_id"})
|
||||||
assert res["code"] == 102, res
|
assert res["code"] == 102, res
|
||||||
assert res["message"] == "Can't find this knowledgebase!", res
|
assert res["message"] == "Can't find this dataset!", res
|
||||||
|
|
||||||
@pytest.mark.p3
|
@pytest.mark.p3
|
||||||
def test_filename_special_characters(self, WebApiAuth, add_dataset_func):
|
def test_filename_special_characters(self, WebApiAuth, add_dataset_func):
|
||||||
|
|||||||
@ -51,7 +51,7 @@ class TestDocumentsList:
|
|||||||
"kb_id, expected_code, expected_message",
|
"kb_id, expected_code, expected_message",
|
||||||
[
|
[
|
||||||
("", 101, 'Lack of "KB ID"'),
|
("", 101, 'Lack of "KB ID"'),
|
||||||
("invalid_dataset_id", 103, "Only owner of knowledgebase authorized for this operation."),
|
("invalid_dataset_id", 103, "Only owner of dataset authorized for this operation."),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_invalid_dataset_id(self, WebApiAuth, kb_id, expected_code, expected_message):
|
def test_invalid_dataset_id(self, WebApiAuth, kb_id, expected_code, expected_message):
|
||||||
|
|||||||
@ -136,7 +136,7 @@ class TestDocumentsUpload:
|
|||||||
fp = create_txt_file(tmp_path / "ragflow_test.txt")
|
fp = create_txt_file(tmp_path / "ragflow_test.txt")
|
||||||
res = upload_documents(WebApiAuth, {"kb_id": "invalid_kb_id"}, [fp])
|
res = upload_documents(WebApiAuth, {"kb_id": "invalid_kb_id"}, [fp])
|
||||||
assert res["code"] == 100, res
|
assert res["code"] == 100, res
|
||||||
assert res["message"] == """LookupError("Can't find this knowledgebase!")""", res
|
assert res["message"] == """LookupError("Can't find this dataset!")""", res
|
||||||
|
|
||||||
@pytest.mark.p2
|
@pytest.mark.p2
|
||||||
def test_duplicate_files(self, WebApiAuth, add_dataset_func, tmp_path):
|
def test_duplicate_files(self, WebApiAuth, add_dataset_func, tmp_path):
|
||||||
|
|||||||
@ -50,4 +50,4 @@ class TestDatasetsDetail:
|
|||||||
payload = {"kb_id": "d94a8dc02c9711f0930f7fbc369eab6d"}
|
payload = {"kb_id": "d94a8dc02c9711f0930f7fbc369eab6d"}
|
||||||
res = detail_kb(WebApiAuth, payload)
|
res = detail_kb(WebApiAuth, payload)
|
||||||
assert res["code"] == 103, res
|
assert res["code"] == 103, res
|
||||||
assert "Only owner of knowledgebase authorized for this operation." in res["message"], res
|
assert "Only owner of dataset authorized for this operation." in res["message"], res
|
||||||
|
|||||||
@ -111,7 +111,7 @@ class TestDatasetUpdate:
|
|||||||
payload = {"name": name, "description": "", "parser_id": "naive", "kb_id": kb_id}
|
payload = {"name": name, "description": "", "parser_id": "naive", "kb_id": kb_id}
|
||||||
res = update_kb(WebApiAuth, payload)
|
res = update_kb(WebApiAuth, payload)
|
||||||
assert res["code"] == 102, res
|
assert res["code"] == 102, res
|
||||||
assert res["message"] == "Duplicated knowledgebase name.", res
|
assert res["message"] == "Duplicated dataset name.", res
|
||||||
|
|
||||||
@pytest.mark.p3
|
@pytest.mark.p3
|
||||||
def test_name_case_insensitive(self, WebApiAuth, add_datasets_func):
|
def test_name_case_insensitive(self, WebApiAuth, add_datasets_func):
|
||||||
@ -120,7 +120,7 @@ class TestDatasetUpdate:
|
|||||||
payload = {"name": name, "description": "", "parser_id": "naive", "kb_id": kb_id}
|
payload = {"name": name, "description": "", "parser_id": "naive", "kb_id": kb_id}
|
||||||
res = update_kb(WebApiAuth, payload)
|
res = update_kb(WebApiAuth, payload)
|
||||||
assert res["code"] == 102, res
|
assert res["code"] == 102, res
|
||||||
assert res["message"] == "Duplicated knowledgebase name.", res
|
assert res["message"] == "Duplicated dataset name.", res
|
||||||
|
|
||||||
@pytest.mark.p2
|
@pytest.mark.p2
|
||||||
def test_avatar(self, WebApiAuth, add_dataset_func, tmp_path):
|
def test_avatar(self, WebApiAuth, add_dataset_func, tmp_path):
|
||||||
|
|||||||
@ -939,7 +939,7 @@ Example: Virtual Hosted Style`,
|
|||||||
'The default VLM for each newly created knowledge base. It describes a picture or video. If you cannot find a model from the dropdown, check https://ragflow.io/docs/dev/supported_models to see if your model provider supports this model.',
|
'The default VLM for each newly created knowledge base. It describes a picture or video. If you cannot find a model from the dropdown, check https://ragflow.io/docs/dev/supported_models to see if your model provider supports this model.',
|
||||||
sequence2txtModel: 'ASR',
|
sequence2txtModel: 'ASR',
|
||||||
sequence2txtModelTip:
|
sequence2txtModelTip:
|
||||||
'The default ASR model for each newly created knowledgebase. Use this model to translate voices to corresponding text.',
|
'The default ASR model for each newly created dataset. Use this model to translate voices to corresponding text.',
|
||||||
rerankModel: 'Rerank',
|
rerankModel: 'Rerank',
|
||||||
rerankModelTip: `The default rerank model for reranking chunks. If you cannot find a model from the dropdown, check https://ragflow.io/docs/dev/supported_models to see if your model provider supports this model.`,
|
rerankModelTip: `The default rerank model for reranking chunks. If you cannot find a model from the dropdown, check https://ragflow.io/docs/dev/supported_models to see if your model provider supports this model.`,
|
||||||
ttsModel: 'TTS',
|
ttsModel: 'TTS',
|
||||||
|
|||||||
Reference in New Issue
Block a user