Refa: HTTP API list datasets / test cases / docs (#7720)

### What problem does this PR solve?

This PR introduces Pydantic-based validation for the list datasets HTTP
API, improving code clarity and robustness. Key changes include:

Pydantic Validation
Error Handling
Test Updates
Documentation Updates

### Type of change

- [x] Documentation Update
- [x] Refactoring
This commit is contained in:
liu an
2025-05-20 09:58:26 +08:00
committed by GitHub
parent 6ed81d6774
commit fed1221302
7 changed files with 683 additions and 412 deletions

View File

@ -32,12 +32,22 @@ from api.utils.api_utils import (
deep_merge,
get_error_argument_result,
get_error_data_result,
get_error_operating_result,
get_error_permission_result,
get_parser_config,
get_result,
remap_dictionary_keys,
token_required,
verify_embedding_availability,
)
from api.utils.validation_utils import CreateDatasetReq, DeleteDatasetReq, UpdateDatasetReq, validate_and_parse_json_request
from api.utils.validation_utils import (
CreateDatasetReq,
DeleteDatasetReq,
ListDatasetReq,
UpdateDatasetReq,
validate_and_parse_json_request,
validate_and_parse_request_args,
)
@manager.route("/datasets", methods=["POST"]) # noqa: F821
@ -113,7 +123,7 @@ def create(tenant_id):
try:
if KnowledgebaseService.get_or_none(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
return get_error_data_result(message=f"Dataset name '{req['name']}' already exists")
return get_error_operating_result(message=f"Dataset name '{req['name']}' already exists")
except OperationalError as e:
logging.exception(e)
return get_error_data_result(message="Database operation failed")
@ -126,7 +136,7 @@ def create(tenant_id):
try:
ok, t = TenantService.get_by_id(tenant_id)
if not ok:
return get_error_data_result(message="Tenant not found")
return get_error_permission_result(message="Tenant not found")
except OperationalError as e:
logging.exception(e)
return get_error_data_result(message="Database operation failed")
@ -153,16 +163,7 @@ def create(tenant_id):
logging.exception(e)
return get_error_data_result(message="Database operation failed")
response_data = {}
key_mapping = {
"chunk_num": "chunk_count",
"doc_num": "document_count",
"parser_id": "chunk_method",
"embd_id": "embedding_model",
}
for key, value in k.to_dict().items():
new_key = key_mapping.get(key, key)
response_data[new_key] = value
response_data = remap_dictionary_keys(k.to_dict())
return get_result(data=response_data)
@ -232,7 +233,7 @@ def delete(tenant_id):
logging.exception(e)
return get_error_data_result(message="Database operation failed")
if len(error_kb_ids) > 0:
return get_error_data_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")
return get_error_permission_result(message=f"""User '{tenant_id}' lacks permission for datasets: '{", ".join(error_kb_ids)}'""")
errors = []
success_count = 0
@ -347,7 +348,7 @@ def update(tenant_id, dataset_id):
try:
kb = KnowledgebaseService.get_or_none(id=dataset_id, tenant_id=tenant_id)
if kb is None:
return get_error_data_result(message=f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'")
return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{dataset_id}'")
except OperationalError as e:
logging.exception(e)
return get_error_data_result(message="Database operation failed")
@ -418,7 +419,7 @@ def list_datasets(tenant_id):
name: page_size
type: integer
required: false
default: 1024
default: 30
description: Number of items per page.
- in: query
name: orderby
@ -445,47 +446,46 @@ def list_datasets(tenant_id):
items:
type: object
"""
id = request.args.get("id")
name = request.args.get("name")
if id:
kbs = KnowledgebaseService.get_kb_by_id(id, tenant_id)
args, err = validate_and_parse_request_args(request, ListDatasetReq)
if err is not None:
return get_error_argument_result(err)
kb_id = request.args.get("id")
name = args.get("name")
if kb_id:
try:
kbs = KnowledgebaseService.get_kb_by_id(kb_id, tenant_id)
except OperationalError as e:
logging.exception(e)
return get_error_data_result(message="Database operation failed")
if not kbs:
return get_error_data_result(f"You don't own the dataset {id}")
return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{kb_id}'")
if name:
kbs = KnowledgebaseService.get_kb_by_name(name, tenant_id)
try:
kbs = KnowledgebaseService.get_kb_by_name(name, tenant_id)
except OperationalError as e:
logging.exception(e)
return get_error_data_result(message="Database operation failed")
if not kbs:
return get_error_data_result(f"You don't own the dataset {name}")
page_number = int(request.args.get("page", 1))
items_per_page = int(request.args.get("page_size", 30))
orderby = request.args.get("orderby", "create_time")
if request.args.get("desc", "false").lower() not in ["true", "false"]:
return get_error_data_result("desc should be true or false")
if request.args.get("desc", "true").lower() == "false":
desc = False
else:
desc = True
tenants = TenantService.get_joined_tenants_by_user_id(tenant_id)
kbs = KnowledgebaseService.get_list(
[m["tenant_id"] for m in tenants],
tenant_id,
page_number,
items_per_page,
orderby,
desc,
id,
name,
)
renamed_list = []
return get_error_permission_result(message=f"User '{tenant_id}' lacks permission for dataset '{name}'")
try:
tenants = TenantService.get_joined_tenants_by_user_id(tenant_id)
kbs = KnowledgebaseService.get_list(
[m["tenant_id"] for m in tenants],
tenant_id,
args["page"],
args["page_size"],
args["orderby"],
args["desc"],
kb_id,
name,
)
except OperationalError as e:
logging.exception(e)
return get_error_data_result(message="Database operation failed")
response_data_list = []
for kb in kbs:
key_mapping = {
"chunk_num": "chunk_count",
"doc_num": "document_count",
"parser_id": "chunk_method",
"embd_id": "embedding_model",
}
renamed_data = {}
for key, value in kb.items():
new_key = key_mapping.get(key, key)
renamed_data[new_key] = value
renamed_list.append(renamed_data)
return get_result(data=renamed_list)
response_data_list.append(remap_dictionary_keys(kb))
return get_result(data=response_data_list)