From 64af09ce7b67bcb82b7af0bd61d978ad3284d604 Mon Sep 17 00:00:00 2001
From: Liu An <asiro@qq.com>
Date: Fri, 13 Jun 2025 16:39:10 +0800
Subject: [PATCH] Test: Add web API test suite for knowledge base operations
 (#8254)

### What problem does this PR solve?

- Implement RAGFlowWebApiAuth class for web API authentication
- Add comprehensive test cases for KB CRUD operations
- Set up common fixtures and utilities in conftest.py
- Add helper functions in common.py for web API requests

The changes establish a complete testing framework for knowledge base
management via web API endpoints.

### Type of change

- [x] Add test case
---
 test/libs/auth.py                             |   9 +
 .../test_delete_datasets.py                   |   8 +-
 test/testcases/test_sdk_api/conftest.py       |   2 +-
 test/testcases/test_web_api/common.py         |  93 +++++
 test/testcases/test_web_api/conftest.py       | 100 +++++
 .../test_web_api/test_kb_app/conftest.py      |  38 ++
 .../test_kb_app/test_create_kb.py             | 109 +++++
 .../test_kb_app/test_detail_kb.py             |  53 +++
 .../test_web_api/test_kb_app/test_list_kbs.py | 184 +++++++++
 .../test_web_api/test_kb_app/test_rm_kb.py    |  61 +++
 .../test_kb_app/test_update_kb.py             | 378 ++++++++++++++++++
 11 files changed, 1030 insertions(+), 5 deletions(-)
 create mode 100644 test/testcases/test_web_api/common.py
 create mode 100644 test/testcases/test_web_api/conftest.py
 create mode 100644 test/testcases/test_web_api/test_kb_app/conftest.py
 create mode 100644 test/testcases/test_web_api/test_kb_app/test_create_kb.py
 create mode 100644 test/testcases/test_web_api/test_kb_app/test_detail_kb.py
 create mode 100644 test/testcases/test_web_api/test_kb_app/test_list_kbs.py
 create mode 100644 test/testcases/test_web_api/test_kb_app/test_rm_kb.py
 create mode 100644 test/testcases/test_web_api/test_kb_app/test_update_kb.py

diff --git a/test/libs/auth.py b/test/libs/auth.py
index 1504d7774..cdc31c94b 100644
--- a/test/libs/auth.py
+++ b/test/libs/auth.py
@@ -23,3 +23,12 @@ class RAGFlowHttpApiAuth(AuthBase):
     def __call__(self, r):
         r.headers["Authorization"] = f"Bearer {self._token}"
         return r
+
+
+class RAGFlowWebApiAuth(AuthBase):
+    def __init__(self, token):
+        self._token = token
+
+    def __call__(self, r):
+        r.headers["Authorization"] = self._token
+        return r
diff --git a/test/testcases/test_http_api/test_dataset_mangement/test_delete_datasets.py b/test/testcases/test_http_api/test_dataset_mangement/test_delete_datasets.py
index 0165633bc..1bba3fac9 100644
--- a/test/testcases/test_http_api/test_dataset_mangement/test_delete_datasets.py
+++ b/test/testcases/test_http_api/test_dataset_mangement/test_delete_datasets.py
@@ -99,14 +99,14 @@ class TestCapability:
 class TestDatasetsDelete:
     @pytest.mark.p1
     @pytest.mark.parametrize(
-        "func, expected_code, expected_message, remaining",
+        "func, expected_code, remaining",
         [
-            (lambda r: {"ids": r[:1]}, 0, "", 2),
-            (lambda r: {"ids": r}, 0, "", 0),
+            (lambda r: {"ids": r[:1]}, 0, 2),
+            (lambda r: {"ids": r}, 0, 0),
         ],
         ids=["single_dataset", "multiple_datasets"],
     )
-    def test_ids(self, HttpApiAuth, add_datasets_func, func, expected_code, expected_message, remaining):
+    def test_ids(self, HttpApiAuth, add_datasets_func, func, expected_code, remaining):
         dataset_ids = add_datasets_func
         if callable(func):
             payload = func(dataset_ids)
diff --git a/test/testcases/test_sdk_api/conftest.py b/test/testcases/test_sdk_api/conftest.py
index 5c74e3a27..11a258a5a 100644
--- a/test/testcases/test_sdk_api/conftest.py
+++ b/test/testcases/test_sdk_api/conftest.py
@@ -116,7 +116,7 @@ def clear_session_with_chat_assistants(request, add_chat_assistants):
 
 
 @pytest.fixture(scope="class")
-def add_dataset(request: FixtureRequest, client: RAGFlow):
+def add_dataset(request: FixtureRequest, client: RAGFlow) -> DataSet:
     def cleanup():
         client.delete_datasets(ids=None)
 
diff --git a/test/testcases/test_web_api/common.py b/test/testcases/test_web_api/common.py
new file mode 100644
index 000000000..69eba070d
--- /dev/null
+++ b/test/testcases/test_web_api/common.py
@@ -0,0 +1,93 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import requests
+from configs import HOST_ADDRESS
+
+HEADERS = {"Content-Type": "application/json"}
+
+KB_APP_URL = "/v1/kb"
+# FILE_API_URL = "/api/v1/datasets/{dataset_id}/documents"
+# FILE_CHUNK_API_URL = "/api/v1/datasets/{dataset_id}/chunks"
+# CHUNK_API_URL = "/api/v1/datasets/{dataset_id}/documents/{document_id}/chunks"
+# CHAT_ASSISTANT_API_URL = "/api/v1/chats"
+# SESSION_WITH_CHAT_ASSISTANT_API_URL = "/api/v1/chats/{chat_id}/sessions"
+# SESSION_WITH_AGENT_API_URL = "/api/v1/agents/{agent_id}/sessions"
+
+
+# DATASET MANAGEMENT
+def create_kb(auth, payload=None, *, headers=HEADERS, data=None):
+    res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/create", headers=headers, auth=auth, json=payload, data=data)
+    return res.json()
+
+
+def list_kbs(auth, params=None, payload=None, *, headers=HEADERS, data=None):
+    if payload is None:
+        payload = {}
+    res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/list", headers=headers, auth=auth, params=params, json=payload, data=data)
+    return res.json()
+
+
+def update_kb(auth, payload=None, *, headers=HEADERS, data=None):
+    res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/update", headers=headers, auth=auth, json=payload, data=data)
+    return res.json()
+
+
+def rm_kb(auth, payload=None, *, headers=HEADERS, data=None):
+    res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/rm", headers=headers, auth=auth, json=payload, data=data)
+    return res.json()
+
+
+def detail_kb(auth, params=None, *, headers=HEADERS):
+    res = requests.get(url=f"{HOST_ADDRESS}{KB_APP_URL}/detail", headers=headers, auth=auth, params=params)
+    return res.json()
+
+
+def list_tags_from_kbs(auth, params=None, *, headers=HEADERS):
+    res = requests.get(url=f"{HOST_ADDRESS}{KB_APP_URL}/tags", headers=headers, auth=auth, params=params)
+    return res.json()
+
+
+def list_tags(auth, dataset_id, params=None, *, headers=HEADERS):
+    res = requests.get(url=f"{HOST_ADDRESS}{KB_APP_URL}/{dataset_id}/tags", headers=headers, auth=auth, params=params)
+    return res.json()
+
+
+def rm_tags(auth, dataset_id, payload=None, *, headers=HEADERS, data=None):
+    res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/{dataset_id}/rm_tags", headers=headers, auth=auth, json=payload, data=data)
+    return res.json()
+
+
+def rename_tags(auth, dataset_id, payload=None, *, headers=HEADERS, data=None):
+    res = requests.post(url=f"{HOST_ADDRESS}{KB_APP_URL}/{dataset_id}/rename_tags", headers=headers, auth=auth, json=payload, data=data)
+    return res.json()
+
+
+def knowledge_graph(auth, dataset_id, params=None, *, headers=HEADERS):
+    res = requests.get(url=f"{HOST_ADDRESS}{KB_APP_URL}/{dataset_id}/knowledge_graph", headers=headers, auth=auth, params=params)
+    return res.json()
+
+
+def delete_knowledge_graph(auth, dataset_id, payload=None, *, headers=HEADERS, data=None):
+    res = requests.delete(url=f"{HOST_ADDRESS}{KB_APP_URL}/{dataset_id}/delete_knowledge_graph", headers=headers, auth=auth, json=payload, data=data)
+    return res.json()
+
+
+def batch_create_datasets(auth, num):
+    # Create `num` knowledge bases named kb_0 .. kb_{num-1}; return their kb_ids in order.
+    return [
+        create_kb(auth, {"name": f"kb_{index}"})["data"]["kb_id"]
+        for index in range(num)
+    ]
diff --git a/test/testcases/test_web_api/conftest.py b/test/testcases/test_web_api/conftest.py
new file mode 100644
index 000000000..44c80d9af
--- /dev/null
+++ b/test/testcases/test_web_api/conftest.py
@@ -0,0 +1,100 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import pytest
+from common import (
+    batch_create_datasets,
+)
+from configs import HOST_ADDRESS, VERSION
+from libs.auth import RAGFlowWebApiAuth
+from pytest import FixtureRequest
+from ragflow_sdk import RAGFlow
+from utils.file_utils import (
+    create_docx_file,
+    create_eml_file,
+    create_excel_file,
+    create_html_file,
+    create_image_file,
+    create_json_file,
+    create_md_file,
+    create_pdf_file,
+    create_ppt_file,
+    create_txt_file,
+)
+
+
+@pytest.fixture
+def generate_test_files(request: FixtureRequest, tmp_path):
+    file_creators = {
+        "docx": (tmp_path / "ragflow_test.docx", create_docx_file),
+        "excel": (tmp_path / "ragflow_test.xlsx", create_excel_file),
+        "ppt": (tmp_path / "ragflow_test.pptx", create_ppt_file),
+        "image": (tmp_path / "ragflow_test.png", create_image_file),
+        "pdf": (tmp_path / "ragflow_test.pdf", create_pdf_file),
+        "txt": (tmp_path / "ragflow_test.txt", create_txt_file),
+        "md": (tmp_path / "ragflow_test.md", create_md_file),
+        "json": (tmp_path / "ragflow_test.json", create_json_file),
+        "eml": (tmp_path / "ragflow_test.eml", create_eml_file),
+        "html": (tmp_path / "ragflow_test.html", create_html_file),
+    }
+    # request.param exists only under indirect parametrization; fall back to "" (= every type).
+    files = {}
+    for file_type, (file_path, creator_func) in file_creators.items():
+        if getattr(request, "param", "") in ["", file_type]:
+            creator_func(file_path)
+            files[file_type] = file_path
+    return files
+
+
+@pytest.fixture(scope="class")
+def ragflow_tmp_dir(request, tmp_path_factory):
+    class_name = request.cls.__name__
+    return tmp_path_factory.mktemp(class_name)
+
+
+@pytest.fixture(scope="session")
+def WebApiAuth(auth):
+    return RAGFlowWebApiAuth(auth)
+
+
+@pytest.fixture(scope="session")
+def client(token: str) -> RAGFlow:
+    return RAGFlow(api_key=token, base_url=HOST_ADDRESS, version=VERSION)
+
+
+@pytest.fixture(scope="function")
+def clear_datasets(request: FixtureRequest, client: RAGFlow):
+    def cleanup():
+        client.delete_datasets(ids=None)
+
+    request.addfinalizer(cleanup)
+
+
+@pytest.fixture(scope="class")
+def add_dataset(request: FixtureRequest, client: RAGFlow, WebApiAuth: RAGFlowWebApiAuth) -> str:
+    def cleanup():
+        client.delete_datasets(ids=None)
+
+    request.addfinalizer(cleanup)
+    return batch_create_datasets(WebApiAuth, 1)[0]
+
+
+@pytest.fixture(scope="function")
+def add_dataset_func(request: FixtureRequest, client: RAGFlow, WebApiAuth: RAGFlowWebApiAuth) -> str:
+    def cleanup():
+        client.delete_datasets(ids=None)
+
+    request.addfinalizer(cleanup)
+    return batch_create_datasets(WebApiAuth, 1)[0]
diff --git a/test/testcases/test_web_api/test_kb_app/conftest.py b/test/testcases/test_web_api/test_kb_app/conftest.py
new file mode 100644
index 000000000..0a435483c
--- /dev/null
+++ b/test/testcases/test_web_api/test_kb_app/conftest.py
@@ -0,0 +1,38 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import pytest
+from common import batch_create_datasets
+from libs.auth import RAGFlowWebApiAuth
+from pytest import FixtureRequest
+from ragflow_sdk import RAGFlow
+
+
+@pytest.fixture(scope="class")
+def add_datasets(request: FixtureRequest, client: RAGFlow, WebApiAuth: RAGFlowWebApiAuth) -> list[str]:
+    def cleanup():
+        client.delete_datasets(ids=None)
+
+    request.addfinalizer(cleanup)
+    return batch_create_datasets(WebApiAuth, 5)
+
+
+@pytest.fixture(scope="function")
+def add_datasets_func(request: FixtureRequest, client: RAGFlow, WebApiAuth: RAGFlowWebApiAuth) -> list[str]:
+    def cleanup():
+        client.delete_datasets(ids=None)
+
+    request.addfinalizer(cleanup)
+    return batch_create_datasets(WebApiAuth, 3)
diff --git a/test/testcases/test_web_api/test_kb_app/test_create_kb.py b/test/testcases/test_web_api/test_kb_app/test_create_kb.py
new file mode 100644
index 000000000..82f596491
--- /dev/null
+++ b/test/testcases/test_web_api/test_kb_app/test_create_kb.py
@@ -0,0 +1,109 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import pytest
+from common import create_kb
+from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
+from hypothesis import example, given, settings
+from libs.auth import RAGFlowWebApiAuth
+from utils.hypothesis_utils import valid_names
+
+
+@pytest.mark.usefixtures("clear_datasets")
+class TestAuthorization:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_code, expected_message",
+        [
+            (None, 401, "<Unauthorized '401: Unauthorized'>"),
+            (RAGFlowWebApiAuth(INVALID_API_TOKEN), 401, "<Unauthorized '401: Unauthorized'>"),
+        ],
+        ids=["empty_auth", "invalid_api_token"],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
+        res = create_kb(invalid_auth, {"name": "auth_test"})
+        assert res["code"] == expected_code, res
+        assert res["message"] == expected_message, res
+
+
+@pytest.mark.usefixtures("clear_datasets")
+class TestCapability:
+    @pytest.mark.p3
+    def test_create_kb_1k(self, WebApiAuth):
+        for i in range(1_000):
+            payload = {"name": f"dataset_{i}"}
+            res = create_kb(WebApiAuth, payload)
+            assert res["code"] == 0, f"Failed to create dataset {i}"
+
+    @pytest.mark.p3
+    def test_create_kb_concurrent(self, WebApiAuth):
+        count = 100
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [executor.submit(create_kb, WebApiAuth, {"name": f"dataset_{i}"}) for i in range(count)]
+        responses = list(as_completed(futures))
+        assert len(responses) == count, responses
+        assert all(future.result()["code"] == 0 for future in futures)
+
+
+@pytest.mark.usefixtures("clear_datasets")
+class TestDatasetCreate:
+    @pytest.mark.p1
+    @given(name=valid_names())
+    @example("a" * 128)
+    @settings(max_examples=20)
+    def test_name(self, WebApiAuth, name):
+        res = create_kb(WebApiAuth, {"name": name})
+        assert res["code"] == 0, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, expected_message",
+        [
+            ("", "Dataset name can't be empty."),
+            (" ", "Dataset name can't be empty."),
+            ("a" * (DATASET_NAME_LIMIT + 1), "Dataset name length is 129 which is large than 128"),
+            (0, "Dataset name must be string."),
+            (None, "Dataset name must be string."),
+        ],
+        ids=["empty_name", "space_name", "too_long_name", "invalid_name", "None_name"],
+    )
+    def test_name_invalid(self, WebApiAuth, name, expected_message):
+        payload = {"name": name}
+        res = create_kb(WebApiAuth, payload)
+        assert res["code"] == 102, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p3
+    def test_name_duplicated(self, WebApiAuth):
+        name = "duplicated_name"
+        payload = {"name": name}
+        res = create_kb(WebApiAuth, payload)
+        assert res["code"] == 0, res
+
+        res = create_kb(WebApiAuth, payload)
+        assert res["code"] == 0, res
+
+    @pytest.mark.p3
+    def test_name_case_insensitive(self, WebApiAuth):
+        name = "CaseInsensitive"
+        payload = {"name": name.upper()}
+        res = create_kb(WebApiAuth, payload)
+        assert res["code"] == 0, res
+
+        payload = {"name": name.lower()}
+        res = create_kb(WebApiAuth, payload)
+        assert res["code"] == 0, res
diff --git a/test/testcases/test_web_api/test_kb_app/test_detail_kb.py b/test/testcases/test_web_api/test_kb_app/test_detail_kb.py
new file mode 100644
index 000000000..a3c0f82b1
--- /dev/null
+++ b/test/testcases/test_web_api/test_kb_app/test_detail_kb.py
@@ -0,0 +1,53 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import pytest
+from common import (
+    detail_kb,
+)
+from configs import INVALID_API_TOKEN
+from libs.auth import RAGFlowWebApiAuth
+
+
+class TestAuthorization:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_code, expected_message",
+        [
+            (None, 401, "<Unauthorized '401: Unauthorized'>"),
+            (RAGFlowWebApiAuth(INVALID_API_TOKEN), 401, "<Unauthorized '401: Unauthorized'>"),
+        ],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
+        res = detail_kb(invalid_auth)
+        assert res["code"] == expected_code, res
+        assert res["message"] == expected_message, res
+
+
+class TestDatasetsDetail:
+    @pytest.mark.p1
+    def test_kb_id(self, WebApiAuth, add_dataset):
+        kb_id = add_dataset
+        payload = {"kb_id": kb_id}
+        res = detail_kb(WebApiAuth, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["name"] == "kb_0"
+
+    @pytest.mark.p2
+    def test_id_wrong_uuid(self, WebApiAuth):
+        payload = {"kb_id": "d94a8dc02c9711f0930f7fbc369eab6d"}
+        res = detail_kb(WebApiAuth, payload)
+        assert res["code"] == 103, res
+        assert "Only owner of knowledgebase authorized for this operation." in res["message"], res
diff --git a/test/testcases/test_web_api/test_kb_app/test_list_kbs.py b/test/testcases/test_web_api/test_kb_app/test_list_kbs.py
new file mode 100644
index 000000000..5d29968d9
--- /dev/null
+++ b/test/testcases/test_web_api/test_kb_app/test_list_kbs.py
@@ -0,0 +1,184 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import pytest
+from common import list_kbs
+from configs import INVALID_API_TOKEN
+from libs.auth import RAGFlowWebApiAuth
+from utils import is_sorted
+
+
+class TestAuthorization:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_code, expected_message",
+        [
+            (None, 401, "<Unauthorized '401: Unauthorized'>"),
+            (RAGFlowWebApiAuth(INVALID_API_TOKEN), 401, "<Unauthorized '401: Unauthorized'>"),
+        ],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
+        res = list_kbs(invalid_auth)
+        assert res["code"] == expected_code, res
+        assert res["message"] == expected_message, res
+
+
+class TestCapability:
+    @pytest.mark.p3
+    def test_concurrent_list(self, WebApiAuth):
+        count = 100
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = [executor.submit(list_kbs, WebApiAuth) for i in range(count)]
+        responses = list(as_completed(futures))
+        assert len(responses) == count, responses
+        assert all(future.result()["code"] == 0 for future in futures)
+
+
+@pytest.mark.usefixtures("add_datasets")
+class TestDatasetsList:
+    @pytest.mark.p1
+    def test_params_unset(self, WebApiAuth):
+        res = list_kbs(WebApiAuth, None)
+        assert res["code"] == 0, res
+        assert len(res["data"]["kbs"]) == 5, res
+
+    @pytest.mark.p2
+    def test_params_empty(self, WebApiAuth):
+        res = list_kbs(WebApiAuth, {})
+        assert res["code"] == 0, res
+        assert len(res["data"]["kbs"]) == 5, res
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "params, expected_page_size",
+        [
+            ({"page": 2, "page_size": 2}, 2),
+            ({"page": 3, "page_size": 2}, 1),
+            ({"page": 4, "page_size": 2}, 0),
+            ({"page": "2", "page_size": 2}, 2),
+            ({"page": 1, "page_size": 10}, 5),
+        ],
+        ids=["normal_middle_page", "normal_last_partial_page", "beyond_max_page", "string_page_number", "full_data_single_page"],
+    )
+    def test_page(self, WebApiAuth, params, expected_page_size):
+        res = list_kbs(WebApiAuth, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]["kbs"]) == expected_page_size, res
+
+    @pytest.mark.skip
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params, expected_code, expected_message",
+        [
+            ({"page": 0}, 101, "Input should be greater than or equal to 1"),
+            ({"page": "a"}, 101, "Input should be a valid integer, unable to parse string as an integer"),
+        ],
+        ids=["page_0", "page_a"],
+    )
+    def test_page_invalid(self, WebApiAuth, params, expected_code, expected_message):
+        res = list_kbs(WebApiAuth, params=params)
+        assert res["code"] == expected_code, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p2
+    def test_page_none(self, WebApiAuth):
+        params = {"page": None}
+        res = list_kbs(WebApiAuth, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]["kbs"]) == 5, res
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "params, expected_page_size",
+        [
+            ({"page": 1, "page_size": 1}, 1),
+            ({"page": 1, "page_size": 3}, 3),
+            ({"page": 1, "page_size": 5}, 5),
+            ({"page": 1, "page_size": 6}, 5),
+            ({"page": 1, "page_size": "1"}, 1),
+        ],
+        ids=["min_valid_page_size", "medium_page_size", "page_size_equals_total", "page_size_exceeds_total", "string_type_page_size"],
+    )
+    def test_page_size(self, WebApiAuth, params, expected_page_size):
+        res = list_kbs(WebApiAuth, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]["kbs"]) == expected_page_size, res
+
+    @pytest.mark.skip
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params, expected_code, expected_message",
+        [
+            ({"page_size": 0}, 101, "Input should be greater than or equal to 1"),
+            ({"page_size": "a"}, 101, "Input should be a valid integer, unable to parse string as an integer"),
+        ],
+    )
+    def test_page_size_invalid(self, WebApiAuth, params, expected_code, expected_message):
+        res = list_kbs(WebApiAuth, params)
+        assert res["code"] == expected_code, res
+        assert expected_message in res["message"], res
+
+    @pytest.mark.p2
+    def test_page_size_none(self, WebApiAuth):
+        params = {"page_size": None}
+        res = list_kbs(WebApiAuth, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]["kbs"]) == 5, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params, assertions",
+        [
+            ({"orderby": "update_time"}, lambda r: (is_sorted(r["data"]["kbs"], "update_time", True))),
+        ],
+        ids=["orderby_update_time"],
+    )
+    def test_orderby(self, WebApiAuth, params, assertions):
+        res = list_kbs(WebApiAuth, params)
+        assert res["code"] == 0, res
+        if callable(assertions):
+            assert assertions(res), res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params, assertions",
+        [
+            ({"desc": "True"}, lambda r: (is_sorted(r["data"]["kbs"], "update_time", True))),
+            ({"desc": "False"}, lambda r: (is_sorted(r["data"]["kbs"], "update_time", False))),
+        ],
+        ids=["desc=True", "desc=False"],
+    )
+    def test_desc(self, WebApiAuth, params, assertions):
+        res = list_kbs(WebApiAuth, params)
+
+        assert res["code"] == 0, res
+        if callable(assertions):
+            assert assertions(res), res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "params, expected_page_size",
+        [
+            ({"parser_id": "naive"}, 5),
+            ({"parser_id": "qa"}, 0),
+        ],
+        ids=["naive", "qa"],  # each id mirrors the parser_id value it labels
+    )
+    def test_parser_id(self, WebApiAuth, params, expected_page_size):
+        res = list_kbs(WebApiAuth, params)
+        assert res["code"] == 0, res
+        assert len(res["data"]["kbs"]) == expected_page_size, res
diff --git a/test/testcases/test_web_api/test_kb_app/test_rm_kb.py b/test/testcases/test_web_api/test_kb_app/test_rm_kb.py
new file mode 100644
index 000000000..ff20ea8c3
--- /dev/null
+++ b/test/testcases/test_web_api/test_kb_app/test_rm_kb.py
@@ -0,0 +1,61 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+import pytest
+from common import (
+    list_kbs,
+    rm_kb,
+)
+from configs import INVALID_API_TOKEN
+from libs.auth import RAGFlowWebApiAuth
+
+
+class TestAuthorization:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_code, expected_message",
+        [
+            (None, 401, "<Unauthorized '401: Unauthorized'>"),
+            (RAGFlowWebApiAuth(INVALID_API_TOKEN), 401, "<Unauthorized '401: Unauthorized'>"),
+        ],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
+        res = rm_kb(invalid_auth)
+        assert res["code"] == expected_code, res
+        assert res["message"] == expected_message, res
+
+
+class TestDatasetsDelete:
+    @pytest.mark.p1
+    def test_kb_id(self, WebApiAuth, add_datasets_func):
+        kb_ids = add_datasets_func
+        payload = {"kb_id": kb_ids[0]}
+        res = rm_kb(WebApiAuth, payload)
+        assert res["code"] == 0, res
+
+        res = list_kbs(WebApiAuth)
+        assert len(res["data"]["kbs"]) == 2, res
+
+    @pytest.mark.p2
+    @pytest.mark.usefixtures("add_dataset_func")
+    def test_id_wrong_uuid(self, WebApiAuth):
+        payload = {"kb_id": "d94a8dc02c9711f0930f7fbc369eab6d"}
+        res = rm_kb(WebApiAuth, payload)
+        assert res["code"] == 109, res
+        assert "No authorization." in res["message"], res
+
+        res = list_kbs(WebApiAuth)
+        assert len(res["data"]["kbs"]) == 1, res
diff --git a/test/testcases/test_web_api/test_kb_app/test_update_kb.py b/test/testcases/test_web_api/test_kb_app/test_update_kb.py
new file mode 100644
index 000000000..6505dd1b9
--- /dev/null
+++ b/test/testcases/test_web_api/test_kb_app/test_update_kb.py
@@ -0,0 +1,378 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import pytest
+from common import update_kb
+from configs import DATASET_NAME_LIMIT, INVALID_API_TOKEN
+from hypothesis import HealthCheck, example, given, settings
+from libs.auth import RAGFlowWebApiAuth
+from utils import encode_avatar
+from utils.file_utils import create_image_file
+from utils.hypothesis_utils import valid_names
+
+
+class TestAuthorization:
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "invalid_auth, expected_code, expected_message",
+        [
+            (None, 401, "<Unauthorized '401: Unauthorized'>"),
+            (RAGFlowWebApiAuth(INVALID_API_TOKEN), 401, "<Unauthorized '401: Unauthorized'>"),
+        ],
+        ids=["empty_auth", "invalid_api_token"],
+    )
+    def test_auth_invalid(self, invalid_auth, expected_code, expected_message):
+        """Expect update_kb with bad credentials to return the parametrized code/message; the payload is a well-formed dict so only auth can cause the rejection."""
+        res = update_kb(invalid_auth, {"kb_id": "dataset_id", "name": "auth_invalid", "description": "", "parser_id": "naive"})
+        assert res["code"] == expected_code and res["message"] == expected_message, res
+
+
+class TestCapability:
+    """Concurrency check for update_kb."""
+
+    @pytest.mark.p3
+    def test_update_dateset_concurrent(self, WebApiAuth, add_dataset_func):
+        """Submit 100 update_kb calls for the same KB on a 5-worker thread pool and expect code 0 from each."""
+        total = 100
+
+        def build_payload(index):
+            return {
+                "kb_id": add_dataset_func,
+                "name": f"dataset_{index}",
+                "description": "",
+                "parser_id": "naive",
+            }
+
+        with ThreadPoolExecutor(max_workers=5) as pool:
+            pending = [pool.submit(update_kb, WebApiAuth, build_payload(index)) for index in range(total)]
+        # Leaving the executor context waits for every submitted call, so as_completed yields them all at once.
+        finished = list(as_completed(pending))
+        assert len(finished) == total, finished
+        assert all(task.result()["code"] == 0 for task in pending)
+
+
+class TestDatasetUpdate:
+    @pytest.mark.p3
+    def test_dataset_id_not_uuid(self, WebApiAuth):
+        """Expect update_kb with kb_id "not_uuid" to return code 109 and a "No authorization." message."""
+        reply = update_kb(WebApiAuth, {"name": "not uuid", "description": "", "parser_id": "naive", "kb_id": "not_uuid"})
+        assert reply["code"] == 109, reply
+        assert "No authorization." in reply["message"], reply
+
+    @pytest.mark.p1
+    @given(name=valid_names())
+    @example("a" * 128)
+    @settings(max_examples=20, suppress_health_check=[HealthCheck.function_scoped_fixture])
+    def test_name(self, WebApiAuth, add_dataset_func, name):
+        """Expect every generated name, plus the explicit 128-character example, to be accepted and returned as-is."""
+        # The function_scoped_fixture health check is suppressed so the fixtures can be combined with @given.
+        reply = update_kb(WebApiAuth, {"name": name, "description": "", "parser_id": "naive", "kb_id": add_dataset_func})
+        assert reply["code"] == 0, reply
+        assert reply["data"]["name"] == name, reply
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "name, expected_message",
+        [
+            ("", "Dataset name can't be empty."),
+            (" ", "Dataset name can't be empty."),
+            ("a" * (DATASET_NAME_LIMIT + 1), "Dataset name length is 129 which is large than 128"),
+            (0, "Dataset name must be string."),
+            (None, "Dataset name must be string."),
+        ],
+        ids=["empty_name", "space_name", "too_long_name", "invalid_name", "None_name"],
+    )
+    def test_name_invalid(self, WebApiAuth, add_dataset_func, name, expected_message):
+        """Expect each invalid name to make update_kb return code 102 with the matching message fragment."""
+        request_body = {"name": name, "description": "", "parser_id": "naive", "kb_id": add_dataset_func}
+        outcome = update_kb(WebApiAuth, request_body)
+        assert outcome["code"] == 102, outcome
+        assert expected_message in outcome["message"], outcome
+
+    @pytest.mark.p3
+    def test_name_duplicated(self, WebApiAuth, add_datasets_func):
+        """Expect renaming the first fixture KB to "kb_1" to be rejected with code 102 as a duplicated name."""
+        # presumably "kb_1" is the name of another KB created by add_datasets_func -- verify against the fixture
+        rename_request = {"name": "kb_1", "description": "", "parser_id": "naive", "kb_id": add_datasets_func[0]}
+        reply = update_kb(WebApiAuth, rename_request)
+        assert reply["code"] == 102, reply
+        assert reply["message"] == "Duplicated knowledgebase name.", reply
+
+    @pytest.mark.p3
+    def test_name_case_insensitive(self, WebApiAuth, add_datasets_func):
+        """Expect renaming the first fixture KB to upper-case "KB_1" to be rejected with code 102 as a duplicate too."""
+        # presumably a KB named "kb_1" already exists via add_datasets_func -- verify against the fixture
+        rename_request = {"name": "KB_1", "description": "", "parser_id": "naive", "kb_id": add_datasets_func[0]}
+        reply = update_kb(WebApiAuth, rename_request)
+        assert reply["code"] == 102, reply
+        assert reply["message"] == "Duplicated knowledgebase name.", reply
+
+    @pytest.mark.p2
+    def test_avatar(self, WebApiAuth, add_dataset_func, tmp_path):
+        """Expect an avatar sent as a base64 PNG data URI to be accepted and returned unchanged."""
+        image_path = create_image_file(tmp_path / "ragflow_test.png")
+        data_uri = f"data:image/png;base64,{encode_avatar(image_path)}"
+        request_body = {
+            "name": "avatar",
+            "description": "",
+            "parser_id": "naive",
+            "kb_id": add_dataset_func,
+            "avatar": data_uri,
+        }
+        reply = update_kb(WebApiAuth, request_body)
+        assert reply["code"] == 0 and reply["data"]["avatar"] == data_uri, reply
+
+    @pytest.mark.p2
+    def test_description(self, WebApiAuth, add_dataset_func):
+        """Expect update_kb to accept a non-empty description and return it in data["description"] unchanged."""
+        request_body = {"name": "description", "description": "description", "parser_id": "naive", "kb_id": add_dataset_func}
+        reply = update_kb(WebApiAuth, request_body)
+        assert reply["code"] == 0, reply
+        assert reply["data"]["description"] == "description", reply
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "embedding_model",
+        [
+            "BAAI/bge-large-zh-v1.5@BAAI",
+            "maidalun1020/bce-embedding-base_v1@Youdao",
+            "embedding-3@ZHIPU-AI",
+        ],
+        ids=["builtin_baai", "builtin_youdao", "tenant_zhipu"],
+    )
+    def test_embedding_model(self, WebApiAuth, add_dataset_func, embedding_model):
+        """Expect update_kb to accept each embd_id value and return it in data["embd_id"] unchanged."""
+        request_body = {"name": "embedding_model", "description": "", "parser_id": "naive", "kb_id": add_dataset_func, "embd_id": embedding_model}
+        reply = update_kb(WebApiAuth, request_body)
+        assert reply["code"] == 0, reply
+        assert reply["data"]["embd_id"] == embedding_model, reply
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "permission",
+        [
+            "me",
+            "team",
+        ],
+        ids=["me", "team"],
+    )
+    def test_permission(self, WebApiAuth, add_dataset_func, permission):
+        """Expect update_kb to accept each permission and return its lower-cased, stripped form."""
+        request_body = {"name": "permission", "description": "", "parser_id": "naive", "kb_id": add_dataset_func, "permission": permission}
+        reply = update_kb(WebApiAuth, request_body)
+        assert reply["code"] == 0, reply
+        assert reply["data"]["permission"] == permission.lower().strip(), reply
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "chunk_method",
+        [
+            "naive",
+            "book",
+            "email",
+            "laws",
+            "manual",
+            "one",
+            "paper",
+            "picture",
+            "presentation",
+            "qa",
+            "table",
+            pytest.param("tag", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="Infinity does not support parser_id=tag")),
+        ],
+        ids=["naive", "book", "email", "laws", "manual", "one", "paper", "picture", "presentation", "qa", "table", "tag"],
+    )
+    def test_chunk_method(self, WebApiAuth, add_dataset_func, chunk_method):
+        """Expect update_kb to accept each parser_id and return it unchanged; "tag" is skipped on the infinity engine."""
+        request_body = {"name": "chunk_method", "description": "", "parser_id": chunk_method, "kb_id": add_dataset_func}
+        reply = update_kb(WebApiAuth, request_body)
+        assert reply["code"] == 0, reply
+        assert reply["data"]["parser_id"] == chunk_method, reply
+
+    @pytest.mark.p1
+    @pytest.mark.skipif(os.getenv("DOC_ENGINE") != "infinity", reason="Only applicable when DOC_ENGINE is infinity")
+    def test_chunk_method_tag_with_infinity(self, WebApiAuth, add_dataset_func):
+        """Runs only when DOC_ENGINE is infinity; expects parser_id "tag" to be rejected with code 103."""
+        payload = {"name": "chunk_method", "description": "", "parser_id": "tag", "kb_id": add_dataset_func}
+        res = update_kb(WebApiAuth, payload)
+        assert res["code"] == 103, res
+        assert res["message"] == "The chunking method Tag has not been supported by Infinity yet.", res
+
+    @pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="#8208")
+    @pytest.mark.p2
+    @pytest.mark.parametrize("pagerank", [0, 50, 100], ids=["min", "mid", "max"])
+    def test_pagerank(self, WebApiAuth, add_dataset_func, pagerank):
+        """Expect update_kb to accept pagerank 0, 50 and 100 and return the same value (skipped on infinity)."""
+        request_body = {"name": "pagerank", "description": "", "parser_id": "naive", "kb_id": add_dataset_func, "pagerank": pagerank}
+        reply = update_kb(WebApiAuth, request_body)
+        assert reply["code"] == 0, reply
+        assert reply["data"]["pagerank"] == pagerank, reply
+
+    @pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="#8208")
+    @pytest.mark.p2
+    def test_pagerank_set_to_0(self, WebApiAuth, add_dataset_func):
+        """Set pagerank to 50 and then back to 0 on the same KB; both updates must succeed and echo the value."""
+        base = {"name": "pagerank", "description": "", "parser_id": "naive", "kb_id": add_dataset_func}
+
+        raised = update_kb(WebApiAuth, {**base, "pagerank": 50})
+        assert raised["code"] == 0, raised
+        assert raised["data"]["pagerank"] == 50, raised
+
+        reset = update_kb(WebApiAuth, {**base, "pagerank": 0})
+        assert reset["code"] == 0, reset
+        assert reset["data"]["pagerank"] == 0, reset
+
+    @pytest.mark.skipif(os.getenv("DOC_ENGINE") != "infinity", reason="#8208")
+    @pytest.mark.p2
+    def test_pagerank_infinity(self, WebApiAuth, add_dataset_func):
+        """Runs only when DOC_ENGINE is infinity; expects setting pagerank to be rejected with code 102."""
+        request_body = {"name": "pagerank", "description": "", "parser_id": "naive", "kb_id": add_dataset_func, "pagerank": 50}
+        reply = update_kb(WebApiAuth, request_body)
+        assert reply["code"] == 102, reply
+        assert reply["message"] == "'pagerank' can only be set when doc_engine is elasticsearch", reply
+
+    @pytest.mark.p1
+    @pytest.mark.parametrize(
+        "parser_config",
+        [
+            {"auto_keywords": 0},
+            {"auto_keywords": 16},
+            {"auto_keywords": 32},
+            {"auto_questions": 0},
+            {"auto_questions": 5},
+            {"auto_questions": 10},
+            {"chunk_token_num": 1},
+            {"chunk_token_num": 1024},
+            {"chunk_token_num": 2048},
+            {"delimiter": "\n"},
+            {"delimiter": " "},
+            {"html4excel": True},
+            {"html4excel": False},
+            {"layout_recognize": "DeepDOC"},
+            {"layout_recognize": "Plain Text"},
+            {"tag_kb_ids": ["1", "2"]},
+            {"topn_tags": 1},
+            {"topn_tags": 5},
+            {"topn_tags": 10},
+            {"filename_embd_weight": 0.1},
+            {"filename_embd_weight": 0.5},
+            {"filename_embd_weight": 1.0},
+            {"task_page_size": 1},
+            {"task_page_size": None},
+            {"pages": [[1, 100]]},
+            {"pages": None},
+            {"graphrag": {"use_graphrag": True}},
+            {"graphrag": {"use_graphrag": False}},
+            {"graphrag": {"entity_types": ["age", "sex", "height", "weight"]}},
+            {"graphrag": {"method": "general"}},
+            {"graphrag": {"method": "light"}},
+            {"graphrag": {"community": True}},
+            {"graphrag": {"community": False}},
+            {"graphrag": {"resolution": True}},
+            {"graphrag": {"resolution": False}},
+            {"raptor": {"use_raptor": True}},
+            {"raptor": {"use_raptor": False}},
+            {"raptor": {"prompt": "Who are you?"}},
+            {"raptor": {"max_token": 1}},
+            {"raptor": {"max_token": 1024}},
+            {"raptor": {"max_token": 2048}},
+            {"raptor": {"threshold": 0.0}},
+            {"raptor": {"threshold": 0.5}},
+            {"raptor": {"threshold": 1.0}},
+            {"raptor": {"max_cluster": 1}},
+            {"raptor": {"max_cluster": 512}},
+            {"raptor": {"max_cluster": 1024}},
+            {"raptor": {"random_seed": 0}},
+        ],
+        ids=[
+            "auto_keywords_min",
+            "auto_keywords_mid",
+            "auto_keywords_max",
+            "auto_questions_min",
+            "auto_questions_mid",
+            "auto_questions_max",
+            "chunk_token_num_min",
+            "chunk_token_num_mid",
+            "chunk_token_num_max",
+            "delimiter",
+            "delimiter_space",
+            "html4excel_true",
+            "html4excel_false",
+            "layout_recognize_DeepDOC",
+            "layout_recognize_plain_text",
+            "tag_kb_ids",
+            "topn_tags_min",
+            "topn_tags_mid",
+            "topn_tags_max",
+            "filename_embd_weight_min",
+            "filename_embd_weight_mid",
+            "filename_embd_weight_max",
+            "task_page_size_min",
+            "task_page_size_None",
+            "pages",
+            "pages_none",
+            "graphrag_true",
+            "graphrag_false",
+            "graphrag_entity_types",
+            "graphrag_method_general",
+            "graphrag_method_light",
+            "graphrag_community_true",
+            "graphrag_community_false",
+            "graphrag_resolution_true",
+            "graphrag_resolution_false",
+            "raptor_true",
+            "raptor_false",
+            "raptor_prompt",
+            "raptor_max_token_min",
+            "raptor_max_token_mid",
+            "raptor_max_token_max",
+            "raptor_threshold_min",
+            "raptor_threshold_mid",
+            "raptor_threshold_max",
+            "raptor_max_cluster_min",
+            "raptor_max_cluster_mid",
+            "raptor_max_cluster_max",
+            "raptor_random_seed_min",
+        ],
+    )
+    def test_parser_config(self, WebApiAuth, add_dataset_func, parser_config):
+        """Expect update_kb to accept each parser_config fragment and return it unchanged; ids pair with the values by position."""
+        payload = {"name": "parser_config", "description": "", "parser_id": "naive", "kb_id": add_dataset_func, "parser_config": parser_config}
+        res = update_kb(WebApiAuth, payload)
+        assert res["code"] == 0, res
+        assert res["data"]["parser_config"] == parser_config, res
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize(
+        "payload",
+        [
+            {"id": "id"},
+            {"tenant_id": "e57c1966f99211efb41e9e45646e0111"},
+            {"created_by": "created_by"},
+            {"create_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
+            {"create_time": 1741671443322},
+            {"update_date": "Tue, 11 Mar 2025 13:37:23 GMT"},
+            {"update_time": 1741671443339},
+        ],
+    )
+    def test_field_unsupported(self, WebApiAuth, add_dataset_func, payload):
+        """Expect an otherwise valid update carrying one extra field to return code 101 with an "isn't allowed" message."""
+        request_body = {"name": "field_unsupported", "description": "", "parser_id": "naive", "kb_id": add_dataset_func, **payload}
+        reply = update_kb(WebApiAuth, request_body)
+        assert reply["code"] == 101, reply
+        assert "isn't allowed" in reply["message"], reply