Feat: update and add new tests for web api apps (#12714)

### What problem does this PR solve?

This PR adds missing web API tests (system, search, KB, LLM, plugin, connector). It also addresses a contract mismatch that was causing test failures: metadata updates did not persist new keys (update‑only behavior).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [x] Other (please describe): Test coverage expansion and test helper instrumentation
2026-01-30 15:16:45 +08:00 · 2026-01-20 19:12:15 +08:00
parent aee9860970
commit 960ecd3158
14 changed files with 1623 additions and 11 deletions
--- a/test/testcases/test_web_api/test_kb_app/test_kb_pipeline_tasks.py
+++ b/test/testcases/test_web_api/test_kb_app/test_kb_pipeline_tasks.py
@ -0,0 +1,208 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import pytest
+from common import (
+    kb_delete_pipeline_logs,
+    kb_list_pipeline_dataset_logs,
+    kb_list_pipeline_logs,
+    kb_pipeline_log_detail,
+    kb_run_graphrag,
+    kb_run_mindmap,
+    kb_run_raptor,
+    kb_trace_graphrag,
+    kb_trace_mindmap,
+    kb_trace_raptor,
+    list_documents,
+    parse_documents,
+)
+from utils import wait_for
+
+# Value of a document's "run" field that _wait_for_docs_parsed treats as finished.
+# NOTE(review): presumably the server-side status code for a completed task — confirm against the API.
+TASK_STATUS_DONE = "3"
+
+def _find_task(data, task_id):
+    """Locate the task dict whose ``id`` equals ``task_id`` in a trace payload.
+
+    ``data`` may be the task dict itself, a dict carrying a ``tasks`` list, or
+    a bare list of task dicts. List entries that are not dicts are skipped.
+
+    Returns the first matching dict, or ``None`` when nothing matches or the
+    payload has none of the shapes above.
+    """
+    if isinstance(data, dict):
+        # The payload itself may already be the task we are looking for.
+        if data.get("id") == task_id:
+            return data
+        candidates = data.get("tasks")
+    else:
+        candidates = data
+    if not isinstance(candidates, list):
+        return None
+    matches = (entry for entry in candidates if isinstance(entry, dict) and entry.get("id") == task_id)
+    return next(matches, None)
+
+
+def _assert_progress_in_scale(progress, payload):
+    """Assert that ``progress`` is a plausible progress value and return its scale.
+
+    Progress may be reported as a fraction (0..1) or as a percentage (0..100).
+    The scale is inferred from the observed value: anything above 1 is treated
+    as a percentage.
+
+    Args:
+        progress: The value to validate.
+        payload: Object included in the assertion message to aid debugging.
+
+    Returns:
+        ``1`` for fractional progress, ``100`` for percentage progress.
+
+    Raises:
+        AssertionError: If ``progress`` is not an int/float (booleans are
+            rejected as well), is negative, or exceeds the inferred scale.
+    """
+    # bool is a subclass of int, so it has to be excluded explicitly.
+    is_number = isinstance(progress, (int, float)) and not isinstance(progress, bool)
+    assert is_number, payload
+    if progress < 0:
+        raise AssertionError(f"Negative progress is not expected: {payload}")
+    # Infer scale from observed payload (0..1 or 0..100).
+    scale = 100 if progress > 1 else 1
+    # The lower bound was checked above; only the upper bound remains.
+    assert progress <= scale, payload
+    return scale
+
+
+def _wait_for_task(trace_func, auth, kb_id, task_id, timeout=60):
+    """Poll ``trace_func`` until ``task_id`` shows up in the trace for ``kb_id``.
+
+    Polling is driven by the ``wait_for`` decorator, called with ``timeout``,
+    ``1`` and the message "Pipeline task trace timeout". A non-zero response
+    code counts as "not ready yet" rather than as a failure.
+    """
+
+    @wait_for(timeout, 1, "Pipeline task trace timeout")
+    def _task_visible():
+        response = trace_func(auth, {"kb_id": kb_id})
+        if response["code"] == 0:
+            return _find_task(response["data"], task_id) is not None
+        return False
+
+    _task_visible()
+
+
+def _wait_for_docs_parsed(auth, kb_id, timeout=60):
+    """Poll ``list_documents`` until every document of ``kb_id`` is fully parsed.
+
+    A document counts as parsed when its ``run`` field equals
+    ``TASK_STATUS_DONE`` and its ``progress`` (defaulting to 0 when absent)
+    has reached the top of its inferred scale. A non-zero response code counts
+    as "not ready yet". An empty document list satisfies the condition
+    immediately.
+
+    Polling is driven by the ``wait_for`` decorator, called with ``timeout``,
+    ``2`` and the message "Document parsing timeout". An implausible progress
+    value raises ``AssertionError`` from ``_assert_progress_in_scale``.
+    """
+
+    @wait_for(timeout, 2, "Document parsing timeout")
+    def _condition():
+        res = list_documents(auth, {"kb_id": kb_id})
+        if res["code"] != 0:
+            return False
+        for doc in res["data"]["docs"]:
+            progress = doc.get("progress", 0)
+            # Reuse the scale inferred by the validator rather than deriving it
+            # a second time, so the two can never drift apart.
+            scale = _assert_progress_in_scale(progress, doc)
+            if doc.get("run") != TASK_STATUS_DONE or progress < scale:
+                return False
+        return True
+
+    _condition()
+
+
+def _wait_for_pipeline_logs(auth, kb_id, timeout=30):
+    """Poll ``kb_list_pipeline_logs`` until ``kb_id`` has at least one log entry.
+
+    Polling is driven by the ``wait_for`` decorator, called with ``timeout``,
+    ``1`` and the message "Pipeline log timeout". A non-zero response code
+    counts as "not ready yet".
+    """
+
+    @wait_for(timeout, 1, "Pipeline log timeout")
+    def _logs_present():
+        response = kb_list_pipeline_logs(auth, params={"kb_id": kb_id}, payload={})
+        return response["code"] == 0 and bool(response["data"]["logs"])
+
+    _logs_present()
+
+
+class TestKbPipelineTasks:
+    """Run-then-trace round trips for the GraphRAG, RAPTOR and mind-map KB tasks."""
+
+    @staticmethod
+    def _run_then_trace(auth, kb_id, run_func, trace_func, task_id_key):
+        """Start a task with ``run_func`` and verify ``trace_func`` reports it.
+
+        ``task_id_key`` names the field of the run response's ``data`` that
+        carries the new task's id.
+        """
+        run_res = run_func(auth, {"kb_id": kb_id})
+        assert run_res["code"] == 0, run_res
+        task_id = run_res["data"][task_id_key]
+        assert task_id, run_res
+
+        # Wait until the task is visible, then re-fetch the trace for the checks.
+        _wait_for_task(trace_func, auth, kb_id, task_id)
+        trace_res = trace_func(auth, {"kb_id": kb_id})
+        assert trace_res["code"] == 0, trace_res
+        task = _find_task(trace_res["data"], task_id)
+        assert task, trace_res
+        assert task["id"] == task_id, trace_res
+        _assert_progress_in_scale(task.get("progress"), task)
+
+    @pytest.mark.p3
+    def test_graphrag_run_and_trace(self, WebApiAuth, add_chunks):
+        """A started GraphRAG task is visible through its trace endpoint."""
+        kb_id, _, _ = add_chunks
+        self._run_then_trace(WebApiAuth, kb_id, kb_run_graphrag, kb_trace_graphrag, "graphrag_task_id")
+
+    @pytest.mark.p3
+    def test_raptor_run_and_trace(self, WebApiAuth, add_chunks):
+        """A started RAPTOR task is visible through its trace endpoint."""
+        kb_id, _, _ = add_chunks
+        self._run_then_trace(WebApiAuth, kb_id, kb_run_raptor, kb_trace_raptor, "raptor_task_id")
+
+    @pytest.mark.p3
+    def test_mindmap_run_and_trace(self, WebApiAuth, add_chunks):
+        """A started mind-map task is visible through its trace endpoint."""
+        kb_id, _, _ = add_chunks
+        self._run_then_trace(WebApiAuth, kb_id, kb_run_mindmap, kb_trace_mindmap, "mindmap_task_id")
+
+
+class TestKbPipelineLogs:
+    """List, inspect and delete the pipeline logs of a knowledge base."""
+
+    @pytest.mark.p3
+    def test_pipeline_log_lifecycle(self, WebApiAuth, add_document):
+        """Parse a document, then list, fetch and delete its pipeline log."""
+        kb_id, document_id = add_document
+        # Fail fast when parsing cannot even be started; otherwise the waits
+        # below would only surface a generic timeout much later.
+        parse_res = parse_documents(WebApiAuth, {"doc_ids": [document_id], "run": "1"})
+        assert parse_res["code"] == 0, parse_res
+        _wait_for_docs_parsed(WebApiAuth, kb_id)
+        _wait_for_pipeline_logs(WebApiAuth, kb_id)
+
+        list_res = kb_list_pipeline_logs(WebApiAuth, params={"kb_id": kb_id}, payload={})
+        assert list_res["code"] == 0, list_res
+        assert "total" in list_res["data"], list_res
+        assert isinstance(list_res["data"]["logs"], list), list_res
+        assert list_res["data"]["logs"], list_res
+
+        # Inspect the first listed log in detail.
+        log_id = list_res["data"]["logs"][0]["id"]
+        detail_res = kb_pipeline_log_detail(WebApiAuth, {"log_id": log_id})
+        assert detail_res["code"] == 0, detail_res
+        detail = detail_res["data"]
+        assert detail["id"] == log_id, detail_res
+        assert detail["kb_id"] == kb_id, detail_res
+        for key in ["document_id", "task_type", "operation_status", "progress"]:
+            assert key in detail, detail_res
+
+        delete_res = kb_delete_pipeline_logs(WebApiAuth, params={"kb_id": kb_id}, payload={"log_ids": [log_id]})
+        assert delete_res["code"] == 0, delete_res
+        assert delete_res["data"] is True, delete_res
+
+        # Poll rather than check once: the deleted log may linger in the listing.
+        @wait_for(30, 1, "Pipeline log delete timeout")
+        def _condition():
+            res = kb_list_pipeline_logs(WebApiAuth, params={"kb_id": kb_id}, payload={})
+            if res["code"] != 0:
+                return False
+            return all(log.get("id") != log_id for log in res["data"]["logs"])
+
+        _condition()
+
+    @pytest.mark.p3
+    def test_list_pipeline_dataset_logs(self, WebApiAuth, add_document):
+        """The dataset-log listing returns a ``total`` and a ``logs`` list."""
+        kb_id, _ = add_document
+        res = kb_list_pipeline_dataset_logs(WebApiAuth, params={"kb_id": kb_id}, payload={})
+        assert res["code"] == 0, res
+        assert "total" in res["data"], res
+        assert isinstance(res["data"]["logs"], list), res
+
+    @pytest.mark.p3
+    def test_pipeline_log_detail_missing_id(self, WebApiAuth):
+        """Requesting log detail without a log id yields error code 101."""
+        res = kb_pipeline_log_detail(WebApiAuth, {})
+        assert res["code"] == 101, res
+        assert "Pipeline log ID" in res["message"], res
+
+    @pytest.mark.p3
+    def test_delete_pipeline_logs_empty(self, WebApiAuth, add_document):
+        """Deleting an empty list of log ids succeeds."""
+        kb_id, _ = add_document
+        res = kb_delete_pipeline_logs(WebApiAuth, params={"kb_id": kb_id}, payload={"log_ids": []})
+        assert res["code"] == 0, res
+        assert res["data"] is True, res
--- a/test/testcases/test_web_api/test_kb_app/test_kb_tags_meta.py
+++ b/test/testcases/test_web_api/test_kb_app/test_kb_tags_meta.py
@ -0,0 +1,251 @@
+#
+#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import uuid
+
+import pytest
+from common import (
+    kb_basic_info,
+    kb_get_meta,
+    kb_update_metadata_setting,
+    list_tags,
+    list_tags_from_kbs,
+    rename_tags,
+    rm_tags,
+    update_chunk,
+)
+from configs import INVALID_API_TOKEN
+from libs.auth import RAGFlowWebApiAuth
+from utils import wait_for
+
+# (auth, expected response code, expected message fragment) triples fed to the
+# parametrized authorization tests: no auth object at all, and an auth object
+# built from INVALID_API_TOKEN.
+INVALID_AUTH_CASES = [
+    (None, 401, "Unauthorized"),
+    (RAGFlowWebApiAuth(INVALID_API_TOKEN), 401, "Unauthorized"),
+]
+
+# Timeout handed to wait_for when polling list_tags for a seeded, removed or
+# renamed tag. NOTE(review): presumably seconds — confirm against utils.wait_for.
+TAG_SEED_TIMEOUT = 20
+
+
+def _wait_for_tag(auth, kb_id, tag, timeout=TAG_SEED_TIMEOUT):
+    """Report whether ``tag`` shows up in ``list_tags`` for ``kb_id`` in time.
+
+    Polling is driven by the ``wait_for`` decorator, called with ``timeout``,
+    ``1`` and the message "Tag seed timeout". An ``AssertionError`` escaping
+    the poll is translated into ``False`` instead of failing the caller.
+
+    Returns:
+        ``True`` once the tag is listed, ``False`` if the poll raised
+        ``AssertionError``.
+    """
+
+    @wait_for(timeout, 1, "Tag seed timeout")
+    def _tag_listed():
+        response = list_tags(auth, kb_id)
+        return response["code"] == 0 and tag in response["data"]
+
+    try:
+        _tag_listed()
+    except AssertionError:
+        return False
+    else:
+        return True
+
+
+def _seed_tag(auth, kb_id, document_id, chunk_id):
+    """Attach a freshly generated tag to a chunk and wait for it to be listed.
+
+    The tag is ``tag_`` plus eight hex characters of a random UUID. It is
+    written through ``update_chunk`` as the chunk's ``tag_kwd``, and the
+    chunk's ``content_with_weight`` is overwritten with "tag seed <tag>".
+
+    Returns:
+        The tag string, or ``None`` when ``_wait_for_tag`` reports that the
+        tag never appeared.
+
+    Raises:
+        AssertionError: If the chunk update returns a non-zero code.
+    """
+    # KB tags are derived from chunk tag_kwd, not document metadata.
+    tag = f"tag_{uuid.uuid4().hex[:8]}"
+    chunk_update = {
+        "doc_id": document_id,
+        "chunk_id": chunk_id,
+        "content_with_weight": f"tag seed {tag}",
+        "tag_kwd": [tag],
+    }
+    res = update_chunk(auth, chunk_update)
+    assert res["code"] == 0, res
+    return tag if _wait_for_tag(auth, kb_id, tag) else None
+
+
+class TestAuthorization:
+    """Each tag/metadata endpoint is called with every INVALID_AUTH_CASES entry."""
+
+    @staticmethod
+    def _assert_rejected(response, expected_code, expected_fragment):
+        """Check that the response carries the expected code and message fragment."""
+        assert response["code"] == expected_code, response
+        assert expected_fragment in response["message"], response
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
+    def test_list_tags_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
+        response = list_tags(invalid_auth, "kb_id")
+        self._assert_rejected(response, expected_code, expected_fragment)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
+    def test_list_tags_from_kbs_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
+        response = list_tags_from_kbs(invalid_auth, {"kb_ids": "kb_id"})
+        self._assert_rejected(response, expected_code, expected_fragment)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
+    def test_rm_tags_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
+        response = rm_tags(invalid_auth, "kb_id", {"tags": ["tag"]})
+        self._assert_rejected(response, expected_code, expected_fragment)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
+    def test_rename_tag_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
+        response = rename_tags(invalid_auth, "kb_id", {"from_tag": "old", "to_tag": "new"})
+        self._assert_rejected(response, expected_code, expected_fragment)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
+    def test_get_meta_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
+        response = kb_get_meta(invalid_auth, {"kb_ids": "kb_id"})
+        self._assert_rejected(response, expected_code, expected_fragment)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
+    def test_basic_info_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
+        response = kb_basic_info(invalid_auth, {"kb_id": "kb_id"})
+        self._assert_rejected(response, expected_code, expected_fragment)
+
+    @pytest.mark.p2
+    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
+    def test_update_metadata_setting_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
+        response = kb_update_metadata_setting(invalid_auth, {"kb_id": "kb_id", "metadata": {}})
+        self._assert_rejected(response, expected_code, expected_fragment)
+
+
+class TestKbTagsMeta:
+    """Happy-path checks for the tag, meta, basic-info and metadata-setting endpoints."""
+
+    @pytest.mark.p2
+    def test_list_tags(self, WebApiAuth, add_dataset):
+        """Listing the tags of a dataset returns a list."""
+        response = list_tags(WebApiAuth, add_dataset)
+        assert response["code"] == 0, response
+        assert isinstance(response["data"], list), response
+
+    @pytest.mark.p2
+    def test_list_tags_from_kbs(self, WebApiAuth, add_dataset):
+        """Listing tags via the ``kb_ids`` parameter returns a list."""
+        response = list_tags_from_kbs(WebApiAuth, {"kb_ids": add_dataset})
+        assert response["code"] == 0, response
+        assert isinstance(response["data"], list), response
+
+    @pytest.mark.p3
+    def test_rm_tags(self, WebApiAuth, add_chunks):
+        """A seeded tag disappears from ``list_tags`` after removal."""
+        kb_id, document_id, chunk_ids = add_chunks
+        seeded = _seed_tag(WebApiAuth, kb_id, document_id, chunk_ids[0])
+        if not seeded:
+            # Tag aggregation is index-backed; skip if it never surfaces.
+            pytest.skip("Seeded tag did not appear in list_tags.")
+
+        rm_res = rm_tags(WebApiAuth, kb_id, {"tags": [seeded]})
+        assert rm_res["code"] == 0, rm_res
+        assert rm_res["data"] is True, rm_res
+
+        @wait_for(TAG_SEED_TIMEOUT, 1, "Tag removal timeout")
+        def _tag_gone():
+            listing = list_tags(WebApiAuth, kb_id)
+            return listing["code"] == 0 and seeded not in listing["data"]
+
+        _tag_gone()
+
+    @pytest.mark.p3
+    def test_rename_tag(self, WebApiAuth, add_chunks):
+        """After a rename only the new tag name is listed."""
+        kb_id, document_id, chunk_ids = add_chunks
+        old_name = _seed_tag(WebApiAuth, kb_id, document_id, chunk_ids[0])
+        if not old_name:
+            # Tag aggregation is index-backed; skip if it never surfaces.
+            pytest.skip("Seeded tag did not appear in list_tags.")
+
+        new_name = f"{old_name}_renamed"
+        rename_res = rename_tags(WebApiAuth, kb_id, {"from_tag": old_name, "to_tag": new_name})
+        assert rename_res["code"] == 0, rename_res
+        assert rename_res["data"] is True, rename_res
+
+        @wait_for(TAG_SEED_TIMEOUT, 1, "Tag rename timeout")
+        def _rename_visible():
+            listing = list_tags(WebApiAuth, kb_id)
+            if listing["code"] != 0:
+                return False
+            listed = listing["data"]
+            return new_name in listed and old_name not in listed
+
+        _rename_visible()
+
+    @pytest.mark.p2
+    def test_get_meta(self, WebApiAuth, add_dataset):
+        """The meta endpoint returns a dict."""
+        response = kb_get_meta(WebApiAuth, {"kb_ids": add_dataset})
+        assert response["code"] == 0, response
+        assert isinstance(response["data"], dict), response
+
+    @pytest.mark.p2
+    def test_basic_info(self, WebApiAuth, add_dataset):
+        """Basic info exposes every expected counter key."""
+        response = kb_basic_info(WebApiAuth, {"kb_id": add_dataset})
+        assert response["code"] == 0, response
+        expected_keys = ["processing", "finished", "failed", "cancelled", "downloaded"]
+        missing = [key for key in expected_keys if key not in response["data"]]
+        assert not missing, response
+
+    @pytest.mark.p2
+    def test_update_metadata_setting(self, WebApiAuth, add_dataset):
+        """Updated metadata is echoed back inside ``parser_config``."""
+        kb_id = add_dataset
+        metadata = {"source": "test"}
+        request_body = {"kb_id": kb_id, "metadata": metadata, "enable_metadata": True}
+        response = kb_update_metadata_setting(WebApiAuth, request_body)
+        assert response["code"] == 0, response
+        assert response["data"]["id"] == kb_id, response
+        assert response["data"]["parser_config"]["metadata"] == metadata, response
+
+
+class TestKbTagsMetaNegative:
+    """Error-path checks: unknown knowledge-base ids and missing arguments."""
+
+    @staticmethod
+    def _assert_no_authorization(response):
+        """Check for error code 109 with a "No authorization" message."""
+        assert response["code"] == 109, response
+        assert "No authorization" in response["message"], response
+
+    @pytest.mark.p3
+    def test_list_tags_invalid_kb(self, WebApiAuth):
+        self._assert_no_authorization(list_tags(WebApiAuth, "invalid_kb_id"))
+
+    @pytest.mark.p3
+    def test_list_tags_from_kbs_invalid_kb(self, WebApiAuth):
+        self._assert_no_authorization(list_tags_from_kbs(WebApiAuth, {"kb_ids": "invalid_kb_id"}))
+
+    @pytest.mark.p3
+    def test_rm_tags_invalid_kb(self, WebApiAuth):
+        self._assert_no_authorization(rm_tags(WebApiAuth, "invalid_kb_id", {"tags": ["tag"]}))
+
+    @pytest.mark.p3
+    def test_rename_tag_invalid_kb(self, WebApiAuth):
+        response = rename_tags(WebApiAuth, "invalid_kb_id", {"from_tag": "old", "to_tag": "new"})
+        self._assert_no_authorization(response)
+
+    @pytest.mark.p3
+    def test_get_meta_invalid_kb(self, WebApiAuth):
+        self._assert_no_authorization(kb_get_meta(WebApiAuth, {"kb_ids": "invalid_kb_id"}))
+
+    @pytest.mark.p3
+    def test_basic_info_invalid_kb(self, WebApiAuth):
+        self._assert_no_authorization(kb_basic_info(WebApiAuth, {"kb_id": "invalid_kb_id"}))
+
+    @pytest.mark.p3
+    def test_update_metadata_setting_missing_metadata(self, WebApiAuth, add_dataset):
+        """Omitting ``metadata`` yields error code 101 naming the missing argument."""
+        response = kb_update_metadata_setting(WebApiAuth, {"kb_id": add_dataset})
+        assert response["code"] == 101, response
+        assert "required argument are missing" in response["message"], response
+        assert "metadata" in response["message"], response
+        assert "metadata" in res["message"], res
--- a/test/testcases/test_web_api/test_kb_app/test_update_kb.py
+++ b/test/testcases/test_web_api/test_kb_app/test_update_kb.py
@ -77,7 +77,8 @@ class TestDatasetUpdate:
    @pytest.mark.p1
    @given(name=valid_names())
    @example("a" * 128)
-    @settings(max_examples=20, suppress_health_check=[HealthCheck.function_scoped_fixture])
+    # Network-bound API call; disable Hypothesis deadline to avoid flaky timeouts.
+    @settings(max_examples=20, suppress_health_check=[HealthCheck.function_scoped_fixture], deadline=None)
    def test_name(self, WebApiAuth, add_dataset_func, name):
        dataset_id = add_dataset_func
        payload = {"name": name, "description": "", "parser_id": "naive", "kb_id": dataset_id}