Feat: update and add new tests for web api apps (#12714)

### What problem does this PR solve?

This PR adds missing web API tests (system, search, KB, LLM, plugin,
connector). It also addresses a contract mismatch that was causing test
failures: metadata updates did not persist new keys (update‑only
behavior).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [x] Other (please describe): Test coverage expansion and test helper
instrumentation
This commit is contained in:
6ba3i
2026-01-20 19:12:15 +08:00
committed by GitHub
parent aee9860970
commit 960ecd3158
14 changed files with 1623 additions and 11 deletions

View File

@ -0,0 +1,208 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
from common import (
kb_delete_pipeline_logs,
kb_list_pipeline_dataset_logs,
kb_list_pipeline_logs,
kb_pipeline_log_detail,
kb_run_graphrag,
kb_run_mindmap,
kb_run_raptor,
kb_trace_graphrag,
kb_trace_mindmap,
kb_trace_raptor,
list_documents,
parse_documents,
)
from utils import wait_for
# Value of a document's "run" field that _wait_for_docs_parsed requires before
# it treats that document as parsed.
TASK_STATUS_DONE = "3"
def _find_task(data, task_id):
if isinstance(data, dict):
if data.get("id") == task_id:
return data
tasks = data.get("tasks")
if isinstance(tasks, list):
for item in tasks:
if isinstance(item, dict) and item.get("id") == task_id:
return item
elif isinstance(data, list):
for item in data:
if isinstance(item, dict) and item.get("id") == task_id:
return item
return None
def _assert_progress_in_scale(progress, payload):
assert isinstance(progress, (int, float)), payload
if progress < 0:
assert False, f"Negative progress is not expected: {payload}"
scale = 100 if progress > 1 else 1
# Infer scale from observed payload (0..1 or 0..100).
assert 0 <= progress <= scale, payload
return scale
def _wait_for_task(trace_func, auth, kb_id, task_id, timeout=60):
    """Poll ``trace_func`` until it reports the task identified by ``task_id``.

    Args:
        trace_func: Callable invoked as ``trace_func(auth, {"kb_id": kb_id})``;
            its result is read as a dict with ``"code"`` and ``"data"``.
        auth: Passed through unchanged to ``trace_func``.
        kb_id: Knowledge-base id placed in the trace request.
        task_id: Id searched for in the response data via ``_find_task``.
        timeout: First argument handed to ``wait_for`` (presumably seconds;
            confirm against ``utils.wait_for``).
    """

    @wait_for(timeout, 1, "Pipeline task trace timeout")
    def _task_visible():
        response = trace_func(auth, {"kb_id": kb_id})
        # A non-zero code is treated as "not ready yet"; data is not inspected.
        return response["code"] == 0 and _find_task(response["data"], task_id) is not None

    _task_visible()
def _wait_for_docs_parsed(auth, kb_id, timeout=60):
    """Poll ``list_documents`` until every listed document is fully parsed.

    A document counts as parsed when its ``"run"`` field equals
    ``TASK_STATUS_DONE`` and its ``"progress"`` has reached the top of the
    scale inferred by ``_assert_progress_in_scale``.

    Args:
        auth: Passed through unchanged to ``list_documents``.
        kb_id: Knowledge-base id whose documents are listed.
        timeout: First argument handed to ``wait_for`` (presumably seconds;
            confirm against ``utils.wait_for``).
    """

    @wait_for(timeout, 2, "Document parsing timeout")
    def _condition():
        res = list_documents(auth, {"kb_id": kb_id})
        if res["code"] != 0:
            return False
        for doc in res["data"]["docs"]:
            progress = doc.get("progress", 0)
            # Reuse the scale the validator inferred instead of re-deriving it
            # here, so the two can never disagree.
            scale = _assert_progress_in_scale(progress, doc)
            if doc.get("run") != TASK_STATUS_DONE or progress < scale:
                return False
        return True

    _condition()
def _wait_for_pipeline_logs(auth, kb_id, timeout=30):
    """Poll ``kb_list_pipeline_logs`` until it returns at least one log entry.

    Args:
        auth: Passed through unchanged to ``kb_list_pipeline_logs``.
        kb_id: Knowledge-base id sent as the ``kb_id`` query parameter.
        timeout: First argument handed to ``wait_for`` (presumably seconds;
            confirm against ``utils.wait_for``).
    """

    @wait_for(timeout, 1, "Pipeline log timeout")
    def _logs_present():
        listing = kb_list_pipeline_logs(auth, params={"kb_id": kb_id}, payload={})
        # Non-zero code means "try again"; otherwise succeed once logs is non-empty.
        return listing["code"] == 0 and bool(listing["data"]["logs"])

    _logs_present()
class TestKbPipelineTasks:
    """Run-then-trace round trips for the graphrag, raptor and mindmap tasks."""

    @staticmethod
    def _run_and_trace(auth, kb_id, run_func, trace_func, task_id_key):
        """Start a pipeline task and verify that its trace endpoint reports it.

        Not collected by pytest (name does not start with ``test``); it holds
        the body that the three tests below share.

        Args:
            auth: Auth object forwarded to ``run_func`` and ``trace_func``.
            kb_id: Knowledge-base id sent in both requests.
            run_func: Helper that starts the task, called as
                ``run_func(auth, {"kb_id": kb_id})``.
            trace_func: Helper that traces the task, called the same way.
            task_id_key: Key under ``run_res["data"]`` holding the new task id.
        """
        run_res = run_func(auth, {"kb_id": kb_id})
        assert run_res["code"] == 0, run_res
        task_id = run_res["data"][task_id_key]
        assert task_id, run_res

        # The trace endpoint may lag behind the run call, so poll first.
        _wait_for_task(trace_func, auth, kb_id, task_id)

        trace_res = trace_func(auth, {"kb_id": kb_id})
        assert trace_res["code"] == 0, trace_res
        task = _find_task(trace_res["data"], task_id)
        assert task, trace_res
        assert task["id"] == task_id, trace_res
        _assert_progress_in_scale(task.get("progress"), task)

    @pytest.mark.p3
    def test_graphrag_run_and_trace(self, WebApiAuth, add_chunks):
        """kb_run_graphrag returns a task id that kb_trace_graphrag reports."""
        kb_id, _, _ = add_chunks
        self._run_and_trace(WebApiAuth, kb_id, kb_run_graphrag, kb_trace_graphrag, "graphrag_task_id")

    @pytest.mark.p3
    def test_raptor_run_and_trace(self, WebApiAuth, add_chunks):
        """kb_run_raptor returns a task id that kb_trace_raptor reports."""
        kb_id, _, _ = add_chunks
        self._run_and_trace(WebApiAuth, kb_id, kb_run_raptor, kb_trace_raptor, "raptor_task_id")

    @pytest.mark.p3
    def test_mindmap_run_and_trace(self, WebApiAuth, add_chunks):
        """kb_run_mindmap returns a task id that kb_trace_mindmap reports."""
        kb_id, _, _ = add_chunks
        self._run_and_trace(WebApiAuth, kb_id, kb_run_mindmap, kb_trace_mindmap, "mindmap_task_id")
class TestKbPipelineLogs:
    """Tests for listing, inspecting and deleting knowledge-base pipeline logs."""

    @pytest.mark.p3
    def test_pipeline_log_lifecycle(self, WebApiAuth, add_document):
        """Parse one document, then list, fetch and delete its pipeline log."""
        kb_id, document_id = add_document
        parse_res = parse_documents(WebApiAuth, {"doc_ids": [document_id], "run": "1"})
        # Fail fast if the parse request itself is rejected; otherwise the
        # only symptom would be a "Document parsing timeout" a minute later.
        # NOTE(review): assumes parse_documents returns the same
        # {"code": ...} envelope as the other helpers used here - confirm.
        assert parse_res["code"] == 0, parse_res
        _wait_for_docs_parsed(WebApiAuth, kb_id)
        _wait_for_pipeline_logs(WebApiAuth, kb_id)

        list_res = kb_list_pipeline_logs(WebApiAuth, params={"kb_id": kb_id}, payload={})
        assert list_res["code"] == 0, list_res
        assert "total" in list_res["data"], list_res
        assert isinstance(list_res["data"]["logs"], list), list_res
        assert list_res["data"]["logs"], list_res

        log_id = list_res["data"]["logs"][0]["id"]
        detail_res = kb_pipeline_log_detail(WebApiAuth, {"log_id": log_id})
        assert detail_res["code"] == 0, detail_res
        detail = detail_res["data"]
        assert detail["id"] == log_id, detail_res
        assert detail["kb_id"] == kb_id, detail_res
        for key in ["document_id", "task_type", "operation_status", "progress"]:
            assert key in detail, detail_res

        delete_res = kb_delete_pipeline_logs(WebApiAuth, params={"kb_id": kb_id}, payload={"log_ids": [log_id]})
        assert delete_res["code"] == 0, delete_res
        assert delete_res["data"] is True, delete_res

        # Poll until the deleted log no longer shows up in the listing.
        @wait_for(30, 1, "Pipeline log delete timeout")
        def _condition():
            res = kb_list_pipeline_logs(WebApiAuth, params={"kb_id": kb_id}, payload={})
            if res["code"] != 0:
                return False
            return all(log.get("id") != log_id for log in res["data"]["logs"])

        _condition()

    @pytest.mark.p3
    def test_list_pipeline_dataset_logs(self, WebApiAuth, add_document):
        """The dataset-log listing succeeds and exposes ``total`` and a ``logs`` list."""
        kb_id, _ = add_document
        res = kb_list_pipeline_dataset_logs(WebApiAuth, params={"kb_id": kb_id}, payload={})
        assert res["code"] == 0, res
        assert "total" in res["data"], res
        assert isinstance(res["data"]["logs"], list), res

    @pytest.mark.p3
    def test_pipeline_log_detail_missing_id(self, WebApiAuth):
        """Requesting log detail without a log id yields code 101 naming the missing id."""
        res = kb_pipeline_log_detail(WebApiAuth, {})
        assert res["code"] == 101, res
        assert "Pipeline log ID" in res["message"], res

    @pytest.mark.p3
    def test_delete_pipeline_logs_empty(self, WebApiAuth, add_document):
        """Deleting with an empty ``log_ids`` list still succeeds with ``data`` True."""
        kb_id, _ = add_document
        res = kb_delete_pipeline_logs(WebApiAuth, params={"kb_id": kb_id}, payload={"log_ids": []})
        assert res["code"] == 0, res
        assert res["data"] is True, res

View File

@ -0,0 +1,251 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import uuid
import pytest
from common import (
kb_basic_info,
kb_get_meta,
kb_update_metadata_setting,
list_tags,
list_tags_from_kbs,
rename_tags,
rm_tags,
update_chunk,
)
from configs import INVALID_API_TOKEN
from libs.auth import RAGFlowWebApiAuth
from utils import wait_for
# (auth, expected code, expected message fragment) triples used to parametrize
# the tests in TestAuthorization: no auth object at all, and an auth object
# built from INVALID_API_TOKEN.
INVALID_AUTH_CASES = [
(None, 401, "Unauthorized"),
(RAGFlowWebApiAuth(INVALID_API_TOKEN), 401, "Unauthorized"),
]
# Timeout handed to wait_for when polling list_tags for a seeded, removed or
# renamed tag (presumably seconds; confirm against utils.wait_for).
TAG_SEED_TIMEOUT = 20
def _wait_for_tag(auth, kb_id, tag, timeout=TAG_SEED_TIMEOUT):
    """Poll ``list_tags`` until ``tag`` is listed for the knowledge base.

    Args:
        auth: Passed through unchanged to ``list_tags``.
        kb_id: Knowledge-base id whose tags are listed.
        tag: Tag expected to appear in the response ``data``.
        timeout: First argument handed to ``wait_for``.

    Returns:
        ``True`` once the polling call completes, ``False`` if it raises
        ``AssertionError`` (presumably how ``wait_for`` signals a timeout;
        confirm against ``utils.wait_for``).
    """

    @wait_for(timeout, 1, "Tag seed timeout")
    def _tag_listed():
        listing = list_tags(auth, kb_id)
        # Non-zero code means "try again"; data is only read on success.
        return listing["code"] == 0 and tag in listing["data"]

    try:
        _tag_listed()
    except AssertionError:
        return False
    else:
        return True
def _seed_tag(auth, kb_id, document_id, chunk_id):
    """Attach a fresh random tag to a chunk and wait for it to be listed.

    Args:
        auth: Passed through unchanged to ``update_chunk`` and ``_wait_for_tag``.
        kb_id: Knowledge-base id polled for the new tag.
        document_id: Sent as ``doc_id`` in the chunk update.
        chunk_id: Sent as ``chunk_id`` in the chunk update.

    Returns:
        The generated tag, or ``None`` if ``_wait_for_tag`` reported that it
        never showed up.

    Raises:
        AssertionError: If the chunk update does not return code 0.
    """
    # KB tags are derived from chunk tag_kwd, not document metadata.
    tag = f"tag_{uuid.uuid4().hex[:8]}"
    chunk_update = {
        "doc_id": document_id,
        "chunk_id": chunk_id,
        "content_with_weight": f"tag seed {tag}",
        "tag_kwd": [tag],
    }
    res = update_chunk(auth, chunk_update)
    assert res["code"] == 0, res
    return tag if _wait_for_tag(auth, kb_id, tag) else None
class TestAuthorization:
    """Every tags/meta helper must reject the credentials in INVALID_AUTH_CASES."""

    @pytest.mark.p2
    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
    def test_list_tags_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
        """list_tags returns the expected error code and message fragment."""
        response = list_tags(invalid_auth, "kb_id")
        assert response["code"] == expected_code, response
        assert expected_fragment in response["message"], response

    @pytest.mark.p2
    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
    def test_list_tags_from_kbs_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
        """list_tags_from_kbs returns the expected error code and message fragment."""
        response = list_tags_from_kbs(invalid_auth, {"kb_ids": "kb_id"})
        assert response["code"] == expected_code, response
        assert expected_fragment in response["message"], response

    @pytest.mark.p2
    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
    def test_rm_tags_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
        """rm_tags returns the expected error code and message fragment."""
        response = rm_tags(invalid_auth, "kb_id", {"tags": ["tag"]})
        assert response["code"] == expected_code, response
        assert expected_fragment in response["message"], response

    @pytest.mark.p2
    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
    def test_rename_tag_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
        """rename_tags returns the expected error code and message fragment."""
        response = rename_tags(invalid_auth, "kb_id", {"from_tag": "old", "to_tag": "new"})
        assert response["code"] == expected_code, response
        assert expected_fragment in response["message"], response

    @pytest.mark.p2
    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
    def test_get_meta_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
        """kb_get_meta returns the expected error code and message fragment."""
        response = kb_get_meta(invalid_auth, {"kb_ids": "kb_id"})
        assert response["code"] == expected_code, response
        assert expected_fragment in response["message"], response

    @pytest.mark.p2
    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
    def test_basic_info_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
        """kb_basic_info returns the expected error code and message fragment."""
        response = kb_basic_info(invalid_auth, {"kb_id": "kb_id"})
        assert response["code"] == expected_code, response
        assert expected_fragment in response["message"], response

    @pytest.mark.p2
    @pytest.mark.parametrize("invalid_auth, expected_code, expected_fragment", INVALID_AUTH_CASES)
    def test_update_metadata_setting_auth_invalid(self, invalid_auth, expected_code, expected_fragment):
        """kb_update_metadata_setting returns the expected error code and message fragment."""
        response = kb_update_metadata_setting(invalid_auth, {"kb_id": "kb_id", "metadata": {}})
        assert response["code"] == expected_code, response
        assert expected_fragment in response["message"], response
class TestKbTagsMeta:
    """Happy-path checks for the tag, meta, basic-info and metadata-setting helpers."""

    @pytest.mark.p2
    def test_list_tags(self, WebApiAuth, add_dataset):
        """list_tags succeeds and returns a list."""
        kb_id = add_dataset
        response = list_tags(WebApiAuth, kb_id)
        assert response["code"] == 0, response
        assert isinstance(response["data"], list), response

    @pytest.mark.p2
    def test_list_tags_from_kbs(self, WebApiAuth, add_dataset):
        """list_tags_from_kbs succeeds and returns a list."""
        kb_id = add_dataset
        response = list_tags_from_kbs(WebApiAuth, {"kb_ids": kb_id})
        assert response["code"] == 0, response
        assert isinstance(response["data"], list), response

    @pytest.mark.p3
    def test_rm_tags(self, WebApiAuth, add_chunks):
        """A seeded tag disappears from list_tags after rm_tags."""
        kb_id, document_id, chunk_ids = add_chunks
        tag_to_remove = _seed_tag(WebApiAuth, kb_id, document_id, chunk_ids[0])
        if not tag_to_remove:
            # Tag aggregation is index-backed; skip if it never surfaces.
            pytest.skip("Seeded tag did not appear in list_tags.")

        response = rm_tags(WebApiAuth, kb_id, {"tags": [tag_to_remove]})
        assert response["code"] == 0, response
        assert response["data"] is True, response

        @wait_for(TAG_SEED_TIMEOUT, 1, "Tag removal timeout")
        def _tag_gone():
            listing = list_tags(WebApiAuth, kb_id)
            # Non-zero code means "try again"; data is only read on success.
            return listing["code"] == 0 and tag_to_remove not in listing["data"]

        _tag_gone()

    @pytest.mark.p3
    def test_rename_tag(self, WebApiAuth, add_chunks):
        """After rename_tags the new name is listed and the old one is not."""
        kb_id, document_id, chunk_ids = add_chunks
        from_tag = _seed_tag(WebApiAuth, kb_id, document_id, chunk_ids[0])
        if not from_tag:
            # Tag aggregation is index-backed; skip if it never surfaces.
            pytest.skip("Seeded tag did not appear in list_tags.")

        to_tag = f"{from_tag}_renamed"
        response = rename_tags(WebApiAuth, kb_id, {"from_tag": from_tag, "to_tag": to_tag})
        assert response["code"] == 0, response
        assert response["data"] is True, response

        @wait_for(TAG_SEED_TIMEOUT, 1, "Tag rename timeout")
        def _tag_renamed():
            listing = list_tags(WebApiAuth, kb_id)
            if listing["code"] != 0:
                return False
            current_tags = listing["data"]
            return to_tag in current_tags and from_tag not in current_tags

        _tag_renamed()

    @pytest.mark.p2
    def test_get_meta(self, WebApiAuth, add_dataset):
        """kb_get_meta succeeds and returns a dict."""
        kb_id = add_dataset
        response = kb_get_meta(WebApiAuth, {"kb_ids": kb_id})
        assert response["code"] == 0, response
        assert isinstance(response["data"], dict), response

    @pytest.mark.p2
    def test_basic_info(self, WebApiAuth, add_dataset):
        """kb_basic_info succeeds and exposes every expected counter key."""
        kb_id = add_dataset
        response = kb_basic_info(WebApiAuth, {"kb_id": kb_id})
        assert response["code"] == 0, response
        for counter in ("processing", "finished", "failed", "cancelled", "downloaded"):
            assert counter in response["data"], response

    @pytest.mark.p2
    def test_update_metadata_setting(self, WebApiAuth, add_dataset):
        """kb_update_metadata_setting echoes the dataset id and the stored metadata."""
        kb_id = add_dataset
        metadata = {"source": "test"}
        request = {"kb_id": kb_id, "metadata": metadata, "enable_metadata": True}
        response = kb_update_metadata_setting(WebApiAuth, request)
        assert response["code"] == 0, response
        assert response["data"]["id"] == kb_id, response
        assert response["data"]["parser_config"]["metadata"] == metadata, response
class TestKbTagsMetaNegative:
    """Error-path checks: unknown knowledge-base ids and a missing required argument."""

    @pytest.mark.p3
    def test_list_tags_invalid_kb(self, WebApiAuth):
        """list_tags on an unknown kb id yields code 109 / "No authorization"."""
        response = list_tags(WebApiAuth, "invalid_kb_id")
        assert response["code"] == 109, response
        assert "No authorization" in response["message"], response

    @pytest.mark.p3
    def test_list_tags_from_kbs_invalid_kb(self, WebApiAuth):
        """list_tags_from_kbs on an unknown kb id yields code 109 / "No authorization"."""
        response = list_tags_from_kbs(WebApiAuth, {"kb_ids": "invalid_kb_id"})
        assert response["code"] == 109, response
        assert "No authorization" in response["message"], response

    @pytest.mark.p3
    def test_rm_tags_invalid_kb(self, WebApiAuth):
        """rm_tags on an unknown kb id yields code 109 / "No authorization"."""
        response = rm_tags(WebApiAuth, "invalid_kb_id", {"tags": ["tag"]})
        assert response["code"] == 109, response
        assert "No authorization" in response["message"], response

    @pytest.mark.p3
    def test_rename_tag_invalid_kb(self, WebApiAuth):
        """rename_tags on an unknown kb id yields code 109 / "No authorization"."""
        response = rename_tags(WebApiAuth, "invalid_kb_id", {"from_tag": "old", "to_tag": "new"})
        assert response["code"] == 109, response
        assert "No authorization" in response["message"], response

    @pytest.mark.p3
    def test_get_meta_invalid_kb(self, WebApiAuth):
        """kb_get_meta on an unknown kb id yields code 109 / "No authorization"."""
        response = kb_get_meta(WebApiAuth, {"kb_ids": "invalid_kb_id"})
        assert response["code"] == 109, response
        assert "No authorization" in response["message"], response

    @pytest.mark.p3
    def test_basic_info_invalid_kb(self, WebApiAuth):
        """kb_basic_info on an unknown kb id yields code 109 / "No authorization"."""
        response = kb_basic_info(WebApiAuth, {"kb_id": "invalid_kb_id"})
        assert response["code"] == 109, response
        assert "No authorization" in response["message"], response

    @pytest.mark.p3
    def test_update_metadata_setting_missing_metadata(self, WebApiAuth, add_dataset):
        """Omitting ``metadata`` yields code 101 and a message naming the missing argument."""
        response = kb_update_metadata_setting(WebApiAuth, {"kb_id": add_dataset})
        assert response["code"] == 101, response
        message = response["message"]
        assert "required argument are missing" in message, response
        assert "metadata" in message, response

View File

@ -77,7 +77,8 @@ class TestDatasetUpdate:
@pytest.mark.p1
@given(name=valid_names())
@example("a" * 128)
@settings(max_examples=20, suppress_health_check=[HealthCheck.function_scoped_fixture])
# Network-bound API call; disable Hypothesis deadline to avoid flaky timeouts.
@settings(max_examples=20, suppress_health_check=[HealthCheck.function_scoped_fixture], deadline=None)
def test_name(self, WebApiAuth, add_dataset_func, name):
dataset_id = add_dataset_func
payload = {"name": name, "description": "", "parser_id": "naive", "kb_id": dataset_id}