From af1344033d2c50c9fe67b9433c67c76719e3d1b7 Mon Sep 17 00:00:00 2001
From: buua436 <66937541+buua436@users.noreply.github.com>
Date: Thu, 4 Dec 2025 18:49:32 +0800
Subject: [PATCH] Delete:remove unused tests (#11749)

### What problem does this PR solve?

change: remove unused tests

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 .../test_evaluation_framework_demo.py   | 323 ----------
 .../services/test_evaluation_service.py | 557 ------------------
 2 files changed, 880 deletions(-)
 delete mode 100644 test/unit_test/services/test_evaluation_framework_demo.py
 delete mode 100644 test/unit_test/services/test_evaluation_service.py

diff --git a/test/unit_test/services/test_evaluation_framework_demo.py b/test/unit_test/services/test_evaluation_framework_demo.py
deleted file mode 100644
index 56a5c8781..000000000
--- a/test/unit_test/services/test_evaluation_framework_demo.py
+++ /dev/null
@@ -1,323 +0,0 @@
-#
-# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""
-Standalone test to demonstrate the RAG evaluation test framework works.
-This test doesn't require RAGFlow dependencies.
-"""
-
-import pytest
-from unittest.mock import Mock
-
-
-class TestEvaluationFrameworkDemo:
-    """Demo tests to verify the evaluation test framework is working"""
-
-    def test_basic_assertion(self):
-        """Test basic assertion works"""
-        assert 1 + 1 == 2
-
-    def test_mock_evaluation_service(self):
-        """Test mocking evaluation service"""
-        mock_service = Mock()
-        mock_service.create_dataset.return_value = (True, "dataset_123")
-
-        success, dataset_id = mock_service.create_dataset(
-            name="Test Dataset",
-            kb_ids=["kb_1"]
-        )
-
-        assert success is True
-        assert dataset_id == "dataset_123"
-        mock_service.create_dataset.assert_called_once()
-
-    def test_mock_test_case_addition(self):
-        """Test mocking test case addition"""
-        mock_service = Mock()
-        mock_service.add_test_case.return_value = (True, "case_123")
-
-        success, case_id = mock_service.add_test_case(
-            dataset_id="dataset_123",
-            question="Test question?",
-            reference_answer="Test answer"
-        )
-
-        assert success is True
-        assert case_id == "case_123"
-
-    def test_mock_evaluation_run(self):
-        """Test mocking evaluation run"""
-        mock_service = Mock()
-        mock_service.start_evaluation.return_value = (True, "run_123")
-
-        success, run_id = mock_service.start_evaluation(
-            dataset_id="dataset_123",
-            dialog_id="dialog_456",
-            user_id="user_1"
-        )
-
-        assert success is True
-        assert run_id == "run_123"
-
-    def test_mock_metrics_computation(self):
-        """Test mocking metrics computation"""
-        mock_service = Mock()
-
-        # Mock retrieval metrics
-        metrics = {
-            "precision": 0.85,
-            "recall": 0.78,
-            "f1_score": 0.81,
-            "hit_rate": 1.0,
-            "mrr": 0.9
-        }
-        mock_service._compute_retrieval_metrics.return_value = metrics
-
-        result = mock_service._compute_retrieval_metrics(
-            retrieved_ids=["chunk_1", "chunk_2", "chunk_3"],
-            relevant_ids=["chunk_1", "chunk_2", "chunk_4"]
-        )
-
-        assert result["precision"] == 0.85
-        assert result["recall"] == 0.78
-        assert result["f1_score"] == 0.81
-
-    def test_mock_recommendations(self):
-        """Test mocking recommendations"""
-        mock_service = Mock()
-
-        recommendations = [
-            {
-                "issue": "Low Precision",
-                "severity": "high",
-                "suggestions": [
-                    "Increase similarity_threshold",
-                    "Enable reranking"
-                ]
-            }
-        ]
-        mock_service.get_recommendations.return_value = recommendations
-
-        recs = mock_service.get_recommendations("run_123")
-
-        assert len(recs) == 1
-        assert recs[0]["issue"] == "Low Precision"
-        assert len(recs[0]["suggestions"]) == 2
-
-    @pytest.mark.parametrize("precision,recall,expected_f1", [
-        (1.0, 1.0, 1.0),
-        (0.8, 0.6, 0.69),
-        (0.5, 0.5, 0.5),
-        (0.0, 0.0, 0.0),
-    ])
-    def test_f1_score_calculation(self, precision, recall, expected_f1):
-        """Test F1 score calculation with different inputs"""
-        if precision + recall > 0:
-            f1 = 2 * (precision * recall) / (precision + recall)
-        else:
-            f1 = 0.0
-
-        assert abs(f1 - expected_f1) < 0.01
-
-    def test_dataset_list_structure(self):
-        """Test dataset list structure"""
-        mock_service = Mock()
-
-        expected_result = {
-            "total": 3,
-            "datasets": [
-                {"id": "dataset_1", "name": "Dataset 1"},
-                {"id": "dataset_2", "name": "Dataset 2"},
-                {"id": "dataset_3", "name": "Dataset 3"}
-            ]
-        }
-        mock_service.list_datasets.return_value = expected_result
-
-        result = mock_service.list_datasets(
-            tenant_id="tenant_1",
-            user_id="user_1",
-            page=1,
-            page_size=10
-        )
-
-        assert result["total"] == 3
-        assert len(result["datasets"]) == 3
-        assert result["datasets"][0]["id"] == "dataset_1"
-
-    def test_evaluation_run_status_flow(self):
-        """Test evaluation run status transitions"""
-        mock_service = Mock()
-
-        # Simulate status progression
-        statuses = ["PENDING", "RUNNING", "COMPLETED"]
-
-        for status in statuses:
-            mock_run = {"id": "run_123", "status": status}
-            mock_service.get_run_results.return_value = {"run": mock_run}
-
-            result = mock_service.get_run_results("run_123")
-            assert result["run"]["status"] == status
-
-    def test_bulk_import_success_count(self):
-        """Test bulk import success/failure counting"""
-        mock_service = Mock()
-
-        # Simulate 8 successes, 2 failures
-        mock_service.import_test_cases.return_value = (8, 2)
-
-        success_count, failure_count = mock_service.import_test_cases(
-            dataset_id="dataset_123",
-            cases=[{"question": f"Q{i}"} for i in range(10)]
-        )
-
-        assert success_count == 8
-        assert failure_count == 2
-        assert success_count + failure_count == 10
-
-    def test_metrics_summary_aggregation(self):
-        """Test metrics summary aggregation"""
-        results = [
-            {"metrics": {"precision": 0.9, "recall": 0.8}, "execution_time": 1.2},
-            {"metrics": {"precision": 0.8, "recall": 0.7}, "execution_time": 1.5},
-            {"metrics": {"precision": 0.85, "recall": 0.75}, "execution_time": 1.3}
-        ]
-
-        # Calculate averages
-        avg_precision = sum(r["metrics"]["precision"] for r in results) / len(results)
-        avg_recall = sum(r["metrics"]["recall"] for r in results) / len(results)
-        avg_time = sum(r["execution_time"] for r in results) / len(results)
-
-        assert abs(avg_precision - 0.85) < 0.01
-        assert abs(avg_recall - 0.75) < 0.01
-        assert abs(avg_time - 1.33) < 0.01
-
-    def test_recommendation_severity_levels(self):
-        """Test recommendation severity levels"""
-        severities = ["low", "medium", "high", "critical"]
-
-        for severity in severities:
-            rec = {
-                "issue": "Test Issue",
-                "severity": severity,
-                "suggestions": ["Fix it"]
-            }
-            assert rec["severity"] in severities
-
-    def test_empty_dataset_handling(self):
-        """Test handling of empty datasets"""
-        mock_service = Mock()
-        mock_service.get_test_cases.return_value = []
-
-        cases = mock_service.get_test_cases("empty_dataset")
-
-        assert len(cases) == 0
-        assert isinstance(cases, list)
-
-    def test_error_handling(self):
-        """Test error handling in service"""
-        mock_service = Mock()
-        mock_service.create_dataset.return_value = (False, "Dataset name cannot be empty")
-
-        success, error = mock_service.create_dataset(name="", kb_ids=[])
-
-        assert success is False
-        assert "empty" in error.lower()
-
-    def test_pagination_logic(self):
-        """Test pagination logic"""
-        total_items = 50
-        page_size = 10
-        page = 2
-
-        # Calculate expected items for page 2
-        start = (page - 1) * page_size
-        end = min(start + page_size, total_items)
-        expected_count = end - start
-
-        assert expected_count == 10
-        assert start == 10
-        assert end == 20
-
-
-class TestMetricsCalculations:
-    """Test metric calculation logic"""
-
-    def test_precision_calculation(self):
-        """Test precision calculation"""
-        retrieved = {"chunk_1", "chunk_2", "chunk_3", "chunk_4"}
-        relevant = {"chunk_1", "chunk_2", "chunk_5"}
-
-        precision = len(retrieved & relevant) / len(retrieved)
-
-        assert precision == 0.5  # 2 out of 4
-
-    def test_recall_calculation(self):
-        """Test recall calculation"""
-        retrieved = {"chunk_1", "chunk_2", "chunk_3", "chunk_4"}
-        relevant = {"chunk_1", "chunk_2", "chunk_5"}
-
-        recall = len(retrieved & relevant) / len(relevant)
-
-        assert abs(recall - 0.67) < 0.01  # 2 out of 3
-
-    def test_hit_rate_positive(self):
-        """Test hit rate when relevant chunk is found"""
-        retrieved = {"chunk_1", "chunk_2", "chunk_3"}
-        relevant = {"chunk_2", "chunk_4"}
-
-        hit_rate = 1.0 if (retrieved & relevant) else 0.0
-
-        assert hit_rate == 1.0
-
-    def test_hit_rate_negative(self):
-        """Test hit rate when no relevant chunk is found"""
-        retrieved = {"chunk_1", "chunk_2", "chunk_3"}
-        relevant = {"chunk_4", "chunk_5"}
-
-        hit_rate = 1.0 if (retrieved & relevant) else 0.0
-
-        assert hit_rate == 0.0
-
-    def test_mrr_calculation(self):
-        """Test MRR calculation"""
-        retrieved_ids = ["chunk_1", "chunk_2", "chunk_3", "chunk_4"]
-        relevant_ids = {"chunk_3", "chunk_5"}
-
-        mrr = 0.0
-        for i, chunk_id in enumerate(retrieved_ids, 1):
-            if chunk_id in relevant_ids:
-                mrr = 1.0 / i
-                break
-
-        assert abs(mrr - 0.33) < 0.01  # First relevant at position 3
-
-
-# Summary test
-def test_evaluation_framework_summary():
-    """
-    Summary test to confirm all evaluation framework features work.
-    This test verifies that:
-    - Basic assertions work
-    - Mocking works for all service methods
-    - Metrics calculations are correct
-    - Error handling works
-    - Pagination logic works
-    """
-    assert True, "Evaluation test framework is working correctly!"
-
-
-if __name__ == "__main__":
-    pytest.main([__file__, "-v"])
diff --git a/test/unit_test/services/test_evaluation_service.py b/test/unit_test/services/test_evaluation_service.py
deleted file mode 100644
index 76cf97a19..000000000
--- a/test/unit_test/services/test_evaluation_service.py
+++ /dev/null
@@ -1,557 +0,0 @@
-#
-# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""
-Unit tests for RAG Evaluation Service
-
-Tests cover:
-- Dataset management (CRUD operations)
-- Test case management
-- Evaluation execution
-- Metrics computation
-- Recommendations generation
-"""
-
-import pytest
-from unittest.mock import patch
-
-
-class TestEvaluationDatasetManagement:
-    """Tests for evaluation dataset management"""
-
-    @pytest.fixture
-    def mock_evaluation_service(self):
-        """Create a mock EvaluationService"""
-        with patch('api.db.services.evaluation_service.EvaluationService') as mock:
-            yield mock
-
-    @pytest.fixture
-    def sample_dataset_data(self):
-        """Sample dataset data for testing"""
-        return {
-            "name": "Customer Support QA",
-            "description": "Test cases for customer support",
-            "kb_ids": ["kb_123", "kb_456"],
-            "tenant_id": "tenant_1",
-            "user_id": "user_1"
-        }
-
-    def test_create_dataset_success(self, mock_evaluation_service, sample_dataset_data):
-        """Test successful dataset creation"""
-        mock_evaluation_service.create_dataset.return_value = (True, "dataset_123")
-
-        success, dataset_id = mock_evaluation_service.create_dataset(**sample_dataset_data)
-
-        assert success is True
-        assert dataset_id == "dataset_123"
-        mock_evaluation_service.create_dataset.assert_called_once()
-
-    def test_create_dataset_with_empty_name(self, mock_evaluation_service):
-        """Test dataset creation with empty name"""
-        data = {
-            "name": "",
-            "description": "Test",
-            "kb_ids": ["kb_123"],
-            "tenant_id": "tenant_1",
-            "user_id": "user_1"
-        }
-
-        mock_evaluation_service.create_dataset.return_value = (False, "Dataset name cannot be empty")
-        success, error = mock_evaluation_service.create_dataset(**data)
-
-        assert success is False
-        assert "name" in error.lower() or "empty" in error.lower()
-
-    def test_create_dataset_with_empty_kb_ids(self, mock_evaluation_service):
-        """Test dataset creation with empty kb_ids"""
-        data = {
-            "name": "Test Dataset",
-            "description": "Test",
-            "kb_ids": [],
-            "tenant_id": "tenant_1",
-            "user_id": "user_1"
-        }
-
-        mock_evaluation_service.create_dataset.return_value = (False, "kb_ids cannot be empty")
-        success, error = mock_evaluation_service.create_dataset(**data)
-
-        assert success is False
-
-    def test_get_dataset_success(self, mock_evaluation_service):
-        """Test successful dataset retrieval"""
-        expected_dataset = {
-            "id": "dataset_123",
-            "name": "Test Dataset",
-            "kb_ids": ["kb_123"]
-        }
-        mock_evaluation_service.get_dataset.return_value = expected_dataset
-
-        dataset = mock_evaluation_service.get_dataset("dataset_123")
-
-        assert dataset is not None
-        assert dataset["id"] == "dataset_123"
-
-    def test_get_dataset_not_found(self, mock_evaluation_service):
-        """Test getting non-existent dataset"""
-        mock_evaluation_service.get_dataset.return_value = None
-
-        dataset = mock_evaluation_service.get_dataset("nonexistent")
-
-        assert dataset is None
-
-    def test_list_datasets(self, mock_evaluation_service):
-        """Test listing datasets"""
-        expected_result = {
-            "total": 2,
-            "datasets": [
-                {"id": "dataset_1", "name": "Dataset 1"},
-                {"id": "dataset_2", "name": "Dataset 2"}
-            ]
-        }
-        mock_evaluation_service.list_datasets.return_value = expected_result
-
-        result = mock_evaluation_service.list_datasets(
-            tenant_id="tenant_1",
-            user_id="user_1",
-            page=1,
-            page_size=20
-        )
-
-        assert result["total"] == 2
-        assert len(result["datasets"]) == 2
-
-    def test_list_datasets_with_pagination(self, mock_evaluation_service):
-        """Test listing datasets with pagination"""
-        mock_evaluation_service.list_datasets.return_value = {
-            "total": 50,
-            "datasets": [{"id": f"dataset_{i}"} for i in range(10)]
-        }
-
-        result = mock_evaluation_service.list_datasets(
-            tenant_id="tenant_1",
-            user_id="user_1",
-            page=2,
-            page_size=10
-        )
-
-        assert result["total"] == 50
-        assert len(result["datasets"]) == 10
-
-    def test_update_dataset_success(self, mock_evaluation_service):
-        """Test successful dataset update"""
-        mock_evaluation_service.update_dataset.return_value = True
-
-        success = mock_evaluation_service.update_dataset(
-            "dataset_123",
-            name="Updated Name",
-            description="Updated Description"
-        )
-
-        assert success is True
-
-    def test_update_dataset_not_found(self, mock_evaluation_service):
-        """Test updating non-existent dataset"""
-        mock_evaluation_service.update_dataset.return_value = False
-
-        success = mock_evaluation_service.update_dataset(
-            "nonexistent",
-            name="Updated Name"
-        )
-
-        assert success is False
-
-    def test_delete_dataset_success(self, mock_evaluation_service):
-        """Test successful dataset deletion"""
-        mock_evaluation_service.delete_dataset.return_value = True
-
-        success = mock_evaluation_service.delete_dataset("dataset_123")
-
-        assert success is True
-
-    def test_delete_dataset_not_found(self, mock_evaluation_service):
-        """Test deleting non-existent dataset"""
-        mock_evaluation_service.delete_dataset.return_value = False
-
-        success = mock_evaluation_service.delete_dataset("nonexistent")
-
-        assert success is False
-
-
-class TestEvaluationTestCaseManagement:
-    """Tests for test case management"""
-
-    @pytest.fixture
-    def mock_evaluation_service(self):
-        """Create a mock EvaluationService"""
-        with patch('api.db.services.evaluation_service.EvaluationService') as mock:
-            yield mock
-
-    @pytest.fixture
-    def sample_test_case(self):
-        """Sample test case data"""
-        return {
-            "dataset_id": "dataset_123",
-            "question": "How do I reset my password?",
-            "reference_answer": "Click on 'Forgot Password' and follow the email instructions.",
-            "relevant_doc_ids": ["doc_789"],
-            "relevant_chunk_ids": ["chunk_101", "chunk_102"]
-        }
-
-    def test_add_test_case_success(self, mock_evaluation_service, sample_test_case):
-        """Test successful test case addition"""
-        mock_evaluation_service.add_test_case.return_value = (True, "case_123")
-
-        success, case_id = mock_evaluation_service.add_test_case(**sample_test_case)
-
-        assert success is True
-        assert case_id == "case_123"
-
-    def test_add_test_case_with_empty_question(self, mock_evaluation_service):
-        """Test adding test case with empty question"""
-        mock_evaluation_service.add_test_case.return_value = (False, "Question cannot be empty")
-
-        success, error = mock_evaluation_service.add_test_case(
-            dataset_id="dataset_123",
-            question=""
-        )
-
-        assert success is False
-        assert "question" in error.lower() or "empty" in error.lower()
-
-    def test_add_test_case_without_reference_answer(self, mock_evaluation_service):
-        """Test adding test case without reference answer (optional)"""
-        mock_evaluation_service.add_test_case.return_value = (True, "case_123")
-
-        success, case_id = mock_evaluation_service.add_test_case(
-            dataset_id="dataset_123",
-            question="Test question",
-            reference_answer=None
-        )
-
-        assert success is True
-
-    def test_get_test_cases(self, mock_evaluation_service):
-        """Test getting all test cases for a dataset"""
-        expected_cases = [
-            {"id": "case_1", "question": "Question 1"},
-            {"id": "case_2", "question": "Question 2"}
-        ]
-        mock_evaluation_service.get_test_cases.return_value = expected_cases
-
-        cases = mock_evaluation_service.get_test_cases("dataset_123")
-
-        assert len(cases) == 2
-        assert cases[0]["id"] == "case_1"
-
-    def test_get_test_cases_empty_dataset(self, mock_evaluation_service):
-        """Test getting test cases from empty dataset"""
-        mock_evaluation_service.get_test_cases.return_value = []
-
-        cases = mock_evaluation_service.get_test_cases("dataset_123")
-
-        assert len(cases) == 0
-
-    def test_delete_test_case_success(self, mock_evaluation_service):
-        """Test successful test case deletion"""
-        mock_evaluation_service.delete_test_case.return_value = True
-
-        success = mock_evaluation_service.delete_test_case("case_123")
-
-        assert success is True
-
-    def test_import_test_cases_success(self, mock_evaluation_service):
-        """Test bulk import of test cases"""
-        cases = [
-            {"question": "Question 1", "reference_answer": "Answer 1"},
-            {"question": "Question 2", "reference_answer": "Answer 2"},
-            {"question": "Question 3", "reference_answer": "Answer 3"}
-        ]
-        mock_evaluation_service.import_test_cases.return_value = (3, 0)
-
-        success_count, failure_count = mock_evaluation_service.import_test_cases(
-            "dataset_123",
-            cases
-        )
-
-        assert success_count == 3
-        assert failure_count == 0
-
-    def test_import_test_cases_with_failures(self, mock_evaluation_service):
-        """Test bulk import with some failures"""
-        cases = [
-            {"question": "Question 1"},
-            {"question": ""},  # Invalid
-            {"question": "Question 3"}
-        ]
-        mock_evaluation_service.import_test_cases.return_value = (2, 1)
-
-        success_count, failure_count = mock_evaluation_service.import_test_cases(
-            "dataset_123",
-            cases
-        )
-
-        assert success_count == 2
-        assert failure_count == 1
-
-
-class TestEvaluationExecution:
-    """Tests for evaluation execution"""
-
-    @pytest.fixture
-    def mock_evaluation_service(self):
-        """Create a mock EvaluationService"""
-        with patch('api.db.services.evaluation_service.EvaluationService') as mock:
-            yield mock
-
-    def test_start_evaluation_success(self, mock_evaluation_service):
-        """Test successful evaluation start"""
-        mock_evaluation_service.start_evaluation.return_value = (True, "run_123")
-
-        success, run_id = mock_evaluation_service.start_evaluation(
-            dataset_id="dataset_123",
-            dialog_id="dialog_456",
-            user_id="user_1"
-        )
-
-        assert success is True
-        assert run_id == "run_123"
-
-    def test_start_evaluation_with_invalid_dialog(self, mock_evaluation_service):
-        """Test starting evaluation with invalid dialog"""
-        mock_evaluation_service.start_evaluation.return_value = (False, "Dialog not found")
-
-        success, error = mock_evaluation_service.start_evaluation(
-            dataset_id="dataset_123",
-            dialog_id="nonexistent",
-            user_id="user_1"
-        )
-
-        assert success is False
-        assert "dialog" in error.lower()
-
-    def test_start_evaluation_with_custom_name(self, mock_evaluation_service):
-        """Test starting evaluation with custom name"""
-        mock_evaluation_service.start_evaluation.return_value = (True, "run_123")
-
-        success, run_id = mock_evaluation_service.start_evaluation(
-            dataset_id="dataset_123",
-            dialog_id="dialog_456",
-            user_id="user_1",
-            name="My Custom Evaluation"
-        )
-
-        assert success is True
-
-    def test_get_run_results(self, mock_evaluation_service):
-        """Test getting evaluation run results"""
-        expected_results = {
-            "run": {
-                "id": "run_123",
-                "status": "COMPLETED",
-                "metrics_summary": {
-                    "avg_precision": 0.85,
-                    "avg_recall": 0.78
-                }
-            },
-            "results": [
-                {"case_id": "case_1", "metrics": {"precision": 0.9}},
-                {"case_id": "case_2", "metrics": {"precision": 0.8}}
-            ]
-        }
-        mock_evaluation_service.get_run_results.return_value = expected_results
-
-        results = mock_evaluation_service.get_run_results("run_123")
-
-        assert results["run"]["id"] == "run_123"
-        assert len(results["results"]) == 2
-
-    def test_get_run_results_not_found(self, mock_evaluation_service):
-        """Test getting results for non-existent run"""
-        mock_evaluation_service.get_run_results.return_value = {}
-
-        results = mock_evaluation_service.get_run_results("nonexistent")
-
-        assert results == {}
-
-
-class TestEvaluationMetrics:
-    """Tests for metrics computation"""
-
-    @pytest.fixture
-    def mock_evaluation_service(self):
-        """Create a mock EvaluationService"""
-        with patch('api.db.services.evaluation_service.EvaluationService') as mock:
-            yield mock
-
-    def test_compute_retrieval_metrics_perfect_match(self, mock_evaluation_service):
-        """Test retrieval metrics with perfect match"""
-        retrieved_ids = ["chunk_1", "chunk_2", "chunk_3"]
-        relevant_ids = ["chunk_1", "chunk_2", "chunk_3"]
-
-        expected_metrics = {
-            "precision": 1.0,
-            "recall": 1.0,
-            "f1_score": 1.0,
-            "hit_rate": 1.0,
-            "mrr": 1.0
-        }
-        mock_evaluation_service._compute_retrieval_metrics.return_value = expected_metrics
-
-        metrics = mock_evaluation_service._compute_retrieval_metrics(retrieved_ids, relevant_ids)
-
-        assert metrics["precision"] == 1.0
-        assert metrics["recall"] == 1.0
-        assert metrics["f1_score"] == 1.0
-
-    def test_compute_retrieval_metrics_partial_match(self, mock_evaluation_service):
-        """Test retrieval metrics with partial match"""
-        retrieved_ids = ["chunk_1", "chunk_2", "chunk_4", "chunk_5"]
-        relevant_ids = ["chunk_1", "chunk_2", "chunk_3"]
-
-        expected_metrics = {
-            "precision": 0.5,  # 2 out of 4 retrieved are relevant
-            "recall": 0.67,  # 2 out of 3 relevant were retrieved
-            "f1_score": 0.57,
-            "hit_rate": 1.0,  # At least one relevant was retrieved
-            "mrr": 1.0  # First retrieved is relevant
-        }
-        mock_evaluation_service._compute_retrieval_metrics.return_value = expected_metrics
-
-        metrics = mock_evaluation_service._compute_retrieval_metrics(retrieved_ids, relevant_ids)
-
-        assert metrics["precision"] < 1.0
-        assert metrics["recall"] < 1.0
-        assert metrics["hit_rate"] == 1.0
-
-    def test_compute_retrieval_metrics_no_match(self, mock_evaluation_service):
-        """Test retrieval metrics with no match"""
-        retrieved_ids = ["chunk_4", "chunk_5", "chunk_6"]
-        relevant_ids = ["chunk_1", "chunk_2", "chunk_3"]
-
-        expected_metrics = {
-            "precision": 0.0,
-            "recall": 0.0,
-            "f1_score": 0.0,
-            "hit_rate": 0.0,
-            "mrr": 0.0
-        }
-        mock_evaluation_service._compute_retrieval_metrics.return_value = expected_metrics
-
-        metrics = mock_evaluation_service._compute_retrieval_metrics(retrieved_ids, relevant_ids)
-
-        assert metrics["precision"] == 0.0
-        assert metrics["recall"] == 0.0
-        assert metrics["hit_rate"] == 0.0
-
-    def test_compute_summary_metrics(self, mock_evaluation_service):
-        """Test summary metrics computation"""
-        results = [
-            {"metrics": {"precision": 0.9, "recall": 0.8}, "execution_time": 1.2},
-            {"metrics": {"precision": 0.8, "recall": 0.7}, "execution_time": 1.5},
-            {"metrics": {"precision": 0.85, "recall": 0.75}, "execution_time": 1.3}
-        ]
-
-        expected_summary = {
-            "total_cases": 3,
-            "avg_execution_time": 1.33,
-            "avg_precision": 0.85,
-            "avg_recall": 0.75
-        }
-        mock_evaluation_service._compute_summary_metrics.return_value = expected_summary
-
-        summary = mock_evaluation_service._compute_summary_metrics(results)
-
-        assert summary["total_cases"] == 3
-        assert summary["avg_precision"] > 0.8
-
-
-class TestEvaluationRecommendations:
-    """Tests for configuration recommendations"""
-
-    @pytest.fixture
-    def mock_evaluation_service(self):
-        """Create a mock EvaluationService"""
-        with patch('api.db.services.evaluation_service.EvaluationService') as mock:
-            yield mock
-
-    def test_get_recommendations_low_precision(self, mock_evaluation_service):
-        """Test recommendations for low precision"""
-        recommendations = [
-            {
-                "issue": "Low Precision",
-                "severity": "high",
-                "suggestions": [
-                    "Increase similarity_threshold",
-                    "Enable reranking"
-                ]
-            }
-        ]
-        mock_evaluation_service.get_recommendations.return_value = recommendations
-
-        recs = mock_evaluation_service.get_recommendations("run_123")
-
-        assert len(recs) > 0
-        assert any("precision" in r["issue"].lower() for r in recs)
-
-    def test_get_recommendations_low_recall(self, mock_evaluation_service):
-        """Test recommendations for low recall"""
-        recommendations = [
-            {
-                "issue": "Low Recall",
-                "severity": "high",
-                "suggestions": [
-                    "Increase top_k",
-                    "Lower similarity_threshold"
-                ]
-            }
-        ]
-        mock_evaluation_service.get_recommendations.return_value = recommendations
-
-        recs = mock_evaluation_service.get_recommendations("run_123")
-
-        assert len(recs) > 0
-        assert any("recall" in r["issue"].lower() for r in recs)
-
-    def test_get_recommendations_slow_response(self, mock_evaluation_service):
-        """Test recommendations for slow response time"""
-        recommendations = [
-            {
-                "issue": "Slow Response Time",
-                "severity": "medium",
-                "suggestions": [
-                    "Reduce top_k",
-                    "Optimize embedding model"
-                ]
-            }
-        ]
-        mock_evaluation_service.get_recommendations.return_value = recommendations
-
-        recs = mock_evaluation_service.get_recommendations("run_123")
-
-        assert len(recs) > 0
-        assert any("response" in r["issue"].lower() or "slow" in r["issue"].lower() for r in recs)
-
-    def test_get_recommendations_no_issues(self, mock_evaluation_service):
-        """Test recommendations when metrics are good"""
-        mock_evaluation_service.get_recommendations.return_value = []
-
-        recs = mock_evaluation_service.get_recommendations("run_123")
-
-        assert len(recs) == 0
-
-
-if __name__ == "__main__":
-    pytest.main([__file__, "-v"])