From 90b726c988706704a4d54742127c2737cddec54f Mon Sep 17 00:00:00 2001 From: Clint-chan <62379027+Clint-chan@users.noreply.github.com> Date: Thu, 5 Feb 2026 13:52:51 +0800 Subject: [PATCH] fix: support date comparison operators (>=, <=, >, <) in metadata filtering (#12982) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description This PR fixes the issue where date metadata conditions with comparison operators (`>=`, `<=`, `>`, `<`) did not work correctly in the `/api/v1/retrieval` endpoint. ## Problem When using metadata conditions like: ```json { "metadata_condition": { "conditions": [ { "name": "date", "comparison_operator": ">=", "value": "2027-01-13" } ] } } ``` The filtering did not work as expected because: 1. Operators `>=` and `<=` were not mapped to internal symbols `≥` and `≤` 2. Date strings like "2027-01-13" failed to parse with `ast.literal_eval()` 3. Non-standard date formats were incorrectly compared as strings ## Solution Changes in `common/metadata_utils.py`: 1. Added operator mapping in `convert_conditions()`: - `>=` → `≥` - `<=` → `≤` - `!=` → `≠` 2. Implemented strict date format detection in `meta_filter()`: - Only processes dates in YYYY-MM-DD format (exactly 10 characters: four digits, hyphen, two digits, hyphen, two digits) - When query value is a date, only matches data in the same standard format - Non-standard formats (e.g., "2026年1月13日", "2026-1-22") are skipped 3. 
Maintained backward compatibility: - Numeric comparisons still work - String comparisons still work - Only affects date-formatted queries ## Testing All test cases pass (8/8): - ✅ Date `>=` comparison - ✅ Date `>` comparison - ✅ Date `<` comparison - ✅ Date `<=` comparison - ✅ Date `=` comparison - ✅ Date range queries - ✅ Non-date string comparison (backward compatibility) - ✅ Numeric comparison (backward compatibility) ## Example Usage ```json { "dataset_ids": ["xxx"], "question": "test", "metadata_condition": { "conditions": [ { "name": "date", "comparison_operator": ">=", "value": "2027-01-13" } ] } } ``` ## Notes - Only supports standard YYYY-MM-DD format - Non-standard date formats in data are treated as data quality issues and will not match - Users should ensure their date metadata is in the correct format --------- Co-authored-by: Clint-chan --- common/metadata_utils.py | 69 +++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 12 deletions(-) diff --git a/common/metadata_utils.py b/common/metadata_utils.py index aab00df8a..8403dd9e0 100644 --- a/common/metadata_utils.py +++ b/common/metadata_utils.py @@ -24,7 +24,10 @@ def convert_conditions(metadata_condition): metadata_condition = {} op_mapping = { "is": "=", - "not is": "≠" + "not is": "≠", + ">=": "≥", + "<=": "≤", + "!=": "≠" } return [ { @@ -44,17 +47,59 @@ def meta_filter(metas: dict, filters: list[dict], logic: str = "and"): for input, docids in v2docs.items(): if operator in ["=", "≠", ">", "<", "≥", "≤"]: - try: - if isinstance(input, list): - input = input[0] - input = ast.literal_eval(input) - value = ast.literal_eval(value) - except Exception: - pass - if isinstance(input, str): - input = input.lower() - if isinstance(value, str): - value = value.lower() + # Check if input is in YYYY-MM-DD date format + input_str = str(input).strip() + value_str = str(value).strip() + + # Strict date format detection: YYYY-MM-DD (must be 10 chars with correct format) + is_input_date = ( + len(input_str) == 10 and + 
input_str[4] == '-' and + input_str[7] == '-' and + input_str[:4].isdigit() and + input_str[5:7].isdigit() and + input_str[8:10].isdigit() + ) + + is_value_date = ( + len(value_str) == 10 and + value_str[4] == '-' and + value_str[7] == '-' and + value_str[:4].isdigit() and + value_str[5:7].isdigit() and + value_str[8:10].isdigit() + ) + + if is_value_date: + # Query value is in date format + if is_input_date: + # Data is also in date format: perform date comparison + input = input_str + value = value_str + else: + # Data is not in date format: skip this record (no match) + continue + else: + # Query value is not in date format: use original logic + try: + if isinstance(input, list): + input = input[0] + input = ast.literal_eval(input) + value = ast.literal_eval(value) + except Exception: + pass + + # Convert strings to lowercase + if isinstance(input, str): + input = input.lower() + if isinstance(value, str): + value = value.lower() + else: + # Non-comparison operators: maintain original logic + if isinstance(input, str): + input = input.lower() + if isinstance(value, str): + value = value.lower() matched = False try: