Feat: document list and filter support metadata filtering (#12053)

### What problem does this PR solve?

Document list and filter now support metadata filtering.

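**Matching rule: OR within the same field, AND across different fields.** A document is returned only if, for every field in the query, it has at least one of the requested values.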

Example 1 (multi-field AND):

```markdown
Doc1 metadata: { "a": "b", "as": ["a", "b", "c"] }
Doc2 metadata: { "a": "x", "as": ["d"] }

Query:

metadata = {
  "a": ["b"],
  "as": ["d"]
}

Result:

Doc1 matches a=b but not as=d → excluded
Doc2 matches as=d but not a=b → excluded

Final result: empty
```

Example 2 (same field OR):

```markdown
Doc1 metadata: { "as": ["a", "b", "c"] }
Doc2 metadata: { "as": ["d"] }

Query:

metadata = {
  "as": ["a", "d"]
}

Result:

Doc1 matches as=a → included
Doc2 matches as=d → included

Final result: Doc1 + Doc2
```

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2025-12-22 09:35:11 +08:00
committed by GitHub
parent 55c0468ac9
commit 3ee47e4af7
2 changed files with 71 additions and 4 deletions

View File

@ -180,6 +180,16 @@ class DocumentService(CommonService):
"1": 2,
"2": 2
},
"metadata": {
"key1": {
"key1_value1": 1,
"key1_value2": 2,
},
"key2": {
"key2_value1": 2,
"key2_value2": 1,
},
}
}, total
where "1" => RUNNING, "2" => CANCEL
"""
@ -200,19 +210,40 @@ class DocumentService(CommonService):
if suffix:
query = query.where(cls.model.suffix.in_(suffix))
rows = query.select(cls.model.run, cls.model.suffix)
rows = query.select(cls.model.run, cls.model.suffix, cls.model.meta_fields)
total = rows.count()
suffix_counter = {}
run_status_counter = {}
metadata_counter = {}
for row in rows:
suffix_counter[row.suffix] = suffix_counter.get(row.suffix, 0) + 1
run_status_counter[str(row.run)] = run_status_counter.get(str(row.run), 0) + 1
meta_fields = row.meta_fields or {}
if isinstance(meta_fields, str):
try:
meta_fields = json.loads(meta_fields)
except Exception:
meta_fields = {}
if not isinstance(meta_fields, dict):
continue
for key, value in meta_fields.items():
values = value if isinstance(value, list) else [value]
for vv in values:
if vv is None:
continue
if isinstance(vv, str) and not vv.strip():
continue
sv = str(vv)
if key not in metadata_counter:
metadata_counter[key] = {}
metadata_counter[key][sv] = metadata_counter[key].get(sv, 0) + 1
return {
"suffix": suffix_counter,
"run_status": run_status_counter
"run_status": run_status_counter,
"metadata": metadata_counter,
}, total
@classmethod