Feat: list documents supports range filtering (#9214)

### What problem does this PR solve?

list_document supports range filtering.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2025-08-04 16:35:35 +08:00
committed by GitHub
parent 448bdda73d
commit 60d652d2e1
5 changed files with 81 additions and 7 deletions

View File

@ -206,6 +206,8 @@ def list_docs():
desc = False
else:
desc = True
create_time_from = int(request.args.get("create_time_from", 0))
create_time_to = int(request.args.get("create_time_to", 0))
req = request.get_json()
@ -226,6 +228,14 @@ def list_docs():
try:
docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types, suffix)
if create_time_from or create_time_to:
filtered_docs = []
for doc in docs:
doc_create_time = doc.get("create_time", 0)
if (create_time_from == 0 or doc_create_time >= create_time_from) and (create_time_to == 0 or doc_create_time <= create_time_to):
filtered_docs.append(doc)
docs = filtered_docs
for doc_item in docs:
if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
doc_item["thumbnail"] = f"/v1/document/image/{kb_id}-{doc_item['thumbnail']}"

View File

@ -38,7 +38,7 @@ from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_
from rag.app.qa import beAdoc, rmPrefix
from rag.app.tag import label_question
from rag.nlp import rag_tokenizer, search
from rag.prompts import keyword_extraction, cross_languages
from rag.prompts import cross_languages, keyword_extraction
from rag.utils import rmSpace
from rag.utils.storage_factory import STORAGE_IMPL
@ -456,6 +456,18 @@ def list_docs(dataset_id, tenant_id):
required: false
default: true
description: Order in descending.
- in: query
name: create_time_from
type: integer
required: false
default: 0
description: Unix timestamp for filtering documents created after this time. 0 means no filter.
- in: query
name: create_time_to
type: integer
required: false
default: 0
description: Unix timestamp for filtering documents created before this time. 0 means no filter.
- in: header
name: Authorization
type: string
@ -517,6 +529,17 @@ def list_docs(dataset_id, tenant_id):
desc = True
docs, tol = DocumentService.get_list(dataset_id, page, page_size, orderby, desc, keywords, id, name)
create_time_from = int(request.args.get("create_time_from", 0))
create_time_to = int(request.args.get("create_time_to", 0))
if create_time_from or create_time_to:
filtered_docs = []
for doc in docs:
doc_create_time = doc.get("create_time", 0)
if (create_time_from == 0 or doc_create_time >= create_time_from) and (create_time_to == 0 or doc_create_time <= create_time_to):
filtered_docs.append(doc)
docs = filtered_docs
# rename key's name
renamed_doc_list = []
key_mapping = {