Feat: add advanced document filter (#8723)

### What problem does this PR solve?

Add advanced document filter

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2025-07-09 09:33:11 +08:00
committed by GitHub
parent f7af0fc71e
commit c1f6e6f00e
6 changed files with 109 additions and 3 deletions

View File

@ -45,6 +45,7 @@ from rag.utils.storage_factory import STORAGE_IMPL
from api.db.services.canvas_service import UserCanvasService
from agent.canvas import Canvas
from functools import partial
from pathlib import Path
@manager.route('/new_token', methods=['POST']) # noqa: F821
@ -439,7 +440,8 @@ def upload():
"name": filename,
"location": location,
"size": len(blob),
"thumbnail": thumbnail(filename, blob)
"thumbnail": thumbnail(filename, blob),
"suffix": Path(filename).suffix.lstrip("."),
}
form_data = request.form

View File

@ -17,6 +17,7 @@ import json
import os.path
import pathlib
import re
from pathlib import Path
import flask
from flask import request
@ -125,6 +126,7 @@ def web_crawl():
"location": location,
"size": len(blob),
"thumbnail": thumbnail(filename, blob),
"suffix": Path(filename).suffix.lstrip("."),
}
if doc["type"] == FileType.VISUAL:
doc["parser_id"] = ParserType.PICTURE.value
@ -173,6 +175,7 @@ def create():
"created_by": current_user.id,
"type": FileType.VIRTUAL,
"name": req["name"],
"suffix": Path(req["name"]).suffix.lstrip("."),
"location": "",
"size": 0,
}
@ -218,8 +221,10 @@ def list_docs():
if invalid_types:
return get_data_error_result(message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}")
suffix = req.get("suffix", [])
try:
docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types)
docs, tol = DocumentService.get_by_kb_id(kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types, suffix)
for doc_item in docs:
if doc_item["thumbnail"] and not doc_item["thumbnail"].startswith(IMG_BASE64_PREFIX):
@ -230,6 +235,45 @@ def list_docs():
return server_error_response(e)
@manager.route("/filter", methods=["POST"]) # noqa: F821
@login_required
def get_filter():
req = request.get_json()
kb_id = req.get("kb_id")
if not kb_id:
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
tenants = UserTenantService.query(user_id=current_user.id)
for tenant in tenants:
if KnowledgebaseService.query(tenant_id=tenant.tenant_id, id=kb_id):
break
else:
return get_json_result(data=False, message="Only owner of knowledgebase authorized for this operation.", code=settings.RetCode.OPERATING_ERROR)
keywords = req.get("keywords", "")
suffix = req.get("suffix", [])
run_status = req.get("run_status", [])
if run_status:
invalid_status = {s for s in run_status if s not in VALID_TASK_STATUS}
if invalid_status:
return get_data_error_result(message=f"Invalid filter run status conditions: {', '.join(invalid_status)}")
types = req.get("types", [])
if types:
invalid_types = {t for t in types if t not in VALID_FILE_TYPES}
if invalid_types:
return get_data_error_result(message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}")
try:
filter, total = DocumentService.get_filter_by_kb_id(kb_id, keywords, run_status, types, suffix)
return get_json_result(data={"total": total, "filter": filter})
except Exception as e:
return server_error_response(e)
@manager.route("/infos", methods=["POST"]) # noqa: F821
@login_required
def docinfos():

View File

@ -14,6 +14,8 @@
# limitations under the License
#
from pathlib import Path
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
@ -82,6 +84,7 @@ def convert():
"created_by": current_user.id,
"type": file.type,
"name": file.name,
"suffix": Path(file.name).suffix.lstrip("."),
"location": file.location,
"size": file.size
})