Feat: add advanced document filter (#8723)

### What problem does this PR solve?

Add advanced document filter

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2025-07-09 09:33:11 +08:00
committed by GitHub
parent f7af0fc71e
commit c1f6e6f00e
6 changed files with 109 additions and 3 deletions

View File

@ -634,6 +634,7 @@ class Document(DataBaseModel):
process_begin_at = DateTimeField(null=True, index=True)
process_duration = FloatField(default=0)
meta_fields = JSONField(null=True, default={})
suffix = CharField(max_length=32, null=False, help_text="The real file extension suffix", index=True)
run = CharField(max_length=1, null=True, help_text="start to run processing or cancel.(1: run it; 2: cancel)", default="0", index=True)
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True)
@ -960,3 +961,7 @@ def migrate_db():
migrate(migrator.rename_column("document", "process_duation", "process_duration"))
except Exception:
pass
try:
migrate(migrator.add_column("document", "suffix", CharField(max_length=32, null=False, default="", help_text="The real file extension suffix", index=True)))
except Exception:
pass

View File

@ -72,7 +72,7 @@ class DocumentService(CommonService):
@classmethod
@DB.connection_context()
def get_by_kb_id(cls, kb_id, page_number, items_per_page,
orderby, desc, keywords, run_status, types):
orderby, desc, keywords, run_status, types, suffix):
if keywords:
docs = cls.model.select().where(
(cls.model.kb_id == kb_id),
@ -85,6 +85,8 @@ class DocumentService(CommonService):
docs = docs.where(cls.model.run.in_(run_status))
if types:
docs = docs.where(cls.model.type.in_(types))
if suffix:
docs = docs.where(cls.model.suffix.in_(suffix))
count = docs.count()
if desc:
@ -98,6 +100,54 @@ class DocumentService(CommonService):
return list(docs.dicts()), count
@classmethod
@DB.connection_context()
def get_filter_by_kb_id(cls, kb_id, keywords, run_status, types, suffix):
    """Summarise the documents of a knowledge base for the filter panel.

    The same filters as the document listing are applied, then the matching
    documents are tallied by file suffix and by run status.

    Args:
        kb_id: Id of the knowledge base whose documents are inspected.
        keywords: Optional text; when truthy, only documents whose name
            contains it (case-insensitive) are considered.
        run_status: Optional collection of ``run`` values to keep.
        types: Optional collection of document ``type`` values to keep.
        suffix: Optional collection of file suffixes to keep.

    Returns:
        A ``(filters, total)`` tuple, for example::

            {
                "suffix": {
                    "ppt": 1,
                    "docx": 2
                },
                "run_status": {
                    "1": 2,
                    "2": 2
                }
            }, total

        where run status "1" => RUNNING and "2" => CANCEL, and ``total`` is
        the number of documents that matched the filters.
    """
    # Only the two aggregated columns are needed, so select them up front.
    query = cls.model.select(cls.model.run, cls.model.suffix).where(cls.model.kb_id == kb_id)
    if keywords:
        query = query.where(fn.LOWER(cls.model.name).contains(keywords.lower()))
    if run_status:
        query = query.where(cls.model.run.in_(run_status))
    if types:
        query = query.where(cls.model.type.in_(types))
    if suffix:
        query = query.where(cls.model.suffix.in_(suffix))

    suffix_counter = {}
    run_status_counter = {}
    # Count rows while iterating instead of issuing a separate COUNT query:
    # one round-trip fewer, and the total always agrees with the counters.
    total = 0
    for row in query:
        total += 1
        suffix_counter[row.suffix] = suffix_counter.get(row.suffix, 0) + 1
        run_key = str(row.run)
        run_status_counter[run_key] = run_status_counter.get(run_key, 0) + 1

    return {
        "suffix": suffix_counter,
        "run_status": run_status_counter
    }, total
@classmethod
@DB.connection_context()
def count_by_kb_id(cls, kb_id, keywords, run_status, types):

View File

@ -17,6 +17,7 @@ import logging
import os
import re
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from flask_login import current_user
from peewee import fn
@ -446,6 +447,7 @@ class FileService(CommonService):
"created_by": user_id,
"type": filetype,
"name": filename,
"suffix": Path(filename).suffix.lstrip("."),
"location": location,
"size": len(blob),
"thumbnail": thumbnail_location,