Feat: add advanced document filter (#8723)

### What problem does this PR solve?

Add advanced document filter

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-30 15:16:45 +08:00 · 2025-07-09 09:33:11 +08:00
parent f7af0fc71e
commit c1f6e6f00e
6 changed files with 109 additions and 3 deletions
--- a/api/db/db_models.py
+++ b/api/db/db_models.py
@@ -634,6 +634,7 @@ class Document(DataBaseModel):
    process_begin_at = DateTimeField(null=True, index=True)
    process_duration = FloatField(default=0)
    meta_fields = JSONField(null=True, default={})
+    suffix = CharField(max_length=32, null=False, help_text="The real file extension suffix", index=True)

    run = CharField(max_length=1, null=True, help_text="start to run processing or cancel.(1: run it; 2: cancel)", default="0", index=True)
    status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True)
@@ -960,3 +961,7 @@ def migrate_db():
        migrate(migrator.rename_column("document", "process_duation", "process_duration"))
    except Exception:
        pass
+    try:
+        migrate(migrator.add_column("document", "suffix", CharField(max_length=32, null=False, default="", help_text="The real file extension suffix", index=True)))
+    except Exception:
+        pass
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -72,7 +72,7 @@ class DocumentService(CommonService):
    @classmethod
    @DB.connection_context()
    def get_by_kb_id(cls, kb_id, page_number, items_per_page,
-                     orderby, desc, keywords, run_status, types):
+                     orderby, desc, keywords, run_status, types, suffix):
        if keywords:
            docs = cls.model.select().where(
                (cls.model.kb_id == kb_id),
@@ -85,6 +85,8 @@ class DocumentService(CommonService):
            docs = docs.where(cls.model.run.in_(run_status))
        if types:
            docs = docs.where(cls.model.type.in_(types))
+        if suffix:
+            docs = docs.where(cls.model.suffix.in_(suffix))

        count = docs.count()
        if desc:
@@ -98,6 +100,54 @@ class DocumentService(CommonService):

        return list(docs.dicts()), count

+    @classmethod
+    @DB.connection_context()
+    def get_filter_by_kb_id(cls, kb_id, keywords, run_status, types, suffix):
+        """
+        Count the documents of one knowledge base that match the given filters,
+        grouped by file suffix and by run status.
+
+        `keywords` is lower-cased and matched against the lower-cased name;
+        `run_status`, `types` and `suffix` feed IN clauses. Falsy filters are skipped.
+
+        Returns a `(counters, total)` tuple, e.g.:
+            {
+                "suffix": {"ppt": 1, "docx": 2},
+                "run_status": {"1": 1, "2": 2}
+            }, 3
+        where "1" => RUNNING, "2" => CANCEL and `total` is the matching row count.
+        """
+        if keywords:
+            query = cls.model.select().where(
+                (cls.model.kb_id == kb_id),
+                (fn.LOWER(cls.model.name).contains(keywords.lower()))
+            )
+        else:
+            query  = cls.model.select().where(cls.model.kb_id == kb_id)
+
+        # Each optional filter below is applied only when its argument is truthy.
+        if run_status:
+            query = query.where(cls.model.run.in_(run_status))
+        if types:
+            query = query.where(cls.model.type.in_(types))
+        if suffix:
+            query = query.where(cls.model.suffix.in_(suffix))
+        # Select just `run` and `suffix`, the two columns tallied below.
+        rows = query.select(cls.model.run, cls.model.suffix)
+        total = rows.count()
+
+        suffix_counter = {}
+        run_status_counter = {}
+        # Tally every row; run values are keyed through str() in the result.
+        for row in rows:
+            suffix_counter[row.suffix] = suffix_counter.get(row.suffix, 0) + 1
+            run_status_counter[str(row.run)] = run_status_counter.get(str(row.run), 0) + 1
+
+        return {
+            "suffix": suffix_counter,
+            "run_status": run_status_counter
+        }, total
+
    @classmethod
    @DB.connection_context()
    def count_by_kb_id(cls, kb_id, keywords, run_status, types):
--- a/api/db/services/file_service.py
+++ b/api/db/services/file_service.py
@@ -17,6 +17,7 @@ import logging
 import os
 import re
 from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path

 from flask_login import current_user
 from peewee import fn
@@ -446,6 +447,7 @@ class FileService(CommonService):
                    "created_by": user_id,
                    "type": filetype,
                    "name": filename,
+                    "suffix": Path(filename).suffix.lstrip("."),
                    "location": location,
                    "size": len(blob),
                    "thumbnail": thumbnail_location,