From c35384024452127c6cc7aa99a8da1948835c19a8 Mon Sep 17 00:00:00 2001
From: Yongteng Lei <yongtengrey@outlook.com>
Date: Thu, 18 Sep 2025 09:52:33 +0800
Subject: [PATCH] Feat: add support for KB document basic info (#10134)

### What problem does this PR solve?

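Add support for KB document basic info: a new `GET /basic_info` endpoint that, for a given `kb_id`, returns how many of the knowledge base's documents are processing, finished, failed, or cancelled.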

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 api/apps/kb_app.py                  | 16 +++++++++
 api/db/services/document_service.py | 50 ++++++++++++++++++++++++++++-
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py
index 2e86a31bd..33008c946 100644
--- a/api/apps/kb_app.py
+++ b/api/apps/kb_app.py
@@ -379,3 +379,19 @@ def get_meta():
                 code=settings.RetCode.AUTHENTICATION_ERROR
             )
     return get_json_result(data=DocumentService.get_meta_by_kbs(kb_ids))
+
+
+@manager.route("/basic_info", methods=["GET"])  # noqa: F821
+@login_required
+def get_basic_info():
+    """Return document status counts for the knowledge base named by the ``kb_id`` query argument."""
+    requested_kb = request.args.get("kb_id", "")
+
+    # Callers with access get the counts straight from the document service.
+    if KnowledgebaseService.accessible(requested_kb, current_user.id):
+        return get_json_result(data=DocumentService.knowledgebase_basic_info(requested_kb))
+
+    # Everyone else is refused with the authentication error code.
+    return get_json_result(
+        data=False, message='No authorization.', code=settings.RetCode.AUTHENTICATION_ERROR
+    )
diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index 3ebee2b7a..23eef474f 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -24,7 +24,7 @@ from io import BytesIO
 
 import trio
 import xxhash
-from peewee import fn
+from peewee import fn, Case
 
 from api import settings
 from api.constants import IMG_BASE64_PREFIX, FILE_NAME_LEN_LIMIT
@@ -674,6 +674,53 @@ class DocumentService(CommonService):
         return False
 
 
+    @classmethod
+    @DB.connection_context()
+    def knowledgebase_basic_info(cls, kb_id: str) -> dict[str, int]:
+        """Count the documents of one knowledge base by status.
+
+        Cancelled documents are identified by ``run`` alone, because their
+        ``progress`` can hold any value. Every other document is classified by
+        ``progress``: 1 is finished, -1 is failed, and 0 <= progress < 1 is
+        processing.
+
+        Args:
+            kb_id: ID of the knowledge base whose documents are counted.
+
+        Returns:
+            A dict with the integer counts ``processing``, ``finished``,
+            ``failed`` and ``cancelled``.
+        """
+        doc = cls.model
+        in_kb = doc.kb_id == kb_id
+        is_cancelled = doc.run == TaskStatus.CANCEL
+
+        def tally(condition):
+            # SUM(CASE WHEN condition THEN 1 ELSE 0 END); COALESCE maps the NULL
+            # that SUM yields over zero rows to 0.
+            return fn.COALESCE(fn.SUM(Case(None, [(condition, 1)], 0)), 0)
+
+        cancelled = doc.select(fn.COUNT(1)).where(in_kb & is_cancelled).scalar()
+
+        # A NULL run is kept explicitly: in SQL, ``run != CANCEL`` is not true for NULL.
+        still_tracked = doc.run.is_null(True) | (doc.run != TaskStatus.CANCEL)
+        in_progress = (doc.progress == 0) | ((doc.progress > 0) & (doc.progress < 1))
+
+        counts = (
+            doc.select(
+                tally(doc.progress == 1).alias("finished"),
+                tally(doc.progress == -1).alias("failed"),
+                tally(in_progress).alias("processing"),
+            )
+            .where(in_kb & still_tracked)
+            .dicts()
+            .get()
+        )
+
+        tallies = {name: int(counts[name]) for name in ("processing", "finished", "failed")}
+        tallies["cancelled"] = int(cancelled)
+        return tallies
+
 def queue_raptor_o_graphrag_tasks(doc, ty, priority):
     chunking_config = DocumentService.get_chunking_config(doc["id"])
     hasher = xxhash.xxh64()
@@ -849,3 +896,4 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
             doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0)
 
     return [d["id"] for d, _ in files]
+