Fix: debug pipeline... (#10311)

### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Author: Kevin Hu
Date: 2025-09-26 19:11:30 +08:00 (committed by GitHub)
Parent: 771a38434f
Commit: 76b1ee2a00
18 changed files with 116 additions and 474 deletions

View File

@@ -15,10 +15,10 @@
 #
 from datetime import datetime
-from peewee import fn
+from peewee import fn, JOIN
 from api.db import StatusEnum, TenantPermission
-from api.db.db_models import DB, Document, Knowledgebase, Tenant, User, UserTenant
+from api.db.db_models import DB, Document, Knowledgebase, Tenant, User, UserTenant, UserCanvas
 from api.db.services.common_service import CommonService
 from api.utils import current_timestamp, datetime_format
@@ -226,13 +226,17 @@ class KnowledgebaseService(CommonService):
             cls.model.chunk_num,
             cls.model.parser_id,
             cls.model.pipeline_id,
+            UserCanvas.title,
+            UserCanvas.avatar.alias("pipeline_avatar"),
             cls.model.parser_config,
             cls.model.pagerank,
             cls.model.create_time,
             cls.model.update_time
         ]
-        kbs = cls.model.select(*fields).join(Tenant, on=(
-            (Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
+        kbs = cls.model.select(*fields)\
+            .join(Tenant, on=((Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value)))\
+            .join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
+            .where(
             (cls.model.id == kb_id),
             (cls.model.status == StatusEnum.VALID.value)
         )
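
The dataset detail query above now LEFT-OUTER-joins UserCanvas so the linked ingestion pipeline's title and avatar come back with the knowledgebase row, while datasets with no pipeline_id are still returned; aliasing the joined avatar as pipeline_avatar presumably keeps it from clashing with the dataset's own avatar column. Below is a minimal, standalone sketch of that peewee pattern; the toy models and rows are assumptions for illustration, not the real api.db.db_models definitions.

```python
# Minimal, self-contained sketch of the LEFT OUTER JOIN used above (toy models,
# not the RAGFlow ones): datasets without a pipeline_id still come back, with
# the canvas avatar exposed under an alias.
from peewee import SqliteDatabase, Model, CharField, JOIN

db = SqliteDatabase(":memory:")

class UserCanvas(Model):
    id = CharField(primary_key=True)
    title = CharField()
    avatar = CharField(null=True)
    class Meta:
        database = db

class Knowledgebase(Model):
    id = CharField(primary_key=True)
    name = CharField()
    pipeline_id = CharField(null=True)  # may be unset, hence the LEFT OUTER JOIN
    class Meta:
        database = db

db.create_tables([UserCanvas, Knowledgebase])
UserCanvas.create(id="c1", title="Ingest flow", avatar="avatar.png")
Knowledgebase.create(id="kb1", name="with pipeline", pipeline_id="c1")
Knowledgebase.create(id="kb2", name="without pipeline", pipeline_id=None)

fields = [
    Knowledgebase.id,
    Knowledgebase.name,
    UserCanvas.title,
    UserCanvas.avatar.alias("pipeline_avatar"),
]
rows = (Knowledgebase
        .select(*fields)
        .join(UserCanvas, on=(Knowledgebase.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)
        .dicts())
for r in rows:
    print(r)  # kb2 yields None for title/pipeline_avatar instead of being dropped
```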

View File

@@ -83,10 +83,7 @@ class PipelineOperationLogService(CommonService):
     @classmethod
     @DB.connection_context()
-    def create(cls, document_id, pipeline_id, task_type, fake_document_ids=[]):
-        from rag.flow.pipeline import Pipeline
-        dsl = ""
+    def create(cls, document_id, pipeline_id, task_type, fake_document_ids=[], dsl:str="{}"):
         referred_document_id = document_id
         if referred_document_id == GRAPH_RAPTOR_FAKE_DOC_ID and fake_document_ids:
@@ -108,13 +105,9 @@ class PipelineOperationLogService(CommonService):
             ok, user_pipeline = UserCanvasService.get_by_id(pipeline_id)
             if not ok:
                 raise RuntimeError(f"Pipeline {pipeline_id} not found")
-            pipeline = Pipeline(dsl=json.dumps(user_pipeline.dsl), tenant_id=user_pipeline.user_id, doc_id=referred_document_id, task_id="", flow_id=pipeline_id)
             tenant_id = user_pipeline.user_id
             title = user_pipeline.title
             avatar = user_pipeline.avatar
-            dsl = json.loads(str(pipeline))
         else:
             ok, kb_info = KnowledgebaseService.get_by_id(document.kb_id)
             if not ok:
@@ -143,7 +136,7 @@ class PipelineOperationLogService(CommonService):
             progress_msg=document.progress_msg,
             process_begin_at=document.process_begin_at,
             process_duration=document.process_duration,
-            dsl=dsl,
+            dsl=json.loads(dsl),
             task_type=task_type,
             operation_status=operation_status,
             avatar=avatar,
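
The hunks above move DSL handling out of PipelineOperationLogService.create: instead of instantiating a Pipeline just to serialize its DSL, the caller now passes the DSL as a JSON string (defaulting to "{}"), and create() deserializes it when building the log row. Here is a toy stand-in for that contract, not the real service:

```python
# Toy stand-in mirroring the new create() contract: the DSL travels as a JSON
# string with a safe default of "{}" and is deserialized when the row is built.
import json

def create(document_id, pipeline_id, task_type, fake_document_ids=None, dsl: str = "{}"):
    fake_document_ids = fake_document_ids or []  # kept to mirror the real signature
    return {
        "document_id": document_id,
        "pipeline_id": pipeline_id,
        "task_type": task_type,
        "dsl": json.loads(dsl),  # json.loads("") would raise, hence the "{}" default
    }

print(create("doc-1", "flow-1", "dataflow"))                            # dsl -> {}
print(create("doc-1", "flow-1", "dataflow", dsl='{"components": {}}'))  # dsl -> dict
```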
@@ -162,7 +155,7 @@ class PipelineOperationLogService(CommonService):
     @classmethod
     @DB.connection_context()
-    def get_file_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, keywords, operation_status, types, suffix):
+    def get_file_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, keywords, operation_status, types, suffix, create_date_from=None, create_date_to=None):
         fields = cls.get_file_logs_fields()
         if keywords:
             logs = cls.model.select(*fields).where((cls.model.kb_id == kb_id), (fn.LOWER(cls.model.document_name).contains(keywords.lower())))
@@ -177,6 +170,10 @@ class PipelineOperationLogService(CommonService):
             logs = logs.where(cls.model.document_type.in_(types))
         if suffix:
             logs = logs.where(cls.model.document_suffix.in_(suffix))
+        if create_date_from:
+            logs = logs.where(cls.model.create_date >= create_date_from)
+        if create_date_to:
+            logs = logs.where(cls.model.create_date <= create_date_to)
         count = logs.count()
         if desc:
@@ -205,12 +202,16 @@ class PipelineOperationLogService(CommonService):
     @classmethod
     @DB.connection_context()
-    def get_dataset_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, operation_status):
+    def get_dataset_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, operation_status, create_date_from=None, create_date_to=None):
         fields = cls.get_dataset_logs_fields()
         logs = cls.model.select(*fields).where((cls.model.kb_id == kb_id), (cls.model.document_id == GRAPH_RAPTOR_FAKE_DOC_ID))
         if operation_status:
             logs = logs.where(cls.model.operation_status.in_(operation_status))
+        if create_date_from:
+            logs = logs.where(cls.model.create_date >= create_date_from)
+        if create_date_to:
+            logs = logs.where(cls.model.create_date <= create_date_to)
         count = logs.count()
         if desc:
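
Both log listings, get_file_logs_by_kb_id and get_dataset_logs_by_kb_id, gain an optional inclusive create_date_from / create_date_to window. The standalone peewee sketch below shows the same conditional-filter pattern; the OpLog model and sample rows are made up for illustration.

```python
# Standalone illustration of the optional inclusive date window; the model and
# rows are invented and not part of the PR.
from datetime import date
from peewee import SqliteDatabase, Model, CharField, DateField

db = SqliteDatabase(":memory:")

class OpLog(Model):
    document_name = CharField()
    create_date = DateField()
    class Meta:
        database = db

db.create_tables([OpLog])
OpLog.create(document_name="a.pdf", create_date=date(2025, 9, 1))
OpLog.create(document_name="b.pdf", create_date=date(2025, 9, 20))
OpLog.create(document_name="c.pdf", create_date=date(2025, 10, 2))

def list_logs(create_date_from=None, create_date_to=None):
    logs = OpLog.select()
    # Each bound is applied only when provided, exactly like the service methods;
    # successive .where() calls are AND-ed together by peewee.
    if create_date_from:
        logs = logs.where(OpLog.create_date >= create_date_from)
    if create_date_to:
        logs = logs.where(OpLog.create_date <= create_date_to)
    return [log.document_name for log in logs]

print(list_logs(create_date_from=date(2025, 9, 10), create_date_to=date(2025, 9, 30)))
# -> ['b.pdf']
```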

View File

@@ -488,8 +488,9 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
         task_type="dataflow" if not rerun else "dataflow_rerun",
         priority=priority,
     )
-    TaskService.model.delete().where(TaskService.model.id == task["id"]).execute()
+    if doc_id not in [CANVAS_DEBUG_DOC_ID, GRAPH_RAPTOR_FAKE_DOC_ID]:
+        TaskService.model.delete().where(TaskService.model.doc_id == doc_id).execute()
     DocumentService.begin2parse(doc_id)
     bulk_insert_into_db(model=Task, data_source=[task], replace_on_conflict=True)
     task["kb_id"] = DocumentService.get_knowledgebase_id(doc_id)