Fix: debug pipeline... (#10311)

### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Author: Kevin Hu
Date: 2025-09-26 19:11:30 +08:00 (committed by GitHub)
Parent: 771a38434f
Commit: 76b1ee2a00
18 changed files with 116 additions and 474 deletions

View File

@@ -15,10 +15,10 @@
 #
 from datetime import datetime
-from peewee import fn
+from peewee import fn, JOIN
 from api.db import StatusEnum, TenantPermission
-from api.db.db_models import DB, Document, Knowledgebase, Tenant, User, UserTenant
+from api.db.db_models import DB, Document, Knowledgebase, Tenant, User, UserTenant, UserCanvas
 from api.db.services.common_service import CommonService
 from api.utils import current_timestamp, datetime_format
@@ -226,13 +226,17 @@ class KnowledgebaseService(CommonService):
             cls.model.chunk_num,
             cls.model.parser_id,
             cls.model.pipeline_id,
+            UserCanvas.title,
+            UserCanvas.avatar.alias("pipeline_avatar"),
             cls.model.parser_config,
             cls.model.pagerank,
             cls.model.create_time,
             cls.model.update_time
         ]
-        kbs = cls.model.select(*fields).join(Tenant, on=(
-            (Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
+        kbs = cls.model.select(*fields)\
+            .join(Tenant, on=((Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value)))\
+            .join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
+            .where(
             (cls.model.id == kb_id),
             (cls.model.status == StatusEnum.VALID.value)
         )
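
The dataset detail query above now LEFT-OUTER-joins UserCanvas so the linked ingestion pipeline's title and avatar come back with the knowledgebase row, while datasets with no pipeline_id are still returned; aliasing the joined avatar as pipeline_avatar presumably keeps it from clashing with the dataset's own avatar column. Below is a minimal, standalone sketch of that peewee pattern; the toy models and rows are assumptions for illustration, not the real api.db.db_models definitions.

```python
# Minimal, self-contained sketch of the LEFT OUTER JOIN used above (toy models,
# not the RAGFlow ones): datasets without a pipeline_id still come back, with
# the canvas avatar exposed under an alias.
from peewee import SqliteDatabase, Model, CharField, JOIN

db = SqliteDatabase(":memory:")

class UserCanvas(Model):
    id = CharField(primary_key=True)
    title = CharField()
    avatar = CharField(null=True)
    class Meta:
        database = db

class Knowledgebase(Model):
    id = CharField(primary_key=True)
    name = CharField()
    pipeline_id = CharField(null=True)  # may be unset, hence the LEFT OUTER JOIN
    class Meta:
        database = db

db.create_tables([UserCanvas, Knowledgebase])
UserCanvas.create(id="c1", title="Ingest flow", avatar="avatar.png")
Knowledgebase.create(id="kb1", name="with pipeline", pipeline_id="c1")
Knowledgebase.create(id="kb2", name="without pipeline", pipeline_id=None)

fields = [
    Knowledgebase.id,
    Knowledgebase.name,
    UserCanvas.title,
    UserCanvas.avatar.alias("pipeline_avatar"),
]
rows = (Knowledgebase
        .select(*fields)
        .join(UserCanvas, on=(Knowledgebase.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)
        .dicts())
for r in rows:
    print(r)  # kb2 yields None for title/pipeline_avatar instead of being dropped
```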

View File

@@ -83,10 +83,7 @@ class PipelineOperationLogService(CommonService):
     @classmethod
     @DB.connection_context()
-    def create(cls, document_id, pipeline_id, task_type, fake_document_ids=[]):
-        from rag.flow.pipeline import Pipeline
-        dsl = ""
+    def create(cls, document_id, pipeline_id, task_type, fake_document_ids=[], dsl:str="{}"):
         referred_document_id = document_id
         if referred_document_id == GRAPH_RAPTOR_FAKE_DOC_ID and fake_document_ids:
@@ -108,13 +105,9 @@ class PipelineOperationLogService(CommonService):
             ok, user_pipeline = UserCanvasService.get_by_id(pipeline_id)
             if not ok:
                 raise RuntimeError(f"Pipeline {pipeline_id} not found")
-            pipeline = Pipeline(dsl=json.dumps(user_pipeline.dsl), tenant_id=user_pipeline.user_id, doc_id=referred_document_id, task_id="", flow_id=pipeline_id)
             tenant_id = user_pipeline.user_id
             title = user_pipeline.title
             avatar = user_pipeline.avatar
-            dsl = json.loads(str(pipeline))
         else:
             ok, kb_info = KnowledgebaseService.get_by_id(document.kb_id)
             if not ok:
@@ -143,7 +136,7 @@ class PipelineOperationLogService(CommonService):
             progress_msg=document.progress_msg,
             process_begin_at=document.process_begin_at,
             process_duration=document.process_duration,
-            dsl=dsl,
+            dsl=json.loads(dsl),
             task_type=task_type,
             operation_status=operation_status,
             avatar=avatar,
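
The hunks above move DSL handling out of PipelineOperationLogService.create: instead of instantiating a Pipeline just to serialize its DSL, the caller now passes the DSL as a JSON string (defaulting to "{}"), and create() deserializes it when building the log row. Here is a toy stand-in for that contract, not the real service:

```python
# Toy stand-in mirroring the new create() contract: the DSL travels as a JSON
# string with a safe default of "{}" and is deserialized when the row is built.
import json

def create(document_id, pipeline_id, task_type, fake_document_ids=None, dsl: str = "{}"):
    fake_document_ids = fake_document_ids or []  # kept to mirror the real signature
    return {
        "document_id": document_id,
        "pipeline_id": pipeline_id,
        "task_type": task_type,
        "dsl": json.loads(dsl),  # json.loads("") would raise, hence the "{}" default
    }

print(create("doc-1", "flow-1", "dataflow"))                            # dsl -> {}
print(create("doc-1", "flow-1", "dataflow", dsl='{"components": {}}'))  # dsl -> dict
```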
@@ -162,7 +155,7 @@ class PipelineOperationLogService(CommonService):
     @classmethod
     @DB.connection_context()
-    def get_file_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, keywords, operation_status, types, suffix):
+    def get_file_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, keywords, operation_status, types, suffix, create_date_from=None, create_date_to=None):
         fields = cls.get_file_logs_fields()
         if keywords:
             logs = cls.model.select(*fields).where((cls.model.kb_id == kb_id), (fn.LOWER(cls.model.document_name).contains(keywords.lower())))
@@ -177,6 +170,10 @@ class PipelineOperationLogService(CommonService):
             logs = logs.where(cls.model.document_type.in_(types))
         if suffix:
             logs = logs.where(cls.model.document_suffix.in_(suffix))
+        if create_date_from:
+            logs = logs.where(cls.model.create_date >= create_date_from)
+        if create_date_to:
+            logs = logs.where(cls.model.create_date <= create_date_to)
         count = logs.count()
         if desc:
@@ -205,12 +202,16 @@ class PipelineOperationLogService(CommonService):
     @classmethod
     @DB.connection_context()
-    def get_dataset_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, operation_status):
+    def get_dataset_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, operation_status, create_date_from=None, create_date_to=None):
         fields = cls.get_dataset_logs_fields()
         logs = cls.model.select(*fields).where((cls.model.kb_id == kb_id), (cls.model.document_id == GRAPH_RAPTOR_FAKE_DOC_ID))
         if operation_status:
             logs = logs.where(cls.model.operation_status.in_(operation_status))
+        if create_date_from:
+            logs = logs.where(cls.model.create_date >= create_date_from)
+        if create_date_to:
+            logs = logs.where(cls.model.create_date <= create_date_to)
         count = logs.count()
         if desc:
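
Both log listings, get_file_logs_by_kb_id and get_dataset_logs_by_kb_id, gain an optional inclusive create_date_from / create_date_to window. The standalone peewee sketch below shows the same conditional-filter pattern; the OpLog model and sample rows are made up for illustration.

```python
# Standalone illustration of the optional inclusive date window; the model and
# rows are invented and not part of the PR.
from datetime import date
from peewee import SqliteDatabase, Model, CharField, DateField

db = SqliteDatabase(":memory:")

class OpLog(Model):
    document_name = CharField()
    create_date = DateField()
    class Meta:
        database = db

db.create_tables([OpLog])
OpLog.create(document_name="a.pdf", create_date=date(2025, 9, 1))
OpLog.create(document_name="b.pdf", create_date=date(2025, 9, 20))
OpLog.create(document_name="c.pdf", create_date=date(2025, 10, 2))

def list_logs(create_date_from=None, create_date_to=None):
    logs = OpLog.select()
    # Each bound is applied only when provided, exactly like the service methods;
    # successive .where() calls are AND-ed together by peewee.
    if create_date_from:
        logs = logs.where(OpLog.create_date >= create_date_from)
    if create_date_to:
        logs = logs.where(OpLog.create_date <= create_date_to)
    return [log.document_name for log in logs]

print(list_logs(create_date_from=date(2025, 9, 10), create_date_to=date(2025, 9, 30)))
# -> ['b.pdf']
```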

View File

@@ -488,8 +488,9 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
         task_type="dataflow" if not rerun else "dataflow_rerun",
         priority=priority,
     )
-    TaskService.model.delete().where(TaskService.model.id == task["id"]).execute()
+    if doc_id not in [CANVAS_DEBUG_DOC_ID, GRAPH_RAPTOR_FAKE_DOC_ID]:
+        TaskService.model.delete().where(TaskService.model.doc_id == doc_id).execute()
     DocumentService.begin2parse(doc_id)
     bulk_insert_into_db(model=Task, data_source=[task], replace_on_conflict=True)
     task["kb_id"] = DocumentService.get_knowledgebase_id(doc_id)