mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-21 21:36:42 +08:00
Fix: debug pipeline... (#10311)
### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -15,10 +15,10 @@
|
||||
#
|
||||
from datetime import datetime
|
||||
|
||||
from peewee import fn
|
||||
from peewee import fn, JOIN
|
||||
|
||||
from api.db import StatusEnum, TenantPermission
|
||||
from api.db.db_models import DB, Document, Knowledgebase, Tenant, User, UserTenant
|
||||
from api.db.db_models import DB, Document, Knowledgebase, Tenant, User, UserTenant, UserCanvas
|
||||
from api.db.services.common_service import CommonService
|
||||
from api.utils import current_timestamp, datetime_format
|
||||
|
||||
@ -226,13 +226,17 @@ class KnowledgebaseService(CommonService):
|
||||
cls.model.chunk_num,
|
||||
cls.model.parser_id,
|
||||
cls.model.pipeline_id,
|
||||
UserCanvas.title,
|
||||
UserCanvas.avatar.alias("pipeline_avatar"),
|
||||
cls.model.parser_config,
|
||||
cls.model.pagerank,
|
||||
cls.model.create_time,
|
||||
cls.model.update_time
|
||||
]
|
||||
kbs = cls.model.select(*fields).join(Tenant, on=(
|
||||
(Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
|
||||
kbs = cls.model.select(*fields)\
|
||||
.join(Tenant, on=((Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value)))\
|
||||
.join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
|
||||
.where(
|
||||
(cls.model.id == kb_id),
|
||||
(cls.model.status == StatusEnum.VALID.value)
|
||||
)
|
||||
|
||||
@ -83,10 +83,7 @@ class PipelineOperationLogService(CommonService):
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def create(cls, document_id, pipeline_id, task_type, fake_document_ids=[]):
|
||||
from rag.flow.pipeline import Pipeline
|
||||
|
||||
dsl = ""
|
||||
def create(cls, document_id, pipeline_id, task_type, fake_document_ids=[], dsl:str="{}"):
|
||||
referred_document_id = document_id
|
||||
|
||||
if referred_document_id == GRAPH_RAPTOR_FAKE_DOC_ID and fake_document_ids:
|
||||
@ -108,13 +105,9 @@ class PipelineOperationLogService(CommonService):
|
||||
ok, user_pipeline = UserCanvasService.get_by_id(pipeline_id)
|
||||
if not ok:
|
||||
raise RuntimeError(f"Pipeline {pipeline_id} not found")
|
||||
|
||||
pipeline = Pipeline(dsl=json.dumps(user_pipeline.dsl), tenant_id=user_pipeline.user_id, doc_id=referred_document_id, task_id="", flow_id=pipeline_id)
|
||||
|
||||
tenant_id = user_pipeline.user_id
|
||||
title = user_pipeline.title
|
||||
avatar = user_pipeline.avatar
|
||||
dsl = json.loads(str(pipeline))
|
||||
else:
|
||||
ok, kb_info = KnowledgebaseService.get_by_id(document.kb_id)
|
||||
if not ok:
|
||||
@ -143,7 +136,7 @@ class PipelineOperationLogService(CommonService):
|
||||
progress_msg=document.progress_msg,
|
||||
process_begin_at=document.process_begin_at,
|
||||
process_duration=document.process_duration,
|
||||
dsl=dsl,
|
||||
dsl=json.loads(dsl),
|
||||
task_type=task_type,
|
||||
operation_status=operation_status,
|
||||
avatar=avatar,
|
||||
@ -162,7 +155,7 @@ class PipelineOperationLogService(CommonService):
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_file_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, keywords, operation_status, types, suffix):
|
||||
def get_file_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, keywords, operation_status, types, suffix, create_date_from=None, create_date_to=None):
|
||||
fields = cls.get_file_logs_fields()
|
||||
if keywords:
|
||||
logs = cls.model.select(*fields).where((cls.model.kb_id == kb_id), (fn.LOWER(cls.model.document_name).contains(keywords.lower())))
|
||||
@ -177,6 +170,10 @@ class PipelineOperationLogService(CommonService):
|
||||
logs = logs.where(cls.model.document_type.in_(types))
|
||||
if suffix:
|
||||
logs = logs.where(cls.model.document_suffix.in_(suffix))
|
||||
if create_date_from:
|
||||
logs = logs.where(cls.model.create_date >= create_date_from)
|
||||
if create_date_to:
|
||||
logs = logs.where(cls.model.create_date <= create_date_to)
|
||||
|
||||
count = logs.count()
|
||||
if desc:
|
||||
@ -205,12 +202,16 @@ class PipelineOperationLogService(CommonService):
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_dataset_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, operation_status):
|
||||
def get_dataset_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, operation_status, create_date_from=None, create_date_to=None):
|
||||
fields = cls.get_dataset_logs_fields()
|
||||
logs = cls.model.select(*fields).where((cls.model.kb_id == kb_id), (cls.model.document_id == GRAPH_RAPTOR_FAKE_DOC_ID))
|
||||
|
||||
if operation_status:
|
||||
logs = logs.where(cls.model.operation_status.in_(operation_status))
|
||||
if create_date_from:
|
||||
logs = logs.where(cls.model.create_date >= create_date_from)
|
||||
if create_date_to:
|
||||
logs = logs.where(cls.model.create_date <= create_date_to)
|
||||
|
||||
count = logs.count()
|
||||
if desc:
|
||||
|
||||
@ -488,8 +488,9 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
|
||||
task_type="dataflow" if not rerun else "dataflow_rerun",
|
||||
priority=priority,
|
||||
)
|
||||
|
||||
TaskService.model.delete().where(TaskService.model.id == task["id"]).execute()
|
||||
if doc_id not in [CANVAS_DEBUG_DOC_ID, GRAPH_RAPTOR_FAKE_DOC_ID]:
|
||||
TaskService.model.delete().where(TaskService.model.doc_id == doc_id).execute()
|
||||
DocumentService.begin2parse(doc_id)
|
||||
bulk_insert_into_db(model=Task, data_source=[task], replace_on_conflict=True)
|
||||
|
||||
task["kb_id"] = DocumentService.get_knowledgebase_id(doc_id)
|
||||
|
||||
Reference in New Issue
Block a user