Fix: debug hierachical merging... (#10337)

### What problem does this PR solve?


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu
2025-09-29 09:29:33 +08:00
committed by GitHub
parent 664bc0b961
commit 71f69cdb75
13 changed files with 113 additions and 46 deletions

View File

@ -18,7 +18,7 @@ import logging
import time
from uuid import uuid4
from agent.canvas import Canvas
from api.db import CanvasCategory, TenantPermission
from api.db import CanvasCategory
from api.db.db_models import DB, CanvasTemplate, User, UserCanvas, API4Conversation
from api.db.services.api_service import API4ConversationService
from api.db.services.common_service import CommonService
@ -104,6 +104,7 @@ class UserCanvasService(CommonService):
cls.model.dsl,
cls.model.description,
cls.model.permission,
cls.model.user_id.alias("tenant_id"),
User.nickname,
User.avatar.alias('tenant_avatar'),
cls.model.update_time,
@ -111,16 +112,15 @@ class UserCanvasService(CommonService):
]
if keywords:
agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
TenantPermission.TEAM.value)) | (
cls.model.user_id == user_id)),
(fn.LOWER(cls.model.title).contains(keywords.lower()))
cls.model.user_id.in_(joined_tenant_ids),
fn.LOWER(cls.model.title).contains(keywords.lower())
#(((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id)),
#(fn.LOWER(cls.model.title).contains(keywords.lower()))
)
else:
agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
TenantPermission.TEAM.value)) | (
cls.model.user_id == user_id))
cls.model.user_id.in_(joined_tenant_ids)
#(((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id))
)
if canvas_category:
agents = agents.where(cls.model.canvas_category == canvas_category)
@ -128,8 +128,10 @@ class UserCanvasService(CommonService):
agents = agents.order_by(cls.model.getter_by(orderby).desc())
else:
agents = agents.order_by(cls.model.getter_by(orderby).asc())
count = agents.count()
agents = agents.paginate(page_number, items_per_page)
if page_number and items_per_page:
agents = agents.paginate(page_number, items_per_page)
return list(agents.dicts()), count
@classmethod

View File

@ -29,7 +29,8 @@ from peewee import fn, Case, JOIN
from api import settings
from api.constants import IMG_BASE64_PREFIX, FILE_NAME_LEN_LIMIT
from api.db import FileType, LLMType, ParserType, StatusEnum, TaskStatus, UserTenantRole, CanvasCategory
from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTenant, File2Document, File, UserCanvas
from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTenant, File2Document, File, UserCanvas, \
User
from api.db.db_utils import bulk_insert_into_db
from api.db.services.common_service import CommonService
from api.db.services.knowledgebase_service import KnowledgebaseService
@ -121,19 +122,21 @@ class DocumentService(CommonService):
orderby, desc, keywords, run_status, types, suffix):
fields = cls.get_cls_model_fields()
if keywords:
docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name")])\
docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name"), User.nickname])\
.join(File2Document, on=(File2Document.document_id == cls.model.id))\
.join(File, on=(File.id == File2Document.file_id))\
.join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
.join(User, on=(cls.model.created_by == User.id), join_type=JOIN.LEFT_OUTER)\
.where(
(cls.model.kb_id == kb_id),
(fn.LOWER(cls.model.name).contains(keywords.lower()))
)
else:
docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name")])\
docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name"), User.nickname])\
.join(File2Document, on=(File2Document.document_id == cls.model.id))\
.join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
.join(File, on=(File.id == File2Document.file_id))\
.join(User, on=(cls.model.created_by == User.id), join_type=JOIN.LEFT_OUTER)\
.where(cls.model.kb_id == kb_id)
if run_status:

View File

@ -123,7 +123,7 @@ class PipelineOperationLogService(CommonService):
raise RuntimeError(f"Cannot find knowledge base {document.kb_id} for referred_document {referred_document_id}")
tenant_id = kb_info.tenant_id
title = document.name
title = document.parser_id
avatar = document.thumbnail
if task_type not in VALID_PIPELINE_TASK_TYPES:
@ -228,14 +228,12 @@ class PipelineOperationLogService(CommonService):
@classmethod
@DB.connection_context()
def get_documents_info(cls, id):
fields = [Document.id, Document.name, Document.progress]
fields = [Document.id, Document.name, Document.progress, Document.kb_id]
return (
cls.model.select(*fields)
.join(Document, on=(cls.model.document_id == Document.id))
.where(
cls.model.id == id,
Document.progress > 0,
Document.progress < 1,
cls.model.id == id
)
.dicts()
)

View File

@ -358,7 +358,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
page_size = doc["parser_config"].get("task_page_size") or 12
if doc["parser_id"] == "paper":
page_size = doc["parser_config"].get("task_page_size") or 22
if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC":
if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC" or doc["parser_config"].get("toc", True):
page_size = 10 ** 9
page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
for s, e in page_ranges:
@ -505,7 +505,6 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
task["kb_id"] = DocumentService.get_knowledgebase_id(doc_id)
task["tenant_id"] = tenant_id
task["task_type"] = "dataflow"
task["dataflow_id"] = flow_id
task["file"] = file