Feat: add data source to pipeline logs. (#11075)

### What problem does this PR solve?

#10953

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Author: Kevin Hu
Date: 2025-11-07 11:43:59 +08:00
Committed by: GitHub
Parent: 5629fbd2ca
Commit: 34283d4db4
7 changed files with 45 additions and 45 deletions


@@ -54,7 +54,6 @@ class ConnectorService(CommonService):
SyncLogsService.update_by_id(task["id"], task)
ConnectorService.update_by_id(connector_id, {"status": status})
@classmethod
def list(cls, tenant_id):
fields = [
@@ -67,6 +66,15 @@ class ConnectorService(CommonService):
cls.model.tenant_id == tenant_id
).dicts())
@classmethod
def rebuild(cls, kb_id:str, connector_id: str, tenant_id:str):
e, conn = cls.get_by_id(connector_id)
if not e:
return
SyncLogsService.filter_delete([SyncLogs.connector_id==connector_id, SyncLogs.kb_id==kb_id])
docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}")
return FileService.delete_docs([d.id for d in docs], tenant_id)
class SyncLogsService(CommonService):
model = SyncLogs
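The new `rebuild` helper gives callers an explicit way to wipe a connector's history for one knowledge base: it deletes the pair's `SyncLogs` rows and removes every document whose `source_type` is `"{source}/{connector_id}"`. A minimal usage sketch, assuming the ids are already known; the follow-up `schedule(..., reindex=True)` call mirrors the scheduling pattern used elsewhere in this service and is shown only as a plausible next step, not as part of this change:

```python
import logging

# Sketch: purge everything this connector produced for one knowledge base,
# then queue a from-the-beginning sync. rebuild() returns None when the
# connector does not exist, otherwise the error string from delete_docs.
err = ConnectorService.rebuild(kb_id, connector_id, tenant_id)
if err:
    logging.warning("rebuild left errors: %s", err)

SyncLogsService.schedule(connector_id, kb_id, reindex=True)
```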
@@ -91,6 +99,7 @@ class SyncLogsService(CommonService):
Connector.timeout_secs,
Knowledgebase.name.alias("kb_name"),
Knowledgebase.avatar.alias("kb_avatar"),
Connector2Kb.auto_parse,
cls.model.from_beginning.alias("reindex"),
cls.model.status
]
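Because `Connector2Kb.auto_parse` is now selected alongside the other columns, every task dict returned by this listing carries the per-link auto-parse preference (apparently stored as a string flag, judging by the `"0"` check further down). A hypothetical consumer of the listing would read it like this:

```python
# Hypothetical consumer of the task listing: the per-link flag rides along
# with the task dict and decides whether freshly uploaded docs are parsed
# immediately or left waiting for a manual parse.
auto_parse = task.get("auto_parse", True)          # "0" means "upload only"
errs, doc_ids = SyncLogsService.duplicate_and_parse(
    kb, docs, tenant_id, src, auto_parse=auto_parse
)
```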
@@ -179,7 +188,7 @@ class SyncLogsService(CommonService):
.where(cls.model.id == id).execute()
@classmethod
def duplicate_and_parse(cls, kb, docs, tenant_id, src):
def duplicate_and_parse(cls, kb, docs, tenant_id, src, auto_parse=True):
if not docs:
return None
@@ -191,14 +200,17 @@ class SyncLogsService(CommonService):
return self.blob
errs = []
files = [FileObj(filename=d["semantic_identifier"]+f".{d['extension']}", blob=d["blob"]) for d in docs]
files = [FileObj(filename=d["semantic_identifier"]+(f"{d['extension']}" if d["semantic_identifier"][::-1].find(d['extension'][::-1])<0 else ""), blob=d["blob"]) for d in docs]
doc_ids = []
err, doc_blob_pairs = FileService.upload_document(kb, files, tenant_id, src)
errs.extend(err)
kb_table_num_map = {}
for doc, _ in doc_blob_pairs:
DocumentService.run(tenant_id, doc, kb_table_num_map)
doc_ids.append(doc["id"])
if not auto_parse or auto_parse == "0":
continue
DocumentService.run(tenant_id, doc, kb_table_num_map)
return errs, doc_ids
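Two behavioural tweaks land here. First, the uploaded filename only gets the extension appended when it is not already part of the semantic identifier; the reversed-`find` expression is a substring containment check (reversing both strings preserves containment), written more plainly below. Second, when the link's `auto_parse` flag is falsy or the string `"0"`, the document is still uploaded and its id collected, but `DocumentService.run` is not invoked. A readable equivalent of the filename rule, with the extension assumed to already carry its leading dot since the new concatenation no longer adds one:

```python
def build_filename(semantic_identifier: str, extension: str) -> str:
    # Equivalent to: semantic_identifier[::-1].find(extension[::-1]) < 0
    # i.e. append the extension only when it does not already occur
    # anywhere in the identifier (a containment check, not a suffix check).
    if extension in semantic_identifier:
        return semantic_identifier
    return semantic_identifier + extension
```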
@@ -213,33 +225,6 @@ class SyncLogsService(CommonService):
class Connector2KbService(CommonService):
model = Connector2Kb
@classmethod
def link_kb(cls, conn_id:str, kb_ids: list[str], tenant_id:str):
arr = cls.query(connector_id=conn_id)
old_kb_ids = [a.kb_id for a in arr]
for kb_id in kb_ids:
if kb_id in old_kb_ids:
continue
cls.save(**{
"id": get_uuid(),
"connector_id": conn_id,
"kb_id": kb_id
})
SyncLogsService.schedule(conn_id, kb_id, reindex=True)
errs = []
e, conn = ConnectorService.get_by_id(conn_id)
for kb_id in old_kb_ids:
if kb_id in kb_ids:
continue
cls.filter_delete([cls.model.kb_id==kb_id, cls.model.connector_id==conn_id])
SyncLogsService.filter_update([SyncLogs.connector_id==conn_id, SyncLogs.kb_id==kb_id, SyncLogs.status==TaskStatus.SCHEDULE], {"status": TaskStatus.CANCEL})
docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}")
err = FileService.delete_docs([d.id for d in docs], tenant_id)
if err:
errs.append(err)
return "\n".join(errs)
@classmethod
def link_connectors(cls, kb_id:str, connector_ids: list[str], tenant_id:str):
arr = cls.query(kb_id=kb_id)
@@ -260,11 +245,15 @@ class Connector2KbService(CommonService):
continue
cls.filter_delete([cls.model.kb_id==kb_id, cls.model.connector_id==conn_id])
e, conn = ConnectorService.get_by_id(conn_id)
SyncLogsService.filter_update([SyncLogs.connector_id==conn_id, SyncLogs.kb_id==kb_id, SyncLogs.status==TaskStatus.SCHEDULE], {"status": TaskStatus.CANCEL})
docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}")
err = FileService.delete_docs([d.id for d in docs], tenant_id)
if err:
errs.append(err)
if not e:
continue
#SyncLogsService.filter_delete([SyncLogs.connector_id==conn_id, SyncLogs.kb_id==kb_id])
# Do not delete docs while unlinking.
SyncLogsService.filter_update([SyncLogs.connector_id==conn_id, SyncLogs.kb_id==kb_id, SyncLogs.status.in_([TaskStatus.SCHEDULE, TaskStatus.RUNNING])], {"status": TaskStatus.CANCEL})
#docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}")
#err = FileService.delete_docs([d.id for d in docs], tenant_id)
#if err:
# errs.append(err)
return "\n".join(errs)
@classmethod
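The net effect of this hunk is that unlinking a connector from a knowledge base no longer destroys data: scheduled, and now also running, sync tasks for the pair are cancelled, while the documents earlier syncs produced stay put. When a caller really does want those documents gone, the new `rebuild` helper above is the explicit path. A sketch of the two operations side by side (identifiers are illustrative):

```python
# Unlink only: cancel outstanding syncs for the dropped connectors,
# keep every document that was already ingested.
errs = Connector2KbService.link_connectors(kb_id, kept_connector_ids, tenant_id)

# Explicit purge: clear the pair's sync logs and delete the connector's
# documents so the next sync starts from scratch.
ConnectorService.rebuild(kb_id, connector_id, tenant_id)
```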
@@ -282,3 +271,5 @@ class Connector2KbService(CommonService):
).dicts()
)