Feat: debug sync data. (#11073)

### What problem does this PR solve?

#10953 

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu
2025-11-06 16:48:04 +08:00
committed by GitHub
parent e18c408759
commit 3bd1fefe1f
6 changed files with 96 additions and 29 deletions

View File

@ -73,7 +73,8 @@ def get_connector(connector_id):
@login_required
def list_logs(connector_id):
req = request.args.to_dict(flat=True)
return get_json_result(data=SyncLogsService.list_sync_tasks(connector_id, int(req.get("page", 1)), int(req.get("page_size", 15))))
arr, total = SyncLogsService.list_sync_tasks(connector_id, int(req.get("page", 1)), int(req.get("page_size", 15)))
return get_json_result(data={"total": total, "logs": arr})
@manager.route("/<connector_id>/resume", methods=["PUT"]) # noqa: F821

View File

@ -122,10 +122,9 @@ def update():
if not e:
return get_data_error_result(
message="Database error (Knowledgebase rename)!")
if connectors:
errors = Connector2KbService.link_connectors(kb.id, [conn["id"] for conn in connectors], current_user.id)
if errors:
logging.error("Link KB errors: ", errors)
errors = Connector2KbService.link_connectors(kb.id, [conn["id"] for conn in connectors], current_user.id)
if errors:
logging.error("Link KB errors: ", errors)
kb = kb.to_dict()
kb.update(req)

View File

@ -15,6 +15,7 @@
#
import logging
from datetime import datetime
from typing import Tuple, List
from anthropic import BaseModel
from peewee import SQL, fn
@ -71,7 +72,7 @@ class SyncLogsService(CommonService):
model = SyncLogs
@classmethod
def list_sync_tasks(cls, connector_id=None, page_number=None, items_per_page=15):
def list_sync_tasks(cls, connector_id=None, page_number=None, items_per_page=15) -> Tuple[List[dict], int]:
fields = [
cls.model.id,
cls.model.connector_id,
@ -113,10 +114,11 @@ class SyncLogsService(CommonService):
)
query = query.distinct().order_by(cls.model.update_time.desc())
totbal = query.count()
if page_number:
query = query.paginate(page_number, items_per_page)
return list(query.dicts())
return list(query.dicts()), totbal
@classmethod
def start(cls, id, connector_id):
@ -130,6 +132,14 @@ class SyncLogsService(CommonService):
@classmethod
def schedule(cls, connector_id, kb_id, poll_range_start=None, reindex=False, total_docs_indexed=0):
try:
if cls.model.select().where(cls.model.kb_id == kb_id, cls.model.connector_id == connector_id).count() > 100:
rm_ids = [m.id for m in cls.model.select(cls.model.id).where(cls.model.kb_id == kb_id, cls.model.connector_id == connector_id).order_by(cls.model.update_time.asc()).limit(70)]
deleted = cls.model.delete().where(cls.model.id.in_(rm_ids)).execute()
logging.info(f"[SyncLogService] Cleaned {deleted} old logs.")
except Exception as e:
logging.exception(e)
try:
e = cls.query(kb_id=kb_id, connector_id=connector_id, status=TaskStatus.SCHEDULE)
if e:
@ -185,11 +195,10 @@ class SyncLogsService(CommonService):
doc_ids = []
err, doc_blob_pairs = FileService.upload_document(kb, files, tenant_id, src)
errs.extend(err)
if not err:
kb_table_num_map = {}
for doc, _ in doc_blob_pairs:
DocumentService.run(tenant_id, doc, kb_table_num_map)
doc_ids.append(doc["id"])
kb_table_num_map = {}
for doc, _ in doc_blob_pairs:
DocumentService.run(tenant_id, doc, kb_table_num_map)
doc_ids.append(doc["id"])
return errs, doc_ids

View File

@ -623,7 +623,8 @@ class DocumentService(CommonService):
cls.update_by_id(
docid, {"progress": random.random() * 1 / 100.,
"progress_msg": "Task is queued...",
"process_begin_at": get_format_time()
"process_begin_at": get_format_time(),
"run": TaskStatus.RUNNING.value
})
@classmethod