Feat: Hash doc id to avoid duplicate name. (#12573)

### What problem does this PR solve?

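Feat: Hash doc id to avoid duplicate name. Uploaded files now carry a document id; when a document with that id already exists, its stored blob and size are updated in place instead of creating a new document under a de-duplicated name.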

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Magicbook1108
2026-01-15 14:02:15 +08:00
committed by GitHub
parent 9a10558f80
commit b40a7b2e7d
4 changed files with 18 additions and 4 deletions

View File

@ -29,7 +29,6 @@ from common.misc_utils import get_uuid
from common.constants import TaskStatus
from common.time_utils import current_timestamp, timestamp_to_date
class ConnectorService(CommonService):
model = Connector
@ -202,6 +201,7 @@ class SyncLogsService(CommonService):
return None
class FileObj(BaseModel):
id: str
filename: str
blob: bytes
@ -209,7 +209,7 @@ class SyncLogsService(CommonService):
return self.blob
errs = []
files = [FileObj(filename=d["semantic_identifier"]+(f"{d['extension']}" if d["semantic_identifier"][::-1].find(d['extension'][::-1])<0 else ""), blob=d["blob"]) for d in docs]
files = [FileObj(id=d["id"], filename=d["semantic_identifier"]+(f"{d['extension']}" if d["semantic_identifier"][::-1].find(d['extension'][::-1])<0 else ""), blob=d["blob"]) for d in docs]
doc_ids = []
err, doc_blob_pairs = FileService.upload_document(kb, files, tenant_id, src)
errs.extend(err)

View File

@ -439,6 +439,15 @@ class FileService(CommonService):
err, files = [], []
for file in file_objs:
doc_id = file.id if hasattr(file, "id") else get_uuid()
e, doc = DocumentService.get_by_id(doc_id)
if e:
blob = file.read()
settings.STORAGE_IMPL.put(kb.id, doc.location, blob, kb.tenant_id)
doc.size = len(blob)
doc = doc.to_dict()
DocumentService.update_by_id(doc["id"], doc)
continue
try:
DocumentService.check_doc_health(kb.tenant_id, file.filename)
filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id)
@ -455,7 +464,6 @@ class FileService(CommonService):
blob = read_potential_broken_pdf(blob)
settings.STORAGE_IMPL.put(kb.id, location, blob)
doc_id = get_uuid()
img = thumbnail_img(filename, blob)
thumbnail_location = ""