Fix: parent-child chunking method (#11810)

### What problem does this PR solve?

change:
parent-child chunking method

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
buua436
2025-12-09 09:34:01 +08:00
committed by GitHub
parent 5c9672a265
commit dd046be976
3 changed files with 7 additions and 4 deletions

View File

@ -727,17 +727,17 @@ async def insert_es(task_id, task_tenant_id, task_dataset_id, chunks, progress_c
if not mom:
continue
id = xxhash.xxh64(mom.encode("utf-8")).hexdigest()
ck["mom_id"] = id
if id in mother_ids:
continue
mother_ids.add(id)
ck["mom_id"] = id
mom_ck = copy.deepcopy(ck)
mom_ck["id"] = id
mom_ck["content_with_weight"] = mom
mom_ck["available_int"] = 0
flds = list(mom_ck.keys())
for fld in flds:
if fld not in ["id", "content_with_weight", "doc_id", "kb_id", "available_int", "position_int"]:
if fld not in ["id", "content_with_weight", "doc_id", "docnm_kwd", "kb_id", "available_int", "position_int"]:
del mom_ck[fld]
mothers.append(mom_ck)