Fix: Normalize embedding model ID comparison across datasets (#5169)

Modify the embedding model ID comparison to remove vendor suffixes, ensuring consistent model identification when working with multiple knowledge bases. This change affects dialog creation, chat operations, and document retrieval test functions.

### What problem does this PR solve?

Resolves this bug: https://github.com/infiniflow/ragflow/issues/5166

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: wenju.li <wenju.li@deepctr.cn>
2026-01-30 23:26:36 +08:00 · 2025-02-20 12:40:59 +08:00
parent ed943b1b5b
commit f298e55ded
3 changed files with 8 additions and 4 deletions
--- a/api/apps/sdk/chat.py
+++ b/api/apps/sdk/chat.py
@@ -41,7 +41,8 @@ def create(tenant_id):
        if kb.chunk_num == 0:
            return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
    kbs = KnowledgebaseService.get_by_ids(ids)
-    embd_count = list(set([kb.embd_id for kb in kbs]))
+    embd_ids = [TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs]  # remove vendor suffix for comparison
+    embd_count = list(set(embd_ids))
    if len(embd_count) != 1:
        return get_result(message='Datasets use different embedding models."',
                          code=settings.RetCode.AUTHENTICATION_ERROR)
@@ -176,7 +177,8 @@ def update(tenant_id, chat_id):
                if kb.chunk_num == 0:
                    return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
            kbs = KnowledgebaseService.get_by_ids(ids)
-            embd_count = list(set([kb.embd_id for kb in kbs]))
+            embd_ids = [TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs]  # remove vendor suffix for comparison
+            embd_count = list(set(embd_ids))
            if len(embd_count) != 1:
                return get_result(
                    message='Datasets use different embedding models."',
--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@@ -1305,7 +1305,7 @@ def retrieval_test(tenant_id):
        if not KnowledgebaseService.accessible(kb_id=id, user_id=tenant_id):
            return get_error_data_result(f"You don't own the dataset {id}.")
    kbs = KnowledgebaseService.get_by_ids(kb_ids)
-    embd_nms = list(set([kb.embd_id for kb in kbs]))
+    embd_nms = list(set([TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs]))  # remove vendor suffix for comparison
    if len(embd_nms) != 1:
        return get_result(
            message='Datasets use different embedding models."',