Feat: add OR logic operation for metadata filters. (#11404)

### What problem does this PR solve?

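Addresses #11376 and #11387: `meta_filter` now accepts an optional `logic` argument (default `"and"`), so the document sets matched by multiple metadata filter conditions can be combined with OR (union) as well as AND (intersection).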

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2025-11-20 14:31:12 +08:00
committed by GitHub
parent d2b1da0e26
commit 06cef71ba6
11 changed files with 129 additions and 48 deletions

View File

@ -177,7 +177,7 @@ class UserCanvasService(CommonService):
return True
def completion(tenant_id, agent_id, session_id=None, **kwargs):
async def completion(tenant_id, agent_id, session_id=None, **kwargs):
query = kwargs.get("query", "") or kwargs.get("question", "")
files = kwargs.get("files", [])
inputs = kwargs.get("inputs", {})
@ -219,7 +219,7 @@ def completion(tenant_id, agent_id, session_id=None, **kwargs):
"id": message_id
})
txt = ""
for ans in canvas.run(query=query, files=files, user_id=user_id, inputs=inputs):
async for ans in canvas.run(query=query, files=files, user_id=user_id, inputs=inputs):
ans["session_id"] = session_id
if ans["event"] == "message":
txt += ans["data"]["content"]
@ -237,7 +237,7 @@ def completion(tenant_id, agent_id, session_id=None, **kwargs):
API4ConversationService.append_message(conv["id"], conv)
def completion_openai(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs):
async def completion_openai(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs):
tiktoken_encoder = tiktoken.get_encoding("cl100k_base")
prompt_tokens = len(tiktoken_encoder.encode(str(question)))
user_id = kwargs.get("user_id", "")
@ -245,7 +245,7 @@ def completion_openai(tenant_id, agent_id, question, session_id=None, stream=Tru
if stream:
completion_tokens = 0
try:
for ans in completion(
async for ans in completion(
tenant_id=tenant_id,
agent_id=agent_id,
session_id=session_id,
@ -304,7 +304,7 @@ def completion_openai(tenant_id, agent_id, question, session_id=None, stream=Tru
try:
all_content = ""
reference = {}
for ans in completion(
async for ans in completion(
tenant_id=tenant_id,
agent_id=agent_id,
session_id=session_id,

View File

@ -287,7 +287,7 @@ def convert_conditions(metadata_condition):
]
def meta_filter(metas: dict, filters: list[dict]):
def meta_filter(metas: dict, filters: list[dict], logic: str = "and"):
doc_ids = set([])
def filter_out(v2docs, operator, value):
@ -331,7 +331,10 @@ def meta_filter(metas: dict, filters: list[dict]):
if not doc_ids:
doc_ids = set(ids)
else:
doc_ids = doc_ids & set(ids)
if logic == "and":
doc_ids = doc_ids & set(ids)
else:
doc_ids = doc_ids | set(ids)
if not doc_ids:
return []
return list(doc_ids)
@ -407,12 +410,12 @@ def chat(dialog, messages, stream=True, **kwargs):
if dialog.meta_data_filter:
metas = DocumentService.get_meta_by_kbs(dialog.kb_ids)
if dialog.meta_data_filter.get("method") == "auto":
filters = gen_meta_filter(chat_mdl, metas, questions[-1])
attachments.extend(meta_filter(metas, filters))
filters: dict = gen_meta_filter(chat_mdl, metas, questions[-1])
attachments.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not attachments:
attachments = None
elif dialog.meta_data_filter.get("method") == "manual":
attachments.extend(meta_filter(metas, dialog.meta_data_filter["manual"]))
attachments.extend(meta_filter(metas, dialog.meta_data_filter["manual"], dialog.meta_data_filter.get("logic", "and")))
if not attachments:
attachments = None
@ -778,12 +781,12 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}):
if meta_data_filter:
metas = DocumentService.get_meta_by_kbs(kb_ids)
if meta_data_filter.get("method") == "auto":
filters = gen_meta_filter(chat_mdl, metas, question)
doc_ids.extend(meta_filter(metas, filters))
filters: dict = gen_meta_filter(chat_mdl, metas, question)
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids:
doc_ids = None
elif meta_data_filter.get("method") == "manual":
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"]))
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if not doc_ids:
doc_ids = None
@ -853,12 +856,12 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
if meta_data_filter:
metas = DocumentService.get_meta_by_kbs(kb_ids)
if meta_data_filter.get("method") == "auto":
filters = gen_meta_filter(chat_mdl, metas, question)
doc_ids.extend(meta_filter(metas, filters))
filters: dict = gen_meta_filter(chat_mdl, metas, question)
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
if not doc_ids:
doc_ids = None
elif meta_data_filter.get("method") == "manual":
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"]))
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
if not doc_ids:
doc_ids = None