diff --git a/agent/component/llm.py b/agent/component/llm.py index 39e043aeb..e9d877068 100644 --- a/agent/component/llm.py +++ b/agent/component/llm.py @@ -56,7 +56,6 @@ class LLMParam(ComponentParamBase): self.check_nonnegative_number(int(self.max_tokens), "[Agent] Max tokens") self.check_decimal_float(float(self.top_p), "[Agent] Top P") self.check_empty(self.llm_id, "[Agent] LLM") - self.check_empty(self.sys_prompt, "[Agent] System prompt") self.check_empty(self.prompts, "[Agent] User prompt") def gen_conf(self): diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py index 7b8d222d4..2f9e143f9 100644 --- a/api/db/services/document_service.py +++ b/api/db/services/document_service.py @@ -696,10 +696,12 @@ class DocumentService(CommonService): for k,v in r.meta_fields.items(): if k not in meta: meta[k] = {} - v = str(v) - if v not in meta[k]: - meta[k][v] = [] - meta[k][v].append(doc_id) + if not isinstance(v, list): + v = [v] + for vv in v: + if vv not in meta[k]: + meta[k][vv] = [] + meta[k][vv].append(doc_id) return meta @classmethod diff --git a/common/metadata_utils.py b/common/metadata_utils.py index ca6d36598..4d4cceb4a 100644 --- a/common/metadata_utils.py +++ b/common/metadata_utils.py @@ -44,21 +44,27 @@ def meta_filter(metas: dict, filters: list[dict], logic: str = "and"): def filter_out(v2docs, operator, value): ids = [] for input, docids in v2docs.items(): + if operator in ["=", "≠", ">", "<", "≥", "≤"]: try: + if isinstance(input, list): + input = input[0] input = float(input) value = float(value) except Exception: - input = str(input) - value = str(value) + pass + if isinstance(input, str): + input = input.lower() + if isinstance(value, str): + value = value.lower() for conds in [ - (operator == "contains", str(value).lower() in str(input).lower()), - (operator == "not contains", str(value).lower() not in str(input).lower()), - (operator == "in", str(input).lower() in str(value).lower()), - (operator == "not in", str(input).lower() not in str(value).lower()), - (operator == "start with", str(input).lower().startswith(str(value).lower())), - (operator == "end with", str(input).lower().endswith(str(value).lower())), + (operator == "contains", input in value if not isinstance(input, list) else all([i in value for i in input])), + (operator == "not contains", input not in value if not isinstance(input, list) else all([i not in value for i in input])), + (operator == "in", input in value if not isinstance(input, list) else all([i in value for i in input])), + (operator == "not in", input not in value if not isinstance(input, list) else all([i not in value for i in input])), + (operator == "start with", str(input).lower().startswith(str(value).lower()) if not isinstance(input, list) else "".join([str(i).lower() for i in input]).startswith(str(value).lower())), + (operator == "end with", str(input).lower().endswith(str(value).lower()) if not isinstance(input, list) else "".join([str(i).lower() for i in input]).endswith(str(value).lower())), (operator == "empty", not input), (operator == "not empty", input), (operator == "=", input == value), diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index 1619eadbe..f5a15360f 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -348,7 +348,8 @@ def tokenize_table(tbls, doc, eng, batch_size=10): d["doc_type_kwd"] = "table" if img: d["image"] = img - d["doc_type_kwd"] = "image" + if d["content_with_weight"].find("") < 0: + d["doc_type_kwd"] = "image" if poss: add_positions(d, poss) res.append(d) @@ -361,7 +362,8 @@ def tokenize_table(tbls, doc, eng, batch_size=10): d["doc_type_kwd"] = "table" if img: d["image"] = img - d["doc_type_kwd"] = "image" + if d["content_with_weight"].find("") < 0: + d["doc_type_kwd"] = "image" add_positions(d, poss) res.append(d) return res diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index 84fdf968f..b17a56511 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -395,8 +395,8 @@ async def build_chunks(task, progress_callback): await asyncio.gather(*tasks, return_exceptions=True) raise metadata = {} - for ck in cks: - metadata = update_metadata_to(metadata, ck["metadata_obj"]) + for doc in docs: + metadata = update_metadata_to(metadata, doc["metadata_obj"]) del ck["metadata_obj"] if metadata: e, doc = DocumentService.get_by_id(task["doc_id"])