mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-25 08:06:48 +08:00
Fix: table tag on chunks. (#12126)
### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -56,7 +56,6 @@ class LLMParam(ComponentParamBase):
|
||||
self.check_nonnegative_number(int(self.max_tokens), "[Agent] Max tokens")
|
||||
self.check_decimal_float(float(self.top_p), "[Agent] Top P")
|
||||
self.check_empty(self.llm_id, "[Agent] LLM")
|
||||
self.check_empty(self.sys_prompt, "[Agent] System prompt")
|
||||
self.check_empty(self.prompts, "[Agent] User prompt")
|
||||
|
||||
def gen_conf(self):
|
||||
|
||||
@ -696,10 +696,12 @@ class DocumentService(CommonService):
|
||||
for k,v in r.meta_fields.items():
|
||||
if k not in meta:
|
||||
meta[k] = {}
|
||||
v = str(v)
|
||||
if v not in meta[k]:
|
||||
meta[k][v] = []
|
||||
meta[k][v].append(doc_id)
|
||||
if not isinstance(v, list):
|
||||
v = [v]
|
||||
for vv in v:
|
||||
if vv not in meta[k]:
|
||||
meta[k][vv] = []
|
||||
meta[k][vv].append(doc_id)
|
||||
return meta
|
||||
|
||||
@classmethod
|
||||
|
||||
@ -44,21 +44,27 @@ def meta_filter(metas: dict, filters: list[dict], logic: str = "and"):
|
||||
def filter_out(v2docs, operator, value):
|
||||
ids = []
|
||||
for input, docids in v2docs.items():
|
||||
|
||||
if operator in ["=", "≠", ">", "<", "≥", "≤"]:
|
||||
try:
|
||||
if isinstance(input, list):
|
||||
input = input[0]
|
||||
input = float(input)
|
||||
value = float(value)
|
||||
except Exception:
|
||||
input = str(input)
|
||||
value = str(value)
|
||||
pass
|
||||
if isinstance(input, str):
|
||||
input = input.lower()
|
||||
if isinstance(value, str):
|
||||
value = value.lower()
|
||||
|
||||
for conds in [
|
||||
(operator == "contains", str(value).lower() in str(input).lower()),
|
||||
(operator == "not contains", str(value).lower() not in str(input).lower()),
|
||||
(operator == "in", str(input).lower() in str(value).lower()),
|
||||
(operator == "not in", str(input).lower() not in str(value).lower()),
|
||||
(operator == "start with", str(input).lower().startswith(str(value).lower())),
|
||||
(operator == "end with", str(input).lower().endswith(str(value).lower())),
|
||||
(operator == "contains", input in value if not isinstance(input, list) else all([i in value for i in input])),
|
||||
(operator == "not contains", input not in value if not isinstance(input, list) else all([i not in value for i in input])),
|
||||
(operator == "in", input in value if not isinstance(input, list) else all([i in value for i in input])),
|
||||
(operator == "not in", input not in value if not isinstance(input, list) else all([i not in value for i in input])),
|
||||
(operator == "start with", str(input).lower().startswith(str(value).lower()) if not isinstance(input, list) else "".join([str(i).lower() for i in input]).startswith(str(value).lower())),
|
||||
(operator == "end with", str(input).lower().endswith(str(value).lower()) if not isinstance(input, list) else "".join([str(i).lower() for i in input]).endswith(str(value).lower())),
|
||||
(operator == "empty", not input),
|
||||
(operator == "not empty", input),
|
||||
(operator == "=", input == value),
|
||||
|
||||
@ -348,7 +348,8 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
|
||||
d["doc_type_kwd"] = "table"
|
||||
if img:
|
||||
d["image"] = img
|
||||
d["doc_type_kwd"] = "image"
|
||||
if d["content_with_weight"].find("<tr>") < 0:
|
||||
d["doc_type_kwd"] = "image"
|
||||
if poss:
|
||||
add_positions(d, poss)
|
||||
res.append(d)
|
||||
@ -361,7 +362,8 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
|
||||
d["doc_type_kwd"] = "table"
|
||||
if img:
|
||||
d["image"] = img
|
||||
d["doc_type_kwd"] = "image"
|
||||
if d["content_with_weight"].find("<tr>") < 0:
|
||||
d["doc_type_kwd"] = "image"
|
||||
add_positions(d, poss)
|
||||
res.append(d)
|
||||
return res
|
||||
|
||||
@ -395,8 +395,8 @@ async def build_chunks(task, progress_callback):
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
raise
|
||||
metadata = {}
|
||||
for ck in cks:
|
||||
metadata = update_metadata_to(metadata, ck["metadata_obj"])
|
||||
for doc in docs:
|
||||
metadata = update_metadata_to(metadata, doc["metadata_obj"])
|
||||
del ck["metadata_obj"]
|
||||
if metadata:
|
||||
e, doc = DocumentService.get_by_id(task["doc_id"])
|
||||
|
||||
Reference in New Issue
Block a user