mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-02 18:45:29 +08:00
Fix memory issue on Infinity 0.6.15 (#12258)
### What problem does this PR solve? 1. Remove unused columns 2. Check the empty database 3. Switch on the order by expression ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@ -168,7 +168,7 @@ class MessageService:
|
|||||||
order_by = OrderByExpr()
|
order_by = OrderByExpr()
|
||||||
order_by.desc("valid_at")
|
order_by.desc("valid_at")
|
||||||
|
|
||||||
res = settings.msgStoreConn.search(
|
res, count = settings.msgStoreConn.search(
|
||||||
select_fields=["memory_id", "content", "content_embed"],
|
select_fields=["memory_id", "content", "content_embed"],
|
||||||
highlight_fields=[],
|
highlight_fields=[],
|
||||||
condition={},
|
condition={},
|
||||||
@ -177,8 +177,10 @@ class MessageService:
|
|||||||
offset=0, limit=2048*len(memory_ids),
|
offset=0, limit=2048*len(memory_ids),
|
||||||
index_names=index_names, memory_ids=memory_ids, agg_fields=[], hide_forgotten=False
|
index_names=index_names, memory_ids=memory_ids, agg_fields=[], hide_forgotten=False
|
||||||
)
|
)
|
||||||
if not res:
|
|
||||||
|
if count == 0:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
docs = settings.msgStoreConn.get_fields(res, ["memory_id", "content", "content_embed"])
|
docs = settings.msgStoreConn.get_fields(res, ["memory_id", "content", "content_embed"])
|
||||||
size_dict = {}
|
size_dict = {}
|
||||||
for doc in docs.values():
|
for doc in docs.values():
|
||||||
|
|||||||
@ -228,14 +228,14 @@ class ESConnection(ESConnectionBase):
|
|||||||
if str(res.get("timed_out", "")).lower() == "true":
|
if str(res.get("timed_out", "")).lower() == "true":
|
||||||
raise Exception("Es Timeout.")
|
raise Exception("Es Timeout.")
|
||||||
self.logger.debug(f"ESConnection.search {str(index_names)} res: " + str(res))
|
self.logger.debug(f"ESConnection.search {str(index_names)} res: " + str(res))
|
||||||
return res
|
return res, self.get_total(res)
|
||||||
except ConnectionTimeout:
|
except ConnectionTimeout:
|
||||||
self.logger.exception("ES request timeout")
|
self.logger.exception("ES request timeout")
|
||||||
self._connect()
|
self._connect()
|
||||||
continue
|
continue
|
||||||
except NotFoundError as e:
|
except NotFoundError as e:
|
||||||
self.logger.debug(f"ESConnection.search {str(index_names)} query: " + str(q) + str(e))
|
self.logger.debug(f"ESConnection.search {str(index_names)} query: " + str(q) + str(e))
|
||||||
return None
|
return None, 0
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.exception(f"ESConnection.search {str(index_names)} query: " + str(q) + str(e))
|
self.logger.exception(f"ESConnection.search {str(index_names)} query: " + str(q) + str(e))
|
||||||
raise e
|
raise e
|
||||||
|
|||||||
@ -22,7 +22,6 @@ from infinity.errors import ErrorCode
|
|||||||
|
|
||||||
from common.decorator import singleton
|
from common.decorator import singleton
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from common.constants import PAGERANK_FLD, TAG_FLD
|
|
||||||
from common.doc_store.doc_store_base import MatchExpr, MatchTextExpr, MatchDenseExpr, FusionExpr, OrderByExpr
|
from common.doc_store.doc_store_base import MatchExpr, MatchTextExpr, MatchDenseExpr, FusionExpr, OrderByExpr
|
||||||
from common.doc_store.infinity_conn_base import InfinityConnectionBase
|
from common.doc_store.infinity_conn_base import InfinityConnectionBase
|
||||||
from common.time_utils import date_string_to_timestamp
|
from common.time_utils import date_string_to_timestamp
|
||||||
@ -150,8 +149,6 @@ class InfinityConnection(InfinityConnectionBase):
|
|||||||
if match_expressions:
|
if match_expressions:
|
||||||
if score_func not in output:
|
if score_func not in output:
|
||||||
output.append(score_func)
|
output.append(score_func)
|
||||||
if PAGERANK_FLD not in output:
|
|
||||||
output.append(PAGERANK_FLD)
|
|
||||||
output = [f for f in output if f != "_score"]
|
output = [f for f in output if f != "_score"]
|
||||||
if limit <= 0:
|
if limit <= 0:
|
||||||
# ElasticSearch default limit is 10000
|
# ElasticSearch default limit is 10000
|
||||||
@ -192,17 +189,6 @@ class InfinityConnection(InfinityConnectionBase):
|
|||||||
str_minimum_should_match = str(int(minimum_should_match * 100)) + "%"
|
str_minimum_should_match = str(int(minimum_should_match * 100)) + "%"
|
||||||
matchExpr.extra_options["minimum_should_match"] = str_minimum_should_match
|
matchExpr.extra_options["minimum_should_match"] = str_minimum_should_match
|
||||||
|
|
||||||
# Add rank_feature support
|
|
||||||
if rank_feature and "rank_features" not in matchExpr.extra_options:
|
|
||||||
# Convert rank_feature dict to Infinity's rank_features string format
|
|
||||||
# Format: "field^feature_name^weight,field^feature_name^weight"
|
|
||||||
rank_features_list = []
|
|
||||||
for feature_name, weight in rank_feature.items():
|
|
||||||
# Use TAG_FLD as the field containing rank features
|
|
||||||
rank_features_list.append(f"{TAG_FLD}^{feature_name}^{weight}")
|
|
||||||
if rank_features_list:
|
|
||||||
matchExpr.extra_options["rank_features"] = ",".join(rank_features_list)
|
|
||||||
|
|
||||||
for k, v in matchExpr.extra_options.items():
|
for k, v in matchExpr.extra_options.items():
|
||||||
if not isinstance(v, str):
|
if not isinstance(v, str):
|
||||||
matchExpr.extra_options[k] = str(v)
|
matchExpr.extra_options[k] = str(v)
|
||||||
@ -225,14 +211,13 @@ class InfinityConnection(InfinityConnectionBase):
|
|||||||
self.logger.debug(f"INFINITY search FusionExpr: {json.dumps(matchExpr.__dict__)}")
|
self.logger.debug(f"INFINITY search FusionExpr: {json.dumps(matchExpr.__dict__)}")
|
||||||
|
|
||||||
order_by_expr_list = list()
|
order_by_expr_list = list()
|
||||||
# todo use order_by after infinity fixed bug
|
if order_by.fields:
|
||||||
# if order_by.fields:
|
for order_field in order_by.fields:
|
||||||
# for order_field in order_by.fields:
|
order_field_name = self.convert_condition_and_order_field(order_field[0])
|
||||||
# order_field_name = self.convert_condition_and_order_field(order_field[0])
|
if order_field[1] == 0:
|
||||||
# if order_field[1] == 0:
|
order_by_expr_list.append((order_field_name, SortType.Asc))
|
||||||
# order_by_expr_list.append((order_field_name, SortType.Asc))
|
else:
|
||||||
# else:
|
order_by_expr_list.append((order_field_name, SortType.Desc))
|
||||||
# order_by_expr_list.append((order_field_name, SortType.Desc))
|
|
||||||
|
|
||||||
total_hits_count = 0
|
total_hits_count = 0
|
||||||
# Scatter search tables and gather the results
|
# Scatter search tables and gather the results
|
||||||
@ -284,7 +269,7 @@ class InfinityConnection(InfinityConnectionBase):
|
|||||||
self.connPool.release_conn(inf_conn)
|
self.connPool.release_conn(inf_conn)
|
||||||
res = self.concat_dataframes(df_list, output)
|
res = self.concat_dataframes(df_list, output)
|
||||||
if match_expressions:
|
if match_expressions:
|
||||||
res["_score"] = res[score_column] + res[PAGERANK_FLD]
|
res["_score"] = res[score_column]
|
||||||
res = res.sort_values(by="_score", ascending=False).reset_index(drop=True)
|
res = res.sort_values(by="_score", ascending=False).reset_index(drop=True)
|
||||||
res = res.head(limit)
|
res = res.head(limit)
|
||||||
self.logger.debug(f"INFINITY search final result: {str(res)}")
|
self.logger.debug(f"INFINITY search final result: {str(res)}")
|
||||||
|
|||||||
Reference in New Issue
Block a user