mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Added doc for switching elasticsearch to infinity (#3370)
### What problem does this PR solve?

Added doc for switching elasticsearch to infinity

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
This commit is contained in:
@ -4,7 +4,6 @@ import time
|
||||
import os
|
||||
from typing import List, Dict
|
||||
|
||||
import elasticsearch
|
||||
import copy
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch_dsl import UpdateByQuery, Q, Search, Index
|
||||
@ -17,14 +16,13 @@ import polars as pl
|
||||
from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr, MatchTextExpr, MatchDenseExpr, FusionExpr
|
||||
from rag.nlp import is_english, rag_tokenizer
|
||||
|
||||
logger.info("Elasticsearch sdk version: "+str(elasticsearch.__version__))
|
||||
|
||||
|
||||
@singleton
|
||||
class ESConnection(DocStoreConnection):
|
||||
def __init__(self):
|
||||
self.info = {}
|
||||
for _ in range(10):
|
||||
logger.info(f"Use Elasticsearch {settings.ES['hosts']} as the doc engine.")
|
||||
for _ in range(24):
|
||||
try:
|
||||
self.es = Elasticsearch(
|
||||
settings.ES["hosts"].split(","),
|
||||
@ -34,21 +32,27 @@ class ESConnection(DocStoreConnection):
|
||||
)
|
||||
if self.es:
|
||||
self.info = self.es.info()
|
||||
logger.info("Connect to es.")
|
||||
break
|
||||
except Exception:
|
||||
logger.exception("Fail to connect to es")
|
||||
time.sleep(1)
|
||||
except Exception as e:
|
||||
logger.warn(f"{str(e)}. Waiting Elasticsearch {settings.ES['hosts']} to be healthy.")
|
||||
time.sleep(5)
|
||||
if not self.es.ping():
|
||||
raise Exception("Can't connect to ES cluster")
|
||||
v = self.info.get("version", {"number": "5.6"})
|
||||
msg = f"Elasticsearch {settings.ES['hosts']} didn't become healthy in 120s."
|
||||
logger.error(msg)
|
||||
raise Exception(msg)
|
||||
v = self.info.get("version", {"number": "8.11.3"})
|
||||
v = v["number"].split(".")[0]
|
||||
if int(v) < 8:
|
||||
raise Exception(f"ES version must be greater than or equal to 8, current version: {v}")
|
||||
msg = f"Elasticsearch version must be greater than or equal to 8, current version: {v}"
|
||||
logger.error(msg)
|
||||
raise Exception(msg)
|
||||
fp_mapping = os.path.join(get_project_base_directory(), "conf", "mapping.json")
|
||||
if not os.path.exists(fp_mapping):
|
||||
raise Exception(f"Mapping file not found at {fp_mapping}")
|
||||
msg = f"Elasticsearch mapping file not found at {fp_mapping}"
|
||||
logger.error(msg)
|
||||
raise Exception(msg)
|
||||
self.mapping = json.load(open(fp_mapping, "r"))
|
||||
logger.info(f"Elasticsearch {settings.ES['hosts']} is healthy.")
|
||||
|
||||
"""
|
||||
Database operations
|
||||
|
||||
@ -1,13 +1,14 @@
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import time
|
||||
from typing import List, Dict
|
||||
import infinity
|
||||
from infinity.common import ConflictType, InfinityException
|
||||
from infinity.index import IndexInfo, IndexType
|
||||
from infinity.connection_pool import ConnectionPool
|
||||
from rag import settings
|
||||
from api.utils.log_utils import logger
|
||||
from rag import settings
|
||||
from rag.utils import singleton
|
||||
import polars as pl
|
||||
from polars.series.series import Series
|
||||
@ -54,8 +55,24 @@ class InfinityConnection(DocStoreConnection):
|
||||
if ":" in infinity_uri:
|
||||
host, port = infinity_uri.split(":")
|
||||
infinity_uri = infinity.common.NetworkAddress(host, int(port))
|
||||
self.connPool = ConnectionPool(infinity_uri)
|
||||
logger.info(f"Connected to infinity {infinity_uri}.")
|
||||
self.connPool = None
|
||||
logger.info(f"Use Infinity {infinity_uri} as the doc engine.")
|
||||
for _ in range(24):
|
||||
try:
|
||||
connPool = ConnectionPool(infinity_uri)
|
||||
inf_conn = connPool.get_conn()
|
||||
_ = inf_conn.show_current_node()
|
||||
connPool.release_conn(inf_conn)
|
||||
self.connPool = connPool
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warn(f"{str(e)}. Waiting Infinity {infinity_uri} to be healthy.")
|
||||
time.sleep(5)
|
||||
if self.connPool is None:
|
||||
msg = f"Infinity {infinity_uri} didn't become healthy in 120s."
|
||||
logger.error(msg)
|
||||
raise Exception(msg)
|
||||
logger.info(f"Infinity {infinity_uri} is healthy.")
|
||||
|
||||
"""
|
||||
Database operations
|
||||
@ -151,8 +168,8 @@ class InfinityConnection(DocStoreConnection):
|
||||
_ = db_instance.get_table(table_name)
|
||||
self.connPool.release_conn(inf_conn)
|
||||
return True
|
||||
except Exception:
|
||||
logger.exception("INFINITY indexExist")
|
||||
except Exception as e:
|
||||
logger.warn(f"INFINITY indexExist {str(e)}")
|
||||
return False
|
||||
|
||||
"""
|
||||
@ -199,7 +216,7 @@ class InfinityConnection(DocStoreConnection):
|
||||
)
|
||||
if len(filter_cond) != 0:
|
||||
filter_fulltext = f"({filter_cond}) AND {filter_fulltext}"
|
||||
# doc_store_logger.info(f"filter_fulltext: {filter_fulltext}")
|
||||
# logger.info(f"filter_fulltext: {filter_fulltext}")
|
||||
minimum_should_match = "0%"
|
||||
if "minimum_should_match" in matchExpr.extra_options:
|
||||
minimum_should_match = (
|
||||
@ -312,7 +329,7 @@ class InfinityConnection(DocStoreConnection):
|
||||
for k, v in d.items():
|
||||
if k.endswith("_kwd") and isinstance(v, list):
|
||||
d[k] = " ".join(v)
|
||||
ids = [f"{d['id']}" for d in documents]
|
||||
ids = ["'{}'".format(d["id"]) for d in documents]
|
||||
str_ids = ", ".join(ids)
|
||||
str_filter = f"id IN ({str_ids})"
|
||||
table_instance.delete(str_filter)
|
||||
@ -321,7 +338,7 @@ class InfinityConnection(DocStoreConnection):
|
||||
# logger.info(f"InfinityConnection.insert {json.dumps(documents)}")
|
||||
table_instance.insert(documents)
|
||||
self.connPool.release_conn(inf_conn)
|
||||
doc_store_logger.info(f"inserted into {table_name} {str_ids}.")
|
||||
logger.info(f"inserted into {table_name} {str_ids}.")
|
||||
return []
|
||||
|
||||
def update(
|
||||
|
||||
Reference in New Issue
Block a user