diff --git a/api/db/services/user_service.py b/api/db/services/user_service.py
index 50b46ce63..66e334efd 100644
--- a/api/db/services/user_service.py
+++ b/api/db/services/user_service.py
@@ -27,7 +27,7 @@ from api.db.services.common_service import CommonService
 from common.misc_utils import get_uuid
 from common.time_utils import current_timestamp, datetime_format
 from common.constants import StatusEnum
-from rag.settings import MINIO
+from common import globals
 
 
 class UserService(CommonService):
@@ -221,7 +221,7 @@ class TenantService(CommonService):
     @DB.connection_context()
     def user_gateway(cls, tenant_id):
         hash_obj = hashlib.sha256(tenant_id.encode("utf-8"))
-        return int(hash_obj.hexdigest(), 16)%len(MINIO)
+        return int(hash_obj.hexdigest(), 16) % len(globals.MINIO)
 
 
 class UserTenantService(CommonService):
diff --git a/api/utils/health_utils.py b/api/utils/health_utils.py
index f86a0d728..93c9bd7cc 100644
--- a/api/utils/health_utils.py
+++ b/api/utils/health_utils.py
@@ -19,7 +19,7 @@ from timeit import default_timer as timer
 
 from api import settings
 from api.db.db_models import DB
-from rag import settings as rag_settings
+from common import globals
 from rag.utils.redis_conn import REDIS_CONN
 from rag.utils.storage_factory import STORAGE_IMPL
 from rag.utils.es_conn import ESConnection
@@ -121,7 +120,7 @@ def get_mysql_status():
 def check_minio_alive():
     start_time = timer()
     try:
-        response = requests.get(f'http://{rag_settings.MINIO["host"]}/minio/health/live')
+        response = requests.get(f'http://{globals.MINIO["host"]}/minio/health/live')
         if response.status_code == 200:
             return {"status": "alive", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
         else:
diff --git a/common/globals.py b/common/globals.py
index 3f2859c74..1a7fbb139 100644
--- a/common/globals.py
+++ b/common/globals.py
@@ -13,13 +13,52 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import os
+from common.config_utils import get_base_config, decrypt_database_config
 
 EMBEDDING_MDL = ""
 EMBEDDING_CFG = ""
-DOC_ENGINE = None
+DOC_ENGINE = os.getenv('DOC_ENGINE', 'elasticsearch')
 docStoreConn = None
-retriever = None
\ No newline at end of file
+retriever = None
+
+# Moved from rag.settings
+ES = {}
+INFINITY = {}
+AZURE = {}
+S3 = {}
+MINIO = {}
+OSS = {}
+OS = {}
+REDIS = {}
+
+STORAGE_IMPL_TYPE = os.getenv('STORAGE_IMPL', 'MINIO')
+
+# Load only the configuration selected by the environment variables, so that missing configuration for unused backends does not break initialization.
+if DOC_ENGINE == 'elasticsearch':
+    ES = get_base_config("es", {})
+elif DOC_ENGINE == 'opensearch':
+    OS = get_base_config("os", {})
+elif DOC_ENGINE == 'infinity':
+    INFINITY = get_base_config("infinity", {"uri": "infinity:23817"})
+
+if STORAGE_IMPL_TYPE in ['AZURE_SPN', 'AZURE_SAS']:
+    AZURE = get_base_config("azure", {})
+elif STORAGE_IMPL_TYPE == 'AWS_S3':
+    S3 = get_base_config("s3", {})
+elif STORAGE_IMPL_TYPE == 'MINIO':
+    MINIO = decrypt_database_config(name="minio")
+elif STORAGE_IMPL_TYPE == 'OSS':
+    OSS = get_base_config("oss", {})
+
+try:
+    REDIS = decrypt_database_config(name="redis")
+except Exception:
+    try:
+        REDIS = get_base_config("redis", {})
+    except Exception:
+        REDIS = {}
\ No newline at end of file
diff --git a/rag/settings.py b/rag/settings.py
index dbfc36880..78079ff1f 100644
--- a/rag/settings.py
+++ b/rag/settings.py
@@ -15,50 +15,12 @@
 #
 import os
 import logging
-from common.config_utils import get_base_config, decrypt_database_config
 from common.file_utils import get_project_base_directory
 from common.misc_utils import pip_install_torch
-from common import globals
 
 # Server
 RAG_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
 
-# Get storage type and document engine from system environment variables
-STORAGE_IMPL_TYPE = os.getenv('STORAGE_IMPL', 'MINIO')
-globals.DOC_ENGINE = os.getenv('DOC_ENGINE', 'elasticsearch')
-
-ES = {}
-INFINITY = {}
-AZURE = {}
-S3 = {}
-MINIO = {}
-OSS = {}
-OS = {}
-
-# Initialize the selected configuration data based on environment variables to solve the problem of initialization errors due to lack of configuration
-if globals.DOC_ENGINE == 'elasticsearch':
-    ES = get_base_config("es", {})
-elif globals.DOC_ENGINE == 'opensearch':
-    OS = get_base_config("os", {})
-elif globals.DOC_ENGINE == 'infinity':
-    INFINITY = get_base_config("infinity", {"uri": "infinity:23817"})
-
-if STORAGE_IMPL_TYPE in ['AZURE_SPN', 'AZURE_SAS']:
-    AZURE = get_base_config("azure", {})
-elif STORAGE_IMPL_TYPE == 'AWS_S3':
-    S3 = get_base_config("s3", {})
-elif STORAGE_IMPL_TYPE == 'MINIO':
-    MINIO = decrypt_database_config(name="minio")
-elif STORAGE_IMPL_TYPE == 'OSS':
-    OSS = get_base_config("oss", {})
-
-try:
-    REDIS = decrypt_database_config(name="redis")
-except Exception:
-    try:
-        REDIS = get_base_config("redis", {})
-    except Exception:
-        REDIS = {}
 
 DOC_MAXIMUM_SIZE = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024))
 DOC_BULK_SIZE = int(os.environ.get("DOC_BULK_SIZE", 4))
 EMBEDDING_BATCH_SIZE = int(os.environ.get("EMBEDDING_BATCH_SIZE", 16))
diff --git a/rag/utils/azure_sas_conn.py b/rag/utils/azure_sas_conn.py
index 771d5afdf..3211bb3ed 100644
--- a/rag/utils/azure_sas_conn.py
+++ b/rag/utils/azure_sas_conn.py
@@ -18,17 +18,17 @@ import logging
 import os
 import time
 from io import BytesIO
-from rag import settings
 from common.decorator import singleton
 from azure.storage.blob import ContainerClient
+from common import globals
 
 
 @singleton
 class RAGFlowAzureSasBlob:
     def __init__(self):
         self.conn = None
-        self.container_url = os.getenv('CONTAINER_URL', settings.AZURE["container_url"])
-        self.sas_token = os.getenv('SAS_TOKEN', settings.AZURE["sas_token"])
+        self.container_url = os.getenv('CONTAINER_URL', globals.AZURE["container_url"])
+        self.sas_token = os.getenv('SAS_TOKEN', globals.AZURE["sas_token"])
         self.__open__()
 
     def __open__(self):
diff --git a/rag/utils/azure_spn_conn.py b/rag/utils/azure_spn_conn.py
index e5a194aa8..547974d7d 100644
--- a/rag/utils/azure_spn_conn.py
+++ b/rag/utils/azure_spn_conn.py
@@ -17,21 +17,21 @@
 import logging
 import os
 import time
-from rag import settings
 from common.decorator import singleton
 from azure.identity import ClientSecretCredential, AzureAuthorityHosts
 from azure.storage.filedatalake import FileSystemClient
+from common import globals
 
 
 @singleton
 class RAGFlowAzureSpnBlob:
     def __init__(self):
         self.conn = None
-        self.account_url = os.getenv('ACCOUNT_URL', settings.AZURE["account_url"])
-        self.client_id = os.getenv('CLIENT_ID', settings.AZURE["client_id"])
-        self.secret = os.getenv('SECRET', settings.AZURE["secret"])
-        self.tenant_id = os.getenv('TENANT_ID', settings.AZURE["tenant_id"])
-        self.container_name = os.getenv('CONTAINER_NAME', settings.AZURE["container_name"])
+        self.account_url = os.getenv('ACCOUNT_URL', globals.AZURE["account_url"])
+        self.client_id = os.getenv('CLIENT_ID', globals.AZURE["client_id"])
+        self.secret = os.getenv('SECRET', globals.AZURE["secret"])
+        self.tenant_id = os.getenv('TENANT_ID', globals.AZURE["tenant_id"])
+        self.container_name = os.getenv('CONTAINER_NAME', globals.AZURE["container_name"])
         self.__open__()
 
     def __open__(self):
diff --git a/rag/utils/es_conn.py b/rag/utils/es_conn.py
index ec117f4e4..e8a95a4c4 100644
--- a/rag/utils/es_conn.py
+++ b/rag/utils/es_conn.py
@@ -24,7 +24,6 @@ import copy
 from elasticsearch import Elasticsearch, NotFoundError
 from elasticsearch_dsl import UpdateByQuery, Q, Search, Index
 from elastic_transport import ConnectionTimeout
-from rag import settings
 from rag.settings import TAG_FLD, PAGERANK_FLD
 from common.decorator import singleton
 from common.file_utils import get_project_base_directory
@@ -33,6 +32,7 @@ from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr,
     FusionExpr
 from rag.nlp import is_english, rag_tokenizer
 from common.float_utils import get_float
+from common import globals
 
 ATTEMPT_TIME = 2
 
@@ -43,17 +43,17 @@ logger = logging.getLogger('ragflow.es_conn')
 class ESConnection(DocStoreConnection):
     def __init__(self):
         self.info = {}
-        logger.info(f"Use Elasticsearch {settings.ES['hosts']} as the doc engine.")
+        logger.info(f"Use Elasticsearch {globals.ES['hosts']} as the doc engine.")
         for _ in range(ATTEMPT_TIME):
             try:
                 if self._connect():
                     break
             except Exception as e:
-                logger.warning(f"{str(e)}. Waiting Elasticsearch {settings.ES['hosts']} to be healthy.")
+                logger.warning(f"{str(e)}. Waiting for Elasticsearch {globals.ES['hosts']} to be healthy.")
                 time.sleep(5)
 
         if not self.es.ping():
-            msg = f"Elasticsearch {settings.ES['hosts']} is unhealthy in 120s."
+            msg = f"Elasticsearch {globals.ES['hosts']} is still unhealthy after 120s."
             logger.error(msg)
             raise Exception(msg)
         v = self.info.get("version", {"number": "8.11.3"})
@@ -68,14 +68,14 @@ class ESConnection(DocStoreConnection):
             logger.error(msg)
             raise Exception(msg)
         self.mapping = json.load(open(fp_mapping, "r"))
-        logger.info(f"Elasticsearch {settings.ES['hosts']} is healthy.")
+        logger.info(f"Elasticsearch {globals.ES['hosts']} is healthy.")
 
     def _connect(self):
         self.es = Elasticsearch(
-            settings.ES["hosts"].split(","),
-            basic_auth=(settings.ES["username"], settings.ES[
-                "password"]) if "username" in settings.ES and "password" in settings.ES else None,
-            verify_certs= settings.ES.get("verify_certs", False),
+            globals.ES["hosts"].split(","),
+            basic_auth=(globals.ES["username"], globals.ES[
+                "password"]) if "username" in globals.ES and "password" in globals.ES else None,
+            verify_certs=globals.ES.get("verify_certs", False),
             timeout=600
         )
         if self.es:
             self.info = self.es.info()
diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py
index 9a92c8e86..10767377a 100644
--- a/rag/utils/infinity_conn.py
+++ b/rag/utils/infinity_conn.py
@@ -25,11 +25,11 @@ from infinity.common import ConflictType, InfinityException, SortType
 from infinity.index import IndexInfo, IndexType
 from infinity.connection_pool import ConnectionPool
 from infinity.errors import ErrorCode
-from rag import settings
 from rag.settings import PAGERANK_FLD, TAG_FLD
 from common.decorator import singleton
 import pandas as pd
 from common.file_utils import get_project_base_directory
+from common import globals
 
 from rag.nlp import is_english
 from rag.utils.doc_store_conn import (
@@ -130,8 +130,8 @@ def concat_dataframes(df_list: list[pd.DataFrame], selectFields: list[str]) -> pd.DataFrame:
 @singleton
 class InfinityConnection(DocStoreConnection):
     def __init__(self):
-        self.dbName = settings.INFINITY.get("db_name", "default_db")
-        infinity_uri = settings.INFINITY["uri"]
+        self.dbName = globals.INFINITY.get("db_name", "default_db")
+        infinity_uri = globals.INFINITY["uri"]
         if ":" in infinity_uri:
             host, port = infinity_uri.split(":")
             infinity_uri = infinity.common.NetworkAddress(host, int(port))
diff --git a/rag/utils/minio_conn.py b/rag/utils/minio_conn.py
index 1777de0e5..1106817f3 100644
--- a/rag/utils/minio_conn.py
+++ b/rag/utils/minio_conn.py
@@ -20,8 +20,8 @@ from minio import Minio
 from minio.commonconfig import CopySource
 from minio.error import S3Error
 from io import BytesIO
-from rag import settings
 from common.decorator import singleton
+from common import globals
 
 
 @singleton
@@ -38,14 +38,14 @@ class RAGFlowMinio:
             pass
 
         try:
-            self.conn = Minio(settings.MINIO["host"],
-                              access_key=settings.MINIO["user"],
-                              secret_key=settings.MINIO["password"],
+            self.conn = Minio(globals.MINIO["host"],
+                              access_key=globals.MINIO["user"],
+                              secret_key=globals.MINIO["password"],
                               secure=False
                               )
         except Exception:
             logging.exception(
-                "Fail to connect %s " % settings.MINIO["host"])
+                "Failed to connect to %s" % globals.MINIO["host"])
 
     def __close__(self):
         del self.conn
diff --git a/rag/utils/opensearch_conn.py b/rag/utils/opensearch_conn.py
index 5c51be52f..387798f97 100644
--- a/rag/utils/opensearch_conn.py
+++ b/rag/utils/opensearch_conn.py
@@ -24,13 +24,13 @@ import copy
 from opensearchpy import OpenSearch, NotFoundError
 from opensearchpy import UpdateByQuery, Q, Search, Index
 from opensearchpy import ConnectionTimeout
-from rag import settings
 from rag.settings import TAG_FLD, PAGERANK_FLD
 from common.decorator import singleton
 from common.file_utils import get_project_base_directory
 from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr, MatchTextExpr, MatchDenseExpr, \
     FusionExpr
 from rag.nlp import is_english, rag_tokenizer
+from common import globals
 
 ATTEMPT_TIME = 2
 
@@ -41,13 +41,13 @@ logger = logging.getLogger('ragflow.opensearch_conn')
 class OSConnection(DocStoreConnection):
     def __init__(self):
         self.info = {}
-        logger.info(f"Use OpenSearch {settings.OS['hosts']} as the doc engine.")
+        logger.info(f"Use OpenSearch {globals.OS['hosts']} as the doc engine.")
         for _ in range(ATTEMPT_TIME):
             try:
                 self.os = OpenSearch(
-                    settings.OS["hosts"].split(","),
-                    http_auth=(settings.OS["username"], settings.OS[
-                        "password"]) if "username" in settings.OS and "password" in settings.OS else None,
+                    globals.OS["hosts"].split(","),
+                    http_auth=(globals.OS["username"], globals.OS[
+                        "password"]) if "username" in globals.OS and "password" in globals.OS else None,
                     verify_certs=False,
                     timeout=600
                 )
@@ -55,10 +55,10 @@ class OSConnection(DocStoreConnection):
                 self.info = self.os.info()
                 break
             except Exception as e:
-                logger.warning(f"{str(e)}. Waiting OpenSearch {settings.OS['hosts']} to be healthy.")
+                logger.warning(f"{str(e)}. Waiting for OpenSearch {globals.OS['hosts']} to be healthy.")
                 time.sleep(5)
 
         if not self.os.ping():
-            msg = f"OpenSearch {settings.OS['hosts']} is unhealthy in 120s."
+            msg = f"OpenSearch {globals.OS['hosts']} is still unhealthy after 120s."
             logger.error(msg)
             raise Exception(msg)
         v = self.info.get("version", {"number": "2.18.0"})
@@ -73,7 +73,7 @@ class OSConnection(DocStoreConnection):
             logger.error(msg)
             raise Exception(msg)
         self.mapping = json.load(open(fp_mapping, "r"))
-        logger.info(f"OpenSearch {settings.OS['hosts']} is healthy.")
+        logger.info(f"OpenSearch {globals.OS['hosts']} is healthy.")
 
 """
 Database operations
diff --git a/rag/utils/oss_conn.py b/rag/utils/oss_conn.py
index a34bd2323..775b62884 100644
--- a/rag/utils/oss_conn.py
+++ b/rag/utils/oss_conn.py
@@ -20,14 +20,14 @@ from botocore.config import Config
 import time
 from io import BytesIO
 from common.decorator import singleton
-from rag import settings
+from common import globals
 
 
 @singleton
 class RAGFlowOSS:
     def __init__(self):
         self.conn = None
-        self.oss_config = settings.OSS
+        self.oss_config = globals.OSS
         self.access_key = self.oss_config.get('access_key', None)
         self.secret_key = self.oss_config.get('secret_key', None)
         self.endpoint_url = self.oss_config.get('endpoint_url', None)
diff --git a/rag/utils/redis_conn.py b/rag/utils/redis_conn.py
index 2b295eacf..eda04ec21 100644
--- a/rag/utils/redis_conn.py
+++ b/rag/utils/redis_conn.py
@@ -19,8 +19,8 @@ import json
 import uuid
 
 import valkey as redis
-from rag import settings
 from common.decorator import singleton
+from common import globals
 from valkey.lock import Lock
 import trio
@@ -61,7 +61,7 @@ class RedisDB:
     def __init__(self):
         self.REDIS = None
-        self.config = settings.REDIS
+        self.config = globals.REDIS
         self.__open__()
 
     def register_scripts(self) -> None:
diff --git a/rag/utils/s3_conn.py b/rag/utils/s3_conn.py
index 190f18b51..f4fbb7faf 100644
--- a/rag/utils/s3_conn.py
+++ b/rag/utils/s3_conn.py
@@ -21,13 +21,14 @@ from botocore.config import Config
 import time
 from io import BytesIO
 from common.decorator import singleton
-from rag import settings
+from common import globals
+
 
 @singleton
 class RAGFlowS3:
     def __init__(self):
         self.conn = None
-        self.s3_config = settings.S3
+        self.s3_config = globals.S3
         self.access_key = self.s3_config.get('access_key', None)
         self.secret_key = self.s3_config.get('secret_key', None)
         self.session_token = self.s3_config.get('session_token', None)
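
For context, a minimal sketch of the consumer pattern this patch converges on: modules import `common.globals`, which is populated once at import time from the `DOC_ENGINE`/`STORAGE_IMPL` environment variables and the config files, instead of pulling the same dictionaries from `rag.settings`. The `minio_health_url` helper and the `localhost:9000` fallback below are hypothetical, for illustration only; the health-check path matches the one used in `api/utils/health_utils.py` above.

```python
# Illustrative sketch, not part of the patch: mirrors how the updated
# modules read backend configuration from common.globals.
from common import globals  # populated at import time from env vars + conf files


def minio_health_url() -> str:
    # globals.MINIO is only populated when STORAGE_IMPL=MINIO; the
    # localhost fallback here is hypothetical, purely for illustration.
    host = globals.MINIO.get("host", "localhost:9000")
    return f"http://{host}/minio/health/live"
```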