Convert file format from Windows/DOS to Unix (#1949)

### What problem does this PR solve?

The related source files were in Windows/DOS (CRLF) format; they are converted
to Unix (LF) format.
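
For reference, this kind of conversion can be reproduced with the `dos2unix` tool, or with a short Python sketch along the lines below (the `api` directory and `*.py` glob are illustrative only; the actual commit touched 108 files):

```python
# Minimal CRLF -> LF normalization sketch (illustrative; not the exact
# script used for this commit).
from pathlib import Path

def dos2unix(path: Path) -> None:
    data = path.read_bytes()
    converted = data.replace(b"\r\n", b"\n")
    if converted != data:  # rewrite only files that actually change
        path.write_bytes(converted)

for py_file in Path("api").rglob("*.py"):  # hypothetical target tree
    dos2unix(py_file)
```

A `.gitattributes` rule such as `* text=auto eol=lf` is the usual guard against CRLF endings creeping back in.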

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Authored by Jin Hai on 2024-08-15 09:17:36 +08:00, committed by GitHub
parent 1328d715db
commit 6b3a40be5c
108 changed files with 36,399 additions and 36,399 deletions (the counts match, as expected for a change that only rewrites line endings)


@@ -1,125 +1,125 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import os
import sys
from importlib.util import module_from_spec, spec_from_file_location
from pathlib import Path
from flask import Blueprint, Flask
from werkzeug.wrappers.request import Request
from flask_cors import CORS
from api.db import StatusEnum
from api.db.db_models import close_connection
from api.db.services import UserService
from api.utils import CustomJSONEncoder, commands
from flask_session import Session
from flask_login import LoginManager
from api.settings import SECRET_KEY, stat_logger
from api.settings import API_VERSION, access_logger
from api.utils.api_utils import server_error_response
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
__all__ = ['app']
logger = logging.getLogger('flask.app')
for h in access_logger.handlers:
logger.addHandler(h)
Request.json = property(lambda self: self.get_json(force=True, silent=True))
app = Flask(__name__)
CORS(app, supports_credentials=True, max_age=2592000)
app.url_map.strict_slashes = False
app.json_encoder = CustomJSONEncoder
app.errorhandler(Exception)(server_error_response)
## convenience for dev and debug
#app.config["LOGIN_DISABLED"] = True
app.config["SESSION_PERMANENT"] = False
app.config["SESSION_TYPE"] = "filesystem"
app.config['MAX_CONTENT_LENGTH'] = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024))
Session(app)
login_manager = LoginManager()
login_manager.init_app(app)
commands.register_commands(app)
def search_pages_path(pages_dir):
app_path_list = [path for path in pages_dir.glob('*_app.py') if not path.name.startswith('.')]
api_path_list = [path for path in pages_dir.glob('*_api.py') if not path.name.startswith('.')]
app_path_list.extend(api_path_list)
return app_path_list
def register_page(page_path):
path = f'{page_path}'
page_name = page_path.stem.rstrip('_api') if "_api" in path else page_path.stem.rstrip('_app')
module_name = '.'.join(page_path.parts[page_path.parts.index('api'):-1] + (page_name,))
spec = spec_from_file_location(module_name, page_path)
page = module_from_spec(spec)
page.app = app
page.manager = Blueprint(page_name, module_name)
sys.modules[module_name] = page
spec.loader.exec_module(page)
page_name = getattr(page, 'page_name', page_name)
url_prefix = f'/api/{API_VERSION}/{page_name}' if "_api" in path else f'/{API_VERSION}/{page_name}'
app.register_blueprint(page.manager, url_prefix=url_prefix)
return url_prefix
pages_dir = [
Path(__file__).parent,
Path(__file__).parent.parent / 'api' / 'apps', # FIXME: ragflow/api/api/apps, can be removed?
]
client_urls_prefix = [
register_page(path)
for dir in pages_dir
for path in search_pages_path(dir)
]
@login_manager.request_loader
def load_user(web_request):
jwt = Serializer(secret_key=SECRET_KEY)
authorization = web_request.headers.get("Authorization")
if authorization:
try:
access_token = str(jwt.loads(authorization))
user = UserService.query(access_token=access_token, status=StatusEnum.VALID.value)
if user:
return user[0]
else:
return None
except Exception as e:
stat_logger.exception(e)
return None
else:
return None
@app.teardown_request
def _db_close(exc):
close_connection()

File diff suppressed because it is too large.


@@ -1,318 +1,318 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import datetime
import json
import traceback
from flask import request
from flask_login import login_required, current_user
from elasticsearch_dsl import Q
from rag.app.qa import rmPrefix, beAdoc
from rag.nlp import search, rag_tokenizer, keyword_extraction
from rag.utils.es_conn import ELASTICSEARCH
from rag.utils import rmSpace
from api.db import LLMType, ParserType
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import TenantLLMService
from api.db.services.user_service import UserTenantService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from api.db.services.document_service import DocumentService
from api.settings import RetCode, retrievaler, kg_retrievaler
from api.utils.api_utils import get_json_result
import hashlib
import re
@manager.route('/list', methods=['POST'])
@login_required
@validate_request("doc_id")
def list_chunk():
req = request.json
doc_id = req["doc_id"]
page = int(req.get("page", 1))
size = int(req.get("size", 30))
question = req.get("keywords", "")
try:
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
if not tenant_id:
return get_data_error_result(retmsg="Tenant not found!")
e, doc = DocumentService.get_by_id(doc_id)
if not e:
return get_data_error_result(retmsg="Document not found!")
query = {
"doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
}
if "available_int" in req:
query["available_int"] = int(req["available_int"])
sres = retrievaler.search(query, search.index_name(tenant_id))
res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
for id in sres.ids:
d = {
"chunk_id": id,
"content_with_weight": rmSpace(sres.highlight[id]) if question and id in sres.highlight else sres.field[
id].get(
"content_with_weight", ""),
"doc_id": sres.field[id]["doc_id"],
"docnm_kwd": sres.field[id]["docnm_kwd"],
"important_kwd": sres.field[id].get("important_kwd", []),
"img_id": sres.field[id].get("img_id", ""),
"available_int": sres.field[id].get("available_int", 1),
"positions": sres.field[id].get("position_int", "").split("\t")
}
if len(d["positions"]) % 5 == 0:
poss = []
for i in range(0, len(d["positions"]), 5):
poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
float(d["positions"][i + 3]), float(d["positions"][i + 4])])
d["positions"] = poss
res["chunks"].append(d)
return get_json_result(data=res)
except Exception as e:
if str(e).find("not_found") > 0:
return get_json_result(data=False, retmsg=f'No chunk found!',
retcode=RetCode.DATA_ERROR)
return server_error_response(e)
@manager.route('/get', methods=['GET'])
@login_required
def get():
chunk_id = request.args["chunk_id"]
try:
tenants = UserTenantService.query(user_id=current_user.id)
if not tenants:
return get_data_error_result(retmsg="Tenant not found!")
res = ELASTICSEARCH.get(
chunk_id, search.index_name(
tenants[0].tenant_id))
if not res.get("found"):
return server_error_response("Chunk not found")
id = res["_id"]
res = res["_source"]
res["chunk_id"] = id
k = []
for n in res.keys():
if re.search(r"(_vec$|_sm_|_tks|_ltks)", n):
k.append(n)
for n in k:
del res[n]
return get_json_result(data=res)
except Exception as e:
if str(e).find("NotFoundError") >= 0:
return get_json_result(data=False, retmsg=f'Chunk not found!',
retcode=RetCode.DATA_ERROR)
return server_error_response(e)
@manager.route('/set', methods=['POST'])
@login_required
@validate_request("doc_id", "chunk_id", "content_with_weight",
"important_kwd")
def set():
req = request.json
d = {
"id": req["chunk_id"],
"content_with_weight": req["content_with_weight"]}
d["content_ltks"] = rag_tokenizer.tokenize(req["content_with_weight"])
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
d["important_kwd"] = req["important_kwd"]
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_kwd"]))
if "available_int" in req:
d["available_int"] = req["available_int"]
try:
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
if not tenant_id:
return get_data_error_result(retmsg="Tenant not found!")
embd_id = DocumentService.get_embd_id(req["doc_id"])
embd_mdl = TenantLLMService.model_instance(
tenant_id, LLMType.EMBEDDING.value, embd_id)
e, doc = DocumentService.get_by_id(req["doc_id"])
if not e:
return get_data_error_result(retmsg="Document not found!")
if doc.parser_id == ParserType.QA:
arr = [
t for t in re.split(
r"[\n\t]",
req["content_with_weight"]) if len(t) > 1]
if len(arr) != 2:
return get_data_error_result(
retmsg="Q&A must be separated by TAB/ENTER key.")
q, a = rmPrefix(arr[0]), rmPrefix(arr[1])
d = beAdoc(d, arr[0], arr[1], not any(
[rag_tokenizer.is_chinese(t) for t in q + a]))
v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
d["q_%d_vec" % len(v)] = v.tolist()
ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@manager.route('/switch', methods=['POST'])
@login_required
@validate_request("chunk_ids", "available_int", "doc_id")
def switch():
req = request.json
try:
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
if not tenant_id:
return get_data_error_result(retmsg="Tenant not found!")
if not ELASTICSEARCH.upsert([{"id": i, "available_int": int(req["available_int"])} for i in req["chunk_ids"]],
search.index_name(tenant_id)):
return get_data_error_result(retmsg="Index updating failure")
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@manager.route('/rm', methods=['POST'])
@login_required
@validate_request("chunk_ids", "doc_id")
def rm():
req = request.json
try:
if not ELASTICSEARCH.deleteByQuery(
Q("ids", values=req["chunk_ids"]), search.index_name(current_user.id)):
return get_data_error_result(retmsg="Index updating failure")
e, doc = DocumentService.get_by_id(req["doc_id"])
if not e:
return get_data_error_result(retmsg="Document not found!")
deleted_chunk_ids = req["chunk_ids"]
chunk_number = len(deleted_chunk_ids)
DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@manager.route('/create', methods=['POST'])
@login_required
@validate_request("doc_id", "content_with_weight")
def create():
req = request.json
md5 = hashlib.md5()
md5.update((req["content_with_weight"] + req["doc_id"]).encode("utf-8"))
chunck_id = md5.hexdigest()
d = {"id": chunck_id, "content_ltks": rag_tokenizer.tokenize(req["content_with_weight"]),
"content_with_weight": req["content_with_weight"]}
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
d["important_kwd"] = req.get("important_kwd", [])
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req.get("important_kwd", [])))
d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
try:
e, doc = DocumentService.get_by_id(req["doc_id"])
if not e:
return get_data_error_result(retmsg="Document not found!")
d["kb_id"] = [doc.kb_id]
d["docnm_kwd"] = doc.name
d["doc_id"] = doc.id
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
if not tenant_id:
return get_data_error_result(retmsg="Tenant not found!")
embd_id = DocumentService.get_embd_id(req["doc_id"])
embd_mdl = TenantLLMService.model_instance(
tenant_id, LLMType.EMBEDDING.value, embd_id)
v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
v = 0.1 * v[0] + 0.9 * v[1]
d["q_%d_vec" % len(v)] = v.tolist()
ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
DocumentService.increment_chunk_num(
doc.id, doc.kb_id, c, 1, 0)
return get_json_result(data={"chunk_id": chunck_id})
except Exception as e:
return server_error_response(e)
@manager.route('/retrieval_test', methods=['POST'])
@login_required
@validate_request("kb_id", "question")
def retrieval_test():
req = request.json
page = int(req.get("page", 1))
size = int(req.get("size", 30))
question = req["question"]
kb_id = req["kb_id"]
doc_ids = req.get("doc_ids", [])
similarity_threshold = float(req.get("similarity_threshold", 0.2))
vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
top = int(req.get("top_k", 1024))
try:
e, kb = KnowledgebaseService.get_by_id(kb_id)
if not e:
return get_data_error_result(retmsg="Knowledgebase not found!")
embd_mdl = TenantLLMService.model_instance(
kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
rerank_mdl = None
if req.get("rerank_id"):
rerank_mdl = TenantLLMService.model_instance(
kb.tenant_id, LLMType.RERANK.value, llm_name=req["rerank_id"])
if req.get("keyword", False):
chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT)
question += keyword_extraction(chat_mdl, question)
retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, [kb_id], page, size,
similarity_threshold, vector_similarity_weight, top,
doc_ids, rerank_mdl=rerank_mdl)
for c in ranks["chunks"]:
if "vector" in c:
del c["vector"]
return get_json_result(data=ranks)
except Exception as e:
if str(e).find("not_found") > 0:
return get_json_result(data=False, retmsg=f'No chunk found! Check the chunk status please!',
retcode=RetCode.DATA_ERROR)
return server_error_response(e)
@manager.route('/knowledge_graph', methods=['GET'])
@login_required
def knowledge_graph():
doc_id = request.args["doc_id"]
req = {
"doc_ids":[doc_id],
"knowledge_graph_kwd": ["graph", "mind_map"]
}
tenant_id = DocumentService.get_tenant_id(doc_id)
sres = retrievaler.search(req, search.index_name(tenant_id))
obj = {"graph": {}, "mind_map": {}}
for id in sres.ids[:2]:
ty = sres.field[id]["knowledge_graph_kwd"]
try:
obj[ty] = json.loads(sres.field[id]["content_with_weight"])
except Exception as e:
print(traceback.format_exc(), flush=True)
return get_json_result(data=obj)


@@ -1,177 +1,177 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from copy import deepcopy
from flask import request, Response
from flask_login import login_required
from api.db.services.dialog_service import DialogService, ConversationService, chat
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from api.utils import get_uuid
from api.utils.api_utils import get_json_result
import json
@manager.route('/set', methods=['POST'])
@login_required
def set_conversation():
req = request.json
conv_id = req.get("conversation_id")
if conv_id:
del req["conversation_id"]
try:
if not ConversationService.update_by_id(conv_id, req):
return get_data_error_result(retmsg="Conversation not found!")
e, conv = ConversationService.get_by_id(conv_id)
if not e:
return get_data_error_result(
retmsg="Fail to update a conversation!")
conv = conv.to_dict()
return get_json_result(data=conv)
except Exception as e:
return server_error_response(e)
try:
e, dia = DialogService.get_by_id(req["dialog_id"])
if not e:
return get_data_error_result(retmsg="Dialog not found")
conv = {
"id": get_uuid(),
"dialog_id": req["dialog_id"],
"name": req.get("name", "New conversation"),
"message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}]
}
ConversationService.save(**conv)
e, conv = ConversationService.get_by_id(conv["id"])
if not e:
return get_data_error_result(retmsg="Fail to new a conversation!")
conv = conv.to_dict()
return get_json_result(data=conv)
except Exception as e:
return server_error_response(e)
@manager.route('/get', methods=['GET'])
@login_required
def get():
conv_id = request.args["conversation_id"]
try:
e, conv = ConversationService.get_by_id(conv_id)
if not e:
return get_data_error_result(retmsg="Conversation not found!")
conv = conv.to_dict()
return get_json_result(data=conv)
except Exception as e:
return server_error_response(e)
@manager.route('/rm', methods=['POST'])
@login_required
def rm():
conv_ids = request.json["conversation_ids"]
try:
for cid in conv_ids:
ConversationService.delete_by_id(cid)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@manager.route('/list', methods=['GET'])
@login_required
def list_convsersation():
dialog_id = request.args["dialog_id"]
try:
convs = ConversationService.query(
dialog_id=dialog_id,
order_by=ConversationService.model.create_time,
reverse=True)
convs = [d.to_dict() for d in convs]
return get_json_result(data=convs)
except Exception as e:
return server_error_response(e)
@manager.route('/completion', methods=['POST'])
@login_required
#@validate_request("conversation_id", "messages")
def completion():
req = request.json
#req = {"conversation_id": "9aaaca4c11d311efa461fa163e197198", "messages": [
# {"role": "user", "content": "上海有吗?"}
#]}
msg = []
for m in req["messages"]:
if m["role"] == "system":
continue
if m["role"] == "assistant" and not msg:
continue
msg.append({"role": m["role"], "content": m["content"]})
if "doc_ids" in m:
msg[-1]["doc_ids"] = m["doc_ids"]
try:
e, conv = ConversationService.get_by_id(req["conversation_id"])
if not e:
return get_data_error_result(retmsg="Conversation not found!")
conv.message.append(deepcopy(msg[-1]))
e, dia = DialogService.get_by_id(conv.dialog_id)
if not e:
return get_data_error_result(retmsg="Dialog not found!")
del req["conversation_id"]
del req["messages"]
if not conv.reference:
conv.reference = []
conv.message.append({"role": "assistant", "content": ""})
conv.reference.append({"chunks": [], "doc_aggs": []})
def fillin_conv(ans):
nonlocal conv
if not conv.reference:
conv.reference.append(ans["reference"])
else: conv.reference[-1] = ans["reference"]
conv.message[-1] = {"role": "assistant", "content": ans["answer"]}
def stream():
nonlocal dia, msg, req, conv
try:
for ans in chat(dia, msg, True, **req):
fillin_conv(ans)
yield "data:"+json.dumps({"retcode": 0, "retmsg": "", "data": ans}, ensure_ascii=False) + "\n\n"
ConversationService.update_by_id(conv.id, conv.to_dict())
except Exception as e:
yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e),
"data": {"answer": "**ERROR**: "+str(e), "reference": []}},
ensure_ascii=False) + "\n\n"
yield "data:"+json.dumps({"retcode": 0, "retmsg": "", "data": True}, ensure_ascii=False) + "\n\n"
if req.get("stream", True):
resp = Response(stream(), mimetype="text/event-stream")
resp.headers.add_header("Cache-control", "no-cache")
resp.headers.add_header("Connection", "keep-alive")
resp.headers.add_header("X-Accel-Buffering", "no")
resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
return resp
else:
answer = None
for ans in chat(dia, msg, **req):
answer = ans
fillin_conv(ans)
ConversationService.update_by_id(conv.id, conv.to_dict())
break
return get_json_result(data=answer)
except Exception as e:
return server_error_response(e)


@@ -1,172 +1,172 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from flask import request
from flask_login import login_required, current_user
from api.db.services.dialog_service import DialogService
from api.db import StatusEnum
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.user_service import TenantService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from api.utils import get_uuid
from api.utils.api_utils import get_json_result
@manager.route('/set', methods=['POST'])
@login_required
def set_dialog():
req = request.json
dialog_id = req.get("dialog_id")
name = req.get("name", "New Dialog")
description = req.get("description", "A helpful Dialog")
icon = req.get("icon", "")
top_n = req.get("top_n", 6)
top_k = req.get("top_k", 1024)
rerank_id = req.get("rerank_id", "")
if not rerank_id: req["rerank_id"] = ""
similarity_threshold = req.get("similarity_threshold", 0.1)
vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
if vector_similarity_weight is None: vector_similarity_weight = 0.3
llm_setting = req.get("llm_setting", {})
default_prompt = {
"system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
以下是知识库:
{knowledge}
以上是知识库。""",
"prologue": "您好我是您的助手小樱长得可爱又善良can I help you?",
"parameters": [
{"key": "knowledge", "optional": False}
],
"empty_response": "Sorry! 知识库中未找到相关内容!"
}
prompt_config = req.get("prompt_config", default_prompt)
if not prompt_config["system"]:
prompt_config["system"] = default_prompt["system"]
# if len(prompt_config["parameters"]) < 1:
# prompt_config["parameters"] = default_prompt["parameters"]
# for p in prompt_config["parameters"]:
# if p["key"] == "knowledge":break
# else: prompt_config["parameters"].append(default_prompt["parameters"][0])
for p in prompt_config["parameters"]:
if p["optional"]:
continue
if prompt_config["system"].find("{%s}" % p["key"]) < 0:
return get_data_error_result(
retmsg="Parameter '{}' is not used".format(p["key"]))
try:
e, tenant = TenantService.get_by_id(current_user.id)
if not e:
return get_data_error_result(retmsg="Tenant not found!")
llm_id = req.get("llm_id", tenant.llm_id)
if not dialog_id:
if not req.get("kb_ids"):
return get_data_error_result(
retmsg="Fail! Please select knowledgebase!")
dia = {
"id": get_uuid(),
"tenant_id": current_user.id,
"name": name,
"kb_ids": req["kb_ids"],
"description": description,
"llm_id": llm_id,
"llm_setting": llm_setting,
"prompt_config": prompt_config,
"top_n": top_n,
"top_k": top_k,
"rerank_id": rerank_id,
"similarity_threshold": similarity_threshold,
"vector_similarity_weight": vector_similarity_weight,
"icon": icon
}
if not DialogService.save(**dia):
return get_data_error_result(retmsg="Fail to new a dialog!")
e, dia = DialogService.get_by_id(dia["id"])
if not e:
return get_data_error_result(retmsg="Fail to new a dialog!")
return get_json_result(data=dia.to_json())
else:
del req["dialog_id"]
if "kb_names" in req:
del req["kb_names"]
if not DialogService.update_by_id(dialog_id, req):
return get_data_error_result(retmsg="Dialog not found!")
e, dia = DialogService.get_by_id(dialog_id)
if not e:
return get_data_error_result(retmsg="Fail to update a dialog!")
dia = dia.to_dict()
dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"])
return get_json_result(data=dia)
except Exception as e:
return server_error_response(e)
@manager.route('/get', methods=['GET'])
@login_required
def get():
dialog_id = request.args["dialog_id"]
try:
e, dia = DialogService.get_by_id(dialog_id)
if not e:
return get_data_error_result(retmsg="Dialog not found!")
dia = dia.to_dict()
dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"])
return get_json_result(data=dia)
except Exception as e:
return server_error_response(e)
def get_kb_names(kb_ids):
ids, nms = [], []
for kid in kb_ids:
e, kb = KnowledgebaseService.get_by_id(kid)
if not e or kb.status != StatusEnum.VALID.value:
continue
ids.append(kid)
nms.append(kb.name)
return ids, nms
@manager.route('/list', methods=['GET'])
@login_required
def list_dialogs():
try:
diags = DialogService.query(
tenant_id=current_user.id,
status=StatusEnum.VALID.value,
reverse=True,
order_by=DialogService.model.create_time)
diags = [d.to_dict() for d in diags]
for d in diags:
d["kb_ids"], d["kb_names"] = get_kb_names(d["kb_ids"])
return get_json_result(data=diags)
except Exception as e:
return server_error_response(e)
@manager.route('/rm', methods=['POST'])
@login_required
@validate_request("dialog_ids")
def rm():
req = request.json
try:
DialogService.update_many_by_id(
[{"id": id, "status": StatusEnum.INVALID.value} for id in req["dialog_ids"]])
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)

File diff suppressed because it is too large.


@@ -1,153 +1,153 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from elasticsearch_dsl import Q
from flask import request
from flask_login import login_required, current_user
from api.db.services import duplicate_name
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.user_service import TenantService, UserTenantService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from api.utils import get_uuid, get_format_time
from api.db import StatusEnum, UserTenantRole, FileSource
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.db_models import Knowledgebase, File
from api.settings import stat_logger, RetCode
from api.utils.api_utils import get_json_result
from rag.nlp import search
from rag.utils.es_conn import ELASTICSEARCH
@manager.route('/create', methods=['post'])
@login_required
@validate_request("name")
def create():
req = request.json
req["name"] = req["name"].strip()
req["name"] = duplicate_name(
KnowledgebaseService.query,
name=req["name"],
tenant_id=current_user.id,
status=StatusEnum.VALID.value)
try:
req["id"] = get_uuid()
req["tenant_id"] = current_user.id
req["created_by"] = current_user.id
e, t = TenantService.get_by_id(current_user.id)
if not e:
return get_data_error_result(retmsg="Tenant not found.")
req["embd_id"] = t.embd_id
if not KnowledgebaseService.save(**req):
return get_data_error_result()
return get_json_result(data={"kb_id": req["id"]})
except Exception as e:
return server_error_response(e)
@manager.route('/update', methods=['post'])
@login_required
@validate_request("kb_id", "name", "description", "permission", "parser_id")
def update():
req = request.json
req["name"] = req["name"].strip()
try:
if not KnowledgebaseService.query(
created_by=current_user.id, id=req["kb_id"]):
return get_json_result(
data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR)
e, kb = KnowledgebaseService.get_by_id(req["kb_id"])
if not e:
return get_data_error_result(
retmsg="Can't find this knowledgebase!")
if req["name"].lower() != kb.name.lower() \
and len(KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) > 1:
return get_data_error_result(
retmsg="Duplicated knowledgebase name.")
del req["kb_id"]
if not KnowledgebaseService.update_by_id(kb.id, req):
return get_data_error_result()
e, kb = KnowledgebaseService.get_by_id(kb.id)
if not e:
return get_data_error_result(
retmsg="Database error (Knowledgebase rename)!")
return get_json_result(data=kb.to_json())
except Exception as e:
return server_error_response(e)
@manager.route('/detail', methods=['GET'])
@login_required
def detail():
kb_id = request.args["kb_id"]
try:
kb = KnowledgebaseService.get_detail(kb_id)
if not kb:
return get_data_error_result(
retmsg="Can't find this knowledgebase!")
return get_json_result(data=kb)
except Exception as e:
return server_error_response(e)
@manager.route('/list', methods=['GET'])
@login_required
def list_kbs():
page_number = request.args.get("page", 1)
items_per_page = request.args.get("page_size", 150)
orderby = request.args.get("orderby", "create_time")
desc = request.args.get("desc", True)
try:
tenants = TenantService.get_joined_tenants_by_user_id(current_user.id)
kbs = KnowledgebaseService.get_by_tenant_ids(
[m["tenant_id"] for m in tenants], current_user.id, page_number, items_per_page, orderby, desc)
return get_json_result(data=kbs)
except Exception as e:
return server_error_response(e)
@manager.route('/rm', methods=['post'])
@login_required
@validate_request("kb_id")
def rm():
req = request.json
try:
kbs = KnowledgebaseService.query(
created_by=current_user.id, id=req["kb_id"])
if not kbs:
return get_json_result(
data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR)
for doc in DocumentService.query(kb_id=req["kb_id"]):
if not DocumentService.remove_document(doc, kbs[0].tenant_id):
return get_data_error_result(
retmsg="Database error (Document removal)!")
f2d = File2DocumentService.get_by_document_id(doc.id)
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
File2DocumentService.delete_by_document_id(doc.id)
if not KnowledgebaseService.delete_by_id(req["kb_id"]):
return get_data_error_result(
retmsg="Database error (Knowledgebase removal)!")
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)

View File

@ -1,279 +1,279 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from flask import request
from flask_login import login_required, current_user
from api.db.services.llm_service import LLMFactoriesService, TenantLLMService, LLMService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from api.db import StatusEnum, LLMType
from api.db.db_models import TenantLLM
from api.utils.api_utils import get_json_result
from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel
import requests
import ast
@manager.route('/factories', methods=['GET'])
@login_required
def factories():
try:
fac = LLMFactoriesService.get_all()
return get_json_result(data=[f.to_dict() for f in fac if f.name not in ["Youdao", "FastEmbed", "BAAI"]])
except Exception as e:
return server_error_response(e)
@manager.route('/set_api_key', methods=['POST'])
@login_required
@validate_request("llm_factory", "api_key")
def set_api_key():
req = request.json
# test if api key works
chat_passed, embd_passed, rerank_passed = False, False, False
factory = req["llm_factory"]
msg = ""
for llm in LLMService.query(fid=factory):
if not embd_passed and llm.model_type == LLMType.EMBEDDING.value:
mdl = EmbeddingModel[factory](
req["api_key"], llm.llm_name, base_url=req.get("base_url"))
try:
arr, tc = mdl.encode(["Test if the api key is available"])
if len(arr[0]) == 0:
raise Exception("Fail")
embd_passed = True
except Exception as e:
msg += f"\nFail to access embedding model({llm.llm_name}) using this api key." + str(e)
elif not chat_passed and llm.model_type == LLMType.CHAT.value:
mdl = ChatModel[factory](
req["api_key"], llm.llm_name, base_url=req.get("base_url"))
            try:
                m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}],
                                 {"temperature": 0.9, 'max_tokens': 50})
                if m.find("**ERROR**") >= 0:
                    raise Exception(m)
                chat_passed = True
            except Exception as e:
                msg += f"\nFailed to access model({llm.llm_name}) using this api key. " + str(e)
elif not rerank_passed and llm.model_type == LLMType.RERANK:
mdl = RerankModel[factory](
req["api_key"], llm.llm_name, base_url=req.get("base_url"))
            try:
                arr, tc = mdl.similarity("What's the weather?", ["Is it sunny today?"])
                if len(arr) == 0 or tc == 0:
                    raise Exception("Fail")
                rerank_passed = True
            except Exception as e:
                msg += f"\nFailed to access model({llm.llm_name}) using this api key. " + str(e)
if msg:
return get_data_error_result(retmsg=msg)
llm = {
"api_key": req["api_key"],
"api_base": req.get("base_url", "")
}
for n in ["model_type", "llm_name"]:
if n in req:
llm[n] = req[n]
if not TenantLLMService.filter_update(
[TenantLLM.tenant_id == current_user.id, TenantLLM.llm_factory == factory], llm):
for llm in LLMService.query(fid=factory):
TenantLLMService.save(
tenant_id=current_user.id,
llm_factory=factory,
llm_name=llm.llm_name,
model_type=llm.model_type,
api_key=req["api_key"],
api_base=req.get("base_url", "")
)
return get_json_result(data=True)
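For reference, the shape of the body this route accepts: `llm_factory` and `api_key` are required by the decorator, and `base_url` is optional (stored as `api_base`). A hedged sketch with placeholder values:

# Hypothetical payload for POST /v1/llm/set_api_key; all values are placeholders.
payload = {
    "llm_factory": "OpenAI",                  # must name an existing factory
    "api_key": "sk-...",                      # probed against the factory's models above
    "base_url": "https://api.openai.com/v1",  # optional
}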
@manager.route('/add_llm', methods=['POST'])
@login_required
@validate_request("llm_factory", "llm_name", "model_type")
def add_llm():
req = request.json
factory = req["llm_factory"]
if factory == "VolcEngine":
# For VolcEngine, due to its special authentication method
# Assemble volc_ak, volc_sk, endpoint_id into api_key
temp = list(ast.literal_eval(req["llm_name"]).items())[0]
llm_name = temp[0]
endpoint_id = temp[1]
api_key = '{' + f'"volc_ak": "{req.get("volc_ak", "")}", ' \
f'"volc_sk": "{req.get("volc_sk", "")}", ' \
f'"ep_id": "{endpoint_id}", ' + '}'
elif factory == "Bedrock":
# For Bedrock, due to its special authentication method
# Assemble bedrock_ak, bedrock_sk, bedrock_region
llm_name = req["llm_name"]
api_key = '{' + f'"bedrock_ak": "{req.get("bedrock_ak", "")}", ' \
f'"bedrock_sk": "{req.get("bedrock_sk", "")}", ' \
f'"bedrock_region": "{req.get("bedrock_region", "")}", ' + '}'
elif factory == "LocalAI":
llm_name = req["llm_name"]+"___LocalAI"
api_key = "xxxxxxxxxxxxxxx"
elif factory == "OpenAI-API-Compatible":
llm_name = req["llm_name"]+"___OpenAI-API"
api_key = req.get("api_key","xxxxxxxxxxxxxxx")
else:
llm_name = req["llm_name"]
api_key = req.get("api_key","xxxxxxxxxxxxxxx")
llm = {
"tenant_id": current_user.id,
"llm_factory": factory,
"model_type": req["model_type"],
"llm_name": llm_name,
"api_base": req.get("api_base", ""),
"api_key": api_key
}
msg = ""
if llm["model_type"] == LLMType.EMBEDDING.value:
mdl = EmbeddingModel[factory](
key=llm['api_key'] if factory in ["VolcEngine", "Bedrock","OpenAI-API-Compatible"] else None,
model_name=llm["llm_name"],
base_url=llm["api_base"])
try:
arr, tc = mdl.encode(["Test if the api key is available"])
if len(arr[0]) == 0 or tc == 0:
raise Exception("Fail")
except Exception as e:
msg += f"\nFail to access embedding model({llm['llm_name']})." + str(e)
elif llm["model_type"] == LLMType.CHAT.value:
mdl = ChatModel[factory](
key=llm['api_key'] if factory in ["VolcEngine", "Bedrock","OpenAI-API-Compatible"] else None,
model_name=llm["llm_name"],
base_url=llm["api_base"]
)
try:
m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {
"temperature": 0.9})
if not tc:
raise Exception(m)
except Exception as e:
msg += f"\nFail to access model({llm['llm_name']})." + str(
e)
elif llm["model_type"] == LLMType.RERANK:
mdl = RerankModel[factory](
key=None, model_name=llm["llm_name"], base_url=llm["api_base"]
)
try:
arr, tc = mdl.similarity("Hello~ Ragflower!", ["Hi, there!"])
if len(arr) == 0 or tc == 0:
raise Exception("Not known.")
except Exception as e:
msg += f"\nFail to access model({llm['llm_name']})." + str(
e)
elif llm["model_type"] == LLMType.IMAGE2TEXT.value:
mdl = CvModel[factory](
key=llm["api_key"] if factory in ["OpenAI-API-Compatible"] else None, model_name=llm["llm_name"], base_url=llm["api_base"]
)
try:
img_url = (
"https://upload.wikimedia.org/wikipedia/comm"
"ons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/256"
"0px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
)
res = requests.get(img_url)
if res.status_code == 200:
m, tc = mdl.describe(res.content)
if not tc:
raise Exception(m)
else:
pass
except Exception as e:
msg += f"\nFail to access model({llm['llm_name']})." + str(e)
else:
        # TODO: check other types of models
pass
if msg:
return get_data_error_result(retmsg=msg)
if not TenantLLMService.filter_update(
[TenantLLM.tenant_id == current_user.id, TenantLLM.llm_factory == factory, TenantLLM.llm_name == llm["llm_name"]], llm):
TenantLLMService.save(**llm)
return get_json_result(data=True)
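Note the VolcEngine branch above: `llm_name` is expected to be a dict literal mapping the display model name to its endpoint id, because it goes through `ast.literal_eval`. A hedged example (model and endpoint ids are invented):

# Hypothetical add_llm payload for VolcEngine; ids are invented placeholders.
payload = {
    "llm_factory": "VolcEngine",
    "model_type": "chat",
    "llm_name": '{"Doubao-pro-32k": "ep-20240604-abcd"}',  # parsed via ast.literal_eval
    "volc_ak": "<access key>",
    "volc_sk": "<secret key>",
}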
@manager.route('/delete_llm', methods=['POST'])
@login_required
@validate_request("llm_factory", "llm_name")
def delete_llm():
req = request.json
TenantLLMService.filter_delete(
[TenantLLM.tenant_id == current_user.id, TenantLLM.llm_factory == req["llm_factory"], TenantLLM.llm_name == req["llm_name"]])
return get_json_result(data=True)
@manager.route('/my_llms', methods=['GET'])
@login_required
def my_llms():
try:
res = {}
for o in TenantLLMService.get_my_llms(current_user.id):
if o["llm_factory"] not in res:
res[o["llm_factory"]] = {
"tags": o["tags"],
"llm": []
}
res[o["llm_factory"]]["llm"].append({
"type": o["model_type"],
"name": o["llm_name"],
"used_token": o["used_tokens"]
})
return get_json_result(data=res)
except Exception as e:
return server_error_response(e)
@manager.route('/list', methods=['GET'])
@login_required
def list_app():
model_type = request.args.get("model_type")
try:
objs = TenantLLMService.query(tenant_id=current_user.id)
facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key])
llms = LLMService.get_all()
llms = [m.to_dict()
for m in llms if m.status == StatusEnum.VALID.value]
for m in llms:
m["available"] = m["fid"] in facts or m["llm_name"].lower() == "flag-embedding" or m["fid"] in ["Youdao","FastEmbed", "BAAI"]
llm_set = set([m["llm_name"] for m in llms])
for o in objs:
            if not o.api_key:
                continue
            if o.llm_name in llm_set:
                continue
llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True})
res = {}
for m in llms:
if model_type and m["model_type"].find(model_type)<0:
continue
if m["fid"] not in res:
res[m["fid"]] = []
res[m["fid"]].append(m)
return get_json_result(data=res)
except Exception as e:
return server_error_response(e)

View File

@ -1,391 +1,391 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import re
from datetime import datetime
from flask import request, session, redirect
from werkzeug.security import generate_password_hash, check_password_hash
from flask_login import login_required, current_user, login_user, logout_user
from api.db.db_models import TenantLLM
from api.db.services.llm_service import TenantLLMService, LLMService
from api.utils.api_utils import server_error_response, validate_request
from api.utils import get_uuid, get_format_time, decrypt, download_img, current_timestamp, datetime_format
from api.db import UserTenantRole, LLMType, FileType
from api.settings import RetCode, GITHUB_OAUTH, FEISHU_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, \
API_KEY, \
LLM_FACTORY, LLM_BASE_URL, RERANK_MDL
from api.db.services.user_service import UserService, TenantService, UserTenantService
from api.db.services.file_service import FileService
from api.settings import stat_logger
from api.utils.api_utils import get_json_result, cors_reponse
@manager.route('/login', methods=['POST', 'GET'])
def login():
login_channel = "password"
if not request.json:
        return get_json_result(data=False, retcode=RetCode.AUTHENTICATION_ERROR,
                               retmsg='Unauthorized!')
email = request.json.get('email', "")
users = UserService.query(email=email)
if not users:
return get_json_result(
            data=False, retcode=RetCode.AUTHENTICATION_ERROR, retmsg='This email is not registered!')
password = request.json.get('password')
try:
password = decrypt(password)
except BaseException:
return get_json_result(
            data=False, retcode=RetCode.SERVER_ERROR, retmsg='Failed to decrypt password')
user = UserService.query_user(email, password)
if user:
response_data = user.to_json()
user.access_token = get_uuid()
login_user(user)
        user.update_time = current_timestamp()
        user.update_date = datetime_format(datetime.now())
user.save()
msg = "Welcome back!"
return cors_reponse(data=response_data, auth=user.get_id(), retmsg=msg)
else:
return get_json_result(data=False, retcode=RetCode.AUTHENTICATION_ERROR,
retmsg='Email and Password do not match!')
@manager.route('/github_callback', methods=['GET'])
def github_callback():
import requests
res = requests.post(GITHUB_OAUTH.get("url"), data={
"client_id": GITHUB_OAUTH.get("client_id"),
"client_secret": GITHUB_OAUTH.get("secret_key"),
"code": request.args.get('code')
}, headers={"Accept": "application/json"})
res = res.json()
if "error" in res:
return redirect("/?error=%s" % res["error_description"])
if "user:email" not in res["scope"].split(","):
return redirect("/?error=user:email not in scope")
session["access_token"] = res["access_token"]
session["access_token_from"] = "github"
userinfo = user_info_from_github(session["access_token"])
users = UserService.query(email=userinfo["email"])
user_id = get_uuid()
if not users:
try:
try:
avatar = download_img(userinfo["avatar_url"])
except Exception as e:
stat_logger.exception(e)
avatar = ""
users = user_register(user_id, {
"access_token": session["access_token"],
"email": userinfo["email"],
"avatar": avatar,
"nickname": userinfo["login"],
"login_channel": "github",
"last_login_time": get_format_time(),
"is_superuser": False,
})
            if not users:
                raise Exception('User registration failed.')
            if len(users) > 1:
                raise Exception('Same e-mail already exists!')
user = users[0]
login_user(user)
return redirect("/?auth=%s" % user.get_id())
except Exception as e:
rollback_user_registration(user_id)
stat_logger.exception(e)
return redirect("/?error=%s" % str(e))
user = users[0]
user.access_token = get_uuid()
login_user(user)
user.save()
return redirect("/?auth=%s" % user.get_id())
@manager.route('/feishu_callback', methods=['GET'])
def feishu_callback():
import requests
app_access_token_res = requests.post(FEISHU_OAUTH.get("app_access_token_url"), data=json.dumps({
"app_id": FEISHU_OAUTH.get("app_id"),
"app_secret": FEISHU_OAUTH.get("app_secret")
}), headers={"Content-Type": "application/json; charset=utf-8"})
app_access_token_res = app_access_token_res.json()
if app_access_token_res['code'] != 0:
return redirect("/?error=%s" % app_access_token_res)
res = requests.post(FEISHU_OAUTH.get("user_access_token_url"), data=json.dumps({
"grant_type": FEISHU_OAUTH.get("grant_type"),
"code": request.args.get('code')
}), headers={"Content-Type": "application/json; charset=utf-8",
'Authorization': f"Bearer {app_access_token_res['app_access_token']}"})
res = res.json()
if res['code'] != 0:
return redirect("/?error=%s" % res["message"])
if "contact:user.email:readonly" not in res["data"]["scope"].split(" "):
return redirect("/?error=contact:user.email:readonly not in scope")
session["access_token"] = res["data"]["access_token"]
session["access_token_from"] = "feishu"
userinfo = user_info_from_feishu(session["access_token"])
users = UserService.query(email=userinfo["email"])
user_id = get_uuid()
if not users:
try:
try:
avatar = download_img(userinfo["avatar_url"])
except Exception as e:
stat_logger.exception(e)
avatar = ""
users = user_register(user_id, {
"access_token": session["access_token"],
"email": userinfo["email"],
"avatar": avatar,
"nickname": userinfo["en_name"],
"login_channel": "feishu",
"last_login_time": get_format_time(),
"is_superuser": False,
})
            if not users:
                raise Exception('User registration failed.')
            if len(users) > 1:
                raise Exception('Same e-mail already exists!')
user = users[0]
login_user(user)
return redirect("/?auth=%s" % user.get_id())
except Exception as e:
rollback_user_registration(user_id)
stat_logger.exception(e)
return redirect("/?error=%s" % str(e))
user = users[0]
user.access_token = get_uuid()
login_user(user)
user.save()
return redirect("/?auth=%s" % user.get_id())
def user_info_from_feishu(access_token):
import requests
headers = {"Content-Type": "application/json; charset=utf-8",
'Authorization': f"Bearer {access_token}"}
res = requests.get(
f"https://open.feishu.cn/open-apis/authen/v1/user_info",
headers=headers)
user_info = res.json()["data"]
user_info["email"] = None if user_info.get("email") == "" else user_info["email"]
return user_info
def user_info_from_github(access_token):
import requests
headers = {"Accept": "application/json",
'Authorization': f"token {access_token}"}
res = requests.get(
f"https://api.github.com/user?access_token={access_token}",
headers=headers)
user_info = res.json()
email_info = requests.get(
f"https://api.github.com/user/emails?access_token={access_token}",
headers=headers).json()
user_info["email"] = next(
(email for email in email_info if email['primary'] == True),
None)["email"]
return user_info
@manager.route("/logout", methods=['GET'])
@login_required
def log_out():
current_user.access_token = ""
current_user.save()
logout_user()
return get_json_result(data=True)
@manager.route("/setting", methods=["POST"])
@login_required
def setting_user():
update_dict = {}
request_data = request.json
if request_data.get("password"):
new_password = request_data.get("new_password")
if not check_password_hash(
current_user.password, decrypt(request_data["password"])):
return get_json_result(
data=False, retcode=RetCode.AUTHENTICATION_ERROR, retmsg='Password error!')
if new_password:
update_dict["password"] = generate_password_hash(
decrypt(new_password))
for k in request_data.keys():
if k in ["password", "new_password"]:
continue
update_dict[k] = request_data[k]
try:
UserService.update_by_id(current_user.id, update_dict)
return get_json_result(data=True)
except Exception as e:
stat_logger.exception(e)
return get_json_result(
data=False, retmsg='Update failure!', retcode=RetCode.EXCEPTION_ERROR)
@manager.route("/info", methods=["GET"])
@login_required
def user_info():
return get_json_result(data=current_user.to_dict())
def rollback_user_registration(user_id):
try:
UserService.delete_by_id(user_id)
except Exception as e:
pass
try:
TenantService.delete_by_id(user_id)
except Exception as e:
pass
try:
u = UserTenantService.query(tenant_id=user_id)
if u:
UserTenantService.delete_by_id(u[0].id)
except Exception as e:
pass
try:
TenantLLM.delete().where(TenantLLM.tenant_id == user_id).execute()
except Exception as e:
pass
def user_register(user_id, user):
user["id"] = user_id
tenant = {
"id": user_id,
"name": user["nickname"] + "s Kingdom",
"llm_id": CHAT_MDL,
"embd_id": EMBEDDING_MDL,
"asr_id": ASR_MDL,
"parser_ids": PARSERS,
"img2txt_id": IMAGE2TEXT_MDL,
"rerank_id": RERANK_MDL
}
usr_tenant = {
"tenant_id": user_id,
"user_id": user_id,
"invited_by": user_id,
"role": UserTenantRole.OWNER
}
file_id = get_uuid()
file = {
"id": file_id,
"parent_id": file_id,
"tenant_id": user_id,
"created_by": user_id,
"name": "/",
"type": FileType.FOLDER.value,
"size": 0,
"location": "",
}
tenant_llm = []
for llm in LLMService.query(fid=LLM_FACTORY):
tenant_llm.append({"tenant_id": user_id,
"llm_factory": LLM_FACTORY,
"llm_name": llm.llm_name,
"model_type": llm.model_type,
"api_key": API_KEY,
"api_base": LLM_BASE_URL
})
if not UserService.save(**user):
return
TenantService.insert(**tenant)
UserTenantService.insert(**usr_tenant)
TenantLLMService.insert_many(tenant_llm)
FileService.insert(file)
return UserService.query(email=user["email"])
@manager.route("/register", methods=["POST"])
@validate_request("nickname", "email", "password")
def user_add():
req = request.json
if UserService.query(email=req["email"]):
return get_json_result(
data=False, retmsg=f'Email: {req["email"]} has already registered!', retcode=RetCode.OPERATING_ERROR)
if not re.match(r"^[\w\._-]+@([\w_-]+\.)+[\w-]{2,4}$", req["email"]):
        return get_json_result(data=False, retmsg=f'Invalid e-mail: {req["email"]}!',
retcode=RetCode.OPERATING_ERROR)
user_dict = {
"access_token": get_uuid(),
"email": req["email"],
"nickname": req["nickname"],
"password": decrypt(req["password"]),
"login_channel": "password",
"last_login_time": get_format_time(),
"is_superuser": False,
}
user_id = get_uuid()
try:
users = user_register(user_id, user_dict)
        if not users:
            raise Exception('User registration failed.')
        if len(users) > 1:
            raise Exception('Same e-mail already exists!')
user = users[0]
login_user(user)
return cors_reponse(data=user.to_json(),
auth=user.get_id(), retmsg="Welcome aboard!")
except Exception as e:
rollback_user_registration(user_id)
stat_logger.exception(e)
return get_json_result(
data=False, retmsg='User registration failure!', retcode=RetCode.EXCEPTION_ERROR)
@manager.route("/tenant_info", methods=["GET"])
@login_required
def tenant_info():
try:
tenants = TenantService.get_by_user_id(current_user.id)[0]
return get_json_result(data=tenants)
except Exception as e:
return server_error_response(e)
@manager.route("/set_tenant_info", methods=["POST"])
@login_required
@validate_request("tenant_id", "asr_id", "embd_id", "img2txt_id", "llm_id")
def set_tenant_info():
req = request.json
try:
tid = req["tenant_id"]
del req["tenant_id"]
TenantService.update_by_id(tid, req)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
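To illustrate the flow above, a hedged client sketch of login followed by tenant_info. The host/port and the response location of the auth token are assumptions, and the password must be pre-encrypted in whatever form `api.utils.decrypt` expects; that scheme is outside this diff, so `<encrypted>` is a placeholder.

# Hypothetical login + tenant_info round trip; nothing here is authoritative.
import requests

BASE = "http://127.0.0.1:9380/v1/user"
r = requests.post(f"{BASE}/login",
                  json={"email": "user@example.com", "password": "<encrypted>"})
token = r.headers.get("Authorization")   # assumed location of the returned token
print(requests.get(f"{BASE}/tenant_info",
                   headers={"Authorization": token}).json())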

View File

@ -1,102 +1,102 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from enum import Enum
from enum import IntEnum
from strenum import StrEnum
class StatusEnum(Enum):
VALID = "1"
INVALID = "0"
class UserTenantRole(StrEnum):
OWNER = 'owner'
ADMIN = 'admin'
NORMAL = 'normal'
class TenantPermission(StrEnum):
ME = 'me'
TEAM = 'team'
class SerializedType(IntEnum):
PICKLE = 1
JSON = 2
class FileType(StrEnum):
PDF = 'pdf'
DOC = 'doc'
VISUAL = 'visual'
AURAL = 'aural'
VIRTUAL = 'virtual'
FOLDER = 'folder'
OTHER = "other"
class LLMType(StrEnum):
CHAT = 'chat'
EMBEDDING = 'embedding'
SPEECH2TEXT = 'speech2text'
IMAGE2TEXT = 'image2text'
RERANK = 'rerank'
class ChatStyle(StrEnum):
CREATIVE = 'Creative'
PRECISE = 'Precise'
EVENLY = 'Evenly'
CUSTOM = 'Custom'
class TaskStatus(StrEnum):
UNSTART = "0"
RUNNING = "1"
CANCEL = "2"
DONE = "3"
FAIL = "4"
class ParserType(StrEnum):
PRESENTATION = "presentation"
LAWS = "laws"
MANUAL = "manual"
PAPER = "paper"
RESUME = "resume"
BOOK = "book"
QA = "qa"
TABLE = "table"
NAIVE = "naive"
PICTURE = "picture"
ONE = "one"
AUDIO = "audio"
EMAIL = "email"
KG = "knowledge_graph"
class FileSource(StrEnum):
LOCAL = ""
KNOWLEDGEBASE = "knowledgebase"
S3 = "s3"
class CanvasType(StrEnum):
ChatBot = "chatbot"
DocBot = "docbot"
KNOWLEDGEBASE_FOLDER_NAME = ".knowledgebase"
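Because these enums subclass `strenum.StrEnum`, each member is itself a `str`, so comparisons against raw strings succeed with or without `.value` — which is why call sites elsewhere in this diff can write `llm.model_type == LLMType.RERANK` while others use `LLMType.CHAT.value`. A quick check in the context of this module:

# StrEnum members are strings, so .value is optional in equality tests.
assert LLMType.RERANK == "rerank"
assert LLMType.CHAT.value == "chat"
assert isinstance(FileSource.KNOWLEDGEBASE, str)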

File diff suppressed because it is too large.

View File

@ -1,130 +1,130 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import operator
from functools import reduce
from typing import Dict, Type, Union
from api.utils import current_timestamp, timestamp_to_date
from api.db.db_models import DB, DataBaseModel
from api.db.runtime_config import RuntimeConfig
from api.utils.log_utils import getLogger
from enum import Enum
LOGGER = getLogger()
@DB.connection_context()
def bulk_insert_into_db(model, data_source, replace_on_conflict=False):
DB.create_tables([model])
for i, data in enumerate(data_source):
current_time = current_timestamp() + i
current_date = timestamp_to_date(current_time)
if 'create_time' not in data:
data['create_time'] = current_time
data['create_date'] = timestamp_to_date(data['create_time'])
data['update_time'] = current_time
data['update_date'] = current_date
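    # on conflict, refresh every supplied column except the creation stamps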
preserve = tuple(data_source[0].keys() - {'create_time', 'create_date'})
batch_size = 1000
for i in range(0, len(data_source), batch_size):
with DB.atomic():
query = model.insert_many(data_source[i:i + batch_size])
if replace_on_conflict:
query = query.on_conflict(preserve=preserve)
query.execute()
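A short, hypothetical call showing the upsert path (`SomeModel` stands in for a real `DataBaseModel` subclass):

# Hypothetical: upsert three rows; create_time/create_date are stamped by the
# loop above and preserved when a conflicting row already exists.
rows = [{"id": i, "name": f"doc-{i}"} for i in range(3)]
bulk_insert_into_db(SomeModel, rows, replace_on_conflict=True)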
def get_dynamic_db_model(base, job_id):
return type(base.model(
table_index=get_dynamic_tracking_table_index(job_id=job_id)))
def get_dynamic_tracking_table_index(job_id):
return job_id[:8]
def fill_db_model_object(model_object, human_model_dict):
for k, v in human_model_dict.items():
attr_name = 'f_%s' % k
if hasattr(model_object.__class__, attr_name):
setattr(model_object, attr_name, v)
return model_object
# https://docs.peewee-orm.com/en/latest/peewee/query_operators.html
supported_operators = {
'==': operator.eq,
'<': operator.lt,
'<=': operator.le,
'>': operator.gt,
'>=': operator.ge,
'!=': operator.ne,
'<<': operator.lshift,
'>>': operator.rshift,
'%': operator.mod,
'**': operator.pow,
'^': operator.xor,
'~': operator.inv,
}
def query_dict2expression(
model: Type[DataBaseModel], query: Dict[str, Union[bool, int, str, list, tuple]]):
expression = []
for field, value in query.items():
if not isinstance(value, (list, tuple)):
value = ('==', value)
op, *val = value
field = getattr(model, f'f_{field}')
value = supported_operators[op](
field, val[0]) if op in supported_operators else getattr(
field, op)(
*val)
expression.append(value)
return reduce(operator.iand, expression)
def query_db(model: Type[DataBaseModel], limit: int = 0, offset: int = 0,
query: dict = None, order_by: Union[str, list, tuple] = None):
data = model.select()
if query:
data = data.where(query_dict2expression(model, query))
count = data.count()
if not order_by:
order_by = 'create_time'
if not isinstance(order_by, (list, tuple)):
order_by = (order_by, 'asc')
order_by, order = order_by
order_by = getattr(model, f'f_{order_by}')
order_by = getattr(order_by, order)()
data = data.order_by(order_by)
if limit > 0:
data = data.limit(limit)
if offset > 0:
data = data.offset(offset)
return list(data), count
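For illustration, a hedged call against a hypothetical model (`SomeModel` and its `f_status`/`f_size` columns are invented): each `query` entry is either a bare value (implicit `==`) or an `(operator, operand)` tuple, and the `f_` prefix is added to field names automatically.

# Hypothetical usage of query_db; SomeModel is a stand-in DataBaseModel subclass.
records, total = query_db(
    SomeModel,
    limit=20,
    offset=0,
    query={"status": "1", "size": (">", 0)},   # f_status == "1" AND f_size > 0
    order_by=("create_time", "desc"),          # newest rows first
)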

View File

@ -1,184 +1,184 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import os
import time
import uuid
from copy import deepcopy
from api.db import LLMType, UserTenantRole
from api.db.db_models import init_database_tables as init_web_db, LLMFactories, LLM, TenantLLM
from api.db.services import UserService
from api.db.services.canvas_service import CanvasTemplateService
from api.db.services.document_service import DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantLLMService, LLMBundle
from api.db.services.user_service import TenantService, UserTenantService
from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL
from api.utils.file_utils import get_project_base_directory
def init_superuser():
user_info = {
"id": uuid.uuid1().hex,
"password": "admin",
"nickname": "admin",
"is_superuser": True,
"email": "admin@ragflow.io",
"creator": "system",
"status": "1",
}
tenant = {
"id": user_info["id"],
"name": user_info["nickname"] + "s Kingdom",
"llm_id": CHAT_MDL,
"embd_id": EMBEDDING_MDL,
"asr_id": ASR_MDL,
"parser_ids": PARSERS,
"img2txt_id": IMAGE2TEXT_MDL
}
usr_tenant = {
"tenant_id": user_info["id"],
"user_id": user_info["id"],
"invited_by": user_info["id"],
"role": UserTenantRole.OWNER
}
tenant_llm = []
for llm in LLMService.query(fid=LLM_FACTORY):
tenant_llm.append(
{"tenant_id": user_info["id"], "llm_factory": LLM_FACTORY, "llm_name": llm.llm_name, "model_type": llm.model_type,
"api_key": API_KEY, "api_base": LLM_BASE_URL})
if not UserService.save(**user_info):
print("\033[93m【ERROR】\033[0mcan't init admin.")
return
TenantService.insert(**tenant)
UserTenantService.insert(**usr_tenant)
TenantLLMService.insert_many(tenant_llm)
print(
"【INFO】Super user initialized. \033[93memail: admin@ragflow.io, password: admin\033[0m. Changing the password after logging in is strongly recommended.")
chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
msg = chat_mdl.chat(system="", history=[
{"role": "user", "content": "Hello!"}], gen_conf={})
if msg.find("ERROR: ") == 0:
print(
"\33[91m【ERROR】\33[0m: ",
"'{}' dosen't work. {}".format(
tenant["llm_id"],
msg))
embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
v, c = embd_mdl.encode(["Hello!"])
if c == 0:
print(
"\33[91m【ERROR】\33[0m:",
" '{}' dosen't work!".format(
tenant["embd_id"]))
def init_llm_factory():
try:
LLMService.filter_delete([(LLM.fid == "MiniMax") | (LLM.fid == "Minimax")])  # peewee needs |, not Python's `or`
except Exception as e:
pass
factory_llm_infos = json.load(
open(
os.path.join(get_project_base_directory(), "conf", "llm_factories.json"),
"r",
)
)
for factory_llm_info in factory_llm_infos["factory_llm_infos"]:
llm_infos = factory_llm_info.pop("llm")
try:
LLMFactoriesService.save(**factory_llm_info)
except Exception as e:
pass
LLMService.filter_delete([LLM.fid == factory_llm_info["name"]])
for llm_info in llm_infos:
llm_info["fid"] = factory_llm_info["name"]
try:
LLMService.save(**llm_info)
except Exception as e:
pass
LLMFactoriesService.filter_delete([LLMFactories.name == "Local"])
LLMService.filter_delete([LLM.fid == "Local"])
LLMService.filter_delete([LLM.llm_name == "qwen-vl-max"])
LLMService.filter_delete([LLM.fid == "Moonshot", LLM.llm_name == "flag-embedding"])
TenantLLMService.filter_delete([TenantLLM.llm_factory == "Moonshot", TenantLLM.llm_name == "flag-embedding"])
LLMFactoriesService.filter_delete([LLMFactoriesService.model.name == "QAnything"])
LLMService.filter_delete([LLMService.model.fid == "QAnything"])
TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "QAnything"], {"llm_factory": "Youdao"})
TenantService.filter_update([1 == 1], {
"parser_ids": "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email"})
# Insert the two OpenAI embedding models for every tenant that already uses OpenAI.
print("Start to insert 2 OpenAI embedding models...")
tenant_ids = set([row["tenant_id"] for row in TenantLLMService.get_openai_models()])
for tid in tenant_ids:
for row in TenantLLMService.query(llm_factory="OpenAI", tenant_id=tid):
row = row.to_dict()
row["model_type"] = LLMType.EMBEDDING.value
row["llm_name"] = "text-embedding-3-small"
row["used_tokens"] = 0
try:
TenantLLMService.save(**row)
row = deepcopy(row)
row["llm_name"] = "text-embedding-3-large"
TenantLLMService.save(**row)
except Exception as e:
pass
break
for kb_id in KnowledgebaseService.get_all_ids():
KnowledgebaseService.update_by_id(kb_id, {"doc_num": DocumentService.get_kb_doc_count(kb_id)})
"""
drop table llm;
drop table llm_factories;
update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph';
alter table knowledgebase modify avatar longtext;
alter table user modify avatar longtext;
alter table dialog modify icon longtext;
"""
def add_graph_templates():
template_dir = os.path.join(get_project_base_directory(), "agent", "templates")
for fnm in os.listdir(template_dir):
try:
cnvs = json.load(open(os.path.join(template_dir, fnm), "r"))
try:
CanvasTemplateService.save(**cnvs)
except Exception:
CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
except Exception as e:
print("Add graph templates error: ", e)
print("------------", flush=True)
def init_web_data():
start_time = time.time()
init_llm_factory()
if not UserService.get_all().count():
init_superuser()
add_graph_templates()
print("init web data success:{}".format(time.time() - start_time))
if __name__ == '__main__':
init_web_db()
init_web_data()
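One hedged side note on the MiniMax cleanup above: peewee expressions combined with Python's `or` short-circuit to the first operand, so the original filter only ever matched "MiniMax". The sketch below contrasts the two forms; it assumes only the LLM model imported above.

from api.db.db_models import LLM

# Python's `or` returns the first truthy operand, i.e. only one expression survives:
wrong = (LLM.fid == "MiniMax" or LLM.fid == "Minimax")
# peewee's bitwise OR builds a real (fid = 'MiniMax' OR fid = 'Minimax') clause:
right = (LLM.fid == "MiniMax") | (LLM.fid == "Minimax")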

View File

@ -1,21 +1,21 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import operator
import time
import typing
from api.utils.log_utils import sql_logger
import peewee

View File

@ -1,28 +1,28 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
class ReloadConfigBase:
@classmethod
def get_all(cls):
configs = {}
for k, v in cls.__dict__.items():
if not callable(getattr(cls, k)) and not k.startswith(
"__") and not k.startswith("_"):
configs[k] = v
return configs
@classmethod
def get(cls, config_name):
return getattr(cls, config_name) if hasattr(cls, config_name) else None
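A minimal sketch of how a config class built on ReloadConfigBase is meant to be used; ServiceConfig and its attributes are illustrative, not part of the codebase.

class ServiceConfig(ReloadConfigBase):
    HOST = "0.0.0.0"
    PORT = 9380

print(ServiceConfig.get_all())       # {'HOST': '0.0.0.0', 'PORT': 9380}
print(ServiceConfig.get("PORT"))     # 9380
print(ServiceConfig.get("MISSING"))  # None, since the attribute is absent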

View File

@ -1,54 +1,54 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from api.versions import get_versions
from .reload_config_base import ReloadConfigBase
class RuntimeConfig(ReloadConfigBase):
DEBUG = None
WORK_MODE = None
HTTP_PORT = None
JOB_SERVER_HOST = None
JOB_SERVER_VIP = None
ENV = dict()
SERVICE_DB = None
LOAD_CONFIG_MANAGER = False
@classmethod
def init_config(cls, **kwargs):
for k, v in kwargs.items():
if hasattr(cls, k):
setattr(cls, k, v)
@classmethod
def init_env(cls):
cls.ENV.update(get_versions())
@classmethod
def load_config_manager(cls):
cls.LOAD_CONFIG_MANAGER = True
@classmethod
def get_env(cls, key):
return cls.ENV.get(key, None)
@classmethod
def get_all_env(cls):
return cls.ENV
@classmethod
def set_service_db(cls, service_db):
cls.SERVICE_DB = service_db
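A minimal sketch of the intended call sequence; the keyword values below are placeholders, not defaults taken from a real deployment.

RuntimeConfig.init_config(DEBUG=False, HTTP_PORT=9380, JOB_SERVER_HOST="127.0.0.1")
RuntimeConfig.init_env()                 # merges get_versions() into ENV
print(RuntimeConfig.HTTP_PORT)           # 9380 (class-level, hence process-global)
print(RuntimeConfig.get_env("RAGFLOW"))  # a version string, or None if that key is absent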

View File

@ -1,38 +1,38 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pathlib
import re
from .user_service import UserService
def duplicate_name(query_func, **kwargs):
fnm = kwargs["name"]
objs = query_func(**kwargs)
if not objs: return fnm
ext = pathlib.Path(fnm).suffix  # e.g. ".jpg"
nm = re.sub(r"%s$"%ext, "", fnm)
r = re.search(r"\(([0-9]+)\)$", nm)
c = 0
if r:
c = int(r.group(1))
nm = re.sub(r"\([0-9]+\)$", "", nm)
c += 1
nm = f"{nm}({c})"
if ext: nm += f"{ext}"
kwargs["name"] = nm
return duplicate_name(query_func, **kwargs)
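A minimal sketch of the renaming behavior with a stubbed query_func; the name set below is illustrative. A truthy query result means the candidate name is already taken, so the function keeps incrementing the counter suffix until it finds a free name.

existing = {"report.pdf", "report(1).pdf"}

def fake_query(**kwargs):
    # stands in for a service query such as DocumentService.query
    return kwargs["name"] in existing

print(duplicate_name(fake_query, name="report.pdf"))  # -> "report(2).pdf"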

View File

@ -1,68 +1,68 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import datetime
import peewee
from api.db.db_models import DB, API4Conversation, APIToken, Dialog
from api.db.services.common_service import CommonService
from api.utils import current_timestamp, datetime_format
class APITokenService(CommonService):
model = APIToken
@classmethod
@DB.connection_context()
def used(cls, token):
return cls.model.update({
"update_time": current_timestamp(),
"update_date": datetime_format(datetime.now()),
}).where(
cls.model.token == token
)
class API4ConversationService(CommonService):
model = API4Conversation
@classmethod
@DB.connection_context()
def append_message(cls, id, conversation):
cls.update_by_id(id, conversation)
return cls.model.update(round=cls.model.round + 1).where(cls.model.id == id).execute()
@classmethod
@DB.connection_context()
def stats(cls, tenant_id, from_date, to_date, source=None):
if len(to_date) == 10: to_date += " 23:59:59"
return cls.model.select(
cls.model.create_date.truncate("day").alias("dt"),
peewee.fn.COUNT(
cls.model.id).alias("pv"),
peewee.fn.COUNT(
cls.model.user_id.distinct()).alias("uv"),
peewee.fn.SUM(
cls.model.tokens).alias("tokens"),
peewee.fn.SUM(
cls.model.duration).alias("duration"),
peewee.fn.AVG(
cls.model.round).alias("round"),
peewee.fn.SUM(
cls.model.thumb_up).alias("thumb_up")
).join(Dialog, on=((cls.model.dialog_id == Dialog.id) & (Dialog.tenant_id == tenant_id))).where(
cls.model.create_date >= from_date,
cls.model.create_date <= to_date,
cls.model.source == source
).group_by(cls.model.create_date.truncate("day")).dicts()
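A minimal sketch of consuming the daily-stats aggregation; the tenant id and date range are placeholders. Note that with the default source=None, peewee renders the source filter as an IS NULL comparison.

for row in API4ConversationService.stats("tenant-0000", "2024-08-01", "2024-08-15"):
    print(row["dt"], row["pv"], row["uv"], row["tokens"], row["round"])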

View File

@ -1,183 +1,183 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import datetime
import peewee
from api.db.db_models import DB
from api.utils import datetime_format, current_timestamp, get_uuid
class CommonService:
model = None
@classmethod
@DB.connection_context()
def query(cls, cols=None, reverse=None, order_by=None, **kwargs):
return cls.model.query(cols=cols, reverse=reverse,
order_by=order_by, **kwargs)
@classmethod
@DB.connection_context()
def get_all(cls, cols=None, reverse=None, order_by=None):
if cols:
query_records = cls.model.select(*cols)
else:
query_records = cls.model.select()
if reverse is not None:
if not order_by or not hasattr(cls, order_by):
order_by = "create_time"
if reverse is True:
query_records = query_records.order_by(
cls.model.getter_by(order_by).desc())
elif reverse is False:
query_records = query_records.order_by(
cls.model.getter_by(order_by).asc())
return query_records
@classmethod
@DB.connection_context()
def get(cls, **kwargs):
return cls.model.get(**kwargs)
@classmethod
@DB.connection_context()
def get_or_none(cls, **kwargs):
try:
return cls.model.get(**kwargs)
except peewee.DoesNotExist:
return None
@classmethod
@DB.connection_context()
def save(cls, **kwargs):
# if "id" not in kwargs:
# kwargs["id"] = get_uuid()
sample_obj = cls.model(**kwargs).save(force_insert=True)
return sample_obj
@classmethod
@DB.connection_context()
def insert(cls, **kwargs):
if "id" not in kwargs:
kwargs["id"] = get_uuid()
kwargs["create_time"] = current_timestamp()
kwargs["create_date"] = datetime_format(datetime.now())
kwargs["update_time"] = current_timestamp()
kwargs["update_date"] = datetime_format(datetime.now())
sample_obj = cls.model(**kwargs).save(force_insert=True)
return sample_obj
@classmethod
@DB.connection_context()
def insert_many(cls, data_list, batch_size=100):
with DB.atomic():
for d in data_list:
d["create_time"] = current_timestamp()
d["create_date"] = datetime_format(datetime.now())
for i in range(0, len(data_list), batch_size):
cls.model.insert_many(data_list[i:i + batch_size]).execute()
@classmethod
@DB.connection_context()
def update_many_by_id(cls, data_list):
with DB.atomic():
for data in data_list:
data["update_time"] = current_timestamp()
data["update_date"] = datetime_format(datetime.now())
cls.model.update(data).where(
cls.model.id == data["id"]).execute()
@classmethod
@DB.connection_context()
def update_by_id(cls, pid, data):
data["update_time"] = current_timestamp()
data["update_date"] = datetime_format(datetime.now())
num = cls.model.update(data).where(cls.model.id == pid).execute()
return num
@classmethod
@DB.connection_context()
def get_by_id(cls, pid):
try:
obj = cls.model.query(id=pid)[0]
return True, obj
except Exception as e:
return False, None
@classmethod
@DB.connection_context()
def get_by_ids(cls, pids, cols=None):
if cols:
objs = cls.model.select(*cols)
else:
objs = cls.model.select()
return objs.where(cls.model.id.in_(pids))
@classmethod
@DB.connection_context()
def delete_by_id(cls, pid):
return cls.model.delete().where(cls.model.id == pid).execute()
@classmethod
@DB.connection_context()
def filter_delete(cls, filters):
with DB.atomic():
num = cls.model.delete().where(*filters).execute()
return num
@classmethod
@DB.connection_context()
def filter_update(cls, filters, update_data):
with DB.atomic():
return cls.model.update(update_data).where(*filters).execute()
@staticmethod
def cut_list(tar_list, n):
length = len(tar_list)
arr = range(length)
result = [tuple(tar_list[x:(x + n)]) for x in arr[::n]]
return result
@classmethod
@DB.connection_context()
def filter_scope_list(cls, in_key, in_filters_list,
filters=None, cols=None):
in_filters_tuple_list = cls.cut_list(in_filters_list, 20)
if not filters:
filters = []
res_list = []
if cols:
for i in in_filters_tuple_list:
query_records = cls.model.select(*cols).where(
getattr(cls.model, in_key).in_(i), *filters)
if query_records:
res_list.extend(
[query_record for query_record in query_records])
else:
for i in in_filters_tuple_list:
query_records = cls.model.select().where(
getattr(cls.model, in_key).in_(i), *filters)
if query_records:
res_list.extend(
[query_record for query_record in query_records])
return res_list
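A minimal sketch of how concrete services are built on CommonService; NoteService and the Note model are hypothetical stand-ins for any peewee model with id, create_time and update_time columns.

class NoteService(CommonService):
    model = Note  # assumed peewee model, for illustration only

ok, note = NoteService.get_by_id("some-id")          # (False, None) when missing
NoteService.update_by_id("some-id", {"title": "x"})  # also stamps update_time/update_date
NoteService.insert_many([{"id": "a"}, {"id": "b"}])  # stamps create_time/create_date per row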

View File

@ -1,392 +1,392 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import json
import re
from copy import deepcopy
from api.db import LLMType, ParserType
from api.db.db_models import Dialog, Conversation
from api.db.services.common_service import CommonService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
from api.settings import chat_logger, retrievaler, kg_retrievaler
from rag.app.resume import forbidden_select_fields4resume
from rag.nlp import keyword_extraction
from rag.nlp.search import index_name
from rag.utils import rmSpace, num_tokens_from_string, encoder
from api.utils.file_utils import get_project_base_directory
class DialogService(CommonService):
model = Dialog
class ConversationService(CommonService):
model = Conversation
def message_fit_in(msg, max_length=4000):
def count():
nonlocal msg
tks_cnts = []
for m in msg:
tks_cnts.append(
{"role": m["role"], "count": num_tokens_from_string(m["content"])})
total = 0
for m in tks_cnts:
total += m["count"]
return total
c = count()
if c < max_length:
return c, msg
msg_ = [m for m in msg[:-1] if m["role"] == "system"]
msg_.append(msg[-1])
msg = msg_
c = count()
if c < max_length:
return c, msg
sys_tokens = num_tokens_from_string(msg_[0]["content"])
last_tokens = num_tokens_from_string(msg_[-1]["content"])
if sys_tokens / (sys_tokens + last_tokens) > 0.8:
m = msg_[0]["content"]
m = encoder.decode(encoder.encode(m)[:max_length - last_tokens])
msg[0]["content"] = m
return max_length, msg
m = msg_[1]["content"]
m = encoder.decode(encoder.encode(m)[:max_length - last_tokens])
msg[1]["content"] = m
return max_length, msg
def llm_id2llm_type(llm_id):
fnm = os.path.join(get_project_base_directory(), "conf")
llm_factories = json.load(open(os.path.join(fnm, "llm_factories.json"), "r"))
for llm_factory in llm_factories["factory_llm_infos"]:
for llm in llm_factory["llm"]:
if llm_id == llm["llm_name"]:
return llm["model_type"].strip(",")[-1]
def chat(dialog, messages, stream=True, **kwargs):
assert messages[-1]["role"] == "user", "The last content of this conversation is not from user."
llm = LLMService.query(llm_name=dialog.llm_id)
if not llm:
llm = TenantLLMService.query(tenant_id=dialog.tenant_id, llm_name=dialog.llm_id)
if not llm:
raise LookupError("LLM(%s) not found" % dialog.llm_id)
max_tokens = 8192
else:
max_tokens = llm[0].max_tokens
kbs = KnowledgebaseService.get_by_ids(dialog.kb_ids)
embd_nms = list(set([kb.embd_id for kb in kbs]))
if len(embd_nms) != 1:
yield {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
return {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
is_kg = all([kb.parser_id == ParserType.KG for kb in kbs])
retr = retrievaler if not is_kg else kg_retrievaler
questions = [m["content"] for m in messages if m["role"] == "user"][-3:]
attachments = kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None
if "doc_ids" in messages[-1]:
attachments = messages[-1]["doc_ids"]
for m in messages[:-1]:
if "doc_ids" in m:
attachments.extend(m["doc_ids"])
embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING, embd_nms[0])
if llm_id2llm_type(dialog.llm_id) == "image2text":
chat_mdl = LLMBundle(dialog.tenant_id, LLMType.IMAGE2TEXT, dialog.llm_id)
else:
chat_mdl = LLMBundle(dialog.tenant_id, LLMType.CHAT, dialog.llm_id)
prompt_config = dialog.prompt_config
field_map = KnowledgebaseService.get_field_map(dialog.kb_ids)
# try to use sql if field mapping is good to go
if field_map:
chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
if ans:
yield ans
return
for p in prompt_config["parameters"]:
if p["key"] == "knowledge":
continue
if p["key"] not in kwargs and not p["optional"]:
raise KeyError("Miss parameter: " + p["key"])
if p["key"] not in kwargs:
prompt_config["system"] = prompt_config["system"].replace(
"{%s}" % p["key"], " ")
rerank_mdl = None
if dialog.rerank_id:
rerank_mdl = LLMBundle(dialog.tenant_id, LLMType.RERANK, dialog.rerank_id)
for _ in range(len(questions) // 2):
questions.append(questions[-1])
if "knowledge" not in [p["key"] for p in prompt_config["parameters"]]:
kbinfos = {"total": 0, "chunks": [], "doc_aggs": []}
else:
if prompt_config.get("keyword", False):
questions[-1] += keyword_extraction(chat_mdl, questions[-1])
kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
dialog.similarity_threshold,
dialog.vector_similarity_weight,
doc_ids=attachments,
top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
# self-RAG: if the retrieved chunks look irrelevant, rewrite the question and retrieve again
if dialog.prompt_config.get("self_rag") and not relevant(dialog.tenant_id, dialog.llm_id, questions[-1], knowledges):
questions[-1] = rewrite(dialog.tenant_id, dialog.llm_id, questions[-1])
kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
dialog.similarity_threshold,
dialog.vector_similarity_weight,
doc_ids=attachments,
top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
chat_logger.info(
"{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
if not knowledges and prompt_config.get("empty_response"):
yield {"answer": prompt_config["empty_response"], "reference": kbinfos}
return {"answer": prompt_config["empty_response"], "reference": kbinfos}
kwargs["knowledge"] = "\n".join(knowledges)
gen_conf = dialog.llm_setting
msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}]
msg.extend([{"role": m["role"], "content": re.sub(r"##\d+\$\$", "", m["content"])}
for m in messages if m["role"] != "system"])
used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97))
assert len(msg) >= 2, f"message_fit_in has bug: {msg}"
if "max_tokens" in gen_conf:
gen_conf["max_tokens"] = min(
gen_conf["max_tokens"],
max_tokens - used_token_count)
def decorate_answer(answer):
nonlocal prompt_config, knowledges, kwargs, kbinfos
refs = []
if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
answer, idx = retr.insert_citations(answer,
[ck["content_ltks"]
for ck in kbinfos["chunks"]],
[ck["vector"]
for ck in kbinfos["chunks"]],
embd_mdl,
tkweight=1 - dialog.vector_similarity_weight,
vtweight=dialog.vector_similarity_weight)
idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
recall_docs = [
d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
if not recall_docs: recall_docs = kbinfos["doc_aggs"]
kbinfos["doc_aggs"] = recall_docs
refs = deepcopy(kbinfos)
for c in refs["chunks"]:
if c.get("vector"):
del c["vector"]
if answer.lower().find("invalid key") >= 0 or answer.lower().find("invalid api") >= 0:
answer += " Please set LLM API-Key in 'User Setting -> Model Providers -> API-Key'"
return {"answer": answer, "reference": refs}
if stream:
answer = ""
for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], gen_conf):
answer = ans
yield {"answer": answer, "reference": {}}
yield decorate_answer(answer)
else:
answer = chat_mdl.chat(
msg[0]["content"], msg[1:], gen_conf)
chat_logger.info("User: {}|Assistant: {}".format(
msg[-1]["content"], answer))
yield decorate_answer(answer)
def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
sys_prompt = "你是一个DBA。你需要这对以下表的字段结构根据用户的问题列表写出最后一个问题对应的SQL。"
user_promt = """
表名:{}
数据库表字段说明如下:
{}
问题如下:
{}
请写出SQL, 且只要SQL不要有其他说明及文字。
""".format(
index_name(tenant_id),
"\n".join([f"{k}: {v}" for k, v in field_map.items()]),
question
)
tried_times = 0
def get_table():
nonlocal sys_prompt, user_prompt, question, tried_times
sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_prompt}], {
"temperature": 0.06})
print(user_prompt, sql)
chat_logger.info(f"“{question}” ==> {user_prompt} get SQL: {sql}")
sql = re.sub(r"[\r\n]+", " ", sql.lower())
sql = re.sub(r".*select ", "select ", sql.lower())
sql = re.sub(r" +", " ", sql)
sql = re.sub(r"([;]|```).*", "", sql)
if sql[:len("select ")] != "select ":
return None, None
if not re.search(r"((sum|avg|max|min)\(|group by )", sql.lower()):
if sql[:len("select *")] != "select *":
sql = "select doc_id,docnm_kwd," + sql[6:]
else:
flds = []
for k in field_map.keys():
if k in forbidden_select_fields4resume:
continue
if len(flds) > 11:
break
flds.append(k)
sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:]
print(f"{question}” get SQL(refined): {sql}")
chat_logger.info(f"{question}” get SQL(refined): {sql}")
tried_times += 1
return retrievaler.sql_retrieval(sql, format="json"), sql
tbl, sql = get_table()
if tbl is None:
return None
if tbl.get("error") and tried_times <= 2:
user_promt = """
表名:{}
数据库表字段说明如下:
{}
问题如下:
{}
你上一次给出的错误SQL如下
{}
后台报错如下:
{}
请纠正SQL中的错误再写一遍且只要SQL不要有其他说明及文字。
""".format(
index_name(tenant_id),
"\n".join([f"{k}: {v}" for k, v in field_map.items()]),
question, sql, tbl["error"]
)
tbl, sql = get_table()
chat_logger.info("TRY it again: {}".format(sql))
chat_logger.info("GET table: {}".format(tbl))
print(tbl)
if tbl.get("error") or len(tbl["rows"]) == 0:
return None
docid_idx = set([ii for ii, c in enumerate(
tbl["columns"]) if c["name"] == "doc_id"])
docnm_idx = set([ii for ii, c in enumerate(
tbl["columns"]) if c["name"] == "docnm_kwd"])
clmn_idx = [ii for ii in range(
len(tbl["columns"])) if ii not in (docid_idx | docnm_idx)]
# compose markdown table
clmns = "|" + "|".join([re.sub(r"(/.*|[^]+)", "", field_map.get(tbl["columns"][i]["name"],
tbl["columns"][i]["name"])) for i in
clmn_idx]) + ("|Source|" if docid_idx and docid_idx else "|")
line = "|" + "|".join(["------" for _ in range(len(clmn_idx))]) + \
("|------|" if docid_idx and docid_idx else "")
rows = ["|" +
"|".join([rmSpace(str(r[i])) for i in clmn_idx]).replace("None", " ") +
"|" for r in tbl["rows"]]
if quota:
rows = "\n".join([r + f" ##{ii}$$ |" for ii, r in enumerate(rows)])
else:
rows = "\n".join(rows)
rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows)
if not docid_idx or not docnm_idx:
chat_logger.warning("SQL missing field: " + sql)
return {
"answer": "\n".join([clmns, line, rows]),
"reference": {"chunks": [], "doc_aggs": []}
}
docid_idx = list(docid_idx)[0]
docnm_idx = list(docnm_idx)[0]
doc_aggs = {}
for r in tbl["rows"]:
if r[docid_idx] not in doc_aggs:
doc_aggs[r[docid_idx]] = {"doc_name": r[docnm_idx], "count": 0}
doc_aggs[r[docid_idx]]["count"] += 1
return {
"answer": "\n".join([clmns, line, rows]),
"reference": {"chunks": [{"doc_id": r[docid_idx], "docnm_kwd": r[docnm_idx]} for r in tbl["rows"]],
"doc_aggs": [{"doc_id": did, "doc_name": d["doc_name"], "count": d["count"]} for did, d in
doc_aggs.items()]}
}
def relevant(tenant_id, llm_id, question, contents: list):
if llm_id2llm_type(llm_id) == "image2text":
chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id)
else:
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
prompt = """
You are a grader assessing relevance of a retrieved document to a user question.
It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
Give a binary 'yes' or 'no' score to indicate whether the document is relevant to the question.
No other words are needed except 'yes' or 'no'.
"""
if not contents: return False
contents = "Documents: \n" + " - ".join(contents)
contents = f"Question: {question}\n" + contents
if num_tokens_from_string(contents) >= chat_mdl.max_length - 4:
contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4])
ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01})
if ans.lower().find("yes") >= 0: return True
return False
def rewrite(tenant_id, llm_id, question):
if llm_id2llm_type(llm_id) == "image2text":
chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id)
else:
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
prompt = """
You are an expert at query expansion, generating paraphrases of a question.
I can't retrieve relevant information from the knowledge base by using the user's question directly.
You need to expand or paraphrase the user's question in multiple ways, such as using synonymous words/phrases,
writing abbreviations out in full, adding some extra descriptions or explanations,
changing the way of expression, translating the original question into another language (English/Chinese), etc.
Return 5 versions of the question, one of which is a translation.
Just list the questions. No other words are needed.
"""
ans = chat_mdl.chat(prompt, [{"role": "user", "content": question}], {"temperature": 0.8})
return ans
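A minimal consumption sketch for chat(); the dialog object is assumed to be a Dialog row loaded elsewhere. Intermediate yields carry the growing partial answer with an empty reference, and the final yield from decorate_answer carries the citations.

history = [{"role": "user", "content": "What is RAGFlow?"}]
final = None
for delta in chat(dialog, history, stream=True):
    final = delta
    print(delta["answer"])   # grows as tokens stream in
print(final["reference"])    # populated only on the last yield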
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import json
import re
from copy import deepcopy
from api.db import LLMType, ParserType
from api.db.db_models import Dialog, Conversation
from api.db.services.common_service import CommonService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
from api.settings import chat_logger, retrievaler, kg_retrievaler
from rag.app.resume import forbidden_select_fields4resume
from rag.nlp import keyword_extraction
from rag.nlp.search import index_name
from rag.utils import rmSpace, num_tokens_from_string, encoder
from api.utils.file_utils import get_project_base_directory
class DialogService(CommonService):
model = Dialog
class ConversationService(CommonService):
model = Conversation
def message_fit_in(msg, max_length=4000):
def count():
nonlocal msg
tks_cnts = []
for m in msg:
tks_cnts.append(
{"role": m["role"], "count": num_tokens_from_string(m["content"])})
total = 0
for m in tks_cnts:
total += m["count"]
return total
c = count()
if c < max_length:
return c, msg
msg_ = [m for m in msg[:-1] if m["role"] == "system"]
msg_.append(msg[-1])
msg = msg_
c = count()
if c < max_length:
return c, msg
ll = num_tokens_from_string(msg_[0]["content"])
l = num_tokens_from_string(msg_[-1]["content"])
if ll / (ll + l) > 0.8:
m = msg_[0]["content"]
m = encoder.decode(encoder.encode(m)[:max_length - l])
msg[0]["content"] = m
return max_length, msg
m = msg_[1]["content"]
m = encoder.decode(encoder.encode(m)[:max_length - l])
msg[1]["content"] = m
return max_length, msg
def llm_id2llm_type(llm_id):
fnm = os.path.join(get_project_base_directory(), "conf")
llm_factories = json.load(open(os.path.join(fnm, "llm_factories.json"), "r"))
for llm_factory in llm_factories["factory_llm_infos"]:
for llm in llm_factory["llm"]:
if llm_id == llm["llm_name"]:
return llm["model_type"].strip(",")[-1]
def chat(dialog, messages, stream=True, **kwargs):
assert messages[-1]["role"] == "user", "The last content of this conversation is not from user."
llm = LLMService.query(llm_name=dialog.llm_id)
if not llm:
llm = TenantLLMService.query(tenant_id=dialog.tenant_id, llm_name=dialog.llm_id)
if not llm:
raise LookupError("LLM(%s) not found" % dialog.llm_id)
max_tokens = 8192
else:
max_tokens = llm[0].max_tokens
kbs = KnowledgebaseService.get_by_ids(dialog.kb_ids)
embd_nms = list(set([kb.embd_id for kb in kbs]))
if len(embd_nms) != 1:
yield {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
return {"answer": "**ERROR**: Knowledge bases use different embedding models.", "reference": []}
is_kg = all([kb.parser_id == ParserType.KG for kb in kbs])
retr = retrievaler if not is_kg else kg_retrievaler
questions = [m["content"] for m in messages if m["role"] == "user"][-3:]
attachments = kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else None
if "doc_ids" in messages[-1]:
attachments = messages[-1]["doc_ids"]
for m in messages[:-1]:
if "doc_ids" in m:
attachments.extend(m["doc_ids"])
embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING, embd_nms[0])
if llm_id2llm_type(dialog.llm_id) == "image2text":
chat_mdl = LLMBundle(dialog.tenant_id, LLMType.IMAGE2TEXT, dialog.llm_id)
else:
chat_mdl = LLMBundle(dialog.tenant_id, LLMType.CHAT, dialog.llm_id)
prompt_config = dialog.prompt_config
field_map = KnowledgebaseService.get_field_map(dialog.kb_ids)
# try to use sql if field mapping is good to go
if field_map:
chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
if ans:
yield ans
return
for p in prompt_config["parameters"]:
if p["key"] == "knowledge":
continue
if p["key"] not in kwargs and not p["optional"]:
raise KeyError("Miss parameter: " + p["key"])
if p["key"] not in kwargs:
prompt_config["system"] = prompt_config["system"].replace(
"{%s}" % p["key"], " ")
rerank_mdl = None
if dialog.rerank_id:
rerank_mdl = LLMBundle(dialog.tenant_id, LLMType.RERANK, dialog.rerank_id)
for _ in range(len(questions) // 2):
questions.append(questions[-1])
if "knowledge" not in [p["key"] for p in prompt_config["parameters"]]:
kbinfos = {"total": 0, "chunks": [], "doc_aggs": []}
else:
if prompt_config.get("keyword", False):
questions[-1] += keyword_extraction(chat_mdl, questions[-1])
kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
dialog.similarity_threshold,
dialog.vector_similarity_weight,
doc_ids=attachments,
top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
#self-rag
if dialog.prompt_config.get("self_rag") and not relevant(dialog.tenant_id, dialog.llm_id, questions[-1], knowledges):
questions[-1] = rewrite(dialog.tenant_id, dialog.llm_id, questions[-1])
kbinfos = retr.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
dialog.similarity_threshold,
dialog.vector_similarity_weight,
doc_ids=attachments,
top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
chat_logger.info(
"{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
if not knowledges and prompt_config.get("empty_response"):
yield {"answer": prompt_config["empty_response"], "reference": kbinfos}
return {"answer": prompt_config["empty_response"], "reference": kbinfos}
kwargs["knowledge"] = "\n".join(knowledges)
gen_conf = dialog.llm_setting
msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}]
msg.extend([{"role": m["role"], "content": re.sub(r"##\d+\$\$", "", m["content"])}
for m in messages if m["role"] != "system"])
used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97))
assert len(msg) >= 2, f"message_fit_in has bug: {msg}"
if "max_tokens" in gen_conf:
gen_conf["max_tokens"] = min(
gen_conf["max_tokens"],
max_tokens - used_token_count)
def decorate_answer(answer):
nonlocal prompt_config, knowledges, kwargs, kbinfos
refs = []
if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
answer, idx = retr.insert_citations(answer,
[ck["content_ltks"]
for ck in kbinfos["chunks"]],
[ck["vector"]
for ck in kbinfos["chunks"]],
embd_mdl,
tkweight=1 - dialog.vector_similarity_weight,
vtweight=dialog.vector_similarity_weight)
idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx])
recall_docs = [
d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx]
if not recall_docs: recall_docs = kbinfos["doc_aggs"]
kbinfos["doc_aggs"] = recall_docs
refs = deepcopy(kbinfos)
for c in refs["chunks"]:
if c.get("vector"):
del c["vector"]
if answer.lower().find("invalid key") >= 0 or answer.lower().find("invalid api") >= 0:
answer += " Please set LLM API-Key in 'User Setting -> Model Providers -> API-Key'"
return {"answer": answer, "reference": refs}
if stream:
answer = ""
for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], gen_conf):
answer = ans
yield {"answer": answer, "reference": {}}
yield decorate_answer(answer)
else:
answer = chat_mdl.chat(
msg[0]["content"], msg[1:], gen_conf)
chat_logger.info("User: {}|Assistant: {}".format(
msg[-1]["content"], answer))
yield decorate_answer(answer)
def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
sys_prompt = "你是一个DBA。你需要这对以下表的字段结构根据用户的问题列表写出最后一个问题对应的SQL。"
user_promt = """
表名:{}
数据库表字段说明如下:
{}
问题如下:
{}
请写出SQL, 且只要SQL不要有其他说明及文字。
""".format(
index_name(tenant_id),
"\n".join([f"{k}: {v}" for k, v in field_map.items()]),
question
)
tried_times = 0
def get_table():
nonlocal sys_prompt, user_promt, question, tried_times
sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_promt}], {
"temperature": 0.06})
print(user_promt, sql)
chat_logger.info(f"{question}”==>{user_promt} get SQL: {sql}")
sql = re.sub(r"[\r\n]+", " ", sql.lower())
sql = re.sub(r".*select ", "select ", sql.lower())
sql = re.sub(r" +", " ", sql)
sql = re.sub(r"([;]|```).*", "", sql)
if sql[:len("select ")] != "select ":
return None, None
if not re.search(r"((sum|avg|max|min)\(|group by )", sql.lower()):
if sql[:len("select *")] != "select *":
sql = "select doc_id,docnm_kwd," + sql[6:]
else:
flds = []
for k in field_map.keys():
if k in forbidden_select_fields4resume:
continue
if len(flds) > 11:
break
flds.append(k)
sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:]
print(f"{question}” get SQL(refined): {sql}")
chat_logger.info(f"{question}” get SQL(refined): {sql}")
tried_times += 1
return retrievaler.sql_retrieval(sql, format="json"), sql
tbl, sql = get_table()
if tbl is None:
return None
if tbl.get("error") and tried_times <= 2:
user_promt = """
表名:{}
数据库表字段说明如下:
{}
问题如下:
{}
你上一次给出的错误SQL如下
{}
后台报错如下:
{}
请纠正SQL中的错误再写一遍且只要SQL不要有其他说明及文字。
""".format(
index_name(tenant_id),
"\n".join([f"{k}: {v}" for k, v in field_map.items()]),
question, sql, tbl["error"]
)
tbl, sql = get_table()
chat_logger.info("TRY it again: {}".format(sql))
chat_logger.info("GET table: {}".format(tbl))
print(tbl)
if tbl.get("error") or len(tbl["rows"]) == 0:
return None
docid_idx = set([ii for ii, c in enumerate(
tbl["columns"]) if c["name"] == "doc_id"])
docnm_idx = set([ii for ii, c in enumerate(
tbl["columns"]) if c["name"] == "docnm_kwd"])
clmn_idx = [ii for ii in range(
len(tbl["columns"])) if ii not in (docid_idx | docnm_idx)]
# compose markdown table
clmns = "|" + "|".join([re.sub(r"(/.*|[^]+)", "", field_map.get(tbl["columns"][i]["name"],
tbl["columns"][i]["name"])) for i in
clmn_idx]) + ("|Source|" if docid_idx and docid_idx else "|")
line = "|" + "|".join(["------" for _ in range(len(clmn_idx))]) + \
("|------|" if docid_idx and docid_idx else "")
rows = ["|" +
"|".join([rmSpace(str(r[i])) for i in clmn_idx]).replace("None", " ") +
"|" for r in tbl["rows"]]
if quota:
rows = "\n".join([r + f" ##{ii}$$ |" for ii, r in enumerate(rows)])
else:
rows = "\n".join(rows)
rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows)
if not docid_idx or not docnm_idx:
chat_logger.warning("SQL missing field: " + sql)
return {
"answer": "\n".join([clmns, line, rows]),
"reference": {"chunks": [], "doc_aggs": []}
}
docid_idx = list(docid_idx)[0]
docnm_idx = list(docnm_idx)[0]
doc_aggs = {}
for r in tbl["rows"]:
if r[docid_idx] not in doc_aggs:
doc_aggs[r[docid_idx]] = {"doc_name": r[docnm_idx], "count": 0}
doc_aggs[r[docid_idx]]["count"] += 1
return {
"answer": "\n".join([clmns, line, rows]),
"reference": {"chunks": [{"doc_id": r[docid_idx], "docnm_kwd": r[docnm_idx]} for r in tbl["rows"]],
"doc_aggs": [{"doc_id": did, "doc_name": d["doc_name"], "count": d["count"]} for did, d in
doc_aggs.items()]}
}
def relevant(tenant_id, llm_id, question, contents: list):
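# Binary relevance check: asks the chat model to grade whether the retrieved
# snippets relate to the question, truncating the input to the model's context
# window. Returns True only if the model answers 'yes'.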
if llm_id2llm_type(llm_id) == "image2text":
chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id)
else:
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
prompt = """
You are a grader assessing the relevance of a retrieved document to a user question.
It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
Give a binary 'yes' or 'no' score to indicate whether the document is relevant to the question.
No other words are needed except 'yes' or 'no'.
"""
if not contents:
return False
contents = "Documents: \n" + " - ".join(contents)
contents = f"Question: {question}\n" + contents
if num_tokens_from_string(contents) >= chat_mdl.max_length - 4:
contents = encoder.decode(encoder.encode(contents)[:chat_mdl.max_length - 4])
ans = chat_mdl.chat(prompt, [{"role": "user", "content": contents}], {"temperature": 0.01})
if ans.lower().find("yes") >= 0: return True
return False
def rewrite(tenant_id, llm_id, question):
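# Query expansion: asks the model for several paraphrases of the user's question
# (including one translation) to improve recall at retrieval time.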
if llm_id2llm_type(llm_id) == "image2text":
chat_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, llm_id)
else:
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_id)
prompt = """
You are an expert at query expansion to generate a paraphrasing of a question.
I can't retrieve relevant information from the knowledge base by using the user's question directly.
You need to expand or paraphrase the user's question in multiple ways, such as using synonymous words/phrases,
writing abbreviations out in full, adding some extra descriptions or explanations,
changing the way of expression, translating the original question into another language (English/Chinese), etc.
Return 5 versions of the question, one of which is a translation.
Just list the questions. No other words are needed.
"""
ans = chat_mdl.chat(prompt, [{"role": "user", "content": question}], {"temperature": 0.8})
return ans

View File

@ -1,382 +1,382 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import random
from datetime import datetime
from elasticsearch_dsl import Q
from peewee import fn
from api.db.db_utils import bulk_insert_into_db
from api.settings import stat_logger
from api.utils import current_timestamp, get_format_time, get_uuid
from rag.settings import SVR_QUEUE_NAME
from rag.utils.es_conn import ELASTICSEARCH
from rag.utils.minio_conn import MINIO
from rag.nlp import search
from api.db import FileType, TaskStatus, ParserType
from api.db.db_models import DB, Knowledgebase, Tenant, Task
from api.db.db_models import Document
from api.db.services.common_service import CommonService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db import StatusEnum
from rag.utils.redis_conn import REDIS_CONN
class DocumentService(CommonService):
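# CRUD and bookkeeping for documents: pagination, chunk/token counters kept in
# sync with the owning knowledgebase, and progress aggregation across parsing tasks.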
model = Document
@classmethod
@DB.connection_context()
def get_by_kb_id(cls, kb_id, page_number, items_per_page,
orderby, desc, keywords):
if keywords:
docs = cls.model.select().where(
(cls.model.kb_id == kb_id),
(fn.LOWER(cls.model.name).contains(keywords.lower()))
)
else:
docs = cls.model.select().where(cls.model.kb_id == kb_id)
count = docs.count()
if desc:
docs = docs.order_by(cls.model.getter_by(orderby).desc())
else:
docs = docs.order_by(cls.model.getter_by(orderby).asc())
docs = docs.paginate(page_number, items_per_page)
return list(docs.dicts()), count
@classmethod
@DB.connection_context()
def list_documents_in_dataset(cls, dataset_id, offset, count, order_by, descend, keywords):
if keywords:
docs = cls.model.select().where(
(cls.model.kb_id == dataset_id),
(fn.LOWER(cls.model.name).contains(keywords.lower()))
)
else:
docs = cls.model.select().where(cls.model.kb_id == dataset_id)
total = docs.count()
if descend == 'True':
docs = docs.order_by(cls.model.getter_by(order_by).desc())
if descend == 'False':
docs = docs.order_by(cls.model.getter_by(order_by).asc())
docs = list(docs.dicts())
docs_length = len(docs)
if offset < 0 or offset > docs_length:
raise IndexError("Offset is out of the valid range.")
if count == -1:
return docs[offset:], total
return docs[offset:offset + count], total
@classmethod
@DB.connection_context()
def insert(cls, doc):
if not cls.save(**doc):
raise RuntimeError("Database error (Document)!")
e, doc = cls.get_by_id(doc["id"])
if not e:
raise RuntimeError("Database error (Document retrieval)!")
e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
if not KnowledgebaseService.update_by_id(
kb.id, {"doc_num": kb.doc_num + 1}):
raise RuntimeError("Database error (Knowledgebase)!")
return doc
@classmethod
@DB.connection_context()
def remove_document(cls, doc, tenant_id):
ELASTICSEARCH.deleteByQuery(
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
cls.clear_chunk_num(doc.id)
return cls.delete_by_id(doc.id)
@classmethod
@DB.connection_context()
def get_newly_uploaded(cls):
fields = [
cls.model.id,
cls.model.kb_id,
cls.model.parser_id,
cls.model.parser_config,
cls.model.name,
cls.model.type,
cls.model.location,
cls.model.size,
Knowledgebase.tenant_id,
Tenant.embd_id,
Tenant.img2txt_id,
Tenant.asr_id,
cls.model.update_time]
docs = cls.model.select(*fields) \
.join(Knowledgebase, on=(cls.model.kb_id == Knowledgebase.id)) \
.join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))\
.where(
cls.model.status == StatusEnum.VALID.value,
~(cls.model.type == FileType.VIRTUAL.value),
cls.model.progress == 0,
cls.model.update_time >= current_timestamp() - 1000 * 600,
cls.model.run == TaskStatus.RUNNING.value)\
.order_by(cls.model.update_time.asc())
return list(docs.dicts())
@classmethod
@DB.connection_context()
def get_unfinished_docs(cls):
fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg, cls.model.run]
docs = cls.model.select(*fields) \
.where(
cls.model.status == StatusEnum.VALID.value,
~(cls.model.type == FileType.VIRTUAL.value),
cls.model.progress < 1,
cls.model.progress > 0)
return list(docs.dicts())
@classmethod
@DB.connection_context()
def increment_chunk_num(cls, doc_id, kb_id, token_num, chunk_num, duration):
num = cls.model.update(token_num=cls.model.token_num + token_num,
chunk_num=cls.model.chunk_num + chunk_num,
# "process_duation" is the column name as (mis)spelled in the model.
process_duation=cls.model.process_duation + duration).where(
cls.model.id == doc_id).execute()
if num == 0:
raise LookupError(
"Document not found which is supposed to be there")
num = Knowledgebase.update(
token_num=Knowledgebase.token_num +
token_num,
chunk_num=Knowledgebase.chunk_num +
chunk_num).where(
Knowledgebase.id == kb_id).execute()
return num
@classmethod
@DB.connection_context()
def decrement_chunk_num(cls, doc_id, kb_id, token_num, chunk_num, duration):
num = cls.model.update(token_num=cls.model.token_num - token_num,
chunk_num=cls.model.chunk_num - chunk_num,
process_duation=cls.model.process_duation + duration).where(
cls.model.id == doc_id).execute()
if num == 0:
raise LookupError(
"Document not found which is supposed to be there")
num = Knowledgebase.update(
token_num=Knowledgebase.token_num -
token_num,
chunk_num=Knowledgebase.chunk_num -
chunk_num
).where(
Knowledgebase.id == kb_id).execute()
return num
@classmethod
@DB.connection_context()
def clear_chunk_num(cls, doc_id):
doc = cls.model.get_by_id(doc_id)
assert doc, "Can't fine document in database."
num = Knowledgebase.update(
token_num=Knowledgebase.token_num -
doc.token_num,
chunk_num=Knowledgebase.chunk_num -
doc.chunk_num,
doc_num=Knowledgebase.doc_num-1
).where(
Knowledgebase.id == doc.kb_id).execute()
return num
@classmethod
@DB.connection_context()
def get_tenant_id(cls, doc_id):
docs = cls.model.select(
Knowledgebase.tenant_id).join(
Knowledgebase, on=(
Knowledgebase.id == cls.model.kb_id)).where(
cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
docs = docs.dicts()
if not docs:
return
return docs[0]["tenant_id"]
@classmethod
@DB.connection_context()
def get_tenant_id_by_name(cls, name):
docs = cls.model.select(
Knowledgebase.tenant_id).join(
Knowledgebase, on=(
Knowledgebase.id == cls.model.kb_id)).where(
cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value)
docs = docs.dicts()
if not docs:
return
return docs[0]["tenant_id"]
@classmethod
@DB.connection_context()
def get_embd_id(cls, doc_id):
docs = cls.model.select(
Knowledgebase.embd_id).join(
Knowledgebase, on=(
Knowledgebase.id == cls.model.kb_id)).where(
cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
docs = docs.dicts()
if not docs:
return
return docs[0]["embd_id"]
@classmethod
@DB.connection_context()
def get_doc_id_by_doc_name(cls, doc_name):
fields = [cls.model.id]
doc_id = cls.model.select(*fields) \
.where(cls.model.name == doc_name)
doc_id = doc_id.dicts()
if not doc_id:
return
return doc_id[0]["id"]
@classmethod
@DB.connection_context()
def get_thumbnails(cls, docids):
fields = [cls.model.id, cls.model.thumbnail]
return list(cls.model.select(
*fields).where(cls.model.id.in_(docids)).dicts())
@classmethod
@DB.connection_context()
def update_parser_config(cls, id, config):
e, d = cls.get_by_id(id)
if not e:
raise LookupError(f"Document({id}) not found.")
def dfs_update(old, new):
for k, v in new.items():
if k not in old:
old[k] = v
continue
if isinstance(v, dict):
assert isinstance(old[k], dict)
dfs_update(old[k], v)
else:
old[k] = v
dfs_update(d.parser_config, config)
cls.update_by_id(id, {"parser_config": d.parser_config})
@classmethod
@DB.connection_context()
def get_doc_count(cls, tenant_id):
docs = cls.model.select(cls.model.id).join(Knowledgebase,
on=(Knowledgebase.id == cls.model.kb_id)).where(
Knowledgebase.tenant_id == tenant_id)
return len(docs)
@classmethod
@DB.connection_context()
def begin2parse(cls, docid):
cls.update_by_id(
docid, {"progress": random.random() * 1 / 100.,
"progress_msg": "Task dispatched...",
"process_begin_at": get_format_time()
})
@classmethod
@DB.connection_context()
def update_progress(cls):
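# Aggregates per-task progress into a single document-level value: the mean of
# task progresses, -1 if any task failed, and DONE once all tasks finish
# (optionally queueing a RAPTOR pass first).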
docs = cls.get_unfinished_docs()
for d in docs:
try:
tsks = Task.query(doc_id=d["id"], order_by=Task.create_time)
if not tsks:
continue
msg = []
prg = 0
finished = True
bad = 0
e, doc = DocumentService.get_by_id(d["id"])
status = doc.run  # TaskStatus.RUNNING.value
for t in tsks:
if 0 <= t.progress < 1:
finished = False
prg += t.progress if t.progress >= 0 else 0
if t.progress_msg not in msg:
msg.append(t.progress_msg)
if t.progress == -1:
bad += 1
prg /= len(tsks)
if finished and bad:
prg = -1
status = TaskStatus.FAIL.value
elif finished:
if d["parser_config"].get("raptor", {}).get("use_raptor") and d["progress_msg"].lower().find(" raptor")<0:
queue_raptor_tasks(d)
prg *= 0.98
msg.append("------ RAPTOR -------")
else:
status = TaskStatus.DONE.value
msg = "\n".join(msg)
info = {
"process_duation": datetime.timestamp(
datetime.now()) -
d["process_begin_at"].timestamp(),
"run": status}
if prg != 0:
info["progress"] = prg
if msg:
info["progress_msg"] = msg
cls.update_by_id(d["id"], info)
except Exception as e:
stat_logger.error("fetch task exception:" + str(e))
@classmethod
@DB.connection_context()
def get_kb_doc_count(cls, kb_id):
return len(cls.model.select(cls.model.id).where(
cls.model.kb_id == kb_id).dicts())
@classmethod
@DB.connection_context()
def do_cancel(cls, doc_id):
try:
_, doc = DocumentService.get_by_id(doc_id)
return doc.run == TaskStatus.CANCEL.value or doc.progress < 0
except Exception:
pass
return False
def queue_raptor_tasks(doc):
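# Creates a single whole-document task (from_page=0, to_page=-1), persists it,
# and pushes it onto the Redis queue tagged as a "raptor" task.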
def new_task():
nonlocal doc
return {
"id": get_uuid(),
"doc_id": doc["id"],
"from_page": 0,
"to_page": -1,
"progress_msg": "Start to do RAPTOR (Recursive Abstractive Processing For Tree-Organized Retrieval)."
}
task = new_task()
bulk_insert_into_db(Task, [task], True)
task["type"] = "raptor"
assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=task), "Can't access Redis. Please check the Redis' status."

View File

@ -1,144 +1,144 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from api.db import StatusEnum, TenantPermission
from api.db.db_models import Knowledgebase, DB, Tenant
from api.db.services.common_service import CommonService
class KnowledgebaseService(CommonService):
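# Knowledgebase queries scoped by tenant/team permission, plus helpers for
# parser configuration and field-map lookup.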
model = Knowledgebase
@classmethod
@DB.connection_context()
def get_by_tenant_ids(cls, joined_tenant_ids, user_id,
page_number, items_per_page, orderby, desc):
kbs = cls.model.select().where(
((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission ==
TenantPermission.TEAM.value)) | (
cls.model.tenant_id == user_id))
& (cls.model.status == StatusEnum.VALID.value)
)
if desc:
kbs = kbs.order_by(cls.model.getter_by(orderby).desc())
else:
kbs = kbs.order_by(cls.model.getter_by(orderby).asc())
kbs = kbs.paginate(page_number, items_per_page)
return list(kbs.dicts())
@classmethod
@DB.connection_context()
def get_by_tenant_ids_by_offset(cls, joined_tenant_ids, user_id, offset, count, orderby, desc):
kbs = cls.model.select().where(
((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission ==
TenantPermission.TEAM.value)) | (
cls.model.tenant_id == user_id))
& (cls.model.status == StatusEnum.VALID.value)
)
if desc:
kbs = kbs.order_by(cls.model.getter_by(orderby).desc())
else:
kbs = kbs.order_by(cls.model.getter_by(orderby).asc())
kbs = list(kbs.dicts())
kbs_length = len(kbs)
if offset < 0 or offset > kbs_length:
raise IndexError("Offset is out of the valid range.")
if count == -1:
return kbs[offset:]
return kbs[offset:offset+count]
@classmethod
@DB.connection_context()
def get_detail(cls, kb_id):
fields = [
cls.model.id,
#Tenant.embd_id,
cls.model.embd_id,
cls.model.avatar,
cls.model.name,
cls.model.language,
cls.model.description,
cls.model.permission,
cls.model.doc_num,
cls.model.token_num,
cls.model.chunk_num,
cls.model.parser_id,
cls.model.parser_config]
kbs = cls.model.select(*fields).join(Tenant, on=(
(Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
(cls.model.id == kb_id),
(cls.model.status == StatusEnum.VALID.value)
)
if not kbs:
return
d = kbs[0].to_dict()
#d["embd_id"] = kbs[0].tenant.embd_id
return d
@classmethod
@DB.connection_context()
def update_parser_config(cls, id, config):
e, m = cls.get_by_id(id)
if not e:
raise LookupError(f"knowledgebase({id}) not found.")
def dfs_update(old, new):
for k, v in new.items():
if k not in old:
old[k] = v
continue
if isinstance(v, dict):
assert isinstance(old[k], dict)
dfs_update(old[k], v)
elif isinstance(v, list):
assert isinstance(old[k], list)
old[k] = list(set(old[k] + v))
else:
old[k] = v
dfs_update(m.parser_config, config)
cls.update_by_id(id, {"parser_config": m.parser_config})
@classmethod
@DB.connection_context()
def get_field_map(cls, ids):
conf = {}
for k in cls.get_by_ids(ids):
if k.parser_config and "field_map" in k.parser_config:
conf.update(k.parser_config["field_map"])
return conf
@classmethod
@DB.connection_context()
def get_by_name(cls, kb_name, tenant_id):
kb = cls.model.select().where(
(cls.model.name == kb_name)
& (cls.model.tenant_id == tenant_id)
& (cls.model.status == StatusEnum.VALID.value)
)
if kb:
return True, kb[0]
return False, None
@classmethod
@DB.connection_context()
def get_all_ids(cls):
return [m["id"] for m in cls.model.select(cls.model.id).dicts()]

View File

@ -1,242 +1,242 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from api.db.services.user_service import TenantService
from api.settings import database_logger
from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel
from api.db import LLMType
from api.db.db_models import DB, UserTenant
from api.db.db_models import LLMFactories, LLM, TenantLLM
from api.db.services.common_service import CommonService
class LLMFactoriesService(CommonService):
model = LLMFactories
class LLMService(CommonService):
model = LLM
class TenantLLMService(CommonService):
model = TenantLLM
@classmethod
@DB.connection_context()
def get_api_key(cls, tenant_id, model_name):
objs = cls.query(tenant_id=tenant_id, llm_name=model_name)
if not objs:
return
return objs[0]
@classmethod
@DB.connection_context()
def get_my_llms(cls, tenant_id):
fields = [
cls.model.llm_factory,
LLMFactories.logo,
LLMFactories.tags,
cls.model.model_type,
cls.model.llm_name,
cls.model.used_tokens
]
objs = cls.model.select(*fields).join(LLMFactories, on=(cls.model.llm_factory == LLMFactories.name)).where(
cls.model.tenant_id == tenant_id, ~cls.model.api_key.is_null()).dicts()
return list(objs)
@classmethod
@DB.connection_context()
def model_instance(cls, tenant_id, llm_type,
llm_name=None, lang="Chinese"):
e, tenant = TenantService.get_by_id(tenant_id)
if not e:
raise LookupError("Tenant not found")
if llm_type == LLMType.EMBEDDING.value:
mdlnm = tenant.embd_id if not llm_name else llm_name
elif llm_type == LLMType.SPEECH2TEXT.value:
mdlnm = tenant.asr_id
elif llm_type == LLMType.IMAGE2TEXT.value:
mdlnm = tenant.img2txt_id if not llm_name else llm_name
elif llm_type == LLMType.CHAT.value:
mdlnm = tenant.llm_id if not llm_name else llm_name
elif llm_type == LLMType.RERANK:
mdlnm = tenant.rerank_id if not llm_name else llm_name
else:
assert False, "LLM type error"
model_config = cls.get_api_key(tenant_id, mdlnm)
if model_config: model_config = model_config.to_dict()
if not model_config:
if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
llm = LLMService.query(llm_name=llm_name if llm_name else mdlnm)
if llm and llm[0].fid in ["Youdao", "FastEmbed", "BAAI"]:
model_config = {"llm_factory": llm[0].fid, "api_key":"", "llm_name": llm_name if llm_name else mdlnm, "api_base": ""}
if not model_config:
if llm_name == "flag-embedding":
model_config = {"llm_factory": "Tongyi-Qianwen", "api_key": "",
"llm_name": llm_name, "api_base": ""}
else:
if not mdlnm:
raise LookupError(f"Type of {llm_type} model is not set.")
raise LookupError("Model({}) not authorized".format(mdlnm))
if llm_type == LLMType.EMBEDDING.value:
if model_config["llm_factory"] not in EmbeddingModel:
return
return EmbeddingModel[model_config["llm_factory"]](
model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
if llm_type == LLMType.RERANK:
if model_config["llm_factory"] not in RerankModel:
return
return RerankModel[model_config["llm_factory"]](
model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
if llm_type == LLMType.IMAGE2TEXT.value:
if model_config["llm_factory"] not in CvModel:
return
return CvModel[model_config["llm_factory"]](
model_config["api_key"], model_config["llm_name"], lang,
base_url=model_config["api_base"]
)
if llm_type == LLMType.CHAT.value:
if model_config["llm_factory"] not in ChatModel:
return
return ChatModel[model_config["llm_factory"]](
model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
if llm_type == LLMType.SPEECH2TEXT:
if model_config["llm_factory"] not in Seq2txtModel:
return
return Seq2txtModel[model_config["llm_factory"]](
model_config["api_key"], model_config["llm_name"], lang,
base_url=model_config["api_base"]
)
@classmethod
@DB.connection_context()
def increase_usage(cls, tenant_id, llm_type, used_tokens, llm_name=None):
e, tenant = TenantService.get_by_id(tenant_id)
if not e:
raise LookupError("Tenant not found")
if llm_type == LLMType.EMBEDDING.value:
mdlnm = tenant.embd_id
elif llm_type == LLMType.SPEECH2TEXT.value:
mdlnm = tenant.asr_id
elif llm_type == LLMType.IMAGE2TEXT.value:
mdlnm = tenant.img2txt_id
elif llm_type == LLMType.CHAT.value:
mdlnm = tenant.llm_id if not llm_name else llm_name
elif llm_type == LLMType.RERANK:
mdlnm = tenant.llm_id if not llm_name else llm_name
else:
assert False, "LLM type error"
num = 0
try:
for u in cls.query(tenant_id = tenant_id, llm_name=mdlnm):
num += cls.model.update(used_tokens = u.used_tokens + used_tokens)\
.where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\
.execute()
except Exception:
pass
return num
@classmethod
@DB.connection_context()
def get_openai_models(cls):
objs = cls.model.select().where(
(cls.model.llm_factory == "OpenAI"),
~(cls.model.llm_name == "text-embedding-3-small"),
~(cls.model.llm_name == "text-embedding-3-large")
).dicts()
return list(objs)
class LLMBundle(object):
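# Thin wrapper that binds a tenant to a concrete model instance and records
# token usage in TenantLLM after every call.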
def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese"):
self.tenant_id = tenant_id
self.llm_type = llm_type
self.llm_name = llm_name
self.mdl = TenantLLMService.model_instance(
tenant_id, llm_type, llm_name, lang=lang)
assert self.mdl, "Can't find mole for {}/{}/{}".format(
tenant_id, llm_type, llm_name)
self.max_length = 512
for lm in LLMService.query(llm_name=llm_name):
self.max_length = lm.max_tokens
break
def encode(self, texts: list, batch_size=32):
emd, used_tokens = self.mdl.encode(texts, batch_size)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens):
database_logger.error(
"Can't update token usage for {}/EMBEDDING".format(self.tenant_id))
return emd, used_tokens
def encode_queries(self, query: str):
emd, used_tokens = self.mdl.encode_queries(query)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens):
database_logger.error(
"Can't update token usage for {}/EMBEDDING".format(self.tenant_id))
return emd, used_tokens
def similarity(self, query: str, texts: list):
sim, used_tokens = self.mdl.similarity(query, texts)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens):
database_logger.error(
"Can't update token usage for {}/RERANK".format(self.tenant_id))
return sim, used_tokens
def describe(self, image, max_tokens=300):
txt, used_tokens = self.mdl.describe(image, max_tokens)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens):
database_logger.error(
"Can't update token usage for {}/IMAGE2TEXT".format(self.tenant_id))
return txt
def transcription(self, audio):
txt, used_tokens = self.mdl.transcription(audio)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens):
database_logger.error(
"Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id))
return txt
def chat(self, system, history, gen_conf):
txt, used_tokens = self.mdl.chat(system, history, gen_conf)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens, self.llm_name):
database_logger.error(
"Can't update token usage for {}/CHAT".format(self.tenant_id))
return txt
def chat_streamly(self, system, history, gen_conf):
for txt in self.mdl.chat_streamly(system, history, gen_conf):
if isinstance(txt, int):
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, txt, self.llm_name):
database_logger.error(
"Can't update token usage for {}/CHAT".format(self.tenant_id))
return
yield txt

View File

@ -1,175 +1,175 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import random
from api.db.db_utils import bulk_insert_into_db
from deepdoc.parser import PdfParser
from peewee import JOIN
from api.db.db_models import DB, File2Document, File
from api.db import StatusEnum, FileType, TaskStatus
from api.db.db_models import Task, Document, Knowledgebase, Tenant
from api.db.services.common_service import CommonService
from api.db.services.document_service import DocumentService
from api.utils import current_timestamp, get_uuid
from deepdoc.parser.excel_parser import RAGFlowExcelParser
from rag.settings import SVR_QUEUE_NAME
from rag.utils.minio_conn import MINIO
from rag.utils.redis_conn import REDIS_CONN
class TaskService(CommonService):
model = Task
@classmethod
@DB.connection_context()
def get_tasks(cls, task_id):
fields = [
cls.model.id,
cls.model.doc_id,
cls.model.from_page,
cls.model.to_page,
Document.kb_id,
Document.parser_id,
Document.parser_config,
Document.name,
Document.type,
Document.location,
Document.size,
Knowledgebase.tenant_id,
Knowledgebase.language,
Knowledgebase.embd_id,
Tenant.img2txt_id,
Tenant.asr_id,
Tenant.llm_id,
cls.model.update_time]
docs = cls.model.select(*fields) \
.join(Document, on=(cls.model.doc_id == Document.id)) \
.join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
.join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)) \
.where(cls.model.id == task_id)
docs = list(docs.dicts())
if not docs: return []
cls.model.update(progress_msg=cls.model.progress_msg + "\n" + "Task has been received.",
progress=random.random() / 10.).where(
cls.model.id == docs[0]["id"]).execute()
return docs
@classmethod
@DB.connection_context()
def get_ongoing_doc_name(cls):
with DB.lock("get_task", -1):
docs = cls.model.select(*[Document.id, Document.kb_id, Document.location, File.parent_id]) \
.join(Document, on=(cls.model.doc_id == Document.id)) \
.join(File2Document, on=(File2Document.document_id == Document.id), join_type=JOIN.LEFT_OUTER) \
.join(File, on=(File2Document.file_id == File.id), join_type=JOIN.LEFT_OUTER) \
.where(
Document.status == StatusEnum.VALID.value,
Document.run == TaskStatus.RUNNING.value,
~(Document.type == FileType.VIRTUAL.value),
cls.model.progress < 1,
cls.model.create_time >= current_timestamp() - 1000 * 600
)
docs = list(docs.dicts())
if not docs: return []
return list(set([(d["parent_id"] if d["parent_id"] else d["kb_id"], d["location"]) for d in docs]))
@classmethod
@DB.connection_context()
def do_cancel(cls, id):
try:
task = cls.model.get_by_id(id)
_, doc = DocumentService.get_by_id(task.doc_id)
return doc.run == TaskStatus.CANCEL.value or doc.progress < 0
except Exception:
pass
return False
@classmethod
@DB.connection_context()
def update_progress(cls, id, info):
if os.environ.get("MACOS"):
if info["progress_msg"]:
cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
cls.model.id == id).execute()
if "progress" in info:
cls.model.update(progress=info["progress"]).where(
cls.model.id == id).execute()
return
with DB.lock("update_progress", -1):
if info["progress_msg"]:
cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
cls.model.id == id).execute()
if "progress" in info:
cls.model.update(progress=info["progress"]).where(
cls.model.id == id).execute()
def queue_tasks(doc, bucket, name):
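# Splits a document into parsing tasks: PDFs are chunked by page range
# (parser-dependent page size), spreadsheets by 3000-row slices, everything else
# becomes one task. Tasks are bulk-inserted and pushed to the Redis queue.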
def new_task():
nonlocal doc
return {
"id": get_uuid(),
"doc_id": doc["id"]
}
tsks = []
if doc["type"] == FileType.PDF.value:
file_bin = MINIO.get(bucket, name)
do_layout = doc["parser_config"].get("layout_recognize", True)
pages = PdfParser.total_page_number(doc["name"], file_bin)
page_size = doc["parser_config"].get("task_page_size", 12)
if doc["parser_id"] == "paper":
page_size = doc["parser_config"].get("task_page_size", 22)
if doc["parser_id"] == "one":
page_size = 1000000000
if doc["parser_id"] == "knowledge_graph":
page_size = 1000000000
if not do_layout:
page_size = 1000000000
page_ranges = doc["parser_config"].get("pages")
if not page_ranges:
page_ranges = [(1, 100000)]
for s, e in page_ranges:
s -= 1
s = max(0, s)
e = min(e - 1, pages)
for p in range(s, e, page_size):
task = new_task()
task["from_page"] = p
task["to_page"] = min(p + page_size, e)
tsks.append(task)
elif doc["parser_id"] == "table":
file_bin = MINIO.get(bucket, name)
rn = RAGFlowExcelParser.row_number(
doc["name"], file_bin)
for i in range(0, rn, 3000):
task = new_task()
task["from_page"] = i
task["to_page"] = min(i + 3000, rn)
tsks.append(task)
else:
tsks.append(new_task())
bulk_insert_into_db(Task, tsks, True)
DocumentService.begin2parse(doc["id"])
for t in tsks:
assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=t), "Can't access Redis. Please check the Redis' status."
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import random
from api.db.db_utils import bulk_insert_into_db
from deepdoc.parser import PdfParser
from peewee import JOIN
from api.db.db_models import DB, File2Document, File
from api.db import StatusEnum, FileType, TaskStatus
from api.db.db_models import Task, Document, Knowledgebase, Tenant
from api.db.services.common_service import CommonService
from api.db.services.document_service import DocumentService
from api.utils import current_timestamp, get_uuid
from deepdoc.parser.excel_parser import RAGFlowExcelParser
from rag.settings import SVR_QUEUE_NAME
from rag.utils.minio_conn import MINIO
from rag.utils.redis_conn import REDIS_CONN
class TaskService(CommonService):
model = Task
@classmethod
@DB.connection_context()
def get_tasks(cls, task_id):
fields = [
cls.model.id,
cls.model.doc_id,
cls.model.from_page,
cls.model.to_page,
Document.kb_id,
Document.parser_id,
Document.parser_config,
Document.name,
Document.type,
Document.location,
Document.size,
Knowledgebase.tenant_id,
Knowledgebase.language,
Knowledgebase.embd_id,
Tenant.img2txt_id,
Tenant.asr_id,
Tenant.llm_id,
cls.model.update_time]
docs = cls.model.select(*fields) \
.join(Document, on=(cls.model.doc_id == Document.id)) \
.join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
.join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)) \
.where(cls.model.id == task_id)
docs = list(docs.dicts())
if not docs: return []
cls.model.update(progress_msg=cls.model.progress_msg + "\n" + "Task has been received.",
progress=random.random() / 10.).where(
cls.model.id == docs[0]["id"]).execute()
return docs
@classmethod
@DB.connection_context()
def get_ongoing_doc_name(cls):
with DB.lock("get_task", -1):
docs = cls.model.select(*[Document.id, Document.kb_id, Document.location, File.parent_id]) \
.join(Document, on=(cls.model.doc_id == Document.id)) \
.join(File2Document, on=(File2Document.document_id == Document.id), join_type=JOIN.LEFT_OUTER) \
.join(File, on=(File2Document.file_id == File.id), join_type=JOIN.LEFT_OUTER) \
.where(
Document.status == StatusEnum.VALID.value,
Document.run == TaskStatus.RUNNING.value,
~(Document.type == FileType.VIRTUAL.value),
cls.model.progress < 1,
cls.model.create_time >= current_timestamp() - 1000 * 600
)
docs = list(docs.dicts())
if not docs: return []
return list(set([(d["parent_id"] if d["parent_id"] else d["kb_id"], d["location"]) for d in docs]))
@classmethod
@DB.connection_context()
def do_cancel(cls, id):
try:
task = cls.model.get_by_id(id)
_, doc = DocumentService.get_by_id(task.doc_id)
return doc.run == TaskStatus.CANCEL.value or doc.progress < 0
except Exception as e:
pass
return False
@classmethod
@DB.connection_context()
def update_progress(cls, id, info):
if os.environ.get("MACOS"):
if info["progress_msg"]:
cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
cls.model.id == id).execute()
if "progress" in info:
cls.model.update(progress=info["progress"]).where(
cls.model.id == id).execute()
return
with DB.lock("update_progress", -1):
if info["progress_msg"]:
cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
cls.model.id == id).execute()
if "progress" in info:
cls.model.update(progress=info["progress"]).where(
cls.model.id == id).execute()
def queue_tasks(doc, bucket, name):
def new_task():
nonlocal doc
return {
"id": get_uuid(),
"doc_id": doc["id"]
}
tsks = []
if doc["type"] == FileType.PDF.value:
file_bin = MINIO.get(bucket, name)
do_layout = doc["parser_config"].get("layout_recognize", True)
pages = PdfParser.total_page_number(doc["name"], file_bin)
page_size = doc["parser_config"].get("task_page_size", 12)
if doc["parser_id"] == "paper":
page_size = doc["parser_config"].get("task_page_size", 22)
if doc["parser_id"] == "one":
page_size = 1000000000
if doc["parser_id"] == "knowledge_graph":
page_size = 1000000000
if not do_layout:
page_size = 1000000000
page_ranges = doc["parser_config"].get("pages")
if not page_ranges:
page_ranges = [(1, 100000)]
for s, e in page_ranges:
s -= 1
s = max(0, s)
e = min(e - 1, pages)
for p in range(s, e, page_size):
task = new_task()
task["from_page"] = p
task["to_page"] = min(p + page_size, e)
tsks.append(task)
elif doc["parser_id"] == "table":
file_bin = MINIO.get(bucket, name)
rn = RAGFlowExcelParser.row_number(
doc["name"], file_bin)
for i in range(0, rn, 3000):
task = new_task()
task["from_page"] = i
task["to_page"] = min(i + 3000, rn)
tsks.append(task)
else:
tsks.append(new_task())
bulk_insert_into_db(Task, tsks, True)
DocumentService.begin2parse(doc["id"])
for t in tsks:
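        # Note: enqueuing happens inside an assert, so it would be skipped if
        # Python were run with optimizations (-O) enabled.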
assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=t), "Can't access Redis. Please check the Redis' status."
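
A minimal sketch of how queue_tasks above splits work into page-range tasks
(values are illustrative, not from a real deployment): a 30-page PDF parsed
with the default task_page_size of 12 is cut into three tasks.

    doc = {"id": "doc-1", "type": "pdf", "parser_id": "naive", "name": "a.pdf",
           "parser_config": {"layout_recognize": True}}
    queue_tasks(doc, bucket="kb-bucket", name="a.pdf")
    # -> three Task rows with (from_page, to_page) = (0, 12), (12, 24), (24, 30),
    #    each pushed onto SVR_QUEUE_NAME for a worker to consume.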

View File

@ -1,100 +1,100 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import os
import signal
import sys
import time
import traceback
from concurrent.futures import ThreadPoolExecutor
from werkzeug.serving import run_simple
from api.apps import app
from api.db.runtime_config import RuntimeConfig
from api.db.services.document_service import DocumentService
from api.settings import (
HOST, HTTP_PORT, access_logger, database_logger, stat_logger,
)
from api import utils
from api.db.db_models import init_database_tables as init_web_db
from api.db.init_data import init_web_data
from api.versions import get_versions
def update_progress():
while True:
time.sleep(1)
try:
DocumentService.update_progress()
except Exception as e:
stat_logger.error("update_progress exception:" + str(e))
if __name__ == '__main__':
print("""
____ ______ __
/ __ \ ____ _ ____ _ / ____// /____ _ __
/ /_/ // __ `// __ `// /_ / // __ \| | /| / /
/ _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ /
/_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/
/____/
""", flush=True)
stat_logger.info(
f'project base: {utils.file_utils.get_project_base_directory()}'
)
# init db
init_web_db()
init_web_data()
# init runtime config
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--version', default=False, help="rag flow version", action='store_true')
parser.add_argument('--debug', default=False, help="debug mode", action='store_true')
args = parser.parse_args()
if args.version:
print(get_versions())
sys.exit(0)
RuntimeConfig.DEBUG = args.debug
if RuntimeConfig.DEBUG:
stat_logger.info("run on debug mode")
RuntimeConfig.init_env()
RuntimeConfig.init_config(JOB_SERVER_HOST=HOST, HTTP_PORT=HTTP_PORT)
peewee_logger = logging.getLogger('peewee')
peewee_logger.propagate = False
# rag_arch.common.log.ROpenHandler
peewee_logger.addHandler(database_logger.handlers[0])
peewee_logger.setLevel(database_logger.level)
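    # Run DocumentService.update_progress in a single background worker so
    # parsing progress keeps flushing to the DB while run_simple blocks below.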
thr = ThreadPoolExecutor(max_workers=1)
thr.submit(update_progress)
# start http server
try:
stat_logger.info("RAG Flow http server start...")
werkzeug_logger = logging.getLogger("werkzeug")
for h in access_logger.handlers:
werkzeug_logger.addHandler(h)
run_simple(hostname=HOST, port=HTTP_PORT, application=app, threaded=True, use_reloader=RuntimeConfig.DEBUG, use_debugger=RuntimeConfig.DEBUG)
except Exception:
traceback.print_exc()
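        # Force-kill the process: the non-daemon update_progress worker above
        # would otherwise keep it alive after the HTTP server exits.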
os.kill(os.getpid(), signal.SIGKILL)

View File

@ -1,251 +1,251 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
from enum import IntEnum, Enum
from api.utils.file_utils import get_project_base_directory
from api.utils.log_utils import LoggerFactory, getLogger
# Logger
LoggerFactory.set_directory(
os.path.join(
get_project_base_directory(),
"logs",
"api"))
# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
LoggerFactory.LEVEL = 30
stat_logger = getLogger("stat")
access_logger = getLogger("access")
database_logger = getLogger("database")
chat_logger = getLogger("chat")
from rag.utils.es_conn import ELASTICSEARCH
from rag.nlp import search
from graphrag import search as kg_search
from api.utils import get_base_config, decrypt_database_config
API_VERSION = "v1"
RAG_FLOW_SERVICE_NAME = "ragflow"
SERVER_MODULE = "rag_flow_server.py"
TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp")
RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
SUBPROCESS_STD_LOG_NAME = "std.log"
ERROR_REPORT = True
ERROR_REPORT_WITH_PATH = False
MAX_TIMESTAMP_INTERVAL = 60
SESSION_VALID_PERIOD = 7 * 24 * 60 * 60
REQUEST_TRY_TIMES = 3
REQUEST_WAIT_SEC = 2
REQUEST_MAX_WAIT_SEC = 300
USE_REGISTRY = get_base_config("use_registry")
default_llm = {
"Tongyi-Qianwen": {
"chat_model": "qwen-plus",
"embedding_model": "text-embedding-v2",
"image2text_model": "qwen-vl-max",
"asr_model": "paraformer-realtime-8k-v1",
},
"OpenAI": {
"chat_model": "gpt-3.5-turbo",
"embedding_model": "text-embedding-ada-002",
"image2text_model": "gpt-4-vision-preview",
"asr_model": "whisper-1",
},
"Azure-OpenAI": {
"chat_model": "azure-gpt-35-turbo",
"embedding_model": "azure-text-embedding-ada-002",
"image2text_model": "azure-gpt-4-vision-preview",
"asr_model": "azure-whisper-1",
},
"ZHIPU-AI": {
"chat_model": "glm-3-turbo",
"embedding_model": "embedding-2",
"image2text_model": "glm-4v",
"asr_model": "",
},
"Ollama": {
"chat_model": "qwen-14B-chat",
"embedding_model": "flag-embedding",
"image2text_model": "",
"asr_model": "",
},
"Moonshot": {
"chat_model": "moonshot-v1-8k",
"embedding_model": "",
"image2text_model": "",
"asr_model": "",
},
"DeepSeek": {
"chat_model": "deepseek-chat",
"embedding_model": "",
"image2text_model": "",
"asr_model": "",
},
"VolcEngine": {
"chat_model": "",
"embedding_model": "",
"image2text_model": "",
"asr_model": "",
},
"BAAI": {
"chat_model": "",
"embedding_model": "BAAI/bge-large-zh-v1.5",
"image2text_model": "",
"asr_model": "",
"rerank_model": "BAAI/bge-reranker-v2-m3",
}
}
LLM = get_base_config("user_default_llm", {})
LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
LLM_BASE_URL = LLM.get("base_url")
if LLM_FACTORY not in default_llm:
    print(
        "\33[91m【ERROR】\33[0m:",
        f"LLM factory {LLM_FACTORY} is not supported yet; switching to 'Tongyi-Qianwen/QWen' automatically. Please check the API_KEY in service_conf.yaml.")
LLM_FACTORY = "Tongyi-Qianwen"
CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
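# Embedding and rerank defaults always come from the BAAI entry, regardless of
# the configured chat factory.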
EMBEDDING_MDL = default_llm["BAAI"]["embedding_model"]
RERANK_MDL = default_llm["BAAI"]["rerank_model"]
ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
API_KEY = LLM.get("api_key", "")
PARSERS = LLM.get(
"parsers",
"naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email")
# distribution
DEPENDENT_DISTRIBUTION = get_base_config("dependent_distribution", False)
RAG_FLOW_UPDATE_CHECK = False
HOST = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1")
HTTP_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port")
SECRET_KEY = get_base_config(
RAG_FLOW_SERVICE_NAME,
{}).get(
"secret_key",
"infiniflow")
TOKEN_EXPIRE_IN = get_base_config(
RAG_FLOW_SERVICE_NAME, {}).get(
"token_expires_in", 3600)
NGINX_HOST = get_base_config(
RAG_FLOW_SERVICE_NAME, {}).get(
"nginx", {}).get("host") or HOST
NGINX_HTTP_PORT = get_base_config(
RAG_FLOW_SERVICE_NAME, {}).get(
"nginx", {}).get("http_port") or HTTP_PORT
RANDOM_INSTANCE_ID = get_base_config(
RAG_FLOW_SERVICE_NAME, {}).get(
"random_instance_id", False)
PROXY = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("proxy")
PROXY_PROTOCOL = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("protocol")
DATABASE = decrypt_database_config(name="mysql")
# Switch
# upload
UPLOAD_DATA_FROM_CLIENT = True
# authentication
AUTHENTICATION_CONF = get_base_config("authentication", {})
# client
CLIENT_AUTHENTICATION = AUTHENTICATION_CONF.get(
"client", {}).get(
"switch", False)
HTTP_APP_KEY = AUTHENTICATION_CONF.get("client", {}).get("http_app_key")
GITHUB_OAUTH = get_base_config("oauth", {}).get("github")
FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu")
WECHAT_OAUTH = get_base_config("oauth", {}).get("wechat")
# site
SITE_AUTHENTICATION = AUTHENTICATION_CONF.get("site", {}).get("switch", False)
# permission
PERMISSION_CONF = get_base_config("permission", {})
PERMISSION_SWITCH = PERMISSION_CONF.get("switch")
COMPONENT_PERMISSION = PERMISSION_CONF.get("component")
DATASET_PERMISSION = PERMISSION_CONF.get("dataset")
HOOK_MODULE = get_base_config("hook_module")
HOOK_SERVER_NAME = get_base_config("hook_server_name")
ENABLE_MODEL_STORE = get_base_config('enable_model_store', False)
# authentication
USE_AUTHENTICATION = False
USE_DATA_AUTHENTICATION = False
AUTOMATIC_AUTHORIZATION_OUTPUT_DATA = True
USE_DEFAULT_TIMEOUT = False
AUTHENTICATION_DEFAULT_TIMEOUT = 7 * 24 * 60 * 60 # s
PRIVILEGE_COMMAND_WHITELIST = []
CHECK_NODES_IDENTITY = False
retrievaler = search.Dealer(ELASTICSEARCH)
kg_retrievaler = kg_search.KGSearch(ELASTICSEARCH)
class CustomEnum(Enum):
@classmethod
def valid(cls, value):
try:
cls(value)
return True
except BaseException:
return False
@classmethod
def values(cls):
return [member.value for member in cls.__members__.values()]
@classmethod
def names(cls):
return [member.name for member in cls.__members__.values()]
class PythonDependenceName(CustomEnum):
Rag_Source_Code = "python"
Python_Env = "miniconda"
class ModelStorage(CustomEnum):
REDIS = "redis"
MYSQL = "mysql"
class RetCode(IntEnum, CustomEnum):
SUCCESS = 0
NOT_EFFECTIVE = 10
EXCEPTION_ERROR = 100
ARGUMENT_ERROR = 101
DATA_ERROR = 102
OPERATING_ERROR = 103
CONNECTION_ERROR = 105
RUNNING = 106
PERMISSION_ERROR = 108
AUTHENTICATION_ERROR = 109
UNAUTHORIZED = 401
SERVER_ERROR = 500
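
A quick sketch of the enum helpers defined above, using RetCode:

    RetCode.valid(0)       # True  (RetCode.SUCCESS)
    RetCode.valid(999)     # False (no such member)
    RetCode.values()[:3]   # [0, 10, 100]
    RetCode.names()[:3]    # ['SUCCESS', 'NOT_EFFECTIVE', 'EXCEPTION_ERROR']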

View File

@ -1,346 +1,346 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
import datetime
import io
import json
import os
import pickle
import socket
import time
import uuid
import requests
from enum import Enum, IntEnum
import importlib
from Cryptodome.PublicKey import RSA
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
from filelock import FileLock
from . import file_utils
SERVICE_CONF = "service_conf.yaml"
def conf_realpath(conf_name):
conf_path = f"conf/{conf_name}"
return os.path.join(file_utils.get_project_base_directory(), conf_path)
def get_base_config(key, default=None, conf_name=SERVICE_CONF):
local_config = {}
local_path = conf_realpath(f'local.{conf_name}')
if default is None:
default = os.environ.get(key.upper())
if os.path.exists(local_path):
local_config = file_utils.load_yaml_conf(local_path)
if not isinstance(local_config, dict):
raise ValueError(f'Invalid config file: "{local_path}".')
if key is not None and key in local_config:
return local_config[key]
config_path = conf_realpath(conf_name)
config = file_utils.load_yaml_conf(config_path)
if not isinstance(config, dict):
raise ValueError(f'Invalid config file: "{config_path}".')
config.update(local_config)
return config.get(key, default) if key is not None else config
use_deserialize_safe_module = get_base_config(
'use_deserialize_safe_module', False)
class CoordinationCommunicationProtocol(object):
HTTP = "http"
GRPC = "grpc"
class BaseType:
def to_dict(self):
return dict([(k.lstrip("_"), v) for k, v in self.__dict__.items()])
def to_dict_with_type(self):
def _dict(obj):
module = None
if issubclass(obj.__class__, BaseType):
data = {}
for attr, v in obj.__dict__.items():
k = attr.lstrip("_")
data[k] = _dict(v)
module = obj.__module__
elif isinstance(obj, (list, tuple)):
data = []
for i, vv in enumerate(obj):
data.append(_dict(vv))
elif isinstance(obj, dict):
data = {}
for _k, vv in obj.items():
data[_k] = _dict(vv)
else:
data = obj
return {"type": obj.__class__.__name__,
"data": data, "module": module}
return _dict(self)
class CustomJSONEncoder(json.JSONEncoder):
def __init__(self, **kwargs):
self._with_type = kwargs.pop("with_type", False)
super().__init__(**kwargs)
def default(self, obj):
if isinstance(obj, datetime.datetime):
return obj.strftime('%Y-%m-%d %H:%M:%S')
elif isinstance(obj, datetime.date):
return obj.strftime('%Y-%m-%d')
elif isinstance(obj, datetime.timedelta):
return str(obj)
elif issubclass(type(obj), Enum) or issubclass(type(obj), IntEnum):
return obj.value
elif isinstance(obj, set):
return list(obj)
elif issubclass(type(obj), BaseType):
if not self._with_type:
return obj.to_dict()
else:
return obj.to_dict_with_type()
elif isinstance(obj, type):
return obj.__name__
else:
return json.JSONEncoder.default(self, obj)
def rag_uuid():
return uuid.uuid1().hex
def string_to_bytes(string):
return string if isinstance(
string, bytes) else string.encode(encoding="utf-8")
def bytes_to_string(byte):
return byte.decode(encoding="utf-8")
def json_dumps(src, byte=False, indent=None, with_type=False):
dest = json.dumps(
src,
indent=indent,
cls=CustomJSONEncoder,
with_type=with_type)
if byte:
dest = string_to_bytes(dest)
return dest
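# Sketch: CustomJSONEncoder lets json_dumps serialize values the stdlib
# encoder rejects, e.g.
#   json_dumps({"when": datetime.datetime(2024, 8, 15)})  # -> '{"when": "2024-08-15 00:00:00"}'
#   json_dumps({"codes": {1, 2}})                         # sets become lists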
def json_loads(src, object_hook=None, object_pairs_hook=None):
if isinstance(src, bytes):
src = bytes_to_string(src)
return json.loads(src, object_hook=object_hook,
object_pairs_hook=object_pairs_hook)
def current_timestamp():
return int(time.time() * 1000)
def timestamp_to_date(timestamp, format_string="%Y-%m-%d %H:%M:%S"):
if not timestamp:
timestamp = time.time()
timestamp = int(timestamp) / 1000
time_array = time.localtime(timestamp)
str_date = time.strftime(format_string, time_array)
return str_date
def date_string_to_timestamp(time_str, format_string="%Y-%m-%d %H:%M:%S"):
time_array = time.strptime(time_str, format_string)
time_stamp = int(time.mktime(time_array) * 1000)
return time_stamp
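# Round-trip sketch: date_string_to_timestamp("2024-08-15 09:17:36") yields a
# millisecond timestamp, and timestamp_to_date() maps it back to the same
# string in the local timezone.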
def serialize_b64(src, to_str=False):
dest = base64.b64encode(pickle.dumps(src))
if not to_str:
return dest
else:
return bytes_to_string(dest)
def deserialize_b64(src):
src = base64.b64decode(
string_to_bytes(src) if isinstance(
src, str) else src)
if use_deserialize_safe_module:
return restricted_loads(src)
return pickle.loads(src)
safe_module = {
'numpy',
'rag_flow'
}
class RestrictedUnpickler(pickle.Unpickler):
def find_class(self, module, name):
if module.split('.')[0] in safe_module:
_module = importlib.import_module(module)
return getattr(_module, name)
# Forbid everything else.
raise pickle.UnpicklingError("global '%s.%s' is forbidden" %
(module, name))
def restricted_loads(src):
"""Helper function analogous to pickle.loads()."""
return RestrictedUnpickler(io.BytesIO(src)).load()
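# Sketch: restricted_loads only resolves globals whose top-level module is in
# safe_module; a pickle referencing e.g. os.system raises
# pickle.UnpicklingError("global 'os.system' is forbidden") instead of importing it.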
def get_lan_ip():
if os.name != "nt":
import fcntl
import struct
def get_interface_ip(ifname):
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
return socket.inet_ntoa(
fcntl.ioctl(s.fileno(), 0x8915, struct.pack('256s', string_to_bytes(ifname[:15])))[20:24])
ip = socket.gethostbyname(socket.getfqdn())
if ip.startswith("127.") and os.name != "nt":
interfaces = [
"bond1",
"eth0",
"eth1",
"eth2",
"wlan0",
"wlan1",
"wifi0",
"ath0",
"ath1",
"ppp0",
]
for ifname in interfaces:
try:
ip = get_interface_ip(ifname)
break
            except IOError:
                pass
return ip or ''
def from_dict_hook(in_dict: dict):
if "type" in in_dict and "data" in in_dict:
if in_dict["module"] is None:
return in_dict["data"]
else:
return getattr(importlib.import_module(
in_dict["module"]), in_dict["type"])(**in_dict["data"])
else:
return in_dict
def decrypt_database_password(password):
encrypt_password = get_base_config("encrypt_password", False)
encrypt_module = get_base_config("encrypt_module", False)
private_key = get_base_config("private_key", None)
if not password or not encrypt_password:
return password
if not private_key:
raise ValueError("No private key")
module_fun = encrypt_module.split("#")
pwdecrypt_fun = getattr(
importlib.import_module(
module_fun[0]),
module_fun[1])
return pwdecrypt_fun(private_key, password)
def decrypt_database_config(
database=None, passwd_key="password", name="database"):
if not database:
database = get_base_config(name, {})
database[passwd_key] = decrypt_database_password(database[passwd_key])
return database
def update_config(key, value, conf_name=SERVICE_CONF):
conf_path = conf_realpath(conf_name=conf_name)
if not os.path.isabs(conf_path):
conf_path = os.path.join(
file_utils.get_project_base_directory(), conf_path)
with FileLock(os.path.join(os.path.dirname(conf_path), ".lock")):
config = file_utils.load_yaml_conf(conf_path=conf_path) or {}
config[key] = value
file_utils.rewrite_yaml_conf(conf_path=conf_path, config=config)
def get_uuid():
return uuid.uuid1().hex
def datetime_format(date_time: datetime.datetime) -> datetime.datetime:
return datetime.datetime(date_time.year, date_time.month, date_time.day,
date_time.hour, date_time.minute, date_time.second)
def get_format_time() -> datetime.datetime:
return datetime_format(datetime.datetime.now())
def str2date(date_time: str):
return datetime.datetime.strptime(date_time, '%Y-%m-%d')
def elapsed2time(elapsed):
    seconds = elapsed / 1000
    minutes, second = divmod(seconds, 60)
    hour, minutes = divmod(minutes, 60)
    return '%02d:%02d:%02d' % (hour, minutes, second)
def decrypt(line):
file_path = os.path.join(
file_utils.get_project_base_directory(),
"conf",
"private.pem")
    with open(file_path) as pem_file:
        rsa_key = RSA.importKey(pem_file.read(), "Welcome")
cipher = Cipher_pkcs1_v1_5.new(rsa_key)
return cipher.decrypt(base64.b64decode(
line), "Fail to decrypt password!").decode('utf-8')
def download_img(url):
if not url:
return ""
response = requests.get(url)
return "data:" + \
response.headers.get('Content-Type', 'image/jpg') + ";" + \
"base64," + base64.b64encode(response.content).decode("utf-8")

View File

@ -1,269 +1,269 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import random
import time
from functools import wraps
from io import BytesIO
from flask import (
Response, jsonify, send_file, make_response,
request as flask_request,
)
from werkzeug.http import HTTP_STATUS_CODES
from api.utils import json_dumps
from api.settings import RetCode
from api.settings import (
REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC,
stat_logger, CLIENT_AUTHENTICATION, HTTP_APP_KEY, SECRET_KEY
)
import requests
import functools
from api.utils import CustomJSONEncoder
from uuid import uuid1
from base64 import b64encode
from hmac import HMAC
from urllib.parse import quote, urlencode
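# Route requests' JSON serialization through CustomJSONEncoder so outgoing
# bodies can carry datetime, Enum, set and BaseType values.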
requests.models.complexjson.dumps = functools.partial(
json.dumps, cls=CustomJSONEncoder)
def request(**kwargs):
sess = requests.Session()
stream = kwargs.pop('stream', sess.stream)
timeout = kwargs.pop('timeout', None)
kwargs['headers'] = {
k.replace(
'_',
'-').upper(): v for k,
v in kwargs.get(
'headers',
{}).items()}
prepped = requests.Request(**kwargs).prepare()
if CLIENT_AUTHENTICATION and HTTP_APP_KEY and SECRET_KEY:
        timestamp = str(round(time.time() * 1000))
nonce = str(uuid1())
signature = b64encode(HMAC(SECRET_KEY.encode('ascii'), b'\n'.join([
timestamp.encode('ascii'),
nonce.encode('ascii'),
HTTP_APP_KEY.encode('ascii'),
prepped.path_url.encode('ascii'),
prepped.body if kwargs.get('json') else b'',
urlencode(
sorted(
kwargs['data'].items()),
quote_via=quote,
safe='-._~').encode('ascii')
if kwargs.get('data') and isinstance(kwargs['data'], dict) else b'',
]), 'sha1').digest()).decode('ascii')
prepped.headers.update({
'TIMESTAMP': timestamp,
'NONCE': nonce,
'APP-KEY': HTTP_APP_KEY,
'SIGNATURE': signature,
})
return sess.send(prepped, stream=stream, timeout=timeout)
def get_exponential_backoff_interval(retries, full_jitter=False):
"""Calculate the exponential backoff wait time."""
# Will be zero if factor equals 0
countdown = min(REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC * (2 ** retries))
# Full jitter according to
# https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
if full_jitter:
countdown = random.randrange(countdown + 1)
# Adjust according to maximum wait time and account for negative values.
return max(0, countdown)
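# Sketch: with REQUEST_WAIT_SEC=2 and REQUEST_MAX_WAIT_SEC=300, retries 0..7
# yield 2, 4, 8, ..., 256 seconds and retry 8+ is capped at 300; full_jitter
# instead draws uniformly from [0, countdown].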
def get_json_result(retcode=RetCode.SUCCESS, retmsg='success',
data=None, job_id=None, meta=None):
import re
result_dict = {
"retcode": retcode,
"retmsg": retmsg,
# "retmsg": re.sub(r"rag", "seceum", retmsg, flags=re.IGNORECASE),
"data": data,
"jobId": job_id,
"meta": meta,
}
response = {}
for key, value in result_dict.items():
if value is None and key != "retcode":
continue
else:
response[key] = value
return jsonify(response)
def get_data_error_result(retcode=RetCode.DATA_ERROR,
retmsg='Sorry! Data missing!'):
import re
result_dict = {
"retcode": retcode,
"retmsg": re.sub(
r"rag",
"seceum",
retmsg,
flags=re.IGNORECASE)}
response = {}
for key, value in result_dict.items():
if value is None and key != "retcode":
continue
else:
response[key] = value
return jsonify(response)
def server_error_response(e):
stat_logger.exception(e)
try:
if e.code == 401:
return get_json_result(retcode=401, retmsg=repr(e))
except BaseException:
pass
if len(e.args) > 1:
return get_json_result(
retcode=RetCode.EXCEPTION_ERROR, retmsg=repr(e.args[0]), data=e.args[1])
if repr(e).find("index_not_found_exception") >= 0:
return get_json_result(retcode=RetCode.EXCEPTION_ERROR, retmsg="No chunk found, please upload file and parse it.")
return get_json_result(retcode=RetCode.EXCEPTION_ERROR, retmsg=repr(e))
def error_response(response_code, retmsg=None):
if retmsg is None:
retmsg = HTTP_STATUS_CODES.get(response_code, 'Unknown Error')
return Response(json.dumps({
'retmsg': retmsg,
'retcode': response_code,
}), status=response_code, mimetype='application/json')
def validate_request(*args, **kwargs):
def wrapper(func):
@wraps(func)
def decorated_function(*_args, **_kwargs):
input_arguments = flask_request.json or flask_request.form.to_dict()
no_arguments = []
error_arguments = []
for arg in args:
if arg not in input_arguments:
no_arguments.append(arg)
for k, v in kwargs.items():
config_value = input_arguments.get(k, None)
if config_value is None:
no_arguments.append(k)
elif isinstance(v, (tuple, list)):
if config_value not in v:
error_arguments.append((k, set(v)))
elif config_value != v:
error_arguments.append((k, v))
if no_arguments or error_arguments:
error_string = ""
if no_arguments:
error_string += "required argument are missing: {}; ".format(
",".join(no_arguments))
if error_arguments:
error_string += "required argument values: {}".format(
",".join(["{}={}".format(a[0], a[1]) for a in error_arguments]))
return get_json_result(
retcode=RetCode.ARGUMENT_ERROR, retmsg=error_string)
return func(*_args, **_kwargs)
return decorated_function
return wrapper
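# Usage sketch (route and field names are illustrative):
#   @manager.route('/create', methods=['POST'])
#   @validate_request("name", mode=("simple", "full"))
#   def create(): ...
# rejects requests missing "name" or whose "mode" is outside the given tuple.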
def is_localhost(ip):
return ip in {'127.0.0.1', '::1', '[::1]', 'localhost'}
def send_file_in_mem(data, filename):
if not isinstance(data, (str, bytes)):
data = json_dumps(data)
if isinstance(data, str):
data = data.encode('utf-8')
f = BytesIO()
f.write(data)
f.seek(0)
return send_file(f, as_attachment=True, attachment_filename=filename)
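# NOTE: `attachment_filename` was renamed `download_name` in Flask 2.0; the
# call above assumes the Flask 1.x API.
# NOTE: the get_json_result below shadows the richer definition earlier in
# this module (the job_id/meta parameters are lost).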
def get_json_result(retcode=RetCode.SUCCESS, retmsg='success', data=None):
response = {"retcode": retcode, "retmsg": retmsg, "data": data}
return jsonify(response)
def cors_reponse(retcode=RetCode.SUCCESS,
retmsg='success', data=None, auth=None):
result_dict = {"retcode": retcode, "retmsg": retmsg, "data": data}
response_dict = {}
for key, value in result_dict.items():
if value is None and key != "retcode":
continue
else:
response_dict[key] = value
response = make_response(jsonify(response_dict))
if auth:
response.headers["Authorization"] = auth
response.headers["Access-Control-Allow-Origin"] = "*"
response.headers["Access-Control-Allow-Method"] = "*"
response.headers["Access-Control-Allow-Headers"] = "*"
response.headers["Access-Control-Allow-Headers"] = "*"
response.headers["Access-Control-Expose-Headers"] = "Authorization"
return response
def construct_result(code=RetCode.DATA_ERROR, message='data is missing'):
import re
result_dict = {"code": code, "message": re.sub(r"rag", "seceum", message, flags=re.IGNORECASE)}
response = {}
for key, value in result_dict.items():
if value is None and key != "code":
continue
else:
response[key] = value
return jsonify(response)
def construct_json_result(code=RetCode.SUCCESS, message='success', data=None):
if data is None:
return jsonify({"code": code, "message": message})
else:
return jsonify({"code": code, "message": message, "data": data})
def construct_error_response(e):
stat_logger.exception(e)
try:
if e.code == 401:
return construct_json_result(code=RetCode.UNAUTHORIZED, message=repr(e))
except BaseException:
pass
if len(e.args) > 1:
return construct_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=e.args[1])
if repr(e).find("index_not_found_exception") >=0:
return construct_json_result(code=RetCode.EXCEPTION_ERROR, message="No chunk found, please upload file and parse it.")
return construct_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
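
For reference, a sketch of the two response envelopes this module produces
(values illustrative):

    get_json_result(data={"ok": 1})
    # -> {"retcode": 0, "retmsg": "success", "data": {"ok": 1}}
    construct_json_result(code=RetCode.DATA_ERROR, message="data is missing")
    # -> {"code": 102, "message": "data is missing"}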

View File

@ -1,78 +1,78 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
import click
import re
from flask import Flask
from werkzeug.security import generate_password_hash
from api.db.services import UserService
@click.command('reset-password', help='Reset the account password.')
@click.option('--email', prompt=True, help='The email address of the account whose password you need to reset')
@click.option('--new-password', prompt=True, help='The new password.')
@click.option('--password-confirm', prompt=True, help='Confirmation of the new password.')
def reset_password(email, new_password, password_confirm):
    if str(new_password).strip() != str(password_confirm).strip():
        click.echo(click.style('Sorry, the two passwords do not match.', fg='red'))
        return
    user = UserService.query(email=email)
    if not user:
        click.echo(click.style('Sorry, this email is not registered.', fg='red'))
        return
    encode_password = base64.b64encode(new_password.encode('utf-8')).decode('utf-8')
    password_hash = generate_password_hash(encode_password)
    user_dict = {
        'password': password_hash
    }
    UserService.update_user(user[0].id, user_dict)
click.echo(click.style('Congratulations! Password has been reset.', fg='green'))
@click.command('reset-email', help='Reset the account email.')
@click.option('--email', prompt=True, help='The old email address of the account whose email you need to reset')
@click.option('--new-email', prompt=True, help='The new email address.')
@click.option('--email-confirm', prompt=True, help='Confirmation of the new email address.')
def reset_email(email, new_email, email_confirm):
    if str(new_email).strip() != str(email_confirm).strip():
        click.echo(click.style('Sorry, the new email and its confirmation do not match.', fg='red'))
        return
    if str(new_email).strip() == str(email).strip():
        click.echo(click.style('Sorry, the new email and the old email are the same.', fg='red'))
        return
    user = UserService.query(email=email)
    if not user:
        click.echo(click.style('Sorry, the account [{}] does not exist.'.format(email), fg='red'))
        return
    if not re.match(r"^[\w\._-]+@([\w_-]+\.)+[\w-]{2,4}$", new_email):
        click.echo(click.style('Sorry, {} is not a valid email.'.format(new_email), fg='red'))
        return
    new_user = UserService.query(email=new_email)
    if new_user:
        click.echo(click.style('Sorry, the account [{}] already exists.'.format(new_email), fg='red'))
        return
    user_dict = {
        'email': new_email
    }
    UserService.update_user(user[0].id, user_dict)
    click.echo(click.style('Congratulations! The email has been reset.', fg='green'))
def register_commands(app: Flask):
app.cli.add_command(reset_password)
app.cli.add_command(reset_email)
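
A sketch of how these commands are exposed: register_commands(app) attaches
them to the Flask CLI, after which they can be invoked with prompts for any
omitted options.

    from api.utils import commands
    commands.register_commands(app)   # app is the Flask instance
    # then: `flask reset-password` / `flask reset-email`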

View File

@ -1,207 +1,207 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
import json
import os
import re
from io import BytesIO

import pdfplumber
from PIL import Image
from cachetools import LRUCache, cached
from ruamel.yaml import YAML

from api.db import FileType

PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE")
RAG_BASE = os.getenv("RAG_BASE")


def get_project_base_directory(*args):
    global PROJECT_BASE
    if PROJECT_BASE is None:
        PROJECT_BASE = os.path.abspath(
            os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                os.pardir,
                os.pardir,
            )
        )
    if args:
        return os.path.join(PROJECT_BASE, *args)
    return PROJECT_BASE


def get_rag_directory(*args):
    global RAG_BASE
    if RAG_BASE is None:
        RAG_BASE = os.path.abspath(
            os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                os.pardir,
                os.pardir,
                os.pardir,
            )
        )
    if args:
        return os.path.join(RAG_BASE, *args)
    return RAG_BASE


def get_rag_python_directory(*args):
    return get_rag_directory("python", *args)


def get_home_cache_dir():
    cache_dir = os.path.join(os.path.expanduser('~'), ".ragflow")
    os.makedirs(cache_dir, exist_ok=True)
    return cache_dir


@cached(cache=LRUCache(maxsize=10))
def load_json_conf(conf_path):
    if os.path.isabs(conf_path):
        json_conf_path = conf_path
    else:
        json_conf_path = os.path.join(get_project_base_directory(), conf_path)
    try:
        with open(json_conf_path) as f:
            return json.load(f)
    except BaseException:
        raise EnvironmentError(
            "loading json file config from '{}' failed!".format(json_conf_path)
        )


def dump_json_conf(config_data, conf_path):
    if os.path.isabs(conf_path):
        json_conf_path = conf_path
    else:
        json_conf_path = os.path.join(get_project_base_directory(), conf_path)
    try:
        with open(json_conf_path, "w") as f:
            json.dump(config_data, f, indent=4)
    except BaseException:
        raise EnvironmentError(
            "dumping json config to '{}' failed!".format(json_conf_path)
        )


def load_json_conf_real_time(conf_path):
    if os.path.isabs(conf_path):
        json_conf_path = conf_path
    else:
        json_conf_path = os.path.join(get_project_base_directory(), conf_path)
    try:
        with open(json_conf_path) as f:
            return json.load(f)
    except BaseException:
        raise EnvironmentError(
            "loading json file config from '{}' failed!".format(json_conf_path)
        )


def load_yaml_conf(conf_path):
    if not os.path.isabs(conf_path):
        conf_path = os.path.join(get_project_base_directory(), conf_path)
    try:
        with open(conf_path) as f:
            yaml = YAML(typ='safe', pure=True)
            return yaml.load(f)
    except Exception as e:
        raise EnvironmentError(
            "loading yaml file config from {} failed:".format(conf_path), e
        )


def rewrite_yaml_conf(conf_path, config):
    if not os.path.isabs(conf_path):
        conf_path = os.path.join(get_project_base_directory(), conf_path)
    try:
        with open(conf_path, "w") as f:
            yaml = YAML(typ="safe")
            yaml.dump(config, f)
    except Exception as e:
        raise EnvironmentError(
            "rewrite yaml file config {} failed:".format(conf_path), e
        )


def rewrite_json_file(filepath, json_data):
    with open(filepath, "w") as f:
        json.dump(json_data, f, indent=4, separators=(",", ": "))


def filename_type(filename):
    filename = filename.lower()
    if re.match(r".*\.pdf$", filename):
        return FileType.PDF.value

    if re.match(
            r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
        return FileType.DOC.value

    if re.match(
            r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
        return FileType.AURAL.value

    if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename):
        return FileType.VISUAL.value

    return FileType.OTHER.value


def thumbnail(filename, blob):
    filename = filename.lower()
    if re.match(r".*\.pdf$", filename):
        pdf = pdfplumber.open(BytesIO(blob))
        buffered = BytesIO()
        pdf.pages[0].to_image(resolution=32).annotated.save(buffered, format="png")
        return "data:image/png;base64," + \
            base64.b64encode(buffered.getvalue()).decode("utf-8")

    if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):
        image = Image.open(BytesIO(blob))
        image.thumbnail((30, 30))
        buffered = BytesIO()
        image.save(buffered, format="png")
        return "data:image/png;base64," + \
            base64.b64encode(buffered.getvalue()).decode("utf-8")

    if re.match(r".*\.(ppt|pptx)$", filename):
        import aspose.slides as slides
        import aspose.pydrawing as drawing
        try:
            with slides.Presentation(BytesIO(blob)) as presentation:
                buffered = BytesIO()
                presentation.slides[0].get_thumbnail(0.03, 0.03).save(
                    buffered, drawing.imaging.ImageFormat.png)
                return "data:image/png;base64," + \
                    base64.b64encode(buffered.getvalue()).decode("utf-8")
        except Exception:
            # Thumbnail generation is best-effort; fall through to None.
            pass


def traversal_files(base):
    for root, ds, fs in os.walk(base):
        for f in fs:
            fullname = os.path.join(root, f)
            yield fullname
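
As a quick illustration of how filename_type and thumbnail compose, a minimal sketch; 'report.pdf' is a placeholder file name and api.utils.file_utils is the assumed import path:

from api.db import FileType
from api.utils.file_utils import filename_type, thumbnail  # assumed import path

# Sketch only: 'report.pdf' is a placeholder local file.
with open('report.pdf', 'rb') as f:
    blob = f.read()

if filename_type('report.pdf') == FileType.PDF.value:
    data_url = thumbnail('report.pdf', blob)
    # data_url is a "data:image/png;base64,..." string, or None on failure.
    print(data_url[:40] if data_url else 'no thumbnail produced')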

View File

@ -1,313 +1,313 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import typing
import traceback
import logging
from logging.handlers import TimedRotatingFileHandler
from threading import RLock

from api.utils import file_utils


class LoggerFactory(object):
    TYPE = "FILE"
    LOG_FORMAT = "[%(levelname)s] [%(asctime)s] [%(module)s.%(funcName)s] [line:%(lineno)d]: %(message)s"
    logging.basicConfig(format=LOG_FORMAT)
    LEVEL = logging.DEBUG
    logger_dict = {}
    global_handler_dict = {}

    LOG_DIR = None
    PARENT_LOG_DIR = None
    log_share = True

    append_to_parent_log = None

    lock = RLock()
    # CRITICAL = 50
    # FATAL = CRITICAL
    # ERROR = 40
    # WARNING = 30
    # WARN = WARNING
    # INFO = 20
    # DEBUG = 10
    # NOTSET = 0
    levels = (10, 20, 30, 40)
    schedule_logger_dict = {}

    @staticmethod
    def set_directory(directory=None, parent_log_dir=None,
                      append_to_parent_log=None, force=False):
        if parent_log_dir:
            LoggerFactory.PARENT_LOG_DIR = parent_log_dir
        if append_to_parent_log:
            LoggerFactory.append_to_parent_log = append_to_parent_log
        with LoggerFactory.lock:
            if not directory:
                directory = file_utils.get_project_base_directory("logs")
            if not LoggerFactory.LOG_DIR or force:
                LoggerFactory.LOG_DIR = directory
            if LoggerFactory.log_share:
                oldmask = os.umask(000)
                os.makedirs(LoggerFactory.LOG_DIR, exist_ok=True)
                os.umask(oldmask)
            else:
                os.makedirs(LoggerFactory.LOG_DIR, exist_ok=True)
            for loggerName, ghandler in LoggerFactory.global_handler_dict.items():
                for className, (logger, handler) in LoggerFactory.logger_dict.items():
                    logger.removeHandler(ghandler)
                ghandler.close()
            LoggerFactory.global_handler_dict = {}
            for className, (logger, handler) in LoggerFactory.logger_dict.items():
                logger.removeHandler(handler)
                _handler = None
                if handler:
                    handler.close()
                if className != "default":
                    _handler = LoggerFactory.get_handler(className)
                    logger.addHandler(_handler)
                LoggerFactory.assemble_global_handler(logger)
                LoggerFactory.logger_dict[className] = logger, _handler

    @staticmethod
    def new_logger(name):
        logger = logging.getLogger(name)
        logger.propagate = False
        logger.setLevel(LoggerFactory.LEVEL)
        return logger

    @staticmethod
    def get_logger(class_name=None):
        with LoggerFactory.lock:
            if class_name in LoggerFactory.logger_dict.keys():
                logger, handler = LoggerFactory.logger_dict[class_name]
                if not logger:
                    logger, handler = LoggerFactory.init_logger(class_name)
            else:
                logger, handler = LoggerFactory.init_logger(class_name)
            return logger

    @staticmethod
    def get_global_handler(logger_name, level=None, log_dir=None):
        if not LoggerFactory.LOG_DIR:
            return logging.StreamHandler()
        if log_dir:
            logger_name_key = logger_name + "_" + log_dir
        else:
            logger_name_key = logger_name + "_" + LoggerFactory.LOG_DIR
        # if loggerName not in LoggerFactory.globalHandlerDict:
        if logger_name_key not in LoggerFactory.global_handler_dict:
            with LoggerFactory.lock:
                if logger_name_key not in LoggerFactory.global_handler_dict:
                    handler = LoggerFactory.get_handler(
                        logger_name, level, log_dir)
                    LoggerFactory.global_handler_dict[logger_name_key] = handler
        return LoggerFactory.global_handler_dict[logger_name_key]

    @staticmethod
    def get_handler(class_name, level=None, log_dir=None,
                    log_type=None, job_id=None):
        if not log_type:
            if not LoggerFactory.LOG_DIR or not class_name:
                return logging.StreamHandler()
            # return Diy_StreamHandler()

            if not log_dir:
                log_file = os.path.join(
                    LoggerFactory.LOG_DIR,
                    "{}.log".format(class_name))
            else:
                log_file = os.path.join(log_dir, "{}.log".format(class_name))
        else:
            log_file = os.path.join(log_dir, "rag_flow_{}.log".format(
                log_type) if level == LoggerFactory.LEVEL else 'rag_flow_{}_error.log'.format(log_type))

        os.makedirs(os.path.dirname(log_file), exist_ok=True)
        if LoggerFactory.log_share:
            handler = ROpenHandler(log_file,
                                   when='D',
                                   interval=1,
                                   backupCount=14,
                                   delay=True)
        else:
            handler = TimedRotatingFileHandler(log_file,
                                               when='D',
                                               interval=1,
                                               backupCount=14,
                                               delay=True)
        if level:
            handler.level = level

        return handler

    @staticmethod
    def init_logger(class_name):
        with LoggerFactory.lock:
            logger = LoggerFactory.new_logger(class_name)
            handler = None
            if class_name:
                handler = LoggerFactory.get_handler(class_name)
                logger.addHandler(handler)
                LoggerFactory.logger_dict[class_name] = logger, handler
            else:
                LoggerFactory.logger_dict["default"] = logger, handler

            LoggerFactory.assemble_global_handler(logger)
            return logger, handler

    @staticmethod
    def assemble_global_handler(logger):
        if LoggerFactory.LOG_DIR:
            for level in LoggerFactory.levels:
                if level >= LoggerFactory.LEVEL:
                    level_logger_name = logging._levelToName[level]
                    logger.addHandler(
                        LoggerFactory.get_global_handler(
                            level_logger_name, level))
        if LoggerFactory.append_to_parent_log and LoggerFactory.PARENT_LOG_DIR:
            for level in LoggerFactory.levels:
                if level >= LoggerFactory.LEVEL:
                    level_logger_name = logging._levelToName[level]
                    logger.addHandler(
                        LoggerFactory.get_global_handler(level_logger_name, level, LoggerFactory.PARENT_LOG_DIR))


def setDirectory(directory=None):
    LoggerFactory.set_directory(directory)


def setLevel(level):
    LoggerFactory.LEVEL = level


def getLogger(className=None, useLevelFile=False):
    if className is None:
        className = 'stat'
    return LoggerFactory.get_logger(className)


def exception_to_trace_string(ex):
    return "".join(traceback.TracebackException.from_exception(ex).format())


class ROpenHandler(TimedRotatingFileHandler):
    def _open(self):
        prevumask = os.umask(000)
        rtv = TimedRotatingFileHandler._open(self)
        os.umask(prevumask)
        return rtv


def sql_logger(job_id='', log_type='sql'):
    key = job_id + log_type
    if key in LoggerFactory.schedule_logger_dict.keys():
        return LoggerFactory.schedule_logger_dict[key]
    return get_job_logger(job_id=job_id, log_type=log_type)


def ready_log(msg, job=None, task=None, role=None, party_id=None, detail=None):
    prefix, suffix = base_msg(job, task, role, party_id, detail)
    return f"{prefix}{msg} ready{suffix}"


def start_log(msg, job=None, task=None, role=None, party_id=None, detail=None):
    prefix, suffix = base_msg(job, task, role, party_id, detail)
    return f"{prefix}start to {msg}{suffix}"


def successful_log(msg, job=None, task=None, role=None,
                   party_id=None, detail=None):
    prefix, suffix = base_msg(job, task, role, party_id, detail)
    return f"{prefix}{msg} successfully{suffix}"


def warning_log(msg, job=None, task=None, role=None,
                party_id=None, detail=None):
    prefix, suffix = base_msg(job, task, role, party_id, detail)
    return f"{prefix}{msg} is not effective{suffix}"


def failed_log(msg, job=None, task=None, role=None,
               party_id=None, detail=None):
    prefix, suffix = base_msg(job, task, role, party_id, detail)
    return f"{prefix}failed to {msg}{suffix}"


def base_msg(job=None, task=None, role: str = None,
             party_id: typing.Union[str, int] = None, detail=None):
    if detail:
        detail_msg = f" detail: \n{detail}"
    else:
        detail_msg = ""
    if task is not None:
        return f"task {task.f_task_id} {task.f_task_version} ", f" on {task.f_role} {task.f_party_id}{detail_msg}"
    elif job is not None:
        return "", f" on {job.f_role} {job.f_party_id}{detail_msg}"
    elif role and party_id:
        return "", f" on {role} {party_id}{detail_msg}"
    else:
        return "", f"{detail_msg}"


def get_logger_base_dir():
    job_log_dir = file_utils.get_rag_flow_directory('logs')
    return job_log_dir


def get_job_logger(job_id, log_type):
    rag_flow_log_dir = file_utils.get_rag_flow_directory('logs', 'rag_flow')
    job_log_dir = file_utils.get_rag_flow_directory('logs', job_id)
    if not job_id:
        log_dirs = [rag_flow_log_dir]
    else:
        if log_type == 'audit':
            log_dirs = [job_log_dir, rag_flow_log_dir]
        else:
            log_dirs = [job_log_dir]
    if LoggerFactory.log_share:
        oldmask = os.umask(000)
        os.makedirs(job_log_dir, exist_ok=True)
        os.makedirs(rag_flow_log_dir, exist_ok=True)
        os.umask(oldmask)
    else:
        os.makedirs(job_log_dir, exist_ok=True)
        os.makedirs(rag_flow_log_dir, exist_ok=True)
    logger = LoggerFactory.new_logger(f"{job_id}_{log_type}")
    for log_dir in log_dirs:
        handler = LoggerFactory.get_handler(class_name=None, level=LoggerFactory.LEVEL,
                                            log_dir=log_dir, log_type=log_type, job_id=job_id)
        error_handler = LoggerFactory.get_handler(
            class_name=None,
            level=logging.ERROR,
            log_dir=log_dir,
            log_type=log_type,
            job_id=job_id)
        logger.addHandler(handler)
        logger.addHandler(error_handler)
    with LoggerFactory.lock:
        LoggerFactory.schedule_logger_dict[job_id + log_type] = logger
    return logger
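
A short sketch of how the factory is typically wired up; the log directory and component name are placeholders, and api.utils.log_utils is the assumed import path:

import logging

from api.utils.log_utils import LoggerFactory, getLogger  # assumed import path

# Sketch only: '/tmp/ragflow_logs' and 'doc_parser' are placeholders.
LoggerFactory.set_directory('/tmp/ragflow_logs')  # per-level log files land here
LoggerFactory.LEVEL = logging.INFO                # loggers created below drop DEBUG
logger = getLogger('doc_parser')                  # writes doc_parser.log in that directory
logger.info('parser initialised')                 # also mirrored to the per-level global files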

View File

@ -1,24 +1,24 @@
import base64
import os
import sys

from Cryptodome.PublicKey import RSA
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5

from api.utils import decrypt, file_utils


def crypt(line):
    file_path = os.path.join(
        file_utils.get_project_base_directory(),
        "conf",
        "public.pem")
    with open(file_path) as pem_file:
        rsa_key = RSA.importKey(pem_file.read(), "Welcome")
    cipher = Cipher_pkcs1_v1_5.new(rsa_key)
    password_base64 = base64.b64encode(line.encode('utf-8')).decode("utf-8")
    encrypted_password = cipher.encrypt(password_base64.encode())
    return base64.b64encode(encrypted_password).decode('utf-8')


if __name__ == "__main__":
    pswd = crypt(sys.argv[1])
    print(pswd)
    print(decrypt(pswd))
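
The script presumes an RSA key pair already sits under conf/. A minimal sketch of producing a compatible pair with pycryptodome; the paths and the 'Welcome' passphrase mirror what crypt() expects, while the key size is an assumption:

from Cryptodome.PublicKey import RSA

# Sketch only: generates the pair crypt() above expects under conf/.
key = RSA.generate(2048)  # key size is an assumption
with open('conf/private.pem', 'wb') as f:
    f.write(key.export_key(passphrase='Welcome', pkcs=8))  # passphrase matches importKey above
with open('conf/public.pem', 'wb') as f:
    f.write(key.publickey().export_key())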

View File

@ -1,28 +1,28 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import dotenv
import typing


def get_versions() -> typing.Mapping[str, typing.Any]:
    dotenv.load_dotenv(dotenv.find_dotenv())
    return dotenv.dotenv_values()


def get_rag_version() -> typing.Optional[str]:
    return get_versions().get("RAGFLOW_VERSION", "dev")
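
get_rag_version() simply reads RAGFLOW_VERSION from whatever .env file python-dotenv discovers, falling back to "dev". A minimal sketch; the version string is a placeholder and api.versions is the assumed module path:

# Sketch only: a one-line .env at the project root is enough, e.g.
#   RAGFLOW_VERSION=v0.9.0
from api.versions import get_rag_version  # assumed module path

print(get_rag_version())  # "v0.9.0" here; "dev" when the key is absent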