Rework logging (#3358)

Unified all log files into one.

### What problem does this PR solve?

Unified all log files into one.

### Type of change

- [x] Refactoring
Authored by Zhichang Yu on 2024-11-12 17:35:13 +08:00, committed by GitHub
parent 567a7563e7 · commit a2a5631da4
75 changed files with 481 additions and 853 deletions
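The diffs below replace the scattered per-module loggers (`LOGGER = getLogger()`, `stat_logger`, `chat_logger`, `database_logger`, `sql_logger`) and ad-hoc `print` calls with a single shared `logger` imported from `api.utils.log_utils`. The rewritten `log_utils` module itself is not part of this excerpt; the sketch below is only an illustration of what a single-file setup like this might look like (the file name, rotation limits, and format are assumptions, not the actual implementation):

```python
# api/utils/log_utils.py -- illustrative sketch only, not the actual module
import logging
import os
from logging.handlers import RotatingFileHandler

# Assumed location of the unified log file; the real path is not shown here.
LOG_FILE = os.environ.get("RAGFLOW_LOG_FILE", "ragflow_server.log")

logger = logging.getLogger("ragflow")
logger.setLevel(logging.INFO)

# One handler, one file: every module imports this same logger object.
handler = RotatingFileHandler(LOG_FILE, maxBytes=10 * 1024 * 1024, backupCount=5)
handler.setFormatter(logging.Formatter(
    "%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s"))
logger.addHandler(handler)
```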

View File

@@ -30,12 +30,9 @@ from peewee import (
)
from playhouse.pool import PooledMySQLDatabase, PooledPostgresqlDatabase
from api.db import SerializedType, ParserType
from api.settings import DATABASE, stat_logger, SECRET_KEY, DATABASE_TYPE
from api.utils.log_utils import getLogger
from api.settings import DATABASE, SECRET_KEY, DATABASE_TYPE
from api import utils
LOGGER = getLogger()
from api.utils.log_utils import logger
def singleton(cls, *args, **kw):
instances = {}
@@ -288,7 +285,7 @@ class BaseDataBase:
database_config = DATABASE.copy()
db_name = database_config.pop("name")
self.database_connection = PooledDatabase[DATABASE_TYPE.upper()].value(db_name, **database_config)
stat_logger.info('init database on cluster mode successfully')
logger.info('init database on cluster mode successfully')
class PostgresDatabaseLock:
def __init__(self, lock_name, timeout=10, db=None):
@@ -396,7 +393,7 @@ def close_connection():
if DB:
DB.close_stale(age=30)
except Exception as e:
LOGGER.exception(e)
logger.exception(e)
class DataBaseModel(BaseModel):
@@ -412,15 +409,15 @@ def init_database_tables(alter_fields=[]):
for name, obj in members:
if obj != DataBaseModel and issubclass(obj, DataBaseModel):
table_objs.append(obj)
LOGGER.info(f"start create table {obj.__name__}")
logger.info(f"start create table {obj.__name__}")
try:
obj.create_table()
LOGGER.info(f"create table success: {obj.__name__}")
logger.info(f"create table success: {obj.__name__}")
except Exception as e:
LOGGER.exception(e)
logger.exception(e)
create_failed_list.append(obj.__name__)
if create_failed_list:
LOGGER.info(f"create tables failed: {create_failed_list}")
logger.info(f"create tables failed: {create_failed_list}")
raise Exception(f"create tables failed: {create_failed_list}")
migrate_db()

View File

@@ -22,12 +22,6 @@ from playhouse.pool import PooledMySQLDatabase
from api.utils import current_timestamp, timestamp_to_date
from api.db.db_models import DB, DataBaseModel
from api.db.runtime_config import RuntimeConfig
from api.utils.log_utils import getLogger
from enum import Enum
LOGGER = getLogger()
@DB.connection_context()

View File

@@ -30,6 +30,7 @@ from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantL
from api.db.services.user_service import TenantService, UserTenantService
from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL
from api.utils.file_utils import get_project_base_directory
from api.utils.log_utils import logger
def encode_to_base64(input_string):
@@ -69,36 +70,34 @@ def init_superuser():
"api_key": API_KEY, "api_base": LLM_BASE_URL})
if not UserService.save(**user_info):
print("\033[93m【ERROR】\033[0mcan't init admin.")
logger.info("can't init admin.")
return
TenantService.insert(**tenant)
UserTenantService.insert(**usr_tenant)
TenantLLMService.insert_many(tenant_llm)
print(
"【INFO】Super user initialized. \033[93memail: admin@ragflow.io, password: admin\033[0m. Changing the password after logging in is strongly recommended.")
logger.info(
"Super user initialized. email: admin@ragflow.io, password: admin. Changing the password after logging in is strongly recommended.")
chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
msg = chat_mdl.chat(system="", history=[
{"role": "user", "content": "Hello!"}], gen_conf={})
if msg.find("ERROR: ") == 0:
print(
"\33[91m【ERROR】\33[0m: ",
logger.error(
"'{}' dosen't work. {}".format(
tenant["llm_id"],
msg))
embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
v, c = embd_mdl.encode(["Hello!"])
if c == 0:
print(
"\33[91m【ERROR】\33[0m:",
" '{}' dosen't work!".format(
logger.error(
"'{}' dosen't work!".format(
tenant["embd_id"]))
def init_llm_factory():
try:
LLMService.filter_delete([(LLM.fid == "MiniMax" or LLM.fid == "Minimax")])
except Exception as e:
except Exception:
pass
factory_llm_infos = json.load(
@@ -111,14 +110,14 @@ def init_llm_factory():
llm_infos = factory_llm_info.pop("llm")
try:
LLMFactoriesService.save(**factory_llm_info)
except Exception as e:
except Exception:
pass
LLMService.filter_delete([LLM.fid == factory_llm_info["name"]])
for llm_info in llm_infos:
llm_info["fid"] = factory_llm_info["name"]
try:
LLMService.save(**llm_info)
except Exception as e:
except Exception:
pass
LLMFactoriesService.filter_delete([LLMFactories.name == "Local"])
@@ -145,7 +144,7 @@ def init_llm_factory():
row = deepcopy(row)
row["llm_name"] = "text-embedding-3-large"
TenantLLMService.save(**row)
except Exception as e:
except Exception:
pass
break
for kb_id in KnowledgebaseService.get_all_ids():
@@ -169,9 +168,8 @@ def add_graph_templates():
CanvasTemplateService.save(**cnvs)
except:
CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
except Exception as e:
print("Add graph templates error: ", e)
print("------------", flush=True)
except Exception:
logger.exception("Add graph templates error: ")
def init_web_data():
@@ -182,7 +180,7 @@ def init_web_data():
# init_superuser()
add_graph_templates()
print("init web data success:{}".format(time.time() - start_time))
logger.info("init web data success:{}".format(time.time() - start_time))
if __name__ == '__main__':

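The recurring pattern in this file: bare `print(...)` calls with hand-rolled ANSI escapes are swapped for leveled `logger` calls, so init-time messages get the same timestamps, severity filtering, and destination as the rest of the system. Taken from the hunk above (note that `logger.error` would arguably suit this message better than `logger.info`):

```python
# Before: ad-hoc stdout output with manual ANSI coloring
print("\033[93m【ERROR】\033[0mcan't init admin.")

# After: a leveled call that lands in the unified log file
from api.utils.log_utils import logger
logger.info("can't init admin.")
```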
View File

@@ -1,21 +0,0 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import operator
import time
import typing
from api.utils.log_utils import sql_logger
import peewee

View File

@@ -26,11 +26,12 @@ from api.db.db_models import Dialog, Conversation,DB
from api.db.services.common_service import CommonService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
from api.settings import chat_logger, retrievaler, kg_retrievaler
from api.settings import retrievaler, kg_retrievaler
from rag.app.resume import forbidden_select_fields4resume
from rag.nlp.search import index_name
from rag.utils import rmSpace, num_tokens_from_string, encoder
from api.utils.file_utils import get_project_base_directory
from api.utils.log_utils import logger
class DialogService(CommonService):
@@ -177,7 +178,7 @@ def chat(dialog, messages, stream=True, **kwargs):
tts_mdl = LLMBundle(dialog.tenant_id, LLMType.TTS)
# try to use sql if field mapping is good to go
if field_map:
chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
logger.info("Use SQL to retrieval:{}".format(questions[-1]))
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
if ans:
yield ans
@@ -219,7 +220,7 @@ def chat(dialog, messages, stream=True, **kwargs):
doc_ids=attachments,
top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
chat_logger.info(
logger.info(
"{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
retrieval_tm = timer()
@@ -291,7 +292,7 @@ def chat(dialog, messages, stream=True, **kwargs):
yield decorate_answer(answer)
else:
answer = chat_mdl.chat(prompt, msg[1:], gen_conf)
chat_logger.info("User: {}|Assistant: {}".format(
logger.info("User: {}|Assistant: {}".format(
msg[-1]["content"], answer))
res = decorate_answer(answer)
res["audio_binary"] = tts(tts_mdl, answer)
@@ -319,8 +320,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
nonlocal sys_prompt, user_promt, question, tried_times
sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_promt}], {
"temperature": 0.06})
print(user_promt, sql)
chat_logger.info(f"{question}”==>{user_promt} get SQL: {sql}")
logger.info(f"{question} ==> {user_promt} get SQL: {sql}")
sql = re.sub(r"[\r\n]+", " ", sql.lower())
sql = re.sub(r".*select ", "select ", sql.lower())
sql = re.sub(r" +", " ", sql)
@@ -340,9 +340,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
flds.append(k)
sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:]
print(f"{question} get SQL(refined): {sql}")
chat_logger.info(f"{question}” get SQL(refined): {sql}")
logger.info(f"{question} get SQL(refined): {sql}")
tried_times += 1
return retrievaler.sql_retrieval(sql, format="json"), sql
@@ -371,10 +369,9 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
question, sql, tbl["error"]
)
tbl, sql = get_table()
chat_logger.info("TRY it again: {}".format(sql))
logger.info("TRY it again: {}".format(sql))
chat_logger.info("GET table: {}".format(tbl))
print(tbl)
logger.info("GET table: {}".format(tbl))
if tbl.get("error") or len(tbl["rows"]) == 0:
return None
@@ -404,7 +401,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows)
if not docid_idx or not docnm_idx:
chat_logger.warning("SQL missing field: " + sql)
logger.warning("SQL missing field: " + sql)
return {
"answer": "\n".join([clmns, line, rows]),
"reference": {"chunks": [], "doc_aggs": []},

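A stylistic note on the rewritten calls in this file: they interpolate eagerly with f-strings and `str.format`. Stdlib logging also accepts lazy `%`-style arguments, which skips the string building entirely whenever the level is filtered out. A small illustration (the values are made up):

```python
import logging

logger = logging.getLogger("ragflow")
question, sql = "total sales?", "select doc_id, docnm_kwd from t"

# Eager: the f-string is built even if INFO records are discarded.
logger.info(f"{question} ==> get SQL: {sql}")

# Lazy: interpolation happens only if a handler actually emits the record.
logger.info("%s ==> get SQL: %s", question, sql)
```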
View File

@@ -17,7 +17,6 @@ import hashlib
import json
import random
import re
import traceback
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
from datetime import datetime
@@ -26,7 +25,7 @@ from io import BytesIO
from peewee import fn
from api.db.db_utils import bulk_insert_into_db
from api.settings import stat_logger, docStoreConn
from api.settings import docStoreConn
from api.utils import current_timestamp, get_format_time, get_uuid
from graphrag.mind_map_extractor import MindMapExtractor
from rag.settings import SVR_QUEUE_NAME
@@ -40,6 +39,7 @@ from api.db.services.common_service import CommonService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db import StatusEnum
from rag.utils.redis_conn import REDIS_CONN
from api.utils.log_utils import logger
class DocumentService(CommonService):
@@ -387,7 +387,7 @@ class DocumentService(CommonService):
cls.update_by_id(d["id"], info)
except Exception as e:
if str(e).find("'0'") < 0:
stat_logger.error("fetch task exception:" + str(e))
logger.exception("fetch task exception")
@classmethod
@DB.connection_context()
@@ -544,7 +544,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
"knowledge_graph_kwd": "mind_map"
})
except Exception as e:
stat_logger.error("Mind map generation error:", traceback.format_exc())
logger.exception("Mind map generation error")
vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
assert len(cks) == len(vects)
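A side benefit visible in this hunk: `logger.exception(...)` logs at ERROR level and appends the active traceback automatically when called inside an `except` block, which is why both `import traceback` and the explicit `traceback.format_exc()` argument could be dropped. (The removed call was also subtly broken: it passed the traceback as a `%`-style argument to a message with no placeholder, so stdlib logging could never interpolate it.) A minimal illustration:

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

try:
    1 / 0
except Exception:
    # Records the message at ERROR level plus the full traceback.
    logger.exception("Mind map generation error")
```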

View File

@@ -28,6 +28,7 @@ from api.db.services.file2document_service import File2DocumentService
from api.utils import get_uuid
from api.utils.file_utils import filename_type, thumbnail_img
from rag.utils.storage_factory import STORAGE_IMPL
from api.utils.log_utils import logger
class FileService(CommonService):
@@ -272,8 +273,8 @@ class FileService(CommonService):
cls.delete_folder_by_pf_id(user_id, file.id)
return cls.model.delete().where((cls.model.tenant_id == user_id)
& (cls.model.id == folder_id)).execute(),
except Exception as e:
print(e)
except Exception:
logger.exception("delete_folder_by_pf_id")
raise RuntimeError("Database error (File retrieval)!")
@classmethod
@@ -321,8 +322,8 @@
def move_file(cls, file_ids, folder_id):
try:
cls.filter_update((cls.model.id << file_ids, ), { 'parent_id': folder_id })
except Exception as e:
print(e)
except Exception:
logger.exception("move_file")
raise RuntimeError("Database error (File move)!")
@classmethod

View File

@@ -14,12 +14,12 @@
# limitations under the License.
#
from api.db.services.user_service import TenantService
from api.settings import database_logger
from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel, TTSModel
from api.db import LLMType
from api.db.db_models import DB
from api.db.db_models import LLMFactories, LLM, TenantLLM
from api.db.services.common_service import CommonService
from api.utils.log_utils import logger
class LLMFactoriesService(CommonService):
@@ -209,40 +209,40 @@ class LLMBundle(object):
emd, used_tokens = self.mdl.encode(texts, batch_size)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens):
database_logger.error(
"Can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
logger.error(
"LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
return emd, used_tokens
def encode_queries(self, query: str):
emd, used_tokens = self.mdl.encode_queries(query)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens):
database_logger.error(
"Can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
logger.error(
"LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
return emd, used_tokens
def similarity(self, query: str, texts: list):
sim, used_tokens = self.mdl.similarity(query, texts)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens):
database_logger.error(
"Can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
logger.error(
"LLMBundle.similarity can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
return sim, used_tokens
def describe(self, image, max_tokens=300):
txt, used_tokens = self.mdl.describe(image, max_tokens)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens):
database_logger.error(
"Can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
logger.error(
"LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
return txt
def transcription(self, audio):
txt, used_tokens = self.mdl.transcription(audio)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens):
database_logger.error(
"Can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
logger.error(
"LLMBundle.transcription can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
return txt
def tts(self, text):
@@ -250,8 +250,8 @@ class LLMBundle(object):
if isinstance(chunk,int):
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, chunk, self.llm_name):
database_logger.error(
"Can't update token usage for {}/TTS".format(self.tenant_id))
logger.error(
"LLMBundle.tts can't update token usage for {}/TTS".format(self.tenant_id))
return
yield chunk
@@ -259,8 +259,8 @@ class LLMBundle(object):
txt, used_tokens = self.mdl.chat(system, history, gen_conf)
if isinstance(txt, int) and not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens, self.llm_name):
database_logger.error(
"Can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
logger.error(
"LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
return txt
def chat_streamly(self, system, history, gen_conf):
@@ -268,7 +268,7 @@ class LLMBundle(object):
if isinstance(txt, int):
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, txt, self.llm_name):
database_logger.error(
"Can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
logger.error(
"LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
return
yield txt
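One trade-off of a single shared `logger`, visible throughout `LLMBundle`: the context that separately named loggers used to carry now has to be spelled out in every message (`LLMBundle.encode can't update ...`). A common alternative, not what this commit does, is per-module child loggers that propagate to one shared handler, so `%(name)s` supplies the context automatically:

```python
import logging

# Configure the parent once; children inherit its handler via propagation.
parent = logging.getLogger("ragflow")
parent.setLevel(logging.INFO)
handler = logging.FileHandler("ragflow_server.log")  # illustrative file name
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s"))
parent.addHandler(handler)

# A module-level child: records carry "ragflow.llm_service" in %(name)s.
log = logging.getLogger("ragflow.llm_service")
log.error("can't update token usage for %s/EMBEDDING used_tokens: %s", "tenant-1", 42)
```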