Use consistent log file names, introduced initLogger (#3403)

### What problem does this PR solve?

Use consistent log file names, introduced initLogger
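Every service entry point now initializes the root logger once, and the rest of the codebase logs through the stdlib `logging` module, so each process writes to a predictably named log file. Below is a hypothetical sketch of what an `initRootLogger`-style helper might do; the real implementation lives in `api/utils/log_utils.py` and may differ in handler choice, format, and rotation policy:

```python
# Hypothetical sketch only -- names and rotation policy are assumptions,
# not the actual api/utils/log_utils.py implementation.
import logging
import os
from logging.handlers import RotatingFileHandler

def init_root_logger(script_path: str, log_dir: str = "logs") -> None:
    # Derive a consistent log file name from the entry script,
    # e.g. .../task_executor.py -> logs/task_executor.log
    base = os.path.splitext(os.path.basename(script_path))[0]
    os.makedirs(log_dir, exist_ok=True)
    handler = RotatingFileHandler(
        os.path.join(log_dir, f"{base}.log"),
        maxBytes=10 * 1024 * 1024,  # rotate at ~10 MB
        backupCount=5,
    )
    handler.setFormatter(logging.Formatter(
        "%(asctime)s %(levelname)s %(name)s %(message)s"))
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    root.addHandler(handler)
```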

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [x] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
Commit 30f6421760 (parent ab4384e011) by Zhichang Yu, 2024-11-14 17:13:48 +08:00, committed by GitHub.
75 changed files with 396 additions and 402 deletions.

View File

@@ -13,19 +13,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import logging
 import time
 import traceback
 from api.db.db_models import close_connection
 from api.db.services.task_service import TaskService
-from api.utils.log_utils import logger
 from rag.utils.storage_factory import STORAGE_IMPL
 from rag.utils.redis_conn import REDIS_CONN

 def collect():
     doc_locations = TaskService.get_ongoing_doc_name()
-    logger.info(doc_locations)
+    logging.debug(doc_locations)
     if len(doc_locations) == 0:
         time.sleep(1)
         return
@@ -34,7 +34,7 @@ def collect():
 def main():
     locations = collect()
     if not locations:return
-    logger.info(f"TASKS: {len(locations)}")
+    logging.info(f"TASKS: {len(locations)}")
     for kb_id, loc in locations:
         try:
             if REDIS_CONN.is_alive():
@@ -43,7 +43,7 @@ def main():
                 if REDIS_CONN.exist(key):continue
                 file_bin = STORAGE_IMPL.get(kb_id, loc)
                 REDIS_CONN.transaction(key, file_bin, 12 * 60)
-                logger.info("CACHE: {}".format(loc))
+                logging.info("CACHE: {}".format(loc))
             except Exception as e:
                 traceback.print_stack(e)
         except Exception as e:
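This first hunk shows the pattern repeated across all 75 files: drop the shared `logger` object imported from `api.utils.log_utils` and call the stdlib `logging` module directly, relying on the entry point to have configured the root logger. (Note the per-poll message also drops from `info` to `debug`, quieting routine output.) A minimal sketch of the after-state, with placeholder data:

```python
import logging

def collect():
    doc_locations = ["kb_id/doc_location.pdf"]  # placeholder data for the sketch
    # No module-level logger object: records propagate to whatever
    # handlers the entry point installed on the root logger.
    logging.debug(doc_locations)
```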

View File

@@ -13,11 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import logging
 import discord
 import requests
 import base64
 import asyncio
-from api.utils.log_utils import logger

 URL = '{YOUR_IP_ADDRESS:PORT}/v1/api/completion_aibotk' # Default: https://demo.ragflow.io/v1/api/completion_aibotk
@@ -37,7 +37,7 @@ client = discord.Client(intents=intents)

 @client.event
 async def on_ready():
-    logger.info(f'We have logged in as {client.user}')
+    logging.info(f'We have logged in as {client.user}')

 @client.event

View File

@@ -13,9 +13,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import logging
+import inspect
+from api.utils.log_utils import initRootLogger
+initRootLogger(inspect.getfile(inspect.currentframe()))
+for module in ["pdfminer"]:
+    module_logger = logging.getLogger(module)
+    module_logger.setLevel(logging.WARNING)
+for module in ["peewee"]:
+    module_logger = logging.getLogger(module)
+    module_logger.handlers.clear()
+    module_logger.propagate = True
+
 import datetime
 import json
 import logging
 import os
 import hashlib
 import copy
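The hunk above is the heart of the change for each entry point: `initRootLogger` is called once with the path of the running script, noisy `pdfminer` output is capped at WARNING, and `peewee` is left to propagate into the root handlers. The `inspect.getfile(inspect.currentframe())` idiom recovers the current script's path, which the helper can turn into a stable log file name. A standalone sketch of that naming step (the derived base name is an assumption about `initRootLogger`'s behavior):

```python
import inspect
import os

# Path of the file this code lives in, e.g. .../rag/svr/task_executor.py
script = inspect.getfile(inspect.currentframe())

# Assumed naming rule: strip directory and extension to get a stable
# per-service log file name, e.g. "task_executor" -> task_executor.log
print(os.path.splitext(os.path.basename(script))[0])
```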
@@ -42,7 +53,6 @@ from api.db.db_models import close_connection
 from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive, one, audio, knowledge_graph, email
 from rag.nlp import search, rag_tokenizer
 from rag.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor
-from api.utils.log_utils import logger, LOG_FILE
 from rag.settings import DOC_MAXIMUM_SIZE, SVR_QUEUE_NAME
 from rag.utils import rmSpace, num_tokens_from_string
 from rag.utils.redis_conn import REDIS_CONN, Payload
@@ -90,7 +100,7 @@ def set_progress(task_id, from_page=0, to_page=-1, prog=None, msg="Processing...
     try:
         TaskService.update_progress(task_id, d)
     except Exception:
-        logger.exception(f"set_progress({task_id}) got exception")
+        logging.exception(f"set_progress({task_id}) got exception")

     close_connection()
     if cancel:
@@ -110,7 +120,7 @@ def collect():
             time.sleep(1)
             return pd.DataFrame()
     except Exception:
-        logger.exception("Get task event from queue exception")
+        logging.exception("Get task event from queue exception")
         return pd.DataFrame()

     msg = PAYLOAD.get_message()
@@ -118,11 +128,11 @@ def collect():
         return pd.DataFrame()

     if TaskService.do_cancel(msg["id"]):
-        logger.info("Task {} has been canceled.".format(msg["id"]))
+        logging.info("Task {} has been canceled.".format(msg["id"]))
         return pd.DataFrame()
     tasks = TaskService.get_tasks(msg["id"])
     if not tasks:
-        logger.warning("{} empty task!".format(msg["id"]))
+        logging.warning("{} empty task!".format(msg["id"]))
         return []
     tasks = pd.DataFrame(tasks)
@@ -151,29 +161,29 @@ def build(row):
         st = timer()
         bucket, name = File2DocumentService.get_storage_address(doc_id=row["doc_id"])
         binary = get_storage_binary(bucket, name)
-        logger.info(
+        logging.info(
             "From minio({}) {}/{}".format(timer() - st, row["location"], row["name"]))
     except TimeoutError:
         callback(-1, "Internal server error: Fetch file from minio timeout. Could you try it again.")
-        logger.exception("Minio {}/{} got timeout: Fetch file from minio timeout.".format(row["location"], row["name"]))
+        logging.exception("Minio {}/{} got timeout: Fetch file from minio timeout.".format(row["location"], row["name"]))
         return
     except Exception as e:
         if re.search("(No such file|not found)", str(e)):
             callback(-1, "Can not find file <%s> from minio. Could you try it again?" % row["name"])
         else:
             callback(-1, "Get file from minio: %s" % str(e).replace("'", ""))
-        logger.exception("Chunking {}/{} got exception".format(row["location"], row["name"]))
+        logging.exception("Chunking {}/{} got exception".format(row["location"], row["name"]))
         return

     try:
         cks = chunker.chunk(row["name"], binary=binary, from_page=row["from_page"],
                             to_page=row["to_page"], lang=row["language"], callback=callback,
                             kb_id=row["kb_id"], parser_config=row["parser_config"], tenant_id=row["tenant_id"])
-        logger.info("Chunking({}) {}/{} done".format(timer() - st, row["location"], row["name"]))
+        logging.info("Chunking({}) {}/{} done".format(timer() - st, row["location"], row["name"]))
     except Exception as e:
         callback(-1, "Internal server error while chunking: %s" %
                  str(e).replace("'", ""))
-        logger.exception("Chunking {}/{} got exception".format(row["location"], row["name"]))
+        logging.exception("Chunking {}/{} got exception".format(row["location"], row["name"]))
         return

     docs = []
@@ -210,12 +220,12 @@ def build(row):
                 STORAGE_IMPL.put(row["kb_id"], d["id"], output_buffer.getvalue())
                 el += timer() - st
             except Exception:
-                logger.exception("Saving image of chunk {}/{}/{} got exception".format(row["location"], row["name"], d["_id"]))
+                logging.exception("Saving image of chunk {}/{}/{} got exception".format(row["location"], row["name"], d["_id"]))

             d["img_id"] = "{}-{}".format(row["kb_id"], d["id"])
             del d["image"]
             docs.append(d)
-    logger.info("MINIO PUT({}):{}".format(row["name"], el))
+    logging.info("MINIO PUT({}):{}".format(row["name"], el))

     if row["parser_config"].get("auto_keywords", 0):
         st = timer()
@@ -345,7 +355,7 @@ def main():
             embd_mdl = LLMBundle(r["tenant_id"], LLMType.EMBEDDING, llm_name=r["embd_id"], lang=r["language"])
         except Exception as e:
             callback(-1, msg=str(e))
-            logger.exception("LLMBundle got exception")
+            logging.exception("LLMBundle got exception")
             continue

         if r.get("task_type", "") == "raptor":
@@ -354,12 +364,12 @@ def main():
                 cks, tk_count, vector_size = run_raptor(r, chat_mdl, embd_mdl, callback)
             except Exception as e:
                 callback(-1, msg=str(e))
-                logger.exception("run_raptor got exception")
+                logging.exception("run_raptor got exception")
                 continue
         else:
             st = timer()
             cks = build(r)
-            logger.info("Build chunks({}): {}".format(r["name"], timer() - st))
+            logging.info("Build chunks({}): {}".format(r["name"], timer() - st))
             if cks is None:
                 continue
             if not cks:
@@ -375,12 +385,12 @@ def main():
             tk_count, vector_size = embedding(cks, embd_mdl, r["parser_config"], callback)
         except Exception as e:
             callback(-1, "Embedding error:{}".format(str(e)))
-            logger.exception("run_rembedding got exception")
+            logging.exception("run_rembedding got exception")
             tk_count = 0
-        logger.info("Embedding elapsed({}): {:.2f}".format(r["name"], timer() - st))
+        logging.info("Embedding elapsed({}): {:.2f}".format(r["name"], timer() - st))
         callback(msg="Finished embedding (in {:.2f}s)! Start to build index!".format(timer() - st))
-        # logger.info(f"task_executor init_kb index {search.index_name(r["tenant_id"])} embd_mdl {embd_mdl.llm_name} vector length {vector_size}")
+        # logging.info(f"task_executor init_kb index {search.index_name(r["tenant_id"])} embd_mdl {embd_mdl.llm_name} vector length {vector_size}")
         init_kb(r, vector_size)
         chunk_count = len(set([c["id"] for c in cks]))
         st = timer()
@@ -391,11 +401,11 @@ def main():
             if b % 128 == 0:
                 callback(prog=0.8 + 0.1 * (b + 1) / len(cks), msg="")

-        logger.info("Indexing elapsed({}): {:.2f}".format(r["name"], timer() - st))
+        logging.info("Indexing elapsed({}): {:.2f}".format(r["name"], timer() - st))
         if es_r:
             callback(-1, f"Insert chunk error, detail info please check {LOG_FILE}. Please also check ES status!")
             docStoreConn.delete({"doc_id": r["doc_id"]}, search.index_name(r["tenant_id"]), r["kb_id"])
-            logger.error('Insert chunk error: ' + str(es_r))
+            logging.error('Insert chunk error: ' + str(es_r))
         else:
             if TaskService.do_cancel(r["id"]):
                 docStoreConn.delete({"doc_id": r["doc_id"]}, search.index_name(r["tenant_id"]), r["kb_id"])
@@ -404,7 +414,7 @@ def main():
             callback(1., "Done!")
             DocumentService.increment_chunk_num(
                 r["doc_id"], r["kb_id"], tk_count, chunk_count, 0)
-            logger.info(
+            logging.info(
                 "Chunk doc({}), token({}), chunks({}), elapsed:{:.2f}".format(
                     r["id"], tk_count, len(cks), timer() - st))
@@ -421,16 +431,11 @@ def report_status():
             obj[CONSUMER_NAME] = obj[CONSUMER_NAME][-60:]
             REDIS_CONN.set_obj("TASKEXE", obj, 60*2)
         except Exception:
-            logger.exception("report_status got exception")
+            logging.exception("report_status got exception")
         time.sleep(30)

 if __name__ == "__main__":
-    peewee_logger = logging.getLogger('peewee')
-    peewee_logger.propagate = False
-    peewee_logger.addHandler(logger.handlers[0])
-    peewee_logger.setLevel(logger.handlers[0].level)
     exe = ThreadPoolExecutor(max_workers=1)
     exe.submit(report_status)
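The deleted `__main__` block had wired peewee's logger to the custom logger's first handler and disabled propagation; with the import-time setup in the first `task_executor.py` hunk (handlers cleared, propagation on), peewee records now reach the root handlers without per-script plumbing. A standalone demonstration of that effect, using `logging.basicConfig` as a stand-in for `initRootLogger`:

```python
import logging

# Stand-in for initRootLogger: install a handler on the root logger.
logging.basicConfig(level=logging.DEBUG, format="%(name)s: %(message)s")

peewee_logger = logging.getLogger("peewee")
peewee_logger.handlers.clear()  # drop any library-installed handlers
peewee_logger.propagate = True  # let records bubble up to the root
peewee_logger.debug("SELECT 1;")  # prints "peewee: SELECT 1;" via the root handler
```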