Rework logging (#3358)

Unified all log files into one.

### What problem does this PR solve?

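Unified all log files into one. Modules that previously created their own logger with `logging.getLogger(__name__)` or wrote to stdout with `print` now use the shared `logger` imported from `api.utils.log_utils`, as the hunks below show.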

### Type of change

- [x] Refactoring
This commit is contained in:
Zhichang Yu
2024-11-12 17:35:13 +08:00
committed by GitHub
parent 567a7563e7
commit a2a5631da4
75 changed files with 481 additions and 853 deletions

View File

@ -7,7 +7,6 @@ Reference:
import argparse
import json
import logging
import re
import traceback
from dataclasses import dataclass
@ -18,12 +17,12 @@ import tiktoken
from graphrag.claim_prompt import CLAIM_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
from rag.llm.chat_model import Base as CompletionLLM
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
from api.utils.log_utils import logger
DEFAULT_TUPLE_DELIMITER = "<|>"
DEFAULT_RECORD_DELIMITER = "##"
DEFAULT_COMPLETION_DELIMITER = "<|COMPLETE|>"
CLAIM_MAX_GLEANINGS = 1
log = logging.getLogger(__name__)
@dataclass
@ -127,7 +126,7 @@ class ClaimExtractor:
]
source_doc_map[document_id] = text
except Exception as e:
log.exception("error extracting claim")
logger.exception("error extracting claim")
self._on_error(
e,
traceback.format_exc(),
@ -266,4 +265,4 @@ if __name__ == "__main__":
"claim_description": ""
}
claim = ex(info)
print(json.dumps(claim.output, ensure_ascii=False, indent=2))
logger.info(json.dumps(claim.output, ensure_ascii=False, indent=2))

View File

@ -6,11 +6,10 @@ Reference:
"""
import json
import logging
import re
import traceback
from dataclasses import dataclass
from typing import Any, List, Callable
from typing import List, Callable
import networkx as nx
import pandas as pd
from graphrag import leiden
@ -20,8 +19,7 @@ from rag.llm.chat_model import Base as CompletionLLM
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, dict_has_keys_with_types
from rag.utils import num_tokens_from_string
from timeit import default_timer as timer
log = logging.getLogger(__name__)
from api.utils.log_utils import logger
@dataclass
@ -82,7 +80,7 @@ class CommunityReportsExtractor:
response = re.sub(r"[^\}]*$", "", response)
response = re.sub(r"\{\{", "{", response)
response = re.sub(r"\}\}", "}", response)
print(response)
logger.info(response)
response = json.loads(response)
if not dict_has_keys_with_types(response, [
("title", str),
@ -94,7 +92,7 @@ class CommunityReportsExtractor:
response["weight"] = weight
response["entities"] = ents
except Exception as e:
print("ERROR: ", traceback.format_exc())
logger.exception("CommunityReportsExtractor got exception")
self._on_error(e, traceback.format_exc(), None)
continue
@ -127,5 +125,4 @@ class CommunityReportsExtractor:
report_sections = "\n\n".join(
f"## {finding_summary(f)}\n\n{finding_explanation(f)}" for f in findings
)
return f"# {title}\n\n{summary}\n\n{report_sections}"

View File

@ -28,6 +28,7 @@ from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
from graphrag.mind_map_extractor import MindMapExtractor
from rag.nlp import rag_tokenizer
from rag.utils import num_tokens_from_string
from api.utils.log_utils import logger
def graph_merge(g1, g2):
@ -94,7 +95,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
chunks = []
for n, attr in graph.nodes(data=True):
if attr.get("rank", 0) == 0:
print(f"Ignore entity: {n}")
logger.info(f"Ignore entity: {n}")
continue
chunk = {
"name_kwd": n,
@ -136,7 +137,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
mg = mindmap(_chunks).output
if not len(mg.keys()): return chunks
print(json.dumps(mg, ensure_ascii=False, indent=2))
logger.info(json.dumps(mg, ensure_ascii=False, indent=2))
chunks.append(
{
"content_with_weight": json.dumps(mg, ensure_ascii=False, indent=2),

View File

@ -18,7 +18,6 @@ import collections
import logging
import os
import re
import logging
import traceback
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
@ -30,6 +29,7 @@ from rag.llm.chat_model import Base as CompletionLLM
import markdown_to_json
from functools import reduce
from rag.utils import num_tokens_from_string
from api.utils.log_utils import logger
@dataclass
@ -193,6 +193,6 @@ class MindMapExtractor:
gen_conf = {"temperature": 0.5}
response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
response = re.sub(r"```[^\n]*", "", response)
print(response)
print("---------------------------------------------------\n", self._todict(markdown_to_json.dictify(response)))
logger.info(response)
logger.info(self._todict(markdown_to_json.dictify(response)))
return self._todict(markdown_to_json.dictify(response))