Use consistent log file names, introduced initLogger (#3403)

### What problem does this PR solve?

Use consistent log file names and introduce initLogger

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [x] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
This commit is contained in:
Zhichang Yu
2024-11-14 17:13:48 +08:00
committed by GitHub
parent ab4384e011
commit 30f6421760
75 changed files with 396 additions and 402 deletions

View File

@ -5,6 +5,7 @@ Reference:
- [graphrag](https://github.com/microsoft/graphrag)
"""
import logging
import argparse
import json
import re
@ -17,7 +18,6 @@ import tiktoken
from graphrag.claim_prompt import CLAIM_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
from rag.llm.chat_model import Base as CompletionLLM
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
from api.utils.log_utils import logger
DEFAULT_TUPLE_DELIMITER = "<|>"
DEFAULT_RECORD_DELIMITER = "##"
@ -126,7 +126,7 @@ class ClaimExtractor:
]
source_doc_map[document_id] = text
except Exception as e:
logger.exception("error extracting claim")
logging.exception("error extracting claim")
self._on_error(
e,
traceback.format_exc(),
@ -265,4 +265,4 @@ if __name__ == "__main__":
"claim_description": ""
}
claim = ex(info)
logger.info(json.dumps(claim.output, ensure_ascii=False, indent=2))
logging.info(json.dumps(claim.output, ensure_ascii=False, indent=2))

View File

@ -5,6 +5,7 @@ Reference:
- [graphrag](https://github.com/microsoft/graphrag)
"""
import logging
import json
import re
import traceback
@ -19,7 +20,6 @@ from rag.llm.chat_model import Base as CompletionLLM
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, dict_has_keys_with_types
from rag.utils import num_tokens_from_string
from timeit import default_timer as timer
from api.utils.log_utils import logger
@dataclass
@ -80,7 +80,7 @@ class CommunityReportsExtractor:
response = re.sub(r"[^\}]*$", "", response)
response = re.sub(r"\{\{", "{", response)
response = re.sub(r"\}\}", "}", response)
logger.info(response)
logging.debug(response)
response = json.loads(response)
if not dict_has_keys_with_types(response, [
("title", str),
@ -92,7 +92,7 @@ class CommunityReportsExtractor:
response["weight"] = weight
response["entities"] = ents
except Exception as e:
logger.exception("CommunityReportsExtractor got exception")
logging.exception("CommunityReportsExtractor got exception")
self._on_error(e, traceback.format_exc(), None)
continue

View File

@ -5,19 +5,11 @@ Reference:
- [graphrag](https://github.com/microsoft/graphrag)
"""
import argparse
import html
import json
import logging
import numbers
import re
import traceback
from collections.abc import Callable
from dataclasses import dataclass
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
from rag.llm.chat_model import Base as CompletionLLM
import networkx as nx
from rag.utils import num_tokens_from_string

View File

@ -13,8 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import itertools
import logging
import itertools
import re
import traceback
from dataclasses import dataclass

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import os
from concurrent.futures import ThreadPoolExecutor
import json
@ -28,7 +29,6 @@ from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
from graphrag.mind_map_extractor import MindMapExtractor
from rag.nlp import rag_tokenizer
from rag.utils import num_tokens_from_string
from api.utils.log_utils import logger
def graph_merge(g1, g2):
@ -95,7 +95,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
chunks = []
for n, attr in graph.nodes(data=True):
if attr.get("rank", 0) == 0:
logger.info(f"Ignore entity: {n}")
logging.debug(f"Ignore entity: {n}")
continue
chunk = {
"name_kwd": n,
@ -137,7 +137,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
mg = mindmap(_chunks).output
if not len(mg.keys()): return chunks
logger.info(json.dumps(mg, ensure_ascii=False, indent=2))
logging.debug(json.dumps(mg, ensure_ascii=False, indent=2))
chunks.append(
{
"content_with_weight": json.dumps(mg, ensure_ascii=False, indent=2),

View File

@ -14,8 +14,6 @@ from graspologic.utils import largest_connected_component
import networkx as nx
from networkx import is_empty
log = logging.getLogger(__name__)
def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
"""Ensure an undirected graph with the same relationships will always be read the same way."""
@ -99,7 +97,7 @@ def run(graph: nx.Graph, args: dict[str, Any]) -> dict[int, dict[str, dict]]:
max_cluster_size = args.get("max_cluster_size", 12)
use_lcc = args.get("use_lcc", True)
if args.get("verbose", False):
log.info(
logging.debug(
"Running leiden with max_cluster_size=%s, lcc=%s", max_cluster_size, use_lcc
)
if not graph.nodes(): return {}

View File

@ -14,8 +14,8 @@
# limitations under the License.
#
import collections
import logging
import collections
import os
import re
import traceback
@ -29,7 +29,6 @@ from rag.llm.chat_model import Base as CompletionLLM
import markdown_to_json
from functools import reduce
from rag.utils import num_tokens_from_string
from api.utils.log_utils import logger
@dataclass
@ -193,6 +192,6 @@ class MindMapExtractor:
gen_conf = {"temperature": 0.5}
response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
response = re.sub(r"```[^\n]*", "", response)
logger.info(response)
logger.info(self._todict(markdown_to_json.dictify(response)))
logging.debug(response)
logging.debug(self._todict(markdown_to_json.dictify(response)))
return self._todict(markdown_to_json.dictify(response))