Use consistent log file names, introduced initLogger (#3403)

### What problem does this PR solve?

Use consistent log file names and introduce initLogger

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [x] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
This commit is contained in:
Zhichang Yu
2024-11-14 17:13:48 +08:00
committed by GitHub
parent ab4384e011
commit 30f6421760
75 changed files with 396 additions and 402 deletions

View File

@ -5,6 +5,7 @@ Reference:
- [graphrag](https://github.com/microsoft/graphrag)
"""
import logging
import argparse
import json
import re
@ -17,7 +18,6 @@ import tiktoken
from graphrag.claim_prompt import CLAIM_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
from rag.llm.chat_model import Base as CompletionLLM
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
from api.utils.log_utils import logger
DEFAULT_TUPLE_DELIMITER = "<|>"
DEFAULT_RECORD_DELIMITER = "##"
@ -126,7 +126,7 @@ class ClaimExtractor:
]
source_doc_map[document_id] = text
except Exception as e:
logger.exception("error extracting claim")
logging.exception("error extracting claim")
self._on_error(
e,
traceback.format_exc(),
@ -265,4 +265,4 @@ if __name__ == "__main__":
"claim_description": ""
}
claim = ex(info)
logger.info(json.dumps(claim.output, ensure_ascii=False, indent=2))
logging.info(json.dumps(claim.output, ensure_ascii=False, indent=2))

View File

@ -5,6 +5,7 @@ Reference:
- [graphrag](https://github.com/microsoft/graphrag)
"""
import logging
import json
import re
import traceback
@ -19,7 +20,6 @@ from rag.llm.chat_model import Base as CompletionLLM
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, dict_has_keys_with_types
from rag.utils import num_tokens_from_string
from timeit import default_timer as timer
from api.utils.log_utils import logger
@dataclass
@ -80,7 +80,7 @@ class CommunityReportsExtractor:
response = re.sub(r"[^\}]*$", "", response)
response = re.sub(r"\{\{", "{", response)
response = re.sub(r"\}\}", "}", response)
logger.info(response)
logging.debug(response)
response = json.loads(response)
if not dict_has_keys_with_types(response, [
("title", str),
@ -92,7 +92,7 @@ class CommunityReportsExtractor:
response["weight"] = weight
response["entities"] = ents
except Exception as e:
logger.exception("CommunityReportsExtractor got exception")
logging.exception("CommunityReportsExtractor got exception")
self._on_error(e, traceback.format_exc(), None)
continue

View File

@ -5,19 +5,11 @@ Reference:
- [graphrag](https://github.com/microsoft/graphrag)
"""
import argparse
import html
import json
import logging
import numbers
import re
import traceback
from collections.abc import Callable
from dataclasses import dataclass
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
from rag.llm.chat_model import Base as CompletionLLM
import networkx as nx
from rag.utils import num_tokens_from_string

View File

@ -13,8 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import itertools
import logging
import itertools
import re
import traceback
from dataclasses import dataclass

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import os
from concurrent.futures import ThreadPoolExecutor
import json
@ -28,7 +29,6 @@ from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
from graphrag.mind_map_extractor import MindMapExtractor
from rag.nlp import rag_tokenizer
from rag.utils import num_tokens_from_string
from api.utils.log_utils import logger
def graph_merge(g1, g2):
@ -95,7 +95,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
chunks = []
for n, attr in graph.nodes(data=True):
if attr.get("rank", 0) == 0:
logger.info(f"Ignore entity: {n}")
logging.debug(f"Ignore entity: {n}")
continue
chunk = {
"name_kwd": n,
@ -137,7 +137,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
mg = mindmap(_chunks).output
if not len(mg.keys()): return chunks
logger.info(json.dumps(mg, ensure_ascii=False, indent=2))
logging.debug(json.dumps(mg, ensure_ascii=False, indent=2))
chunks.append(
{
"content_with_weight": json.dumps(mg, ensure_ascii=False, indent=2),

View File

@ -14,8 +14,6 @@ from graspologic.utils import largest_connected_component
import networkx as nx
from networkx import is_empty
log = logging.getLogger(__name__)
def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
"""Ensure an undirected graph with the same relationships will always be read the same way."""
@ -99,7 +97,7 @@ def run(graph: nx.Graph, args: dict[str, Any]) -> dict[int, dict[str, dict]]:
max_cluster_size = args.get("max_cluster_size", 12)
use_lcc = args.get("use_lcc", True)
if args.get("verbose", False):
log.info(
logging.debug(
"Running leiden with max_cluster_size=%s, lcc=%s", max_cluster_size, use_lcc
)
if not graph.nodes(): return {}

View File

@ -14,8 +14,8 @@
# limitations under the License.
#
import collections
import logging
import collections
import os
import re
import traceback
@ -29,7 +29,6 @@ from rag.llm.chat_model import Base as CompletionLLM
import markdown_to_json
from functools import reduce
from rag.utils import num_tokens_from_string
from api.utils.log_utils import logger
@dataclass
@ -193,6 +192,6 @@ class MindMapExtractor:
gen_conf = {"temperature": 0.5}
response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
response = re.sub(r"```[^\n]*", "", response)
logger.info(response)
logger.info(self._todict(markdown_to_json.dictify(response)))
logging.debug(response)
logging.debug(self._todict(markdown_to_json.dictify(response)))
return self._todict(markdown_to_json.dictify(response))