Rework logging (#3358)

Unified all log files into one.

### What problem does this PR solve?

Unified all log files into one.

### Type of change

- [x] Refactoring
2026-01-31 15:45:08 +08:00 · 2024-11-12 17:35:13 +08:00
parent 567a7563e7
commit a2a5631da4
75 changed files with 481 additions and 853 deletions
--- a/graphrag/claim_extractor.py
+++ b/graphrag/claim_extractor.py
@ -7,7 +7,6 @@ Reference:

 import argparse
 import json
-import logging
 import re
 import traceback
 from dataclasses import dataclass
@ -18,12 +17,12 @@ import tiktoken
 from graphrag.claim_prompt import CLAIM_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
 from rag.llm.chat_model import Base as CompletionLLM
 from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
+from api.utils.log_utils import logger

 DEFAULT_TUPLE_DELIMITER = "<|>"
 DEFAULT_RECORD_DELIMITER = "##"
 DEFAULT_COMPLETION_DELIMITER = "<|COMPLETE|>"
 CLAIM_MAX_GLEANINGS = 1
-log = logging.getLogger(__name__)


@dataclass
@ -127,7 +126,7 @@ class ClaimExtractor:
                ]
                source_doc_map[document_id] = text
            except Exception as e:
-                log.exception("error extracting claim")
+                logger.exception("error extracting claim")
                self._on_error(
                    e,
                    traceback.format_exc(),
@ -266,4 +265,4 @@ if __name__ == "__main__":
        "claim_description": ""
    }
    claim = ex(info)
-    print(json.dumps(claim.output, ensure_ascii=False, indent=2))
+    logger.info(json.dumps(claim.output, ensure_ascii=False, indent=2))
--- a/graphrag/community_reports_extractor.py
+++ b/graphrag/community_reports_extractor.py
@ -6,11 +6,10 @@ Reference:
 """

 import json
-import logging
 import re
 import traceback
 from dataclasses import dataclass
-from typing import Any, List, Callable
+from typing import List, Callable
 import networkx as nx
 import pandas as pd
 from graphrag import leiden
@ -20,8 +19,7 @@ from rag.llm.chat_model import Base as CompletionLLM
 from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, dict_has_keys_with_types
 from rag.utils import num_tokens_from_string
 from timeit import default_timer as timer
-
-log = logging.getLogger(__name__)
+from api.utils.log_utils import logger


@dataclass
@ -82,7 +80,7 @@ class CommunityReportsExtractor:
                    response = re.sub(r"[^\}]*$", "", response)
                    response = re.sub(r"\{\{", "{", response)
                    response = re.sub(r"\}\}", "}", response)
-                    print(response)
+                    logger.info(response)
                    response = json.loads(response)
                    if not dict_has_keys_with_types(response, [
                                ("title", str),
@ -94,7 +92,7 @@ class CommunityReportsExtractor:
                    response["weight"] = weight
                    response["entities"] = ents
                except Exception as e:
-                    print("ERROR: ", traceback.format_exc())
+                    logger.exception("CommunityReportsExtractor got exception")
                    self._on_error(e, traceback.format_exc(), None)
                    continue

@ -127,5 +125,4 @@ class CommunityReportsExtractor:
        report_sections = "\n\n".join(
            f"## {finding_summary(f)}\n\n{finding_explanation(f)}" for f in findings
        )
-     
        return f"# {title}\n\n{summary}\n\n{report_sections}"
--- a/graphrag/index.py
+++ b/graphrag/index.py
@ -28,6 +28,7 @@ from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
 from graphrag.mind_map_extractor import MindMapExtractor
 from rag.nlp import rag_tokenizer
 from rag.utils import num_tokens_from_string
+from api.utils.log_utils import logger


 def graph_merge(g1, g2):
@ -94,7 +95,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
    chunks = []
    for n, attr in graph.nodes(data=True):
        if attr.get("rank", 0) == 0:
-            print(f"Ignore entity: {n}")
+            logger.info(f"Ignore entity: {n}")
            continue
        chunk = {
            "name_kwd": n,
@ -136,7 +137,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
    mg = mindmap(_chunks).output
    if not len(mg.keys()): return chunks

-    print(json.dumps(mg, ensure_ascii=False, indent=2))
+    logger.info(json.dumps(mg, ensure_ascii=False, indent=2))
    chunks.append(
        {
            "content_with_weight": json.dumps(mg, ensure_ascii=False, indent=2),
--- a/graphrag/mind_map_extractor.py
+++ b/graphrag/mind_map_extractor.py
@ -18,7 +18,6 @@ import collections
 import logging
 import os
 import re
-import logging
 import traceback
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
@ -30,6 +29,7 @@ from rag.llm.chat_model import Base as CompletionLLM
 import markdown_to_json
 from functools import reduce
 from rag.utils import num_tokens_from_string
+from api.utils.log_utils import logger


@dataclass
@ -193,6 +193,6 @@ class MindMapExtractor:
        gen_conf = {"temperature": 0.5}
        response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
        response = re.sub(r"```[^\n]*", "", response)
-        print(response)
-        print("---------------------------------------------------\n", self._todict(markdown_to_json.dictify(response)))
+        logger.info(response)
+        logger.info(self._todict(markdown_to_json.dictify(response)))
        return self._todict(markdown_to_json.dictify(response))