Add license and Fix IDE warnings (#11985)

### What problem does this PR solve?

- Add license
- Fix IDE warnings

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-02-09 12:25:06 +08:00 · 2025-12-17 17:04:44 +08:00
parent 8e4d011b15
commit d38f8a1562
10 changed files with 41 additions and 27 deletions
--- a/agent/component/message.py
+++ b/agent/component/message.py
@ -204,10 +204,10 @@ class Message(ComponentBase):

    def _parse_markdown_table_lines(self, table_lines: list):
        """
-        Parse a list of markdown table lines into a pandas DataFrame.
+        Parse a list of Markdown table lines into a pandas DataFrame.
        
        Args:
-            table_lines: List of strings, each representing a row in the markdown table
+            table_lines: List of strings, each representing a row in the Markdown table
                        (excluding separator lines like |---|---|)
        
        Returns:
@ -278,7 +278,7 @@ class Message(ComponentBase):
                # Debug: log the content being parsed
                logging.info(f"XLSX Parser: Content length={len(content) if content else 0}, first 500 chars: {content[:500] if content else 'None'}")
                
-                # Try to parse ALL markdown tables from the content
+                # Try to parse ALL Markdown tables from the content
                # Each table will be written to a separate sheet
                tables = []  # List of (sheet_name, dataframe)
                
--- a/common/data_source/__init__.py
+++ b/common/data_source/__init__.py
@ -1,6 +1,26 @@

 """
 Thanks to https://github.com/onyx-dot-app/onyx
+
+Content of this directory is under the "MIT Expat" license as defined below.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
 """

 from .blob_connector import BlobStorageConnector
--- a/common/data_source/confluence_connector.py
+++ b/common/data_source/confluence_connector.py
@ -717,7 +717,7 @@ class OnyxConfluence:
        """
        The search/user endpoint can be used to fetch users.
        It's a separate endpoint from the content/search endpoint used only for users.
-        Otherwise it's very similar to the content/search endpoint.
+        It's very similar to the content/search endpoint.
        """

        # this is needed since there is a live bug with Confluence Server/Data Center
--- a/common/data_source/discord_connector.py
+++ b/common/data_source/discord_connector.py
@ -233,8 +233,8 @@ class DiscordConnector(LoadConnector, PollConnector):

    def __init__(
        self,
-        server_ids: list[str] = [],
-        channel_names: list[str] = [],
+        server_ids: list[str] | None = None,
+        channel_names: list[str] | None = None,
        # YYYY-MM-DD
        start_date: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
--- a/common/data_source/gmail_connector.py
+++ b/common/data_source/gmail_connector.py
@ -1,5 +1,4 @@
 import logging
-import os
 from typing import Any
 from google.oauth2.credentials import Credentials as OAuthCredentials
 from google.oauth2.service_account import Credentials as ServiceAccountCredentials
--- a/common/data_source/google_drive/connector.py
+++ b/common/data_source/google_drive/connector.py
@ -1210,7 +1210,7 @@ if __name__ == "__main__":
        creds = get_credentials_from_env(email, oauth=True)
        print("Credentials loaded successfully")
        print(f"{creds=}")
-        sys.exit(0)
+        # sys.exit(0)
        connector = GoogleDriveConnector(
            include_shared_drives=False,
            shared_drive_urls=None,
--- a/common/data_source/google_drive/file_retrieval.py
+++ b/common/data_source/google_drive/file_retrieval.py
@ -341,6 +341,6 @@ def get_all_files_for_oauth(

 # Just in case we need to get the root folder id
 def get_root_folder_id(service: Resource) -> str:
-    # we dont paginate here because there is only one root folder per user
+    # we don't paginate here because there is only one root folder per user
    # https://developers.google.com/drive/api/guides/v2-to-v3-reference
    return service.files().get(fileId="root", fields=GoogleFields.ID.value).execute()[GoogleFields.ID.value]
--- a/deepdoc/parser/resume/step_two.py
+++ b/deepdoc/parser/resume/step_two.py
@ -147,15 +147,11 @@ def forEdu(cv):
        edu_nst.append(e)

    cv["sch_rank_kwd"] = []
-    if cv["school_rank_int"] <= 20 \
-            or ("海外名校" in fea and cv["school_rank_int"] <= 200):
+    if cv["school_rank_int"] <= 20 or ("海外名校" in fea and cv["school_rank_int"] <= 200):
        cv["sch_rank_kwd"].append("顶尖学校")
-    elif cv["school_rank_int"] <= 50 and cv["school_rank_int"] > 20 \
-            or ("海外名校" in fea and cv["school_rank_int"] <= 500 and \
-                cv["school_rank_int"] > 200):
+    elif 50 >= cv["school_rank_int"] > 20 or ("海外名校" in fea and 500 >= cv["school_rank_int"] > 200):
        cv["sch_rank_kwd"].append("精英学校")
-    elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) \
-            or ("海外名校" in fea and cv["school_rank_int"] > 500):
+    elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) or ("海外名校" in fea and cv["school_rank_int"] > 500):
        cv["sch_rank_kwd"].append("优质学校")
    else:
        cv["sch_rank_kwd"].append("一般学校")
@ -208,8 +204,7 @@ def forEdu(cv):
                    cv["tag_kwd"].append("好学校")
                    cv["tag_kwd"].append("好学历")
                    break
-        if (len(cv.get("degree_kwd", [])) >= 1 and \
-            "本科" in cv["degree_kwd"] and \
+        if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"] and
            any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \
                or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \
                or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]):
@ -406,7 +401,7 @@ def forWork(cv):

 def turnTm2Dt(b):
    if not b:
-        return
+        return None
    b = str(b).strip()
    if re.match(r"[0-9]{10,}", b):
        b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10])))
@ -416,7 +411,7 @@ def turnTm2Dt(b):
 def getYMD(b):
    y, m, d = "", "", "01"
    if not b:
-        return (y, m, d)
+        return y, m, d
    b = turnTm2Dt(b)
    if re.match(r"[0-9]{4}", b):
        y = int(b[:4])
@ -430,7 +425,7 @@ def getYMD(b):
        d = "1"
    if not m or int(m) > 12 or int(m) < 1:
        m = "1"
-    return (y, m, d)
+    return y, m, d


 def birth(cv):
@ -480,22 +475,22 @@ def parse(cv):
    for k in rmkeys:
        del cv[k]

-    integerity = 0.
+    integrity = 0.
    flds_num = 0.

    def hasValues(flds):
-        nonlocal integerity, flds_num
+        nonlocal integrity, flds_num
        flds_num += len(flds)
        for f in flds:
            v = str(cv.get(f, ""))
            if len(v) > 0 and v != '0' and v != '[]':
-                integerity += 1
+                integrity += 1

    hasValues(tks_fld)
    hasValues(small_tks_fld)
    hasValues(kwd_fld)
    hasValues(num_fld)
-    cv["integerity_flt"] = integerity / flds_num
+    cv["integerity_flt"] = integrity / flds_num

    if cv.get("corporation_type"):
        for p, r in [(r"(公司|企业|其它|其他|Others*|\n|未填写|Enterprises|Company|companies)", ""),
--- a/docs/guides/agent/agent_component_reference/docs_generator.md
+++ b/docs/guides/agent/agent_component_reference/docs_generator.md
@ -40,7 +40,7 @@ In the **Message** component, reference the `download` output variable from the

 ### Content

-The main text content to include in the document. Supports markdown formatting:
+The main text content to include in the document. Supports Markdown formatting:

 - **Bold**: `**text**` or `__text__`
 - **Italic**: `*text*` or `_text_`
--- a/rag/prompts/analyze_task_system.md
+++ b/rag/prompts/analyze_task_system.md
@ -41,7 +41,7 @@ Scale depth to match complexity. Always stop once success criteria are met.

 **For HIGH (150–250 words for analysis only):**
 - Comprehensive objective analysis; Intent & Scope
- 5–8 step Plan with dependencies/parallelism
+- 5–8-step Plan with dependencies/parallelism
 - **Uncertainty & Probes** (key unknowns → probe → stop condition)
 - Measurable Success Criteria; Failure detectors & fallbacks
 - **Source Plan** (evidence acquisition & validation)