diff --git a/agent/component/message.py b/agent/component/message.py index 164716575..53d948e4b 100644 --- a/agent/component/message.py +++ b/agent/component/message.py @@ -204,10 +204,10 @@ class Message(ComponentBase): def _parse_markdown_table_lines(self, table_lines: list): """ - Parse a list of markdown table lines into a pandas DataFrame. + Parse a list of Markdown table lines into a pandas DataFrame. Args: - table_lines: List of strings, each representing a row in the markdown table + table_lines: List of strings, each representing a row in the Markdown table (excluding separator lines like |---|---|) Returns: @@ -278,7 +278,7 @@ class Message(ComponentBase): # Debug: log the content being parsed logging.info(f"XLSX Parser: Content length={len(content) if content else 0}, first 500 chars: {content[:500] if content else 'None'}") - # Try to parse ALL markdown tables from the content + # Try to parse ALL Markdown tables from the content # Each table will be written to a separate sheet tables = [] # List of (sheet_name, dataframe) diff --git a/common/data_source/__init__.py b/common/data_source/__init__.py index 66d393884..bcbc368cc 100644 --- a/common/data_source/__init__.py +++ b/common/data_source/__init__.py @@ -1,6 +1,26 @@ """ Thanks to https://github.com/onyx-dot-app/onyx + +Content of this directory is under the "MIT Expat" license as defined below. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. """ from .blob_connector import BlobStorageConnector diff --git a/common/data_source/confluence_connector.py b/common/data_source/confluence_connector.py index aff225703..51849b69a 100644 --- a/common/data_source/confluence_connector.py +++ b/common/data_source/confluence_connector.py @@ -717,7 +717,7 @@ class OnyxConfluence: """ The search/user endpoint can be used to fetch users. It's a separate endpoint from the content/search endpoint used only for users. - Otherwise it's very similar to the content/search endpoint. + It's very similar to the content/search endpoint. 
""" # this is needed since there is a live bug with Confluence Server/Data Center diff --git a/common/data_source/discord_connector.py b/common/data_source/discord_connector.py index 4c19a6d5e..e65a63241 100644 --- a/common/data_source/discord_connector.py +++ b/common/data_source/discord_connector.py @@ -233,8 +233,8 @@ class DiscordConnector(LoadConnector, PollConnector): def __init__( self, - server_ids: list[str] = [], - channel_names: list[str] = [], + server_ids: list[str] | None = None, + channel_names: list[str] | None = None, # YYYY-MM-DD start_date: str | None = None, batch_size: int = INDEX_BATCH_SIZE, diff --git a/common/data_source/gmail_connector.py b/common/data_source/gmail_connector.py index 1757f4ffe..e64db9847 100644 --- a/common/data_source/gmail_connector.py +++ b/common/data_source/gmail_connector.py @@ -1,5 +1,4 @@ import logging -import os from typing import Any from google.oauth2.credentials import Credentials as OAuthCredentials from google.oauth2.service_account import Credentials as ServiceAccountCredentials diff --git a/common/data_source/google_drive/connector.py b/common/data_source/google_drive/connector.py index 48628f490..39017dd4a 100644 --- a/common/data_source/google_drive/connector.py +++ b/common/data_source/google_drive/connector.py @@ -1210,7 +1210,7 @@ if __name__ == "__main__": creds = get_credentials_from_env(email, oauth=True) print("Credentials loaded successfully") print(f"{creds=}") - sys.exit(0) + # sys.exit(0) connector = GoogleDriveConnector( include_shared_drives=False, shared_drive_urls=None, diff --git a/common/data_source/google_drive/file_retrieval.py b/common/data_source/google_drive/file_retrieval.py index d2a8e0611..00bade157 100644 --- a/common/data_source/google_drive/file_retrieval.py +++ b/common/data_source/google_drive/file_retrieval.py @@ -341,6 +341,6 @@ def get_all_files_for_oauth( # Just in case we need to get the root folder id def get_root_folder_id(service: Resource) -> str: - # we dont 
paginate here because there is only one root folder per user + # we don't paginate here because there is only one root folder per user # https://developers.google.com/drive/api/guides/v2-to-v3-reference return service.files().get(fileId="root", fields=GoogleFields.ID.value).execute()[GoogleFields.ID.value] diff --git a/deepdoc/parser/resume/step_two.py b/deepdoc/parser/resume/step_two.py index 0aa3ad383..f23b6ad20 100644 --- a/deepdoc/parser/resume/step_two.py +++ b/deepdoc/parser/resume/step_two.py @@ -147,15 +147,11 @@ def forEdu(cv): edu_nst.append(e) cv["sch_rank_kwd"] = [] - if cv["school_rank_int"] <= 20 \ - or ("海外名校" in fea and cv["school_rank_int"] <= 200): + if cv["school_rank_int"] <= 20 or ("海外名校" in fea and cv["school_rank_int"] <= 200): cv["sch_rank_kwd"].append("顶尖学校") - elif cv["school_rank_int"] <= 50 and cv["school_rank_int"] > 20 \ - or ("海外名校" in fea and cv["school_rank_int"] <= 500 and \ - cv["school_rank_int"] > 200): + elif 50 >= cv["school_rank_int"] > 20 or ("海外名校" in fea and 500 >= cv["school_rank_int"] > 200): cv["sch_rank_kwd"].append("精英学校") - elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) \ - or ("海外名校" in fea and cv["school_rank_int"] > 500): + elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) or ("海外名校" in fea and cv["school_rank_int"] > 500): cv["sch_rank_kwd"].append("优质学校") else: cv["sch_rank_kwd"].append("一般学校") @@ -208,8 +204,7 @@ def forEdu(cv): cv["tag_kwd"].append("好学校") cv["tag_kwd"].append("好学历") break - if (len(cv.get("degree_kwd", [])) >= 1 and \ - "本科" in cv["degree_kwd"] and \ + if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"] and any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \ or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \ or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]): @@ -406,7 +401,7 @@ def forWork(cv): def turnTm2Dt(b): if not b: - return + return None b = 
str(b).strip() if re.match(r"[0-9]{10,}", b): b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10]))) @@ -416,7 +411,7 @@ def turnTm2Dt(b): def getYMD(b): y, m, d = "", "", "01" if not b: - return (y, m, d) + return y, m, d b = turnTm2Dt(b) if re.match(r"[0-9]{4}", b): y = int(b[:4]) @@ -430,7 +425,7 @@ def getYMD(b): d = "1" if not m or int(m) > 12 or int(m) < 1: m = "1" - return (y, m, d) + return y, m, d def birth(cv): @@ -480,22 +475,22 @@ def parse(cv): for k in rmkeys: del cv[k] - integerity = 0. + integrity = 0. flds_num = 0. def hasValues(flds): - nonlocal integerity, flds_num + nonlocal integrity, flds_num flds_num += len(flds) for f in flds: v = str(cv.get(f, "")) if len(v) > 0 and v != '0' and v != '[]': - integerity += 1 + integrity += 1 hasValues(tks_fld) hasValues(small_tks_fld) hasValues(kwd_fld) hasValues(num_fld) - cv["integerity_flt"] = integerity / flds_num + cv["integerity_flt"] = integrity / flds_num if cv.get("corporation_type"): for p, r in [(r"(公司|企业|其它|其他|Others*|\n|未填写|Enterprises|Company|companies)", ""), diff --git a/docs/guides/agent/agent_component_reference/docs_generator.md b/docs/guides/agent/agent_component_reference/docs_generator.md index 2fc0700f7..be4efa463 100644 --- a/docs/guides/agent/agent_component_reference/docs_generator.md +++ b/docs/guides/agent/agent_component_reference/docs_generator.md @@ -40,7 +40,7 @@ In the **Message** component, reference the `download` output variable from the ### Content -The main text content to include in the document. Supports markdown formatting: +The main text content to include in the document. Supports Markdown formatting: - **Bold**: `**text**` or `__text__` - **Italic**: `*text*` or `_text_` diff --git a/rag/prompts/analyze_task_system.md b/rag/prompts/analyze_task_system.md index 148e4113f..8a610aa3f 100644 --- a/rag/prompts/analyze_task_system.md +++ b/rag/prompts/analyze_task_system.md @@ -41,7 +41,7 @@ Scale depth to match complexity. 
Always stop once success criteria are met. **For HIGH (150–250 words for analysis only):** - Comprehensive objective analysis; Intent & Scope -- 5–8 step Plan with dependencies/parallelism +- 5–8-step Plan with dependencies/parallelism - **Uncertainty & Probes** (key unknowns → probe → stop condition) - Measurable Success Criteria; Failure detectors & fallbacks - **Source Plan** (evidence acquisition & validation)