Mirror of https://github.com/infiniflow/ragflow.git (synced 2025-12-18 19:46:44 +08:00)
Add license and Fix IDE warnings (#11985)
### What problem does this PR solve?

- Add license
- Fix IDE warnings

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
@@ -204,10 +204,10 @@ class Message(ComponentBase):
 
     def _parse_markdown_table_lines(self, table_lines: list):
         """
-        Parse a list of markdown table lines into a pandas DataFrame.
+        Parse a list of Markdown table lines into a pandas DataFrame.
 
         Args:
-            table_lines: List of strings, each representing a row in the markdown table
+            table_lines: List of strings, each representing a row in the Markdown table
                          (excluding separator lines like |---|---|)
 
         Returns:
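The hunk above only touches the docstring of `_parse_markdown_table_lines`. For context, a minimal sketch of how a parser like this could turn pipe-delimited rows into a pandas DataFrame; the splitting logic below is an assumption for illustration, not RAGFlow's actual implementation:

```python
# Hypothetical sketch, not the RAGFlow implementation.
import pandas as pd

def parse_markdown_table_lines(table_lines: list[str]) -> pd.DataFrame:
    # Split each "| a | b |" row on pipes and drop the empty edge cells.
    rows = [[c.strip() for c in line.strip().strip("|").split("|")] for line in table_lines]
    if not rows:
        return pd.DataFrame()
    header, *body = rows
    # Pad or truncate body rows so every row matches the header width.
    body = [r[:len(header)] + [""] * (len(header) - len(r)) for r in body]
    return pd.DataFrame(body, columns=header)
```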
@@ -278,7 +278,7 @@ class Message(ComponentBase):
         # Debug: log the content being parsed
         logging.info(f"XLSX Parser: Content length={len(content) if content else 0}, first 500 chars: {content[:500] if content else 'None'}")
 
-        # Try to parse ALL markdown tables from the content
+        # Try to parse ALL Markdown tables from the content
         # Each table will be written to a separate sheet
         tables = []  # List of (sheet_name, dataframe)
 
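The comments in this hunk describe writing each parsed Markdown table to its own worksheet. A minimal sketch of that pattern with pandas; the helper name and the `openpyxl` engine choice are assumptions:

```python
# Hypothetical sketch of the "one table per sheet" pattern described above.
# Requires the openpyxl package for xlsx output.
import pandas as pd

def write_tables_to_xlsx(tables: list[tuple[str, pd.DataFrame]], path: str) -> None:
    with pd.ExcelWriter(path, engine="openpyxl") as writer:
        for sheet_name, df in tables:
            # Excel caps sheet names at 31 characters.
            df.to_excel(writer, sheet_name=sheet_name[:31], index=False)
```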
@@ -1,6 +1,26 @@
 """
 Thanks to https://github.com/onyx-dot-app/onyx
 
+Content of this directory is under the "MIT Expat" license as defined below.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
 """
 
 from .blob_connector import BlobStorageConnector
@@ -717,7 +717,7 @@ class OnyxConfluence:
         """
         The search/user endpoint can be used to fetch users.
         It's a separate endpoint from the content/search endpoint used only for users.
-        Otherwise it's very similar to the content/search endpoint.
+        It's very similar to the content/search endpoint.
         """
 
         # this is needed since there is a live bug with Confluence Server/Data Center
@@ -233,8 +233,8 @@ class DiscordConnector(LoadConnector, PollConnector):
 
     def __init__(
         self,
-        server_ids: list[str] = [],
-        channel_names: list[str] = [],
+        server_ids: list[str] | None = None,
+        channel_names: list[str] | None = None,
         # YYYY-MM-DD
         start_date: str | None = None,
         batch_size: int = INDEX_BATCH_SIZE,
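This hunk replaces mutable list defaults with `None`, the usual fix for the IDE warning about shared default arguments. A small standalone illustration of the pitfall being avoided:

```python
# Minimal illustration of the mutable-default pitfall removed above.
def bad_append(item, bucket: list = []):
    # The same list object is reused across calls, so items accumulate.
    bucket.append(item)
    return bucket

def good_append(item, bucket: list | None = None):
    # A fresh list is created on every call unless one is passed in.
    if bucket is None:
        bucket = []
    bucket.append(item)
    return bucket

print(bad_append(1), bad_append(2))    # [1, 2] [1, 2]  (one shared list)
print(good_append(1), good_append(2))  # [1] [2]
```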
@@ -1,5 +1,4 @@
 import logging
-import os
 from typing import Any
 from google.oauth2.credentials import Credentials as OAuthCredentials
 from google.oauth2.service_account import Credentials as ServiceAccountCredentials
@ -1210,7 +1210,7 @@ if __name__ == "__main__":
|
|||||||
creds = get_credentials_from_env(email, oauth=True)
|
creds = get_credentials_from_env(email, oauth=True)
|
||||||
print("Credentials loaded successfully")
|
print("Credentials loaded successfully")
|
||||||
print(f"{creds=}")
|
print(f"{creds=}")
|
||||||
sys.exit(0)
|
# sys.exit(0)
|
||||||
connector = GoogleDriveConnector(
|
connector = GoogleDriveConnector(
|
||||||
include_shared_drives=False,
|
include_shared_drives=False,
|
||||||
shared_drive_urls=None,
|
shared_drive_urls=None,
|
||||||
|
|||||||
@@ -341,6 +341,6 @@ def get_all_files_for_oauth(
 
 # Just in case we need to get the root folder id
 def get_root_folder_id(service: Resource) -> str:
-    # we dont paginate here because there is only one root folder per user
+    # we don't paginate here because there is only one root folder per user
     # https://developers.google.com/drive/api/guides/v2-to-v3-reference
     return service.files().get(fileId="root", fields=GoogleFields.ID.value).execute()[GoogleFields.ID.value]
@@ -147,15 +147,11 @@ def forEdu(cv):
             edu_nst.append(e)
 
     cv["sch_rank_kwd"] = []
-    if cv["school_rank_int"] <= 20 \
-            or ("海外名校" in fea and cv["school_rank_int"] <= 200):
+    if cv["school_rank_int"] <= 20 or ("海外名校" in fea and cv["school_rank_int"] <= 200):
         cv["sch_rank_kwd"].append("顶尖学校")
-    elif cv["school_rank_int"] <= 50 and cv["school_rank_int"] > 20 \
-            or ("海外名校" in fea and cv["school_rank_int"] <= 500 and \
-                cv["school_rank_int"] > 200):
+    elif 50 >= cv["school_rank_int"] > 20 or ("海外名校" in fea and 500 >= cv["school_rank_int"] > 200):
         cv["sch_rank_kwd"].append("精英学校")
-    elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) \
-            or ("海外名校" in fea and cv["school_rank_int"] > 500):
+    elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) or ("海外名校" in fea and cv["school_rank_int"] > 500):
         cv["sch_rank_kwd"].append("优质学校")
     else:
         cv["sch_rank_kwd"].append("一般学校")
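The rewritten `elif` branches rely on Python's chained comparisons, so `50 >= rank > 20` reads as `50 >= rank and rank > 20`. A quick check of that equivalence:

```python
# Chained comparison vs. the explicit two-clause form used before the refactor.
for rank in (10, 30, 80):
    assert (50 >= rank > 20) == (rank <= 50 and rank > 20)
```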
@@ -208,8 +204,7 @@ def forEdu(cv):
             cv["tag_kwd"].append("好学校")
             cv["tag_kwd"].append("好学历")
             break
-    if (len(cv.get("degree_kwd", [])) >= 1 and \
-        "本科" in cv["degree_kwd"] and \
+    if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"] and
         any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \
             or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \
             or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]):
@@ -406,7 +401,7 @@ def forWork(cv):
 
 def turnTm2Dt(b):
     if not b:
-        return
+        return None
     b = str(b).strip()
     if re.match(r"[0-9]{10,}", b):
         b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10])))
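For reference, the guarded branch above converts 10-plus-digit epoch timestamps into a date string; a standalone check of that conversion (the printed value depends on the local timezone):

```python
# Standalone check of the epoch-to-datetime conversion used in turnTm2Dt.
import re
import time

def epoch_to_dt(b: str) -> str:
    if re.match(r"[0-9]{10,}", b):
        # Only the first ten digits (seconds) are used, as in the diff above.
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10])))
    return b

print(epoch_to_dt("1700000000"))  # e.g. "2023-11-14 ..." in UTC-adjacent timezones
```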
@@ -416,7 +411,7 @@ def turnTm2Dt(b):
 def getYMD(b):
     y, m, d = "", "", "01"
     if not b:
-        return (y, m, d)
+        return y, m, d
     b = turnTm2Dt(b)
     if re.match(r"[0-9]{4}", b):
         y = int(b[:4])
@@ -430,7 +425,7 @@ def getYMD(b):
         d = "1"
     if not m or int(m) > 12 or int(m) < 1:
         m = "1"
-    return (y, m, d)
+    return y, m, d
 
 
 def birth(cv):
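Dropping the parentheses in `return (y, m, d)` is purely stylistic; both forms return the same tuple:

```python
# The parentheses around a returned tuple are redundant.
def with_parens():
    return ("2024", "1", "01")

def without_parens():
    return "2024", "1", "01"

assert with_parens() == without_parens() == ("2024", "1", "01")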
@@ -480,22 +475,22 @@ def parse(cv):
     for k in rmkeys:
         del cv[k]
 
-    integerity = 0.
+    integrity = 0.
     flds_num = 0.
 
     def hasValues(flds):
-        nonlocal integerity, flds_num
+        nonlocal integrity, flds_num
         flds_num += len(flds)
         for f in flds:
             v = str(cv.get(f, ""))
             if len(v) > 0 and v != '0' and v != '[]':
-                integerity += 1
+                integrity += 1
 
     hasValues(tks_fld)
     hasValues(small_tks_fld)
     hasValues(kwd_fld)
     hasValues(num_fld)
-    cv["integerity_flt"] = integerity / flds_num
+    cv["integerity_flt"] = integrity / flds_num
 
     if cv.get("corporation_type"):
         for p, r in [(r"(公司|企业|其它|其他|Others*|\n|未填写|Enterprises|Company|companies)", ""),
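The renamed `integrity` counter feeds a simple completeness ratio: the share of expected fields that carry a non-empty, non-zero value. A standalone sketch of that idea (the field names here are illustrative, not the real ones):

```python
# Standalone sketch of the completeness ratio computed in parse(cv).
def completeness(cv: dict, fields: list[str]) -> float:
    filled = sum(1 for f in fields if str(cv.get(f, "")) not in ("", "0", "[]"))
    return filled / len(fields) if fields else 0.0

# One field of three is filled, so the ratio is about 0.33.
print(completeness({"name_kwd": "张三", "age_int": 0}, ["name_kwd", "age_int", "email_tks"]))
```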
@@ -40,7 +40,7 @@ In the **Message** component, reference the `download` output variable from the
 
 ### Content
 
-The main text content to include in the document. Supports markdown formatting:
+The main text content to include in the document. Supports Markdown formatting:
 
 - **Bold**: `**text**` or `__text__`
 - **Italic**: `*text*` or `_text_`
@@ -41,7 +41,7 @@ Scale depth to match complexity. Always stop once success criteria are met.
 
 **For HIGH (150–250 words for analysis only):**
 - Comprehensive objective analysis; Intent & Scope
-- 5–8 step Plan with dependencies/parallelism
+- 5–8 steps Plan with dependencies/parallelism
 - **Uncertainty & Probes** (key unknowns → probe → stop condition)
 - Measurable Success Criteria; Failure detectors & fallbacks
 - **Source Plan** (evidence acquisition & validation)