mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-18 03:26:42 +08:00
Add license and Fix IDE warnings (#11985)
### What problem does this PR solve?

- Add license
- Fix IDE warnings

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
@ -204,10 +204,10 @@ class Message(ComponentBase):
|
||||
|
||||
def _parse_markdown_table_lines(self, table_lines: list):
|
||||
"""
|
||||
Parse a list of markdown table lines into a pandas DataFrame.
|
||||
Parse a list of Markdown table lines into a pandas DataFrame.
|
||||
|
||||
Args:
|
||||
table_lines: List of strings, each representing a row in the markdown table
|
||||
table_lines: List of strings, each representing a row in the Markdown table
|
||||
(excluding separator lines like |---|---|)
|
||||
|
||||
Returns:
|
||||
@ -278,7 +278,7 @@ class Message(ComponentBase):
|
||||
# Debug: log the content being parsed
|
||||
logging.info(f"XLSX Parser: Content length={len(content) if content else 0}, first 500 chars: {content[:500] if content else 'None'}")
|
||||
|
||||
# Try to parse ALL markdown tables from the content
|
||||
# Try to parse ALL Markdown tables from the content
|
||||
# Each table will be written to a separate sheet
|
||||
tables = [] # List of (sheet_name, dataframe)
|
||||
|
||||
|
||||
@ -1,6 +1,26 @@
|
||||
|
||||
"""
|
||||
Thanks to https://github.com/onyx-dot-app/onyx
|
||||
|
||||
Content of this directory is under the "MIT Expat" license as defined below.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
"""
|
||||
|
||||
from .blob_connector import BlobStorageConnector
|
||||
|
||||
@ -717,7 +717,7 @@ class OnyxConfluence:
|
||||
"""
|
||||
The search/user endpoint can be used to fetch users.
|
||||
It's a separate endpoint from the content/search endpoint used only for users.
|
||||
Otherwise it's very similar to the content/search endpoint.
|
||||
It's very similar to the content/search endpoint.
|
||||
"""
|
||||
|
||||
# this is needed since there is a live bug with Confluence Server/Data Center
|
||||
|
||||
@ -233,8 +233,8 @@ class DiscordConnector(LoadConnector, PollConnector):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
server_ids: list[str] = [],
|
||||
channel_names: list[str] = [],
|
||||
server_ids: list[str] | None = None,
|
||||
channel_names: list[str] | None = None,
|
||||
# YYYY-MM-DD
|
||||
start_date: str | None = None,
|
||||
batch_size: int = INDEX_BATCH_SIZE,
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
import logging
|
||||
import os
|
||||
from typing import Any
|
||||
from google.oauth2.credentials import Credentials as OAuthCredentials
|
||||
from google.oauth2.service_account import Credentials as ServiceAccountCredentials
|
||||
|
||||
@ -1210,7 +1210,7 @@ if __name__ == "__main__":
|
||||
creds = get_credentials_from_env(email, oauth=True)
|
||||
print("Credentials loaded successfully")
|
||||
print(f"{creds=}")
|
||||
sys.exit(0)
|
||||
# sys.exit(0)
|
||||
connector = GoogleDriveConnector(
|
||||
include_shared_drives=False,
|
||||
shared_drive_urls=None,
|
||||
|
||||
@ -341,6 +341,6 @@ def get_all_files_for_oauth(
|
||||
|
||||
# Just in case we need to get the root folder id
|
||||
def get_root_folder_id(service: Resource) -> str:
|
||||
# we dont paginate here because there is only one root folder per user
|
||||
# we don't paginate here because there is only one root folder per user
|
||||
# https://developers.google.com/drive/api/guides/v2-to-v3-reference
|
||||
return service.files().get(fileId="root", fields=GoogleFields.ID.value).execute()[GoogleFields.ID.value]
|
||||
|
||||
@ -147,15 +147,11 @@ def forEdu(cv):
|
||||
edu_nst.append(e)
|
||||
|
||||
cv["sch_rank_kwd"] = []
|
||||
if cv["school_rank_int"] <= 20 \
|
||||
or ("海外名校" in fea and cv["school_rank_int"] <= 200):
|
||||
if cv["school_rank_int"] <= 20 or ("海外名校" in fea and cv["school_rank_int"] <= 200):
|
||||
cv["sch_rank_kwd"].append("顶尖学校")
|
||||
elif cv["school_rank_int"] <= 50 and cv["school_rank_int"] > 20 \
|
||||
or ("海外名校" in fea and cv["school_rank_int"] <= 500 and \
|
||||
cv["school_rank_int"] > 200):
|
||||
elif 50 >= cv["school_rank_int"] > 20 or ("海外名校" in fea and 500 >= cv["school_rank_int"] > 200):
|
||||
cv["sch_rank_kwd"].append("精英学校")
|
||||
elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) \
|
||||
or ("海外名校" in fea and cv["school_rank_int"] > 500):
|
||||
elif cv["school_rank_int"] > 50 and ("985" in fea or "211" in fea) or ("海外名校" in fea and cv["school_rank_int"] > 500):
|
||||
cv["sch_rank_kwd"].append("优质学校")
|
||||
else:
|
||||
cv["sch_rank_kwd"].append("一般学校")
|
||||
@ -208,8 +204,7 @@ def forEdu(cv):
|
||||
cv["tag_kwd"].append("好学校")
|
||||
cv["tag_kwd"].append("好学历")
|
||||
break
|
||||
if (len(cv.get("degree_kwd", [])) >= 1 and \
|
||||
"本科" in cv["degree_kwd"] and \
|
||||
if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"] and
|
||||
any([d.lower() in ["硕士", "博士", "mba", "博士"] for d in cv.get("degree_kwd", [])])) \
|
||||
or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \
|
||||
or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]):
|
||||
@ -406,7 +401,7 @@ def forWork(cv):
|
||||
|
||||
def turnTm2Dt(b):
|
||||
if not b:
|
||||
return
|
||||
return None
|
||||
b = str(b).strip()
|
||||
if re.match(r"[0-9]{10,}", b):
|
||||
b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10])))
|
||||
@ -416,7 +411,7 @@ def turnTm2Dt(b):
|
||||
def getYMD(b):
|
||||
y, m, d = "", "", "01"
|
||||
if not b:
|
||||
return (y, m, d)
|
||||
return y, m, d
|
||||
b = turnTm2Dt(b)
|
||||
if re.match(r"[0-9]{4}", b):
|
||||
y = int(b[:4])
|
||||
@ -430,7 +425,7 @@ def getYMD(b):
|
||||
d = "1"
|
||||
if not m or int(m) > 12 or int(m) < 1:
|
||||
m = "1"
|
||||
return (y, m, d)
|
||||
return y, m, d
|
||||
|
||||
|
||||
def birth(cv):
|
||||
@ -480,22 +475,22 @@ def parse(cv):
|
||||
for k in rmkeys:
|
||||
del cv[k]
|
||||
|
||||
integerity = 0.
|
||||
integrity = 0.
|
||||
flds_num = 0.
|
||||
|
||||
def hasValues(flds):
|
||||
nonlocal integerity, flds_num
|
||||
nonlocal integrity, flds_num
|
||||
flds_num += len(flds)
|
||||
for f in flds:
|
||||
v = str(cv.get(f, ""))
|
||||
if len(v) > 0 and v != '0' and v != '[]':
|
||||
integerity += 1
|
||||
integrity += 1
|
||||
|
||||
hasValues(tks_fld)
|
||||
hasValues(small_tks_fld)
|
||||
hasValues(kwd_fld)
|
||||
hasValues(num_fld)
|
||||
cv["integerity_flt"] = integerity / flds_num
|
||||
cv["integerity_flt"] = integrity / flds_num
|
||||
|
||||
if cv.get("corporation_type"):
|
||||
for p, r in [(r"(公司|企业|其它|其他|Others*|\n|未填写|Enterprises|Company|companies)", ""),
|
||||
|
||||
@ -40,7 +40,7 @@ In the **Message** component, reference the `download` output variable from the
|
||||
|
||||
### Content
|
||||
|
||||
The main text content to include in the document. Supports markdown formatting:
|
||||
The main text content to include in the document. Supports Markdown formatting:
|
||||
|
||||
- **Bold**: `**text**` or `__text__`
|
||||
- **Italic**: `*text*` or `_text_`
|
||||
|
||||
@ -41,7 +41,7 @@ Scale depth to match complexity. Always stop once success criteria are met.
|
||||
|
||||
**For HIGH (150–250 words for analysis only):**
|
||||
- Comprehensive objective analysis; Intent & Scope
|
||||
- 5–8 step Plan with dependencies/parallelism
|
||||
- 5–8 steps Plan with dependencies/parallelism
|
||||
- **Uncertainty & Probes** (key unknowns → probe → stop condition)
|
||||
- Measurable Success Criteria; Failure detectors & fallbacks
|
||||
- **Source Plan** (evidence acquisition & validation)
|
||||
|
||||
Reference in New Issue
Block a user