Fix errors detected by Ruff (#3918)

### What problem does this PR solve?

Fix errors detected by Ruff: split single-line compound statements onto separate lines, replace `type()` comparisons with `isinstance()`, rename ambiguous single-letter variables such as `l`, and remove unused assignments.

### Type of change

- [x] Refactoring
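
For reference, a minimal before/after sketch of the classes of fixes this diff applies, as would typically be surfaced by a run of `ruff check .`. The function and data below are hypothetical, not taken from the changed files; the rule codes (E701: multiple statements on one line, E721: type comparison instead of `isinstance()`, E741: ambiguous variable name) are the pycodestyle rules Ruff enforces for these patterns.

```python
# Hypothetical example of the fixes applied throughout this PR.

# Before: patterns Ruff flags
def clean_rows_before(rows):
    out = []
    for l in rows:  # E741: ambiguous variable name "l"
        if not l: continue  # E701: multiple statements on one line
        if type(l) == type(""): out.append(l.strip())  # E721 + E701
    return out

# After: the style this PR rewrites the code into
def clean_rows_after(rows):
    out = []
    for line in rows:  # descriptive name instead of "l"
        if not line:
            continue  # one statement per line
        if isinstance(line, str):  # isinstance() instead of type comparison
            out.append(line.strip())
    return out

assert clean_rows_after(["a ", "", 0, "b"]) == ["a", "b"]
```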
Zhichang Yu
2024-12-08 14:21:12 +08:00
committed by GitHub
parent e267a026f3
commit 0d68a6cd1b
97 changed files with 2558 additions and 1976 deletions


@ -18,4 +18,16 @@ from .ppt_parser import RAGFlowPptParser as PptParser
from .html_parser import RAGFlowHtmlParser as HtmlParser
from .json_parser import RAGFlowJsonParser as JsonParser
from .markdown_parser import RAGFlowMarkdownParser as MarkdownParser
from .txt_parser import RAGFlowTxtParser as TxtParser
from .txt_parser import RAGFlowTxtParser as TxtParser
__all__ = [
"PdfParser",
"PlainParser",
"DocxParser",
"ExcelParser",
"PptParser",
"HtmlParser",
"JsonParser",
"MarkdownParser",
"TxtParser",
]


@ -29,7 +29,8 @@ class RAGFlowExcelParser:
for sheetname in wb.sheetnames:
ws = wb[sheetname]
rows = list(ws.rows)
if not rows: continue
if not rows:
continue
tb_rows_0 = "<tr>"
for t in list(rows[0]):
@ -40,7 +41,9 @@ class RAGFlowExcelParser:
tb = ""
tb += f"<table><caption>{sheetname}</caption>"
tb += tb_rows_0
for r in list(rows[1 + chunk_i * chunk_rows:1 + (chunk_i + 1) * chunk_rows]):
for r in list(
rows[1 + chunk_i * chunk_rows : 1 + (chunk_i + 1) * chunk_rows]
):
tb += "<tr>"
for i, c in enumerate(r):
if c.value is None:
@ -62,20 +65,21 @@ class RAGFlowExcelParser:
for sheetname in wb.sheetnames:
ws = wb[sheetname]
rows = list(ws.rows)
if not rows:continue
if not rows:
continue
ti = list(rows[0])
for r in list(rows[1:]):
l = []
fields = []
for i, c in enumerate(r):
if not c.value:
continue
t = str(ti[i].value) if i < len(ti) else ""
t += ("" if t else "") + str(c.value)
l.append(t)
l = "; ".join(l)
fields.append(t)
line = "; ".join(fields)
if sheetname.lower().find("sheet") < 0:
l += " ——" + sheetname
res.append(l)
line += " ——" + sheetname
res.append(line)
return res
@staticmethod


@ -36,7 +36,7 @@ class RAGFlowHtmlParser:
@classmethod
def parser_txt(cls, txt):
if type(txt) != str:
if not isinstance(txt, str):
raise TypeError("txt type should be str!")
html_doc = readability.Document(txt)
title = html_doc.title()


@ -22,7 +22,7 @@ class RAGFlowJsonParser:
txt = binary.decode(encoding, errors="ignore")
json_data = json.loads(txt)
chunks = self.split_json(json_data, True)
sections = [json.dumps(l, ensure_ascii=False) for l in chunks if l]
sections = [json.dumps(line, ensure_ascii=False) for line in chunks if line]
return sections
@staticmethod


@ -752,7 +752,7 @@ class RAGFlowPdfParser:
"x1": np.max([b["x1"] for b in bxs]),
"bottom": np.max([b["bottom"] for b in bxs]) - ht
}
louts = [l for l in self.page_layout[pn] if l["type"] == ltype]
louts = [layout for layout in self.page_layout[pn] if layout["type"] == ltype]
ii = Recognizer.find_overlapped(b, louts, naive=True)
if ii is not None:
b = louts[ii]
@ -763,7 +763,8 @@ class RAGFlowPdfParser:
"layoutno", "")))
left, top, right, bott = b["x0"], b["top"], b["x1"], b["bottom"]
if right < left: right = left + 1
if right < left:
right = left + 1
poss.append((pn + self.page_from, left, right, top, bott))
return self.page_images[pn] \
.crop((left * ZM, top * ZM,
@ -845,7 +846,8 @@ class RAGFlowPdfParser:
top = bx["top"] - self.page_cum_height[pn[0] - 1]
bott = bx["bottom"] - self.page_cum_height[pn[0] - 1]
page_images_cnt = len(self.page_images)
if pn[-1] - 1 >= page_images_cnt: return ""
if pn[-1] - 1 >= page_images_cnt:
return ""
while bott * ZM > self.page_images[pn[-1] - 1].size[1]:
bott -= self.page_images[pn[-1] - 1].size[1] / ZM
pn.append(pn[-1] + 1)
@ -889,7 +891,6 @@ class RAGFlowPdfParser:
nonlocal mh, pw, lines, widths
lines.append(line)
widths.append(width(line))
width_mean = np.mean(widths)
mmj = self.proj_match(
line["text"]) or line.get(
"layout_type",
@ -994,7 +995,7 @@ class RAGFlowPdfParser:
else:
self.is_english = False
st = timer()
# st = timer()
for i, img in enumerate(self.page_images_x2):
chars = self.page_chars[i] if not self.is_english else []
self.mean_height.append(
@ -1028,8 +1029,8 @@ class RAGFlowPdfParser:
self.page_cum_height = np.cumsum(self.page_cum_height)
assert len(self.page_cum_height) == len(self.page_images) + 1
if len(self.boxes) == 0 and zoomin < 9: self.__images__(fnm, zoomin * 3, page_from,
page_to, callback)
if len(self.boxes) == 0 and zoomin < 9:
self.__images__(fnm, zoomin * 3, page_from, page_to, callback)
def __call__(self, fnm, need_image=True, zoomin=3, return_html=False):
self.__images__(fnm, zoomin)
@ -1168,7 +1169,7 @@ class PlainParser(object):
if not self.outlines:
logging.warning("Miss outlines")
return [(l, "") for l in lines], []
return [(line, "") for line in lines], []
def crop(self, ck, need_position):
raise NotImplementedError


@ -15,21 +15,42 @@ import datetime
def refactor(cv):
for n in ["raw_txt", "parser_name", "inference", "ori_text", "use_time", "time_stat"]:
if n in cv and cv[n] is not None: del cv[n]
for n in [
"raw_txt",
"parser_name",
"inference",
"ori_text",
"use_time",
"time_stat",
]:
if n in cv and cv[n] is not None:
del cv[n]
cv["is_deleted"] = 0
if "basic" not in cv: cv["basic"] = {}
if cv["basic"].get("photo2"): del cv["basic"]["photo2"]
if "basic" not in cv:
cv["basic"] = {}
if cv["basic"].get("photo2"):
del cv["basic"]["photo2"]
for n in ["education", "work", "certificate", "project", "language", "skill", "training"]:
if n not in cv or cv[n] is None: continue
if type(cv[n]) == type({}): cv[n] = [v for _, v in cv[n].items()]
if type(cv[n]) != type([]):
for n in [
"education",
"work",
"certificate",
"project",
"language",
"skill",
"training",
]:
if n not in cv or cv[n] is None:
continue
if isinstance(cv[n], dict):
cv[n] = [v for _, v in cv[n].items()]
if not isinstance(cv[n], list):
del cv[n]
continue
vv = []
for v in cv[n]:
if "external" in v and v["external"] is not None: del v["external"]
if "external" in v and v["external"] is not None:
del v["external"]
vv.append(v)
cv[n] = {str(i): vv[i] for i in range(len(vv))}
@ -42,24 +63,44 @@ def refactor(cv):
cv["basic"][t] = cv["basic"][n]
del cv["basic"][n]
work = sorted([v for _, v in cv.get("work", {}).items()], key=lambda x: x.get("start_time", ""))
edu = sorted([v for _, v in cv.get("education", {}).items()], key=lambda x: x.get("start_time", ""))
work = sorted(
[v for _, v in cv.get("work", {}).items()],
key=lambda x: x.get("start_time", ""),
)
edu = sorted(
[v for _, v in cv.get("education", {}).items()],
key=lambda x: x.get("start_time", ""),
)
if work:
cv["basic"]["work_start_time"] = work[0].get("start_time", "")
cv["basic"]["management_experience"] = 'Y' if any(
[w.get("management_experience", '') == 'Y' for w in work]) else 'N'
cv["basic"]["management_experience"] = (
"Y"
if any([w.get("management_experience", "") == "Y" for w in work])
else "N"
)
cv["basic"]["annual_salary"] = work[-1].get("annual_salary_from", "0")
for n in ["annual_salary_from", "annual_salary_to", "industry_name", "position_name", "responsibilities",
"corporation_type", "scale", "corporation_name"]:
for n in [
"annual_salary_from",
"annual_salary_to",
"industry_name",
"position_name",
"responsibilities",
"corporation_type",
"scale",
"corporation_name",
]:
cv["basic"][n] = work[-1].get(n, "")
if edu:
for n in ["school_name", "discipline_name"]:
if n in edu[-1]: cv["basic"][n] = edu[-1][n]
if n in edu[-1]:
cv["basic"][n] = edu[-1][n]
cv["basic"]["updated_at"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
if "contact" not in cv: cv["contact"] = {}
if not cv["contact"].get("name"): cv["contact"]["name"] = cv["basic"].get("name", "")
return cv
if "contact" not in cv:
cv["contact"] = {}
if not cv["contact"].get("name"):
cv["contact"]["name"] = cv["basic"].get("name", "")
return cv


@ -21,13 +21,18 @@ from . import regions
current_file_path = os.path.dirname(os.path.abspath(__file__))
GOODS = pd.read_csv(os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0).fillna(0)
GOODS = pd.read_csv(
os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0
).fillna(0)
GOODS["cid"] = GOODS["cid"].astype(str)
GOODS = GOODS.set_index(["cid"])
CORP_TKS = json.load(open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r"))
CORP_TKS = json.load(
open(os.path.join(current_file_path, "res/corp.tks.freq.json"), "r")
)
GOOD_CORP = json.load(open(os.path.join(current_file_path, "res/good_corp.json"), "r"))
CORP_TAG = json.load(open(os.path.join(current_file_path, "res/corp_tag.json"), "r"))
def baike(cid, default_v=0):
global GOODS
try:
@ -39,27 +44,41 @@ def baike(cid, default_v=0):
def corpNorm(nm, add_region=True):
global CORP_TKS
if not nm or type(nm)!=type(""):return ""
if not nm or not isinstance(nm, str):
return ""
nm = rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(nm)).lower()
nm = re.sub(r"&amp;", "&", nm)
nm = re.sub(r"[\(\)\+'\"\t \*\\【】-]+", " ", nm)
nm = re.sub(r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)", "", nm, 10000, re.IGNORECASE)
nm = re.sub(r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$", "", nm, 10000, re.IGNORECASE)
if not nm or (len(nm)<5 and not regions.isName(nm[0:2])):return nm
nm = re.sub(
r"([—-]+.*| +co\..*|corp\..*| +inc\..*| +ltd.*)", "", nm, 10000, re.IGNORECASE
)
nm = re.sub(
r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$",
"",
nm,
10000,
re.IGNORECASE,
)
if not nm or (len(nm) < 5 and not regions.isName(nm[0:2])):
return nm
tks = rag_tokenizer.tokenize(nm).split()
reg = [t for i,t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)]
reg = [t for i, t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)]
nm = ""
for t in tks:
if regions.isName(t) or t in CORP_TKS:continue
if re.match(r"[0-9a-zA-Z\\,.]+", t) and re.match(r".*[0-9a-zA-Z\,.]+$", nm):nm += " "
if regions.isName(t) or t in CORP_TKS:
continue
if re.match(r"[0-9a-zA-Z\\,.]+", t) and re.match(r".*[0-9a-zA-Z\,.]+$", nm):
nm += " "
nm += t
r = re.search(r"^([^a-z0-9 \(\)&]{2,})[a-z ]{4,}$", nm.strip())
if r:nm = r.group(1)
if r:
nm = r.group(1)
r = re.search(r"^([a-z ]{3,})[^a-z0-9 \(\)&]{2,}$", nm.strip())
if r:nm = r.group(1)
return nm.strip() + (("" if not reg else "(%s)"%reg[0]) if add_region else "")
if r:
nm = r.group(1)
return nm.strip() + (("" if not reg else "(%s)" % reg[0]) if add_region else "")
def rmNoise(n):
@ -67,33 +86,40 @@ def rmNoise(n):
n = re.sub(r"[,. &()]+", "", n)
return n
GOOD_CORP = set([corpNorm(rmNoise(c), False) for c in GOOD_CORP])
for c,v in CORP_TAG.items():
for c, v in CORP_TAG.items():
cc = corpNorm(rmNoise(c), False)
if not cc:
logging.debug(c)
CORP_TAG = {corpNorm(rmNoise(c), False):v for c,v in CORP_TAG.items()}
CORP_TAG = {corpNorm(rmNoise(c), False): v for c, v in CORP_TAG.items()}
def is_good(nm):
global GOOD_CORP
if nm.find("外派")>=0:return False
if nm.find("外派") >= 0:
return False
nm = rmNoise(nm)
nm = corpNorm(nm, False)
for n in GOOD_CORP:
if re.match(r"[0-9a-zA-Z]+$", n):
if n == nm: return True
elif nm.find(n)>=0:return True
if n == nm:
return True
elif nm.find(n) >= 0:
return True
return False
def corp_tag(nm):
global CORP_TAG
nm = rmNoise(nm)
nm = corpNorm(nm, False)
for n in CORP_TAG.keys():
if re.match(r"[0-9a-zA-Z., ]+$", n):
if n == nm: return CORP_TAG[n]
elif nm.find(n)>=0:
if len(n)<3 and len(nm)/len(n)>=2:continue
if n == nm:
return CORP_TAG[n]
elif nm.find(n) >= 0:
if len(n) < 3 and len(nm) / len(n) >= 2:
continue
return CORP_TAG[n]
return []


@ -11,27 +11,31 @@
# limitations under the License.
#
TBL = {"94":"EMBA",
"6":"MBA",
"95":"MPA",
"92":"专升本",
"4":"",
"90":"",
"91":"",
"86":"",
"3":"博士",
"10":"博士",
"1":"本科",
"2":"硕士",
"87":"职高",
"89":""
TBL = {
"94": "EMBA",
"6": "MBA",
"95": "MPA",
"92": "专升本",
"4": "大专",
"90": "中专",
"91": "中技",
"86": "初中",
"3": "博士",
"10": "博士后",
"1": "本科",
"2": "硕士",
"87": "职高",
"89": "高中",
}
TBL_ = {v:k for k,v in TBL.items()}
TBL_ = {v: k for k, v in TBL.items()}
def get_name(id):
return TBL.get(str(id), "")
def get_id(nm):
if not nm:return ""
if not nm:
return ""
return TBL_.get(nm.upper().strip(), "")

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -16,8 +16,11 @@ import json
import re
import copy
import pandas as pd
current_file_path = os.path.dirname(os.path.abspath(__file__))
TBL = pd.read_csv(os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0).fillna("")
TBL = pd.read_csv(
os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0
).fillna("")
TBL["name_en"] = TBL["name_en"].map(lambda x: x.lower().strip())
GOOD_SCH = json.load(open(os.path.join(current_file_path, "res/good_sch.json"), "r"))
GOOD_SCH = set([re.sub(r"[,. &()]+", "", c) for c in GOOD_SCH])
@ -26,14 +29,15 @@ GOOD_SCH = set([re.sub(r"[,. &()]+", "", c) for c in GOOD_SCH])
def loadRank(fnm):
global TBL
TBL["rank"] = 1000000
with open(fnm, "r", encoding='utf-8') as f:
with open(fnm, "r", encoding="utf-8") as f:
while True:
l = f.readline()
if not l:break
l = l.strip("\n").split(",")
line = f.readline()
if not line:
break
line = line.strip("\n").split(",")
try:
nm,rk = l[0].strip(),int(l[1])
#assert len(TBL[((TBL.name_cn == nm) | (TBL.name_en == nm))]),f"<{nm}>"
nm, rk = line[0].strip(), int(line[1])
# assert len(TBL[((TBL.name_cn == nm) | (TBL.name_en == nm))]),f"<{nm}>"
TBL.loc[((TBL.name_cn == nm) | (TBL.name_en == nm)), "rank"] = rk
except Exception:
pass
@ -44,27 +48,35 @@ loadRank(os.path.join(current_file_path, "res/school.rank.csv"))
def split(txt):
tks = []
for t in re.sub(r"[ \t]+", " ",txt).split():
if tks and re.match(r".*[a-zA-Z]$", tks[-1]) and \
re.match(r"[a-zA-Z]", t) and tks:
for t in re.sub(r"[ \t]+", " ", txt).split():
if (
tks
and re.match(r".*[a-zA-Z]$", tks[-1])
and re.match(r"[a-zA-Z]", t)
and tks
):
tks[-1] = tks[-1] + " " + t
else:tks.append(t)
else:
tks.append(t)
return tks
def select(nm):
global TBL
if not nm:return
if isinstance(nm, list):nm = str(nm[0])
if not nm:
return
if isinstance(nm, list):
nm = str(nm[0])
nm = split(nm)[0]
nm = str(nm).lower().strip()
nm = re.sub(r"[(][^()]+[)]", "", nm.lower())
nm = re.sub(r"(^the |[,.&();;·]+|^(英国|美国|瑞士))", "", nm)
nm = re.sub(r"大学.*学院", "大学", nm)
tbl = copy.deepcopy(TBL)
tbl["hit_alias"] = tbl["alias"].map(lambda x:nm in set(x.split("+")))
res = tbl[((tbl.name_cn == nm) | (tbl.name_en == nm) | (tbl.hit_alias == True))]
if res.empty:return
tbl["hit_alias"] = tbl["alias"].map(lambda x: nm in set(x.split("+")))
res = tbl[((tbl.name_cn == nm) | (tbl.name_en == nm) | tbl.hit_alias)]
if res.empty:
return
return json.loads(res.to_json(orient="records"))[0]
@ -74,4 +86,3 @@ def is_good(nm):
nm = re.sub(r"[(][^()]+[)]", "", nm.lower())
nm = re.sub(r"[''`‘’“”,. &();]+", "", nm)
return nm in GOOD_SCH


@ -25,7 +25,8 @@ from xpinyin import Pinyin
from contextlib import contextmanager
class TimeoutException(Exception): pass
class TimeoutException(Exception):
pass
@contextmanager
@ -50,8 +51,10 @@ def rmHtmlTag(line):
def highest_degree(dg):
if not dg: return ""
if type(dg) == type(""): dg = [dg]
if not dg:
return ""
if isinstance(dg, str):
dg = [dg]
m = {"初中": 0, "高中": 1, "中专": 2, "大专": 3, "专升本": 4, "本科": 5, "硕士": 6, "博士": 7, "博士后": 8}
return sorted([(d, m.get(d, -1)) for d in dg], key=lambda x: x[1] * -1)[0][0]
@ -68,10 +71,12 @@ def forEdu(cv):
for ii, n in enumerate(sorted(cv["education_obj"], key=lambda x: x.get("start_time", "3"))):
e = {}
if n.get("end_time"):
if n["end_time"] > edu_end_dt: edu_end_dt = n["end_time"]
if n["end_time"] > edu_end_dt:
edu_end_dt = n["end_time"]
try:
dt = n["end_time"]
if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt)
if re.match(r"[0-9]{9,}", dt):
dt = turnTm2Dt(dt)
y, m, d = getYMD(dt)
ed_dt.append(str(y))
e["end_dt_kwd"] = str(y)
@ -80,7 +85,8 @@ def forEdu(cv):
if n.get("start_time"):
try:
dt = n["start_time"]
if re.match(r"[0-9]{9,}", dt): dt = turnTm2Dt(dt)
if re.match(r"[0-9]{9,}", dt):
dt = turnTm2Dt(dt)
y, m, d = getYMD(dt)
st_dt.append(str(y))
e["start_dt_kwd"] = str(y)
@ -89,13 +95,20 @@ def forEdu(cv):
r = schools.select(n.get("school_name", ""))
if r:
if str(r.get("type", "")) == "1": fea.append("211")
if str(r.get("type", "")) == "2": fea.append("211")
if str(r.get("is_abroad", "")) == "1": fea.append("留学")
if str(r.get("is_double_first", "")) == "1": fea.append("双一流")
if str(r.get("is_985", "")) == "1": fea.append("985")
if str(r.get("is_world_known", "")) == "1": fea.append("海外知名")
if r.get("rank") and cv["school_rank_int"] > r["rank"]: cv["school_rank_int"] = r["rank"]
if str(r.get("type", "")) == "1":
fea.append("211")
if str(r.get("type", "")) == "2":
fea.append("211")
if str(r.get("is_abroad", "")) == "1":
fea.append("留学")
if str(r.get("is_double_first", "")) == "1":
fea.append("双一流")
if str(r.get("is_985", "")) == "1":
fea.append("985")
if str(r.get("is_world_known", "")) == "1":
fea.append("海外知名")
if r.get("rank") and cv["school_rank_int"] > r["rank"]:
cv["school_rank_int"] = r["rank"]
if n.get("school_name") and isinstance(n["school_name"], str):
sch.append(re.sub(r"(211|985|重点大学|[,&;-])", "", n["school_name"]))
@ -106,22 +119,25 @@ def forEdu(cv):
maj.append(n["discipline_name"])
e["major_kwd"] = n["discipline_name"]
if not n.get("degree") and "985" in fea and not first_fea: n["degree"] = "1"
if not n.get("degree") and "985" in fea and not first_fea:
n["degree"] = "1"
if n.get("degree"):
d = degrees.get_name(n["degree"])
if d: e["degree_kwd"] = d
if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg or re.search(r"(成人|自考|自学考试)",
n.get(
"school_name",
""))): d = "专升本"
if d: deg.append(d)
if d:
e["degree_kwd"] = d
if d == "本科" and ("专科" in deg or "专升本" in deg or "中专" in deg or "大专" in deg or re.search(r"(成人|自考|自学考试)", n.get("school_name",""))):
d = "专升本"
if d:
deg.append(d)
# for first degree
if not fdeg and d in ["中专", "专升本", "专科", "本科", "大专"]:
fdeg = [d]
if n.get("school_name"): fsch = [n["school_name"]]
if n.get("discipline_name"): fmaj = [n["discipline_name"]]
if n.get("school_name"):
fsch = [n["school_name"]]
if n.get("discipline_name"):
fmaj = [n["discipline_name"]]
first_fea = copy.deepcopy(fea)
edu_nst.append(e)
@ -140,16 +156,26 @@ def forEdu(cv):
else:
cv["sch_rank_kwd"].append("一般学校")
if edu_nst: cv["edu_nst"] = edu_nst
if fea: cv["edu_fea_kwd"] = list(set(fea))
if first_fea: cv["edu_first_fea_kwd"] = list(set(first_fea))
if maj: cv["major_kwd"] = maj
if fsch: cv["first_school_name_kwd"] = fsch
if fdeg: cv["first_degree_kwd"] = fdeg
if fmaj: cv["first_major_kwd"] = fmaj
if st_dt: cv["edu_start_kwd"] = st_dt
if ed_dt: cv["edu_end_kwd"] = ed_dt
if ed_dt: cv["edu_end_int"] = max([int(t) for t in ed_dt])
if edu_nst:
cv["edu_nst"] = edu_nst
if fea:
cv["edu_fea_kwd"] = list(set(fea))
if first_fea:
cv["edu_first_fea_kwd"] = list(set(first_fea))
if maj:
cv["major_kwd"] = maj
if fsch:
cv["first_school_name_kwd"] = fsch
if fdeg:
cv["first_degree_kwd"] = fdeg
if fmaj:
cv["first_major_kwd"] = fmaj
if st_dt:
cv["edu_start_kwd"] = st_dt
if ed_dt:
cv["edu_end_kwd"] = ed_dt
if ed_dt:
cv["edu_end_int"] = max([int(t) for t in ed_dt])
if deg:
if "本科" in deg and "专科" in deg:
deg.append("专升本")
@ -158,8 +184,10 @@ def forEdu(cv):
cv["highest_degree_kwd"] = highest_degree(deg)
if edu_end_dt:
try:
if re.match(r"[0-9]{9,}", edu_end_dt): edu_end_dt = turnTm2Dt(edu_end_dt)
if edu_end_dt.strip("\n") == "至今": edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today()))
if re.match(r"[0-9]{9,}", edu_end_dt):
edu_end_dt = turnTm2Dt(edu_end_dt)
if edu_end_dt.strip("\n") == "至今":
edu_end_dt = cv.get("updated_at_dt", str(datetime.date.today()))
y, m, d = getYMD(edu_end_dt)
cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
except Exception as e:
@ -171,7 +199,8 @@ def forEdu(cv):
or not cv.get("degree_kwd"):
for c in sch:
if schools.is_good(c):
if "tag_kwd" not in cv: cv["tag_kwd"] = []
if "tag_kwd" not in cv:
cv["tag_kwd"] = []
cv["tag_kwd"].append("好学校")
cv["tag_kwd"].append("好学历")
break
@ -180,28 +209,39 @@ def forEdu(cv):
any([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])])) \
or all([d.lower() in ["硕士", "博士", "mba", "博士后"] for d in cv.get("degree_kwd", [])]) \
or any([d in ["mba", "emba", "博士后"] for d in cv.get("degree_kwd", [])]):
if "tag_kwd" not in cv: cv["tag_kwd"] = []
if "好学历" not in cv["tag_kwd"]: cv["tag_kwd"].append("好学历")
if "tag_kwd" not in cv:
cv["tag_kwd"] = []
if "好学历" not in cv["tag_kwd"]:
cv["tag_kwd"].append("好学历")
if cv.get("major_kwd"): cv["major_tks"] = rag_tokenizer.tokenize(" ".join(maj))
if cv.get("school_name_kwd"): cv["school_name_tks"] = rag_tokenizer.tokenize(" ".join(sch))
if cv.get("first_school_name_kwd"): cv["first_school_name_tks"] = rag_tokenizer.tokenize(" ".join(fsch))
if cv.get("first_major_kwd"): cv["first_major_tks"] = rag_tokenizer.tokenize(" ".join(fmaj))
if cv.get("major_kwd"):
cv["major_tks"] = rag_tokenizer.tokenize(" ".join(maj))
if cv.get("school_name_kwd"):
cv["school_name_tks"] = rag_tokenizer.tokenize(" ".join(sch))
if cv.get("first_school_name_kwd"):
cv["first_school_name_tks"] = rag_tokenizer.tokenize(" ".join(fsch))
if cv.get("first_major_kwd"):
cv["first_major_tks"] = rag_tokenizer.tokenize(" ".join(fmaj))
return cv
def forProj(cv):
if not cv.get("project_obj"): return cv
if not cv.get("project_obj"):
return cv
pro_nms, desc = [], []
for i, n in enumerate(
sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if type(x) == type({}) else "",
sorted(cv.get("project_obj", []), key=lambda x: str(x.get("updated_at", "")) if isinstance(x, dict) else "",
reverse=True)):
if n.get("name"): pro_nms.append(n["name"])
if n.get("describe"): desc.append(str(n["describe"]))
if n.get("responsibilities"): desc.append(str(n["responsibilities"]))
if n.get("achivement"): desc.append(str(n["achivement"]))
if n.get("name"):
pro_nms.append(n["name"])
if n.get("describe"):
desc.append(str(n["describe"]))
if n.get("responsibilities"):
desc.append(str(n["responsibilities"]))
if n.get("achivement"):
desc.append(str(n["achivement"]))
if pro_nms:
# cv["pro_nms_tks"] = rag_tokenizer.tokenize(" ".join(pro_nms))
@ -233,15 +273,16 @@ def forWork(cv):
work_st_tm = ""
corp_tags = []
for i, n in enumerate(
sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if type(x) == type({}) else "",
sorted(cv.get("work_obj", []), key=lambda x: str(x.get("start_time", "")) if isinstance(x, dict) else "",
reverse=True)):
if type(n) == type(""):
if isinstance(n, str):
try:
n = json_loads(n)
except Exception:
continue
if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm): work_st_tm = n["start_time"]
if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm):
work_st_tm = n["start_time"]
for c in flds:
if not n.get(c) or str(n[c]) == '0':
fea[c].append("")
@ -262,14 +303,18 @@ def forWork(cv):
fea[c].append(rmHtmlTag(str(n[c]).lower()))
y, m, d = getYMD(n.get("start_time"))
if not y or not m: continue
if not y or not m:
continue
st = "%s-%02d-%02d" % (y, int(m), int(d))
latest_job_tm = st
y, m, d = getYMD(n.get("end_time"))
if (not y or not m) and i > 0: continue
if not y or not m or int(y) > 2022: y, m, d = getYMD(str(n.get("updated_at", "")))
if not y or not m: continue
if (not y or not m) and i > 0:
continue
if not y or not m or int(y) > 2022:
y, m, d = getYMD(str(n.get("updated_at", "")))
if not y or not m:
continue
ed = "%s-%02d-%02d" % (y, int(m), int(d))
try:
@ -279,22 +324,28 @@ def forWork(cv):
if n.get("scale"):
r = re.search(r"^([0-9]+)", str(n["scale"]))
if r: scales.append(int(r.group(1)))
if r:
scales.append(int(r.group(1)))
if goodcorp:
if "tag_kwd" not in cv: cv["tag_kwd"] = []
if "tag_kwd" not in cv:
cv["tag_kwd"] = []
cv["tag_kwd"].append("好公司")
if goodcorp_:
if "tag_kwd" not in cv: cv["tag_kwd"] = []
if "tag_kwd" not in cv:
cv["tag_kwd"] = []
cv["tag_kwd"].append("好公司(曾)")
if corp_tags:
if "tag_kwd" not in cv: cv["tag_kwd"] = []
if "tag_kwd" not in cv:
cv["tag_kwd"] = []
cv["tag_kwd"].extend(corp_tags)
cv["corp_tag_kwd"] = [c for c in corp_tags if re.match(r"(综合|行业)", c)]
if latest_job_tm: cv["latest_job_dt"] = latest_job_tm
if fea["corporation_id"]: cv["corporation_id"] = fea["corporation_id"]
if latest_job_tm:
cv["latest_job_dt"] = latest_job_tm
if fea["corporation_id"]:
cv["corporation_id"] = fea["corporation_id"]
if fea["position_name"]:
cv["position_name_tks"] = rag_tokenizer.tokenize(fea["position_name"][0])
@ -317,18 +368,23 @@ def forWork(cv):
cv["responsibilities_ltks"] = rag_tokenizer.tokenize(fea["responsibilities"][0])
cv["resp_ltks"] = rag_tokenizer.tokenize(" ".join(fea["responsibilities"][1:]))
if fea["subordinates_count"]: fea["subordinates_count"] = [int(i) for i in fea["subordinates_count"] if
if fea["subordinates_count"]:
fea["subordinates_count"] = [int(i) for i in fea["subordinates_count"] if
re.match(r"[^0-9]+$", str(i))]
if fea["subordinates_count"]: cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"])
if fea["subordinates_count"]:
cv["max_sub_cnt_int"] = np.max(fea["subordinates_count"])
if type(cv.get("corporation_id")) == type(1): cv["corporation_id"] = [str(cv["corporation_id"])]
if not cv.get("corporation_id"): cv["corporation_id"] = []
if isinstance(cv.get("corporation_id"), int):
cv["corporation_id"] = [str(cv["corporation_id"])]
if not cv.get("corporation_id"):
cv["corporation_id"] = []
for i in cv.get("corporation_id", []):
cv["baike_flt"] = max(corporations.baike(i), cv["baike_flt"] if "baike_flt" in cv else 0)
if work_st_tm:
try:
if re.match(r"[0-9]{9,}", work_st_tm): work_st_tm = turnTm2Dt(work_st_tm)
if re.match(r"[0-9]{9,}", work_st_tm):
work_st_tm = turnTm2Dt(work_st_tm)
y, m, d = getYMD(work_st_tm)
cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
except Exception as e:
@ -339,28 +395,37 @@ def forWork(cv):
cv["dua_flt"] = np.mean(duas)
cv["cur_dua_int"] = duas[0]
cv["job_num_int"] = len(duas)
if scales: cv["scale_flt"] = np.max(scales)
if scales:
cv["scale_flt"] = np.max(scales)
return cv
def turnTm2Dt(b):
if not b: return
if not b:
return
b = str(b).strip()
if re.match(r"[0-9]{10,}", b): b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10])))
if re.match(r"[0-9]{10,}", b):
b = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(b[:10])))
return b
def getYMD(b):
y, m, d = "", "", "01"
if not b: return (y, m, d)
if not b:
return (y, m, d)
b = turnTm2Dt(b)
if re.match(r"[0-9]{4}", b): y = int(b[:4])
if re.match(r"[0-9]{4}", b):
y = int(b[:4])
r = re.search(r"[0-9]{4}.?([0-9]{1,2})", b)
if r: m = r.group(1)
if r:
m = r.group(1)
r = re.search(r"[0-9]{4}.?[0-9]{,2}.?([0-9]{1,2})", b)
if r: d = r.group(1)
if not d or int(d) == 0 or int(d) > 31: d = "1"
if not m or int(m) > 12 or int(m) < 1: m = "1"
if r:
d = r.group(1)
if not d or int(d) == 0 or int(d) > 31:
d = "1"
if not m or int(m) > 12 or int(m) < 1:
m = "1"
return (y, m, d)
@ -369,7 +434,8 @@ def birth(cv):
cv["integerity_flt"] *= 0.9
return cv
y, m, d = getYMD(cv["birth"])
if not m or not y: return cv
if not m or not y:
return cv
b = "%s-%02d-%02d" % (y, int(m), int(d))
cv["birth_dt"] = b
cv["birthday_kwd"] = "%02d%02d" % (int(m), int(d))
@ -380,7 +446,8 @@ def birth(cv):
def parse(cv):
for k in cv.keys():
if cv[k] == '\\N': cv[k] = ''
if cv[k] == '\\N':
cv[k] = ''
# cv = cv.asDict()
tks_fld = ["address", "corporation_name", "discipline_name", "email", "expect_city_names",
"expect_industry_name", "expect_position_name", "industry_name", "industry_names", "name",
@ -402,9 +469,12 @@ def parse(cv):
rmkeys = []
for k in cv.keys():
if cv[k] is None: rmkeys.append(k)
if (type(cv[k]) == type([]) or type(cv[k]) == type("")) and len(cv[k]) == 0: rmkeys.append(k)
for k in rmkeys: del cv[k]
if cv[k] is None:
rmkeys.append(k)
if (isinstance(cv[k], list) or isinstance(cv[k], str)) and len(cv[k]) == 0:
rmkeys.append(k)
for k in rmkeys:
del cv[k]
integerity = 0.
flds_num = 0.
@ -414,7 +484,8 @@ def parse(cv):
flds_num += len(flds)
for f in flds:
v = str(cv.get(f, ""))
if len(v) > 0 and v != '0' and v != '[]': integerity += 1
if len(v) > 0 and v != '0' and v != '[]':
integerity += 1
hasValues(tks_fld)
hasValues(small_tks_fld)
@ -433,7 +504,8 @@ def parse(cv):
(r"[ \(\)人/·0-9-]+", ""),
(r".*(元|规模|于|=|北京|上海|至今|中国|工资|州|shanghai|强|餐饮|融资|职).*", "")]:
cv["corporation_type"] = re.sub(p, r, cv["corporation_type"], 1000, re.IGNORECASE)
if len(cv["corporation_type"]) < 2: del cv["corporation_type"]
if len(cv["corporation_type"]) < 2:
del cv["corporation_type"]
if cv.get("political_status"):
for p, r in [
@ -441,9 +513,11 @@ def parse(cv):
(r".*(无党派|公民).*", "群众"),
(r".*团员.*", "团员")]:
cv["political_status"] = re.sub(p, r, cv["political_status"])
if not re.search(r"[党团群]", cv["political_status"]): del cv["political_status"]
if not re.search(r"[党团群]", cv["political_status"]):
del cv["political_status"]
if cv.get("phone"): cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"]))
if cv.get("phone"):
cv["phone"] = re.sub(r"^0*86([0-9]{11})", r"\1", re.sub(r"[^0-9]+", "", cv["phone"]))
keys = list(cv.keys())
for k in keys:
@ -454,9 +528,11 @@ def parse(cv):
cv[k] = [a for _, a in cv[k].items()]
nms = []
for n in cv[k]:
if type(n) != type({}) or "name" not in n or not n.get("name"): continue
if not isinstance(n, dict) or "name" not in n or not n.get("name"):
continue
n["name"] = re.sub(r"(442|\t )", "", n["name"]).strip().lower()
if not n["name"]: continue
if not n["name"]:
continue
nms.append(n["name"])
if nms:
t = k[:-4]
@ -469,15 +545,18 @@ def parse(cv):
# tokenize fields
if k in tks_fld:
cv[f"{k}_tks"] = rag_tokenizer.tokenize(cv[k])
if k in small_tks_fld: cv[f"{k}_sm_tks"] = rag_tokenizer.tokenize(cv[f"{k}_tks"])
if k in small_tks_fld:
cv[f"{k}_sm_tks"] = rag_tokenizer.tokenize(cv[f"{k}_tks"])
# keyword fields
if k in kwd_fld: cv[f"{k}_kwd"] = [n.lower()
if k in kwd_fld:
cv[f"{k}_kwd"] = [n.lower()
for n in re.split(r"[\t,;. ]",
re.sub(r"([^a-zA-Z])[ ]+([^a-zA-Z ])", r"\1\2", cv[k])
) if n]
if k in num_fld and cv.get(k): cv[f"{k}_int"] = cv[k]
if k in num_fld and cv.get(k):
cv[f"{k}_int"] = cv[k]
cv["email_kwd"] = cv.get("email_tks", "").replace(" ", "")
# for name field
@ -501,10 +580,12 @@ def parse(cv):
cv["name_py_pref0_tks"] = ""
cv["name_py_pref_tks"] = ""
for py in PY.get_pinyins(nm[:20], ''):
for i in range(2, len(py) + 1): cv["name_py_pref_tks"] += " " + py[:i]
for i in range(2, len(py) + 1):
cv["name_py_pref_tks"] += " " + py[:i]
for py in PY.get_pinyins(nm[:20], ' '):
py = py.split()
for i in range(1, len(py) + 1): cv["name_py_pref0_tks"] += " " + "".join(py[:i])
for i in range(1, len(py) + 1):
cv["name_py_pref0_tks"] += " " + "".join(py[:i])
cv["name_kwd"] = name
cv["name_pinyin_kwd"] = PY.get_pinyins(nm[:20], ' ')[:3]
@ -526,22 +607,30 @@ def parse(cv):
cv["updated_at_dt"] = cv["updated_at"].strftime('%Y-%m-%d %H:%M:%S')
else:
y, m, d = getYMD(str(cv.get("updated_at", "")))
if not y: y = "2012"
if not m: m = "01"
if not d: d = "01"
if not y:
y = "2012"
if not m:
m = "01"
if not d:
d = "01"
cv["updated_at_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
# long text tokenize
if cv.get("responsibilities"): cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"]))
if cv.get("responsibilities"):
cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"]))
# for yes or no field
fea = []
for f, y, n in is_fld:
if f not in cv: continue
if cv[f] == '是': fea.append(y)
if cv[f] == '否': fea.append(n)
if f not in cv:
continue
if cv[f] == '是':
fea.append(y)
if cv[f] == '否':
fea.append(n)
if fea: cv["tag_kwd"] = fea
if fea:
cv["tag_kwd"] = fea
cv = forEdu(cv)
cv = forProj(cv)
@ -550,9 +639,11 @@ def parse(cv):
cv["corp_proj_sch_deg_kwd"] = [c for c in cv.get("corp_tag_kwd", [])]
for i in range(len(cv["corp_proj_sch_deg_kwd"])):
for j in cv.get("sch_rank_kwd", []): cv["corp_proj_sch_deg_kwd"][i] += "+" + j
for j in cv.get("sch_rank_kwd", []):
cv["corp_proj_sch_deg_kwd"][i] += "+" + j
for i in range(len(cv["corp_proj_sch_deg_kwd"])):
if cv.get("highest_degree_kwd"): cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"]
if cv.get("highest_degree_kwd"):
cv["corp_proj_sch_deg_kwd"][i] += "+" + cv["highest_degree_kwd"]
try:
if not cv.get("work_exp_flt") and cv.get("work_start_time"):
@ -565,17 +656,21 @@ def parse(cv):
cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
except Exception as e:
logging.exception("parse {} ==> {}".format(e, cv.get("work_start_time")))
if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
if "work_exp_flt" not in cv and cv.get("work_experience", 0):
cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
keys = list(cv.keys())
for k in keys:
if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k): del cv[k]
if not re.search(r"_(fea|tks|nst|dt|int|flt|ltks|kwd|id)$", k):
del cv[k]
for k in cv.keys():
if not re.search("_(kwd|id)$", k) or type(cv[k]) != type([]): continue
if not re.search("_(kwd|id)$", k) or not isinstance(cv[k], list):
continue
cv[k] = list(set([re.sub("(市)$", "", str(n)) for n in cv[k] if n not in ['中国', '0']]))
keys = [k for k in cv.keys() if re.search(r"_feas*$", k)]
for k in keys:
if cv[k] <= 0: del cv[k]
if cv[k] <= 0:
del cv[k]
cv["tob_resume_id"] = str(cv["tob_resume_id"])
cv["id"] = cv["tob_resume_id"]
@ -592,5 +687,6 @@ def dealWithInt64(d):
if isinstance(d, list):
d = [dealWithInt64(t) for t in d]
if isinstance(d, np.integer): d = int(d)
if isinstance(d, np.integer):
d = int(d)
return d


@ -51,6 +51,7 @@ class RAGFlowTxtParser:
dels = [d for d in dels if d]
dels = "|".join(dels)
secs = re.split(r"(%s)" % dels, txt)
for sec in secs: add_chunk(sec)
for sec in secs:
add_chunk(sec)
return [[c, ""] for c in cks]