Convert file format from Windows/DOS to Unix (#1949)

### What problem does this PR solve? The related source files were in Windows/DOS format; they have been converted to Unix format. ### Type of change - [x] Refactoring Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-02-02 08:35:08 +08:00 · 2024-08-15 09:17:36 +08:00
parent 1328d715db
commit 6b3a40be5c
108 changed files with 36399 additions and 36399 deletions
--- a/deepdoc/parser/resume/step_one.py
+++ b/deepdoc/parser/resume/step_one.py
@ -1,186 +1,186 @@
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-
-import json
-from deepdoc.parser.resume.entities import degrees, regions, industries
-
-FIELDS = [
-"address STRING",
-"annual_salary int",
-"annual_salary_from int",
-"annual_salary_to int",
-"birth STRING",
-"card STRING",
-"certificate_obj string",
-"city STRING",
-"corporation_id int",
-"corporation_name STRING",
-"corporation_type STRING",
-"degree STRING",
-"discipline_name STRING",
-"education_obj string",
-"email STRING",
-"expect_annual_salary int",
-"expect_city_names string",
-"expect_industry_name STRING",
-"expect_position_name STRING",
-"expect_salary_from int",
-"expect_salary_to int",
-"expect_type STRING",
-"gender STRING",
-"industry_name STRING",
-"industry_names STRING",
-"is_deleted STRING",
-"is_fertility STRING",
-"is_house STRING",
-"is_management_experience STRING",
-"is_marital STRING",
-"is_oversea STRING",
-"language_obj string",
-"name STRING",
-"nation STRING",
-"phone STRING",
-"political_status STRING",
-"position_name STRING",
-"project_obj string",
-"responsibilities string",
-"salary_month int",
-"scale STRING",
-"school_name STRING",
-"self_remark string",
-"skill_obj string",
-"title_name STRING",
-"tob_resume_id STRING",
-"updated_at Timestamp",
-"wechat STRING",
-"work_obj string",
-"work_experience int",
-"work_start_time BIGINT"
-]
-
-def refactor(df):
-    def deal_obj(obj, k, kk):
-        if not isinstance(obj, type({})):
-            return ""
-        obj = obj.get(k, {})
-        if not isinstance(obj, type({})):
-            return ""
-        return obj.get(kk, "")
-
-    def loadjson(line):
-        try:
-            return json.loads(line)
-        except Exception as e:
-            pass
-        return {}
-
-    df["obj"] = df["resume_content"].map(lambda x: loadjson(x))
-    df.fillna("", inplace=True)
-
-    clms = ["tob_resume_id", "updated_at"]
-
-    def extract(nms, cc=None):
-        nonlocal clms
-        clms.extend(nms)
-        for c in nms:
-            if cc:
-                df[c] = df["obj"].map(lambda x: deal_obj(x, cc, c))
-            else:
-                df[c] = df["obj"].map(
-                    lambda x: json.dumps(
-                        x.get(
-                            c,
-                            {}),
-                        ensure_ascii=False) if isinstance(
-                        x,
-                        type(
-                            {})) and (
-                        isinstance(
-                            x.get(c),
-                            type(
-                                {})) or not x.get(c)) else str(x).replace(
-                                    "None",
-                        ""))
-
-    extract(["education", "work", "certificate", "project", "language",
-             "skill"])
-    extract(["wechat", "phone", "is_deleted",
-            "name", "tel", "email"], "contact")
-    extract(["nation", "expect_industry_name", "salary_month",
-             "industry_ids", "is_house", "birth", "annual_salary_from",
-             "annual_salary_to", "card",
-             "expect_salary_to", "expect_salary_from",
-             "expect_position_name", "gender", "city",
-             "is_fertility", "expect_city_names",
-             "political_status", "title_name", "expect_annual_salary",
-             "industry_name", "address", "position_name", "school_name",
-             "corporation_id",
-             "is_oversea", "responsibilities",
-             "work_start_time", "degree", "management_experience",
-             "expect_type", "corporation_type", "scale", "corporation_name",
-             "self_remark", "annual_salary", "work_experience",
-             "discipline_name", "marital", "updated_at"], "basic")
-
-    df["degree"] = df["degree"].map(lambda x: degrees.get_name(x))
-    df["address"] = df["address"].map(lambda x: " ".join(regions.get_names(x)))
-    df["industry_names"] = df["industry_ids"].map(lambda x: " ".join([" ".join(industries.get_names(i)) for i in
-                                                                      str(x).split(",")]))
-    clms.append("industry_names")
-
-    def arr2str(a):
-        if not a:
-            return ""
-        if isinstance(a, list):
-            a = " ".join([str(i) for i in a])
-        return str(a).replace(",", " ")
-
-    df["expect_industry_name"] = df["expect_industry_name"].map(
-        lambda x: arr2str(x))
-    df["gender"] = df["gender"].map(
-        lambda x: "男" if x == 'M' else (
-            "女" if x == 'F' else ""))
-    for c in ["is_fertility", "is_oversea", "is_house",
-              "management_experience", "marital"]:
-        df[c] = df[c].map(
-            lambda x: '是' if x == 'Y' else (
-                '否' if x == 'N' else ""))
-    df["is_management_experience"] = df["management_experience"]
-    df["is_marital"] = df["marital"]
-    clms.extend(["is_management_experience", "is_marital"])
-
-    df.fillna("", inplace=True)
-    for i in range(len(df)):
-        if not df.loc[i, "phone"].strip() and df.loc[i, "tel"].strip():
-            df.loc[i, "phone"] = df.loc[i, "tel"].strip()
-
-    for n in ["industry_ids", "management_experience", "marital", "tel"]:
-        for i in range(len(clms)):
-            if clms[i] == n:
-                del clms[i]
-                break
-
-    clms = list(set(clms))
-
-    df = df.reindex(sorted(clms), axis=1)
-    #print(json.dumps(list(df.columns.values)), "LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL")
-    for c in clms:
-        df[c] = df[c].map(
-            lambda s: str(s).replace(
-                "\t",
-                " ").replace(
-                "\n",
-                "\\n").replace(
-                "\r",
-                "\\n"))
-    # print(df.values.tolist())
-    return dict(zip([n.split(" ")[0] for n in FIELDS], df.values.tolist()[0]))
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+import json
+from deepdoc.parser.resume.entities import degrees, regions, industries
+
+FIELDS = [
+"address STRING",
+"annual_salary int",
+"annual_salary_from int",
+"annual_salary_to int",
+"birth STRING",
+"card STRING",
+"certificate_obj string",
+"city STRING",
+"corporation_id int",
+"corporation_name STRING",
+"corporation_type STRING",
+"degree STRING",
+"discipline_name STRING",
+"education_obj string",
+"email STRING",
+"expect_annual_salary int",
+"expect_city_names string",
+"expect_industry_name STRING",
+"expect_position_name STRING",
+"expect_salary_from int",
+"expect_salary_to int",
+"expect_type STRING",
+"gender STRING",
+"industry_name STRING",
+"industry_names STRING",
+"is_deleted STRING",
+"is_fertility STRING",
+"is_house STRING",
+"is_management_experience STRING",
+"is_marital STRING",
+"is_oversea STRING",
+"language_obj string",
+"name STRING",
+"nation STRING",
+"phone STRING",
+"political_status STRING",
+"position_name STRING",
+"project_obj string",
+"responsibilities string",
+"salary_month int",
+"scale STRING",
+"school_name STRING",
+"self_remark string",
+"skill_obj string",
+"title_name STRING",
+"tob_resume_id STRING",
+"updated_at Timestamp",
+"wechat STRING",
+"work_obj string",
+"work_experience int",
+"work_start_time BIGINT"
+]
+
+def refactor(df):
+    def deal_obj(obj, k, kk):
+        if not isinstance(obj, type({})):
+            return ""
+        obj = obj.get(k, {})
+        if not isinstance(obj, type({})):
+            return ""
+        return obj.get(kk, "")
+
+    def loadjson(line):
+        try:
+            return json.loads(line)
+        except Exception as e:
+            pass
+        return {}
+
+    df["obj"] = df["resume_content"].map(lambda x: loadjson(x))
+    df.fillna("", inplace=True)
+
+    clms = ["tob_resume_id", "updated_at"]
+
+    def extract(nms, cc=None):
+        nonlocal clms
+        clms.extend(nms)
+        for c in nms:
+            if cc:
+                df[c] = df["obj"].map(lambda x: deal_obj(x, cc, c))
+            else:
+                df[c] = df["obj"].map(
+                    lambda x: json.dumps(
+                        x.get(
+                            c,
+                            {}),
+                        ensure_ascii=False) if isinstance(
+                        x,
+                        type(
+                            {})) and (
+                        isinstance(
+                            x.get(c),
+                            type(
+                                {})) or not x.get(c)) else str(x).replace(
+                                    "None",
+                        ""))
+
+    extract(["education", "work", "certificate", "project", "language",
+             "skill"])
+    extract(["wechat", "phone", "is_deleted",
+            "name", "tel", "email"], "contact")
+    extract(["nation", "expect_industry_name", "salary_month",
+             "industry_ids", "is_house", "birth", "annual_salary_from",
+             "annual_salary_to", "card",
+             "expect_salary_to", "expect_salary_from",
+             "expect_position_name", "gender", "city",
+             "is_fertility", "expect_city_names",
+             "political_status", "title_name", "expect_annual_salary",
+             "industry_name", "address", "position_name", "school_name",
+             "corporation_id",
+             "is_oversea", "responsibilities",
+             "work_start_time", "degree", "management_experience",
+             "expect_type", "corporation_type", "scale", "corporation_name",
+             "self_remark", "annual_salary", "work_experience",
+             "discipline_name", "marital", "updated_at"], "basic")
+
+    df["degree"] = df["degree"].map(lambda x: degrees.get_name(x))
+    df["address"] = df["address"].map(lambda x: " ".join(regions.get_names(x)))
+    df["industry_names"] = df["industry_ids"].map(lambda x: " ".join([" ".join(industries.get_names(i)) for i in
+                                                                      str(x).split(",")]))
+    clms.append("industry_names")
+
+    def arr2str(a):
+        if not a:
+            return ""
+        if isinstance(a, list):
+            a = " ".join([str(i) for i in a])
+        return str(a).replace(",", " ")
+
+    df["expect_industry_name"] = df["expect_industry_name"].map(
+        lambda x: arr2str(x))
+    df["gender"] = df["gender"].map(
+        lambda x: "男" if x == 'M' else (
+            "女" if x == 'F' else ""))
+    for c in ["is_fertility", "is_oversea", "is_house",
+              "management_experience", "marital"]:
+        df[c] = df[c].map(
+            lambda x: '是' if x == 'Y' else (
+                '否' if x == 'N' else ""))
+    df["is_management_experience"] = df["management_experience"]
+    df["is_marital"] = df["marital"]
+    clms.extend(["is_management_experience", "is_marital"])
+
+    df.fillna("", inplace=True)
+    for i in range(len(df)):
+        if not df.loc[i, "phone"].strip() and df.loc[i, "tel"].strip():
+            df.loc[i, "phone"] = df.loc[i, "tel"].strip()
+
+    for n in ["industry_ids", "management_experience", "marital", "tel"]:
+        for i in range(len(clms)):
+            if clms[i] == n:
+                del clms[i]
+                break
+
+    clms = list(set(clms))
+
+    df = df.reindex(sorted(clms), axis=1)
+    #print(json.dumps(list(df.columns.values)), "LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL")
+    for c in clms:
+        df[c] = df[c].map(
+            lambda s: str(s).replace(
+                "\t",
+                " ").replace(
+                "\n",
+                "\\n").replace(
+                "\r",
+                "\\n"))
+    # print(df.values.tolist())
+    return dict(zip([n.split(" ")[0] for n in FIELDS], df.values.tolist()[0]))