Fix IDE warnings (#12281)

### What problem does this PR solve?

As title

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-12-30 00:32:30 +08:00 · 2025-12-29 12:01:18 +08:00
parent 647fb115a0
commit 01f0ced1e6
43 changed files with 817 additions and 637 deletions
--- a/rag/app/table.py
+++ b/rag/app/table.py
@@ -51,14 +51,15 @@ class Excel(ExcelParser):
        tables = []
        for sheetname in wb.sheetnames:
            ws = wb[sheetname]
-            images = Excel._extract_images_from_worksheet(ws,sheetname=sheetname)
+            images = Excel._extract_images_from_worksheet(ws, sheetname=sheetname)
            if images:
-                image_descriptions = vision_figure_parser_figure_xlsx_wrapper(images=images, callback=callback, **kwargs)
+                image_descriptions = vision_figure_parser_figure_xlsx_wrapper(images=images, callback=callback,
+                                                                              **kwargs)
                if image_descriptions and len(image_descriptions) == len(images):
                    for i, bf in enumerate(image_descriptions):
                        images[i]["image_description"] = "\n".join(bf[0][1])
                    for img in images:
-                        if (img["span_type"] == "single_cell"and img.get("image_description")):
+                        if (img["span_type"] == "single_cell" and img.get("image_description")):
                            pending_cell_images.append(img)
                        else:
                            flow_images.append(img)
@@ -113,16 +114,17 @@ class Excel(ExcelParser):
            tables.append(
                (
                    (
-                        img["image"],   # Image.Image
-                        [img["image_description"]]     # description list (must be list)
+                        img["image"],  # Image.Image
+                        [img["image_description"]]  # description list (must be list)
                    ),
                    [
-                        (0, 0, 0, 0, 0)   # dummy position
+                        (0, 0, 0, 0, 0)  # dummy position
                    ]
                )
            )
-        callback(0.3, ("Extract records: {}~{}".format(from_page + 1, min(to_page, from_page + rn)) + (f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else "")))
-        return res,tables
+        callback(0.3, ("Extract records: {}~{}".format(from_page + 1, min(to_page, from_page + rn)) + (
+            f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else "")))
+        return res, tables

    def _parse_headers(self, ws, rows):
        if len(rows) == 0:
@@ -315,14 +317,15 @@ def trans_bool(s):
 def column_data_type(arr):
    arr = list(arr)
    counts = {"int": 0, "float": 0, "text": 0, "datetime": 0, "bool": 0}
-    trans = {t: f for f, t in [(int, "int"), (float, "float"), (trans_datatime, "datetime"), (trans_bool, "bool"), (str, "text")]}
+    trans = {t: f for f, t in
+             [(int, "int"), (float, "float"), (trans_datatime, "datetime"), (trans_bool, "bool"), (str, "text")]}
    float_flag = False
    for a in arr:
        if a is None:
            continue
        if re.match(r"[+-]?[0-9]+$", str(a).replace("%%", "")) and not str(a).replace("%%", "").startswith("0"):
            counts["int"] += 1
-            if int(str(a)) > 2**63 - 1:
+            if int(str(a)) > 2 ** 63 - 1:
                float_flag = True
                break
        elif re.match(r"[+-]?[0-9.]{,19}$", str(a).replace("%%", "")) and not str(a).replace("%%", "").startswith("0"):
@@ -370,7 +373,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
    if re.search(r"\.xlsx?$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")
        excel_parser = Excel()
-        dfs,tbls = excel_parser(filename, binary, from_page=from_page, to_page=to_page, callback=callback, **kwargs)
+        dfs, tbls = excel_parser(filename, binary, from_page=from_page, to_page=to_page, callback=callback, **kwargs)
    elif re.search(r"\.txt$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")
        txt = get_text(filename, binary)
@@ -389,7 +392,8 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
                continue
            rows.append(row)

-        callback(0.3, ("Extract records: {}~{}".format(from_page, min(len(lines), to_page)) + (f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else "")))
+        callback(0.3, ("Extract records: {}~{}".format(from_page, min(len(lines), to_page)) + (
+            f"{len(fails)} failure, line: %s..." % (",".join(fails[:3])) if fails else "")))

        dfs = [pd.DataFrame(np.array(rows), columns=headers)]
    elif re.search(r"\.csv$", filename, re.IGNORECASE):
@@ -406,7 +410,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
        fails = []
        rows = []

-        for i, row in enumerate(all_rows[1 + from_page : 1 + to_page]):
+        for i, row in enumerate(all_rows[1 + from_page: 1 + to_page]):
            if len(row) != len(headers):
                fails.append(str(i + from_page))
                continue
@@ -415,7 +419,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
        callback(
            0.3,
            (f"Extract records: {from_page}~{from_page + len(rows)}" +
-            (f"{len(fails)} failure, line: {','.join(fails[:3])}..." if fails else ""))
+             (f"{len(fails)} failure, line: {','.join(fails[:3])}..." if fails else ""))
        )

        dfs = [pd.DataFrame(rows, columns=headers)]
@@ -445,7 +449,8 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
            df[clmns[j]] = cln
            if ty == "text":
                txts.extend([str(c) for c in cln if c])
-        clmns_map = [(py_clmns[i].lower() + fieds_map[clmn_tys[i]], str(clmns[i]).replace("_", " ")) for i in range(len(clmns))]
+        clmns_map = [(py_clmns[i].lower() + fieds_map[clmn_tys[i]], str(clmns[i]).replace("_", " ")) for i in
+                     range(len(clmns))]

        eng = lang.lower() == "english"  # is_english(txts)
        for ii, row in df.iterrows():
@@ -477,7 +482,9 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, lang="Chinese
 if __name__ == "__main__":
    import sys

+
    def dummy(prog=None, msg=""):
        pass

+
    chunk(sys.argv[1], callback=dummy)