Mirror of https://github.com/infiniflow/ragflow.git, synced 2025-12-08 20:42:30 +08:00
Apply PEP 8 formatting (#155)
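The changes below are mechanical PEP 8 cleanup: long calls are wrapped, imports are reordered, one-line with/if statements are split onto separate lines, and trailing comments are moved above the code they annotate. The commit does not say how the reformatting was produced; as a hedged sketch only (the tool choice, target path, and options here are assumptions, not taken from this commit), the same style of rewrite can be generated programmatically with autopep8:

import autopep8

# Read one of the touched sources, apply PEP 8 fixes, and write it back.
# autopep8 and these options are assumptions; the commit does not name a tool.
path = "deepdoc/vision/layout_recognizer.py"
with open(path) as f:
    source = f.read()

# aggressive=1 enables non-whitespace fixes such as wrapping long argument lists.
fixed = autopep8.fix_code(
    source,
    options={"aggressive": 1, "max_line_length": 79})

with open(path, "w") as f:
    f.write(fixed)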
@@ -24,18 +24,19 @@ from deepdoc.vision import Recognizer
 class LayoutRecognizer(Recognizer):
     labels = [
         "_background_",
         "Text",
         "Title",
         "Figure",
         "Figure caption",
         "Table",
         "Table caption",
         "Header",
         "Footer",
         "Reference",
         "Equation",
     ]

     def __init__(self, domain):
         try:
             model_dir = snapshot_download(
@@ -47,10 +48,12 @@ class LayoutRecognizer(Recognizer):
         except Exception as e:
             model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc")

-        super().__init__(self.labels, domain, model_dir)#os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
+        # os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
+        super().__init__(self.labels, domain, model_dir)
         self.garbage_layouts = ["footer", "header", "reference"]

-    def __call__(self, image_list, ocr_res, scale_factor=3, thr=0.2, batch_size=16, drop=True):
+    def __call__(self, image_list, ocr_res, scale_factor=3,
+                 thr=0.2, batch_size=16, drop=True):
         def __is_garbage(b):
             patt = [r"^•+$", r"(版权归©|免责条款|地址[::])", r"\.{3,}", "^[0-9]{1,2} / ?[0-9]{1,2}$",
                     r"^[0-9]{1,2} of [0-9]{1,2}$", "^http://[^ ]{12,}",
@@ -75,7 +78,8 @@ class LayoutRecognizer(Recognizer):
                     "top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
                     "page_number": pn,
                     } for b in lts]
-            lts = self.sort_Y_firstly(lts, np.mean([l["bottom"]-l["top"] for l in lts]) / 2)
+            lts = self.sort_Y_firstly(lts, np.mean(
+                [l["bottom"] - l["top"] for l in lts]) / 2)
             lts = self.layouts_cleanup(bxs, lts)
             page_layout.append(lts)

@@ -93,17 +97,20 @@ class LayoutRecognizer(Recognizer):
                         continue

                    ii = self.find_overlapped_with_threashold(bxs[i], lts_,
                                                              thr=0.4)
                    if ii is None:  # belong to nothing
                        bxs[i]["layout_type"] = ""
                        i += 1
                        continue
                    lts_[ii]["visited"] = True
                    keep_feats = [
-                        lts_[ii]["type"] == "footer" and bxs[i]["bottom"] < image_list[pn].size[1]*0.9/scale_factor,
-                        lts_[ii]["type"] == "header" and bxs[i]["top"] > image_list[pn].size[1]*0.1/scale_factor,
+                        lts_[
+                            ii]["type"] == "footer" and bxs[i]["bottom"] < image_list[pn].size[1] * 0.9 / scale_factor,
+                        lts_[
+                            ii]["type"] == "header" and bxs[i]["top"] > image_list[pn].size[1] * 0.1 / scale_factor,
                    ]
-                    if drop and lts_[ii]["type"] in self.garbage_layouts and not any(keep_feats):
+                    if drop and lts_[
+                            ii]["type"] in self.garbage_layouts and not any(keep_feats):
                        if lts_[ii]["type"] not in garbages:
                            garbages[lts_[ii]["type"]] = []
                        garbages[lts_[ii]["type"]].append(bxs[i]["text"])
@@ -111,7 +118,8 @@ class LayoutRecognizer(Recognizer):
                        continue

                    bxs[i]["layoutno"] = f"{ty}-{ii}"
-                    bxs[i]["layout_type"] = lts_[ii]["type"] if lts_[ii]["type"]!="equation" else "figure"
+                    bxs[i]["layout_type"] = lts_[ii]["type"] if lts_[
+                        ii]["type"] != "equation" else "figure"
                    i += 1

            for lt in ["footer", "header", "reference", "figure caption",
@@ -120,7 +128,7 @@ class LayoutRecognizer(Recognizer):

            # add box to figure layouts which has not text box
            for i, lt in enumerate(
-                [lt for lt in lts if lt["type"] in ["figure","equation"]]):
+                    [lt for lt in lts if lt["type"] in ["figure", "equation"]]):
                if lt.get("visited"):
                    continue
                lt = deepcopy(lt)
@@ -143,6 +151,3 @@ class LayoutRecognizer(Recognizer):

        ocr_res = [b for b in ocr_res if b["text"].strip() not in garbag_set]
        return ocr_res, page_layout
-
-
-
@@ -63,6 +63,7 @@ class DecodeImage(object):
         data['image'] = img
         return data

+
 class StandardizeImage(object):
     """normalize image
     Args:
@@ -707,4 +708,4 @@ def preprocess(im, preprocess_ops):
     im, im_info = decode_image(im, im_info)
     for operator in preprocess_ops:
         im, im_info = operator(im, im_info)
     return im, im_info
@@ -11,12 +11,20 @@
 # limitations under the License.
 #

-import os, sys
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../')))
-import numpy as np
-import argparse
-from deepdoc.vision import OCR, init_in_out
 from deepdoc.vision.seeit import draw_box
+from deepdoc.vision import OCR, init_in_out
+import argparse
+import numpy as np
+import os
+import sys
+sys.path.insert(
+    0,
+    os.path.abspath(
+        os.path.join(
+            os.path.dirname(
+                os.path.abspath(__file__)),
+            '../../')))


 def main(args):
     ocr = OCR()
@@ -26,14 +34,14 @@ def main(args):
         bxs = ocr(np.array(img))
         bxs = [(line[0], line[1][0]) for line in bxs]
         bxs = [{
             "text": t,
             "bbox": [b[0][0], b[0][1], b[1][0], b[-1][1]],
             "type": "ocr",
             "score": 1} for b, t in bxs if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]]
         img = draw_box(images[i], bxs, ["ocr"], 1.)
         img.save(outputs[i], quality=95)
-        with open(outputs[i] + ".txt", "w+") as f: f.write("\n".join([o["text"] for o in bxs]))
+        with open(outputs[i] + ".txt", "w+") as f:
+            f.write("\n".join([o["text"] for o in bxs]))


 if __name__ == "__main__":
@@ -42,6 +50,6 @@ if __name__ == "__main__":
                         help="Directory where to store images or PDFs, or a file path to a single image or PDF",
                         required=True)
     parser.add_argument('--output_dir', help="Directory where to store the output images. Default: './ocr_outputs'",
                         default="./ocr_outputs")
     args = parser.parse_args()
     main(args)
@@ -11,24 +11,35 @@
 # limitations under the License.
 #

-import os, sys
+from deepdoc.vision.seeit import draw_box
+from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out
+from api.utils.file_utils import get_project_base_directory
+import argparse
+import os
+import sys
 import re

 import numpy as np

-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../')))
-
-import argparse
-from api.utils.file_utils import get_project_base_directory
-from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out
-from deepdoc.vision.seeit import draw_box
+sys.path.insert(
+    0,
+    os.path.abspath(
+        os.path.join(
+            os.path.dirname(
+                os.path.abspath(__file__)),
+            '../../')))


 def main(args):
     images, outputs = init_in_out(args)
     if args.mode.lower() == "layout":
         labels = LayoutRecognizer.labels
-        detr = Recognizer(labels, "layout", os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
+        detr = Recognizer(
+            labels,
+            "layout",
+            os.path.join(
+                get_project_base_directory(),
+                "rag/res/deepdoc/"))
     if args.mode.lower() == "tsr":
         labels = TableStructureRecognizer.labels
         detr = TableStructureRecognizer()
@@ -39,7 +50,8 @@ def main(args):
         if args.mode.lower() == "tsr":
             #lyt = [t for t in lyt if t["type"] == "table column"]
             html = get_table_html(images[i], lyt, ocr)
-            with open(outputs[i]+".html", "w+") as f: f.write(html)
+            with open(outputs[i] + ".html", "w+") as f:
+                f.write(html)
         lyt = [{
             "type": t["label"],
             "bbox": [t["x0"], t["top"], t["x1"], t["bottom"]],
@@ -58,7 +70,7 @@ def get_table_html(img, tb_cpns, ocr):
         "bottom": b[-1][1],
         "layout_type": "table",
         "page_number": 0} for b, t in boxes if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]],
-        np.mean([b[-1][1]-b[0][1] for b,_ in boxes]) / 3
+        np.mean([b[-1][1] - b[0][1] for b, _ in boxes]) / 3
     )

     def gather(kwd, fzy=10, ption=0.6):
@@ -117,7 +129,7 @@ def get_table_html(img, tb_cpns, ocr):
 margin-bottom: 50px;
 border: 1px solid #e1e1e1;
 }

 caption {
 color: #6ac1ca;
 font-size: 20px;
@@ -126,25 +138,25 @@ def get_table_html(img, tb_cpns, ocr):
 font-weight: 600;
 margin-bottom: 10px;
 }

 ._table_1nkzy_11 table {
 width: 100%%;
 border-collapse: collapse;
 }

 th {
 color: #fff;
 background-color: #6ac1ca;
 }

 td:hover {
 background: #c1e8e8;
 }

 tr:nth-child(even) {
 background-color: #f2f2f2;
 }

 ._table_1nkzy_11 th,
 ._table_1nkzy_11 td {
 text-align: center;
@@ -157,7 +169,7 @@ def get_table_html(img, tb_cpns, ocr):
 %s
 </body>
 </html>
-"""% TableStructureRecognizer.construct_table(boxes, html=True)
+""" % TableStructureRecognizer.construct_table(boxes, html=True)
     return html


@@ -168,7 +180,10 @@ if __name__ == "__main__":
                         required=True)
     parser.add_argument('--output_dir', help="Directory where to store the output images. Default: './layouts_outputs'",
                         default="./layouts_outputs")
-    parser.add_argument('--threshold', help="A threshold to filter out detections. Default: 0.5", default=0.5)
+    parser.add_argument(
+        '--threshold',
+        help="A threshold to filter out detections. Default: 0.5",
+        default=0.5)
     parser.add_argument('--mode', help="Task mode: layout recognition or table structure recognition", choices=["layout", "tsr"],
                         default="layout")
     args = parser.parse_args()
@@ -44,7 +44,8 @@ class TableStructureRecognizer(Recognizer):
         except Exception as e:
             model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc")

-        super().__init__(self.labels, "tsr", model_dir)#os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
+        # os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
+        super().__init__(self.labels, "tsr", model_dir)

     def __call__(self, images, thr=0.2):
         tbls = super().__call__(images, thr)
@@ -138,7 +139,8 @@ class TableStructureRecognizer(Recognizer):
         i = 0
         while i < len(boxes):
             if TableStructureRecognizer.is_caption(boxes[i]):
-                if is_english: cap + " "
+                if is_english:
+                    cap + " "
                 cap += boxes[i]["text"]
                 boxes.pop(i)
                 i -= 1
@@ -164,7 +166,7 @@ class TableStructureRecognizer(Recognizer):
             lst_r = rows[-1]
             if lst_r[-1].get("R", "") != b.get("R", "") \
                     or (b["top"] >= btm - 3 and lst_r[-1].get("R", "-1") != b.get("R", "-2")
                         ): # new row
                 btm = b["bottom"]
                 b["rn"] += 1
                 rows.append([b])
@@ -214,9 +216,9 @@ class TableStructureRecognizer(Recognizer):
                    j += 1
                    continue
                f = (j > 0 and tbl[ii][j - 1] and tbl[ii]
                     [j - 1][0].get("text")) or j == 0
                ff = (j + 1 < len(tbl[ii]) and tbl[ii][j + 1] and tbl[ii]
                      [j + 1][0].get("text")) or j + 1 >= len(tbl[ii])
                if f and ff:
                    j += 1
                    continue
@@ -277,9 +279,9 @@ class TableStructureRecognizer(Recognizer):
                    i += 1
                    continue
                f = (i > 0 and tbl[i - 1][jj] and tbl[i - 1]
                     [jj][0].get("text")) or i == 0
                ff = (i + 1 < len(tbl) and tbl[i + 1][jj] and tbl[i + 1]
                      [jj][0].get("text")) or i + 1 >= len(tbl)
                if f and ff:
                    i += 1
                    continue
@@ -366,7 +368,8 @@ class TableStructureRecognizer(Recognizer):
                continue
            txt = ""
            if arr:
-                h = min(np.min([c["bottom"] - c["top"] for c in arr]) / 2, 10)
+                h = min(np.min([c["bottom"] - c["top"]
+                        for c in arr]) / 2, 10)
                txt = " ".join([c["text"]
                                for c in Recognizer.sort_Y_firstly(arr, h)])
            txts.append(txt)
@@ -438,8 +441,8 @@ class TableStructureRecognizer(Recognizer):
                        else "") + headers[j - 1][k]
                else:
                    headers[j][k] = headers[j - 1][k] \
                        + (de if headers[j - 1][k] else "") \
                        + headers[j][k]

        logging.debug(
            f">>>>>>>>>>>>>>>>>{cap}:SIZE:{rowno}X{clmno} Header: {hdr_rowno}")
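A quick way to confirm that the reformatted modules now satisfy the style this commit aims for is to run a PEP 8 checker over the touched package. A minimal sketch with pycodestyle follows; the tool and the path are assumptions and are not part of this commit:

import pycodestyle

# Report any remaining PEP 8 violations under deepdoc/vision (path is an assumption).
style = pycodestyle.StyleGuide(max_line_length=79)
report = style.check_files(["deepdoc/vision"])
print(f"{report.total_errors} PEP 8 problem(s) remaining")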