Fix errors detected by Ruff (#3918)

### What problem does this PR solve? Fix errors detected by Ruff ### Type of change - [x] Refactoring
2026-01-30 23:26:36 +08:00 · 2024-12-08 14:21:12 +08:00
parent e267a026f3
commit 0d68a6cd1b
97 changed files with 2558 additions and 1976 deletions
--- a/api/db/services/task_service.py
+++ b/api/db/services/task_service.py
@ -57,28 +57,33 @@ class TaskService(CommonService):
            Tenant.img2txt_id,
            Tenant.asr_id,
            Tenant.llm_id,
-            cls.model.update_time]
-        docs = cls.model.select(*fields) \
-            .join(Document, on=(cls.model.doc_id == Document.id)) \
-            .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
-            .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)) \
+            cls.model.update_time,
+        ]
+        docs = (
+            cls.model.select(*fields)
+            .join(Document, on=(cls.model.doc_id == Document.id))
+            .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id))
+            .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))
            .where(cls.model.id == task_id)
+        )
        docs = list(docs.dicts())
-        if not docs: return None
+        if not docs:
+            return None

        msg = "\nTask has been received."
-        prog = random.random() / 10.
+        prog = random.random() / 10.0
        if docs[0]["retry_count"] >= 3:
            msg = "\nERROR: Task is abandoned after 3 times attempts."
            prog = -1

-        cls.model.update(progress_msg=cls.model.progress_msg + msg,
-                         progress=prog,
-                         retry_count=docs[0]["retry_count"]+1
-                         ).where(
-            cls.model.id == docs[0]["id"]).execute()
+        cls.model.update(
+            progress_msg=cls.model.progress_msg + msg,
+            progress=prog,
+            retry_count=docs[0]["retry_count"] + 1,
+        ).where(cls.model.id == docs[0]["id"]).execute()

-        if docs[0]["retry_count"] >= 3: return None
+        if docs[0]["retry_count"] >= 3:
+            return None

        return docs[0]

@ -86,21 +91,44 @@ class TaskService(CommonService):
    @DB.connection_context()
    def get_ongoing_doc_name(cls):
        with DB.lock("get_task", -1):
-            docs = cls.model.select(*[Document.id, Document.kb_id, Document.location, File.parent_id]) \
-                .join(Document, on=(cls.model.doc_id == Document.id)) \
-                .join(File2Document, on=(File2Document.document_id == Document.id), join_type=JOIN.LEFT_OUTER) \
-                .join(File, on=(File2Document.file_id == File.id), join_type=JOIN.LEFT_OUTER) \
+            docs = (
+                cls.model.select(
+                    *[Document.id, Document.kb_id, Document.location, File.parent_id]
+                )
+                .join(Document, on=(cls.model.doc_id == Document.id))
+                .join(
+                    File2Document,
+                    on=(File2Document.document_id == Document.id),
+                    join_type=JOIN.LEFT_OUTER,
+                )
+                .join(
+                    File,
+                    on=(File2Document.file_id == File.id),
+                    join_type=JOIN.LEFT_OUTER,
+                )
                .where(
                    Document.status == StatusEnum.VALID.value,
                    Document.run == TaskStatus.RUNNING.value,
                    ~(Document.type == FileType.VIRTUAL.value),
                    cls.model.progress < 1,
-                    cls.model.create_time >= current_timestamp() - 1000 * 600
+                    cls.model.create_time >= current_timestamp() - 1000 * 600,
                )
+            )
            docs = list(docs.dicts())
-            if not docs: return []
+            if not docs:
+                return []

-            return list(set([(d["parent_id"] if d["parent_id"] else d["kb_id"], d["location"]) for d in docs]))
+            return list(
+                set(
+                    [
+                        (
+                            d["parent_id"] if d["parent_id"] else d["kb_id"],
+                            d["location"],
+                        )
+                        for d in docs
+                    ]
+                )
+            )

    @classmethod
    @DB.connection_context()
@ -118,28 +146,30 @@ class TaskService(CommonService):
    def update_progress(cls, id, info):
        if os.environ.get("MACOS"):
            if info["progress_msg"]:
-                cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
-                    cls.model.id == id).execute()
+                cls.model.update(
+                    progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]
+                ).where(cls.model.id == id).execute()
            if "progress" in info:
                cls.model.update(progress=info["progress"]).where(
-                    cls.model.id == id).execute()
+                    cls.model.id == id
+                ).execute()
            return

        with DB.lock("update_progress", -1):
            if info["progress_msg"]:
-                cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
-                    cls.model.id == id).execute()
+                cls.model.update(
+                    progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]
+                ).where(cls.model.id == id).execute()
            if "progress" in info:
                cls.model.update(progress=info["progress"]).where(
-                    cls.model.id == id).execute()
+                    cls.model.id == id
+                ).execute()


 def queue_tasks(doc: dict, bucket: str, name: str):
    def new_task():
-        return {
-            "id": get_uuid(),
-            "doc_id": doc["id"]
-        }
+        return {"id": get_uuid(), "doc_id": doc["id"]}
+
    tsks = []

    if doc["type"] == FileType.PDF.value:
@ -150,8 +180,8 @@ def queue_tasks(doc: dict, bucket: str, name: str):
        if doc["parser_id"] == "paper":
            page_size = doc["parser_config"].get("task_page_size", 22)
        if doc["parser_id"] in ["one", "knowledge_graph"] or not do_layout:
-            page_size = 10 ** 9
-        page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
+            page_size = 10**9
+        page_ranges = doc["parser_config"].get("pages") or [(1, 10**5)]
        for s, e in page_ranges:
            s -= 1
            s = max(0, s)
@ -177,4 +207,6 @@ def queue_tasks(doc: dict, bucket: str, name: str):
    DocumentService.begin2parse(doc["id"])

    for t in tsks:
-        assert REDIS_CONN.queue_product(SVR_QUEUE_NAME, message=t), "Can't access Redis. Please check the Redis' status."
+        assert REDIS_CONN.queue_product(
+            SVR_QUEUE_NAME, message=t
+        ), "Can't access Redis. Please check the Redis' status."