Support a wide range of encodings for text files. (#458)

### What problem does this PR solve?

#384

### Type of change

- [x] Performance Improvement
2025-12-08 20:42:30 +08:00 · 2024-04-19 18:02:53 +08:00
parent cda7b607cb
commit ed6081845a
19 changed files with 118 additions and 55 deletions
--- a/api/apps/api_app.py
+++ b/api/apps/api_app.py
@ -105,7 +105,7 @@ def stats():
        res = {
            "pv": [(o["dt"], o["pv"]) for o in objs],
            "uv": [(o["dt"], o["uv"]) for o in objs],
-            "speed": [(o["dt"], float(o["tokens"])/float(o["duration"])) for o in objs],
+            "speed": [(o["dt"], float(o["tokens"])/(float(o["duration"]+0.1))) for o in objs],
            "tokens": [(o["dt"], float(o["tokens"])/1000.) for o in objs],
            "round": [(o["dt"], o["round"]) for o in objs],
            "thumb_up": [(o["dt"], o["thumb_up"]) for o in objs]
@ -176,7 +176,6 @@ def completion():
        conv.reference.append(ans["reference"])
        conv.message.append({"role": "assistant", "content": ans["answer"]})
        API4ConversationService.append_message(conv.id, conv.to_dict())
-        APITokenService.APITokenService(token)
        return get_json_result(data=ans)
    except Exception as e:
        return server_error_response(e)
--- a/api/apps/user_app.py
+++ b/api/apps/user_app.py
@ -14,6 +14,7 @@
 #  limitations under the License.
 #
 import re
+from datetime import datetime

 from flask import request, session, redirect
 from werkzeug.security import generate_password_hash, check_password_hash
@ -22,7 +23,7 @@ from flask_login import login_required, current_user, login_user, logout_user
 from api.db.db_models import TenantLLM
 from api.db.services.llm_service import TenantLLMService, LLMService
 from api.utils.api_utils import server_error_response, validate_request
-from api.utils import get_uuid, get_format_time, decrypt, download_img
+from api.utils import get_uuid, get_format_time, decrypt, download_img, current_timestamp, datetime_format
 from api.db import UserTenantRole, LLMType
 from api.settings import RetCode, GITHUB_OAUTH, CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, API_KEY, \
    LLM_FACTORY, LLM_BASE_URL
@ -56,6 +57,8 @@ def login():
        response_data = user.to_json()
        user.access_token = get_uuid()
        login_user(user)
+        user.update_time = current_timestamp(),
+        user.update_date = datetime_format(datetime.now()),
        user.save()
        msg = "Welcome back!"
        return cors_reponse(data=response_data, auth=user.get_id(), retmsg=msg)
--- a/api/db/services/api_service.py
+++ b/api/db/services/api_service.py
@ -40,8 +40,8 @@ class API4ConversationService(CommonService):
    @classmethod
    @DB.connection_context()
    def append_message(cls, id, conversation):
-        cls.model.update_by_id(id, conversation)
-        return cls.model.update(round=cls.model.round + 1).where(id=id).execute()
+        cls.update_by_id(id, conversation)
+        return cls.model.update(round=cls.model.round + 1).where(cls.model.id==id).execute()

    @classmethod
    @DB.connection_context()
--- a/api/db/services/task_service.py
+++ b/api/db/services/task_service.py
@ -13,6 +13,8 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
+import random
+
 from peewee import Expression
 from api.db.db_models import DB
 from api.db import StatusEnum, FileType, TaskStatus
@ -26,7 +28,7 @@ class TaskService(CommonService):

    @classmethod
    @DB.connection_context()
-    def get_tasks(cls, tm, mod=0, comm=1, items_per_page=64):
+    def get_tasks(cls, tm, mod=0, comm=1, items_per_page=1, takeit=True):
        fields = [
            cls.model.id,
            cls.model.doc_id,
@ -45,20 +47,28 @@ class TaskService(CommonService):
            Tenant.img2txt_id,
            Tenant.asr_id,
            cls.model.update_time]
-        docs = cls.model.select(*fields) \
-            .join(Document, on=(cls.model.doc_id == Document.id)) \
-            .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
-            .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))\
-            .where(
-                Document.status == StatusEnum.VALID.value,
-                Document.run == TaskStatus.RUNNING.value,
-                ~(Document.type == FileType.VIRTUAL.value),
-                cls.model.progress == 0,
-                cls.model.update_time >= tm,
-                (Expression(cls.model.create_time, "%%", comm) == mod))\
-            .order_by(cls.model.update_time.asc())\
-            .paginate(1, items_per_page)
-        return list(docs.dicts())
+        with DB.lock("get_task", -1):
+            docs = cls.model.select(*fields) \
+                .join(Document, on=(cls.model.doc_id == Document.id)) \
+                .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
+                .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))\
+                .where(
+                    Document.status == StatusEnum.VALID.value,
+                    Document.run == TaskStatus.RUNNING.value,
+                    ~(Document.type == FileType.VIRTUAL.value),
+                    cls.model.progress == 0,
+                    #cls.model.update_time >= tm,
+                    #(Expression(cls.model.create_time, "%%", comm) == mod)
+                )\
+                .order_by(cls.model.update_time.asc())\
+                .paginate(0, items_per_page)
+            docs = list(docs.dicts())
+            if not docs: return []
+            if not takeit: return docs
+
+            cls.model.update(progress_msg=cls.model.progress_msg + "\n" + "Task has been received.", progress=random.random()/10.).where(
+                cls.model.id == docs[0]["id"]).execute()
+            return docs

    @classmethod
    @DB.connection_context()
@ -74,9 +84,10 @@ class TaskService(CommonService):
    @classmethod
    @DB.connection_context()
    def update_progress(cls, id, info):
-        if info["progress_msg"]:
-            cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
-                cls.model.id == id).execute()
-        if "progress" in info:
-            cls.model.update(progress=info["progress"]).where(
-                cls.model.id == id).execute()
+        with DB.lock("update_progress", -1):
+            if info["progress_msg"]:
+                cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
+                    cls.model.id == id).execute()
+            if "progress" in info:
+                cls.model.update(progress=info["progress"]).where(
+                    cls.model.id == id).execute()