Feature/feat1017 (#2872)

### What problem does this PR solve?

1. fix: mind map display error in the knowledge graph, caused by a change in the ```@antv/g6``` version
2. feat: support configuring the number of concurrent threads in the graph extractor
3. fix: updating used tokens failed for a tenant
4. feat: support configuring the timeout for the LLM
5. fix: regex error in the graph extractor
6. feat: support Qwen rerank (```gte-rerank```)
7. fix: timeout handling in the knowledge graph indexing process. Chat now uses streaming output, and it is also configurable.
8. feat: ```qwen-long``` model configuration

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: chongchuanbing <chongchuanbing@gmail.com>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2026-01-31 07:36:46 +08:00 · 2024-10-21 12:11:08 +08:00
parent 4bdf3fd48e
commit ac26d09a59
8 changed files with 95 additions and 35 deletions
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@ -167,11 +167,13 @@ class TenantLLMService(CommonService):
        else:
            assert False, "LLM type error"

+        llm_name = mdlnm.split("@")[0] if "@" in mdlnm else mdlnm
+
        num = 0
        try:
-            for u in cls.query(tenant_id=tenant_id, llm_name=mdlnm):
+            for u in cls.query(tenant_id=tenant_id, llm_name=llm_name):
                num += cls.model.update(used_tokens=u.used_tokens + used_tokens)\
-                    .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\
+                    .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == llm_name)\
                    .execute()
        except Exception as e:
            pass
@ -207,7 +209,7 @@ class LLMBundle(object):
        if not TenantLLMService.increase_usage(
                self.tenant_id, self.llm_type, used_tokens):
            database_logger.error(
-                "Can't update token usage for {}/EMBEDDING".format(self.tenant_id))
+                "Can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
        return emd, used_tokens

    def encode_queries(self, query: str):
@ -215,7 +217,7 @@ class LLMBundle(object):
        if not TenantLLMService.increase_usage(
                self.tenant_id, self.llm_type, used_tokens):
            database_logger.error(
-                "Can't update token usage for {}/EMBEDDING".format(self.tenant_id))
+                "Can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
        return emd, used_tokens

    def similarity(self, query: str, texts: list):
@ -223,7 +225,7 @@ class LLMBundle(object):
        if not TenantLLMService.increase_usage(
                self.tenant_id, self.llm_type, used_tokens):
            database_logger.error(
-                "Can't update token usage for {}/RERANK".format(self.tenant_id))
+                "Can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
        return sim, used_tokens

    def describe(self, image, max_tokens=300):
@ -231,7 +233,7 @@ class LLMBundle(object):
        if not TenantLLMService.increase_usage(
                self.tenant_id, self.llm_type, used_tokens):
            database_logger.error(
-                "Can't update token usage for {}/IMAGE2TEXT".format(self.tenant_id))
+                "Can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
        return txt

    def transcription(self, audio):
@ -239,7 +241,7 @@ class LLMBundle(object):
        if not TenantLLMService.increase_usage(
                self.tenant_id, self.llm_type, used_tokens):
            database_logger.error(
-                "Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id))
+                "Can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
        return txt

    def tts(self, text):
@ -254,10 +256,10 @@ class LLMBundle(object):

    def chat(self, system, history, gen_conf):
        txt, used_tokens = self.mdl.chat(system, history, gen_conf)
-        if not TenantLLMService.increase_usage(
+        if isinstance(txt, int) and not TenantLLMService.increase_usage(
                self.tenant_id, self.llm_type, used_tokens, self.llm_name):
            database_logger.error(
-                "Can't update token usage for {}/CHAT".format(self.tenant_id))
+                "Can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
        return txt

    def chat_streamly(self, system, history, gen_conf):
@ -266,6 +268,6 @@ class LLMBundle(object):
                if not TenantLLMService.increase_usage(
                        self.tenant_id, self.llm_type, txt, self.llm_name):
                    database_logger.error(
-                        "Can't update token usage for {}/CHAT".format(self.tenant_id))
+                        "Can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
                return
            yield txt