Test chat API and refine ppt chunker (#42)

2026-01-31 15:45:08 +08:00 · 2024-01-23 19:45:36 +08:00
parent 34b2ab3b2f
commit e32ef75e99
10 changed files with 226 additions and 91 deletions
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@ -37,7 +37,7 @@ class GptTurbo(Base):
            model=self.model_name,
            messages=history,
            **gen_conf)
-        return res.choices[0].message.content.strip()
+        return res.choices[0].message.content.strip(), res.usage.completion_tokens


 from dashscope import Generation
@ -56,5 +56,5 @@ class QWenChat(Base):
            result_format='message'
        )
        if response.status_code == HTTPStatus.OK:
-            return response.output.choices[0]['message']['content']
-        return response.message
+            return response.output.choices[0]['message']['content'], response.usage.output_tokens
+        return response.message, 0
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@ -72,7 +72,7 @@ class GptV4(Base):
            messages=self.prompt(b64),
            max_tokens=max_tokens,
        )
-        return res.choices[0].message.content.strip()
+        return res.choices[0].message.content.strip(), res.usage.total_tokens


 class QWenCV(Base):
@ -87,5 +87,5 @@ class QWenCV(Base):
        response = MultiModalConversation.call(model=self.model_name,
                                               messages=self.prompt(self.image2base64(image)))
        if response.status_code == HTTPStatus.OK:
-            return response.output.choices[0]['message']['content']
-        return response.message
+            return response.output.choices[0]['message']['content'], response.usage.output_tokens
+        return response.message, 0
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@ -36,6 +36,9 @@ class Base(ABC):
    def encode(self, texts: list, batch_size=32):
+        # Abstract hook: the base class only raises; subclasses supply the embedding call.
        raise NotImplementedError("Please implement encode method!")

+    def encode_queries(self, text: str):
+        # Abstract hook for embedding a single query string; subclasses must override it.
+        # The message names this method so a missing override is reported accurately.
+        raise NotImplementedError("Please implement encode_queries method!")
+

 class HuEmbedding(Base):
    def __init__(self, key="", model_name=""):
@ -68,15 +71,18 @@ class HuEmbedding(Base):

 class OpenAIEmbed(Base):
+    # Embedding backend that delegates to the OpenAI client's embeddings endpoint.
    def __init__(self, key, model_name="text-embedding-ada-002"):
-        self.client = OpenAI(key)
+        # The key is passed by keyword (`api_key=`) rather than positionally.
+        self.client = OpenAI(api_key=key)
        self.model_name = model_name

    def encode(self, texts: list, batch_size=32):
-        token_count = 0
-        for t in texts: token_count += num_tokens_from_string(t)
+        # All texts are sent in one request; `batch_size` is not used in this body.
+        # Returns (array of embeddings, token total reported in `res.usage`).
        res = self.client.embeddings.create(input=texts,
                                            model=self.model_name)
-        return [d["embedding"] for d in res["data"]], token_count
+        return np.array([d.embedding for d in res.data]), res.usage.total_tokens
+
+    def encode_queries(self, text):
+        # Embeds one string, wrapped in a single-element list, and returns
+        # (embedding array, token total reported in `res.usage`).
+        res = self.client.embeddings.create(input=[text],
+                                            model=self.model_name)
+        return np.array(res.data[0].embedding), res.usage.total_tokens


 class QWenEmbed(Base):
@ -84,16 +90,28 @@ class QWenEmbed(Base):
        dashscope.api_key = key
        self.model_name = model_name

-    def encode(self, texts: list, batch_size=32, text_type="document"):
+    def encode(self, texts: list, batch_size=10):
+        # Embeds `texts` as documents, `batch_size` texts per request, each text cut
+        # to its first 2048 characters. Returns (array of embeddings, summed tokens).
        import dashscope
        res = []
        token_count = 0
-        for txt in texts:
+        texts = [txt[:2048] for txt in texts]
+        for i in range(0, len(texts), batch_size):
            resp = dashscope.TextEmbedding.call(
                model=self.model_name,
-                input=txt[:2048],
-                text_type=text_type
+                input=texts[i:i+batch_size],
+                text_type="document"
            )
-            res.append(resp["output"]["embeddings"][0]["embedding"])
-            token_count += resp["usage"]["total_tokens"]
-        return res, token_count
+            # Place each vector at its `text_index` so output order follows input order.
+            embds = [[]] * len(resp["output"]["embeddings"])
+            for e in resp["output"]["embeddings"]:
+                embds[e["text_index"]] = e["embedding"]
+            res.extend(embds)
+            # Usage is read from `total_tokens`, the key the replaced per-text loop used.
+            token_count += resp["usage"]["total_tokens"]
+        return np.array(res), token_count
+
+    def encode_queries(self, text):
+        # Embeds one query string (first 2048 characters) with text_type="query";
+        # returns (embedding array, tokens reported in the response usage).
+        # Imported locally, matching the function-scope import in `encode`.
+        import dashscope
+        resp = dashscope.TextEmbedding.call(
+            model=self.model_name,
+            input=text[:2048],
+            text_type="query"
+        )
+        # Usage is read from `total_tokens`, the same key `encode` accumulates.
+        return np.array(resp["output"]["embeddings"][0]["embedding"]), resp["usage"]["total_tokens"]