diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index 758d471bc..2060da624 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -542,6 +542,7 @@ class GeminiCV(Base):
             yield response.usage_metadata.total_token_count
         else:
             yield 0
 
+
 class NvidiaCV(Base):
     _FACTORY_NAME = "NVIDIA"
 
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index 9edd6afe6..23a0dc214 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -420,7 +420,6 @@ def init_kb(row, vector_size: int):
     return settings.docStoreConn.createIdx(idxnm, row.get("kb_id", ""), vector_size)
 
 
-@timeout(60*20)
 async def embedding(docs, mdl, parser_config=None, callback=None):
     if parser_config is None:
         parser_config = {}
@@ -441,10 +440,15 @@ async def embedding(docs, mdl, parser_config=None, callback=None):
             tts = np.concatenate([vts for _ in range(len(tts))], axis=0)
         tk_count += c
 
+    @timeout(5)
+    def batch_encode(txts):
+        nonlocal mdl
+        return mdl.encode([truncate(c, mdl.max_length-10) for c in txts])
+
     cnts_ = np.array([])
     for i in range(0, len(cnts), EMBEDDING_BATCH_SIZE):
         async with embed_limiter:
-            vts, c = await trio.to_thread.run_sync(lambda: mdl.encode([truncate(c, mdl.max_length-10) for c in cnts[i: i + EMBEDDING_BATCH_SIZE]]))
+            vts, c = await trio.to_thread.run_sync(lambda: batch_encode(cnts[i: i + EMBEDDING_BATCH_SIZE]))
         if len(cnts_) == 0:
             cnts_ = vts
         else: