mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Refactor (#537)
### What problem does this PR solve? ### Type of change - [x] Refactoring
This commit is contained in:
@ -25,7 +25,7 @@ EmbeddingModel = {
|
||||
"Tongyi-Qianwen": HuEmbedding, #QWenEmbed,
|
||||
"ZHIPU-AI": ZhipuEmbed,
|
||||
"FastEmbed": FastEmbed,
|
||||
"QAnything": QAnythingEmbed
|
||||
"Youdao": YoudaoEmbed
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -229,19 +229,19 @@ class XinferenceEmbed(Base):
|
||||
return np.array(res.data[0].embedding), res.usage.total_tokens
|
||||
|
||||
|
||||
class QAnythingEmbed(Base):
|
||||
class YoudaoEmbed(Base):
|
||||
_client = None
|
||||
|
||||
def __init__(self, key=None, model_name="maidalun1020/bce-embedding-base_v1", **kwargs):
|
||||
from BCEmbedding import EmbeddingModel as qanthing
|
||||
if not QAnythingEmbed._client:
|
||||
if not YoudaoEmbed._client:
|
||||
try:
|
||||
print("LOADING BCE...")
|
||||
QAnythingEmbed._client = qanthing(model_name_or_path=os.path.join(
|
||||
YoudaoEmbed._client = qanthing(model_name_or_path=os.path.join(
|
||||
get_project_base_directory(),
|
||||
"rag/res/bce-embedding-base_v1"))
|
||||
except Exception as e:
|
||||
QAnythingEmbed._client = qanthing(
|
||||
YoudaoEmbed._client = qanthing(
|
||||
model_name_or_path=model_name.replace(
|
||||
"maidalun1020", "InfiniFlow"))
|
||||
|
||||
@ -251,10 +251,10 @@ class QAnythingEmbed(Base):
|
||||
for t in texts:
|
||||
token_count += num_tokens_from_string(t)
|
||||
for i in range(0, len(texts), batch_size):
|
||||
embds = QAnythingEmbed._client.encode(texts[i:i + batch_size])
|
||||
embds = YoudaoEmbed._client.encode(texts[i:i + batch_size])
|
||||
res.extend(embds)
|
||||
return np.array(res), token_count
|
||||
|
||||
def encode_queries(self, text):
|
||||
embds = QAnythingEmbed._client.encode([text])
|
||||
embds = YoudaoEmbed._client.encode([text])
|
||||
return np.array(embds[0]), num_tokens_from_string(text)
|
||||
|
||||
43
rag/svr/cache_file_svr.py
Normal file
43
rag/svr/cache_file_svr.py
Normal file
@ -0,0 +1,43 @@
|
||||
import random
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from api.db.db_models import close_connection
|
||||
from api.db.services.task_service import TaskService
|
||||
from rag.utils import MINIO
|
||||
from rag.utils.redis_conn import REDIS_CONN
|
||||
|
||||
|
||||
def collect():
    """Fetch the documents whose files should be pre-cached.

    Calls ``TaskService.get_ongoing_doc_name()``. When it yields nothing
    (an empty collection or ``None``), sleep for one second so the polling
    caller does not spin, and return ``None``.

    Returns:
        The value returned by ``TaskService.get_ongoing_doc_name()`` when it
        is non-empty, otherwise ``None``.
    """
    doc_locations = TaskService.get_ongoing_doc_name()
    # Truthiness covers both "empty" and "None"; the previous len() check
    # raised TypeError on None.
    if not doc_locations:
        time.sleep(1)
        return None
    return doc_locations
|
||||
|
||||
def main():
    """Run one caching pass over the documents returned by ``collect()``.

    For every ``(kb_id, loc)`` pair, the object is read with
    ``MINIO.get(kb_id, loc)`` and stored through ``REDIS_CONN.transaction``
    under the key ``"<kb_id>/<loc>"`` with an expiry argument of ``12 * 60``,
    unless that key already exists. Pairs are skipped while
    ``REDIS_CONN.is_alive()`` is false.

    A failure on one pair is reported and does not stop the remaining pairs.
    """
    locations = collect()
    if not locations:
        return
    print("TASKS:", len(locations))
    for kb_id, loc in locations:
        try:
            if not REDIS_CONN.is_alive():
                continue
            key = "{}/{}".format(kb_id, loc)
            if REDIS_CONN.exist(key):
                # Already cached; nothing to fetch.
                continue
            file_bin = MINIO.get(kb_id, loc)
            REDIS_CONN.transaction(key, file_bin, 12 * 60)
            print("CACHE:", loc)
        except Exception:
            # print_exc() reports the exception being handled. The previous
            # traceback.print_stack(e) passed an exception where a frame
            # object is expected and raised AttributeError itself.
            traceback.print_exc()
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Poll forever: run one caching pass, release the database connection
    # used by that pass, then pause for a second before the next one.
    while True:
        main()
        close_connection()
        time.sleep(1)
|
||||
@ -167,7 +167,7 @@ def update_progress():
|
||||
info = {
|
||||
"process_duation": datetime.timestamp(
|
||||
datetime.now()) -
|
||||
d["process_begin_at"].timestamp(),
|
||||
d["process_begin_at"].timestamp(),
|
||||
"run": status}
|
||||
if prg != 0:
|
||||
info["progress"] = prg
|
||||
|
||||
@ -107,8 +107,14 @@ def get_minio_binary(bucket, name):
|
||||
global MINIO
|
||||
if REDIS_CONN.is_alive():
|
||||
try:
|
||||
for _ in range(30):
|
||||
if REDIS_CONN.exist("{}/{}".format(bucket, name)):
|
||||
time.sleep(1)
|
||||
break
|
||||
time.sleep(1)
|
||||
r = REDIS_CONN.get("{}/{}".format(bucket, name))
|
||||
if r: return r
|
||||
cron_logger.warning("Cache missing: {}".format(name))
|
||||
except Exception as e:
|
||||
cron_logger.warning("Get redis[EXCEPTION]:" + str(e))
|
||||
return MINIO.get(bucket, name)
|
||||
|
||||
@ -56,7 +56,6 @@ class HuMinio(object):
|
||||
except Exception as e:
|
||||
minio_logger.error(f"Fail rm {bucket}/{fnm}: " + str(e))
|
||||
|
||||
|
||||
def get(self, bucket, fnm):
|
||||
for _ in range(1):
|
||||
try:
|
||||
|
||||
@ -25,6 +25,14 @@ class RedisDB:
|
||||
def is_alive(self):
|
||||
return self.REDIS is not None
|
||||
|
||||
def exist(self, k):
    """Report whether key ``k`` is present in Redis.

    Args:
        k: Key to look up.

    Returns:
        The result of ``self.REDIS.exists(k)`` on success. ``False`` when
        there is no connection or the lookup raises; in the latter case the
        connection is re-opened via ``self.__open__()``.
    """
    if not self.REDIS:
        # Explicit False (not an implicit None) so failure paths match the
        # other methods of this class.
        return False
    try:
        return self.REDIS.exists(k)
    except Exception as e:
        logging.warning("[EXCEPTION]exist" + str(k) + "||" + str(e))
        self.__open__()
    return False
|
||||
|
||||
def get(self, k):
|
||||
if not self.REDIS: return
|
||||
try:
|
||||
@ -51,5 +59,16 @@ class RedisDB:
|
||||
self.__open__()
|
||||
return False
|
||||
|
||||
def transaction(self, key, value, exp=3600):
    """Store ``value`` under ``key`` through a transactional pipeline.

    The write is issued as ``set(key, value, exp, nx=True)`` on a pipeline
    created with ``transaction=True`` and then executed.

    Args:
        key: Key to write.
        value: Payload to store.
        exp: Expiry forwarded as the third positional argument of ``set``
            (presumably seconds; confirm against the Redis client in use).

    Returns:
        ``True`` when the pipeline executed without raising, ``False`` when
        any step raised. On failure the error is logged and the connection
        is re-opened via ``self.__open__()``.
    """
    try:
        pipe = self.REDIS.pipeline(transaction=True)
        pipe.set(key, value, exp, nx=True)
        pipe.execute()
    except Exception as err:
        logging.warning("[EXCEPTION]set" + str(key) + "||" + str(err))
        self.__open__()
        return False
    return True
|
||||
|
||||
|
||||
REDIS_CONN = RedisDB()
|
||||
Reference in New Issue
Block a user