Made task_executor async to speedup parsing (#5530)

### What problem does this PR solve?

Made task_executor async to speedup parsing

### Type of change

- [x] Performance Improvement
This commit is contained in:
Zhichang Yu
2025-03-03 18:59:49 +08:00
committed by GitHub
parent abac2ca2c5
commit c813c1ff4c
22 changed files with 576 additions and 1005 deletions

View File

@ -24,7 +24,7 @@ from rag import settings
from rag.utils import singleton
class Payload:
class RedisMsg:
def __init__(self, consumer, queue_name, group_name, msg_id, message):
self.__consumer = consumer
self.__queue_name = queue_name
@ -43,6 +43,9 @@ class Payload:
def get_message(self):
return self.__message
def get_msg_id(self):
return self.__msg_id
@singleton
class RedisDB:
@ -206,9 +209,8 @@ class RedisDB:
)
return False
def queue_consumer(
self, queue_name, group_name, consumer_name, msg_id=b">"
) -> Payload:
def queue_consumer(self, queue_name, group_name, consumer_name, msg_id=b">") -> RedisMsg:
"""https://redis.io/docs/latest/commands/xreadgroup/"""
try:
group_info = self.REDIS.xinfo_groups(queue_name)
if not any(e["name"] == group_name for e in group_info):
@ -217,15 +219,17 @@ class RedisDB:
"groupname": group_name,
"consumername": consumer_name,
"count": 1,
"block": 10000,
"block": 5,
"streams": {queue_name: msg_id},
}
messages = self.REDIS.xreadgroup(**args)
if not messages:
return None
stream, element_list = messages[0]
if not element_list:
return None
msg_id, payload = element_list[0]
res = Payload(self.REDIS, queue_name, group_name, msg_id, payload)
res = RedisMsg(self.REDIS, queue_name, group_name, msg_id, payload)
return res
except Exception as e:
if "key" in str(e):
@ -239,30 +243,24 @@ class RedisDB:
)
return None
def get_unacked_for(self, consumer_name, queue_name, group_name):
def get_unacked_iterator(self, queue_name, group_name, consumer_name):
try:
group_info = self.REDIS.xinfo_groups(queue_name)
if not any(e["name"] == group_name for e in group_info):
return
pendings = self.REDIS.xpending_range(
queue_name,
group_name,
min=0,
max=10000000000000,
count=1,
consumername=consumer_name,
)
if not pendings:
return
msg_id = pendings[0]["message_id"]
msg = self.REDIS.xrange(queue_name, min=msg_id, count=1)
_, payload = msg[0]
return Payload(self.REDIS, queue_name, group_name, msg_id, payload)
current_min = 0
while True:
payload = self.queue_consumer(queue_name, group_name, consumer_name, current_min)
if not payload:
return
current_min = payload.get_msg_id()
logging.info(f"RedisDB.get_unacked_iterator {consumer_name} msg_id {current_min}")
yield payload
except Exception as e:
if "key" in str(e):
return
logging.exception(
"RedisDB.get_unacked_for " + consumer_name + " got exception: " + str(e)
"RedisDB.get_unacked_iterator " + consumer_name + " got exception: "
)
self.__open__()