feat: Recover pending tasks when a pod restarts. (#7073)

### What problem does this PR solve?

If you deploy Ragflow using Kubernetes, the hostname will change during
a rolling update. This causes the consumer name of the task executor to
change, making it impossible to schedule tasks that were previously in a
pending state.
To address this, I introduced a recovery task that scans these pending
messages and re-publishes them, allowing the tasks to continue being
processed.

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

---------

Co-authored-by: liuzhenghua-jk <liuzhenghua-jk@360shuke.com>
This commit is contained in:
liuzhenghua
2025-04-19 16:18:51 +08:00
committed by GitHub
parent 487aed419e
commit d4dbdfb61d
2 changed files with 92 additions and 2 deletions

View File

@ -282,6 +282,28 @@ class RedisDB:
)
self.__open__()
def get_pending_msg(self, queue, group_name, count: int = 10):
    """Fetch pending-entry records of a consumer group on a stream.

    Calls ``self.REDIS.xpending_range`` over the full ID range
    (``'-'`` to ``'+'``).

    Args:
        queue: Name of the stream to inspect.
        group_name: Name of the consumer group whose pending entries are
            requested.
        count: Maximum number of entries to request in one call. Defaults
            to 10, the value that used to be hard-coded here.

    Returns:
        Whatever ``xpending_range`` returns on success, or an empty list
        when the call raises.
    """
    try:
        return self.REDIS.xpending_range(queue, group_name, '-', '+', count)
    except Exception as e:
        # Errors whose text contains 'No such key' are deliberately not
        # logged (presumably the stream has not been created yet -- confirm).
        if 'No such key' not in str(e):
            logging.warning(
                "RedisDB.get_pending_msg " + str(queue) + " got exception: " + str(e)
            )
    return []
def requeue_msg(self, queue: str, group_name: str, msg_id: str):
    """Re-publish one stream entry and acknowledge the original.

    Reads the entry with ID ``msg_id`` from ``queue``, appends its fields
    to the same stream as a new entry, then acknowledges ``msg_id`` for
    ``group_name``. If no entry with that ID is returned, nothing is added
    or acknowledged.

    Args:
        queue: Name of the stream.
        group_name: Consumer group for which the original entry is acked.
        msg_id: ID of the entry to re-publish.

    Returns:
        None. Any exception is logged as a warning and swallowed.
    """
    try:
        # Using msg_id as both range bounds selects at most that one entry.
        messages = self.REDIS.xrange(queue, msg_id, msg_id)
        if messages:
            # Each result is indexed as (id, fields); re-add only the fields.
            self.REDIS.xadd(queue, messages[0][1])
            # NOTE(review): the ack is kept under the `if`, so an entry that
            # is gone from the stream stays pending -- confirm this is intended.
            self.REDIS.xack(queue, group_name, msg_id)
    except Exception as e:
        # Label the warning with this method's own name so log readers are
        # pointed at the right place.
        logging.warning(
            "RedisDB.requeue_msg " + str(queue) + " got exception: " + str(e)
        )
def queue_info(self, queue, group_name) -> dict | None:
try:
groups = self.REDIS.xinfo_groups(queue)
@ -301,6 +323,16 @@ class RedisDB:
"""
return bool(self.lua_delete_if_equal(keys=[key], args=[expected_value], client=self.REDIS))
def delete(self, key) -> bool:
    """Delete ``key`` through ``self.REDIS``.

    Args:
        key: The key to remove.

    Returns:
        True when the delete call completes without raising. False when it
        raises; in that case a warning is logged and ``self.__open__()`` is
        called before returning.
    """
    try:
        self.REDIS.delete(key)
    except Exception as err:
        message = "RedisDB.delete " + str(key) + " got exception: " + str(err)
        logging.warning(message)
        # Re-run the open routine after a failed call.
        self.__open__()
        return False
    else:
        return True
# Create a RedisDB instance when this statement runs and bind it to REDIS_CONN
# (presumably the connection object shared by importers -- TODO confirm).
REDIS_CONN = RedisDB()