Feat/monitor task (#11116)

### What problem does this PR solve?

Show task executor.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Lynn
2025-11-10 12:51:39 +08:00
committed by GitHub
parent 7423a5806e
commit d016a06fd5
7 changed files with 74 additions and 3 deletions

View File

@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import datetime
import json
import os
import requests
from timeit import default_timer as timer
@ -148,7 +150,7 @@ def check_ragflow_server_alive():
try:
url = f'http://{settings.HOST_IP}:{settings.HOST_PORT}/v1/system/ping'
if '0.0.0.0' in url:
url.replace('0.0.0.0', '127.0.0.1')
url = url.replace('0.0.0.0', '127.0.0.1')
response = requests.get(url)
if response.status_code == 200:
return {"status": "alive", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
@ -161,6 +163,26 @@ def check_ragflow_server_alive():
}
def check_task_executor_alive():
task_executor_heartbeats = {}
try:
task_executors = REDIS_CONN.smembers("TASKEXE")
now = datetime.now().timestamp()
for task_executor_id in task_executors:
heartbeats = REDIS_CONN.zrangebyscore(task_executor_id, now - 60 * 30, now)
heartbeats = [json.loads(heartbeat) for heartbeat in heartbeats]
task_executor_heartbeats[task_executor_id] = heartbeats
if task_executor_heartbeats:
return {"status": "alive", "message": task_executor_heartbeats}
else:
return {"status": "timeout", "message": "Not found any task executor."}
except Exception as e:
return {
"status": "timeout",
"message": f"error: {str(e)}"
}
def run_health_checks() -> tuple[dict, bool]:
result: dict[str, str | dict] = {}