mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: add code_executor_manager (#7814)
### What problem does this PR solve?

Add code_executor_manager. #4977.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
15
sandbox/executor_manager/core/__init__.py
Normal file
15
sandbox/executor_manager/core/__init__.py
Normal file
@ -0,0 +1,15 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
44
sandbox/executor_manager/core/config.py
Normal file
44
sandbox/executor_manager/core/config.py
Normal file
@ -0,0 +1,44 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
from util import format_timeout_duration, parse_timeout_duration
|
||||
|
||||
from core.container import init_containers, teardown_containers
|
||||
from core.logger import logger
|
||||
|
||||
# Module-level default for the global sandbox timeout.
# NOTE(review): the unit is not visible here — presumably seconds; confirm against parse_timeout_duration.
TIMEOUT = 10
|
||||
|
||||
|
||||
@asynccontextmanager
async def _lifespan(app: FastAPI):
    """Manage the sandbox container pool around the application's lifetime.

    Before yielding, reads the pool size from the
    ``SANDBOX_EXECUTOR_MANAGER_POOL_SIZE`` environment variable (default 1),
    initializes the containers and logs how many became available. After the
    context exits, the pool is torn down.

    Args:
        app: The FastAPI application this lifespan is attached to (not used
            by the body).

    Raises:
        ValueError: If ``SANDBOX_EXECUTOR_MANAGER_POOL_SIZE`` is set to a value
            that ``int()`` cannot parse.
    """
    size = int(os.getenv("SANDBOX_EXECUTOR_MANAGER_POOL_SIZE", 1))

    success_count, total_task_count = await init_containers(size)
    logger.info(f"\n📊 Container pool initialization complete: {success_count}/{total_task_count} available")

    try:
        yield
    finally:
        # An exception raised inside the managed block is re-raised at the
        # ``yield``; ``finally`` guarantees the containers are still removed.
        await teardown_containers()
|
||||
|
||||
|
||||
def init():
    """Load the global timeout from the environment and return the lifespan.

    Reads ``SANDBOX_TIMEOUT``, parses it with ``parse_timeout_duration``,
    stores the result in the module-level ``TIMEOUT`` and logs it.

    Returns:
        The ``_lifespan`` async context manager factory.
    """
    # Without this declaration the assignment below would only bind a
    # function-local name and the module-level TIMEOUT would keep its default.
    # NOTE(review): modules that did ``from core.config import TIMEOUT`` before
    # init() ran still hold the old value; they should read ``config.TIMEOUT``.
    global TIMEOUT
    TIMEOUT = parse_timeout_duration(os.getenv("SANDBOX_TIMEOUT"))
    logger.info(f"Global timeout: {format_timeout_duration(TIMEOUT)}")
    return _lifespan
|
||||
190
sandbox/executor_manager/core/container.py
Normal file
190
sandbox/executor_manager/core/container.py
Normal file
@ -0,0 +1,190 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import asyncio
|
||||
import contextlib
|
||||
import os
|
||||
import time
|
||||
from queue import Empty, Queue
|
||||
from threading import Lock
|
||||
|
||||
from models.enums import SupportLanguage
|
||||
from util import env_setting_enabled, is_valid_memory_limit
|
||||
from utils.common import async_run_command
|
||||
|
||||
from core.logger import logger
|
||||
|
||||
# Per-language queues holding the names of containers that are currently
# available; the entries are (re)created by init_containers().
_CONTAINER_QUEUES: dict[SupportLanguage, Queue] = {}
|
||||
# Module-wide threading lock taken around container-pool operations.
_CONTAINER_LOCK: Lock = Lock()
|
||||
|
||||
|
||||
async def init_containers(size: int) -> tuple[int, int]:
    """Create the container pool: ``size`` Python and ``size`` Node.js containers.

    The per-language queues are replaced with fresh ones, then all containers
    are prepared concurrently. Containers are named ``sandbox_python_<i>`` and
    ``sandbox_nodejs_<i>``.

    Args:
        size: Number of containers to create per language.

    Returns:
        ``(success_count, total_task_count)``: how many containers were
        prepared successfully and how many preparations were attempted.
    """
    global _CONTAINER_QUEUES
    with _CONTAINER_LOCK:
        # Freshly constructed queues are empty, so no draining is required.
        _CONTAINER_QUEUES = {SupportLanguage.PYTHON: Queue(), SupportLanguage.NODEJS: Queue()}

    create_tasks = []
    for i in range(size):
        name = f"sandbox_python_{i}"
        logger.info(f"🛠️ Creating Python container {i + 1}/{size}")
        create_tasks.append(_prepare_container(name, SupportLanguage.PYTHON))

        name = f"sandbox_nodejs_{i}"
        logger.info(f"🛠️ Creating Node.js container {i + 1}/{size}")
        create_tasks.append(_prepare_container(name, SupportLanguage.NODEJS))

    # return_exceptions=True keeps one failing container from cancelling the
    # others; the exceptions come back as result values instead of raising.
    results = await asyncio.gather(*create_tasks, return_exceptions=True)
    for result in results:
        if isinstance(result, BaseException):
            # Surface failures that would otherwise be silently counted as "not successful".
            logger.error(f"❌ Container preparation raised an exception: {result!r}")

    success_count = sum(1 for r in results if r is True)
    total_task_count = len(create_tasks)
    return success_count, total_task_count
|
||||
|
||||
|
||||
async def teardown_containers():
    """Force-remove every container that is currently waiting in the pool queues.

    Only containers present in the queues are removed; a container that has
    been taken out of its queue and not yet released is not touched. Removal is
    best-effort: a failure for one container is logged and the remaining
    containers are still processed.
    """
    # Drain the queues synchronously and release the lock before awaiting.
    # Holding a threading.Lock across an ``await`` would block the event loop
    # thread as soon as another coroutine tried to acquire the same lock.
    names = []
    with _CONTAINER_LOCK:
        # Iterating the dict also covers the case where the pool was never
        # initialized (empty dict) instead of raising KeyError.
        for pending in _CONTAINER_QUEUES.values():
            while True:
                try:
                    names.append(pending.get_nowait())
                except Empty:
                    break

    for name in names:
        try:
            await async_run_command("docker", "rm", "-f", name, timeout=5)
        except Exception as e:
            logger.error(f"❌ Failed to remove container {name}: {e}")
|
||||
|
||||
|
||||
async def _prepare_container(name: str, language: SupportLanguage) -> bool:
    """Build one pool container from scratch and enqueue it when it comes up.

    Args:
        name: Docker container name to (re)create.
        language: Language whose queue receives the container.

    Returns:
        True if the container was created and queued, False otherwise.
    """
    # Best-effort cleanup of a leftover container with the same name; any
    # failure here (for example, the container does not exist) is ignored.
    with contextlib.suppress(Exception):
        await async_run_command("docker", "rm", "-f", name, timeout=5)

    created = await create_container(name, language)
    if not created:
        return False

    _CONTAINER_QUEUES[language].put(name)
    return True
|
||||
|
||||
|
||||
async def create_container(name: str, language: SupportLanguage) -> bool:
    """Start a sandbox container via ``docker run`` and confirm it is running.

    The container runs detached under the ``runsc`` runtime with a read-only
    root filesystem, tmpfs mounts for ``/workspace`` and ``/tmp``, user
    ``nobody`` and a memory limit. The limit comes from ``SANDBOX_MAX_MEMORY``
    when it is set and valid, otherwise ``256m``. A seccomp profile is added
    when ``SANDBOX_ENABLE_SECCOMP`` is enabled. For Node.js containers,
    ``/app/node_modules`` is copied into ``/workspace`` after start-up.

    Args:
        name: Docker container name.
        language: Selects the base image (Python or Node.js).

    Returns:
        True if every docker command exited with 0 and the container reports
        itself as running; False on any failure or exception.
    """
    command = [
        "docker", "run", "-d",
        "--runtime=runsc",
        "--name", name,
        "--read-only",
        "--tmpfs", "/workspace:rw,exec,size=100M,uid=65534,gid=65534",
        "--tmpfs", "/tmp:rw,exec,size=50M",
        "--user", "nobody",
        "--workdir", "/workspace",
    ]

    # Resolve the memory limit: unset/empty -> default, invalid -> default.
    configured_memory = os.getenv("SANDBOX_MAX_MEMORY")
    if not configured_memory:
        logger.info("Set default SANDBOX_MAX_MEMORY: 256m")
        memory_limit = "256m"
    elif is_valid_memory_limit(configured_memory):
        logger.info(f"SANDBOX_MAX_MEMORY: {configured_memory}")
        memory_limit = configured_memory
    else:
        logger.info("Invalid SANDBOX_MAX_MEMORY, using default value: 256m")
        memory_limit = "256m"
    command += ["--memory", memory_limit]

    if env_setting_enabled("SANDBOX_ENABLE_SECCOMP", "false"):
        logger.info(f"SANDBOX_ENABLE_SECCOMP: {os.getenv('SANDBOX_ENABLE_SECCOMP')}")
        command += ["--security-opt", "seccomp=/app/seccomp-profile-default.json"]

    # The image name is the final positional argument of ``docker run``.
    image = None
    if language == SupportLanguage.PYTHON:
        image = os.getenv("SANDBOX_BASE_PYTHON_IMAGE", "sandbox-base-python:latest")
    elif language == SupportLanguage.NODEJS:
        image = os.getenv("SANDBOX_BASE_NODEJS_IMAGE", "sandbox-base-nodejs:latest")
    if image is not None:
        command.append(image)

    logger.info(f"Sandbox config:\n\t {command}")

    try:
        exit_code, _, err_output = await async_run_command(*command, timeout=10)
        if exit_code != 0:
            logger.error(f"❌ Container creation failed {name}: {err_output}")
            return False

        if language == SupportLanguage.NODEJS:
            # The root filesystem is read-only, so dependencies are copied
            # into the writable /workspace tmpfs.
            exit_code, _, err_output = await async_run_command(
                "docker", "exec", name, "bash", "-c", "cp -a /app/node_modules /workspace/", timeout=10
            )
            if exit_code != 0:
                logger.error(f"❌ Failed to prepare dependencies for {name}: {err_output}")
                return False

        is_up = await container_is_running(name)
        return is_up
    except Exception as exc:
        logger.error(f"❌ Container creation exception {name}: {exc!s}")
        return False
|
||||
|
||||
|
||||
async def recreate_container(name: str, language: SupportLanguage) -> bool:
    """Force-remove the named container and create it again.

    Args:
        name: Docker container name to replace.
        language: Language passed through to ``create_container``.

    Returns:
        The result of ``create_container``; False if removal or creation
        raised an exception.
    """
    logger.info(f"🛠️ Recreating container: {name}")
    try:
        await async_run_command("docker", "rm", "-f", name, timeout=5)
        recreated = await create_container(name, language)
        return recreated
    except Exception as exc:
        logger.error(f"❌ Container {name} recreation failed: {exc!s}")
        return False
|
||||
|
||||
|
||||
async def release_container(name: str, language: SupportLanguage):
    """Return a container to the pool, recreating it first if it has died.

    If the container is still running, it is put back on its language queue.
    Otherwise it is recreated and, on success, queued again. If recreation
    fails, the container is not returned to the pool and an error is logged.

    Args:
        name: Docker container name being released.
        language: Language whose queue the container belongs to.
    """
    # The health check and recreation are awaited WITHOUT holding the lock.
    # _CONTAINER_LOCK is a threading.Lock: if it were held across an ``await``,
    # any other coroutine trying to acquire it would block the event loop
    # thread, and the holder could never resume to release it.
    if await container_is_running(name):
        with _CONTAINER_LOCK:
            _CONTAINER_QUEUES[language].put(name)
            available = _CONTAINER_QUEUES[language].qsize()
        logger.info(f"🟢 Released container: {name} (remaining available: {available})")
        return

    logger.warning(f"⚠️ Container {name} has crashed, attempting to recreate...")
    if await recreate_container(name, language):
        with _CONTAINER_LOCK:
            _CONTAINER_QUEUES[language].put(name)
        logger.info(f"✅ Container {name} successfully recreated and returned to queue")
    else:
        # Make the shrinking pool visible instead of dropping the container silently.
        logger.error(f"❌ Container {name} could not be recreated and was not returned to the queue")
|
||||
|
||||
|
||||
async def allocate_container_blocking(language: SupportLanguage, timeout=10) -> str:
    """Wait for an available container of the given language and return its name.

    Polls the language queue every 0.1 s until a container is obtained or
    ``timeout`` seconds have elapsed. A dequeued container that is no longer
    running is recreated before being handed out; if recreation fails, it is
    discarded and polling continues.

    Args:
        language: Language whose pool to allocate from.
        timeout: Maximum time to wait, in seconds.

    Returns:
        The container name, or an empty string if none became available in time.
    """
    # A monotonic clock keeps the timeout immune to wall-clock adjustments.
    start_time = time.monotonic()
    while time.monotonic() - start_time < timeout:
        try:
            name = _CONTAINER_QUEUES[language].get_nowait()
        except Empty:
            await asyncio.sleep(0.1)
            continue

        # Once dequeued, the name belongs exclusively to this coroutine, so the
        # health check needs no lock. Holding the threading lock across these
        # awaits would block the event loop thread for any other coroutine
        # that tried to acquire it.
        if await container_is_running(name) or await recreate_container(name, language):
            return name

        logger.warning(f"⚠️ Container {name} is not running and could not be recreated, trying another one")

    return ""
|
||||
|
||||
|
||||
async def container_is_running(name: str) -> bool:
    """Report whether ``docker inspect`` says the named container is running.

    Args:
        name: Docker container name to inspect.

    Returns:
        True only if the inspect command exits with 0 and prints ``true``;
        False otherwise, including when the command raises.
    """
    inspect_cmd = ("docker", "inspect", "-f", "{{.State.Running}}", name)
    try:
        exit_code, output, _ = await async_run_command(*inspect_cmd, timeout=2)
        if exit_code != 0:
            return False
        return output.strip() == "true"
    except Exception:
        return False
|
||||
19
sandbox/executor_manager/core/logger.py
Normal file
19
sandbox/executor_manager/core/logger.py
Normal file
@ -0,0 +1,19 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import logging
|
||||
|
||||
# Configure the root logger at INFO level when this module is imported.
logging.basicConfig(level=logging.INFO)
|
||||
# Shared "sandbox" logger, imported by the other core modules as ``core.logger.logger``.
logger = logging.getLogger("sandbox")
|
||||
Reference in New Issue
Block a user