diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index a5942c5b8..f37f108d4 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -209,23 +209,42 @@ function ensure_mineru() { local default_prefix="/ragflow/uv_tools" local venv_dir="${default_prefix}/.venv" local exe="${MINERU_EXECUTABLE:-${venv_dir}/bin/mineru}" + local mineru_backend="${MINERU_BACKEND:-pipeline}" + local mineru_pkg="mineru[core]" + + if [[ "${mineru_backend}" == vlm-* ]]; then + mineru_pkg="mineru[core,vlm]" + fi if [[ -x "${exe}" ]]; then - echo "[mineru] found: ${exe}" + echo "[mineru] found: ${exe} (MINERU_BACKEND=${mineru_backend})" export MINERU_EXECUTABLE="${exe}" + + if [[ "${mineru_backend}" == vlm-* ]]; then + if ! "${venv_dir}/bin/python3" -c "import importlib.util, sys; sys.exit(0 if importlib.util.find_spec('vllm') else 1)" >/dev/null 2>&1; then + echo "[mineru] vllm not found for MINERU_BACKEND=${mineru_backend}, installing ${mineru_pkg} ..." + ( + set -e + source "${venv_dir}/bin/activate" + uv pip install -U "${mineru_pkg}" -i https://mirrors.aliyun.com/pypi/simple --extra-index-url https://pypi.org/simple + deactivate + ) || return 1 + fi + fi return 0 fi - echo "[mineru] not found, bootstrapping with uv ..." + echo "[mineru] not found, bootstrapping with uv ... (MINERU_BACKEND=${mineru_backend}, pkg=${mineru_pkg})" ( set -e mkdir -p "${default_prefix}" cd "${default_prefix}" - [[ -d "${venv_dir}" ]] || uv venv "${venv_dir}" + [[ -d "${venv_dir}" ]] || { echo "[mineru] creating venv at ${venv_dir} ..."; uv venv "${venv_dir}"; } + echo "[mineru] installing ${mineru_pkg} into ${venv_dir} ..." source "${venv_dir}/bin/activate" - uv pip install -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple --extra-index-url https://pypi.org/simple + uv pip install -U "${mineru_pkg}" -i https://mirrors.aliyun.com/pypi/simple --extra-index-url https://pypi.org/simple deactivate ) export MINERU_EXECUTABLE="${exe}" diff --git a/rag/llm/ocr_model.py b/rag/llm/ocr_model.py index b18a16a36..73c3ed009 100644 --- a/rag/llm/ocr_model.py +++ b/rag/llm/ocr_model.py @@ -34,18 +34,28 @@ class MinerUOcrModel(Base, MinerUParser): def __init__(self, key: str | dict, model_name: str, **kwargs): Base.__init__(self, key, model_name, **kwargs) - config = {} + raw_config = {} if key: try: - config = json.loads(key) + raw_config = json.loads(key) except Exception: - config = {} - config = config["api_key"] - self.mineru_api = config.get("mineru_apiserver", os.environ.get("MINERU_APISERVER", "")) - self.mineru_output_dir = config.get("mineru_output_dir", os.environ.get("MINERU_OUTPUT_DIR", "")) - self.mineru_backend = config.get("mineru_backend", os.environ.get("MINERU_BACKEND", "pipeline")) - self.mineru_server_url = config.get("mineru_server_url", os.environ.get("MINERU_SERVER_URL", "")) - self.mineru_delete_output = bool(int(config.get("mineru_delete_output", os.environ.get("MINERU_DELETE_OUTPUT", 1)))) + raw_config = {} + + # nested {"api_key": {...}} from UI + # flat {"MINERU_*": "..."} payload auto-provisioned from env vars + config = raw_config.get("api_key", raw_config) + if not isinstance(config, dict): + config = {} + + def _resolve_config(key: str, env_key: str, default=""): + # lower-case keys (UI), upper-case MINERU_* (env auto-provision), env vars + return config.get(key, config.get(env_key, os.environ.get(env_key, default))) + + self.mineru_api = _resolve_config("mineru_apiserver", "MINERU_APISERVER", "") + self.mineru_output_dir = _resolve_config("mineru_output_dir", "MINERU_OUTPUT_DIR", "") + self.mineru_backend = _resolve_config("mineru_backend", "MINERU_BACKEND", "pipeline") + self.mineru_server_url = _resolve_config("mineru_server_url", "MINERU_SERVER_URL", "") + self.mineru_delete_output = bool(int(_resolve_config("mineru_delete_output", "MINERU_DELETE_OUTPUT", 1))) self.mineru_executable = os.environ.get("MINERU_EXECUTABLE", "mineru") logging.info(f"Parsed MinerU config: {config}")