diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py index 8caaaffad..1f46a4098 100644 --- a/api/apps/llm_app.py +++ b/api/apps/llm_app.py @@ -192,6 +192,9 @@ async def add_llm(): elif factory == "OpenRouter": api_key = apikey_json(["api_key", "provider_order"]) + elif factory == "MinerU": + api_key = apikey_json(["api_key", "provider_order"]) + llm = { "tenant_id": current_user.id, "llm_factory": factory, diff --git a/api/db/services/tenant_llm_service.py b/api/db/services/tenant_llm_service.py index 84658d246..88689fdab 100644 --- a/api/db/services/tenant_llm_service.py +++ b/api/db/services/tenant_llm_service.py @@ -16,6 +16,7 @@ import os import json import logging +from peewee import IntegrityError from langfuse import Langfuse from common import settings from common.constants import MINERU_DEFAULT_CONFIG, MINERU_ENV_KEYS, LLMType @@ -274,21 +275,28 @@ class TenantLLMService(CommonService): used_names = {item.llm_name for item in saved_mineru_models} idx = 1 base_name = "mineru-from-env" - candidate = f"{base_name}-{idx}" - while candidate in used_names: - idx += 1 + while True: candidate = f"{base_name}-{idx}" + if candidate in used_names: + idx += 1 + continue - cls.save( - tenant_id=tenant_id, - llm_factory="MinerU", - llm_name=candidate, - model_type=LLMType.OCR.value, - api_key=json.dumps(cfg), - api_base="", - max_tokens=0, - ) - return candidate + try: + cls.save( + tenant_id=tenant_id, + llm_factory="MinerU", + llm_name=candidate, + model_type=LLMType.OCR.value, + api_key=json.dumps(cfg), + api_base="", + max_tokens=0, + ) + return candidate + except IntegrityError: + logging.warning("MinerU env model %s already exists for tenant %s, retry with next name", candidate, tenant_id) + used_names.add(candidate) + idx += 1 + continue @classmethod @DB.connection_context() diff --git a/deepdoc/parser/mineru_parser.py b/deepdoc/parser/mineru_parser.py index 57840ebb8..2883bf881 100644 --- a/deepdoc/parser/mineru_parser.py +++ b/deepdoc/parser/mineru_parser.py 
@@ -54,7 +54,7 @@ class MinerUContentType(StrEnum): class MinerUParser(RAGFlowPdfParser): - def __init__(self, mineru_path: str = "mineru", mineru_api: str = "http://host.docker.internal:9987", mineru_server_url: str = ""): + def __init__(self, mineru_path: str = "mineru", mineru_api: str = "", mineru_server_url: str = ""): self.mineru_path = Path(mineru_path) self.mineru_api = mineru_api.rstrip("/") self.mineru_server_url = mineru_server_url.rstrip("/") @@ -176,7 +176,9 @@ class MinerUParser(RAGFlowPdfParser): self.using_api = openapi_exists return openapi_exists, reason else: - self.logger.info("[MinerU] api not exists.") + reason = "[MinerU] api not exists. Setting MINERU_SERVER_URL if your backend is vlm-http-client." + self.logger.info(reason) + return False, reason except Exception as e: reason = f"[MinerU] Unexpected error during api check: {e}" self.logger.error(f"[MinerU] Unexpected error during api check: {e}") diff --git a/docker/.env b/docker/.env index 3d90d2c55..51d2cf73b 100644 --- a/docker/.env +++ b/docker/.env @@ -236,10 +236,11 @@ USE_DOCLING=false # Enable Mineru USE_MINERU=false MINERU_EXECUTABLE="$HOME/uv_tools/.venv/bin/mineru" -MINERU_DELETE_OUTPUT=0 # keep output directory -MINERU_BACKEND=pipeline # or another backend you prefer +# Uncommenting these lines will automatically add MinerU to the model provider whenever possible. 
+# MINERU_DELETE_OUTPUT=0 # keep output directory +# MINERU_BACKEND=pipeline # or another backend you prefer # pptx support -DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 \ No newline at end of file +DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 diff --git a/rag/app/naive.py b/rag/app/naive.py index 8315f801f..353504d77 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -68,7 +68,7 @@ def by_mineru(filename, binary=None, from_page=0, to_page=100000, lang="Chinese" from api.db.services.tenant_llm_service import TenantLLMService env_name = TenantLLMService.ensure_mineru_from_env(tenant_id) - candidates = TenantLLMService.query(tenant_id=tenant_id, llm_factory="MinerU", model_type=LLMType.OCR.value) + candidates = TenantLLMService.query(tenant_id=tenant_id, llm_factory="MinerU", model_type=LLMType.OCR) if candidates: mineru_llm_name = candidates[0].llm_name elif env_name: @@ -78,7 +78,7 @@ def by_mineru(filename, binary=None, from_page=0, to_page=100000, lang="Chinese" if mineru_llm_name: try: - ocr_model = LLMBundle(tenant_id, LLMType.OCR, llm_name=mineru_llm_name, lang=lang) + ocr_model = LLMBundle(tenant_id=tenant_id, llm_type=LLMType.OCR, llm_name=mineru_llm_name, lang=lang) pdf_parser = ocr_model.mdl sections, tables = pdf_parser.parse_pdf( filepath=filename, @@ -711,8 +711,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca layout_recognizer = layout_recognizer_raw if isinstance(layout_recognizer_raw, str): lowered = layout_recognizer_raw.lower() - if lowered.startswith("mineru@"): - parser_model_name = layout_recognizer_raw.split("@", 1)[1] + if lowered.endswith("@mineru"): + # rsplit strips only the trailing "@mineru", so model names that contain "@" stay intact (same as rag/flow/parser/parser.py). + parser_model_name = layout_recognizer_raw.rsplit("@", 1)[0] layout_recognizer = "MinerU" if parser_config.get("analyze_hyperlink", False) and is_root: diff --git a/rag/flow/parser/parser.py b/rag/flow/parser/parser.py index 319a16d88..f32fb1719 100644 --- a/rag/flow/parser/parser.py +++ b/rag/flow/parser/parser.py @@ -240,10 +240,7 @@ class Parser(ProcessBase): 
parse_method = parse_method or "" if isinstance(raw_parse_method, str): lowered = raw_parse_method.lower() - if lowered.startswith("mineru@"): - parser_model_name = raw_parse_method.split("@", 1)[1] - parse_method = "MinerU" - elif lowered.endswith("@mineru"): + if lowered.endswith("@mineru"): parser_model_name = raw_parse_method.rsplit("@", 1)[0] parse_method = "MinerU" @@ -853,4 +850,4 @@ class Parser(ProcessBase): for t in tasks: t.cancel() await asyncio.gather(*tasks, return_exceptions=True) - raise \ No newline at end of file + raise diff --git a/rag/llm/ocr_model.py b/rag/llm/ocr_model.py index 183ef2041..b18a16a36 100644 --- a/rag/llm/ocr_model.py +++ b/rag/llm/ocr_model.py @@ -22,7 +22,7 @@ from deepdoc.parser.mineru_parser import MinerUParser class Base: - def __init__(self, key: str, model_name: str, **kwargs): + def __init__(self, key: str | dict, model_name: str, **kwargs): self.model_name = model_name def parse_pdf(self, filepath: str, binary=None, **kwargs) -> Tuple[Any, Any]: @@ -32,23 +32,28 @@ class Base: class MinerUOcrModel(Base, MinerUParser): _FACTORY_NAME = "MinerU" - def __init__(self, key: str, model_name: str, **kwargs): + def __init__(self, key: str | dict, model_name: str, **kwargs): Base.__init__(self, key, model_name, **kwargs) - cfg = {} + config = {} if key: try: - cfg = json.loads(key) + config = key if isinstance(key, dict) else json.loads(key) except Exception: - cfg = {} - - self.mineru_api = cfg.get("MINERU_APISERVER", os.environ.get("MINERU_APISERVER", "http://host.docker.internal:9987")) - self.mineru_output_dir = cfg.get("MINERU_OUTPUT_DIR", os.environ.get("MINERU_OUTPUT_DIR", "")) - self.mineru_backend = cfg.get("MINERU_BACKEND", os.environ.get("MINERU_BACKEND", "pipeline")) - self.mineru_server_url = cfg.get("MINERU_SERVER_URL", os.environ.get("MINERU_SERVER_URL", "")) - self.mineru_delete_output = bool(int(cfg.get("MINERU_DELETE_OUTPUT", os.environ.get("MINERU_DELETE_OUTPUT", 1)))) + config = {} + # Use the nested {"api_key": {...}} payload when present; otherwise keep the flat dict (or {}) instead of raising KeyError. + if not isinstance(config, dict): + config = {} + nested = config.get("api_key") + if isinstance(nested, dict): + config = nested + self.mineru_api = 
config.get("mineru_apiserver", os.environ.get("MINERU_APISERVER", "")) + self.mineru_output_dir = config.get("mineru_output_dir", os.environ.get("MINERU_OUTPUT_DIR", "")) + self.mineru_backend = config.get("mineru_backend", os.environ.get("MINERU_BACKEND", "pipeline")) + self.mineru_server_url = config.get("mineru_server_url", os.environ.get("MINERU_SERVER_URL", "")) + self.mineru_delete_output = bool(int(config.get("mineru_delete_output", os.environ.get("MINERU_DELETE_OUTPUT", 1)))) self.mineru_executable = os.environ.get("MINERU_EXECUTABLE", "mineru") - logging.info(f"Parsered MinerU config: {cfg}") + logging.info(f"Parsed MinerU config: {config}") MinerUParser.__init__(self, mineru_path=self.mineru_executable, mineru_api=self.mineru_api, mineru_server_url=self.mineru_server_url)