Refa: treat MinerU as an OCR model 2 (#11905)

### What problem does this PR solve?

Treat MinerU as an OCR model (part 2). #11903

### Type of change

- [x] Refactoring
This commit is contained in:
Yongteng Lei
2025-12-11 17:33:12 +08:00
committed by GitHub
parent bd0eff2954
commit e9710b7aa9
7 changed files with 50 additions and 39 deletions

View File

@ -54,7 +54,7 @@ class MinerUContentType(StrEnum):
class MinerUParser(RAGFlowPdfParser):
def __init__(self, mineru_path: str = "mineru", mineru_api: str = "http://host.docker.internal:9987", mineru_server_url: str = ""):
def __init__(self, mineru_path: str = "mineru", mineru_api: str = "", mineru_server_url: str = ""):
self.mineru_path = Path(mineru_path)
self.mineru_api = mineru_api.rstrip("/")
self.mineru_server_url = mineru_server_url.rstrip("/")
@ -176,7 +176,9 @@ class MinerUParser(RAGFlowPdfParser):
self.using_api = openapi_exists
return openapi_exists, reason
else:
self.logger.info("[MinerU] api not exists.")
reason = "[MinerU] api not exists. Setting MINERU_SERVER_URL if your backend is vlm-http-client."
self.logger.info(reason)
return False, reason
except Exception as e:
reason = f"[MinerU] Unexpected error during api check: {e}"
self.logger.error(f"[MinerU] Unexpected error during api check: {e}")