Refa: treat MinerU as an OCR model (#11849)

### What problem does this PR solve?

Treat MinerU as an OCR model, so it can be added, validated, and listed like any other model type.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
Author: Yongteng Lei
Date: 2025-12-09 18:54:14 +08:00 (committed via GitHub)
Parent: 30377319d8
Commit: a94b3b9df2
9 changed files with 283 additions and 43 deletions


```diff
@@ -25,7 +25,7 @@ from api.utils.api_utils import get_allowed_llm_factories, get_data_error_result
 from common.constants import StatusEnum, LLMType
 from api.db.db_models import TenantLLM
 from rag.utils.base64_image import test_image
-from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel
+from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel, OcrModel


 @manager.route("/factories", methods=["GET"]) # noqa: F821
```
```diff
@@ -43,7 +43,13 @@ def factories():
                 mdl_types[m.fid] = set([])
             mdl_types[m.fid].add(m.model_type)
         for f in fac:
-            f["model_types"] = list(mdl_types.get(f["name"], [LLMType.CHAT, LLMType.EMBEDDING, LLMType.RERANK, LLMType.IMAGE2TEXT, LLMType.SPEECH2TEXT, LLMType.TTS]))
+            f["model_types"] = list(
+                mdl_types.get(
+                    f["name"],
+                    [LLMType.CHAT, LLMType.EMBEDDING, LLMType.RERANK, LLMType.IMAGE2TEXT, LLMType.SPEECH2TEXT, LLMType.TTS, LLMType.OCR],
+                )
+            )
         return get_json_result(data=fac)
     except Exception as e:
         return server_error_response(e)
```
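With this hunk, the default type list advertised by `/factories` now includes `LLMType.OCR`. For orientation only, a minimal sketch of what the enum referenced here might look like once the member is added; only the presence of `OCR` and the member names used in the diff are confirmed, while the string values and the exact base class are assumptions:

```python
# Hypothetical sketch of the LLMType enum used above. The string values and
# base class are assumptions; the real definition lives in common.constants.
from enum import Enum


class LLMType(str, Enum):
    CHAT = "chat"
    EMBEDDING = "embedding"
    RERANK = "rerank"
    IMAGE2TEXT = "image2text"
    SPEECH2TEXT = "speech2text"
    TTS = "tts"
    OCR = "ocr"  # new member implied by this PR, used for MinerU
```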
```diff
@@ -251,6 +257,15 @@ async def add_llm():
                 pass
         except RuntimeError as e:
             msg += f"\nFail to access model({factory}/{mdl_nm})." + str(e)
+    elif llm["model_type"] == LLMType.OCR.value:
+        assert factory in OcrModel, f"OCR model from {factory} is not supported yet."
+        try:
+            mdl = OcrModel[factory](key=llm["api_key"], model_name=mdl_nm, base_url=llm.get("api_base", ""))
+            ok, reason = mdl.check_available()
+            if not ok:
+                raise RuntimeError(reason or "Model not available")
+        except Exception as e:
+            msg += f"\nFail to access model({factory}/{mdl_nm})." + str(e)
     else:
         # TODO: check other type of models
         pass
```
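The new branch assumes every entry in `OcrModel` is a class whose instances expose `check_available()` returning an `(ok, reason)` tuple. A minimal sketch of a wrapper that would satisfy that contract, assuming a self-hosted MinerU HTTP service; the class name, the `/health` path, and the use of `requests` are illustrative assumptions, not taken from this PR:

```python
# Hypothetical OCR wrapper compatible with the check_available() call above.
# The class name and the "/health" endpoint are assumptions; the real MinerU
# wrapper in rag.llm may differ.
import requests


class MinerUOcr:
    def __init__(self, key: str = "", model_name: str = "", base_url: str = ""):
        self.key = key
        self.model_name = model_name
        self.base_url = base_url.rstrip("/")

    def check_available(self) -> tuple[bool, str]:
        """Return (ok, reason); reason is empty when the service responds."""
        if not self.base_url:
            return False, "No base_url configured for MinerU"
        try:
            resp = requests.get(f"{self.base_url}/health", timeout=5)
            if resp.status_code == 200:
                return True, ""
            return False, f"Unexpected status code: {resp.status_code}"
        except requests.RequestException as e:
            return False, str(e)
```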
```diff
@@ -297,6 +312,7 @@ async def delete_factory():
 @login_required
 def my_llms():
     try:
+        TenantLLMService.ensure_mineru_from_env(current_user.id)
         include_details = request.args.get("include_details", "false").lower() == "true"
         if include_details:
```
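Both `my_llms()` here and `list_app()` below now call `TenantLLMService.ensure_mineru_from_env(...)` before querying, so a MinerU instance configured purely through environment variables still shows up per tenant. The helper itself is not part of this diff; the following is only a rough sketch of the presumed behavior, where the environment variable names and the `query`/`save` helpers are assumptions:

```python
# Rough, hypothetical sketch of what ensure_mineru_from_env presumably does;
# the actual implementation, env var names, and service API are not shown in
# this diff and may differ.
import os


def ensure_mineru_from_env(tenant_id: str, tenant_llm_service) -> None:
    """Register a MinerU OCR entry for the tenant when MinerU is configured
    via environment variables and no record exists yet."""
    base_url = os.environ.get("MINERU_APISERVER", "")  # assumed variable name
    if not base_url:
        return  # MinerU not configured; nothing to register
    if tenant_llm_service.query(tenant_id=tenant_id, llm_factory="MinerU"):
        return  # already registered for this tenant
    tenant_llm_service.save(
        tenant_id=tenant_id,
        llm_factory="MinerU",
        llm_name="MinerU",
        model_type="ocr",
        api_key=os.environ.get("MINERU_API_KEY", ""),  # assumed variable name
        api_base=base_url,
    )
```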
```diff
@@ -344,6 +360,7 @@ def list_app():
     weighted = []
     model_type = request.args.get("model_type")
     try:
+        TenantLLMService.ensure_mineru_from_env(current_user.id)
         objs = TenantLLMService.query(tenant_id=current_user.id)
         facts = set([o.to_dict()["llm_factory"] for o in objs if o.api_key and o.status == StatusEnum.VALID.value])
         status = {(o.llm_name + "@" + o.llm_factory) for o in objs if o.status == StatusEnum.VALID.value}
```