Feat: add Docling parser (#10759)

### What problem does this PR solve?
Issue: #3945
Change: add a Docling parser.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
buua436
2025-10-23 19:44:25 +08:00
committed by GitHub
parent de24e74b4c
commit 0ff2042fc1
5 changed files with 378 additions and 0 deletions

View File

@ -195,3 +195,4 @@ REGISTER_ENABLED=1
# COMPOSE_PROFILES=infinity,sandbox
# - For OpenSearch:
# COMPOSE_PROFILES=opensearch,sandbox
# Optional Docling support. When set to exactly "true", the startup script's
# ensure_docling step installs the `docling` Python package with pip if it is
# not already importable. DOCLING_VERSION, if set, is appended verbatim to the
# package name passed to pip.
USE_DOCLING=false

View File

@ -178,6 +178,16 @@ function start_mcp_server() {
"${MCP_JSON_RESPONSE_FLAG}" &
}
# -----------------------------------------------------------------------------
# ensure_docling: install the optional `docling` Python package on demand.
#
# Globals (read):
#   USE_DOCLING      - the step runs only when this is exactly "true";
#                      unset or any other value makes the function a no-op.
#   DOCLING_VERSION  - optional suffix appended verbatim to the package name
#                      handed to pip; empty/unset means no suffix.
# Outputs:
#   Progress messages on stdout; an error message on stderr if pip fails.
# Returns:
#   0 when disabled, already installed, or installed successfully;
#   pip's non-zero exit status when the installation fails.
# -----------------------------------------------------------------------------
function ensure_docling() {
    # Default to "false" so an env file without USE_DOCLING (or a shell running
    # with `set -u`) skips the step instead of failing on an unbound variable.
    if [[ "${USE_DOCLING:-false}" != "true" ]]; then
        return 0
    fi

    # find_spec() reports whether the package can be located without importing it.
    if python3 -c "import importlib.util,sys; sys.exit(0 if importlib.util.find_spec('docling') else 1)"; then
        echo "[docling] already installed, skip."
        return 0
    fi

    echo "[docling] not found, installing..."
    local requirement="docling${DOCLING_VERSION:-}"
    local rc=0
    # Capture the status explicitly so the failure can be reported with context
    # and then handed back to the caller unchanged.
    python3 -m pip install --no-cache-dir "${requirement}" || rc=$?
    if (( rc != 0 )); then
        echo "[docling] failed to install '${requirement}' (pip exit code ${rc})" >&2
        return "${rc}"
    fi
}
# -----------------------------------------------------------------------------
# Start components based on flags
# -----------------------------------------------------------------------------
@ -203,6 +213,8 @@ if [[ "${ENABLE_MCP_SERVER}" -eq 1 ]]; then
start_mcp_server
fi
# Run the optional Docling install step before the task executors are started.
ensure_docling
if [[ "${ENABLE_TASKEXECUTOR}" -eq 1 ]]; then
if [[ "${CONSUMER_NO_END}" -gt "${CONSUMER_NO_BEG}" ]]; then
echo "Starting task executors on host '${HOST_ID}' for IDs in [${CONSUMER_NO_BEG}, ${CONSUMER_NO_END})..."