From eb661c028de6225a4e471baf27df9b221a38228d Mon Sep 17 00:00:00 2001 From: Maxine Lai <76145646+mmtmr@users.noreply.github.com> Date: Wed, 31 Dec 2025 19:55:39 +0800 Subject: [PATCH] =?UTF-8?q?Fix=20Tika=20version=20mismatch=20in=20Dockerfi?= =?UTF-8?q?le.deps=20(3.0.0=20=E2=86=92=203.2.3)=20(#12267)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #12266 Dockerfile.deps still referenced `tika-server-standard-3.0.0.jar` even after the project moved to Tika 3.2.3 for security reasons. This caused Docker builds to fail due to a version mismatch and a missing artifact. Changes: - Update Dockerfile and Dockerfile.deps to consistently use Tika 3.2.3 No functional changes beyond dependency alignment. Co-authored-by: Liu An --- Dockerfile | 6 +++--- Dockerfile.deps | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5f2c5f6cf..e4defbf31 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,17 +19,17 @@ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co # This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache. 
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ cp -r /deps/nltk_data /root/ && \ - cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \ + cp /deps/tika-server-standard-3.2.3.jar /deps/tika-server-standard-3.2.3.jar.md5 /ragflow/ && \ cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 -ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar" +ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.2.3.jar" ENV DEBIAN_FRONTEND=noninteractive # Setup apt # Python package and implicit dependencies: # opencv-python: libglib2.0-0 libglx-mesa0 libgl1 # aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb -# python-pptx: default-jdk tika-server-standard-3.0.0.jar +# python-pptx: default-jdk tika-server-standard-3.2.3.jar # selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85 # Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ diff --git a/Dockerfile.deps b/Dockerfile.deps index c683ebf7c..0405519d8 100644 --- a/Dockerfile.deps +++ b/Dockerfile.deps @@ -3,7 +3,7 @@ FROM scratch # Copy resources downloaded via download_deps.py -COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.0.0.jar tika-server-standard-3.0.0.jar.md5 libssl*.deb uv-x86_64-unknown-linux-gnu.tar.gz / +COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.2.3.jar tika-server-standard-3.2.3.jar.md5 libssl*.deb uv-x86_64-unknown-linux-gnu.tar.gz / COPY nltk_data /nltk_data