From d4574ffb49cfa72148e6ff10928178eb226974ae Mon Sep 17 00:00:00 2001 From: "localhost IN A 127.0.0.1" <121652515+zonefile@users.noreply.github.com> Date: Mon, 7 Apr 2025 11:58:46 +0800 Subject: [PATCH] Fix: improve Dockerfile build for China (#6812) ### What problem does this PR solve? This PR addresses the build and dependency issues faced by developers in regions with poor connectivity to official Ubuntu repositories and standard dependency sources. Currently, developers in these regions experience slow or failed Docker builds and dependency downloads, significantly impacting development efficiency. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [ ] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [ ] Other (please describe): The changes include: 1. Modified Dockerfile to use alternative Ubuntu mirrors with better connectivity in affected regions 2. Added a `--china-mirrors` option to the existing download_deps.py script that switches dependency downloads to region-specific alternative links --- Dockerfile | 3 ++- download_deps.py | 40 ++++++++++++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4b2c922c3..d102c1c57 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,7 +44,8 @@ ENV DEBIAN_FRONTEND=noninteractive # Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ if [ "$NEED_MIRROR" == "1" ]; then \ - sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \ + sed -i 's|http://ports.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \ + sed -i 's|http://archive.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \ fi; \ rm -f /etc/apt/apt.conf.d/docker-clean && \ echo 'Binary::apt::APT::Keep-Downloaded-Packages 
"true";' > /etc/apt/apt.conf.d/keep-cache && \ diff --git a/download_deps.py b/download_deps.py index baf65303a..3ada5be2c 100644 --- a/download_deps.py +++ b/download_deps.py @@ -6,6 +6,7 @@ # dependencies = [ # "huggingface-hub", # "nltk", +# "argparse", # ] # /// @@ -13,16 +14,29 @@ from huggingface_hub import snapshot_download import nltk import os import urllib.request +import argparse -urls = [ - "http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb", - "http://ports.ubuntu.com/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb", - "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar", - "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar.md5", - "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken", - "https://bit.ly/chrome-linux64-121-0-6167-85", - "https://bit.ly/chromedriver-linux64-121-0-6167-85", -] +def get_urls(use_china_mirrors=False): + if use_china_mirrors: + return [ + "http://mirrors.tuna.tsinghua.edu.cn/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb", + "http://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb", + "https://repo.huaweicloud.com/repository/maven/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar", + "https://repo.huaweicloud.com/repository/maven/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar.md5", + "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken", + "https://storage.googleapis.com/chrome-for-testing-public/121.0.6167.85/linux64/chrome-linux64.zip", + "https://storage.googleapis.com/chrome-for-testing-public/121.0.6167.85/linux64/chromedriver-linux64.zip", + ] + else: + return [ + "http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb", + 
"http://ports.ubuntu.com/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb", + "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar", + "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar.md5", + "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken", + "https://bit.ly/chrome-linux64-121-0-6167-85", + "https://bit.ly/chromedriver-linux64-121-0-6167-85", + ] repos = [ "InfiniFlow/text_concat_xgb_v1.0", @@ -39,6 +53,12 @@ def download_model(repo_id): if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Download dependencies with optional China mirror support') + parser.add_argument('--china-mirrors', action='store_true', help='Use China-accessible mirrors for downloads') + args = parser.parse_args() + + urls = get_urls(args.china_mirrors) + for url in urls: filename = url.split("/")[-1] print(f"Downloading {url}...") @@ -52,4 +72,4 @@ if __name__ == "__main__": for repo_id in repos: print(f"Downloading huggingface repo {repo_id}...") - download_model(repo_id) + download_model(repo_id) \ No newline at end of file