From 15c75bbf15d34bd15cab346d89c5ae4633f25eb3 Mon Sep 17 00:00:00 2001 From: Liu An Date: Thu, 6 Nov 2025 11:53:33 +0800 Subject: [PATCH] Refa: Remove HuggingFace repo downloads (#11048) ### What problem does this PR solve? - Removed download_model function and HuggingFace repo download loop ### Type of change - [x] Refactoring --- download_deps.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/download_deps.py b/download_deps.py index 7a0a6b614..0d11d451c 100644 --- a/download_deps.py +++ b/download_deps.py @@ -9,12 +9,13 @@ # ] # /// -from huggingface_hub import snapshot_download -from typing import Union -import nltk +import argparse import os import urllib.request -import argparse +from typing import Union + +import nltk + def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]: if use_china_mirrors: @@ -38,25 +39,21 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]: ["https://storage.googleapis.com/chrome-for-testing-public/121.0.6167.85/linux64/chromedriver-linux64.zip", "chromedriver-linux64-121-0-6167-85"], ] + repos = [ "InfiniFlow/text_concat_xgb_v1.0", "InfiniFlow/deepdoc", "InfiniFlow/huqie", ] -def download_model(repo_id): - local_dir = os.path.abspath(os.path.join("huggingface.co", repo_id)) - os.makedirs(local_dir, exist_ok=True) - snapshot_download(repo_id=repo_id, local_dir=local_dir) - if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Download dependencies with optional China mirror support') - parser.add_argument('--china-mirrors', action='store_true', help='Use China-accessible mirrors for downloads') + parser = argparse.ArgumentParser(description="Download dependencies with optional China mirror support") + parser.add_argument("--china-mirrors", action="store_true", help="Use China-accessible mirrors for downloads") args = parser.parse_args() - + urls = get_urls(args.china_mirrors) - + for url in urls: download_url = url[0] if isinstance(url, list) else url filename = url[1] if isinstance(url, list) else url.split("/")[-1] @@ -64,11 +61,7 @@ if __name__ == "__main__": if not os.path.exists(filename): urllib.request.urlretrieve(download_url, filename) - local_dir = os.path.abspath('nltk_data') - for data in ['wordnet', 'punkt', 'punkt_tab']: + local_dir = os.path.abspath("nltk_data") + for data in ["wordnet", "punkt", "punkt_tab"]: print(f"Downloading nltk {data}...") nltk.download(data, download_dir=local_dir) - - for repo_id in repos: - print(f"Downloading huggingface repo {repo_id}...") - download_model(repo_id)