Refactor file utils (#10970)

### What problem does this PR solve?

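Refactors the file utilities: moves `traversal_files` from `api.utils.file_utils` to `common.file_utils` and updates its importer accordingly, and removes the JSON/YAML config helpers (`load_json_conf`, `dump_json_conf`, `load_json_conf_real_time`, `load_yaml_conf`, `rewrite_yaml_conf`, `rewrite_json_file`) from `api.utils.file_utils` along with the imports that became unused (`json`, `os`, `cachetools`, `ruamel.yaml`, `get_project_base_directory`).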

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2025-11-03 18:54:55 +08:00
committed by GitHub
parent 076d811086
commit 1284647694
3 changed files with 7 additions and 78 deletions

View File

@ -19,8 +19,6 @@
import base64 import base64
import hashlib import hashlib
import io import io
import json
import os
import re import re
import shutil import shutil
import subprocess import subprocess
@ -40,85 +38,17 @@ from typing import List, Union, Tuple, Optional, Dict
# Third-party imports # Third-party imports
import olefile import olefile
import pdfplumber import pdfplumber
from cachetools import LRUCache, cached
from PIL import Image from PIL import Image
from ruamel.yaml import YAML
# Local imports # Local imports
from api.constants import IMG_BASE64_PREFIX from api.constants import IMG_BASE64_PREFIX
from api.db import FileType from api.db import FileType
from common.file_utils import get_project_base_directory
LOCK_KEY_pdfplumber = "global_shared_lock_pdfplumber" LOCK_KEY_pdfplumber = "global_shared_lock_pdfplumber"
if LOCK_KEY_pdfplumber not in sys.modules: if LOCK_KEY_pdfplumber not in sys.modules:
sys.modules[LOCK_KEY_pdfplumber] = threading.Lock() sys.modules[LOCK_KEY_pdfplumber] = threading.Lock()
@cached(cache=LRUCache(maxsize=10))
def load_json_conf(conf_path):
if os.path.isabs(conf_path):
json_conf_path = conf_path
else:
json_conf_path = os.path.join(get_project_base_directory(), conf_path)
try:
with open(json_conf_path) as f:
return json.load(f)
except BaseException:
raise EnvironmentError("loading json file config from '{}' failed!".format(json_conf_path))
def dump_json_conf(config_data, conf_path):
if os.path.isabs(conf_path):
json_conf_path = conf_path
else:
json_conf_path = os.path.join(get_project_base_directory(), conf_path)
try:
with open(json_conf_path, "w") as f:
json.dump(config_data, f, indent=4)
except BaseException:
raise EnvironmentError("loading json file config from '{}' failed!".format(json_conf_path))
def load_json_conf_real_time(conf_path):
if os.path.isabs(conf_path):
json_conf_path = conf_path
else:
json_conf_path = os.path.join(get_project_base_directory(), conf_path)
try:
with open(json_conf_path) as f:
return json.load(f)
except BaseException:
raise EnvironmentError("loading json file config from '{}' failed!".format(json_conf_path))
def load_yaml_conf(conf_path):
if not os.path.isabs(conf_path):
conf_path = os.path.join(get_project_base_directory(), conf_path)
try:
with open(conf_path) as f:
yaml = YAML(typ="safe", pure=True)
return yaml.load(f)
except Exception as e:
raise EnvironmentError("loading yaml file config from {} failed:".format(conf_path), e)
def rewrite_yaml_conf(conf_path, config):
if not os.path.isabs(conf_path):
conf_path = os.path.join(get_project_base_directory(), conf_path)
try:
with open(conf_path, "w") as f:
yaml = YAML(typ="safe")
yaml.dump(config, f)
except Exception as e:
raise EnvironmentError("rewrite yaml file config {} failed:".format(conf_path), e)
def rewrite_json_file(filepath, json_data):
with open(filepath, "w", encoding="utf-8") as f:
json.dump(json_data, f, indent=4, separators=(",", ": "))
f.close()
def filename_type(filename): def filename_type(filename):
filename = filename.lower() filename = filename.lower()
if re.match(r".*\.pdf$", filename): if re.match(r".*\.pdf$", filename):
@ -199,13 +129,6 @@ def thumbnail(filename, blob):
return "" return ""
def traversal_files(base):
for root, ds, fs in os.walk(base):
for f in fs:
fullname = os.path.join(root, f)
yield fullname
def repair_pdf_with_ghostscript(input_bytes): def repair_pdf_with_ghostscript(input_bytes):
if shutil.which("gs") is None: if shutil.which("gs") is None:
return input_bytes return input_bytes

View File

@ -31,3 +31,9 @@ def get_project_base_directory(*args):
if args: if args:
return os.path.join(PROJECT_BASE, *args) return os.path.join(PROJECT_BASE, *args)
return PROJECT_BASE return PROJECT_BASE
def traversal_files(base):
for root, ds, fs in os.walk(base):
for f in fs:
fullname = os.path.join(root, f)
yield fullname

View File

@ -36,7 +36,7 @@ def init_in_out(args):
from PIL import Image from PIL import Image
from api.utils.file_utils import traversal_files from common.file_utils import traversal_files
images = [] images = []
outputs = [] outputs = []