diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 4357bf982..2d0804c12 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -95,6 +95,39 @@ jobs:
           version: ">=0.11.x"
           args: "check"
 
+      - name: Check comments of changed Python files
+        if: ${{ !cancelled() && !failure() }}
+        run: |
+          if [[ ${{ github.event_name }} == 'pull_request_target' ]]; then
+            CHANGED_FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }} \
+              | grep -E '\.(py)$' || true)
+
+            if [ -n "$CHANGED_FILES" ]; then
+              echo "Check comments of changed Python files with check_comment_ascii.py"
+
+              readarray -t files <<< "$CHANGED_FILES"
+              HAS_ERROR=0
+
+              for file in "${files[@]}"; do
+                if [ -f "$file" ]; then
+                  # Quote the path so it is passed as a single argument.
+                  if python3 check_comment_ascii.py "$file"; then
+                    echo "✅ $file"
+                  else
+                    echo "❌ $file"
+                    HAS_ERROR=1
+                  fi
+                fi
+              done
+
+              if [ $HAS_ERROR -ne 0 ]; then
+                exit 1
+              fi
+            else
+              echo "No Python files changed"
+            fi
+          fi
+
       - name: Build ragflow:nightly
         run: |
           RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-${HOME}}
diff --git a/check_comment_ascii.py b/check_comment_ascii.py
new file mode 100644
index 000000000..49cac90d7
--- /dev/null
+++ b/check_comment_ascii.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+"""Report non-ASCII characters in the comments and docstrings of Python files.
+
+Usage: check_comment_ascii.py FILE [FILE ...]
+
+Exits with status 1 if any file contains an offending comment or docstring,
+0 otherwise.
+"""
+import sys
+import tokenize
+import ast
+import io
+import pathlib
+import re
+
+# Printable ASCII plus the whitespace that legitimately appears in comments
+# and multi-line docstrings (tab, newline, carriage return).
+ASCII = re.compile(r"[\t\n\r -~]*")
+
+# Node types that ast.get_docstring() accepts.
+DOCSTRING_NODES = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Module)
+
+
+def check(src: str, name: str) -> int:
+    """Check one Python source file for non-ASCII comments and docstrings.
+
+    Every offending comment or docstring is printed as
+    ``<name>:<line>: non-ASCII ...``.
+
+    Args:
+        src: Path of the file to read.
+        name: Label used in the printed messages and as the parse filename.
+
+    Returns:
+        1 if every comment and docstring is ASCII, 0 otherwise.
+    """
+    ok = 1
+    # Read the raw bytes once so the tokenizer and the parser both decode the
+    # file per its PEP 263 declaration (UTF-8 by default), not the locale.
+    data = pathlib.Path(src).read_bytes()
+
+    # Comments only exist in the token stream, not in the AST.
+    for tk in tokenize.tokenize(io.BytesIO(data).readline):
+        if tk.type == tokenize.COMMENT and not ASCII.fullmatch(tk.string):
+            print(f"{name}:{tk.start[0]}: non-ASCII comment: {tk.string}")
+            ok = 0
+
+    for node in ast.walk(ast.parse(data, filename=name)):
+        if not isinstance(node, DOCSTRING_NODES):
+            continue
+        if (doc := ast.get_docstring(node)) and not ASCII.fullmatch(doc):
+            # A docstring is always the first statement of the body; use its
+            # line because ast.Module has no lineno attribute of its own.
+            print(f"{name}:{node.body[0].lineno}: non-ASCII docstring: {doc}")
+            ok = 0
+    return ok
+
+
+if __name__ == "__main__":
+    status = 0
+    # Check every file, even after a failure, so all problems are reported.
+    for file in sys.argv[1:]:
+        if not check(file, file):
+            status = 1
+    sys.exit(status)