Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .github/workflows/lint-cloudlinux.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name: Lint (cloudlinux)

# CloudLinux fork uses the `cloudlinux` branch as trunk; the inherited
# upstream workflows (unit-tests.yml, codespell.yml, etc.) only fire on
# `master` and so do not run on this fork. This narrow workflow guards
# the cloudlinux branch against the most common regression we have hit:
# stray non-ASCII bytes in Python source (em-dashes, smart quotes,
# ellipsis from copy/paste, leftover BOMs).
#
# The check is just `make lint-non-ascii`, which delegates to
# utils/check-non-ascii.py. That same target is also invoked as a
# prerequisite of `make lint`, so local developers get the identical
# rule when they run the full lint locally.
#
# Broader fixes (running the full upstream test+lint matrix on cloudlinux
# PRs) are tracked separately.

# Run for pull requests that target `cloudlinux` and for pushes to it.
on:
  pull_request:
    branches: [cloudlinux]
  push:
    branches: [cloudlinux]

# Least privilege: the job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  lint-non-ascii:
    name: Reject undeclared non-ASCII in Python source
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # No virtualenv or dependency-install step: the Makefile target
      # invokes utils/check-non-ascii.py directly with python3.
      - name: make lint-non-ascii
        run: make lint-non-ascii
12 changes: 11 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,17 @@ install-deps-fedora:
$(VENVNAME)/bin/pip install -I "git+https://github.com/oamg/leapp.git@refs/pull/$(REQ_LEAPP_PR)/head"; \
fi

lint:
# Reject undeclared non-ASCII bytes in Python source. Standalone target so
# CI can call it without bringing up the full lint venv (the script is
# pure stdlib python3) and so the rule is the single source of truth for
# both `make lint` and the lint-cloudlinux GitHub Action.
#
# Declared phony because it names an action, not a file: without this, a
# stray file called `lint-non-ascii` in the repository root would make the
# check a silent no-op. An empty TEST_PATHS is reported on stderr and
# treated as success rather than as a failure.
.PHONY: lint-non-ascii
lint-non-ascii:
	@echo "--- Checking for non-ASCII characters (Python 2.7 compat, PEP 263 aware) ---"
	@SEARCH_PATH="$(TEST_PATHS)"; \
	if [ -z "$${SEARCH_PATH}" ]; then echo "TEST_PATHS is empty; nothing to scan." >&2; exit 0; fi; \
	python3 utils/check-non-ascii.py $${SEARCH_PATH}

lint: lint-non-ascii
. $(VENVNAME)/bin/activate; \
echo "--- Linting ... ---" && \
SEARCH_PATH="$(TEST_PATHS)" && \
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
import os
import os.path
import shutil
import logging
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
import os
import os.path

from leapp.libraries.stdlib import api
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
import os
import os.path

from leapp.libraries.stdlib import api
Expand Down
2 changes: 1 addition & 1 deletion repos/system_upgrade/common/libraries/repomaputils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import json
import json
from collections import defaultdict
from leapp.models import PESIDRepositoryEntry, RepoMapEntry, RepositoriesMapping

Expand Down
91 changes: 91 additions & 0 deletions utils/check-non-ascii.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""Reject undeclared non-ASCII bytes in Python source.

Python 2.7 source is ASCII-only unless the file declares its encoding via
PEP 263 (a `# -*- coding: <name> -*-` or `# coding=<name>` comment on the
first or second line). This script enforces that rule on the paths given
as positional arguments: any *.py file that contains a byte > 0x7F and
does NOT carry a PEP 263 declaration is reported and the script exits 1.

Used both by `make lint-non-ascii` (and therefore `make lint`) and by
the lint-cloudlinux GitHub Action so the two stay in sync.
"""

from __future__ import print_function

import os
import re
import sys


# PEP 263: an encoding declaration must appear on line 1 or 2 and match
# the regex below. https://peps.python.org/pep-0263/
_CODING_RE = re.compile(rb"^[ \t\f]*#.*?coding[=:][ \t]*([-_.a-zA-Z0-9]+)")

# A line that is empty or holds nothing but a comment. A declaration on
# line 2 is only honoured by Python when line 1 looks like this.
_BLANK_OR_COMMENT_RE = re.compile(rb"^[ \t\f]*(?:[#\r\n]|$)")

_UTF8_BOM = b"\xef\xbb\xbf"


def _file_has_encoding_declaration(data):
    """Tell whether ``data`` carries an effective PEP 263 declaration.

    :param data: raw bytes of a Python source file
    :returns: True if line 1 is an encoding comment, or if line 2 is an
        encoding comment AND line 1 is blank or comment-only; else False

    The Python language reference only honours a declaration on the
    second line when the first line is itself a comment-only line, so a
    coding comment that follows real code on line 1 is not counted here.
    """
    head = data.split(b"\n", 2)[:2]
    first = head[0]
    if _CODING_RE.match(first):
        return True
    if len(head) < 2:
        return False
    # A leading UTF-8 BOM does not stop line 1 from being "comment-only";
    # ignore it for that check so `<BOM>#!/usr/bin/python` still qualifies.
    if first.startswith(_UTF8_BOM):
        first = first[len(_UTF8_BOM):]
    if not _BLANK_OR_COMMENT_RE.match(first):
        return False
    return bool(_CODING_RE.match(head[1]))


def _scan_file(path):
    """Collect the offending lines of a single file.

    :param path: filesystem path of the file to inspect
    :returns: list of ``(lineno, decoded_line)`` tuples, one per line that
        contains a byte above 0x7F; the list is empty when the file is
        pure ASCII or when it carries a PEP 263 encoding declaration.
        Line numbers are 1-based and lines are decoded as UTF-8 with
        undecodable bytes replaced.
    """
    with open(path, "rb") as handle:
        raw = handle.read()

    def _has_high_byte(chunk):
        return any(byte > 0x7F for byte in bytearray(chunk))

    # Cheap whole-file check first; the declaration is only consulted when
    # there is actually something non-ASCII to excuse.
    if not _has_high_byte(raw) or _file_has_encoding_declaration(raw):
        return []

    return [
        (number, row.decode("utf-8", "replace"))
        for number, row in enumerate(raw.splitlines(), start=1)
        if _has_high_byte(row)
    ]


def _walk_paths(roots):
    """Yield every ``*.py`` file reachable from ``roots``.

    :param roots: iterable of paths; each may be a file or a directory
    :returns: generator of file paths. A root that is a regular file is
        yielded as-is when its name ends in ``.py``; any other root is
        walked recursively with ``os.walk``.

    Directories and files are visited in sorted order so that the
    report is identical from one machine or filesystem to the next.
    """
    for root in roots:
        if os.path.isfile(root):
            if root.endswith(".py"):
                yield root
            continue
        for dirpath, dirnames, filenames in os.walk(root):
            # Sorting in place steers the order in which os.walk descends.
            dirnames.sort()
            for name in sorted(filenames):
                if name.endswith(".py"):
                    yield os.path.join(dirpath, name)


def _print_hit(line):
    """Print one report line, escaping it if stdout cannot encode it."""
    try:
        print(line)
    except UnicodeEncodeError:
        # The offending text is non-ASCII by definition; on a stream that
        # cannot represent it, show backslash escapes instead of crashing.
        print(line.encode("ascii", "backslashreplace").decode("ascii"))


def main(argv):
    """Scan the paths given on the command line and report violations.

    :param argv: full argument vector; ``argv[0]`` is the program name
        and the remaining items are files or directories to scan
    :returns: process exit code -- 2 when no path was given, 1 when at
        least one offending line was found, 0 otherwise (including the
        case where none of the given paths exist)

    Offending lines are printed to stdout as ``path:lineno:text``;
    usage, warnings and the final error summary go to stderr.
    """
    requested = argv[1:]
    if not requested:
        print("usage: {} <path> [path ...]".format(argv[0]), file=sys.stderr)
        return 2

    paths = []
    for candidate in requested:
        if os.path.exists(candidate):
            paths.append(candidate)
        else:
            # Say which path was dropped so a typo in the caller's path
            # list does not silently shrink the scanned set.
            print(
                "warning: path does not exist, skipping: {}".format(candidate),
                file=sys.stderr,
            )
    if not paths:
        print("warning: no provided paths exist; nothing to scan", file=sys.stderr)
        return 0

    bad = 0
    for path in _walk_paths(paths):
        for lineno, text in _scan_file(path):
            _print_hit("{}:{}:{}".format(path, lineno, text))
            bad += 1

    if bad:
        print(
            "\nERROR: Non-ASCII bytes found in Python source without a PEP 263 "
            "encoding declaration. Replace em-dashes (U+2014), smart quotes, "
            "ellipsis, etc. with ASCII equivalents, or add "
            "'# -*- coding: utf-8 -*-' on line 1 or 2 if the non-ASCII content "
            "is intentional.",
            file=sys.stderr,
        )
        return 1
    return 0


# Script entry point: main()'s return value becomes the process exit code.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
Loading