You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ansible/test/sanity/code-smell/codespell.py

111 lines
3.2 KiB
Python

"""Check for common misspelled words using codespell."""
from __future__ import annotations
import pathlib
import re
import subprocess
import sys
import tempfile
import typing as t
def main() -> None:
    """Run codespell over the requested paths and report misspellings.

    Paths are taken from the command line arguments or, when none are given,
    from standard input (one path per line). The words to ignore are loaded
    from ``codespell/ignore-words.txt`` next to this script.

    Each reported finding is printed to stdout as
    ``path:line:column: code: wrong ==> suggestion``.

    Exits with status 1 when codespell writes anything to stderr, or when it
    produced output and returned a code other than 0 or 65.
    """
    paths = sys.argv[1:] or sys.stdin.read().splitlines()

    ignore_words_lines = (pathlib.Path(__file__).parent / 'codespell' / 'ignore-words.txt').read_text().splitlines()
    # Entries may carry a trailing "# comment"; keep only the text before it.
    ignore_words = [re.split(r'\s*#', line, maxsplit=1)[0] for line in ignore_words_lines]

    quiet_level = (
        32  # don't print configuration file
        + 16  # don't print the list of fixed files
        + 4  # omit warnings about automatic fixes that were disabled in the dictionary
        + 2  # disable warnings about binary files
        + 1  # disable warnings about wrong encoding
    )

    paths = [path for path in paths if path != 'test/sanity/ignore.txt']

    # The stripped ignore list is handed to codespell through a temporary file,
    # which must stay alive (and flushed) until the subprocess has finished.
    with tempfile.NamedTemporaryFile(mode='wt') as ignore_words_temp_file:
        ignore_words_temp_file.write('\n'.join(ignore_words))
        ignore_words_temp_file.flush()

        cmd = [
            sys.executable,
            '-m',
            'codespell_lib',
            '--disable-colors',
            '--builtin',
            'clear',  # minimize false positives
            '--quiet-level',
            str(quiet_level),
            '--ignore-words',
            ignore_words_temp_file.name,
        ] + paths

        process = subprocess.run(
            cmd,
            stdin=subprocess.DEVNULL,
            capture_output=True,
            check=False,
            text=True,
        )

    if process.stderr:
        print(process.stderr.strip(), file=sys.stderr)
        sys.exit(1)

    if not (stdout := process.stdout.strip()):
        return

    if process.returncode not in (0, 65):
        print(f'Unexpected return code: {process.returncode}')
        sys.exit(1)

    pattern = re.compile(r'^(?P<path>[^:]*):(?P<line>[0-9]+): (?P<left>.*) ==> (?P<right>.*)$')
    matches = parse_to_list_of_dict(pattern, stdout)

    results: list[str] = []

    for match in matches:
        path, line_num_str, left, right = match['path'], match['line'], match['left'], match['right']

        # Filter before touching the file so discarded findings cost no I/O.
        if len(left) <= 3 and pathlib.Path(path).suffix == '.py':
            continue  # ignore short words in Python files, as they're likely just short variable names

        line_num = int(line_num_str)

        try:
            line = pathlib.Path(path).read_text().splitlines()[line_num - 1]
            col_num = line.index(left) + 1
        except (IndexError, ValueError):
            # ValueError covers UnicodeDecodeError (its subclass) as well as the
            # word not being found on the resolved line; IndexError covers a
            # reported line number beyond what splitlines() produced. In every
            # case the finding is still reported, just without a column.
            col_num = 0

        code = re.sub('[^a-zA-Z]', '_', left)

        results.append(f"{path}:{line_num}:{col_num}: {code}: {left} ==> {right}")

    if results:
        print('\n'.join(results))
def parse_to_list_of_dict(pattern: re.Pattern, value: str) -> list[dict[str, t.Any]]:
    """Apply ``pattern`` to every line of ``value`` and collect the named groups.

    Args:
        pattern: Regular expression searched for in each line.
        value: Text to process; it is split into lines with ``splitlines()``.

    Returns:
        One ``groupdict()`` per line, in input order.

    Raises:
        Exception: If at least one line does not match. The message lists
            every non-matching line.
    """
    parsed: list[dict[str, t.Any]] = []
    rejected: list[str] = []

    for text in value.splitlines():
        found = re.search(pattern, text)

        if found is None:
            rejected.append(text)
            continue

        parsed.append(found.groupdict())

    if not rejected:
        return parsed

    # All lines are scanned before failing so the error shows every offender.
    raise Exception(f'Pattern {pattern!r} did not match values:\n' + '\n'.join(rejected))
# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()