From b1dd2af4cac9df517ce8216eaa97e66c0b15d90a Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Mon, 11 Jul 2022 09:22:27 -0500 Subject: [PATCH] Preflight utf8 encodings check (#78175) --- changelogs/fragments/ansible-require-utf8.yml | 6 ++ .../porting_guide_core_2.14.rst | 2 +- lib/ansible/cli/__init__.py | 28 ++++++- lib/ansible/utils/display.py | 75 ++----------------- test/integration/targets/connection/test.sh | 5 +- .../targets/preflight_encoding/aliases | 2 + .../targets/preflight_encoding/tasks/main.yml | 62 +++++++++++++++ .../targets/preflight_encoding/vars/main.yml | 2 + test/units/utils/test_display.py | 18 ++--- 9 files changed, 115 insertions(+), 85 deletions(-) create mode 100644 changelogs/fragments/ansible-require-utf8.yml create mode 100644 test/integration/targets/preflight_encoding/aliases create mode 100644 test/integration/targets/preflight_encoding/tasks/main.yml create mode 100644 test/integration/targets/preflight_encoding/vars/main.yml diff --git a/changelogs/fragments/ansible-require-utf8.yml b/changelogs/fragments/ansible-require-utf8.yml new file mode 100644 index 00000000000..ee5a4198359 --- /dev/null +++ b/changelogs/fragments/ansible-require-utf8.yml @@ -0,0 +1,6 @@ +major_changes: + - ansible - At startup the filesystem encoding and locale are checked to verify they are UTF-8. + If not, the process exits with an error reporting the errant encoding. +breaking_changes: + - ansible - At startup the filesystem encoding and locale are checked to verify they are UTF-8. + If not, the process exits with an error reporting the errant encoding. 
diff --git a/docs/docsite/rst/porting_guides/porting_guide_core_2.14.rst b/docs/docsite/rst/porting_guides/porting_guide_core_2.14.rst index 6e0cf9bca0d..09221997b34 100644 --- a/docs/docsite/rst/porting_guides/porting_guide_core_2.14.rst +++ b/docs/docsite/rst/porting_guides/porting_guide_core_2.14.rst @@ -37,7 +37,7 @@ Playbook Command Line ============ -No notable changes +* At startup the filesystem encoding and locale are checked to verify they are UTF-8. If not, the process exits with an error reporting the errant encoding. If you were previously using the ``C`` or ``POSIX`` locale, you may be able to use ``C.UTF-8``. If you were previously using a locale such as ``en_US.ISO-8859-1``, you may be able to use ``en_US.UTF-8``. For simplicity it may be easiest to export the appropriate locale using the ``LC_ALL`` environment variable. An alternative to modifying your system locale is to run Python in UTF-8 mode; See the `Python documentation <https://docs.python.org/3/using/cmdline.html#envvar-PYTHONUTF8>`_ for more information. Deprecated diff --git a/lib/ansible/cli/__init__.py b/lib/ansible/cli/__init__.py index 731033fcf28..7e39627f5e8 100644 --- a/lib/ansible/cli/__init__.py +++ b/lib/ansible/cli/__init__.py @@ -7,6 +7,7 @@ from __future__ import (absolute_import, division, print_function) __metaclass__ = type +import locale import os import sys @@ -40,6 +41,30 @@ def check_blocking_io(): check_blocking_io() + +def initialize_locale(): + """Set the locale to the users default setting and ensure + the locale and filesystem encoding are UTF-8. + """ + try: + locale.setlocale(locale.LC_ALL, '') + dummy, encoding = locale.getlocale() + except (locale.Error, ValueError) as e: + raise SystemExit( + 'ERROR: Ansible could not initialize the preferred locale: %s' % e + ) + + if not encoding or encoding.lower() not in ('utf-8', 'utf8'): + raise SystemExit('ERROR: Ansible requires the locale encoding to be UTF-8; Detected %s.' 
% encoding) + + fs_enc = sys.getfilesystemencoding() + if fs_enc.lower() != 'utf-8': + raise SystemExit('ERROR: Ansible requires the filesystem encoding to be UTF-8; Detected %s.' % fs_enc) + + +initialize_locale() + + from importlib.metadata import version from ansible.module_utils.compat.version import LooseVersion @@ -61,8 +86,7 @@ from pathlib import Path try: from ansible import constants as C - from ansible.utils.display import Display, initialize_locale - initialize_locale() + from ansible.utils.display import Display display = Display() except Exception as e: print('ERROR: %s' % e, file=sys.stderr) diff --git a/lib/ansible/utils/display.py b/lib/ansible/utils/display.py index b11998fe584..c3a5de98e2f 100644 --- a/lib/ansible/utils/display.py +++ b/lib/ansible/utils/display.py @@ -19,10 +19,8 @@ from __future__ import (absolute_import, division, print_function) __metaclass__ = type import ctypes.util -import errno import fcntl import getpass -import locale import logging import os import random @@ -53,24 +51,6 @@ _LIBC.wcswidth.argtypes = (ctypes.c_wchar_p, ctypes.c_int) # Max for c_int _MAX_INT = 2 ** (ctypes.sizeof(ctypes.c_int) * 8 - 1) - 1 -_LOCALE_INITIALIZED = False -_LOCALE_INITIALIZATION_ERR = None - - -def initialize_locale(): - """Set the locale to the users default setting - and set ``_LOCALE_INITIALIZED`` to indicate whether - ``get_text_width`` may run into trouble - """ - global _LOCALE_INITIALIZED, _LOCALE_INITIALIZATION_ERR - if _LOCALE_INITIALIZED is False: - try: - locale.setlocale(locale.LC_ALL, '') - except locale.Error as e: - _LOCALE_INITIALIZATION_ERR = e - else: - _LOCALE_INITIALIZED = True - def get_text_width(text): """Function that utilizes ``wcswidth`` or ``wcwidth`` to determine the @@ -78,27 +58,11 @@ def get_text_width(text): We try first with ``wcswidth``, and fallback to iterating each character and using wcwidth individually, falling back to a value of 0 - for non-printable wide characters - - On Py2, this depends on 
``locale.setlocale(locale.LC_ALL, '')``, - that in the case of Ansible is done in ``bin/ansible`` + for non-printable wide characters. """ if not isinstance(text, text_type): raise TypeError('get_text_width requires text, not %s' % type(text)) - if _LOCALE_INITIALIZATION_ERR: - Display().warning( - 'An error occurred while calling ansible.utils.display.initialize_locale ' - '(%s). This may result in incorrectly calculated text widths that can ' - 'cause Display to print incorrect line lengths' % _LOCALE_INITIALIZATION_ERR - ) - elif not _LOCALE_INITIALIZED: - Display().warning( - 'ansible.utils.display.initialize_locale has not been called, ' - 'this may result in incorrectly calculated text widths that can ' - 'cause Display to print incorrect line lengths' - ) - try: width = _LIBC.wcswidth(text, _MAX_INT) except ctypes.ArgumentError: @@ -130,10 +94,9 @@ def get_text_width(text): w = 0 width += w - if width == 0 and counter and not _LOCALE_INITIALIZED: + if width == 0 and counter: raise EnvironmentError( - 'ansible.utils.display.initialize_locale has not been called, ' - 'and get_text_width could not calculate text width of %r' % text + 'get_text_width could not calculate text width of %r' % text ) # It doesn't make sense to have a negative printable width @@ -286,12 +249,6 @@ class Display(metaclass=Singleton): if has_newline or newline: msg2 = msg2 + u'\n' - msg2 = to_bytes(msg2, encoding=self._output_encoding(stderr=stderr)) - # Convert back to text string - # We first convert to a byte string so that we get rid of - # characters that are invalid in the user's locale - msg2 = to_text(msg2, self._output_encoding(stderr=stderr), errors='replace') - # Note: After Display() class is refactored need to update the log capture # code in 'bin/ansible-connection' (and other relevant places). 
if not stderr: @@ -316,12 +273,7 @@ class Display(metaclass=Singleton): # raise if logger and not screen_only: - # We first convert to a byte string so that we get rid of - # color and characters that are invalid in the user's locale - msg2 = to_bytes(nocolor.lstrip(u'\n')) - - # Convert back to text string - msg2 = to_text(msg2, self._output_encoding(stderr=stderr)) + msg2 = nocolor.lstrip('\n') lvl = logging.INFO if color: @@ -489,15 +441,10 @@ class Display(metaclass=Singleton): @staticmethod def prompt(msg, private=False): - prompt_string = to_bytes(msg, encoding=Display._output_encoding()) - # Convert back into text. We do this double conversion - # to get rid of characters that are illegal in the user's locale - prompt_string = to_text(prompt_string) - if private: - return getpass.getpass(prompt_string) + return getpass.getpass(msg) else: - return input(prompt_string) + return input(msg) def do_var_prompt(self, varname, private=True, prompt=None, encrypt=None, confirm=False, salt_size=None, salt=None, default=None, unsafe=None): @@ -542,16 +489,6 @@ class Display(metaclass=Singleton): result = wrap_var(result) return result - @staticmethod - def _output_encoding(stderr=False): - encoding = locale.getpreferredencoding() - # https://bugs.python.org/issue6202 - # Python2 hardcodes an obsolete value on Mac. Use MacOSX defaults - # instead. - if encoding in ('mac-roman',): - encoding = 'utf-8' - return encoding - def _set_column_width(self): if os.isatty(1): tty_size = unpack('HHHH', fcntl.ioctl(1, TIOCGWINSZ, pack('HHHH', 0, 0, 0, 0)))[1] diff --git a/test/integration/targets/connection/test.sh b/test/integration/targets/connection/test.sh index ad672e23c64..6e16a87ea7b 100755 --- a/test/integration/targets/connection/test.sh +++ b/test/integration/targets/connection/test.sh @@ -4,10 +4,7 @@ set -eux [ -f "${INVENTORY}" ] -# Run connection tests with both the default and C locale. 
- - ansible-playbook test_connection.yml -i "${INVENTORY}" "$@" -LC_ALL=C LANG=C ansible-playbook test_connection.yml -i "${INVENTORY}" "$@" +ansible-playbook test_connection.yml -i "${INVENTORY}" "$@" # Check that connection vars do not appear in the output # https://github.com/ansible/ansible/pull/70853 diff --git a/test/integration/targets/preflight_encoding/aliases b/test/integration/targets/preflight_encoding/aliases new file mode 100644 index 00000000000..0ac86c9200c --- /dev/null +++ b/test/integration/targets/preflight_encoding/aliases @@ -0,0 +1,2 @@ +context/controller +shippable/posix/group1 diff --git a/test/integration/targets/preflight_encoding/tasks/main.yml b/test/integration/targets/preflight_encoding/tasks/main.yml new file mode 100644 index 00000000000..aa33b6c9d40 --- /dev/null +++ b/test/integration/targets/preflight_encoding/tasks/main.yml @@ -0,0 +1,62 @@ +- name: find bash + shell: command -v bash + register: bash + ignore_errors: true + +- meta: end_host + when: bash is failed + +- name: get available locales + command: locale -a + register: locale_a + ignore_errors: true + +- set_fact: + non_utf8: '{{ locale_a.stdout_lines | select("contains", ".") | reject("search", "(?i)(\.UTF-?8$)") | default([None], true) | first }}' + has_cutf8: '{{ locale_a.stdout_lines | select("search", "(?i)C.UTF-?8") != [] }}' + +- name: Test successful encodings + shell: '{{ item }} ansible --version' + args: + executable: '{{ bash.stdout_lines|first }}' + loop: + - LC_ALL={{ utf8 }} + - LC_ALL={{ cutf8 }} + - LC_ALL= LC_CTYPE={{ utf8 }} + - LC_ALL= LC_CTYPE={{ cutf8 }} + when: cutf8 not in item or (cutf8 in item and has_cutf8) + +- name: test locales error + shell: LC_ALL=ham_sandwich LC_CTYPE={{ utf8 }} ansible --version + args: + executable: '{{ bash.stdout_lines|first }}' + ignore_errors: true + register: locales_error + +- assert: + that: + - locales_error is failed + - >- + 'ERROR: Ansible could not initialize the preferred locale' in locales_error.stderr 
+ +- meta: end_host + when: non_utf8 is falsy + +- name: Test unsuccessful encodings + shell: '{{ item }} ansible --version' + args: + executable: '{{ bash.stdout_lines|first }}' + loop: + - LC_ALL={{ non_utf8 }} + - LC_ALL= LC_CTYPE={{ non_utf8 }} + ignore_errors: true + register: result + +- assert: + that: + - result is failed + - result.results | select('failed') | length == 2 + - >- + 'ERROR: Ansible requires the locale encoding to be UTF-8' in result.results[0].stderr + - >- + 'ERROR: Ansible requires the locale encoding to be UTF-8' in result.results[1].stderr diff --git a/test/integration/targets/preflight_encoding/vars/main.yml b/test/integration/targets/preflight_encoding/vars/main.yml new file mode 100644 index 00000000000..34eb2a6db7f --- /dev/null +++ b/test/integration/targets/preflight_encoding/vars/main.yml @@ -0,0 +1,2 @@ +utf8: en_US.UTF-8 +cutf8: C.UTF-8 diff --git a/test/units/utils/test_display.py b/test/units/utils/test_display.py index f0a6b6eefbb..34e39b6f619 100644 --- a/test/units/utils/test_display.py +++ b/test/units/utils/test_display.py @@ -5,17 +5,17 @@ from __future__ import absolute_import, division, print_function __metaclass__ = type +import locale from unittest.mock import MagicMock import pytest -from ansible.module_utils.six import PY3 -from ansible.utils.display import Display, get_text_width, initialize_locale +from ansible.utils.display import Display, get_text_width from ansible.utils.multiprocessing import context as multiprocessing_context def test_get_text_width(): - initialize_locale() + locale.setlocale(locale.LC_ALL, '') assert get_text_width(u'コンニチハ') == 10 assert get_text_width(u'abコcd') == 6 assert get_text_width(u'café') == 4 @@ -35,13 +35,13 @@ def test_get_text_width(): pytest.raises(TypeError, get_text_width, b'four') -@pytest.mark.skipif(PY3, reason='Fallback only happens reliably on py2') def test_get_text_width_no_locale(): - pytest.raises(EnvironmentError, get_text_width, u'🚀🐮') + 
locale.setlocale(locale.LC_ALL, 'C.UTF-8') + pytest.raises(EnvironmentError, get_text_width, '\U000110cd') def test_Display_banner_get_text_width(monkeypatch): - initialize_locale() + locale.setlocale(locale.LC_ALL, '') display = Display() display_mock = MagicMock() monkeypatch.setattr(display, 'display', display_mock) @@ -53,16 +53,16 @@ def test_Display_banner_get_text_width(monkeypatch): assert msg.endswith(stars) -@pytest.mark.skipif(PY3, reason='Fallback only happens reliably on py2') def test_Display_banner_get_text_width_fallback(monkeypatch): + locale.setlocale(locale.LC_ALL, 'C.UTF-8') display = Display() display_mock = MagicMock() monkeypatch.setattr(display, 'display', display_mock) - display.banner(u'🚀🐮', color=False, cows=False) + display.banner(u'\U000110cd', color=False, cows=False) args, kwargs = display_mock.call_args msg = args[0] - stars = u' %s' % (77 * u'*') + stars = u' %s' % (78 * u'*') assert msg.endswith(stars)