Preflight utf8 encodings check (#78175)

pull/78237/head
Matt Martz 2 years ago committed by GitHub
parent 9950a86f73
commit b1dd2af4ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,6 @@
major_changes:
- ansible - At startup the filesystem encoding and locale are checked to verify they are UTF-8.
If not, the process exits with an error reporting the errant encoding.
breaking_changes:
- ansible - At startup the filesystem encoding and locale are checked to verify they are UTF-8.
If not, the process exits with an error reporting the errant encoding.

@ -37,7 +37,7 @@ Playbook
Command Line
============
No notable changes
* At startup the filesystem encoding and locale are checked to verify they are UTF-8. If not, the process exits with an error reporting the errant encoding. If you were previously using the ``C`` or ``POSIX`` locale, you may be able to use ``C.UTF-8``. If you were previously using a locale such as ``en_US.ISO-8859-1``, you may be able to use ``en_US.UTF-8``. For simplicity, it may be easiest to export the appropriate locale using the ``LC_ALL`` environment variable. An alternative to modifying your system locale is to run Python in UTF-8 mode; see the `Python documentation <https://docs.python.org/3/using/cmdline.html#envvar-PYTHONUTF8>`_ for more information.
Deprecated

@ -7,6 +7,7 @@
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import locale
import os
import sys
@ -40,6 +41,30 @@ def check_blocking_io():
check_blocking_io()
def initialize_locale():
    """Set the locale to the user's default setting and ensure
    the locale and filesystem encoding are UTF-8.

    The process-wide locale is set from the environment with
    ``locale.setlocale(locale.LC_ALL, '')``; the resulting locale encoding
    and ``sys.getfilesystemencoding()`` are then both validated.

    :raises SystemExit: if the locale cannot be initialized, or if either
        the locale encoding or the filesystem encoding is not UTF-8. The
        exit message names the encoding that was detected.
    """
    # Spellings of UTF-8 accepted by BOTH checks below (compared lowercased),
    # so the locale and filesystem checks cannot disagree on what is valid.
    utf8_names = ('utf-8', 'utf8')

    try:
        locale.setlocale(locale.LC_ALL, '')
        dummy, encoding = locale.getlocale()
    except (locale.Error, ValueError) as e:
        # Either call failing means the locale is unusable; report it and exit.
        raise SystemExit(
            'ERROR: Ansible could not initialize the preferred locale: %s' % e
        )

    # A missing (falsy) encoding is rejected the same way as a non-UTF-8 one.
    if not encoding or encoding.lower() not in utf8_names:
        raise SystemExit('ERROR: Ansible requires the locale encoding to be UTF-8; Detected %s.' % encoding)

    fs_enc = sys.getfilesystemencoding()
    if fs_enc.lower() not in utf8_names:
        raise SystemExit('ERROR: Ansible requires the filesystem encoding to be UTF-8; Detected %s.' % fs_enc)
initialize_locale()
from importlib.metadata import version
from ansible.module_utils.compat.version import LooseVersion
@ -61,8 +86,7 @@ from pathlib import Path
try:
from ansible import constants as C
from ansible.utils.display import Display, initialize_locale
initialize_locale()
from ansible.utils.display import Display
display = Display()
except Exception as e:
print('ERROR: %s' % e, file=sys.stderr)

@ -19,10 +19,8 @@ from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import ctypes.util
import errno
import fcntl
import getpass
import locale
import logging
import os
import random
@ -53,24 +51,6 @@ _LIBC.wcswidth.argtypes = (ctypes.c_wchar_p, ctypes.c_int)
# Max for c_int
_MAX_INT = 2 ** (ctypes.sizeof(ctypes.c_int) * 8 - 1) - 1
_LOCALE_INITIALIZED = False
_LOCALE_INITIALIZATION_ERR = None
def initialize_locale():
    """Set the locale to the user's default setting
    and set ``_LOCALE_INITIALIZED`` to indicate whether
    ``get_text_width`` may run into trouble.

    A ``locale.Error`` raised by ``locale.setlocale`` is not propagated; it is
    stored in ``_LOCALE_INITIALIZATION_ERR`` instead. Once initialization has
    succeeded, further calls do nothing.
    """
    global _LOCALE_INITIALIZED, _LOCALE_INITIALIZATION_ERR
    if _LOCALE_INITIALIZED is False:
        try:
            locale.setlocale(locale.LC_ALL, '')
        except locale.Error as e:
            _LOCALE_INITIALIZATION_ERR = e
        else:
            _LOCALE_INITIALIZED = True
            # Drop any error recorded by an earlier failed attempt:
            # get_text_width checks _LOCALE_INITIALIZATION_ERR first and would
            # otherwise keep warning even though the locale is now set.
            _LOCALE_INITIALIZATION_ERR = None
def get_text_width(text):
"""Function that utilizes ``wcswidth`` or ``wcwidth`` to determine the
@ -78,27 +58,11 @@ def get_text_width(text):
We try first with ``wcswidth``, and fallback to iterating each
character and using wcwidth individually, falling back to a value of 0
for non-printable wide characters
On Py2, this depends on ``locale.setlocale(locale.LC_ALL, '')``,
that in the case of Ansible is done in ``bin/ansible``
for non-printable wide characters.
"""
if not isinstance(text, text_type):
raise TypeError('get_text_width requires text, not %s' % type(text))
if _LOCALE_INITIALIZATION_ERR:
Display().warning(
'An error occurred while calling ansible.utils.display.initialize_locale '
'(%s). This may result in incorrectly calculated text widths that can '
'cause Display to print incorrect line lengths' % _LOCALE_INITIALIZATION_ERR
)
elif not _LOCALE_INITIALIZED:
Display().warning(
'ansible.utils.display.initialize_locale has not been called, '
'this may result in incorrectly calculated text widths that can '
'cause Display to print incorrect line lengths'
)
try:
width = _LIBC.wcswidth(text, _MAX_INT)
except ctypes.ArgumentError:
@ -130,10 +94,9 @@ def get_text_width(text):
w = 0
width += w
if width == 0 and counter and not _LOCALE_INITIALIZED:
if width == 0 and counter:
raise EnvironmentError(
'ansible.utils.display.initialize_locale has not been called, '
'and get_text_width could not calculate text width of %r' % text
'get_text_width could not calculate text width of %r' % text
)
# It doesn't make sense to have a negative printable width
@ -286,12 +249,6 @@ class Display(metaclass=Singleton):
if has_newline or newline:
msg2 = msg2 + u'\n'
msg2 = to_bytes(msg2, encoding=self._output_encoding(stderr=stderr))
# Convert back to text string
# We first convert to a byte string so that we get rid of
# characters that are invalid in the user's locale
msg2 = to_text(msg2, self._output_encoding(stderr=stderr), errors='replace')
# Note: After Display() class is refactored need to update the log capture
# code in 'bin/ansible-connection' (and other relevant places).
if not stderr:
@ -316,12 +273,7 @@ class Display(metaclass=Singleton):
# raise
if logger and not screen_only:
# We first convert to a byte string so that we get rid of
# color and characters that are invalid in the user's locale
msg2 = to_bytes(nocolor.lstrip(u'\n'))
# Convert back to text string
msg2 = to_text(msg2, self._output_encoding(stderr=stderr))
msg2 = nocolor.lstrip('\n')
lvl = logging.INFO
if color:
@ -489,15 +441,10 @@ class Display(metaclass=Singleton):
@staticmethod
def prompt(msg, private=False):
prompt_string = to_bytes(msg, encoding=Display._output_encoding())
# Convert back into text. We do this double conversion
# to get rid of characters that are illegal in the user's locale
prompt_string = to_text(prompt_string)
if private:
return getpass.getpass(prompt_string)
return getpass.getpass(msg)
else:
return input(prompt_string)
return input(msg)
def do_var_prompt(self, varname, private=True, prompt=None, encrypt=None, confirm=False, salt_size=None, salt=None, default=None, unsafe=None):
@ -542,16 +489,6 @@ class Display(metaclass=Singleton):
result = wrap_var(result)
return result
@staticmethod
def _output_encoding(stderr=False):
    """Return the encoding name to use for output.

    The value comes from ``locale.getpreferredencoding()``, except that
    ``mac-roman`` is replaced by ``utf-8``. The ``stderr`` argument is
    accepted but not consulted.
    """
    preferred = locale.getpreferredencoding()
    # https://bugs.python.org/issue6202
    # Python2 hardcodes an obsolete value on Mac. Use MacOSX defaults
    # instead.
    return 'utf-8' if preferred in ('mac-roman',) else preferred
def _set_column_width(self):
if os.isatty(1):
tty_size = unpack('HHHH', fcntl.ioctl(1, TIOCGWINSZ, pack('HHHH', 0, 0, 0, 0)))[1]

@ -4,10 +4,7 @@ set -eux
[ -f "${INVENTORY}" ]
# Run connection tests with both the default and C locale.
ansible-playbook test_connection.yml -i "${INVENTORY}" "$@"
LC_ALL=C LANG=C ansible-playbook test_connection.yml -i "${INVENTORY}" "$@"
ansible-playbook test_connection.yml -i "${INVENTORY}" "$@"
# Check that connection vars do not appear in the output
# https://github.com/ansible/ansible/pull/70853

@ -0,0 +1,2 @@
context/controller
shippable/posix/group1

@ -0,0 +1,62 @@
- name: find bash
shell: command -v bash
register: bash
ignore_errors: true
- meta: end_host
when: bash is failed
- name: get available locales
command: locale -a
register: locale_a
ignore_errors: true
- set_fact:
non_utf8: '{{ locale_a.stdout_lines | select("contains", ".") | reject("search", "(?i)(\.UTF-?8$)") | default([None], true) | first }}'
has_cutf8: '{{ locale_a.stdout_lines | select("search", "(?i)C.UTF-?8") != [] }}'
- name: Test successful encodings
shell: '{{ item }} ansible --version'
args:
executable: '{{ bash.stdout_lines|first }}'
loop:
- LC_ALL={{ utf8 }}
- LC_ALL={{ cutf8 }}
- LC_ALL= LC_CTYPE={{ utf8 }}
- LC_ALL= LC_CTYPE={{ cutf8 }}
when: cutf8 not in item or (cutf8 in item and has_cutf8)
- name: test locales error
shell: LC_ALL=ham_sandwich LC_CTYPE={{ utf8 }} ansible --version
args:
executable: '{{ bash.stdout_lines|first }}'
ignore_errors: true
register: locales_error
- assert:
that:
- locales_error is failed
- >-
'ERROR: Ansible could not initialize the preferred locale' in locales_error.stderr
- meta: end_host
when: non_utf8 is falsy
- name: Test unsuccessful encodings
shell: '{{ item }} ansible --version'
args:
executable: '{{ bash.stdout_lines|first }}'
loop:
- LC_ALL={{ non_utf8 }}
- LC_ALL= LC_CTYPE={{ non_utf8 }}
ignore_errors: true
register: result
- assert:
that:
- result is failed
- result.results | select('failed') | length == 2
- >-
'ERROR: Ansible requires the locale encoding to be UTF-8' in result.results[0].stderr
- >-
'ERROR: Ansible requires the locale encoding to be UTF-8' in result.results[1].stderr

@ -0,0 +1,2 @@
utf8: en_US.UTF-8
cutf8: C.UTF-8

@ -5,17 +5,17 @@
from __future__ import absolute_import, division, print_function
__metaclass__ = type
import locale
from unittest.mock import MagicMock
import pytest
from ansible.module_utils.six import PY3
from ansible.utils.display import Display, get_text_width, initialize_locale
from ansible.utils.display import Display, get_text_width
from ansible.utils.multiprocessing import context as multiprocessing_context
def test_get_text_width():
initialize_locale()
locale.setlocale(locale.LC_ALL, '')
assert get_text_width(u'コンニチハ') == 10
assert get_text_width(u'abコcd') == 6
assert get_text_width(u'café') == 4
@ -35,13 +35,13 @@ def test_get_text_width():
pytest.raises(TypeError, get_text_width, b'four')
@pytest.mark.skipif(PY3, reason='Fallback only happens reliably on py2')
def test_get_text_width_no_locale():
pytest.raises(EnvironmentError, get_text_width, u'🚀🐮')
locale.setlocale(locale.LC_ALL, 'C.UTF-8')
pytest.raises(EnvironmentError, get_text_width, '\U000110cd')
def test_Display_banner_get_text_width(monkeypatch):
initialize_locale()
locale.setlocale(locale.LC_ALL, '')
display = Display()
display_mock = MagicMock()
monkeypatch.setattr(display, 'display', display_mock)
@ -53,16 +53,16 @@ def test_Display_banner_get_text_width(monkeypatch):
assert msg.endswith(stars)
@pytest.mark.skipif(PY3, reason='Fallback only happens reliably on py2')
def test_Display_banner_get_text_width_fallback(monkeypatch):
locale.setlocale(locale.LC_ALL, 'C.UTF-8')
display = Display()
display_mock = MagicMock()
monkeypatch.setattr(display, 'display', display_mock)
display.banner(u'🚀🐮', color=False, cows=False)
display.banner(u'\U000110cd', color=False, cows=False)
args, kwargs = display_mock.call_args
msg = args[0]
stars = u' %s' % (77 * u'*')
stars = u' %s' % (78 * u'*')
assert msg.endswith(stars)

Loading…
Cancel
Save