From 57701d71150d9b157c1e34384b32ec268dc82fa0 Mon Sep 17 00:00:00 2001 From: Toshio Kuratomi Date: Mon, 22 Aug 2016 16:44:13 -0700 Subject: [PATCH] Give native strings to selinux library functions. (#17184) * Give native strings to selinux library functions. SELinux takes pathnames as native strings. That means we need to convert to bytes on python2 and convert to text on python3. Fixes #17155 * Read kitchen documentation, make module_utils params more like kitchen API * Remove none nonstring strategy and add strict * Raise TypeError on invalid nonstring strategy * Document to_native() * Make unittests for testing module_utils.text --- lib/ansible/module_utils/_text.py | 171 +++++++++++++++++++++ lib/ansible/module_utils/basic.py | 20 +-- test/units/module_utils/basic/test_text.py | 65 ++++++++ test/units/module_utils/test_basic.py | 15 +- 4 files changed, 242 insertions(+), 29 deletions(-) create mode 100644 lib/ansible/module_utils/_text.py create mode 100644 test/units/module_utils/basic/test_text.py diff --git a/lib/ansible/module_utils/_text.py b/lib/ansible/module_utils/_text.py new file mode 100644 index 00000000000..7dec6475807 --- /dev/null +++ b/lib/ansible/module_utils/_text.py @@ -0,0 +1,171 @@ +# This code is part of Ansible, but is an independent component. +# This particular file snippet, and this file snippet only, is BSD licensed. +# Modules you write using this snippet, which is embedded dynamically by Ansible +# still belong to the author of the module, and may assign their own license +# to the complete work. +# +# Copyright (c), Toshio Kuratomi , 2016 +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +""" +.. warning:: This module_util is currently an internal implementation detail. + We want to evaluate this code for stability and API suitability before + making backwards compatibility guarantees. The API may change between + releases. Do not use this unless you are willing to port your module code. +""" + +from ansible.module_utils.six import PY3, text_type, binary_type + +def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): + """Make sure that a string is a byte string + + :arg obj: An object to make sure is a byte string. In most cases this + will be either a text string or a byte string. However, with + ``nonstring='simplerepr'``, this can be used as a traceback-free + version of ``str(obj)``. + :kwarg encoding: The encoding to use to transform from a text string to + a byte string. Defaults to using 'utf-8'. + :kwarg errors: The error handler to use if the text string is not + encodable using the specified encoding. 
Any valid `codecs error + handler <https://docs.python.org/2/library/codecs.html#codec-base-classes>`_ + may be specified. On Python3 this defaults to 'surrogateescape'. On + Python2, this defaults to 'replace'. + :kwarg nonstring: The strategy to use if a nonstring is specified in + ``obj``. Default is 'simplerepr'. Valid values are: + + :simplerepr: The default. This takes the ``str`` of the object and + then returns the bytes version of that string. + :empty: Return an empty byte string + :passthru: Return the object passed in + :strict: Raise a :exc:`TypeError` + + :returns: Typically this returns a byte string. If a nonstring object is + passed in this may be a different type depending on the strategy + specified by nonstring. This will never return a text string. + + .. note:: If passed a byte string, this function does not check that the + string is valid in the specified encoding. If it's important that the + byte string is in the specified encoding do:: + + encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8') + """ + if isinstance(obj, binary_type): + return obj + + if errors is None: + if PY3: + errors = 'surrogateescape' + else: + errors = 'replace' + + if isinstance(obj, text_type): + return obj.encode(encoding, errors) + + # Note: We do these last even though we have to call to_bytes again on the + # value because we're optimizing the common case + if nonstring == 'simplerepr': + value = str(obj) + elif nonstring == 'passthru': + return obj + elif nonstring == 'empty': + # python2.4 doesn't have b'' + return to_bytes('') + elif nonstring == 'strict': + raise TypeError('obj must be a string type') + else: + raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring) + + return to_bytes(value, encoding, errors) + +def to_text(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): + """Make sure that a string is a text string + + :arg obj: An object to make sure is a text string. In most cases this + will be either a text string or a byte string. 
However, with + ``nonstring='simplerepr'``, this can be used as a traceback-free + version of ``str(obj)``. + :kwarg encoding: The encoding to use to transform from a byte string to + a text string. Defaults to using 'utf-8'. + :kwarg errors: The error handler to use if the byte string is not + decodable using the specified encoding. Any valid `codecs error + handler <https://docs.python.org/2/library/codecs.html#codec-base-classes>`_ + may be specified. On Python3 this defaults to 'surrogateescape'. On + Python2, this defaults to 'replace'. + :kwarg nonstring: The strategy to use if a nonstring is specified in + ``obj``. Default is 'simplerepr'. Valid values are: + + :simplerepr: The default. This takes the ``str`` of the object and + then returns the text version of that string. + :empty: Return an empty text string + :passthru: Return the object passed in + :strict: Raise a :exc:`TypeError` + + :returns: Typically this returns a text string. If a nonstring object is + passed in this may be a different type depending on the strategy + specified by nonstring. This will never return a byte string. + """ + if isinstance(obj, text_type): + return obj + + if errors is None: + if PY3: + errors = 'surrogateescape' + else: + errors = 'replace' + + if isinstance(obj, binary_type): + return obj.decode(encoding, errors) + + # Note: We do these last even though we have to call to_text again on the + # value because we're optimizing the common case + if nonstring == 'simplerepr': + value = str(obj) + elif nonstring == 'passthru': + return obj + elif nonstring == 'empty': + return u'' + elif nonstring == 'strict': + raise TypeError('obj must be a string type') + else: + raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring) + + return to_text(value, encoding, errors) + +#: :py:func:`to_native` +#: Transform a variable into the native str type for the python version +#: +#: On Python2, this is an alias for +#: :func:`~ansible.module_utils.to_bytes`. 
On Python3 it is an alias for +#: :func:`~ansible.module_utils.to_text`. It makes it easier to +#: transform a variable into the native str type for the python version +#: the code is running on. Use this when constructing the message to +#: send to exceptions or when dealing with an API that needs to take +#: a native string. Example:: +#: +#: try: +#: 1//0 +#: except ZeroDivisionError as e: +#: raise MyException('Encountered an error: %s' % to_native(e)) +if PY3: + to_native = to_text +else: + to_native = to_bytes diff --git a/lib/ansible/module_utils/basic.py b/lib/ansible/module_utils/basic.py index fec380ec864..8695a768445 100644 --- a/lib/ansible/module_utils/basic.py +++ b/lib/ansible/module_utils/basic.py @@ -181,6 +181,7 @@ from ansible.module_utils.six import (PY2, PY3, b, binary_type, integer_types, iteritems, text_type, string_types) from ansible.module_utils.six.moves import map, reduce from ansible.module_utils.pycompat24 import get_exception +from ansible.module_utils._text import to_native _NUMBERTYPES = tuple(list(integer_types) + [float]) @@ -773,26 +774,13 @@ class AnsibleModule(object): context.append(None) return context - def _to_filesystem_str(self, path): - '''Returns filesystem path as a str, if it wasn't already. - - Used in selinux interactions because it cannot accept unicode - instances, and specifying complex args in a playbook leaves - you with unicode instances. This method currently assumes - that your filesystem encoding is UTF-8. 
- - ''' - if isinstance(path, text_type): - path = path.encode("utf-8") - return path - # If selinux fails to find a default, return an array of None def selinux_default_context(self, path, mode=0): context = self.selinux_initial_context() if not HAVE_SELINUX or not self.selinux_enabled(): return context try: - ret = selinux.matchpathcon(self._to_filesystem_str(path), mode) + ret = selinux.matchpathcon(to_native(path, 'strict'), mode) except OSError: return context if ret[0] == -1: @@ -807,7 +795,7 @@ class AnsibleModule(object): if not HAVE_SELINUX or not self.selinux_enabled(): return context try: - ret = selinux.lgetfilecon_raw(self._to_filesystem_str(path)) + ret = selinux.lgetfilecon_raw(to_native(path, 'strict')) except OSError: e = get_exception() if e.errno == errno.ENOENT: @@ -895,7 +883,7 @@ class AnsibleModule(object): try: if self.check_mode: return True - rc = selinux.lsetfilecon(self._to_filesystem_str(path), + rc = selinux.lsetfilecon(to_native(path), str(':'.join(new_context))) except OSError: e = get_exception() diff --git a/test/units/module_utils/basic/test_text.py b/test/units/module_utils/basic/test_text.py new file mode 100644 index 00000000000..6b11fa4e2a7 --- /dev/null +++ b/test/units/module_utils/basic/test_text.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +# (c) 2016 Toshio Kuratomi +# +# This file is part of Ansible +# +# Ansible is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Ansible is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ansible. If not, see . 
+ +# Make coding more python3-ish +from __future__ import (absolute_import, division) +__metaclass__ = type + +from ansible.compat.six import PY3 +from ansible.compat.tests import unittest +from units.mock.generator import add_method + + +# Internal API while this is still being developed. Eventually move to +# module_utils.text +from ansible.module_utils._text import to_text, to_bytes, to_native + +# Format: byte representation, text representation, encoding of byte representation +VALID_STRINGS = ( + (b'abcde', u'abcde', 'ascii'), + (b'caf\xc3\xa9', u'caf\xe9', 'utf-8'), + (b'caf\xe9', u'caf\xe9', 'latin-1'), + # u'くらとみ' + (b'\xe3\x81\x8f\xe3\x82\x89\xe3\x81\xa8\xe3\x81\xbf', u'\u304f\u3089\u3068\u307f', 'utf-8'), + (b'\x82\xad\x82\xe7\x82\xc6\x82\xdd', u'\u304f\u3089\u3068\u307f', 'shift-jis'), + ) + +def _check_to_text(self, in_string, encoding, expected): + """test happy path of decoding to text""" + self.assertEqual(to_text(in_string, encoding), expected) + +def _check_to_bytes(self, in_string, encoding, expected): + """test happy path of encoding to bytes""" + self.assertEqual(to_bytes(in_string, encoding), expected) + +def _check_to_native(self, in_string, encoding, py2_expected, py3_expected): + """test happy path of encoding to native strings""" + if PY3: + self.assertEqual(to_native(in_string, encoding), py3_expected) + else: + self.assertEqual(to_native(in_string, encoding), py2_expected) + + +@add_method(_check_to_text, [(i[0], i[2], i[1]) for i in VALID_STRINGS]) +@add_method(_check_to_text, [(i[1], i[2], i[1]) for i in VALID_STRINGS]) +@add_method(_check_to_bytes, [(i[0], i[2], i[0]) for i in VALID_STRINGS]) +@add_method(_check_to_bytes, [(i[1], i[2], i[0]) for i in VALID_STRINGS]) +@add_method(_check_to_native, [(i[0], i[2], i[0], i[1]) for i in VALID_STRINGS]) +@add_method(_check_to_native, [(i[1], i[2], i[0], i[1]) for i in VALID_STRINGS]) +class TestModuleUtilsText(unittest.TestCase): + pass diff --git a/test/units/module_utils/test_basic.py 
b/test/units/module_utils/test_basic.py index 758fe38a30a..a68352aea67 100644 --- a/test/units/module_utils/test_basic.py +++ b/test/units/module_utils/test_basic.py @@ -580,17 +580,6 @@ class TestModuleUtilsBasic(ModuleTestCase): self.assertEqual(am.is_special_selinux_path('/some/path/that/should/be/nfs'), (True, ['foo_u', 'foo_r', 'foo_t', 's0'])) self.assertEqual(am.is_special_selinux_path('/weird/random/fstype/path'), (True, ['foo_u', 'foo_r', 'foo_t', 's0'])) - def test_module_utils_basic_ansible_module_to_filesystem_str(self): - from ansible.module_utils import basic - basic._ANSIBLE_ARGS = None - - am = basic.AnsibleModule( - argument_spec = dict(), - ) - - self.assertEqual(am._to_filesystem_str(u'foo'), b'foo') - self.assertEqual(am._to_filesystem_str(u'föö'), b'f\xc3\xb6\xc3\xb6') - def test_module_utils_basic_ansible_module_user_and_group(self): from ansible.module_utils import basic @@ -653,7 +642,7 @@ class TestModuleUtilsBasic(ModuleTestCase): with patch.dict('sys.modules', {'selinux': basic.selinux}): with patch('selinux.lsetfilecon', return_value=0) as m: self.assertEqual(am.set_context_if_different('/path/to/file', ['foo_u', 'foo_r', 'foo_t', 's0'], False), True) - m.assert_called_with(b'/path/to/file', 'foo_u:foo_r:foo_t:s0') + m.assert_called_with('/path/to/file', 'foo_u:foo_r:foo_t:s0') m.reset_mock() am.check_mode = True self.assertEqual(am.set_context_if_different('/path/to/file', ['foo_u', 'foo_r', 'foo_t', 's0'], False), True) @@ -670,7 +659,7 @@ class TestModuleUtilsBasic(ModuleTestCase): with patch('selinux.lsetfilecon', return_value=0) as m: self.assertEqual(am.set_context_if_different('/path/to/file', ['foo_u', 'foo_r', 'foo_t', 's0'], False), True) - m.assert_called_with(b'/path/to/file', 'sp_u:sp_r:sp_t:s0') + m.assert_called_with('/path/to/file', 'sp_u:sp_r:sp_t:s0') delattr(basic, 'selinux')