Add typing to to_text and to_bytes, improve typing and type juggling in DataLoader (#85746)

3 months ago · c59db5349e
parent 4209d714db
commit c59db5349e
6 changed files with 184 additions and 45 deletions
--- a/changelogs/fragments/to-text-to-bytes.yml
+++ b/changelogs/fragments/to-text-to-bytes.yml
@ -0,0 +1,2 @@
 minor_changes:
  - Python type hints applied to ``to_text`` and ``to_bytes`` functions for better type hint interactions with code utilizing these functions.
--- a/lib/ansible/galaxy/collection/init.py
+++ b/lib/ansible/galaxy/collection/init.py
@ -339,12 +339,12 @@ def verify_local_collection(local_collection, remote_collection, artifacts_manag
    ]
    # Find any paths not in the FILES.json
-    for root, dirs, files in os.walk(b_collection_path):
+    for root, dirs, filenames in os.walk(b_collection_path):
-        for name in files:
+        for name in filenames:
            full_path = os.path.join(root, name)
            path = to_text(full_path[len(b_collection_path) + 1::], errors='surrogate_or_strict')
            if any(fnmatch.fnmatch(full_path, b_pattern) for b_pattern in b_ignore_patterns):
-                display.v("Ignoring verification for %s" % full_path)
+                display.v("Ignoring verification for %s" % to_text(full_path))
                continue
            if full_path not in collection_files:
--- a/lib/ansible/module_utils/common/text/converters.py
+++ b/lib/ansible/module_utils/common/text/converters.py
@ -8,9 +8,9 @@ from __future__ import annotations
 import codecs
 import json
 from ansible.module_utils.compat import typing as _t
 from ansible.module_utils._internal import _no_six
 try:
    codecs.lookup_error('surrogateescape')
    HAS_SURROGATEESCAPE = True
@ -22,8 +22,54 @@ _COMPOSED_ERROR_HANDLERS = frozenset((None, 'surrogate_or_replace',
                                      'surrogate_or_strict',
                                      'surrogate_then_replace'))
 _T = _t.TypeVar('_T')
 _NonStringPassthru: _t.TypeAlias = _t.Literal['passthru']
 _NonStringOther: _t.TypeAlias = _t.Literal['simplerepr', 'empty', 'strict']
 _NonStringAll: _t.TypeAlias = _t.Union[_NonStringPassthru, _NonStringOther]
@_t.overload
 def to_bytes(
    obj: object,
    encoding: str = 'utf-8',
    errors: str | None = None,
 ) -> bytes: ...
@_t.overload
 def to_bytes(
    obj: bytes | str,
    encoding: str = 'utf-8',
    errors: str | None = None,
    nonstring: _NonStringPassthru = 'passthru',
 ) -> bytes: ...
@_t.overload
 def to_bytes(
    obj: _T,
    encoding: str = 'utf-8',
    errors: str | None = None,
    nonstring: _NonStringPassthru = 'passthru',
 ) -> _T: ...
@_t.overload
 def to_bytes(
    obj: object,
    encoding: str = 'utf-8',
    errors: str | None = None,
    nonstring: _NonStringOther = 'simplerepr',
 ) -> bytes: ...
-def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
+
 def to_bytes(
    obj: _T,
    encoding: str = 'utf-8',
    errors: str | None = None,
    nonstring: _NonStringAll = 'simplerepr'
 ) -> _T | bytes:
    """Make sure that a string is a byte string
    :arg obj: An object to make sure is a byte string.  In most cases this
@ -81,7 +127,7 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
        string is valid in the specified encoding.  If it's important that the
        byte string is in the specified encoding do::
-            encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')
+            encoded_string = to_bytes(to_text(input_string, encoding='latin-1'), encoding='utf-8')
    .. version_changed:: 2.3
@ -126,21 +172,60 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
                value = repr(obj)
            except UnicodeError:
                # Giving up
-                return to_bytes('')
+                return b''
    elif nonstring == 'passthru':
        return obj
    elif nonstring == 'empty':
-        # python2.4 doesn't have b''
+        return b''
        return to_bytes('')
    elif nonstring == 'strict':
        raise TypeError('obj must be a string type')
    else:
        raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring)
-    return to_bytes(value, encoding, errors)
+    return to_bytes(value, encoding=encoding, errors=errors)
@_t.overload
 def to_text(
    obj: object,
    encoding: str = 'utf-8',
    errors: str | None = None,
 ) -> str: ...
@_t.overload
 def to_text(
    obj: str | bytes,
    encoding: str = 'utf-8',
    errors: str | None = None,
    nonstring: _NonStringPassthru = 'passthru',
 ) -> str: ...
@_t.overload
 def to_text(
    obj: _T,
    encoding: str = 'utf-8',
    errors: str | None = None,
    nonstring: _NonStringPassthru = 'passthru',
 ) -> _T: ...
@_t.overload
 def to_text(
    obj: object,
    encoding: str = 'utf-8',
    errors: str | None = None,
    nonstring: _NonStringOther = 'simplerepr',
 ) -> str: ...
-def to_text(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
+def to_text(
    obj: _T,
    encoding: str = 'utf-8',
    errors: str | None = None,
    nonstring: _NonStringAll = 'simplerepr'
 ) -> _T | str:
    """Make sure that a string is a text string
    :arg obj: An object to make sure is a text string.  In most cases this
@ -218,17 +303,17 @@ def to_text(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
                value = repr(obj)
            except UnicodeError:
                # Giving up
-                return u''
+                return ''
    elif nonstring == 'passthru':
        return obj
    elif nonstring == 'empty':
-        return u''
+        return ''
    elif nonstring == 'strict':
        raise TypeError('obj must be a string type')
    else:
        raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring)
-    return to_text(value, encoding, errors)
+    return to_text(value, encoding=encoding, errors=errors)
 to_native = to_text
--- a/lib/ansible/parsing/dataloader.py
+++ b/lib/ansible/parsing/dataloader.py
@ -31,7 +31,7 @@ display = Display()
 # Tries to determine if a path is inside a role, last dir must be 'tasks'
 # this is not perfect but people should really avoid 'tasks' dirs outside roles when using Ansible.
-RE_TASKS = re.compile(u'(?:^|%s)+tasks%s?$' % (os.path.sep, os.path.sep))
+RE_TASKS = re.compile('(?:^|%s)+tasks%s?$' % (os.path.sep, os.path.sep))
 class DataLoader:
@ -53,23 +53,22 @@ class DataLoader:
        ds = dl.load_from_file('/path/to/file')
    """
-    def __init__(self):
+    def __init__(self) -> None:
-        self._basedir = '.'
+        self._basedir: str = '.'
        # NOTE: not effective with forks as the main copy does not get updated.
        # avoids rereading files
-        self._FILE_CACHE = dict()
+        self._FILE_CACHE: dict[str, object] = {}
        # NOTE: not thread safe, also issues with forks not returning data to main proc
        #       so they need to be cleaned independently. See WorkerProcess for example.
        # used to keep track of temp files for cleaning
-        self._tempfiles = set()
+        self._tempfiles: set[str] = set()
        # initialize the vault stuff with an empty password
        # TODO: replace with a ref to something that can get the password
        #       a creds/auth provider
        self._vaults = {}
        self._vault = VaultLib()
        self.set_vault_secrets(None)
@ -229,23 +228,19 @@ class DataLoader:
    def set_basedir(self, basedir: str) -> None:
        """ sets the base directory, used to find files when a relative path is given """
-
+        self._basedir = basedir
        if basedir is not None:
            self._basedir = to_text(basedir)
    def path_dwim(self, given: str) -> str:
        """
        make relative paths work like folks expect.
        """
        given = to_text(given, errors='surrogate_or_strict')
        given = unquote(given)
-        if given.startswith(to_text(os.path.sep)) or given.startswith(u'~'):
+        if given.startswith(os.path.sep) or given.startswith('~'):
            path = given
        else:
-            basedir = to_text(self._basedir, errors='surrogate_or_strict')
+            path = os.path.join(self._basedir, given)
            path = os.path.join(basedir, given)
        return unfrackpath(path, follow=False)
@ -293,10 +288,9 @@ class DataLoader:
        """
        search = []
        source = to_text(source, errors='surrogate_or_strict')
        # I have full path, nothing else needs to be looked at
-        if source.startswith(to_text(os.path.sep)) or source.startswith(u'~'):
+        if source.startswith(os.path.sep) or source.startswith('~'):
            search.append(unfrackpath(source, follow=False))
        else:
            # base role/play path + templates/files/vars + relative filename
@ -363,7 +357,7 @@ class DataLoader:
            if os.path.exists(to_bytes(test_path, errors='surrogate_or_strict')):
                result = test_path
        else:
-            display.debug(u'evaluation_path:\n\t%s' % '\n\t'.join(paths))
+            display.debug('evaluation_path:\n\t%s' % '\n\t'.join(paths))
            for path in paths:
                upath = unfrackpath(path, follow=False)
                b_upath = to_bytes(upath, errors='surrogate_or_strict')
@ -384,9 +378,9 @@ class DataLoader:
                search.append(os.path.join(to_bytes(self.get_basedir(), errors='surrogate_or_strict'), b_dirname, b_source))
            search.append(os.path.join(to_bytes(self.get_basedir(), errors='surrogate_or_strict'), b_source))
-            display.debug(u'search_path:\n\t%s' % to_text(b'\n\t'.join(search)))
+            display.debug('search_path:\n\t%s' % to_text(b'\n\t'.join(search)))
            for b_candidate in search:
-                display.vvvvv(u'looking for "%s" at "%s"' % (source, to_text(b_candidate)))
+                display.vvvvv('looking for "%s" at "%s"' % (source, to_text(b_candidate)))
                if os.path.exists(b_candidate):
                    result = to_text(b_candidate)
                    break
@ -420,8 +414,7 @@ class DataLoader:
        if not file_path or not isinstance(file_path, (bytes, str)):
            raise AnsibleParserError("Invalid filename: '%s'" % to_native(file_path))
-        b_file_path = to_bytes(file_path, errors='surrogate_or_strict')
+        if not self.path_exists(file_path) or not self.is_file(file_path):
        if not self.path_exists(b_file_path) or not self.is_file(b_file_path):
            raise AnsibleFileNotFound(file_name=file_path)
        real_path = self.path_dwim(file_path)
@ -479,7 +472,7 @@ class DataLoader:
        """
        b_path = to_bytes(os.path.join(path, name))
-        found = []
+        found: list[str] = []
        if extensions is None:
            # Look for file with no extension first to find dir before file
@ -488,27 +481,29 @@ class DataLoader:
        for ext in extensions:
            if '.' in ext:
-                full_path = b_path + to_bytes(ext)
+                b_full_path = b_path + to_bytes(ext)
            elif ext:
-                full_path = b'.'.join([b_path, to_bytes(ext)])
+                b_full_path = b'.'.join([b_path, to_bytes(ext)])
            else:
-                full_path = b_path
+                b_full_path = b_path
            full_path = to_text(b_full_path)
            if self.path_exists(full_path):
                if self.is_directory(full_path):
                    if allow_dir:
-                        found.extend(self._get_dir_vars_files(to_text(full_path), extensions))
+                        found.extend(self._get_dir_vars_files(full_path, extensions))
                    else:
                        continue
                else:
-                    found.append(to_text(full_path))
+                    found.append(full_path)
                break
        return found
    def _get_dir_vars_files(self, path: str, extensions: list[str]) -> list[str]:
        found = []
        for spath in sorted(self.list_directory(path)):
-            if not spath.startswith(u'.') and not spath.endswith(u'~'):  # skip hidden and backups
+            if not spath.startswith('.') and not spath.endswith('~'):  # skip hidden and backups
                ext = os.path.splitext(spath)[-1]
                full_spath = os.path.join(path, spath)
--- a/lib/ansible/plugins/inventory/toml.py
+++ b/lib/ansible/plugins/inventory/toml.py
@ -89,7 +89,6 @@ import tomllib
 from collections.abc import MutableMapping, MutableSequence
 from ansible.errors import AnsibleFileNotFound, AnsibleParserError
 from ansible.module_utils.common.text.converters import to_bytes, to_native
 from ansible.plugins.inventory import BaseFileInventoryPlugin
 from ansible.utils.display import Display
@ -147,10 +146,9 @@ class InventoryModule(BaseFileInventoryPlugin):
    def _load_file(self, file_name):
        if not file_name or not isinstance(file_name, str):
-            raise AnsibleParserError("Invalid filename: '%s'" % to_native(file_name))
+            raise AnsibleParserError("Invalid filename: '%s'" % file_name)
-        b_file_name = to_bytes(self.loader.path_dwim(file_name))
+        if not self.loader.path_exists(file_name):
        if not self.loader.path_exists(b_file_name):
            raise AnsibleFileNotFound("Unable to retrieve file contents", file_name=file_name)
        try:
--- a/test/units/module_utils/common/text/converters/test_to_str.py
+++ b/test/units/module_utils/common/text/converters/test_to_str.py
@ -45,3 +45,62 @@ def test_to_bytes(in_string, encoding, expected):
 def test_to_native(in_string, encoding, expected):
    """test happy path of encoding to native strings"""
    assert to_native(in_string, encoding) == expected
 def test_type_hints() -> None:
    """Pin the expected return types of the ``to_bytes``/``to_text`` overloads.

    This test isn't really here to test the functionality of to_text/to_bytes
    but more to ensure the overloads are properly validated for type hinting.

    Every call is assigned to a variable annotated with the type that call is
    expected to produce for its input type (``bytes``, ``str`` or ``dict``) and
    ``nonstring`` value. The runtime asserts only cover the ``dict`` input.
    NOTE(review): the annotations are presumably verified by a static type
    checker run over this file -- confirm how that is wired up.
    """
    # One sample of each input kind: a non-string object, text, and bytes.
    d: dict[str, str] = {'k': 'v'}
    s: str = 's'
    b: bytes = b'b'
    # to_bytes with ``nonstring`` left at its default: always annotated as bytes;
    # the dict is expected to come back as its UTF-8 encoded repr().
    to_bytes_bytes: bytes = to_bytes(b)
    to_bytes_str: bytes = to_bytes(s)
    to_bytes_dict: bytes = to_bytes(d)
    assert to_bytes_dict == repr(d).encode('utf-8')
    # Explicit 'simplerepr': same expectations as the default case above.
    to_bytes_bytes_repr: bytes = to_bytes(b, nonstring='simplerepr')
    to_bytes_str_repr: bytes = to_bytes(s, nonstring='simplerepr')
    to_bytes_dict_repr: bytes = to_bytes(d, nonstring='simplerepr')
    assert to_bytes_dict_repr == repr(d).encode('utf-8')
    # 'passthru': string inputs are still annotated as bytes, while the
    # non-string input keeps its own type and is returned equal to the input.
    to_bytes_bytes_passthru: bytes = to_bytes(b, nonstring='passthru')
    to_bytes_str_passthru: bytes = to_bytes(s, nonstring='passthru')
    to_bytes_dict_passthru: dict[str, str] = to_bytes(d, nonstring='passthru')
    assert to_bytes_dict_passthru == d
    # 'empty': the non-string input is replaced by an empty byte string.
    to_bytes_bytes_empty: bytes = to_bytes(b, nonstring='empty')
    to_bytes_str_empty: bytes = to_bytes(s, nonstring='empty')
    to_bytes_dict_empty: bytes = to_bytes(d, nonstring='empty')
    assert to_bytes_dict_empty == b''
    # 'strict': string inputs are accepted, the non-string input raises TypeError.
    to_bytes_bytes_strict: bytes = to_bytes(b, nonstring='strict')
    to_bytes_str_strict: bytes = to_bytes(s, nonstring='strict')
    with pytest.raises(TypeError):
        to_bytes_dict_strict: bytes = to_bytes(d, nonstring='strict')
    # The same matrix for to_text, with str as the expected result type.
    to_text_bytes: str = to_text(b)
    to_text_str: str = to_text(s)
    to_text_dict: str = to_text(d)
    assert to_text_dict == repr(d)
    # Explicit 'simplerepr': the dict is expected to come back as its repr().
    to_text_bytes_repr: str = to_text(b, nonstring='simplerepr')
    to_text_str_repr: str = to_text(s, nonstring='simplerepr')
    to_text_dict_repr: str = to_text(d, nonstring='simplerepr')
    assert to_text_dict_repr == repr(d)
    # 'passthru': the non-string input keeps its own type.
    to_text_bytes_passthru: str = to_text(b, nonstring='passthru')
    to_text_str_passthru: str = to_text(s, nonstring='passthru')
    to_text_dict_passthru: dict[str, str] = to_text(d, nonstring='passthru')
    assert to_text_dict_passthru == d
    # 'empty': the non-string input is replaced by an empty text string.
    to_text_bytes_empty: str = to_text(b, nonstring='empty')
    to_text_str_empty: str = to_text(s, nonstring='empty')
    to_text_dict_empty: str = to_text(d, nonstring='empty')
    assert to_text_dict_empty == ''
    # 'strict': string inputs are accepted, the non-string input raises TypeError.
    to_text_bytes_strict: str = to_text(b, nonstring='strict')
    to_text_str_strict: str = to_text(s, nonstring='strict')
    with pytest.raises(TypeError):
        to_text_dict_strict: str = to_text(d, nonstring='strict')
		`@ -0,0 +1,2 @@`
							`minor_changes:`
							- Python type hints applied to ``to_text`` and ``to_bytes`` functions for better type hint interactions with code utilizing these functions.