From 5d29a2eabdb4d85419e11d4bf20f316d6222d142 Mon Sep 17 00:00:00 2001 From: Marius Gedminas Date: Thu, 24 Sep 2015 12:26:10 +0300 Subject: [PATCH] Python 3: shlex.split() wants unicode On Python 2, shlex.split() raises if you pass it a unicode object with non-ASCII characters in it. The Ansible codebase copes by explicitly converting the string using to_bytes() before passing it to shlex.split(). On Python 3, shlex.split() raises ('bytes' object has no attribute 'read') if you pass a bytes object. Oops. This commit introduces a new wrapper function, shlex_split, that transparently performs the to_bytes/to_unicode conversions only on Python 2. Currently I've only converted one call site (the one that was causing a unit test to fail on Python 3). If this approach is deemed suitable, I'll convert them all. --- lib/ansible/inventory/ini.py | 8 +++---- lib/ansible/utils/shlex.py | 33 ++++++++++++++++++++++++++++ test/units/utils/test_shlex.py | 39 ++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 5 deletions(-) create mode 100644 lib/ansible/utils/shlex.py create mode 100644 test/units/utils/test_shlex.py diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py index a2a90c76cfc..f29a5f73ec5 100644 --- a/lib/ansible/inventory/ini.py +++ b/lib/ansible/inventory/ini.py @@ -20,7 +20,6 @@ from __future__ import (absolute_import, division, print_function) __metaclass__ = type import ast -import shlex import re from ansible import constants as C @@ -30,7 +29,8 @@ from ansible.inventory.group import Group from ansible.inventory.expand_hosts import detect_range from ansible.inventory.expand_hosts import expand_hostname_range from ansible.parsing.utils.addresses import parse_address -from ansible.utils.unicode import to_unicode, to_bytes +from ansible.utils.shlex import shlex_split +from ansible.utils.unicode import to_unicode class InventoryParser(object): """ @@ -231,13 +231,11 @@ class InventoryParser(object): # beta:2345 user=admin # 
we'll tell shlex # gamma sudo=True user=root # to ignore comments - line = to_bytes(line) try: - tokens = shlex.split(line, comments=True) + tokens = shlex_split(line, comments=True) except ValueError as e: self._raise_error("Error parsing host definition '%s': %s" % (varstring, e)) - tokens = [ to_unicode(t) for t in tokens] (hostnames, port) = self._expand_hostpattern(tokens[0]) hosts = self._Hosts(hostnames, port) diff --git a/lib/ansible/utils/shlex.py b/lib/ansible/utils/shlex.py new file mode 100644 index 00000000000..79a170402cd --- /dev/null +++ b/lib/ansible/utils/shlex.py @@ -0,0 +1,33 @@ +# (c) 2015, Marius Gedminas +# +# This file is part of Ansible +# +# Ansible is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Ansible is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ansible. If not, see <http://www.gnu.org/licenses/>. + +from __future__ import absolute_import + +import shlex +from six import PY3 + +from ansible.utils.unicode import to_bytes, to_unicode + + +if PY3: + # shlex.split() wants Unicode (i.e. ``str``) input on Python 3 + shlex_split = shlex.split +else: + # shlex.split() wants bytes (i.e. 
``str``) input on Python 2 + def shlex_split(s, comments=False, posix=True): + return map(to_unicode, shlex.split(to_bytes(s), comments, posix)) + shlex_split.__doc__ = shlex.split.__doc__ diff --git a/test/units/utils/test_shlex.py b/test/units/utils/test_shlex.py new file mode 100644 index 00000000000..ef1fc28a66e --- /dev/null +++ b/test/units/utils/test_shlex.py @@ -0,0 +1,39 @@ +# (c) 2015, Marius Gedminas +# +# This file is part of Ansible +# +# Ansible is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Ansible is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ansible. If not, see <http://www.gnu.org/licenses/>. + +import unittest + +from ansible.utils.shlex import shlex_split + + +class TestSplit(unittest.TestCase): + + def test_trivial(self): + self.assertEqual(shlex_split("a b c"), ["a", "b", "c"]) + + def test_unicode(self): + self.assertEqual(shlex_split(u"a b \u010D"), [u"a", u"b", u"\u010D"]) + + def test_quoted(self): + self.assertEqual(shlex_split('"a b" c'), ["a b", "c"]) + + def test_comments(self): + self.assertEqual(shlex_split('"a b" c # d', comments=True), ["a b", "c"]) + + def test_error(self): + self.assertRaises(ValueError, shlex_split, 'a "b') +