From 065bb52109527ff60f4d9e15fac537dd221e6cb2 Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen Date: Mon, 31 Aug 2015 10:37:09 +0530 Subject: [PATCH 1/3] Be systematic about parsing and validating hostnames and addresses This adds a parse_address(pattern) utility function that returns (host,port), and uses it wherever where we accept IPv4 and IPv6 addresses and hostnames (or host patterns): the inventory parser the the add_host action plugin. It also introduces a more extensive set of unit tests that supersedes the old add_host unit tests (which didn't actually test add_host, but only the parsing function). --- lib/ansible/inventory/__init__.py | 24 +-- lib/ansible/inventory/expand_hosts.py | 2 +- lib/ansible/inventory/ini.py | 57 ++---- lib/ansible/parsing/utils/addresses.py | 200 +++++++++++++++++++++ lib/ansible/plugins/action/add_host.py | 36 +--- test/units/parsing/test_addresses.py | 56 ++++++ test/units/plugins/action/test_add_host.py | 47 ----- 7 files changed, 283 insertions(+), 139 deletions(-) create mode 100644 lib/ansible/parsing/utils/addresses.py create mode 100644 test/units/parsing/test_addresses.py delete mode 100644 test/units/plugins/action/test_add_host.py diff --git a/lib/ansible/inventory/__init__.py b/lib/ansible/inventory/__init__.py index 2eda3e6649d..657084c724d 100644 --- a/lib/ansible/inventory/__init__.py +++ b/lib/ansible/inventory/__init__.py @@ -33,6 +33,7 @@ from ansible.inventory.group import Group from ansible.inventory.host import Host from ansible.plugins import vars_loader from ansible.utils.vars import combine_vars +from ansible.parsing.utils.addresses import parse_address try: from __main__ import display @@ -83,27 +84,16 @@ class Inventory(object): host_list = host_list.split(",") host_list = [ h for h in host_list if h and h.strip() ] + self.parser = None + if host_list is None: - self.parser = None + pass elif isinstance(host_list, list): - self.parser = None all = Group('all') self.groups = [ all ] - ipv6_re = re.compile('\[([a-f:A-F0-9]*[%[0-z]+]?)\](?::(\d+))?') - for x in host_list: - m = ipv6_re.match(x) - if m: - all.add_host(Host(m.groups()[0], m.groups()[1])) - else: - if ":" in x: - tokens = x.rsplit(":", 1) - # if there is ':' in the address, then this is an ipv6 - if ':' in tokens[0]: - all.add_host(Host(x)) - else: - all.add_host(Host(tokens[0], tokens[1])) - else: - all.add_host(Host(x)) + for h in host_list: + (host, port) = parse_address(h, allow_ranges=False) + all.add_host(Host(host, port)) elif self._loader.path_exists(host_list): #TODO: switch this to a plugin loader and a 'condition' per plugin on which it should be tried, restoring 'inventory pllugins' if self._loader.is_directory(host_list): diff --git a/lib/ansible/inventory/expand_hosts.py b/lib/ansible/inventory/expand_hosts.py index 0d63ba08bbc..7e1c127c27e 100644 --- a/lib/ansible/inventory/expand_hosts.py +++ b/lib/ansible/inventory/expand_hosts.py @@ -44,7 +44,7 @@ def detect_range(line = None): Returnes True if the given line contains a pattern, else False. ''' - if 0 <= line.find("[") < line.find(":") < line.find("]"): + if '[' in line: return True else: return False diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py index 36115c57975..30408048136 100644 --- a/lib/ansible/inventory/ini.py +++ b/lib/ansible/inventory/ini.py @@ -29,6 +29,7 @@ from ansible.inventory.host import Host from ansible.inventory.group import Group from ansible.inventory.expand_hosts import detect_range from ansible.inventory.expand_hosts import expand_hostname_range +from ansible.parsing.utils.addresses import parse_address from ansible.utils.unicode import to_unicode, to_bytes class InventoryParser(object): @@ -265,30 +266,20 @@ class InventoryParser(object): optional port number that applies to all of them. ''' - # Is a port number specified? - # - # This may be a mandatory :NN suffix on any square-bracketed expression - # (IPv6 address, IPv4 address, host name, host pattern), or an optional - # :NN suffix on an IPv4 address, host name, or pattern. IPv6 addresses - # must be in square brackets if a port is specified. + # Can the given hostpattern be parsed as a host with an optional port + # specification? - port = None + (pattern, port) = parse_address(hostpattern, allow_ranges=True) + if not pattern: + self._raise_error("Can't parse '%s' as host[:port]" % hostpattern) - for type in ['bracketed_hostport', 'hostport']: - m = self.patterns[type].match(hostpattern) - if m: - (hostpattern, port) = m.groups() - continue + # Once we have separated the pattern, we expand it into list of one or + # more hostnames, depending on whether it contains any [x:y] ranges. - # Now we're left with just the pattern, which results in a list of one - # or more hostnames, depending on whether it contains any [x:y] ranges. - # - # FIXME: We could be more strict here about validation. - - if detect_range(hostpattern): - hostnames = expand_hostname_range(hostpattern) + if detect_range(pattern): + hostnames = expand_hostname_range(pattern) else: - hostnames = [hostpattern] + hostnames = [pattern] return (hostnames, port) @@ -374,29 +365,3 @@ class InventoryParser(object): $ # end of the line ''', re.X ) - - # The following patterns match the various ways in which a port number - # may be specified on an IPv6 address, IPv4 address, hostname, or host - # pattern. All of the above may be enclosed in square brackets with a - # mandatory :NN suffix; or all but the first may be given without any - # brackets but with an :NN suffix. - - self.patterns['bracketed_hostport'] = re.compile( - r'''^ - \[(.+)\] # [host identifier] - :([0-9]+) # :port number - $ - ''', re.X - ) - - self.patterns['hostport'] = re.compile( - r'''^ - ((?: # We want to match: - [^:\[\]] # (a non-range character - | # ...or... - \[[^\]]*\] # a complete bracketed expression) - )*) # repeated as many times as possible - :([0-9]+) # followed by a port number - $ - ''', re.X - ) diff --git a/lib/ansible/parsing/utils/addresses.py b/lib/ansible/parsing/utils/addresses.py new file mode 100644 index 00000000000..3a128a35249 --- /dev/null +++ b/lib/ansible/parsing/utils/addresses.py @@ -0,0 +1,200 @@ +# Copyright 2015 Abhijit Menon-Sen +# +# This file is part of Ansible +# +# Ansible is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Ansible is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ansible. If not, see . + +# Make coding more python3-ish +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import re + +# Components that match a numeric or alphanumeric begin:end or begin:end:step +# range expression inside square brackets. + +numeric_range = r''' + \[ + (?:[0-9]+:[0-9]+) # numeric begin:end + (?::[0-9]+)? # numeric :step (optional) + \] +''' + +alphanumeric_range = r''' + \[ + (?: + [a-z]:[a-z]| # one-char alphabetic range + [0-9]+:[0-9]+ # ...or a numeric one + ) + (?::[0-9]+)? # numeric :step (optional) + \] +''' + +# Components that match a 16-bit portion of an IPv6 address in hexadecimal +# notation (0..ffff) or an 8-bit portion of an IPv4 address in decimal notation +# (0..255) or an [x:y(:z)] numeric range. + +ipv6_component = r''' + (?: + [0-9a-f]{{1,4}}| # 0..ffff + {range} # or a numeric range + ) +'''.format(range=numeric_range) + +ipv4_component = r''' + (?: + [01]?[0-9]{{1,2}}| # 0..199 + 2[0-4][0-9]| # 200..249 + 25[0-5]| # 250..255 + {range} # or a numeric range + ) +'''.format(range=numeric_range) + +patterns = { + # This matches a square-bracketed expression with a port specification. What + # is inside the square brackets is validated later. + + 'bracketed_hostport': re.compile( + r'''^ + \[(.+)\] # [host identifier] + :([0-9]+) # :port number + $ + ''', re.X + ), + + # This matches a bare IPv4 address or hostname (or host pattern including + # [x:y(:z)] ranges) with a port specification. + + 'hostport': re.compile( + r'''^ + ((?: # We want to match: + [^:\[\]] # (a non-range character + | # ...or... + \[[^\]]*\] # a complete bracketed expression) + )*) # repeated as many times as possible + :([0-9]+) # followed by a port number + $ + ''', re.X + ), + + # This matches an IPv4 address, but also permits range expressions. + + 'ipv4': re.compile( + r'''^ + (?:{i4}\.){{3}}{i4} # Three parts followed by dots plus one + $ + '''.format(i4=ipv4_component), re.X|re.I + ), + + # This matches an IPv6 address, but also permits range expressions. + # + # This expression looks complex, but it really only spells out the various + # combinations in which the basic unit of an IPv6 address (0..ffff) can be + # written, from :: to 1:2:3:4:5:6:7:8, plus the IPv4-in-IPv6 variants such + # as ::ffff:192.0.2.3. + # + # Note that we can't just use ipaddress.ip_address() because we also have to + # accept ranges in place of each component. + + 'ipv6': re.compile( + r'''^ + (?:{0}:){{7}}{0}| # uncompressed: 1:2:3:4:5:6:7:8 + (?:{0}:){{1,6}}:| # compressed variants, which are all + (?:{0}:)(?:{0}){{1,6}}| # a::b for various lengths of a,b + (?:{0}:){{2}}(?::{0}){{1,5}}| + (?:{0}:){{3}}(?::{0}){{1,4}}| + (?:{0}:){{4}}(?::{0}){{1,3}}| + (?:{0}:){{5}}(?::{0}){{1,2}}| + (?:{0}:){{6}}(?::{0})| # ...all with 2 <= a+b <= 7 + :(?::{0}){{1,6}}| # ::ffff(:ffff...) + {0}?::| # ffff::, :: + # ipv4-in-ipv6 variants + (?:0:){{6}}(?:{0}\.){{3}}{0}| + ::(?:ffff:)?(?:{0}\.){{3}}{0}| + (?:0:){{5}}ffff:(?:{0}\.){{3}}{0} + $ + '''.format(ipv6_component), re.X|re.I + ), + + # This matches a hostname or host pattern including [x:y(:z)] ranges. + # + # We roughly follow DNS rules here, but also allow ranges (and underscores). + # In the past, no systematic rules were enforced about inventory hostnames, + # but the parsing context (e.g. shlex.split(), fnmatch.fnmatch()) excluded + # various metacharacters anyway. + # + # We don't enforce DNS length restrictions here (63 characters per label, + # 253 characters total) or make any attempt to process IDNs. + + 'hostname': re.compile( + r'''^ # We need at least one label, + (?: # which comprises: + [a-z0-9_-]| # (a valid domain label character + {0} # or a bracketed alphanumeric range) + )+ + (?:\.(?:[a-z0-9_-]|{0})+)* # Followed by zero or more .labels + $ + '''.format(alphanumeric_range), re.X|re.I + ), +} + +def parse_address(address, allow_ranges=False): + """ + Takes a string and returns a (host, port) tuple. If the host is None, then + the string could not be parsed as a host identifier with an optional port + specification. If the port is None, then no port was specified. + + The host identifier may be a hostname (qualified or not), an IPv4 address, + or an IPv6 address. If allow_ranges is True, then any of those may contain + [x:y] range specifications, e.g. foo[1:3] or foo[0:5]-bar[x-z]. + + The port number is an optional :NN suffix on an IPv4 address or host name, + or a mandatory :NN suffix on any square-bracketed expression: IPv6 address, + IPv4 address, or host name. (This means the only way to specify a port for + an IPv6 address is to enclose it in square brackets.) + """ + + # First, we extract the port number if one is specified. + + port = None + for type in ['bracketed_hostport', 'hostport']: + m = patterns[type].match(address) + if m: + (address, port) = m.groups() + port = int(port) + continue + + # What we're left with now must be an IPv4 or IPv6 address, possibly with + # numeric ranges, or a hostname with alphanumeric ranges. + + host = None + for type in ['ipv4', 'ipv6', 'hostname']: + m = patterns[type].match(address) + if m: + host = address + continue + + # If it isn't any of the above, we don't understand it. + + if not host: + return (None, None) + + # If we get to this point, we know that any included ranges are valid. If + # the caller is prepared to handle them, all is well. Otherwise we treat + # it as a parse failure. + + if not allow_ranges and '[' in host: + return (None, None) + + return (host, port) diff --git a/lib/ansible/plugins/action/add_host.py b/lib/ansible/plugins/action/add_host.py index cf2dab17373..0e7d3187e56 100644 --- a/lib/ansible/plugins/action/add_host.py +++ b/lib/ansible/plugins/action/add_host.py @@ -23,6 +23,8 @@ __metaclass__ = type import re from ansible.plugins.action import ActionBase +from ansible.parsing.utils.addresses import parse_address +from ansible.errors import AnsibleError, AnsibleParserError class ActionModule(ActionBase): ''' Create inventory hosts and groups in the memory inventory''' @@ -40,9 +42,11 @@ class ActionModule(ActionBase): new_name = self._task.args.get('name', self._task.args.get('hostname', None)) #vv("creating host via 'add_host': hostname=%s" % new_name) - new_name, new_port = _parse_ip_host_and_port(new_name) - if new_port: - self._task.args['ansible_ssh_port'] = new_port + name, port = parse_address(new_name, allow_ranges=False) + if not name: + raise AnsibleError("Invalid inventory hostname: %s" % new_name) + if port: + self._task.args['ansible_ssh_port'] = port groups = self._task.args.get('groupname', self._task.args.get('groups', self._task.args.get('group', ''))) # add it to the group if that was specified @@ -58,28 +62,4 @@ class ActionModule(ActionBase): if not k in [ 'name', 'hostname', 'groupname', 'groups' ]: host_vars[k] = self._task.args[k] - return dict(changed=True, add_host=dict(host_name=new_name, groups=new_groups, host_vars=host_vars)) - -def _parse_ip_host_and_port(hostname): - """ - Attempt to parse the hostname and port from a hostname, e.g., - - some-host-name - some-host-name:80 - 8.8.8.8 - 8.8.8.8:80 - 2001:db8:0:1 - [2001:db8:0:1]:80 - """ - if hostname.count(':') > 1: - match = re.match( - '\[(?P[^\]]+)\](:(?P[0-9]+))?', - hostname - ) - if match: - return match.group('ip'), match.group('port') - else: - return hostname, None - elif ':' in hostname: - return hostname.rsplit(':', 1) - return hostname, None + return dict(changed=True, add_host=dict(host_name=name, groups=new_groups, host_vars=host_vars)) diff --git a/test/units/parsing/test_addresses.py b/test/units/parsing/test_addresses.py new file mode 100644 index 00000000000..2bfb1102181 --- /dev/null +++ b/test/units/parsing/test_addresses.py @@ -0,0 +1,56 @@ +import unittest + +from ansible.parsing.utils.addresses import parse_address + +class TestParseAddress(unittest.TestCase): + + tests = { + # IPv4 addresses + '192.0.2.3': ['192.0.2.3', None], + '192.0.2.3:23': ['192.0.2.3', 23], + + # IPv6 addresses + '::': ['::', None], + '::1': ['::1', None], + '[::1]:442': ['::1', 442], + 'abcd:ef98:7654:3210:abcd:ef98:7654:3210': ['abcd:ef98:7654:3210:abcd:ef98:7654:3210', None], + '[abcd:ef98:7654:3210:abcd:ef98:7654:3210]:42': ['abcd:ef98:7654:3210:abcd:ef98:7654:3210', 42], + + # Hostnames + 'some-host': ['some-host', None], + 'some-host:80': ['some-host', 80], + 'some.host.com:492': ['some.host.com', 492], + '[some.host.com]:493': ['some.host.com', 493], + + # Various errors + '': [None, None], + 'some..host': [None, None], + 'some.': [None, None], + '[example.com]': [None, None], + } + + range_tests = { + '192.0.2.[3:10]': ['192.0.2.[3:10]', None], + '192.0.2.[3:10]:23': ['192.0.2.[3:10]', 23], + 'abcd:ef98::7654:[1:9]': ['abcd:ef98::7654:[1:9]', None], + '[abcd:ef98::7654:[6:32]]:2222': ['abcd:ef98::7654:[6:32]', 2222], + 'foo[a:b]:42': ['foo[a:b]', 42], + 'foo[a-b]:32': [None, None], + 'foo[x-y]': [None, None], + } + + def test_without_ranges(self): + for t in self.tests: + test = self.tests[t] + + (host, port) = parse_address(t) + assert host == test[0] + assert port == test[1] + + def test_with_ranges(self): + for t in self.range_tests: + test = self.range_tests[t] + + (host, port) = parse_address(t, allow_ranges=True) + assert host == test[0] + assert port == test[1] diff --git a/test/units/plugins/action/test_add_host.py b/test/units/plugins/action/test_add_host.py deleted file mode 100644 index c694d387a3a..00000000000 --- a/test/units/plugins/action/test_add_host.py +++ /dev/null @@ -1,47 +0,0 @@ -import unittest - -from ansible.plugins.action import add_host - - -class TestAddHost(unittest.TestCase): - - def test_hostname(self): - host, port = add_host._parse_ip_host_and_port('some-remote-host') - assert host == 'some-remote-host' - assert port is None - - def test_hostname_with_port(self): - host, port = add_host._parse_ip_host_and_port('some-remote-host:80') - assert host == 'some-remote-host' - assert port == '80' - - def test_parse_ip_host_and_port_v4(self): - host, port = add_host._parse_ip_host_and_port('8.8.8.8') - assert host == '8.8.8.8' - assert port is None - - def test_parse_ip_host_and_port_v4_and_port(self): - host, port = add_host._parse_ip_host_and_port('8.8.8.8:80') - assert host == '8.8.8.8' - assert port == '80' - - def test_parse_ip_host_and_port_v6(self): - host, port = add_host._parse_ip_host_and_port( - 'dead:beef:dead:beef:dead:beef:dead:beef' - ) - assert host == 'dead:beef:dead:beef:dead:beef:dead:beef' - assert port is None - - def test_parse_ip_host_and_port_v6_with_brackets(self): - host, port = add_host._parse_ip_host_and_port( - '[dead:beef:dead:beef:dead:beef:dead:beef]' - ) - assert host == 'dead:beef:dead:beef:dead:beef:dead:beef' - assert port is None - - def test_parse_ip_host_and_port_v6_with_brackets_and_port(self): - host, port = add_host._parse_ip_host_and_port( - '[dead:beef:dead:beef:dead:beef:dead:beef]:80' - ) - assert host == 'dead:beef:dead:beef:dead:beef:dead:beef' - assert port == '80' From 7479ab47e07efaded3d0aeb3bd169cbf27dd8f7c Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen Date: Thu, 3 Sep 2015 16:32:06 +0530 Subject: [PATCH 2/3] Be stricter about parsing hostname labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Labels must start with an alphanumeric character, may contain alphanumeric characters or hyphens, but must not end with a hyphen. We enforce those rules, but allow underscores wherever hyphens are accepted, and allow alphanumeric ranges anywhere. We relax the definition of "alphanumeric" to include Unicode characters even though such inventory hostnames cannot be used in practice unless an ansible_ssh_host is set for each of them. We still don't enforce length restrictions—the fact that we have to accept ranges makes it more complex, and it doesn't seem especially worthwhile. --- lib/ansible/parsing/utils/addresses.py | 22 +++++++++++++++------- test/units/parsing/test_addresses.py | 13 +++++++++++++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/lib/ansible/parsing/utils/addresses.py b/lib/ansible/parsing/utils/addresses.py index 3a128a35249..71675769d27 100644 --- a/lib/ansible/parsing/utils/addresses.py +++ b/lib/ansible/parsing/utils/addresses.py @@ -61,6 +61,17 @@ ipv4_component = r''' ) '''.format(range=numeric_range) +# A hostname label, e.g. 'foo' in 'foo.example.com'. Consists of alphanumeric +# characters plus dashes (and underscores) or valid ranges. The label may not +# start or end with a hyphen or an underscore. This is interpolated into the +# hostname pattern below. We don't try to enforce the 63-char length limit. + +label = r''' + (?:[\w]|{range}) # Starts with an alphanumeric or a range + (?:[\w_-]|{range})* # Then zero or more of the same or [_-] + (? Date: Thu, 10 Sep 2015 10:07:19 +0530 Subject: [PATCH 3/3] Fix broken integration test with unicode hostnames 1. The test did "name: '{{hostnames}}.{{item}}'" inside a with_sequence loop, which didn't do what was intended: it expanded hostnames into an array, appended ".1", and set name to the resulting string. This can be converted to a simple with_items loop. 2. Some of the entries in hostnames contained punctuation characters, which I see no reason to support in inventory hostnames anyway. 3. Once the add_host failures are fixed, the playbook later fails when the unicode hostnames are interpolated into debug output in ssh.py due to an encoding error. This is only one of the many places that may fail when using unicode inventory hostnames; we work around it by providing an ansible_ssh_host setting. --- test/integration/unicode.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test/integration/unicode.yml b/test/integration/unicode.yml index 1044c252705..6e8e073a79d 100644 --- a/test/integration/unicode.yml +++ b/test/integration/unicode.yml @@ -4,10 +4,9 @@ connection: local vars: test_var: 'Ī ī Ĭ ĭ Į į İ ı IJ ij Ĵ ĵ Ķ ķ ĸ Ĺ ĺ Ļ ļ Ľ ľ Ŀ ŀ Ł ł Ń ń Ņ ņ Ň ň ʼn Ŋ ŋ Ō ō Ŏ ŏ Ő ő Œ' - num_hosts: 5 hostnames: - - 'host-#ϬϭϮϯϰ' - - 'host-ͰͱͲͳʹ͵' + - 'host-ϬϭϮϯϰ' + - 'host-fóöbär' - 'host-ΙΚΛΜΝΞ' - 'host-στυφχψ' - 'host-ϬϭϮϯϰϱ' @@ -29,11 +28,11 @@ - 'ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď ď Đ đ Ē ē Ĕ ĕ Ė ė Ę ę Ě ě Ĝ ĝ Ğ ğ Ġ ġ Ģ ģ Ĥ ĥ Ħ ħ Ĩ ĩ' - add_host: - name: '{{hostnames}}.{{item}}' + name: '{{item}}' groups: 'ĪīĬĭ' + ansible_ssh_host: 127.0.0.1 ansible_connection: local - host_id: '{{item}}' - with_sequence: start=1 end={{num_hosts}} format=%d + with_items: hostnames - name: 'A task with unicode extra vars' debug: var=extra_var