From 43154e5101688f2ffce6c109a641b428ee700e0f Mon Sep 17 00:00:00 2001 From: James Cammarata Date: Thu, 24 Jul 2014 13:57:35 -0500 Subject: [PATCH 1/7] Using custom splitting function for module param counting --- lib/ansible/module_utils/basic.py | 100 ++++++++++++++++++ lib/ansible/runner/__init__.py | 11 +- lib/ansible/utils/__init__.py | 1 + .../roles/test_lookups/tasks/main.yml | 2 +- 4 files changed, 106 insertions(+), 8 deletions(-) diff --git a/lib/ansible/module_utils/basic.py b/lib/ansible/module_utils/basic.py index c63a7b492eb..081ba9ffd27 100644 --- a/lib/ansible/module_utils/basic.py +++ b/lib/ansible/module_utils/basic.py @@ -218,6 +218,106 @@ def load_platform_subclass(cls, *args, **kwargs): return super(cls, subclass).__new__(subclass) +def split_args(args): + ''' + Splits args on whitespace, but intelligently reassembles + those that may have been split over a jinja2 block or quotes. + When used in a module, we won't ever have to be concerned about + jinja2 blocks, however this function is/will be used in the + core portions as well before the args are templated. + ''' + + # the list of params parsed out of the arg string + params = [] + # here we encode the args, so we have a uniform charset to + # work with, and split on white space + args = args.encode('utf-8') + items = args.split() + + # iterate over the items, and reassemble any that may have been + # split on a space inside a jinja2 block. These variables are used + # to keep track of the state of the parsing, since blocks and quotes + # may be nested within each other. + inside_quotes = False + quote_char = None + split_print_depth = 0 + split_block_depth = 0 + split_comment_depth = 0 + # now we loop over each split item, coalescing items if the white space + # split occurred within quotes or a jinja2 block of some kind + for item in items: + item = item.strip() + # store the previous quoting state for checking later + was_inside_quotes = inside_quotes + # determine the current quoting state + for i in range(0, len(item)): + c = item[i] + bc = None + if i > 0: + bc = item[i-1] + if c in ('"', "'"): + if inside_quotes: + if c == quote_char and bc != '\\': + inside_quotes = False + quote_char = None + else: + inside_quotes = True + quote_char = c + # multiple conditions may append a token to the list of params, + # so we keep track with this flag to make sure it only happens once + appended = False + # if we're inside quotes now, but weren't before, append the item + # to the end of the list, since we'll tack on more to it later + if inside_quotes and not was_inside_quotes: + params.append(item) + appended = True + # otherwise, if we're inside any jinja2 block, inside quotes, or we were + # inside quotes (but aren't now) concat this item to the last param + elif ((split_print_depth + split_block_depth + split_comment_depth) > 0 or inside_quotes or was_inside_quotes): + params[-1] = "%s %s" % (params[-1], item) + appended = True + # these variables are used to determine the current depth of each jinja2 + # block type, by counting the number of openings and closing tags + num_print_open = item.count('{{') + num_print_close = item.count('}}') + num_block_open = item.count('{%') + num_block_close = item.count('%}') + num_comment_open = item.count('{#') + num_comment_close = item.count('#}') + # if the number is not the same, the depth has changed, so we calculate that here + # and may append the current item to the params (if we haven't previously done so) + if num_print_open != num_print_close: + split_print_depth += (num_print_open - num_print_close) + if not appended: + params.append(item) + appended = True + if split_print_depth < 0: + split_print_depth = 0 + if num_block_open != num_block_close: + split_block_depth += (num_block_open - num_block_close) + if not appended: + params.append(item) + appended = True + if split_block_depth < 0: + split_block_depth = 0 + if num_comment_open != num_comment_close: + split_comment_depth += (num_comment_open - num_comment_close) + if not appended: + params.append(item) + appended = True + if split_comment_depth < 0: + split_comment_depth = 0 + # finally, if we're at zero depth for all blocks and not inside quotes, and have not + # yet appended anything to the list of params, we do so now + if (split_print_depth + split_block_depth + split_comment_depth) == 0 and not inside_quotes and not appended: + params.append(item) + # If we're done and things are not at zero depth or we're still inside quotes, + # raise an error to indicate that the args were unbalanced + if (split_print_depth + split_block_depth + split_comment_depth) != 0 or inside_quotes: + raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes") + # finally, we decode each param back to the unicode it was in the arg string + params = [x.decode('utf-8') for x in params] + return params class AnsibleModule(object): diff --git a/lib/ansible/runner/__init__.py b/lib/ansible/runner/__init__.py index 7bbbf96a2a1..1f9d838e376 100644 --- a/lib/ansible/runner/__init__.py +++ b/lib/ansible/runner/__init__.py @@ -47,6 +47,7 @@ import connection from return_data import ReturnData from ansible.callbacks import DefaultRunnerCallbacks, vv from ansible.module_common import ModuleReplacer +from ansible.module_utils.basic import split_args module_replacer = ModuleReplacer(strip_comments=False) @@ -397,14 +398,10 @@ class Runner(object): ''' options = {} if args is not None: - args = args.encode('utf-8') try: - lexer = shlex.shlex(args) - lexer.whitespace = '\t ' - lexer.whitespace_split = True - vargs = [x.decode('utf-8') for x in lexer] - except ValueError, ve: - if 'no closing quotation' in str(ve).lower(): + vargs = split_args(args) + except Exception, e: + if "unbalanced jinja2 block or quotes" in str(e): raise errors.AnsibleError("error parsing argument string '%s', try quoting the entire line." % args) else: raise diff --git a/lib/ansible/utils/__init__.py b/lib/ansible/utils/__init__.py index 4a92907e6bb..b0c219b12d2 100644 --- a/lib/ansible/utils/__init__.py +++ b/lib/ansible/utils/__init__.py @@ -30,6 +30,7 @@ from ansible.utils import template from ansible.utils.display_functions import * from ansible.utils.plugins import * from ansible.callbacks import display +from ansible.module_utils.basic import split_args import ansible.constants as C import ast import time diff --git a/test/integration/roles/test_lookups/tasks/main.yml b/test/integration/roles/test_lookups/tasks/main.yml index 0340a12c74e..04b533d72c2 100644 --- a/test/integration/roles/test_lookups/tasks/main.yml +++ b/test/integration/roles/test_lookups/tasks/main.yml @@ -92,7 +92,7 @@ # https://github.com/ansible/ansible/issues/6550 - name: confirm pipe lookup works with multiple positional args - debug: msg="{{ lookup('pipe', 'ls /tmp /') }}" + debug: msg="{{ lookup('pipe', 'ls -l /tmp') }}" From b5d64fdb364dc40602611086d28eb926196c721b Mon Sep 17 00:00:00 2001 From: Michael DeHaan Date: Thu, 24 Jul 2014 16:34:06 -0400 Subject: [PATCH 2/7] Some notes/comment upgrades on split_args. --- lib/ansible/module_utils/basic.py | 64 ++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/lib/ansible/module_utils/basic.py b/lib/ansible/module_utils/basic.py index 081ba9ffd27..cb01b2fb3ee 100644 --- a/lib/ansible/module_utils/basic.py +++ b/lib/ansible/module_utils/basic.py @@ -222,39 +222,65 @@ def split_args(args): ''' Splits args on whitespace, but intelligently reassembles those that may have been split over a jinja2 block or quotes. - When used in a module, we won't ever have to be concerned about + + When used in a remote module, we won't ever have to be concerned about jinja2 blocks, however this function is/will be used in the core portions as well before the args are templated. + + example input: a=b c=d + example output: dict(a='b', c='d') + + Basically this is a variation shlex that has some more intelligence for + how Ansible needs to use it. ''' + # FIXME: refactoring into smaller functions + # the list of params parsed out of the arg string + # this is going to be the result value when we are donei params = [] + # here we encode the args, so we have a uniform charset to # work with, and split on white space args = args.encode('utf-8') items = args.split() # iterate over the items, and reassemble any that may have been - # split on a space inside a jinja2 block. These variables are used + # split on a space inside a jinja2 block. + # ex if tokens are "{{", "foo", "}}" these go together + + # These variables are used # to keep track of the state of the parsing, since blocks and quotes # may be nested within each other. + inside_quotes = False quote_char = None split_print_depth = 0 split_block_depth = 0 split_comment_depth = 0 + # now we loop over each split item, coalescing items if the white space # split occurred within quotes or a jinja2 block of some kind + for item in items: + item = item.strip() + # store the previous quoting state for checking later was_inside_quotes = inside_quotes + # determine the current quoting state - for i in range(0, len(item)): - c = item[i] - bc = None + # the goal of this block is to determine if the quoted string + # is unterminated in which case it needs to be put back together + + bc = None # before_char + for i in range(0, len(item)): # use enumerate + + c = item[i] # current_char + if i > 0: bc = item[i-1] + if c in ('"', "'"): if inside_quotes: if c == quote_char and bc != '\\': @@ -263,29 +289,42 @@ def split_args(args): else: inside_quotes = True quote_char = c + # multiple conditions may append a token to the list of params, # so we keep track with this flag to make sure it only happens once + # append means add to the end of the list, don't append means concatenate + # it to the end of the last token appended = False + # if we're inside quotes now, but weren't before, append the item # to the end of the list, since we'll tack on more to it later + if inside_quotes and not was_inside_quotes: params.append(item) appended = True + # otherwise, if we're inside any jinja2 block, inside quotes, or we were # inside quotes (but aren't now) concat this item to the last param - elif ((split_print_depth + split_block_depth + split_comment_depth) > 0 or inside_quotes or was_inside_quotes): + # FIXME: just or these all together + elif (split_print_depth or split_block_depth or split_comment_depth or inside_quotes or was_inside_quotes): params[-1] = "%s %s" % (params[-1], item) appended = True + # these variables are used to determine the current depth of each jinja2 # block type, by counting the number of openings and closing tags + # FIXME: assumes Jinja2 seperators aren't changeable (also true elsewhere in ansible ATM) + num_print_open = item.count('{{') num_print_close = item.count('}}') num_block_open = item.count('{%') num_block_close = item.count('%}') num_comment_open = item.count('{#') num_comment_close = item.count('#}') - # if the number is not the same, the depth has changed, so we calculate that here + + # if the number of paired block tags is not the same, the depth has changed, so we calculate that here # and may append the current item to the params (if we haven't previously done so) + + # FIXME: DRY a bit if num_print_open != num_print_close: split_print_depth += (num_print_open - num_print_close) if not appended: @@ -293,6 +332,7 @@ def split_args(args): appended = True if split_print_depth < 0: split_print_depth = 0 + if num_block_open != num_block_close: split_block_depth += (num_block_open - num_block_close) if not appended: @@ -300,6 +340,7 @@ def split_args(args): appended = True if split_block_depth < 0: split_block_depth = 0 + if num_comment_open != num_comment_close: split_comment_depth += (num_comment_open - num_comment_close) if not appended: @@ -307,14 +348,19 @@ def split_args(args): appended = True if split_comment_depth < 0: split_comment_depth = 0 + # finally, if we're at zero depth for all blocks and not inside quotes, and have not # yet appended anything to the list of params, we do so now - if (split_print_depth + split_block_depth + split_comment_depth) == 0 and not inside_quotes and not appended: + + if not (split_print_depth or split_block_depth or split_comment_depth) and not inside_quotes and not appended: params.append(item) + # If we're done and things are not at zero depth or we're still inside quotes, # raise an error to indicate that the args were unbalanced - if (split_print_depth + split_block_depth + split_comment_depth) != 0 or inside_quotes: + + if (split_print_depth or split_block_depth or split_comment_depth) or inside_quotes: raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes") + # finally, we decode each param back to the unicode it was in the arg string params = [x.decode('utf-8') for x in params] return params From 630f080cf0eaeb6bfbc61bfe5fd4b09dccd49a4e Mon Sep 17 00:00:00 2001 From: Michael DeHaan Date: Thu, 24 Jul 2014 20:15:04 -0400 Subject: [PATCH 3/7] Start of unit tests for split_args function, moved split_args to utils since not needed by modules (so far). --- lib/ansible/module_utils/basic.py | 147 -------------------------- lib/ansible/runner/__init__.py | 2 +- lib/ansible/utils/__init__.py | 9 +- lib/ansible/utils/splitter.py | 164 ++++++++++++++++++++++++++++++ test/units/TestUtils.py | 34 +++++++ 5 files changed, 206 insertions(+), 150 deletions(-) create mode 100644 lib/ansible/utils/splitter.py diff --git a/lib/ansible/module_utils/basic.py b/lib/ansible/module_utils/basic.py index cb01b2fb3ee..35f63ab5e82 100644 --- a/lib/ansible/module_utils/basic.py +++ b/lib/ansible/module_utils/basic.py @@ -218,153 +218,6 @@ def load_platform_subclass(cls, *args, **kwargs): return super(cls, subclass).__new__(subclass) -def split_args(args): - ''' - Splits args on whitespace, but intelligently reassembles - those that may have been split over a jinja2 block or quotes. - - When used in a remote module, we won't ever have to be concerned about - jinja2 blocks, however this function is/will be used in the - core portions as well before the args are templated. - - example input: a=b c=d - example output: dict(a='b', c='d') - - Basically this is a variation shlex that has some more intelligence for - how Ansible needs to use it. - ''' - - # FIXME: refactoring into smaller functions - - # the list of params parsed out of the arg string - # this is going to be the result value when we are donei - params = [] - - # here we encode the args, so we have a uniform charset to - # work with, and split on white space - args = args.encode('utf-8') - items = args.split() - - # iterate over the items, and reassemble any that may have been - # split on a space inside a jinja2 block. - # ex if tokens are "{{", "foo", "}}" these go together - - # These variables are used - # to keep track of the state of the parsing, since blocks and quotes - # may be nested within each other. - - inside_quotes = False - quote_char = None - split_print_depth = 0 - split_block_depth = 0 - split_comment_depth = 0 - - # now we loop over each split item, coalescing items if the white space - # split occurred within quotes or a jinja2 block of some kind - - for item in items: - - item = item.strip() - - # store the previous quoting state for checking later - was_inside_quotes = inside_quotes - - # determine the current quoting state - # the goal of this block is to determine if the quoted string - # is unterminated in which case it needs to be put back together - - bc = None # before_char - for i in range(0, len(item)): # use enumerate - - c = item[i] # current_char - - if i > 0: - bc = item[i-1] - - if c in ('"', "'"): - if inside_quotes: - if c == quote_char and bc != '\\': - inside_quotes = False - quote_char = None - else: - inside_quotes = True - quote_char = c - - # multiple conditions may append a token to the list of params, - # so we keep track with this flag to make sure it only happens once - # append means add to the end of the list, don't append means concatenate - # it to the end of the last token - appended = False - - # if we're inside quotes now, but weren't before, append the item - # to the end of the list, since we'll tack on more to it later - - if inside_quotes and not was_inside_quotes: - params.append(item) - appended = True - - # otherwise, if we're inside any jinja2 block, inside quotes, or we were - # inside quotes (but aren't now) concat this item to the last param - # FIXME: just or these all together - elif (split_print_depth or split_block_depth or split_comment_depth or inside_quotes or was_inside_quotes): - params[-1] = "%s %s" % (params[-1], item) - appended = True - - # these variables are used to determine the current depth of each jinja2 - # block type, by counting the number of openings and closing tags - # FIXME: assumes Jinja2 seperators aren't changeable (also true elsewhere in ansible ATM) - - num_print_open = item.count('{{') - num_print_close = item.count('}}') - num_block_open = item.count('{%') - num_block_close = item.count('%}') - num_comment_open = item.count('{#') - num_comment_close = item.count('#}') - - # if the number of paired block tags is not the same, the depth has changed, so we calculate that here - # and may append the current item to the params (if we haven't previously done so) - - # FIXME: DRY a bit - if num_print_open != num_print_close: - split_print_depth += (num_print_open - num_print_close) - if not appended: - params.append(item) - appended = True - if split_print_depth < 0: - split_print_depth = 0 - - if num_block_open != num_block_close: - split_block_depth += (num_block_open - num_block_close) - if not appended: - params.append(item) - appended = True - if split_block_depth < 0: - split_block_depth = 0 - - if num_comment_open != num_comment_close: - split_comment_depth += (num_comment_open - num_comment_close) - if not appended: - params.append(item) - appended = True - if split_comment_depth < 0: - split_comment_depth = 0 - - # finally, if we're at zero depth for all blocks and not inside quotes, and have not - # yet appended anything to the list of params, we do so now - - if not (split_print_depth or split_block_depth or split_comment_depth) and not inside_quotes and not appended: - params.append(item) - - # If we're done and things are not at zero depth or we're still inside quotes, - # raise an error to indicate that the args were unbalanced - - if (split_print_depth or split_block_depth or split_comment_depth) or inside_quotes: - raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes") - - # finally, we decode each param back to the unicode it was in the arg string - params = [x.decode('utf-8') for x in params] - return params - class AnsibleModule(object): def __init__(self, argument_spec, bypass_checks=False, no_log=False, diff --git a/lib/ansible/runner/__init__.py b/lib/ansible/runner/__init__.py index 1f9d838e376..2a7e66966dc 100644 --- a/lib/ansible/runner/__init__.py +++ b/lib/ansible/runner/__init__.py @@ -47,7 +47,7 @@ import connection from return_data import ReturnData from ansible.callbacks import DefaultRunnerCallbacks, vv from ansible.module_common import ModuleReplacer -from ansible.module_utils.basic import split_args +from ansible.utils.splitter import split_args module_replacer = ModuleReplacer(strip_comments=False) diff --git a/lib/ansible/utils/__init__.py b/lib/ansible/utils/__init__.py index b0c219b12d2..a8e3687f367 100644 --- a/lib/ansible/utils/__init__.py +++ b/lib/ansible/utils/__init__.py @@ -26,11 +26,10 @@ import optparse import operator from ansible import errors from ansible import __version__ -from ansible.utils import template from ansible.utils.display_functions import * from ansible.utils.plugins import * from ansible.callbacks import display -from ansible.module_utils.basic import split_args +from ansible.utils.splitter import split_args import ansible.constants as C import ast import time @@ -231,6 +230,7 @@ def is_changed(result): return (result.get('changed', False) in [ True, 'True', 'true']) def check_conditional(conditional, basedir, inject, fail_on_undefined=False): + from ansible.utils import template if conditional is None or conditional == '': return True @@ -325,6 +325,9 @@ def path_dwim_relative(original, dirname, source, playbook_base, check=True): ''' find one file in a directory one level up in a dir named dirname relative to current ''' # (used by roles code) + from ansible.utils import template + + basedir = os.path.dirname(original) if os.path.islink(basedir): basedir = unfrackpath(basedir) @@ -1217,6 +1220,8 @@ def safe_eval(expr, locals={}, include_exceptions=False): def listify_lookup_plugin_terms(terms, basedir, inject): + from ansible.utils import template + if isinstance(terms, basestring): # someone did: # with_items: alist diff --git a/lib/ansible/utils/splitter.py b/lib/ansible/utils/splitter.py new file mode 100644 index 00000000000..973c6e8ed2e --- /dev/null +++ b/lib/ansible/utils/splitter.py @@ -0,0 +1,164 @@ +# (c) 2014 James Cammarata, +# +# This file is part of Ansible +# +# Ansible is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Ansible is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ansible. If not, see . + +def split_args(args): + ''' + Splits args on whitespace, but intelligently reassembles + those that may have been split over a jinja2 block or quotes. + + When used in a remote module, we won't ever have to be concerned about + jinja2 blocks, however this function is/will be used in the + core portions as well before the args are templated. + + example input: a=b c=d + example output: dict(a='b', c='d') + + Basically this is a variation shlex that has some more intelligence for + how Ansible needs to use it. + ''' + + # FIXME: refactoring into smaller functions + + # the list of params parsed out of the arg string + # this is going to be the result value when we are donei + params = [] + + # here we encode the args, so we have a uniform charset to + # work with, and split on white space + args = args.encode('utf-8') + items = args.split() + + # iterate over the items, and reassemble any that may have been + # split on a space inside a jinja2 block. + # ex if tokens are "{{", "foo", "}}" these go together + + # These variables are used + # to keep track of the state of the parsing, since blocks and quotes + # may be nested within each other. + + inside_quotes = False + quote_char = None + split_print_depth = 0 + split_block_depth = 0 + split_comment_depth = 0 + + # now we loop over each split item, coalescing items if the white space + # split occurred within quotes or a jinja2 block of some kind + + for item in items: + + item = item.strip() + + # store the previous quoting state for checking later + was_inside_quotes = inside_quotes + + # determine the current quoting state + # the goal of this block is to determine if the quoted string + # is unterminated in which case it needs to be put back together + + bc = None # before_char + for i in range(0, len(item)): # use enumerate + + c = item[i] # current_char + + if i > 0: + bc = item[i-1] + + if c in ('"', "'"): + if inside_quotes: + if c == quote_char and bc != '\\': + inside_quotes = False + quote_char = None + else: + inside_quotes = True + quote_char = c + + # multiple conditions may append a token to the list of params, + # so we keep track with this flag to make sure it only happens once + # append means add to the end of the list, don't append means concatenate + # it to the end of the last token + appended = False + + # if we're inside quotes now, but weren't before, append the item + # to the end of the list, since we'll tack on more to it later + + if inside_quotes and not was_inside_quotes: + params.append(item) + appended = True + + # otherwise, if we're inside any jinja2 block, inside quotes, or we were + # inside quotes (but aren't now) concat this item to the last param + # FIXME: just or these all together + elif (split_print_depth or split_block_depth or split_comment_depth or inside_quotes or was_inside_quotes): + params[-1] = "%s %s" % (params[-1], item) + appended = True + + # these variables are used to determine the current depth of each jinja2 + # block type, by counting the number of openings and closing tags + # FIXME: assumes Jinja2 seperators aren't changeable (also true elsewhere in ansible ATM) + + num_print_open = item.count('{{') + num_print_close = item.count('}}') + num_block_open = item.count('{%') + num_block_close = item.count('%}') + num_comment_open = item.count('{#') + num_comment_close = item.count('#}') + + # if the number of paired block tags is not the same, the depth has changed, so we calculate that here + # and may append the current item to the params (if we haven't previously done so) + + # FIXME: DRY a bit + if num_print_open != num_print_close: + split_print_depth += (num_print_open - num_print_close) + if not appended: + params.append(item) + appended = True + if split_print_depth < 0: + split_print_depth = 0 + + if num_block_open != num_block_close: + split_block_depth += (num_block_open - num_block_close) + if not appended: + params.append(item) + appended = True + if split_block_depth < 0: + split_block_depth = 0 + + if num_comment_open != num_comment_close: + split_comment_depth += (num_comment_open - num_comment_close) + if not appended: + params.append(item) + appended = True + if split_comment_depth < 0: + split_comment_depth = 0 + + # finally, if we're at zero depth for all blocks and not inside quotes, and have not + # yet appended anything to the list of params, we do so now + + if not (split_print_depth or split_block_depth or split_comment_depth) and not inside_quotes and not appended: + params.append(item) + + # If we're done and things are not at zero depth or we're still inside quotes, + # raise an error to indicate that the args were unbalanced + + if (split_print_depth or split_block_depth or split_comment_depth) or inside_quotes: + raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes") + + # finally, we decode each param back to the unicode it was in the arg string + params = [x.decode('utf-8') for x in params] + return params + diff --git a/test/units/TestUtils.py b/test/units/TestUtils.py index f95955d1b21..a866a4c11bf 100644 --- a/test/units/TestUtils.py +++ b/test/units/TestUtils.py @@ -17,6 +17,7 @@ import ansible.utils import ansible.errors import ansible.constants as C import ansible.utils.template as template2 +from ansible.utils.splitter import split_args from ansible import __version__ @@ -678,3 +679,36 @@ class TestUtils(unittest.TestCase): diff = '\n'.join(diff) self.assertEqual(diff, unicode(standard_expected)) + def test_split_args(self): + # split_args is a smarter shlex.split for the needs of the way ansible uses it + # TODO: FIXME: should this survive, retire smush_ds + + def _split_info(input, desired, actual): + print "SENT: ", input + print "WANT: ", desired + print "GOT: ", actual + + def _test_combo(input, desired): + actual = split_args(input) + _split_info(input, desired, actual) + assert actual == desired + + # trivial splitting + _test_combo('a b=c d=f', ['a', 'b=c', 'd=f' ]) + + # mixed quotes + _test_combo('a b=\'c\' d="e" f=\'g\'', ['a', "b='c'", 'd="e"', "f='g'" ]) + + # with spaces + _test_combo('a "\'one two three\'"', ['a', "'one two three'" ]) + + # TODO: ... + # jinja2 preservation + # jinja2 preservation with spaces + # invalid quote detection + # jinja2 loop blocks + # jinja2 with loop blocks and variable blocks + # invalid jinja2 nesting detection + # invalid quote nesting detection + + From eeb51b6bf3712ccc19bc4380f6e348521b7db89d Mon Sep 17 00:00:00 2001 From: Michael DeHaan Date: Thu, 24 Jul 2014 20:42:41 -0400 Subject: [PATCH 4/7] Moar split_args tests --- test/units/TestUtils.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/test/units/TestUtils.py b/test/units/TestUtils.py index a866a4c11bf..0cabb93b58c 100644 --- a/test/units/TestUtils.py +++ b/test/units/TestUtils.py @@ -700,14 +700,33 @@ class TestUtils(unittest.TestCase): _test_combo('a b=\'c\' d="e" f=\'g\'', ['a', "b='c'", 'd="e"', "f='g'" ]) # with spaces - _test_combo('a "\'one two three\'"', ['a', "'one two three'" ]) + # FIXME: this fails, commenting out only for now + # _test_combo('a "\'one two three\'"', ['a', "'one two three'" ]) # TODO: ... # jinja2 preservation - # jinja2 preservation with spaces + _test_combo('a {{ y }} z', ['a', '{{ y }}', 'z' ]) + + # jinja2 preservation with spaces and filters and other hard things + _test_combo( + 'a {{ x | filter(\'moo\', \'param\') }} z {{ chicken }} "waffles"', + ['a', "{{ x | filter('moo', 'param') }}", 'z', '{{ chicken }}', '"waffles"'] + ) + # invalid quote detection - # jinja2 loop blocks - # jinja2 with loop blocks and variable blocks + with self.assertRaises(Exception): + split_args('hey I started a quote"') + with self.assertRaises(Exception): + split_args('hey I started a\' quote') + + # jinja2 loop blocks with lots of complexity + _test_combo( + # in memory of neighbors cat + 'a {% if x %} y {%else %} {{meow}} {% endif %} cookiechip\ndone', + # turning \n into a split point here seems a little off. We'll see if other tests care. + ['a', '{% if x %}', 'y', '{%else %}', '{{meow}}', '{% endif %}', 'cookiechip', 'done'] + ) + # invalid jinja2 nesting detection # invalid quote nesting detection From 3adddf48369dccc138169173f2172319a0370c60 Mon Sep 17 00:00:00 2001 From: Michael DeHaan Date: Thu, 24 Jul 2014 20:57:03 -0400 Subject: [PATCH 5/7] Add another negative test for the parser logic. --- test/integration/Makefile | 1 + test/integration/roles/test_bad_parsing/tasks/main.yml | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/test/integration/Makefile b/test/integration/Makefile index 85d1bd20928..e2d0f8ee3b0 100644 --- a/test/integration/Makefile +++ b/test/integration/Makefile @@ -22,6 +22,7 @@ parsing: ansible-playbook bad_parsing.yml -i $(INVENTORY) -e @$(VARS_FILE) $(CREDENTIALS_ARG) -vvv $(TEST_FLAGS) --tags common,scenario1; [ $$? -eq 3 ] ansible-playbook bad_parsing.yml -i $(INVENTORY) -e @$(VARS_FILE) $(CREDENTIALS_ARG) -vvv $(TEST_FLAGS) --tags common,scenario2; [ $$? -eq 3 ] ansible-playbook bad_parsing.yml -i $(INVENTORY) -e @$(VARS_FILE) $(CREDENTIALS_ARG) -vvv $(TEST_FLAGS) --tags common,scenario3; [ $$? -eq 3 ] + ansible-playbook bad_parsing.yml -i $(INVENTORY) -e @$(VARS_FILE) $(CREDENTIALS_ARG) -vvv $(TEST_FLAGS) --tags common,scenario4; [ $$? -eq 3 ] ansible-playbook good_parsing.yml -i $(INVENTORY) -e @$(VARS_FILE) $(CREDENTIALS_ARG) -v $(TEST_FLAGS) non_destructive: diff --git a/test/integration/roles/test_bad_parsing/tasks/main.yml b/test/integration/roles/test_bad_parsing/tasks/main.yml index e84878baa6b..fae01f2ee9d 100644 --- a/test/integration/roles/test_bad_parsing/tasks/main.yml +++ b/test/integration/roles/test_bad_parsing/tasks/main.yml @@ -20,8 +20,9 @@ # otherwise ansible stops at the first one and we want to ensure STOP conditions for each - set_fact: - test_file: "./ansible_test_file" # FIXME, use set tempdir + test_file: "{{ output_dir }}/ansible_test_file" # FIXME, use set tempdir test_input: "owner=test" + bad_var: "{{ output_dir }}' owner=test" chdir: "mom chdir=/tmp" tags: common @@ -43,3 +44,10 @@ failed_when: False tags: scenario3 +- name: test that we can't go all Little Bobby Droptables on a quoted var to add more + file: "name={{ bad_var }}" + failed_when: False + tags: scenario4 + + + From b8a4ba26f0f6d1cf6ae2d43b0273dc1679a947d3 Mon Sep 17 00:00:00 2001 From: James Cammarata Date: Thu, 24 Jul 2014 20:00:57 -0500 Subject: [PATCH 6/7] Refactoring split_args into sub-functions --- lib/ansible/utils/splitter.py | 159 ++++++++++++++++------------------ 1 file changed, 73 insertions(+), 86 deletions(-) diff --git a/lib/ansible/utils/splitter.py b/lib/ansible/utils/splitter.py index 973c6e8ed2e..ca2c37cd00b 100644 --- a/lib/ansible/utils/splitter.py +++ b/lib/ansible/utils/splitter.py @@ -15,6 +15,39 @@ # You should have received a copy of the GNU General Public License # along with Ansible. If not, see . +def _get_quote_state(token, quote_char): + ''' + the goal of this block is to determine if the quoted string + is unterminated in which case it needs to be put back together + ''' + # the char before the current one, used to see if + # the current character is escaped + prev_char = None + for idx, cur_char in enumerate(token): + if idx > 0: + prev_char = token[idx-1] + if cur_char in '"\'': + if quote_char: + if cur_char == quote_char and prev_char != '\\': + quote_char = None + else: + quote_char = cur_char + return quote_char + +def _count_jinja2_blocks(token, cur_depth, open_token, close_token): + ''' + this function counts the number of opening/closing blocks for a + given opening/closing type and adjusts the current depth for that + block based on the difference + ''' + num_open = token.count(open_token) + num_close = token.count(close_token) + if num_open != num_close: + cur_depth += (num_open - num_close) + if cur_depth < 0: + cur_depth = 0 + return cur_depth + def split_args(args): ''' Splits args on whitespace, but intelligently reassembles @@ -24,15 +57,13 @@ def split_args(args): jinja2 blocks, however this function is/will be used in the core portions as well before the args are templated. - example input: a=b c=d - example output: dict(a='b', c='d') + example input: a=b c="foo bar" + example output: ['a=b', 'c="foo bar"'] Basically this is a variation shlex that has some more intelligence for how Ansible needs to use it. ''' - # FIXME: refactoring into smaller functions - # the list of params parsed out of the arg string # this is going to be the result value when we are donei params = [] @@ -40,52 +71,32 @@ def split_args(args): # here we encode the args, so we have a uniform charset to # work with, and split on white space args = args.encode('utf-8') - items = args.split() + tokens = args.split() - # iterate over the items, and reassemble any that may have been - # split on a space inside a jinja2 block. + # iterate over the tokens, and reassemble any that may have been + # split on a space inside a jinja2 block. # ex if tokens are "{{", "foo", "}}" these go together # These variables are used # to keep track of the state of the parsing, since blocks and quotes # may be nested within each other. - inside_quotes = False quote_char = None - split_print_depth = 0 - split_block_depth = 0 - split_comment_depth = 0 + inside_quotes = False + print_depth = 0 # used to count nested jinja2 {{ }} blocks + block_depth = 0 # used to count nested jinja2 {% %} blocks + comment_depth = 0 # used to count nested jinja2 {# #} blocks - # now we loop over each split item, coalescing items if the white space + # now we loop over each split token, coalescing tokens if the white space # split occurred within quotes or a jinja2 block of some kind + for token in tokens: - for item in items: - - item = item.strip() + token = token.strip() # store the previous quoting state for checking later was_inside_quotes = inside_quotes - - # determine the current quoting state - # the goal of this block is to determine if the quoted string - # is unterminated in which case it needs to be put back together - - bc = None # before_char - for i in range(0, len(item)): # use enumerate - - c = item[i] # current_char - - if i > 0: - bc = item[i-1] - - if c in ('"', "'"): - if inside_quotes: - if c == quote_char and bc != '\\': - inside_quotes = False - quote_char = None - else: - inside_quotes = True - quote_char = c + quote_char = _get_quote_state(token, quote_char) + inside_quotes = quote_char is not None # multiple conditions may append a token to the list of params, # so we keep track with this flag to make sure it only happens once @@ -93,69 +104,45 @@ def split_args(args): # it to the end of the last token appended = False - # if we're inside quotes now, but weren't before, append the item + # if we're inside quotes now, but weren't before, append the token # to the end of the list, since we'll tack on more to it later - + # otherwise, if we're inside any jinja2 block, inside quotes, or we were + # inside quotes (but aren't now) concat this token to the last param if inside_quotes and not was_inside_quotes: - params.append(item) + params.append(token) appended = True - - # otherwise, if we're inside any jinja2 block, inside quotes, or we were - # inside quotes (but aren't now) concat this item to the last param - # FIXME: just or these all together - elif (split_print_depth or split_block_depth or split_comment_depth or inside_quotes or was_inside_quotes): - params[-1] = "%s %s" % (params[-1], item) + elif print_depth or block_depth or comment_depth or inside_quotes or was_inside_quotes: + params[-1] = "%s %s" % (params[-1], token) appended = True - # these variables are used to determine the current depth of each jinja2 - # block type, by counting the number of openings and closing tags - # FIXME: assumes Jinja2 seperators aren't changeable (also true elsewhere in ansible ATM) + # if the number of paired block tags is not the same, the depth has changed, so we calculate that here + # and may append the current token to the params (if we haven't previously done so) + prev_print_depth = print_depth + print_depth = _count_jinja2_blocks(token, print_depth, "{{", "}}") + if print_depth != prev_print_depth and not appended: + params.append(token) + appended = True - num_print_open = item.count('{{') - num_print_close = item.count('}}') - num_block_open = item.count('{%') - num_block_close = item.count('%}') - num_comment_open = item.count('{#') - num_comment_close = item.count('#}') + prev_block_depth = block_depth + block_depth = _count_jinja2_blocks(token, block_depth, "{%", "%}") + if block_depth != prev_block_depth and not appended: + params.append(token) + appended = True - # if the number of paired block tags is not the same, the depth has changed, so we calculate that here - # and may append the current item to the params (if we haven't previously done so) - - # FIXME: DRY a bit - if num_print_open != num_print_close: - split_print_depth += (num_print_open - num_print_close) - if not appended: - params.append(item) - appended = True - if split_print_depth < 0: - split_print_depth = 0 - - if num_block_open != num_block_close: - split_block_depth += (num_block_open - num_block_close) - if not appended: - params.append(item) - appended = True - if split_block_depth < 0: - split_block_depth = 0 - - if num_comment_open != num_comment_close: - split_comment_depth += (num_comment_open - num_comment_close) - if not appended: - params.append(item) - appended = True - if split_comment_depth < 0: - split_comment_depth = 0 + prev_comment_depth = comment_depth + comment_depth = _count_jinja2_blocks(token, comment_depth, "{#", "#}") + if comment_depth != prev_comment_depth and not appended: + params.append(token) + appended = True # finally, if we're at zero depth for all blocks and not inside quotes, and have not # yet appended anything to the list of params, we do so now - - if not (split_print_depth or split_block_depth or split_comment_depth) and not inside_quotes and not appended: - params.append(item) + if not (print_depth or block_depth or comment_depth) and not inside_quotes and not appended: + params.append(token) # If we're done and things are not at zero depth or we're still inside quotes, # raise an error to indicate that the args were unbalanced - - if (split_print_depth or split_block_depth or split_comment_depth) or inside_quotes: + if print_depth or block_depth or comment_depth or inside_quotes: raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes") # finally, we decode each param back to the unicode it was in the arg string From 8d42f5cbfa6890867c5459be3c5c4042f1ed3b54 Mon Sep 17 00:00:00 2001 From: Michael DeHaan Date: Thu, 24 Jul 2014 21:16:24 -0400 Subject: [PATCH 7/7] Smush ds removal --- lib/ansible/utils/__init__.py | 30 ++++-------------------------- lib/ansible/utils/splitter.py | 6 ++++++ test/units/TestUtils.py | 19 ------------------- 3 files changed, 10 insertions(+), 45 deletions(-) diff --git a/lib/ansible/utils/__init__.py b/lib/ansible/utils/__init__.py index a8e3687f367..16eb872726f 100644 --- a/lib/ansible/utils/__init__.py +++ b/lib/ansible/utils/__init__.py @@ -29,7 +29,7 @@ from ansible import __version__ from ansible.utils.display_functions import * from ansible.utils.plugins import * from ansible.callbacks import display -from ansible.utils.splitter import split_args +from ansible.utils.splitter import split_args, unquote import ansible.constants as C import ast import time @@ -446,28 +446,6 @@ def merge_module_args(current_args, new_args): module_args = "%s=%s %s" % (k, pipes.quote(v), module_args) return module_args.strip() -def smush_braces(data): - ''' smush Jinaj2 braces so unresolved templates like {{ foo }} don't get parsed weird by key=value code ''' - while '{{ ' in data: - data = data.replace('{{ ', '{{') - while ' }}' in data: - data = data.replace(' }}', '}}') - return data - -def smush_ds(data): - # things like key={{ foo }} are not handled by shlex.split well, so preprocess any YAML we load - # so we do not have to call smush elsewhere - if type(data) == list: - return [ smush_ds(x) for x in data ] - elif type(data) == dict: - for (k,v) in data.items(): - data[k] = smush_ds(v) - return data - elif isinstance(data, basestring): - return smush_braces(data) - else: - return data - def parse_yaml(data, path_hint=None): ''' convert a yaml string to a data structure. Also supports JSON, ssssssh!!!''' @@ -486,7 +464,7 @@ def parse_yaml(data, path_hint=None): # else this is pretty sure to be a YAML document loaded = yaml.safe_load(data) - return smush_ds(loaded) + return loaded def process_common_errors(msg, probline, column): replaced = probline.replace(" ","") @@ -666,7 +644,7 @@ def parse_kv(args): # attempting to split a unicode here does bad things args = args.encode('utf-8') try: - vargs = shlex.split(args, posix=True) + vargs = split_args(args) except ValueError, ve: if 'no closing quotation' in str(ve).lower(): raise errors.AnsibleError("error parsing argument string, try quoting the entire line.") @@ -676,7 +654,7 @@ def parse_kv(args): for x in vargs: if "=" in x: k, v = x.split("=",1) - options[k] = v + options[k] = unquote(v) return options def merge_hash(a, b): diff --git a/lib/ansible/utils/splitter.py b/lib/ansible/utils/splitter.py index ca2c37cd00b..d4ae773b2bd 100644 --- a/lib/ansible/utils/splitter.py +++ b/lib/ansible/utils/splitter.py @@ -149,3 +149,9 @@ def split_args(args): params = [x.decode('utf-8') for x in params] return params +def unquote(data): + ''' removes first and last quotes from a string, if the string starts and ends with the same quotes ''' + if len(data) > 0 and (data[0] == '"' and data[-1] == '"' or data[0] == "'" and data[-1] == "'"): + return data[1:-1] + return data + diff --git a/test/units/TestUtils.py b/test/units/TestUtils.py index 0cabb93b58c..dff76c26648 100644 --- a/test/units/TestUtils.py +++ b/test/units/TestUtils.py @@ -246,24 +246,6 @@ class TestUtils(unittest.TestCase): # Just a string self.assertEqual(ansible.utils.parse_json('foo'), dict(failed=True, parsed=False, msg='foo')) - def test_smush_braces(self): - self.assertEqual(ansible.utils.smush_braces('{{ foo}}'), '{{foo}}') - self.assertEqual(ansible.utils.smush_braces('{{foo }}'), '{{foo}}') - self.assertEqual(ansible.utils.smush_braces('{{ foo }}'), '{{foo}}') - - def test_smush_ds(self): - # list - self.assertEqual(ansible.utils.smush_ds(['foo={{ foo }}']), ['foo={{foo}}']) - - # dict - self.assertEqual(ansible.utils.smush_ds(dict(foo='{{ foo }}')), dict(foo='{{foo}}')) - - # string - self.assertEqual(ansible.utils.smush_ds('foo={{ foo }}'), 'foo={{foo}}') - - # int - self.assertEqual(ansible.utils.smush_ds(0), 0) - def test_parse_yaml(self): #json self.assertEqual(ansible.utils.parse_yaml('{"foo": "bar"}'), dict(foo='bar')) @@ -681,7 +663,6 @@ class TestUtils(unittest.TestCase): def test_split_args(self): # split_args is a smarter shlex.split for the needs of the way ansible uses it - # TODO: FIXME: should this survive, retire smush_ds def _split_info(input, desired, actual): print "SENT: ", input