From fb738bd7b144c502d931d0fc0330c3135c6cbb42 Mon Sep 17 00:00:00 2001 From: James Cammarata Date: Thu, 24 Jul 2014 20:00:57 -0500 Subject: [PATCH] Refactoring split_args into sub-functions --- lib/ansible/utils/splitter.py | 159 ++++++++++++++++------------------ 1 file changed, 73 insertions(+), 86 deletions(-) diff --git a/lib/ansible/utils/splitter.py b/lib/ansible/utils/splitter.py index 973c6e8ed2e..ca2c37cd00b 100644 --- a/lib/ansible/utils/splitter.py +++ b/lib/ansible/utils/splitter.py @@ -15,6 +15,39 @@ # You should have received a copy of the GNU General Public License # along with Ansible. If not, see <http://www.gnu.org/licenses/>. +def _get_quote_state(token, quote_char): + ''' + the goal of this block is to determine if the quoted string + is unterminated in which case it needs to be put back together + ''' + # the char before the current one, used to see if + # the current character is escaped + prev_char = None + for idx, cur_char in enumerate(token): + if idx > 0: + prev_char = token[idx-1] + if cur_char in '"\'': + if quote_char: + if cur_char == quote_char and prev_char != '\\': + quote_char = None + else: + quote_char = cur_char + return quote_char + +def _count_jinja2_blocks(token, cur_depth, open_token, close_token): + ''' + this function counts the number of opening/closing blocks for a + given opening/closing type and adjusts the current depth for that + block based on the difference + ''' + num_open = token.count(open_token) + num_close = token.count(close_token) + if num_open != num_close: + cur_depth += (num_open - num_close) + if cur_depth < 0: + cur_depth = 0 + return cur_depth + def split_args(args): ''' Splits args on whitespace, but intelligently reassembles @@ -24,15 +57,13 @@ def split_args(args): jinja2 blocks, however this function is/will be used in the core portions as well before the args are templated. 
- example input: a=b c=d - example output: dict(a='b', c='d') + example input: a=b c="foo bar" + example output: ['a=b', 'c="foo bar"'] Basically this is a variation shlex that has some more intelligence for how Ansible needs to use it. ''' - # FIXME: refactoring into smaller functions - # the list of params parsed out of the arg string # this is going to be the result value when we are donei params = [] @@ -40,52 +71,32 @@ def split_args(args): # here we encode the args, so we have a uniform charset to # work with, and split on white space args = args.encode('utf-8') - items = args.split() + tokens = args.split() - # iterate over the items, and reassemble any that may have been - # split on a space inside a jinja2 block. + # iterate over the tokens, and reassemble any that may have been + # split on a space inside a jinja2 block. # ex if tokens are "{{", "foo", "}}" these go together # These variables are used # to keep track of the state of the parsing, since blocks and quotes # may be nested within each other. 
- inside_quotes = False quote_char = None - split_print_depth = 0 - split_block_depth = 0 - split_comment_depth = 0 + inside_quotes = False + print_depth = 0 # used to count nested jinja2 {{ }} blocks + block_depth = 0 # used to count nested jinja2 {% %} blocks + comment_depth = 0 # used to count nested jinja2 {# #} blocks - # now we loop over each split item, coalescing items if the white space + # now we loop over each split token, coalescing tokens if the white space # split occurred within quotes or a jinja2 block of some kind + for token in tokens: - for item in items: - - item = item.strip() + token = token.strip() # store the previous quoting state for checking later was_inside_quotes = inside_quotes - - # determine the current quoting state - # the goal of this block is to determine if the quoted string - # is unterminated in which case it needs to be put back together - - bc = None # before_char - for i in range(0, len(item)): # use enumerate - - c = item[i] # current_char - - if i > 0: - bc = item[i-1] - - if c in ('"', "'"): - if inside_quotes: - if c == quote_char and bc != '\\': - inside_quotes = False - quote_char = None - else: - inside_quotes = True - quote_char = c + quote_char = _get_quote_state(token, quote_char) + inside_quotes = quote_char is not None # multiple conditions may append a token to the list of params, # so we keep track with this flag to make sure it only happens once @@ -93,69 +104,45 @@ def split_args(args): # it to the end of the last token appended = False - # if we're inside quotes now, but weren't before, append the item + # if we're inside quotes now, but weren't before, append the token # to the end of the list, since we'll tack on more to it later - + # otherwise, if we're inside any jinja2 block, inside quotes, or we were + # inside quotes (but aren't now) concat this token to the last param if inside_quotes and not was_inside_quotes: - params.append(item) + params.append(token) appended = True - - # otherwise, if we're 
inside any jinja2 block, inside quotes, or we were - # inside quotes (but aren't now) concat this item to the last param - # FIXME: just or these all together - elif (split_print_depth or split_block_depth or split_comment_depth or inside_quotes or was_inside_quotes): - params[-1] = "%s %s" % (params[-1], item) + elif print_depth or block_depth or comment_depth or inside_quotes or was_inside_quotes: + params[-1] = "%s %s" % (params[-1], token) appended = True - # these variables are used to determine the current depth of each jinja2 - # block type, by counting the number of openings and closing tags - # FIXME: assumes Jinja2 seperators aren't changeable (also true elsewhere in ansible ATM) + # if the number of paired block tags is not the same, the depth has changed, so we calculate that here + # and may append the current token to the params (if we haven't previously done so) + prev_print_depth = print_depth + print_depth = _count_jinja2_blocks(token, print_depth, "{{", "}}") + if print_depth != prev_print_depth and not appended: + params.append(token) + appended = True - num_print_open = item.count('{{') - num_print_close = item.count('}}') - num_block_open = item.count('{%') - num_block_close = item.count('%}') - num_comment_open = item.count('{#') - num_comment_close = item.count('#}') + prev_block_depth = block_depth + block_depth = _count_jinja2_blocks(token, block_depth, "{%", "%}") + if block_depth != prev_block_depth and not appended: + params.append(token) + appended = True - # if the number of paired block tags is not the same, the depth has changed, so we calculate that here - # and may append the current item to the params (if we haven't previously done so) - - # FIXME: DRY a bit - if num_print_open != num_print_close: - split_print_depth += (num_print_open - num_print_close) - if not appended: - params.append(item) - appended = True - if split_print_depth < 0: - split_print_depth = 0 - - if num_block_open != num_block_close: - split_block_depth += 
(num_block_open - num_block_close) - if not appended: - params.append(item) - appended = True - if split_block_depth < 0: - split_block_depth = 0 - - if num_comment_open != num_comment_close: - split_comment_depth += (num_comment_open - num_comment_close) - if not appended: - params.append(item) - appended = True - if split_comment_depth < 0: - split_comment_depth = 0 + prev_comment_depth = comment_depth + comment_depth = _count_jinja2_blocks(token, comment_depth, "{#", "#}") + if comment_depth != prev_comment_depth and not appended: + params.append(token) + appended = True # finally, if we're at zero depth for all blocks and not inside quotes, and have not # yet appended anything to the list of params, we do so now - - if not (split_print_depth or split_block_depth or split_comment_depth) and not inside_quotes and not appended: - params.append(item) + if not (print_depth or block_depth or comment_depth) and not inside_quotes and not appended: + params.append(token) # If we're done and things are not at zero depth or we're still inside quotes, # raise an error to indicate that the args were unbalanced - - if (split_print_depth or split_block_depth or split_comment_depth) or inside_quotes: + if print_depth or block_depth or comment_depth or inside_quotes: raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes") # finally, we decode each param back to the unicode it was in the arg string