Fix arg splitting and key/value parsing (#80030)

* Improve readability of unit test output This drops the trailing `-expectedXXX` suffixes from test names generated by parametrize. * Add more splitter unit tests This fills in code coverage gaps in the exising unit tests. * Bug fixes and code cleanup - Fix IndexError exceptions caused by parsing a leading newline, space or escaped space. - Fix an AttributeError exception in `parse_args` when parsing `None`. - Fix incorrect parsing of multi-line Jinja2 blocks, which resulted in doubling newlines. - Remove unreachable exception handlers in the `parse_kv` function. The unreachable code was verified through analysis of the code as well as use of the `atheris` fuzzer. - Remove unnecessary code in the `split_args` function. - Add an optimization to `split_args` for the empty args case. * Add unit tests for bug fixes The splitter code is now fully covered by unit tests. * Add another issue ref in changelog
1 year ago · da2cd157f1
parent 98d1cf7aa2
commit da2cd157f1
3 changed files with 89 additions and 19 deletions
--- a/changelogs/fragments/parsing-splitter-fixes.yml
+++ b/changelogs/fragments/parsing-splitter-fixes.yml
@ -0,0 +1,5 @@
+bugfixes:
+  - Fix exceptions caused by various inputs when performing arg splitting or parsing key/value pairs.
+    Resolves issue https://github.com/ansible/ansible/issues/46379
+    and issue https://github.com/ansible/ansible/issues/61497
+  - Fix incorrect parsing of multi-line Jinja2 blocks when performing arg splitting or parsing key/value pairs.
--- a/lib/ansible/parsing/splitter.py
+++ b/lib/ansible/parsing/splitter.py
@ -58,15 +58,7 @@ def parse_kv(args, check_raw=False):

    options = {}
    if args is not None:
-        try:
        vargs = split_args(args)
-        except IndexError as e:
-            raise AnsibleParserError("Unable to parse argument string", orig_exc=e)
-        except ValueError as ve:
-            if 'no closing quotation' in str(ve).lower():
-                raise AnsibleParserError("error parsing argument string, try quoting the entire line.", orig_exc=ve)
-            else:
-                raise

        raw_params = []
        for orig_x in vargs:
@ -168,6 +160,9 @@ def split_args(args):
    how Ansible needs to use it.
    '''

+    if not args:
+        return []
+
    # the list of params parsed out of the arg string
    # this is going to be the result value when we are done
    params = []
@ -204,6 +199,10 @@ def split_args(args):
            # Empty entries means we have subsequent spaces
            # We want to hold onto them so we can reconstruct them later
            if len(token) == 0 and idx != 0:
+                # Make sure there is a params item to store result in.
+                if not params:
+                    params.append('')
+
                params[-1] += ' '
                continue

@ -235,13 +234,11 @@ def split_args(args):
            elif print_depth or block_depth or comment_depth or inside_quotes or was_inside_quotes:
                if idx == 0 and was_inside_quotes:
                    params[-1] = "%s%s" % (params[-1], token)
-                elif len(tokens) > 1:
+                else:
                    spacer = ''
                    if idx > 0:
                        spacer = ' '
                    params[-1] = "%s%s%s" % (params[-1], spacer, token)
-                else:
-                    params[-1] = "%s\n%s" % (params[-1], token)
                appended = True

            # if the number of paired block tags is not the same, the depth has changed, so we calculate that here
@ -273,10 +270,11 @@ def split_args(args):
        # one item (meaning we split on newlines), add a newline back here
        # to preserve the original structure
        if len(items) > 1 and itemidx != len(items) - 1 and not line_continuation:
-            params[-1] += '\n'
+            # Make sure there is a params item to store result in.
+            if not params:
+                params.append('')

-        # always clear the line continuation flag
-        line_continuation = False
+            params[-1] += '\n'

    # If we're done and things are not at zero depth or we're still inside quotes,
    # raise an error to indicate that the args were unbalanced
--- a/test/units/parsing/test_splitter.py
+++ b/test/units/parsing/test_splitter.py
@ -21,10 +21,17 @@ from __future__ import (absolute_import, division, print_function)
 __metaclass__ = type

 from ansible.parsing.splitter import split_args, parse_kv
+from ansible.errors import AnsibleParserError

 import pytest

 SPLIT_DATA = (
+    (None,
+        [],
+        {}),
+    (u'',
+        [],
+        {}),
    (u'a',
        [u'a'],
        {u'_raw_params': u'a'}),
@ -46,6 +53,18 @@ SPLIT_DATA = (
    (u'a="echo \\"hello world\\"" b=bar',
        [u'a="echo \\"hello world\\""', u'b=bar'],
        {u'a': u'echo "hello world"', u'b': u'bar'}),
+    (u'a="nest\'ed"',
+        [u'a="nest\'ed"'],
+        {u'a': u'nest\'ed'}),
+    (u' ',
+        [u' '],
+        {u'_raw_params': u' '}),
+    (u'\\ ',
+        [u' '],
+        {u'_raw_params': u' '}),
+    (u'a\\=escaped',
+        [u'a\\=escaped'],
+        {u'_raw_params': u'a=escaped'}),
    (u'a="multi\nline"',
        [u'a="multi\nline"'],
        {u'a': u'multi\nline'}),
@ -61,12 +80,27 @@ SPLIT_DATA = (
    (u'a="multiline\nmessage1\\\n" b="multiline\nmessage2\\\n"',
        [u'a="multiline\nmessage1\\\n"', u'b="multiline\nmessage2\\\n"'],
        {u'a': 'multiline\nmessage1\\\n', u'b': u'multiline\nmessage2\\\n'}),
+    (u'line \\\ncontinuation',
+        [u'line', u'continuation'],
+        {u'_raw_params': u'line continuation'}),
+    (u'not jinja}}',
+        [u'not', u'jinja}}'],
+        {u'_raw_params': u'not jinja}}'}),
+    (u'a={{multiline\njinja}}',
+        [u'a={{multiline\njinja}}'],
+        {u'a': u'{{multiline\njinja}}'}),
    (u'a={{jinja}}',
        [u'a={{jinja}}'],
        {u'a': u'{{jinja}}'}),
    (u'a={{ jinja }}',
        [u'a={{ jinja }}'],
        {u'a': u'{{ jinja }}'}),
+    (u'a={% jinja %}',
+        [u'a={% jinja %}'],
+        {u'a': u'{% jinja %}'}),
+    (u'a={# jinja #}',
+        [u'a={# jinja #}'],
+        {u'a': u'{# jinja #}'}),
    (u'a="{{jinja}}"',
        [u'a="{{jinja}}"'],
        {u'a': u'{{jinja}}'}),
@ -94,17 +128,50 @@ SPLIT_DATA = (
    (u'One\n  Two\n    Three\n',
        [u'One\n ', u'Two\n   ', u'Three\n'],
        {u'_raw_params': u'One\n  Two\n    Three\n'}),
+    (u'\nOne\n  Two\n    Three\n',
+        [u'\n', u'One\n ', u'Two\n   ', u'Three\n'],
+        {u'_raw_params': u'\nOne\n  Two\n    Three\n'}),
 )

-SPLIT_ARGS = ((test[0], test[1]) for test in SPLIT_DATA)
-PARSE_KV = ((test[0], test[2]) for test in SPLIT_DATA)
+PARSE_KV_CHECK_RAW = (
+    (u'raw=yes', {u'_raw_params': u'raw=yes'}),
+    (u'creates=something', {u'creates': u'something'}),
+)
+
+PARSER_ERROR = (
+    '"',
+    "'",
+    '{{',
+    '{%',
+    '{#',
+)

+SPLIT_ARGS = tuple((test[0], test[1]) for test in SPLIT_DATA)
+PARSE_KV = tuple((test[0], test[2]) for test in SPLIT_DATA)

-@pytest.mark.parametrize("args, expected", SPLIT_ARGS)
+
+@pytest.mark.parametrize("args, expected", SPLIT_ARGS, ids=[str(arg[0]) for arg in SPLIT_ARGS])
 def test_split_args(args, expected):
    assert split_args(args) == expected


-@pytest.mark.parametrize("args, expected", PARSE_KV)
+@pytest.mark.parametrize("args, expected", PARSE_KV, ids=[str(arg[0]) for arg in PARSE_KV])
 def test_parse_kv(args, expected):
    assert parse_kv(args) == expected
+
+
+@pytest.mark.parametrize("args, expected", PARSE_KV_CHECK_RAW, ids=[str(arg[0]) for arg in PARSE_KV_CHECK_RAW])
+def test_parse_kv_check_raw(args, expected):
+    assert parse_kv(args, check_raw=True) == expected
+
+
+@pytest.mark.parametrize("args", PARSER_ERROR)
+def test_split_args_error(args):
+    with pytest.raises(AnsibleParserError):
+        split_args(args)
+
+
+@pytest.mark.parametrize("args", PARSER_ERROR)
+def test_parse_kv_error(args):
+    with pytest.raises(AnsibleParserError):
+        parse_kv(args)