diff --git a/changelogs/fragments/find-encoding.yml b/changelogs/fragments/find-encoding.yml new file mode 100644 index 00000000000..77449455517 --- /dev/null +++ b/changelogs/fragments/find-encoding.yml @@ -0,0 +1,2 @@ +minor_changes: + - find - add an encoding parameter to specify the encoding of the files to be searched. diff --git a/lib/ansible/modules/find.py b/lib/ansible/modules/find.py index 61c71813e7c..c85d19e7b99 100644 --- a/lib/ansible/modules/find.py +++ b/lib/ansible/modules/find.py @@ -149,6 +149,11 @@ options: - Default is unlimited depth. type: int version_added: "2.6" + encoding: + description: + - When doing a C(contains) search, determine the encoding of the files to be searched. + type: str + version_added: "2.17" extends_documentation_fragment: action_common_attributes attributes: check_mode: @@ -337,11 +342,12 @@ def sizefilter(st, size): return False -def contentfilter(fsname, pattern, read_whole_file=False): +def contentfilter(fsname, pattern, encoding, read_whole_file=False): """ Filter files which contain the given expression :arg fsname: Filename to scan for lines matching a pattern :arg pattern: Pattern to look for inside of line + :arg encoding: Encoding of the file to be scanned :arg read_whole_file: If true, the whole file is read into memory before the regex is applied against it. Otherwise, the regex is applied line-by-line. :rtype: bool :returns: True if one of the lines in fsname matches the pattern. 
Otherwise False @@ -352,7 +358,7 @@ def contentfilter(fsname, pattern, read_whole_file=False): prog = re.compile(pattern) try: - with open(fsname) as f: + with open(fsname, encoding=encoding) as f: if read_whole_file: return bool(prog.search(f.read())) @@ -360,6 +366,13 @@ def contentfilter(fsname, pattern, read_whole_file=False): if prog.match(line): return True + except LookupError as e: + raise e + except UnicodeDecodeError as e: + if encoding is None: + encoding = 'None (default determined by the Python built-in function "open")' + msg = f'Failed to read the file {fsname} due to an encoding error. current encoding: {encoding}' + raise Exception(msg) from e except Exception: pass @@ -457,6 +470,7 @@ def main(): depth=dict(type='int'), mode=dict(type='raw'), exact_mode=dict(type='bool', default=True), + encoding=dict(type='str') ), supports_check_mode=True, ) @@ -563,7 +577,7 @@ def main(): if (pfilter(fsobj, params['patterns'], params['excludes'], params['use_regex']) and agefilter(st, now, age, params['age_stamp']) and sizefilter(st, size) and - contentfilter(fsname, params['contains'], params['read_whole_file']) and + contentfilter(fsname, params['contains'], params['encoding'], params['read_whole_file']) and mode_filter(st, params['mode'], params['exact_mode'], module)): r.update(statinfo(st)) diff --git a/test/integration/targets/find/files/hello_world.gbk b/test/integration/targets/find/files/hello_world.gbk new file mode 100644 index 00000000000..8e3d1586287 --- /dev/null +++ b/test/integration/targets/find/files/hello_world.gbk @@ -0,0 +1 @@ +ÄãºÃÊÀ½ç diff --git a/test/integration/targets/find/tasks/main.yml b/test/integration/targets/find/tasks/main.yml index 189eab525e8..791a975e9b7 100644 --- a/test/integration/targets/find/tasks/main.yml +++ b/test/integration/targets/find/tasks/main.yml @@ -124,6 +124,7 @@ with_items: - a.txt - log.txt + - hello_world.gbk - name: Ensure '$' only matches the true end of the file with read_whole_file, not a line find: 
@@ -195,6 +196,51 @@ that: - no_match_line_boundaries.matched == 0 +- name: read a gbk file by utf-8 + find: + paths: "{{ remote_tmp_dir_test }}" + patterns: "*.gbk" + contains: "你好世界" + encoding: "utf-8" + register: fail_to_read_wrong_encoding_file + +- debug: var=fail_to_read_wrong_encoding_file + +- assert: + that: + - fail_to_read_wrong_encoding_file.msg == 'Not all paths examined, check warnings for details' + - >- + fail_to_read_wrong_encoding_file.skipped_paths[remote_tmp_dir_test] == + ("Failed to read the file %s/hello_world.gbk due to an encoding error. current encoding: utf-8" % (remote_tmp_dir_test)) + +- name: read a gbk file by gbk + find: + paths: "{{ remote_tmp_dir_test }}" + encoding: "gbk" + patterns: "*.gbk" + contains: "你好世界" + register: success_to_read_right_encoding_file + +- debug: var=success_to_read_right_encoding_file + +- assert: + that: + - success_to_read_right_encoding_file.matched == 1 + +- name: read a gbk file by non-exists encoding + find: + paths: "{{ remote_tmp_dir_test }}" + encoding: "idontexist" + patterns: "*.gbk" + contains: "你好世界" + register: fail_to_search_file_by_non_exists_encoding + +- debug: var=fail_to_search_file_by_non_exists_encoding + +- assert: + that: + - fail_to_search_file_by_non_exists_encoding.skipped_paths[remote_tmp_dir_test] == "unknown encoding: idontexist" + - block: - set_fact: mypath: /idontexist{{lookup('pipe', 'mktemp')}} @@ -221,8 +267,8 @@ - assert: that: - - total_contents.matched == 18 - - total_contents.examined == 18 + - total_contents.matched == 19 + - total_contents.examined == 19 - name: Get files and directories with depth find: @@ -234,10 +280,10 @@ - assert: that: - - contents_with_depth.matched == 8 + - contents_with_depth.matched == 9 # dir contents are considered until the depth exceeds the requested depth # there are 8 files/directories in the requested depth and 4 that exceed it by 1 - - contents_with_depth.examined == 12 + - contents_with_depth.examined == 13 - name: Find files 
with depth find: @@ -248,10 +294,10 @@ - assert: that: - - files_with_depth.matched == 4 + - files_with_depth.matched == 5 # dir contents are considered until the depth exceeds the requested depth # there are 8 files/directories in the requested depth and 4 that exceed it by 1 - - files_with_depth.examined == 12 + - files_with_depth.examined == 13 - name: exclude with regex find: diff --git a/test/sanity/ignore.txt b/test/sanity/ignore.txt index 66683c80bd1..a8dbf19e82d 100644 --- a/test/sanity/ignore.txt +++ b/test/sanity/ignore.txt @@ -197,3 +197,5 @@ README.md pymarkdown:line-length test/integration/targets/ansible-vault/invalid_format/README.md pymarkdown:no-bare-urls test/support/README.md pymarkdown:no-bare-urls test/units/cli/test_data/role_skeleton/README.md pymarkdown:line-length +test/integration/targets/find/files/hello_world.gbk no-smart-quotes +test/integration/targets/find/files/hello_world.gbk no-unwanted-characters