find - add encoding option to use in conjunction with contains (#82284)

*  when doing a 'contains' search, determine the encoding of the files to be searched

* set default encoding to None for backwards compatibility

* changelog, error handling, tests added

* add sanity ignore.txt for non-utf-8 test
pull/82480/merge
ShIRannx 4 months ago committed by GitHub
parent b01f1f207c
commit aa40167f40
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,2 @@
minor_changes:
- find - add an encoding parameter to specify the encoding of the files to be searched.

@ -149,6 +149,11 @@ options:
- Default is unlimited depth.
type: int
version_added: "2.6"
encoding:
description:
- When doing a C(contains) search, determine the encoding of the files to be searched.
type: str
version_added: "2.17"
extends_documentation_fragment: action_common_attributes
attributes:
check_mode:
@ -337,11 +342,12 @@ def sizefilter(st, size):
return False
def contentfilter(fsname, pattern, read_whole_file=False):
def contentfilter(fsname, pattern, encoding, read_whole_file=False):
"""
Filter files which contain the given expression
:arg fsname: Filename to scan for lines matching a pattern
:arg pattern: Pattern to look for inside of line
:arg encoding: Encoding of the file to be scanned
:arg read_whole_file: If true, the whole file is read into memory before the regex is applied against it. Otherwise, the regex is applied line-by-line.
:rtype: bool
:returns: True if one of the lines in fsname matches the pattern. Otherwise False
@ -352,7 +358,7 @@ def contentfilter(fsname, pattern, read_whole_file=False):
prog = re.compile(pattern)
try:
with open(fsname) as f:
with open(fsname, encoding=encoding) as f:
if read_whole_file:
return bool(prog.search(f.read()))
@ -360,6 +366,13 @@ def contentfilter(fsname, pattern, read_whole_file=False):
if prog.match(line):
return True
except LookupError as e:
raise e
except UnicodeDecodeError as e:
if encoding is None:
encoding = 'None (default determined by the Python built-in function "open")'
msg = f'Failed to read the file {fsname} due to an encoding error. current encoding: {encoding}'
raise Exception(msg) from e
except Exception:
pass
@ -457,6 +470,7 @@ def main():
depth=dict(type='int'),
mode=dict(type='raw'),
exact_mode=dict(type='bool', default=True),
encoding=dict(type='str')
),
supports_check_mode=True,
)
@ -563,7 +577,7 @@ def main():
if (pfilter(fsobj, params['patterns'], params['excludes'], params['use_regex']) and
agefilter(st, now, age, params['age_stamp']) and
sizefilter(st, size) and
contentfilter(fsname, params['contains'], params['read_whole_file']) and
contentfilter(fsname, params['contains'], params['encoding'], params['read_whole_file']) and
mode_filter(st, params['mode'], params['exact_mode'], module)):
r.update(statinfo(st))

@ -124,6 +124,7 @@
with_items:
- a.txt
- log.txt
- hello_world.gbk
- name: Ensure '$' only matches the true end of the file with read_whole_file, not a line
find:
@ -195,6 +196,51 @@
that:
- no_match_line_boundaries.matched == 0
- name: read a gbk file by utf-8
find:
paths: "{{ remote_tmp_dir_test }}"
patterns: "*.gbk"
contains: "你好世界"
encoding: "utf-8"
register: fail_to_read_wrong_encoding_file
- debug: var=fail_to_read_wrong_encoding_file
- assert:
that:
- fail_to_read_wrong_encoding_file.msg == 'Not all paths examined, check warnings for details'
- >-
fail_to_read_wrong_encoding_file.skipped_paths[remote_tmp_dir_test] ==
("Failed to read the file %s/hello_world.gbk due to an encoding error. current encoding: utf-8" % (remote_tmp_dir_test))
- name: read a gbk file by gbk
find:
paths: "{{ remote_tmp_dir_test }}"
encoding: "gbk"
patterns: "*.gbk"
contains: "你好世界"
register: success_to_read_right_encoding_file
- debug: var=success_to_read_right_encoding_file
- assert:
that:
- success_to_read_right_encoding_file.matched == 1
- name: read a gbk file by non-exists encoding
find:
paths: "{{ remote_tmp_dir_test }}"
encoding: "idontexist"
patterns: "*.gbk"
contains: "你好世界"
register: fail_to_search_file_by_non_exists_encoding
- debug: var=fail_to_search_file_by_non_exists_encoding
- assert:
that:
- fail_to_search_file_by_non_exists_encoding.skipped_paths[remote_tmp_dir_test] == "unknown encoding: idontexist"
- block:
- set_fact:
mypath: /idontexist{{lookup('pipe', 'mktemp')}}
@ -221,8 +267,8 @@
- assert:
that:
- total_contents.matched == 18
- total_contents.examined == 18
- total_contents.matched == 19
- total_contents.examined == 19
- name: Get files and directories with depth
find:
@ -234,10 +280,10 @@
- assert:
that:
- contents_with_depth.matched == 8
- contents_with_depth.matched == 9
# dir contents are considered until the depth exceeds the requested depth
# there are 9 files/directories in the requested depth and 4 that exceed it by 1
- contents_with_depth.examined == 12
- contents_with_depth.examined == 13
- name: Find files with depth
find:
@ -248,10 +294,10 @@
- assert:
that:
- files_with_depth.matched == 4
- files_with_depth.matched == 5
# dir contents are considered until the depth exceeds the requested depth
# there are 9 files/directories in the requested depth and 4 that exceed it by 1
- files_with_depth.examined == 12
- files_with_depth.examined == 13
- name: exclude with regex
find:

@ -197,3 +197,5 @@ README.md pymarkdown:line-length
test/integration/targets/ansible-vault/invalid_format/README.md pymarkdown:no-bare-urls
test/support/README.md pymarkdown:no-bare-urls
test/units/cli/test_data/role_skeleton/README.md pymarkdown:line-length
test/integration/targets/find/files/hello_world.gbk no-smart-quotes
test/integration/targets/find/files/hello_world.gbk no-unwanted-characters

Loading…
Cancel
Save