From bc4b40d8e788ca362b597c5e92670ca7ad9005c4 Mon Sep 17 00:00:00 2001 From: Brian Coca Date: Tue, 13 Oct 2015 10:04:50 -0400 Subject: [PATCH 1/4] added regex support to find, also added 'singular' aliases to patterns and paths --- files/find.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/files/find.py b/files/find.py index 659ec16026e..d7042f0027f 100644 --- a/files/find.py +++ b/files/find.py @@ -50,17 +50,18 @@ options: required: false default: '*' description: - - One or more (shell type) file glob patterns, which restrict the list of files to be returned to + - One or more (shell type) patterns, which restrict the list of files to be returned to those whose basenames match at least one of the patterns specified. Multiple patterns can be - specified using a list. + specified using a list. The patterns can be simple shell globs or a python regex prefixed by a '~'. + aliases: ['pattern'] contains: required: false default: null description: - - One or more re patterns which should be matched against the file content + - One or more re patterns which should be matched against the file content paths: required: true - aliases: [ "name" ] + aliases: [ "name", "path" ] description: - List of paths to the file or directory to search. All paths must be fully qualified. 
file_type: @@ -121,8 +122,9 @@ EXAMPLES = ''' # Recursively find /var/tmp files with last access time greater than 3600 seconds - find: paths="/var/tmp" age="3600" age_stamp=atime recurse=yes -# find /var/log files equal or greater than 10 megabytes ending with .log or .log.gz -- find: paths="/var/tmp" patterns="*.log","*.log.gz" size="10m" +# find /var/log files equal or greater than 10 megabytes ending with .old or .log.gz via regex +- find: paths="/var/tmp" patterns="~.*\.(?:old|log\.gz)$" size="10m" + ''' RETURN = ''' @@ -157,9 +159,11 @@ def pfilter(f, patterns=None): if patterns is None: return True for p in patterns: - if fnmatch.fnmatch(f, p): - return True - return False + if p.startswith('~'): + r = re.compile(p[1:]) + return r.match(f) + else: + return fnmatch.fnmatch(f, p) def agefilter(st, now, age, timestamp): @@ -236,8 +240,8 @@ def statinfo(st): def main(): module = AnsibleModule( argument_spec = dict( - paths = dict(required=True, aliases=['name'], type='list'), - patterns = dict(default=['*'], type='list'), + paths = dict(required=True, aliases=['name','path'], type='list'), + patterns = dict(default=['*'], type='list', aliases['pattern']), contains = dict(default=None, type='str'), file_type = dict(default="file", choices=['file', 'directory'], type='str'), age = dict(default=None, type='str'), From fdd88863d485b5003838459fb9574c5da6ee1598 Mon Sep 17 00:00:00 2001 From: Brian Coca Date: Tue, 13 Oct 2015 18:43:52 -0400 Subject: [PATCH 2/4] fixed bug in spec --- files/find.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/find.py b/files/find.py index d7042f0027f..10d5de834f3 100644 --- a/files/find.py +++ b/files/find.py @@ -241,7 +241,7 @@ def main(): module = AnsibleModule( argument_spec = dict( paths = dict(required=True, aliases=['name','path'], type='list'), - patterns = dict(default=['*'], type='list', aliases['pattern']), + patterns = dict(default=['*'], type='list', aliases=['pattern']), contains = 
dict(default=None, type='str'), file_type = dict(default="file", choices=['file', 'directory'], type='str'), age = dict(default=None, type='str'), From e603b1bb693987c732ea116526d164993d2681db Mon Sep 17 00:00:00 2001 From: Brian Coca Date: Mon, 19 Oct 2015 17:42:36 -0400 Subject: [PATCH 3/4] changed so regexes and shell globs work transparently --- files/find.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/files/find.py b/files/find.py index 10d5de834f3..9ec6f6eb8b3 100644 --- a/files/find.py +++ b/files/find.py @@ -50,9 +50,9 @@ options: required: false default: '*' description: - - One or more (shell type) patterns, which restrict the list of files to be returned to + - One or more (shell or regex) patterns, which restrict the list of files to be returned to those whose basenames match at least one of the patterns specified. Multiple patterns can be - specified using a list. The patterns can be simple shell globs or a python regex prefixed by a '~'. + specified using a list. 
aliases: ['pattern'] contains: required: false @@ -123,7 +123,7 @@ EXAMPLES = ''' - find: paths="/var/tmp" age="3600" age_stamp=atime recurse=yes # find /var/log files equal or greater than 10 megabytes ending with .old or .log.gz via regex -- find: paths="/var/tmp" patterns="~.*\.(?:old|log\.gz)$" size="10m" +- find: paths="/var/tmp" patterns="^.*?\.(?:old|log\.gz)$" size="10m" ''' @@ -156,14 +156,25 @@ examined: def pfilter(f, patterns=None): '''filter using glob patterns''' + if patterns is None: return True + + match = False for p in patterns: - if p.startswith('~'): - r = re.compile(p[1:]) - return r.match(f) - else: - return fnmatch.fnmatch(f, p) + try: + r = re.compile(p) + match = r.match(f) + except: + pass + + if not match: + match = fnmatch.fnmatch(f, p) + + if match: + break + + return match def agefilter(st, now, age, timestamp): From 074aad23e7f39274c165247afeb15d996604e5cc Mon Sep 17 00:00:00 2001 From: Brian Coca Date: Mon, 19 Oct 2015 20:43:50 -0400 Subject: [PATCH 4/4] final form, use_regex now controls if patterns is glob or regex - fixed cases in which stat fails (dangling symlink) - now properly reports name of skipped paths --- files/find.py | 57 +++++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/files/find.py b/files/find.py index 9ec6f6eb8b3..04ecddfe607 100644 --- a/files/find.py +++ b/files/find.py @@ -25,8 +25,6 @@ import stat import fnmatch import time import re -import shutil - DOCUMENTATION = ''' --- @@ -50,9 +48,9 @@ options: required: false default: '*' description: - - One or more (shell or regex) patterns, which restrict the list of files to be returned to - those whose basenames match at least one of the patterns specified. Multiple patterns can be - specified using a list. + - One or more (shell or regex) patterns, whose type is controlled by the C(use_regex) option. 
+ - The patterns restrict the list of files to be returned to those whose basenames match at + least one of the patterns specified. Multiple patterns can be specified using a list. aliases: ['pattern'] contains: required: false @@ -109,6 +107,12 @@ options: choices: [ True, False ] description: - Set this to true to retrieve a file's sha1 checksum + use_regex: + required: false + default: "False" + choices: [ True, False ] + description: + - If false, the patterns are file globs (shell); if true, they are Python regexes. ''' @@ -122,9 +126,11 @@ EXAMPLES = ''' # Recursively find /var/tmp files with last access time greater than 3600 seconds - find: paths="/var/tmp" age="3600" age_stamp=atime recurse=yes -# find /var/log files equal or greater than 10 megabytes ending with .old or .log.gz via regex -- find: paths="/var/tmp" patterns="^.*?\.(?:old|log\.gz)$" size="10m" +# find /var/log files equal or greater than 10 megabytes ending with .old or .log.gz +- find: paths="/var/log" patterns="'*.old','*.log.gz'" size="10m" +# find /var/log files equal or greater than 10 megabytes ending with .old or .log.gz via regex +- find: paths="/var/log" patterns="^.*?\.(?:old|log\.gz)$" size="10m" use_regex=True ''' RETURN = ''' @@ -154,27 +160,24 @@ examined: sample: 34 ''' -def pfilter(f, patterns=None): +def pfilter(f, patterns=None, use_regex=False): '''filter using glob patterns''' if patterns is None: return True - match = False - for p in patterns: - try: + if use_regex: + for p in patterns: r = re.compile(p) - match = r.match(f) - except: - pass - - if not match: - match = fnmatch.fnmatch(f, p) + if r.match(f): + return True + else: - if match: - break + for p in patterns: + if fnmatch.fnmatch(f, p): + return True - return match + return False def agefilter(st, now, age, timestamp): @@ -262,6 +265,7 @@ def main(): hidden = dict(default="False", type='bool'), follow = dict(default="False", type='bool'), get_checksum = dict(default="False", type='bool'), + use_regex = 
dict(default="False", type='bool'), ), ) @@ -307,16 +311,21 @@ def main(): if os.path.basename(fsname).startswith('.') and not params['hidden']: continue - st = os.stat(fsname) + try: + st = os.stat(fsname) + except: + msg+="%s was skipped as it does not seem to be a valid file or it cannot be accessed\n" % fsname + continue + r = {'path': fsname} if stat.S_ISDIR(st.st_mode) and params['file_type'] == 'directory': - if pfilter(fsobj, params['patterns']) and agefilter(st, now, age, params['age_stamp']): + if pfilter(fsobj, params['patterns'], params['use_regex']) and agefilter(st, now, age, params['age_stamp']): r.update(statinfo(st)) filelist.append(r) elif stat.S_ISREG(st.st_mode) and params['file_type'] == 'file': - if pfilter(fsobj, params['patterns']) and \ + if pfilter(fsobj, params['patterns'], params['use_regex']) and \ agefilter(st, now, age, params['age_stamp']) and \ sizefilter(st, size) and \ contentfilter(fsname, params['contains']): @@ -329,7 +338,7 @@ def main(): if not params['recurse']: break else: - msg+="%s was skipped as it does not seem to be a valid directory or it cannot be accessed\n" + msg+="%s was skipped as it does not seem to be a valid directory or it cannot be accessed\n" % npath matched = len(filelist) module.exit_json(files=filelist, changed=False, msg=msg, matched=matched, examined=looked)