@ -1,4 +1,4 @@
# (c) 2012-2014, Michael DeHaan <michael.dehaan@gmail .com>
# Copyright 2015 Abhijit Menon-Sen <ams@2ndQuadrant .com>
#
# This file is part of Ansible
#
@ -33,193 +33,357 @@ from ansible.utils.unicode import to_unicode
class InventoryParser ( object ) :
"""
Host inventory for ansible .
Takes an INI - format inventory file and builds a list of groups and subgroups
with their associated hosts and variable settings .
"""
def __init__ ( self , filename = C . DEFAULT_HOST_LIST ) :
# NOTE(review): this span looks like two revisions of the constructor merged
# line-by-line, with an older `_parse` method sitting in the middle (diff hunk
# markers are visible near the top of the file) -- TODO confirm and regenerate
# the file from the underlying patch before relying on it.
#
# What the visible statements do: remember the inventory path, read every
# line of that file into self.lines, reset self.groups / self.hosts, and
# call self._parse().
self . filename = filename
with open ( filename ) as fh :
self . lines = fh . readlines ( )
self . groups = { }
self . hosts = { }
self . _parse ( )
# Start with an empty host list and the default 'all' and
# 'ungrouped' groups.
def _parse ( self ) :
# Runs four helper passes in order and returns self.groups.
self . _parse_base_groups ( )
self . _parse_group_children ( )
self . _add_allgroup_children ( )
self . _parse_group_variables ( )
return self . groups
# NOTE(review): the assignments below follow a `return`, so as written they
# cannot run as part of `_parse`; presumably they belong to `__init__` and
# match the "Start with an empty host list" comment above -- TODO confirm.
self . hosts = { }
self . patterns = { }
self . groups = dict (
all = Group ( name = ' all ' ) ,
ungrouped = Group ( name = ' ungrouped ' )
)
@staticmethod
def _parse_value(v):
    '''
    Turn a raw inventory value into the object that gets stored.

    Unless the text contains the comment marker tested below, it is handed
    to ast.literal_eval. A ValueError or SyntaxError from that call is
    ignored, which leaves the raw text in place. The result is always
    returned through to_unicode with the keyword arguments shown.
    '''
    has_comment_marker = " # " in v
    if not has_comment_marker:
        try:
            v = ast.literal_eval(v)
        except (ValueError, SyntaxError):
            # literal_eval rejected the text (for example a bare word, or
            # something that is not a complete Python literal); keep the
            # raw text unchanged.
            pass
    return to_unicode(v, errors=' strict ', nonstring=' passthru ')
# Read in the hosts, groups, and variables defined in the
# inventory file.
# [webservers]
# alpha
# beta:2345
# gamma sudo=True user=root
# delta asdf=jkl favcolor=red
# NOTE(review): these three statements have no enclosing `def` in this view and
# use a bare `filename`; presumably they are the tail of the constructor (which
# takes `filename`) -- TODO confirm. They read the whole file into self.lines
# and then run self._parse().
with open ( filename ) as fh :
self . lines = fh . readlines ( )
self . _parse ( )
def _add_allgroup_children ( self ) :
# Finally, add all top-level groups (including 'ungrouped') as
# children of 'all'.
for group in self . groups . values ( ) :
if group . depth == 0 and group . name != ' all ' :
self . groups [ ' all ' ] . add_child_group ( group )
# Note: we could discard self.hosts after this point.
def _parse_base_groups ( self ) :
# FIXME: refactor
# NOTE(review): from here to the end of `_parse`, two implementations appear
# to be interleaved: a single-pass parser driven by `section` / `state` /
# `pending_declarations`, and an older loop that tracks `active_group_name`
# and tokenises host lines with shlex. The `def _parse_base_groups` header
# above has no body of its own in this view -- TODO confirm and regenerate
# the file from the underlying patch.
def _parse ( self ) :
'''
Populates self . groups from the contents of self . lines . Raises an error
on any parse failure .
'''
ungrouped = Group ( name = ' ungrouped ' )
all = Group ( name = ' all ' )
all . add_child_group ( ungrouped )
self . _compile_patterns ( )
# We behave as though the first line of the inventory is '[ungrouped]',
# and begin to look for host definitions. We make a single pass through
# each line of the inventory, building up self.groups and adding hosts,
# subgroups, and setting variables as we go.
self . groups = dict ( all = all , ungrouped = ungrouped )
active_group_name = ' ungrouped '
pending_declarations = { }
section = ' ungrouped '
state = ' hosts '
# i is the 1-based number of the line being processed; it is only used to
# build error messages.
i = 0
for line in self . lines :
i + = 1
line = self . _before_comment ( line ) . strip ( )
if line . startswith ( " [ " ) and line . endswith ( " ] " ) :
active_group_name = line . replace ( " [ " , " " ) . replace ( " ] " , " " )
if " :vars " in line or " :children " in line :
active_group_name = active_group_name . rsplit ( " : " , 1 ) [ 0 ]
if active_group_name not in self . groups :
new_group = self . groups [ active_group_name ] = Group ( name = active_group_name )
active_group_name = None
elif active_group_name not in self . groups :
new_group = self . groups [ active_group_name ] = Group ( name = active_group_name )
elif line . startswith ( " ; " ) or line == ' ' :
pass
elif active_group_name :
try :
tokens = shlex . split ( line )
except ValueError as e :
raise AnsibleError ( " Error in %s , unable to parse L# %d : %s \n \n \t %s \n " % ( self . filename , i , str ( e ) , line ) )
if len ( tokens ) == 0 :
# Is there a better way to get rid of the ending \n?
line = line . strip ( )
# Skip empty lines and comments
if line == ' ' or line . startswith ( " ; " ) or line . startswith ( " # " ) :
continue
hostname = tokens [ 0 ]
port = None
# Three cases to check:
# 0. A hostname that contains a range pseudo-code and a port
# 1. A hostname that contains just a port
if hostname . count ( " : " ) > 1 :
# Possible an IPv6 address, or maybe a host line with multiple ranges
# IPv6 with Port XXX:XXX::XXX.port
# FQDN foo.example.com
if hostname . count ( " . " ) == 1 :
( hostname , port ) = hostname . rsplit ( " . " , 1 )
elif ( " [ " in hostname and
" ] " in hostname and
" : " in hostname and
( hostname . rindex ( " ] " ) < hostname . rindex ( " : " ) ) or
( " ] " not in hostname and " : " in hostname ) ) :
( hostname , port ) = hostname . rsplit ( " : " , 1 )
hostnames = [ ]
if detect_range ( hostname ) :
hostnames = expand_hostname_range ( hostname )
else :
hostnames = [ hostname ]
for hn in hostnames :
host = None
if hn in self . hosts :
host = self . hosts [ hn ]
# Is this a [section] header? That tells us what group we're parsing
# definitions for, and what kind of definitions to expect.
m = self . patterns [ ' section ' ] . match ( line )
if m :
( section , state ) = m . groups ( )
state = state or ' hosts '
if state not in [ ' hosts ' , ' children ' , ' vars ' ] :
title = " : " . join ( m . groups ( ) )
raise AnsibleError ( " %s : %d : Section [ %s ] has unknown type: %s " % ( self . filename , i , title , state ) )
# If we haven't seen this section before, we add a new Group.
#
# Either [groupname] or [groupname:children] is sufficient to
# declare a group, but [groupname:vars] is allowed only if the
# group is declared elsewhere (not necessarily earlier). We add
# the group anyway, but make a note in pending_declarations to
# check at the end.
if section not in self . groups :
self . groups [ section ] = Group ( name = section )
if state == ' vars ' :
pending_declarations [ section ] = dict ( line = i , state = state , name = section )
# When we see a declaration that we've been waiting for, we can
# delete the note.
if section in pending_declarations and state != ' vars ' :
del pending_declarations [ section ]
continue
# It's not a section, so the current state tells us what kind of
# definition it must be. The individual parsers will raise an
# error if we feed them something they can't digest.
# [groupname] contains host definitions that must be added to
# the current group.
if state == ' hosts ' :
hosts = self . _parse_host_definition ( line , i )
for h in hosts :
self . groups [ section ] . add_host ( h )
# [groupname:vars] contains variable definitions that must be
# applied to the current group.
elif state == ' vars ' :
( k , v ) = self . _parse_variable_definition ( line , i )
self . groups [ section ] . set_variable ( k , v )
# [groupname:children] contains subgroup names that must be
# added as children of the current group. The subgroup names
# must themselves be declared as groups, but as before, they
# may only be declared later.
elif state == ' children ' :
child = self . _parse_group_name ( line , i )
if child not in self . groups :
self . groups [ child ] = Group ( name = child )
pending_declarations [ child ] = dict ( line = i , state = state , name = child , parent = section )
self . groups [ section ] . add_child_group ( self . groups [ child ] )
# Note: there's no reason why we couldn't accept variable
# definitions here, and set them on the named child group.
# This is a fencepost. It can happen only if the state checker
# accepts a state that isn't handled above.
else :
# NOTE(review): the Host-creation and token-loop lines between this `else`
# and the `raise` refer to `hn`, `port` and `tokens`, which are only assigned
# in the shlex-based loop further up; presumably only the `raise` belongs to
# this fencepost branch -- TODO confirm.
host = Host ( name = hn , port = port )
self . hosts [ hn ] = host
if len ( tokens ) > 1 :
for t in tokens [ 1 : ] :
if t . startswith ( ' # ' ) :
break
raise AnsibleError ( " %s : %d : Entered unhandled state: %s " % ( self . filename , i , state ) )
# Any entries in pending_declarations not removed by a group declaration
# above mean that there was an unresolved forward reference. We report
# only the first such error here.
for g in pending_declarations :
decl = pending_declarations [ g ]
if decl [ ' state ' ] == ' vars ' :
raise AnsibleError ( " %s : %d : Section [ %s :vars] not valid for undefined group: %s " % ( self . filename , decl [ ' line ' ] , decl [ ' name ' ] , decl [ ' name ' ] ) )
elif decl [ ' state ' ] == ' children ' :
raise AnsibleError ( " %s : %d : Section [ %s :children] includes undefined group: %s " % ( self . filename , decl [ ' line ' ] , decl [ ' parent ' ] , decl [ ' name ' ] ) )
def _parse_group_name ( self , line , i ) :
'''
Takes a single line and tries to parse it as a group name . Returns the
group name if successful , or raises an error .
'''
m = self . patterns [ ' groupname ' ] . match ( line )
if m :
return m . group ( 1 )
raise AnsibleError ( " %s : %d : Expected group name, got: %s " % ( self . filename , i , line ) )
def _parse_variable_definition ( self , line , i ) :
'''
Takes a string and tries to parse it as a variable definition . Returns
the key and value if successful , or raises an error .
'''
# TODO: We parse variable assignments as a key (anything to the left of
# an '='"), an '=', and a value (anything left) and leave the value to
# _parse_value to sort out. We should be more systematic here about
# defining what is acceptable, how quotes work, and so on.
if ' = ' in line :
( k , v ) = [ e . strip ( ) for e in line . split ( " = " , 1 ) ]
return ( k , self . _parse_value ( v ) )
raise AnsibleError ( " %s : %d : Expected key=value, got: %s " % ( self . filename , i , line ) )
def _parse_host_definition ( self , line , i ) :
'''
Takes a single line and tries to parse it as a host definition . Returns
a list of Hosts if successful , or raises an error .
'''
# NOTE(review): this method appears to be interleaved with lines from an older
# per-token loop: the first `try` body and its Python-2-style
# `except ValueError , e` handler use `t` before the `for t in ...` loop, and
# the lines touching `host` / `active_group_name` use names never assigned in
# this method -- TODO confirm and regenerate from the underlying patch.
# A host definition comprises (1) a non-whitespace hostname or range,
# optionally followed by (2) a series of key="some value" assignments.
# We ignore any trailing whitespace and/or comments. For example, here
# are a series of host definitions in a group:
#
# [groupname]
# alpha
# beta:2345 user=admin # we'll tell shlex
# gamma sudo=True user=root # to ignore comments
try :
( k , v ) = t . split ( " = " , 1 )
except ValueError , e :
raise AnsibleError ( " Invalid ini entry in %s : %s - %s " % ( self . filename , t , str ( e ) ) )
v = self . _parse_value ( v )
tokens = shlex . split ( line , comments = True )
except ValueError as e :
# NOTE(review): `varstring` is not assigned anywhere in this method as shown,
# so formatting this message would raise NameError instead of AnsibleError;
# presumably `line` was intended -- TODO confirm.
raise AnsibleError ( " %s : %d : Error parsing host definition ' %s ' : %s " % ( self . filename , i , varstring , e ) )
# The first token is the host pattern; it is expanded into hostnames plus an
# optional port, which are then turned into Host objects.
( hostnames , port ) = self . _expand_hostpattern ( tokens [ 0 ] )
hosts = self . _Hosts ( hostnames , port )
# Try to process anything remaining as a series of key=value pairs.
variables = { }
for t in tokens [ 1 : ] :
if ' = ' not in t :
raise AnsibleError ( " %s : %d : Expected key=value host variable assignment, got: %s " % ( self . filename , i , t ) )
( k , v ) = t . split ( ' = ' , 1 )
variables [ k ] = self . _parse_value ( v )
# Apply any variable settings found to every host.
for h in hosts :
for k in variables :
h . set_variable ( k , variables [ k ] )
if k == ' ansible_ssh_host ' :
host . ipv4_address = v
host . set_variable ( k , v )
self . groups [ active_group_name ] . add_host ( host )
h . ipv4_address = variables [ k ]
# [southeast:children]
# atlanta
# raleigh
return hosts
def _parse_group_children ( self ) :
group = None
# NOTE(review): from here to the final `return ( hostnames , port )`, three
# methods appear to be interleaved: `_expand_hostpattern` (works on
# `pattern`), and older line-scanning loops for `_parse_group_children` and
# `_parse_group_variables` (work on `line` / `group`) -- TODO confirm and
# regenerate from the underlying patch.
def _expand_hostpattern ( self , pattern ) :
'''
Takes a single host pattern and returns a list of hostnames and an
optional port number that applies to all of them .
'''
for line in self . lines :
line = line . strip ( )
if line is None or line == ' ' :
continue
if line . startswith ( " [ " ) and " :children] " in line :
line = line . replace ( " [ " , " " ) . replace ( " :children] " , " " )
group = self . groups . get ( line , None )
if group is None :
group = self . groups [ line ] = Group ( name = line )
elif line . startswith ( " # " ) or line . startswith ( " ; " ) :
# First, we extract the port number. This is usually ":NN" at the end of
# the expression, but for IPv6 addresses it's ".NN" instead. In either
# case, we remove it.
port = None
if ' : ' in pattern :
pos = pattern . rindex ( ' : ' )
try :
# Only treat the trailing part as a port when int() accepts it; otherwise
# pattern and port are left untouched.
port = int ( pattern [ pos + 1 : ] )
pattern = pattern [ 0 : pos ]
except ValueError :
pass
elif line . startswith ( " [ " ) :
group = None
elif group :
kid_group = self . groups . get ( line , None )
if kid_group is None :
raise AnsibleError ( " child group is not defined: ( %s ) " % line )
else :
group . add_child_group ( kid_group )
m = self . patterns [ ' ipv6_hostport ' ] . match ( pattern )
if m :
( pattern , port ) = m . groups ( )
# We're done, because we know this is a single IPv6 address.
# But should we support ranges for IPv6 address generation?
# See the FIXME note below. We should probably just accept
# "[xxx]:nn" syntax instead, and then let xxx be expanded.
# [webservers:vars]
# http_port=1234
# maxRequestsPerChild=200
return ( [ pattern ] , int ( port ) )
def _parse_group_variables ( self ) :
group = None
for line in self . lines :
line = line . strip ( )
if line . startswith ( " [ " ) and " :vars] " in line :
line = line . replace ( " [ " , " " ) . replace ( " :vars] " , " " )
group = self . groups . get ( line , None )
if group is None :
raise AnsibleError ( " can ' t add vars to undefined group: %s " % line )
elif line . startswith ( " # " ) or line . startswith ( " ; " ) :
# Now we're left with just the pattern, which results in a list of one
# or more hostnames, depending on whether it contains any [x:y] ranges.
if detect_range ( pattern ) :
hostnames = expand_hostname_range ( pattern )
else :
hostnames = [ pattern ]
return ( hostnames , port )
def _Hosts ( self , hostnames , port ) :
'''
Takes a list of hostnames and a port ( which may be None ) and returns a
list of Hosts ( without recreating anything in self . hosts ) .
'''
hosts = [ ]
# Note that we decide whether or not to create a Host based solely on
# the (non-)existence of its hostname in self.hosts. This means that one
# cannot add both "foo:22" and "foo:23" to the inventory. This behaviour
# is preserved for now, but this may be an easy FIXME.
for hn in hostnames :
if hn not in self . hosts :
self . hosts [ hn ] = Host ( name = hn , port = port )
hosts . append ( self . hosts [ hn ] )
return hosts
@staticmethod
def _parse_value ( v ) :
'''
Tries to evaluate v with ast . literal_eval ( skipped when v contains the
marker tested below ) , keeps the raw text if that raises ValueError or
SyntaxError , and returns the result through to_unicode .
'''
# NOTE(review): the `elif line ...` / `elif group :` branches further down
# refer to `line` and `group`, which are not defined in this static method;
# they look like leftovers of a group-variables loop interleaved with this
# function. Another `_parse_value` definition also appears earlier in the
# file -- TODO confirm and regenerate from the underlying patch.
if " # " not in v :
try :
v = ast . literal_eval ( v )
# Using explicit exceptions.
# Likely a string that literal_eval does not like. We will then just set it.
except ValueError :
# For some reason this was thought to be malformed.
pass
elif line . startswith ( " [ " ) :
group = None
elif line == ' ' :
except SyntaxError :
# Is this a hash with an equals at the end?
pass
elif group :
if " = " not in line :
raise AnsibleError ( " variables assigned to group must be in key=value form " )
else :
( k , v ) = [ e . strip ( ) for e in line . split ( " = " , 1 ) ]
group . set_variable ( k , self . _parse_value ( v ) )
return to_unicode ( v , nonstring = ' passthru ' , errors = ' strict ' )
def get_host_variables(self, host):
    '''
    Returns the variables for host. This implementation ignores host and
    always returns a new, empty dict.
    '''
    return dict()
def _before_comment ( self , msg ) :
''' what ' s the part of a string before a comment? '''
msg = msg . replace ( " \ # " , " **NOT_A_COMMENT** " )
msg = msg . split ( " # " ) [ 0 ]
msg = msg . replace ( " **NOT_A_COMMENT** " , " # " )
return msg
def _compile_patterns ( self ) :
'''
Compiles the regular expressions required to parse the inventory and
stores them in self . patterns .
'''
# NOTE(review): all three patterns are compiled with re.X. In that mode
# whitespace is ignored only outside character classes and when not preceded
# by a backslash, so the spaces inside the [...] classes and after the
# backslashes in these literals are significant -- TODO confirm the spacing
# shown here is what is intended.
# Section names are square-bracketed expressions at the beginning of a
# line, comprising (1) a group name optionally followed by (2) a tag
# that specifies the contents of the section. We ignore any trailing
# whitespace and/or comments. For example:
#
# [groupname]
# [somegroup:vars]
# [naughty:children] # only get coal in their stockings
self . patterns [ ' section ' ] = re . compile (
r ''' ^ \ [
( [ ^ : \] \s ] + ) # group name (see groupname below)
( ? : : ( \w + ) ) ? # optional : and tag name
\]
\s * # ignore trailing whitespace
( ? : \#.*)? # and/or a comment till the
$ # end of the line
''' , re.X
)
# FIXME: What are the real restrictions on group names, or rather, what
# should they be? At the moment, they must be non-empty sequences of non
# whitespace characters excluding ':' and ']', but we should define more
# precise rules in order to support better diagnostics. The same applies
# to hostnames.
self . patterns [ ' groupname ' ] = re . compile (
r ''' ^
( [ ^ : \] \s ] + )
\s * # ignore trailing whitespace
( ? : \#.*)? # and/or a comment till the
$ # end of the line
''' , re.X
)
# This matches an IPv6 address, a '.', and a port number. It's not yet
# very strict about matching the IPv6 address.
#
# FIXME: There are various shortcomings in the IPv6 handling in the
# old code, which aren't fixed here yet. For example, Inventory's
# parse_inventory() method seems to accept "[ipv6]:nn" syntax. We
# should pick one and stick with it.
self . patterns [ ' ipv6_hostport ' ] = re . compile (
r ''' ^
( [ a - fA - F0 - 9 : ] + )
\. ( [ 0 - 9 ] + )
$
''' , re.X
)