ansible-test - Improve container startup handling.

Also improve the ansible-test-container integration test:

 - Add coverage for the no-probe code path.
 - Add work-arounds for centos6 containers (to support backporting).
 - Avoid enabling systemd debug when the container doesn't use cgroups.
pull/79615/head
Matt Clay 1 year ago
parent 69c874f478
commit 04fc98c794

@ -56,6 +56,8 @@ bugfixes:
- ansible-test - Detection for running in a Podman or Docker container has been fixed to detect more scenarios.
The new detection relies on ``/proc/self/mountinfo`` instead of ``/proc/self/cpuset``.
Detection now works with custom cgroups and private cgroup namespaces.
- ansible-test - Avoid using ``exec`` after container startup when possible.
This improves container startup performance and avoids intermittent startup issues with some old containers.
known_issues:
- ansible-test - Using Docker on systems with SELinux may require setting SELinux to permissive mode.
Podman should work with SELinux in enforcing mode.

@ -149,10 +149,29 @@ def get_test_scenarios() -> list[TestScenario]:
image = settings['image']
cgroup = settings.get('cgroup', 'v1-v2')
if container_name == 'centos6' and os_release.id == 'alpine':
# Alpine kernels do not emulate vsyscall by default, which causes the centos6 container to fail during init.
# See: https://unix.stackexchange.com/questions/478387/running-a-centos-docker-image-on-arch-linux-exits-with-code-139
# Other distributions enable settings which trap vsyscall by default.
# See: https://www.kernelconfig.io/config_legacy_vsyscall_xonly
# See: https://www.kernelconfig.io/config_legacy_vsyscall_emulate
continue
for engine in available_engines:
# TODO: figure out how to get tests passing using docker without disabling selinux
disable_selinux = os_release.id == 'fedora' and engine == 'docker' and cgroup != 'none'
expose_cgroup_v1 = cgroup == 'v1-only' and get_docker_info(engine).cgroup_version != 1
debug_systemd = cgroup != 'none'
# The sleep+pkill used to support the cgroup probe causes problems with the centos6 container.
# It results in sshd connections being refused or reset for many, but not all, container instances.
# The underlying cause of this issue is unknown.
probe_cgroups = container_name != 'centos6'
# The default RHEL 9 crypto policy prevents use of SHA-1.
# This results in SSH errors with centos6 containers: ssh_dispatch_run_fatal: Connection to 1.2.3.4 port 22: error in libcrypto
# See: https://access.redhat.com/solutions/6816771
enable_sha1 = os_release.id == 'rhel' and os_release.version_id.startswith('9.') and container_name == 'centos6'
if cgroup != 'none' and get_docker_info(engine).cgroup_version == 1 and not have_cgroup_systemd():
expose_cgroup_v1 = True # the host uses cgroup v1 but there is no systemd cgroup and the container requires cgroup support
@ -182,6 +201,9 @@ def get_test_scenarios() -> list[TestScenario]:
image=image,
disable_selinux=disable_selinux,
expose_cgroup_v1=expose_cgroup_v1,
enable_sha1=enable_sha1,
debug_systemd=debug_systemd,
probe_cgroups=probe_cgroups,
)
)
@ -195,11 +217,21 @@ def run_test(scenario: TestScenario) -> TestResult:
start = time.monotonic()
integration = ['ansible-test', 'integration', 'split']
integration_options = ['--target', f'docker:{scenario.container_name}', '--color', '--truncate', '0', '-v', '--dev-probe-cgroups', str(LOG_PATH),
'--dev-systemd-debug']
integration_options = ['--target', f'docker:{scenario.container_name}', '--color', '--truncate', '0', '-v']
target_only_options = []
if scenario.debug_systemd:
integration_options.append('--dev-systemd-debug')
if scenario.probe_cgroups:
target_only_options = ['--dev-probe-cgroups', str(LOG_PATH)]
commands = [
[*integration, *integration_options],
# The cgroup probe is only performed for the first test of the target.
# There's no need to repeat the probe again for the same target.
# The controller will be tested separately as a target.
# This ensures that both the probe and no-probe code paths are functional.
[*integration, *integration_options, *target_only_options],
# For the split test we'll use alpine3 as the controller. There are two reasons for this:
# 1) It doesn't require the cgroup v1 hack, so we can test a target that doesn't need that.
# 2) It doesn't require disabling selinux, so we can test a target that doesn't need that.
@ -260,12 +292,18 @@ def run_test(scenario: TestScenario) -> TestResult:
if scenario.disable_selinux:
run_command('setenforce', 'permissive')
if scenario.enable_sha1:
run_command('update-crypto-policies', '--set', 'DEFAULT:SHA1')
for test_command in test_commands:
retry_command(lambda: run_command(*test_command))
except SubprocessError as ex:
message = str(ex)
display.error(f'{scenario} {message}')
finally:
if scenario.enable_sha1:
run_command('update-crypto-policies', '--set', 'DEFAULT')
if scenario.disable_selinux:
run_command('setenforce', 'enforcing')
@ -519,6 +557,9 @@ class TestScenario:
image: str
disable_selinux: bool
expose_cgroup_v1: bool
enable_sha1: bool
debug_systemd: bool
probe_cgroups: bool
@property
def tags(self) -> tuple[str, ...]:
@ -536,6 +577,9 @@ class TestScenario:
if self.expose_cgroup_v1:
tags.append('cgroup: v1')
if self.enable_sha1:
tags.append('sha1: enabled')
return tuple(tags)
@property

@ -411,6 +411,7 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do
"""Configuration details required to run the container init."""
options: list[str]
command: str
command_privileged: bool
expected_mounts: tuple[CGroupMount, ...]
@property
@ -452,12 +453,12 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do
publish_ports=not self.controller, # connections to the controller over SSH are not required
options=init_config.options,
cleanup=CleanupMode.NO,
cmd=self.build_sleep_command() if init_config.command or init_probe else None,
cmd=self.build_init_command(init_config, init_probe),
)
if not container:
if self.args.prime_containers:
if init_config.command or init_probe:
if init_config.command_privileged or init_probe:
docker_pull(self.args, UTILITY_IMAGE)
return
@ -467,7 +468,7 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do
try:
options = ['--pid', 'host', '--privileged']
if init_config.command:
if init_config.command and init_config.command_privileged:
init_command = init_config.command
if not init_probe:
@ -500,6 +501,7 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do
"""Return init config for running under Podman."""
options = self.get_common_run_options()
command: t.Optional[str] = None
command_privileged = False
expected_mounts: tuple[CGroupMount, ...]
cgroup_version = get_docker_info(self.args).cgroup_version
@ -651,6 +653,7 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do
return self.InitConfig(
options=options,
command=command,
command_privileged=command_privileged,
expected_mounts=expected_mounts,
)
@ -658,6 +661,7 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do
"""Return init config for running under Docker."""
options = self.get_common_run_options()
command: t.Optional[str] = None
command_privileged = False
expected_mounts: tuple[CGroupMount, ...]
cgroup_version = get_docker_info(self.args).cgroup_version
@ -724,7 +728,9 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do
elif self.config.cgroup in (CGroupVersion.V1_V2, CGroupVersion.V2_ONLY) and cgroup_version == 2:
# Docker hosts providing cgroup v2 will give each container a read-only cgroup mount.
# It must be remounted read-write before systemd starts.
# This must be done in a privileged container, otherwise a "permission denied" error can occur.
command = 'mount -o remount,rw /sys/fs/cgroup/'
command_privileged = True
options.extend((
# A private cgroup namespace is used to avoid exposing the host cgroup to the container.
@ -768,12 +774,14 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do
return self.InitConfig(
options=options,
command=command,
command_privileged=command_privileged,
expected_mounts=expected_mounts,
)
def build_sleep_command(self) -> list[str]:
def build_init_command(self, init_config: InitConfig, sleep: bool) -> t.Optional[list[str]]:
"""
Build and return the command to put the container to sleep.
Build and return the command to start in the container.
Returns None if the default command for the container should be used.
The sleep duration below was selected to:
@ -783,10 +791,23 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do
NOTE: The container must have a POSIX-compliant default shell "sh" with a non-builtin "sleep" command.
"""
command = ''
if init_config.command and not init_config.command_privileged:
command += f'{init_config.command} && '
if sleep or init_config.command_privileged:
command += 'sleep 60 ; '
if not command:
return None
docker_pull(self.args, self.config.image)
inspect = docker_image_inspect(self.args, self.config.image)
return ['sh', '-c', f'sleep 60; exec {shlex.join(inspect.cmd)}']
command += f'exec {shlex.join(inspect.cmd)}'
return ['sh', '-c', command]
@property
def wake_command(self) -> list[str]:

Loading…
Cancel
Save