From acc8c3ccf87b46e9c7773e70a1d6163c1bfb6adb Mon Sep 17 00:00:00 2001 From: Matt Clay Date: Thu, 15 Dec 2022 16:43:01 -0800 Subject: [PATCH] [stable-2.14] ansible-test - Improve container startup handling. Also improve the ansible-test-container integration test: - Add coverage for the no-probe code path. - Add work-arounds for centos6 containers (to support backporting). - Avoid systemd debug when the container doesn't use cgroup. (cherry picked from commit 04fc98c794d425a42f83a062c163c981d8751512) Co-authored-by: Matt Clay --- .../ansible-test-container-management.yml | 2 + .../targets/ansible-test-container/runme.py | 50 +++++++++++++++++-- .../ansible_test/_internal/host_profiles.py | 33 +++++++++--- 3 files changed, 76 insertions(+), 9 deletions(-) diff --git a/changelogs/fragments/ansible-test-container-management.yml b/changelogs/fragments/ansible-test-container-management.yml index 293d6327136..0d0c77ed8c3 100644 --- a/changelogs/fragments/ansible-test-container-management.yml +++ b/changelogs/fragments/ansible-test-container-management.yml @@ -56,6 +56,8 @@ bugfixes: - ansible-test - Detection for running in a Podman or Docker container has been fixed to detect more scenarios. The new detection relies on ``/proc/self/mountinfo`` instead of ``/proc/self/cpuset``. Detection now works with custom cgroups and private cgroup namespaces. + - ansible-test - Avoid using ``exec`` after container startup when possible. + This improves container startup performance and avoids intermittent startup issues with some old containers. known_issues: - ansible-test - Using Docker on systems with SELinux may require setting SELinux to permissive mode. Podman should work with SELinux in enforcing mode. diff --git a/test/integration/targets/ansible-test-container/runme.py b/test/integration/targets/ansible-test-container/runme.py index d91cf9333d1..687128056f7 100755 --- a/test/integration/targets/ansible-test-container/runme.py +++ b/test/integration/targets/ansible-test-container/runme.py @@ -149,10 +149,29 @@ def get_test_scenarios() -> list[TestScenario]: image = settings['image'] cgroup = settings.get('cgroup', 'v1-v2') + if container_name == 'centos6' and os_release.id == 'alpine': + # Alpine kernels do not emulate vsyscall by default, which causes the centos6 container to fail during init. + # See: https://unix.stackexchange.com/questions/478387/running-a-centos-docker-image-on-arch-linux-exits-with-code-139 + # Other distributions enable settings which trap vsyscall by default. + # See: https://www.kernelconfig.io/config_legacy_vsyscall_xonly + # See: https://www.kernelconfig.io/config_legacy_vsyscall_emulate + continue + for engine in available_engines: # TODO: figure out how to get tests passing using docker without disabling selinux disable_selinux = os_release.id == 'fedora' and engine == 'docker' and cgroup != 'none' expose_cgroup_v1 = cgroup == 'v1-only' and get_docker_info(engine).cgroup_version != 1 + debug_systemd = cgroup != 'none' + + # The sleep+pkill used to support the cgroup probe causes problems with the centos6 container. + # It results in sshd connections being refused or reset for many, but not all, container instances. + # The underlying cause of this issue is unknown. + probe_cgroups = container_name != 'centos6' + + # The default RHEL 9 crypto policy prevents use of SHA-1. + # This results in SSH errors with centos6 containers: ssh_dispatch_run_fatal: Connection to 1.2.3.4 port 22: error in libcrypto + # See: https://access.redhat.com/solutions/6816771 + enable_sha1 = os_release.id == 'rhel' and os_release.version_id.startswith('9.') and container_name == 'centos6' if cgroup != 'none' and get_docker_info(engine).cgroup_version == 1 and not have_cgroup_systemd(): expose_cgroup_v1 = True # the host uses cgroup v1 but there is no systemd cgroup and the container requires cgroup support @@ -182,6 +201,9 @@ def get_test_scenarios() -> list[TestScenario]: image=image, disable_selinux=disable_selinux, expose_cgroup_v1=expose_cgroup_v1, + enable_sha1=enable_sha1, + debug_systemd=debug_systemd, + probe_cgroups=probe_cgroups, ) ) @@ -195,11 +217,21 @@ def run_test(scenario: TestScenario) -> TestResult: start = time.monotonic() integration = ['ansible-test', 'integration', 'split'] - integration_options = ['--target', f'docker:{scenario.container_name}', '--color', '--truncate', '0', '-v', '--dev-probe-cgroups', str(LOG_PATH), - '--dev-systemd-debug'] + integration_options = ['--target', f'docker:{scenario.container_name}', '--color', '--truncate', '0', '-v'] + target_only_options = [] + + if scenario.debug_systemd: + integration_options.append('--dev-systemd-debug') + + if scenario.probe_cgroups: + target_only_options = ['--dev-probe-cgroups', str(LOG_PATH)] commands = [ - [*integration, *integration_options], + # The cgroup probe is only performed for the first test of the target. + # There's no need to repeat the probe again for the same target. + # The controller will be tested separately as a target. + # This ensures that both the probe and no-probe code paths are functional. + [*integration, *integration_options, *target_only_options], # For the split test we'll use alpine3 as the controller. There are two reasons for this: # 1) It doesn't require the cgroup v1 hack, so we can test a target that doesn't need that. # 2) It doesn't require disabling selinux, so we can test a target that doesn't need that. @@ -260,12 +292,18 @@ def run_test(scenario: TestScenario) -> TestResult: if scenario.disable_selinux: run_command('setenforce', 'permissive') + if scenario.enable_sha1: + run_command('update-crypto-policies', '--set', 'DEFAULT:SHA1') + for test_command in test_commands: retry_command(lambda: run_command(*test_command)) except SubprocessError as ex: message = str(ex) display.error(f'{scenario} {message}') finally: + if scenario.enable_sha1: + run_command('update-crypto-policies', '--set', 'DEFAULT') + if scenario.disable_selinux: run_command('setenforce', 'enforcing') @@ -519,6 +557,9 @@ class TestScenario: image: str disable_selinux: bool expose_cgroup_v1: bool + enable_sha1: bool + debug_systemd: bool + probe_cgroups: bool @property def tags(self) -> tuple[str, ...]: @@ -536,6 +577,9 @@ class TestScenario: if self.expose_cgroup_v1: tags.append('cgroup: v1') + if self.enable_sha1: + tags.append('sha1: enabled') + return tuple(tags) @property diff --git a/test/lib/ansible_test/_internal/host_profiles.py b/test/lib/ansible_test/_internal/host_profiles.py index b4742d88ebb..6575e7c1ca2 100644 --- a/test/lib/ansible_test/_internal/host_profiles.py +++ b/test/lib/ansible_test/_internal/host_profiles.py @@ -411,6 +411,7 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do """Configuration details required to run the container init.""" options: list[str] command: str + command_privileged: bool expected_mounts: tuple[CGroupMount, ...] @property @@ -452,12 +453,12 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do publish_ports=not self.controller, # connections to the controller over SSH are not required options=init_config.options, cleanup=CleanupMode.NO, - cmd=self.build_sleep_command() if init_config.command or init_probe else None, + cmd=self.build_init_command(init_config, init_probe), ) if not container: if self.args.prime_containers: - if init_config.command or init_probe: + if init_config.command_privileged or init_probe: docker_pull(self.args, UTILITY_IMAGE) return @@ -467,7 +468,7 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do try: options = ['--pid', 'host', '--privileged'] - if init_config.command: + if init_config.command and init_config.command_privileged: init_command = init_config.command if not init_probe: @@ -500,6 +501,7 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do """Return init config for running under Podman.""" options = self.get_common_run_options() command: t.Optional[str] = None + command_privileged = False expected_mounts: tuple[CGroupMount, ...] cgroup_version = get_docker_info(self.args).cgroup_version @@ -651,6 +653,7 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do return self.InitConfig( options=options, command=command, + command_privileged=command_privileged, expected_mounts=expected_mounts, ) @@ -658,6 +661,7 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do """Return init config for running under Docker.""" options = self.get_common_run_options() command: t.Optional[str] = None + command_privileged = False expected_mounts: tuple[CGroupMount, ...] cgroup_version = get_docker_info(self.args).cgroup_version @@ -724,7 +728,9 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do elif self.config.cgroup in (CGroupVersion.V1_V2, CGroupVersion.V2_ONLY) and cgroup_version == 2: # Docker hosts providing cgroup v2 will give each container a read-only cgroup mount. # It must be remounted read-write before systemd starts. + # This must be done in a privileged container, otherwise a "permission denied" error can occur. command = 'mount -o remount,rw /sys/fs/cgroup/' + command_privileged = True options.extend(( # A private cgroup namespace is used to avoid exposing the host cgroup to the container. @@ -768,12 +774,14 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do return self.InitConfig( options=options, command=command, + command_privileged=command_privileged, expected_mounts=expected_mounts, ) - def build_sleep_command(self) -> list[str]: + def build_init_command(self, init_config: InitConfig, sleep: bool) -> t.Optional[list[str]]: """ - Build and return the command to put the container to sleep. + Build and return the command to start in the container. + Returns None if the default command for the container should be used. The sleep duration below was selected to: @@ -783,10 +791,23 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do NOTE: The container must have a POSIX-compliant default shell "sh" with a non-builtin "sleep" command. """ + command = '' + + if init_config.command and not init_config.command_privileged: + command += f'{init_config.command} && ' + + if sleep or init_config.command_privileged: + command += 'sleep 60 ; ' + + if not command: + return None + docker_pull(self.args, self.config.image) inspect = docker_image_inspect(self.args, self.config.image) - return ['sh', '-c', f'sleep 60; exec {shlex.join(inspect.cmd)}'] + command += f'exec {shlex.join(inspect.cmd)}' + + return ['sh', '-c', command] @property def wake_command(self) -> list[str]: