mirror of https://github.com/ansible/ansible.git
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1099 lines
39 KiB
Python
1099 lines
39 KiB
Python
#!/usr/bin/env python
|
|
"""Test suite used to verify ansible-test is able to run its containers on various container hosts."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import abc
|
|
import dataclasses
|
|
import datetime
|
|
import errno
|
|
import functools
|
|
import json
|
|
import os
|
|
import pathlib
|
|
import pwd
|
|
import re
|
|
import secrets
|
|
import shlex
|
|
import shutil
|
|
import signal
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
import typing as t
|
|
|
|
UNPRIVILEGED_USER_NAME = 'ansible-test'
|
|
CGROUP_SYSTEMD = pathlib.Path('/sys/fs/cgroup/systemd')
|
|
LOG_PATH = pathlib.Path('/tmp/results')
|
|
|
|
# The value of /proc/*/loginuid when it is not set.
|
|
# It is a reserved UID, which is the maximum 32-bit unsigned integer value.
|
|
# See: https://access.redhat.com/solutions/25404
|
|
LOGINUID_NOT_SET = 4294967295
|
|
|
|
UID = os.getuid()
|
|
|
|
try:
|
|
LOGINUID = int(pathlib.Path('/proc/self/loginuid').read_text())
|
|
LOGINUID_MISMATCH = LOGINUID != LOGINUID_NOT_SET and LOGINUID != UID
|
|
except FileNotFoundError:
|
|
LOGINUID = None
|
|
LOGINUID_MISMATCH = False
|
|
|
|
|
|
def main() -> None:
|
|
"""Main program entry point."""
|
|
display.section('Startup check')
|
|
|
|
try:
|
|
bootstrap_type = pathlib.Path('/etc/ansible-test.bootstrap').read_text().strip()
|
|
except FileNotFoundError:
|
|
bootstrap_type = 'undefined'
|
|
|
|
display.info(f'Bootstrap type: {bootstrap_type}')
|
|
|
|
if bootstrap_type != 'remote':
|
|
display.warning('Skipping destructive test on system which is not an ansible-test remote provisioned instance.')
|
|
return
|
|
|
|
display.info(f'UID: {UID} / {LOGINUID}')
|
|
|
|
if UID != 0:
|
|
raise Exception('This test must be run as root.')
|
|
|
|
if not LOGINUID_MISMATCH:
|
|
if LOGINUID is None:
|
|
display.warning('Tests involving loginuid mismatch will be skipped on this host since it does not have audit support.')
|
|
elif LOGINUID == LOGINUID_NOT_SET:
|
|
display.warning('Tests involving loginuid mismatch will be skipped on this host since it is not set.')
|
|
elif LOGINUID == 0:
|
|
raise Exception('Use sudo, su, etc. as a non-root user to become root before running this test.')
|
|
else:
|
|
raise Exception()
|
|
|
|
display.section(f'Bootstrapping {os_release}')
|
|
|
|
bootstrapper = Bootstrapper.init()
|
|
bootstrapper.run()
|
|
|
|
result_dir = LOG_PATH
|
|
|
|
if result_dir.exists():
|
|
shutil.rmtree(result_dir)
|
|
|
|
result_dir.mkdir()
|
|
result_dir.chmod(0o777)
|
|
|
|
scenarios = get_test_scenarios()
|
|
results = [run_test(scenario) for scenario in scenarios]
|
|
error_total = 0
|
|
|
|
for name in sorted(result_dir.glob('*.log')):
|
|
lines = name.read_text().strip().splitlines()
|
|
error_count = len([line for line in lines if line.startswith('FAIL: ')])
|
|
error_total += error_count
|
|
|
|
display.section(f'Log ({error_count=}/{len(lines)}): {name.name}')
|
|
|
|
for line in lines:
|
|
if line.startswith('FAIL: '):
|
|
display.show(line, display.RED)
|
|
else:
|
|
display.show(line)
|
|
|
|
error_count = len([result for result in results if result.message])
|
|
error_total += error_count
|
|
|
|
duration = datetime.timedelta(seconds=int(sum(result.duration.total_seconds() for result in results)))
|
|
|
|
display.section(f'Test Results ({error_count=}/{len(results)}) [{duration}]')
|
|
|
|
for result in results:
|
|
notes = f' <cleanup: {", ".join(result.cleanup)}>' if result.cleanup else ''
|
|
|
|
if result.cgroup_dirs:
|
|
notes += f' <cgroup_dirs: {len(result.cgroup_dirs)}>'
|
|
|
|
notes += f' [{result.duration}]'
|
|
|
|
if result.message:
|
|
display.show(f'FAIL: {result.scenario} {result.message}{notes}', display.RED)
|
|
elif result.duration.total_seconds() >= 90:
|
|
display.show(f'SLOW: {result.scenario}{notes}', display.YELLOW)
|
|
else:
|
|
display.show(f'PASS: {result.scenario}{notes}')
|
|
|
|
if error_total:
|
|
sys.exit(1)
|
|
|
|
|
|
def get_test_scenarios() -> list[TestScenario]:
|
|
"""Generate and return a list of test scenarios."""
|
|
|
|
supported_engines = ('docker', 'podman')
|
|
available_engines = [engine for engine in supported_engines if shutil.which(engine)]
|
|
|
|
if not available_engines:
|
|
raise ApplicationError(f'No supported container engines found: {", ".join(supported_engines)}')
|
|
|
|
completion_lines = pathlib.Path(os.environ['PYTHONPATH'], '../test/lib/ansible_test/_data/completion/docker.txt').read_text().splitlines()
|
|
|
|
# TODO: consider including testing for the collection default image
|
|
entries = {name: value for name, value in (parse_completion_entry(line) for line in completion_lines) if name != 'default'}
|
|
|
|
unprivileged_user = User.get(UNPRIVILEGED_USER_NAME)
|
|
|
|
scenarios: list[TestScenario] = []
|
|
|
|
for container_name, settings in entries.items():
|
|
image = settings['image']
|
|
cgroup = settings.get('cgroup', 'v1-v2')
|
|
|
|
if container_name == 'centos6' and os_release.id == 'alpine':
|
|
# Alpine kernels do not emulate vsyscall by default, which causes the centos6 container to fail during init.
|
|
# See: https://unix.stackexchange.com/questions/478387/running-a-centos-docker-image-on-arch-linux-exits-with-code-139
|
|
# Other distributions enable settings which trap vsyscall by default.
|
|
# See: https://www.kernelconfig.io/config_legacy_vsyscall_xonly
|
|
# See: https://www.kernelconfig.io/config_legacy_vsyscall_emulate
|
|
continue
|
|
|
|
for engine in available_engines:
|
|
# TODO: figure out how to get tests passing using docker without disabling selinux
|
|
disable_selinux = os_release.id == 'fedora' and engine == 'docker' and cgroup != 'none'
|
|
expose_cgroup_v1 = cgroup == 'v1-only' and get_docker_info(engine).cgroup_version != 1
|
|
debug_systemd = cgroup != 'none'
|
|
|
|
# The sleep+pkill used to support the cgroup probe causes problems with the centos6 container.
|
|
# It results in sshd connections being refused or reset for many, but not all, container instances.
|
|
# The underlying cause of this issue is unknown.
|
|
probe_cgroups = container_name != 'centos6'
|
|
|
|
# The default RHEL 9 crypto policy prevents use of SHA-1.
|
|
# This results in SSH errors with centos6 containers: ssh_dispatch_run_fatal: Connection to 1.2.3.4 port 22: error in libcrypto
|
|
# See: https://access.redhat.com/solutions/6816771
|
|
enable_sha1 = os_release.id == 'rhel' and os_release.version_id.startswith('9.') and container_name == 'centos6'
|
|
|
|
if cgroup != 'none' and get_docker_info(engine).cgroup_version == 1 and not have_cgroup_systemd():
|
|
expose_cgroup_v1 = True # the host uses cgroup v1 but there is no systemd cgroup and the container requires cgroup support
|
|
|
|
user_scenarios = [
|
|
# TODO: test rootless docker
|
|
UserScenario(ssh=unprivileged_user),
|
|
]
|
|
|
|
if engine == 'podman':
|
|
user_scenarios.append(UserScenario(ssh=ROOT_USER))
|
|
|
|
# TODO: test podman remote on Alpine and Ubuntu hosts
|
|
# TODO: combine remote with ssh using different unprivileged users
|
|
if os_release.id not in ('alpine', 'ubuntu'):
|
|
user_scenarios.append(UserScenario(remote=unprivileged_user))
|
|
|
|
if LOGINUID_MISMATCH:
|
|
user_scenarios.append(UserScenario())
|
|
|
|
for user_scenario in user_scenarios:
|
|
scenarios.append(
|
|
TestScenario(
|
|
user_scenario=user_scenario,
|
|
engine=engine,
|
|
container_name=container_name,
|
|
image=image,
|
|
disable_selinux=disable_selinux,
|
|
expose_cgroup_v1=expose_cgroup_v1,
|
|
enable_sha1=enable_sha1,
|
|
debug_systemd=debug_systemd,
|
|
probe_cgroups=probe_cgroups,
|
|
)
|
|
)
|
|
|
|
return scenarios
|
|
|
|
|
|
def run_test(scenario: TestScenario) -> TestResult:
|
|
"""Run a test scenario and return the test results."""
|
|
display.section(f'Testing {scenario} Started')
|
|
|
|
start = time.monotonic()
|
|
|
|
integration = ['ansible-test', 'integration', 'split']
|
|
integration_options = ['--target', f'docker:{scenario.container_name}', '--color', '--truncate', '0', '-v']
|
|
target_only_options = []
|
|
|
|
if scenario.debug_systemd:
|
|
integration_options.append('--dev-systemd-debug')
|
|
|
|
if scenario.probe_cgroups:
|
|
target_only_options = ['--dev-probe-cgroups', str(LOG_PATH)]
|
|
|
|
commands = [
|
|
# The cgroup probe is only performed for the first test of the target.
|
|
# There's no need to repeat the probe again for the same target.
|
|
# The controller will be tested separately as a target.
|
|
# This ensures that both the probe and no-probe code paths are functional.
|
|
[*integration, *integration_options, *target_only_options],
|
|
# For the split test we'll use alpine3 as the controller. There are two reasons for this:
|
|
# 1) It doesn't require the cgroup v1 hack, so we can test a target that doesn't need that.
|
|
# 2) It doesn't require disabling selinux, so we can test a target that doesn't need that.
|
|
[*integration, '--controller', 'docker:alpine3', *integration_options],
|
|
]
|
|
|
|
common_env: dict[str, str] = {}
|
|
test_env: dict[str, str] = {}
|
|
|
|
if scenario.engine == 'podman':
|
|
if scenario.user_scenario.remote:
|
|
common_env.update(
|
|
# Podman 4.3.0 has a regression which requires a port for remote connections to work.
|
|
# See: https://github.com/containers/podman/issues/16509
|
|
CONTAINER_HOST=f'ssh://{scenario.user_scenario.remote.name}@localhost:22'
|
|
f'/run/user/{scenario.user_scenario.remote.pwnam.pw_uid}/podman/podman.sock',
|
|
CONTAINER_SSHKEY=str(pathlib.Path('~/.ssh/id_rsa').expanduser()), # TODO: add support for ssh + remote when the ssh user is not root
|
|
)
|
|
|
|
test_env.update(ANSIBLE_TEST_PREFER_PODMAN='1')
|
|
|
|
test_env.update(common_env)
|
|
|
|
if scenario.user_scenario.ssh:
|
|
client_become_cmd = ['ssh', f'{scenario.user_scenario.ssh.name}@localhost']
|
|
test_commands = [client_become_cmd + [f'cd ~/ansible; {format_env(test_env)}{sys.executable} bin/{shlex.join(command)}'] for command in commands]
|
|
else:
|
|
client_become_cmd = ['sh', '-c']
|
|
test_commands = [client_become_cmd + [f'{format_env(test_env)}{shlex.join(command)}'] for command in commands]
|
|
|
|
prime_storage_command = []
|
|
|
|
if scenario.engine == 'podman' and scenario.user_scenario.actual.name == UNPRIVILEGED_USER_NAME:
|
|
# When testing podman we need to make sure that the overlay filesystem is used instead of vfs.
|
|
# Using the vfs filesystem will result in running out of disk space during the tests.
|
|
# To change the filesystem used, the existing storage directory must be removed before "priming" the storage database.
|
|
#
|
|
# Without this change the following message may be displayed:
|
|
#
|
|
# User-selected graph driver "overlay" overwritten by graph driver "vfs" from database - delete libpod local files to resolve
|
|
#
|
|
# However, with this change it may be replaced with the following message:
|
|
#
|
|
# User-selected graph driver "vfs" overwritten by graph driver "overlay" from database - delete libpod local files to resolve
|
|
|
|
actual_become_cmd = ['ssh', f'{scenario.user_scenario.actual.name}@localhost']
|
|
prime_storage_command = actual_become_cmd + prepare_prime_podman_storage()
|
|
|
|
message = ''
|
|
|
|
if scenario.expose_cgroup_v1:
|
|
prepare_cgroup_systemd(scenario.user_scenario.actual.name, scenario.engine)
|
|
|
|
try:
|
|
if prime_storage_command:
|
|
retry_command(lambda: run_command(*prime_storage_command), retry_any_error=True)
|
|
|
|
if scenario.disable_selinux:
|
|
run_command('setenforce', 'permissive')
|
|
|
|
if scenario.enable_sha1:
|
|
run_command('update-crypto-policies', '--set', 'DEFAULT:SHA1')
|
|
|
|
for test_command in test_commands:
|
|
retry_command(lambda: run_command(*test_command))
|
|
except SubprocessError as ex:
|
|
message = str(ex)
|
|
display.error(f'{scenario} {message}')
|
|
finally:
|
|
if scenario.enable_sha1:
|
|
run_command('update-crypto-policies', '--set', 'DEFAULT')
|
|
|
|
if scenario.disable_selinux:
|
|
run_command('setenforce', 'enforcing')
|
|
|
|
if scenario.expose_cgroup_v1:
|
|
dirs = remove_cgroup_systemd()
|
|
else:
|
|
dirs = list_group_systemd()
|
|
|
|
cleanup_command = [scenario.engine, 'rmi', '-f', scenario.image]
|
|
|
|
try:
|
|
retry_command(lambda: run_command(*client_become_cmd + [f'{format_env(common_env)}{shlex.join(cleanup_command)}']), retry_any_error=True)
|
|
except SubprocessError as ex:
|
|
display.error(str(ex))
|
|
|
|
cleanup = cleanup_podman() if scenario.engine == 'podman' else tuple()
|
|
|
|
finish = time.monotonic()
|
|
duration = datetime.timedelta(seconds=int(finish - start))
|
|
|
|
display.section(f'Testing {scenario} Completed in {duration}')
|
|
|
|
return TestResult(
|
|
scenario=scenario,
|
|
message=message,
|
|
cleanup=cleanup,
|
|
duration=duration,
|
|
cgroup_dirs=tuple(str(path) for path in dirs),
|
|
)
|
|
|
|
|
|
def prepare_prime_podman_storage() -> list[str]:
|
|
"""Partially prime podman storage and return a command to complete the remainder."""
|
|
prime_storage_command = ['rm -rf ~/.local/share/containers; STORAGE_DRIVER=overlay podman pull quay.io/bedrock/alpine:3.16.2']
|
|
|
|
test_containers = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.local/share/containers').expanduser()
|
|
|
|
if test_containers.is_dir():
|
|
# First remove the directory as root, since the user may not have permissions on all the files.
|
|
# The directory will be removed again after login, before initializing the database.
|
|
rmtree(test_containers)
|
|
|
|
return prime_storage_command
|
|
|
|
|
|
def cleanup_podman() -> tuple[str, ...]:
|
|
"""Cleanup podman processes and files on disk."""
|
|
cleanup = []
|
|
|
|
for remaining in range(3, -1, -1):
|
|
processes = [(int(item[0]), item[1]) for item in
|
|
[item.split(maxsplit=1) for item in run_command('ps', '-A', '-o', 'pid,comm', capture=True).stdout.splitlines()]
|
|
if pathlib.Path(item[1].split()[0]).name in ('catatonit', 'podman', 'conmon')]
|
|
|
|
if not processes:
|
|
break
|
|
|
|
for pid, name in processes:
|
|
display.info(f'Killing "{name}" ({pid}) ...')
|
|
|
|
try:
|
|
os.kill(pid, signal.SIGTERM if remaining > 1 else signal.SIGKILL)
|
|
except ProcessLookupError:
|
|
pass
|
|
|
|
cleanup.append(name)
|
|
|
|
time.sleep(1)
|
|
else:
|
|
raise Exception('failed to kill all matching processes')
|
|
|
|
uid = pwd.getpwnam(UNPRIVILEGED_USER_NAME).pw_uid
|
|
|
|
container_tmp = pathlib.Path(f'/tmp/containers-user-{uid}')
|
|
podman_tmp = pathlib.Path(f'/tmp/podman-run-{uid}')
|
|
|
|
user_config = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.config').expanduser()
|
|
user_local = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.local').expanduser()
|
|
|
|
if container_tmp.is_dir():
|
|
rmtree(container_tmp)
|
|
|
|
if podman_tmp.is_dir():
|
|
rmtree(podman_tmp)
|
|
|
|
if user_config.is_dir():
|
|
rmtree(user_config)
|
|
|
|
if user_local.is_dir():
|
|
rmtree(user_local)
|
|
|
|
return tuple(sorted(set(cleanup)))
|
|
|
|
|
|
def have_cgroup_systemd() -> bool:
|
|
"""Return True if the container host has a systemd cgroup."""
|
|
return pathlib.Path(CGROUP_SYSTEMD).is_dir()
|
|
|
|
|
|
def prepare_cgroup_systemd(username: str, engine: str) -> None:
|
|
"""Prepare the systemd cgroup."""
|
|
CGROUP_SYSTEMD.mkdir()
|
|
|
|
run_command('mount', 'cgroup', '-t', 'cgroup', str(CGROUP_SYSTEMD), '-o', 'none,name=systemd,xattr', capture=True)
|
|
|
|
if engine == 'podman':
|
|
run_command('chown', '-R', f'{username}:{username}', str(CGROUP_SYSTEMD))
|
|
|
|
run_command('find', str(CGROUP_SYSTEMD), '-type', 'd', '-exec', 'ls', '-l', '{}', ';')
|
|
|
|
|
|
def list_group_systemd() -> list[pathlib.Path]:
|
|
"""List the systemd cgroup."""
|
|
dirs = set()
|
|
|
|
for dirpath, dirnames, filenames in os.walk(CGROUP_SYSTEMD, topdown=False):
|
|
for dirname in dirnames:
|
|
target_path = pathlib.Path(dirpath, dirname)
|
|
display.info(f'dir: {target_path}')
|
|
dirs.add(target_path)
|
|
|
|
return sorted(dirs)
|
|
|
|
|
|
def remove_cgroup_systemd() -> list[pathlib.Path]:
|
|
"""Remove the systemd cgroup."""
|
|
dirs = set()
|
|
|
|
for sleep_seconds in range(1, 10):
|
|
try:
|
|
for dirpath, dirnames, filenames in os.walk(CGROUP_SYSTEMD, topdown=False):
|
|
for dirname in dirnames:
|
|
target_path = pathlib.Path(dirpath, dirname)
|
|
display.info(f'rmdir: {target_path}')
|
|
dirs.add(target_path)
|
|
target_path.rmdir()
|
|
except OSError as ex:
|
|
if ex.errno != errno.EBUSY:
|
|
raise
|
|
|
|
error = str(ex)
|
|
else:
|
|
break
|
|
|
|
display.warning(f'{error} -- sleeping for {sleep_seconds} second(s) before trying again ...') # pylint: disable=used-before-assignment
|
|
|
|
time.sleep(sleep_seconds)
|
|
|
|
time.sleep(1) # allow time for cgroups to be fully removed before unmounting
|
|
|
|
run_command('umount', str(CGROUP_SYSTEMD))
|
|
|
|
CGROUP_SYSTEMD.rmdir()
|
|
|
|
time.sleep(1) # allow time for cgroup hierarchy to be removed after unmounting
|
|
|
|
cgroup = pathlib.Path('/proc/self/cgroup').read_text()
|
|
|
|
if 'systemd' in cgroup:
|
|
raise Exception('systemd hierarchy detected')
|
|
|
|
return sorted(dirs)
|
|
|
|
|
|
def rmtree(path: pathlib.Path) -> None:
|
|
"""Wrapper around shutil.rmtree with additional error handling."""
|
|
for retries in range(10, -1, -1):
|
|
try:
|
|
display.info(f'rmtree: {path} ({retries} attempts remaining) ... ')
|
|
shutil.rmtree(path)
|
|
except Exception:
|
|
if not path.exists():
|
|
display.info(f'rmtree: {path} (not found)')
|
|
return
|
|
|
|
if not path.is_dir():
|
|
display.info(f'rmtree: {path} (not a directory)')
|
|
return
|
|
|
|
if retries:
|
|
continue
|
|
|
|
raise
|
|
else:
|
|
display.info(f'rmtree: {path} (done)')
|
|
return
|
|
|
|
|
|
def format_env(env: dict[str, str]) -> str:
|
|
"""Format an env dict for injection into a shell command and return the resulting string."""
|
|
if env:
|
|
return ' '.join(f'{shlex.quote(key)}={shlex.quote(value)}' for key, value in env.items()) + ' '
|
|
|
|
return ''
|
|
|
|
|
|
class DockerInfo:
|
|
"""The results of `docker info` for the container runtime."""
|
|
|
|
def __init__(self, data: dict[str, t.Any]) -> None:
|
|
self.data = data
|
|
|
|
@property
|
|
def cgroup_version(self) -> int:
|
|
"""The cgroup version of the container host."""
|
|
data = self.data
|
|
host = data.get('host')
|
|
|
|
if host:
|
|
version = int(host['cgroupVersion'].lstrip('v')) # podman
|
|
else:
|
|
version = int(data['CgroupVersion']) # docker
|
|
|
|
return version
|
|
|
|
|
|
@functools.lru_cache
|
|
def get_docker_info(engine: str) -> DockerInfo:
|
|
"""Return info for the current container runtime. The results are cached."""
|
|
return DockerInfo(json.loads(run_command(engine, 'info', '--format', '{{ json . }}', capture=True).stdout))
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class User:
|
|
name: str
|
|
pwnam: pwd.struct_passwd
|
|
|
|
@classmethod
|
|
def get(cls, name: str) -> User:
|
|
return User(
|
|
name=name,
|
|
pwnam=pwd.getpwnam(name),
|
|
)
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class UserScenario:
|
|
ssh: User = None
|
|
remote: User = None
|
|
|
|
@property
|
|
def actual(self) -> User:
|
|
return self.remote or self.ssh or ROOT_USER
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class TestScenario:
|
|
user_scenario: UserScenario
|
|
engine: str
|
|
container_name: str
|
|
image: str
|
|
disable_selinux: bool
|
|
expose_cgroup_v1: bool
|
|
enable_sha1: bool
|
|
debug_systemd: bool
|
|
probe_cgroups: bool
|
|
|
|
@property
|
|
def tags(self) -> tuple[str, ...]:
|
|
tags = []
|
|
|
|
if self.user_scenario.ssh:
|
|
tags.append(f'ssh: {self.user_scenario.ssh.name}')
|
|
|
|
if self.user_scenario.remote:
|
|
tags.append(f'remote: {self.user_scenario.remote.name}')
|
|
|
|
if self.disable_selinux:
|
|
tags.append('selinux: permissive')
|
|
|
|
if self.expose_cgroup_v1:
|
|
tags.append('cgroup: v1')
|
|
|
|
if self.enable_sha1:
|
|
tags.append('sha1: enabled')
|
|
|
|
return tuple(tags)
|
|
|
|
@property
|
|
def tag_label(self) -> str:
|
|
return ' '.join(f'[{tag}]' for tag in self.tags)
|
|
|
|
def __str__(self):
|
|
return f'[{self.container_name}] ({self.engine}) {self.tag_label}'.strip()
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class TestResult:
|
|
scenario: TestScenario
|
|
message: str
|
|
cleanup: tuple[str, ...]
|
|
duration: datetime.timedelta
|
|
cgroup_dirs: tuple[str, ...]
|
|
|
|
|
|
def parse_completion_entry(value: str) -> tuple[str, dict[str, str]]:
|
|
"""Parse the given completion entry, returning the entry name and a dictionary of key/value settings."""
|
|
values = value.split()
|
|
|
|
name = values[0]
|
|
data = {kvp[0]: kvp[1] if len(kvp) > 1 else '' for kvp in [item.split('=', 1) for item in values[1:]]}
|
|
|
|
return name, data
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class SubprocessResult:
|
|
"""Result from execution of a subprocess."""
|
|
|
|
command: list[str]
|
|
stdout: str
|
|
stderr: str
|
|
status: int
|
|
|
|
|
|
class ApplicationError(Exception):
|
|
"""An application error."""
|
|
|
|
def __init__(self, message: str) -> None:
|
|
self.message = message
|
|
|
|
super().__init__(message)
|
|
|
|
|
|
class SubprocessError(ApplicationError):
|
|
"""An error from executing a subprocess."""
|
|
|
|
def __init__(self, result: SubprocessResult) -> None:
|
|
self.result = result
|
|
|
|
message = f'Command `{shlex.join(result.command)}` exited with status: {result.status}'
|
|
|
|
stdout = (result.stdout or '').strip()
|
|
stderr = (result.stderr or '').strip()
|
|
|
|
if stdout:
|
|
message += f'\n>>> Standard Output\n{stdout}'
|
|
|
|
if stderr:
|
|
message += f'\n>>> Standard Error\n{stderr}'
|
|
|
|
super().__init__(message)
|
|
|
|
|
|
class ProgramNotFoundError(ApplicationError):
|
|
"""A required program was not found."""
|
|
|
|
def __init__(self, name: str) -> None:
|
|
self.name = name
|
|
|
|
super().__init__(f'Missing program: {name}')
|
|
|
|
|
|
class Display:
|
|
"""Display interface for sending output to the console."""
|
|
|
|
CLEAR = '\033[0m'
|
|
RED = '\033[31m'
|
|
GREEN = '\033[32m'
|
|
YELLOW = '\033[33m'
|
|
BLUE = '\033[34m'
|
|
PURPLE = '\033[35m'
|
|
CYAN = '\033[36m'
|
|
|
|
def __init__(self) -> None:
|
|
self.sensitive: set[str] = set()
|
|
|
|
def section(self, message: str) -> None:
|
|
"""Print a section message to the console."""
|
|
self.show(f'==> {message}', color=self.BLUE)
|
|
|
|
def subsection(self, message: str) -> None:
|
|
"""Print a subsection message to the console."""
|
|
self.show(f'--> {message}', color=self.CYAN)
|
|
|
|
def fatal(self, message: str) -> None:
|
|
"""Print a fatal message to the console."""
|
|
self.show(f'FATAL: {message}', color=self.RED)
|
|
|
|
def error(self, message: str) -> None:
|
|
"""Print an error message to the console."""
|
|
self.show(f'ERROR: {message}', color=self.RED)
|
|
|
|
def warning(self, message: str) -> None:
|
|
"""Print a warning message to the console."""
|
|
self.show(f'WARNING: {message}', color=self.PURPLE)
|
|
|
|
def info(self, message: str) -> None:
|
|
"""Print an info message to the console."""
|
|
self.show(f'INFO: {message}', color=self.YELLOW)
|
|
|
|
def show(self, message: str, color: str | None = None) -> None:
|
|
"""Print a message to the console."""
|
|
for item in self.sensitive:
|
|
message = message.replace(item, '*' * len(item))
|
|
|
|
print(f'{color or self.CLEAR}{message}{self.CLEAR}', flush=True)
|
|
|
|
|
|
def run_module(
|
|
module: str,
|
|
args: dict[str, t.Any],
|
|
) -> SubprocessResult:
|
|
"""Run the specified Ansible module and return the result."""
|
|
return run_command('ansible', '-m', module, '-v', '-a', json.dumps(args), 'localhost')
|
|
|
|
|
|
def retry_command(func: t.Callable[[], SubprocessResult], attempts: int = 3, retry_any_error: bool = False) -> SubprocessResult:
|
|
"""Run the given command function up to the specified number of attempts when the failure is due to an SSH error."""
|
|
for attempts_remaining in range(attempts - 1, -1, -1):
|
|
try:
|
|
return func()
|
|
except SubprocessError as ex:
|
|
if ex.result.command[0] == 'ssh' and ex.result.status == 255 and attempts_remaining:
|
|
# SSH connections on our Ubuntu 22.04 host sometimes fail for unknown reasons.
|
|
# This retry should allow the test suite to continue, maintaining CI stability.
|
|
# TODO: Figure out why local SSH connections sometimes fail during the test run.
|
|
display.warning('Command failed due to an SSH error. Waiting a few seconds before retrying.')
|
|
time.sleep(3)
|
|
continue
|
|
|
|
if retry_any_error:
|
|
display.warning('Command failed. Waiting a few seconds before retrying.')
|
|
time.sleep(3)
|
|
continue
|
|
|
|
raise
|
|
|
|
|
|
def run_command(
|
|
*command: str,
|
|
data: str | None = None,
|
|
stdin: int | t.IO[bytes] | None = None,
|
|
env: dict[str, str] | None = None,
|
|
capture: bool = False,
|
|
) -> SubprocessResult:
|
|
"""Run the specified command and return the result."""
|
|
stdin = subprocess.PIPE if data else stdin or subprocess.DEVNULL
|
|
stdout = subprocess.PIPE if capture else None
|
|
stderr = subprocess.PIPE if capture else None
|
|
|
|
display.subsection(f'Run command: {shlex.join(command)}')
|
|
|
|
try:
|
|
with subprocess.Popen(args=command, stdin=stdin, stdout=stdout, stderr=stderr, env=env, text=True) as process:
|
|
process_stdout, process_stderr = process.communicate(data)
|
|
process_status = process.returncode
|
|
except FileNotFoundError:
|
|
raise ProgramNotFoundError(command[0]) from None
|
|
|
|
result = SubprocessResult(
|
|
command=list(command),
|
|
stdout=process_stdout,
|
|
stderr=process_stderr,
|
|
status=process_status,
|
|
)
|
|
|
|
if process.returncode != 0:
|
|
raise SubprocessError(result)
|
|
|
|
return result
|
|
|
|
|
|
class Bootstrapper(metaclass=abc.ABCMeta):
|
|
"""Bootstrapper for remote instances."""
|
|
|
|
@classmethod
|
|
def install_podman(cls) -> bool:
|
|
"""Return True if podman will be installed."""
|
|
return False
|
|
|
|
@classmethod
|
|
def install_docker(cls) -> bool:
|
|
"""Return True if docker will be installed."""
|
|
return False
|
|
|
|
@classmethod
|
|
def usable(cls) -> bool:
|
|
"""Return True if the bootstrapper can be used, otherwise False."""
|
|
return False
|
|
|
|
@classmethod
|
|
def init(cls) -> t.Type[Bootstrapper]:
|
|
"""Return a bootstrapper type appropriate for the current system."""
|
|
for bootstrapper in cls.__subclasses__():
|
|
if bootstrapper.usable():
|
|
return bootstrapper
|
|
|
|
display.warning('No supported bootstrapper found.')
|
|
return Bootstrapper
|
|
|
|
@classmethod
|
|
def run(cls) -> None:
|
|
"""Run the bootstrapper."""
|
|
cls.configure_root_user()
|
|
cls.configure_unprivileged_user()
|
|
cls.configure_source_trees()
|
|
cls.configure_ssh_keys()
|
|
cls.configure_podman_remote()
|
|
|
|
@classmethod
|
|
def configure_root_user(cls) -> None:
|
|
"""Configure the root user to run tests."""
|
|
root_password_status = run_command('passwd', '--status', 'root', capture=True)
|
|
root_password_set = root_password_status.stdout.split()[1]
|
|
|
|
if root_password_set not in ('P', 'PS'):
|
|
root_password = run_command('openssl', 'passwd', '-5', '-stdin', data=secrets.token_hex(8), capture=True).stdout.strip()
|
|
|
|
run_module(
|
|
'user',
|
|
dict(
|
|
user='root',
|
|
password=root_password,
|
|
),
|
|
)
|
|
|
|
@classmethod
|
|
def configure_unprivileged_user(cls) -> None:
|
|
"""Configure the unprivileged user to run tests."""
|
|
unprivileged_password = run_command('openssl', 'passwd', '-5', '-stdin', data=secrets.token_hex(8), capture=True).stdout.strip()
|
|
|
|
run_module(
|
|
'user',
|
|
dict(
|
|
user=UNPRIVILEGED_USER_NAME,
|
|
password=unprivileged_password,
|
|
groups=['docker'] if cls.install_docker() else [],
|
|
append=True,
|
|
),
|
|
)
|
|
|
|
if os_release.id == 'alpine':
|
|
# Most distros handle this automatically, but not Alpine.
|
|
# See: https://www.redhat.com/sysadmin/rootless-podman
|
|
start = 165535
|
|
end = start + 65535
|
|
id_range = f'{start}-{end}'
|
|
|
|
run_command(
|
|
'usermod',
|
|
'--add-subuids',
|
|
id_range,
|
|
'--add-subgids',
|
|
id_range,
|
|
UNPRIVILEGED_USER_NAME,
|
|
)
|
|
|
|
@classmethod
|
|
def configure_source_trees(cls):
|
|
"""Configure the source trees needed to run tests for both root and the unprivileged user."""
|
|
current_ansible = pathlib.Path(os.environ['PYTHONPATH']).parent
|
|
|
|
root_ansible = pathlib.Path('~').expanduser() / 'ansible'
|
|
test_ansible = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}').expanduser() / 'ansible'
|
|
|
|
if current_ansible != root_ansible:
|
|
display.info(f'copying {current_ansible} -> {root_ansible} ...')
|
|
rmtree(root_ansible)
|
|
shutil.copytree(current_ansible, root_ansible)
|
|
run_command('chown', '-R', 'root:root', str(root_ansible))
|
|
|
|
display.info(f'copying {current_ansible} -> {test_ansible} ...')
|
|
rmtree(test_ansible)
|
|
shutil.copytree(current_ansible, test_ansible)
|
|
run_command('chown', '-R', f'{UNPRIVILEGED_USER_NAME}:{UNPRIVILEGED_USER_NAME}', str(test_ansible))
|
|
|
|
paths = [pathlib.Path(test_ansible)]
|
|
|
|
for root, dir_names, file_names in os.walk(test_ansible):
|
|
paths.extend(pathlib.Path(root, dir_name) for dir_name in dir_names)
|
|
paths.extend(pathlib.Path(root, file_name) for file_name in file_names)
|
|
|
|
user = pwd.getpwnam(UNPRIVILEGED_USER_NAME)
|
|
uid = user.pw_uid
|
|
gid = user.pw_gid
|
|
|
|
for path in paths:
|
|
os.chown(path, uid, gid)
|
|
|
|
@classmethod
|
|
def configure_ssh_keys(cls) -> None:
|
|
"""Configure SSH keys needed to run tests."""
|
|
user = pwd.getpwnam(UNPRIVILEGED_USER_NAME)
|
|
uid = user.pw_uid
|
|
gid = user.pw_gid
|
|
|
|
current_rsa_pub = pathlib.Path('~/.ssh/id_rsa.pub').expanduser()
|
|
|
|
test_authorized_keys = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.ssh/authorized_keys').expanduser()
|
|
|
|
test_authorized_keys.parent.mkdir(mode=0o755, parents=True, exist_ok=True)
|
|
os.chown(test_authorized_keys.parent, uid, gid)
|
|
|
|
shutil.copyfile(current_rsa_pub, test_authorized_keys)
|
|
os.chown(test_authorized_keys, uid, gid)
|
|
test_authorized_keys.chmod(mode=0o644)
|
|
|
|
@classmethod
|
|
def configure_podman_remote(cls) -> None:
|
|
"""Configure podman remote support."""
|
|
# TODO: figure out how to support remote podman without systemd (Alpine)
|
|
# TODO: figure out how to support remote podman on Ubuntu
|
|
if os_release.id in ('alpine', 'ubuntu'):
|
|
return
|
|
|
|
# Support podman remote on any host with systemd available.
|
|
retry_command(lambda: run_command('ssh', f'{UNPRIVILEGED_USER_NAME}@localhost', 'systemctl', '--user', 'enable', '--now', 'podman.socket'))
|
|
run_command('loginctl', 'enable-linger', UNPRIVILEGED_USER_NAME)
|
|
|
|
|
|
class DnfBootstrapper(Bootstrapper):
|
|
"""Bootstrapper for dnf based systems."""
|
|
|
|
@classmethod
|
|
def install_podman(cls) -> bool:
|
|
"""Return True if podman will be installed."""
|
|
return True
|
|
|
|
@classmethod
|
|
def install_docker(cls) -> bool:
|
|
"""Return True if docker will be installed."""
|
|
return os_release.id != 'rhel'
|
|
|
|
@classmethod
|
|
def usable(cls) -> bool:
|
|
"""Return True if the bootstrapper can be used, otherwise False."""
|
|
return bool(shutil.which('dnf'))
|
|
|
|
@classmethod
|
|
def run(cls) -> None:
|
|
"""Run the bootstrapper."""
|
|
# NOTE: Install crun to make it available to podman, otherwise installing moby-engine can cause podman to use runc instead.
|
|
packages = ['podman', 'crun']
|
|
|
|
if cls.install_docker():
|
|
packages.append('moby-engine')
|
|
|
|
if os_release.id == 'fedora' and os_release.version_id == '36':
|
|
# In Fedora 36 the current version of netavark, 1.2.0, causes TCP connect to hang between rootfull containers.
|
|
# The previously tested version, 1.1.0, did not have this issue.
|
|
# Unfortunately, with the release of 1.2.0 the 1.1.0 package was removed from the repositories.
|
|
# Thankfully the 1.0.2 version is available and also works, so we'll use that here until a fixed version is available.
|
|
# See: https://github.com/containers/netavark/issues/491
|
|
packages.append('netavark-1.0.2')
|
|
|
|
if os_release.id == 'rhel':
|
|
# As of the release of RHEL 9.1, installing podman on RHEL 9.0 results in a non-fatal error at install time:
|
|
#
|
|
# libsemanage.semanage_pipe_data: Child process /usr/libexec/selinux/hll/pp failed with code: 255. (No such file or directory).
|
|
# container: libsepol.policydb_read: policydb module version 21 does not match my version range 4-20
|
|
# container: libsepol.sepol_module_package_read: invalid module in module package (at section 0)
|
|
# container: Failed to read policy package
|
|
# libsemanage.semanage_direct_commit: Failed to compile hll files into cil files.
|
|
# (No such file or directory).
|
|
# /usr/sbin/semodule: Failed!
|
|
#
|
|
# Unfortunately this is then fatal when running podman, resulting in no error message and a 127 return code.
|
|
# The solution is to update the policycoreutils package *before* installing podman.
|
|
#
|
|
# NOTE: This work-around can probably be removed once we're testing on RHEL 9.1, as the updated packages should already be installed.
|
|
# Unfortunately at this time there is no RHEL 9.1 AMI available (other than the Beta release).
|
|
|
|
run_command('dnf', 'update', '-y', 'policycoreutils')
|
|
|
|
run_command('dnf', 'install', '-y', *packages)
|
|
|
|
if cls.install_docker():
|
|
run_command('systemctl', 'start', 'docker')
|
|
|
|
if os_release.id == 'rhel' and os_release.version_id.startswith('8.'):
|
|
# RHEL 8 defaults to using runc instead of crun.
|
|
# Unfortunately runc seems to have issues with podman remote.
|
|
# Specifically, it tends to cause conmon to burn CPU until it reaches the specified exit delay.
|
|
# So we'll just change the system default to crun instead.
|
|
# Unfortunately we can't do this with the `--runtime` option since that doesn't work with podman remote.
|
|
|
|
conf = pathlib.Path('/usr/share/containers/containers.conf').read_text()
|
|
|
|
conf = re.sub('^runtime .*', 'runtime = "crun"', conf, flags=re.MULTILINE)
|
|
|
|
pathlib.Path('/etc/containers/containers.conf').write_text(conf)
|
|
|
|
super().run()
|
|
|
|
|
|
class AptBootstrapper(Bootstrapper):
|
|
"""Bootstrapper for apt based systems."""
|
|
|
|
@classmethod
|
|
def install_podman(cls) -> bool:
|
|
"""Return True if podman will be installed."""
|
|
return not (os_release.id == 'ubuntu' and os_release.version_id == '20.04')
|
|
|
|
@classmethod
|
|
def install_docker(cls) -> bool:
|
|
"""Return True if docker will be installed."""
|
|
return True
|
|
|
|
@classmethod
|
|
def usable(cls) -> bool:
|
|
"""Return True if the bootstrapper can be used, otherwise False."""
|
|
return bool(shutil.which('apt-get'))
|
|
|
|
@classmethod
|
|
def run(cls) -> None:
|
|
"""Run the bootstrapper."""
|
|
apt_env = os.environ.copy()
|
|
apt_env.update(
|
|
DEBIAN_FRONTEND='noninteractive',
|
|
)
|
|
|
|
packages = ['docker.io']
|
|
|
|
if cls.install_podman():
|
|
# NOTE: Install crun to make it available to podman, otherwise installing docker.io can cause podman to use runc instead.
|
|
# Using podman rootless requires the `newuidmap` and `slirp4netns` commands.
|
|
packages.extend(('podman', 'crun', 'uidmap', 'slirp4netns'))
|
|
|
|
run_command('apt-get', 'install', *packages, '-y', '--no-install-recommends', env=apt_env)
|
|
|
|
super().run()
|
|
|
|
|
|
class ApkBootstrapper(Bootstrapper):
|
|
"""Bootstrapper for apk based systems."""
|
|
|
|
@classmethod
|
|
def install_podman(cls) -> bool:
|
|
"""Return True if podman will be installed."""
|
|
return True
|
|
|
|
@classmethod
|
|
def install_docker(cls) -> bool:
|
|
"""Return True if docker will be installed."""
|
|
return True
|
|
|
|
@classmethod
|
|
def usable(cls) -> bool:
|
|
"""Return True if the bootstrapper can be used, otherwise False."""
|
|
return bool(shutil.which('apk'))
|
|
|
|
@classmethod
|
|
def run(cls) -> None:
|
|
"""Run the bootstrapper."""
|
|
# The `openssl` package is used to generate hashed passwords.
|
|
# crun added as podman won't install it as dep if runc is present
|
|
# but we don't want runc as it fails
|
|
# The edge `crun` package installed below requires ip6tables, and in
|
|
# edge, the `iptables` package includes `ip6tables`, but in 3.18 they
|
|
# are separate. Remove `ip6tables` once we update to 3.19.
|
|
packages = ['docker', 'podman', 'openssl', 'crun', 'ip6tables']
|
|
|
|
run_command('apk', 'add', *packages)
|
|
# 3.18 only contains crun 1.8.4, to get 1.9.2 to resolve the run/shm issue, install crun from edge
|
|
# Remove once we update to 3.19
|
|
run_command('apk', 'upgrade', '-U', '--repository=http://dl-cdn.alpinelinux.org/alpine/edge/community', 'crun')
|
|
run_command('service', 'docker', 'start')
|
|
run_command('modprobe', 'tun')
|
|
|
|
super().run()
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class OsRelease:
|
|
"""Operating system identification."""
|
|
|
|
id: str
|
|
version_id: str
|
|
|
|
@staticmethod
|
|
def init() -> OsRelease:
|
|
"""Detect the current OS release and return the result."""
|
|
lines = run_command('sh', '-c', '. /etc/os-release && echo $ID && echo $VERSION_ID', capture=True).stdout.splitlines()
|
|
|
|
result = OsRelease(
|
|
id=lines[0],
|
|
version_id=lines[1],
|
|
)
|
|
|
|
display.show(f'Detected OS "{result.id}" version "{result.version_id}".')
|
|
|
|
return result
|
|
|
|
|
|
display = Display()
|
|
os_release = OsRelease.init()
|
|
|
|
ROOT_USER = User.get('root')
|
|
|
|
if __name__ == '__main__':
|
|
main()
|