mirror of https://github.com/ansible/ansible.git
Prevent stdio deadlock in forked children (#79522)
* background threads writing to stdout/stderr can cause children to deadlock if a thread in the parent holds the internal lock on the BufferedWriter wrapper * prevent writes to std handles during fork by monkeypatching stdout/stderr during display startup to require a mutex lock with fork(); this ensures no background threads can hold the lock during a fork operation * add integration test that fails reliably on Linux without this fixpull/79541/head
parent
80d2f8da02
commit
1424484be0
@ -0,0 +1,2 @@
|
|||||||
|
bugfixes:
|
||||||
|
- display - reduce risk of post-fork output deadlocks (https://github.com/ansible/ansible/pull/79522)
|
@ -0,0 +1,3 @@
|
|||||||
|
shippable/posix/group3
|
||||||
|
context/controller
|
||||||
|
skip/macos
|
@ -0,0 +1,58 @@
|
|||||||
|
import atexit
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from ansible.plugins.callback import CallbackBase
|
||||||
|
from ansible.utils.display import Display
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
|
# This callback plugin reliably triggers the deadlock from https://github.com/ansible/ansible-runner/issues/1164 when
|
||||||
|
# run on a TTY/PTY. It starts a thread in the controller that spews unprintable characters to stdout as fast as
|
||||||
|
# possible, while causing forked children to write directly to the inherited stdout immediately post-fork. If a fork
|
||||||
|
# occurs while the spew thread holds stdout's internal BufferedIOWriter lock, the lock will be orphaned in the child,
|
||||||
|
# and attempts to write to stdout there will hang forever.
|
||||||
|
|
||||||
|
# Any mechanism that ensures non-main threads do not hold locks before forking should allow this test to pass.
|
||||||
|
|
||||||
|
# ref: https://docs.python.org/3/library/io.html#multi-threading
|
||||||
|
# ref: https://github.com/python/cpython/blob/0547a981ae413248b21a6bb0cb62dda7d236fe45/Modules/_io/bufferedio.c#L268
|
||||||
|
|
||||||
|
|
||||||
|
class CallbackModule(CallbackBase):
|
||||||
|
CALLBACK_VERSION = 2.0
|
||||||
|
CALLBACK_NAME = 'spewstdio'
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__()
|
||||||
|
self.display = Display()
|
||||||
|
|
||||||
|
if os.environ.get('SPEWSTDIO_ENABLED', '0') != '1':
|
||||||
|
self.display.warning('spewstdio test plugin loaded but disabled; set SPEWSTDIO_ENABLED=1 to enable')
|
||||||
|
return
|
||||||
|
|
||||||
|
self.display = Display()
|
||||||
|
self._keep_spewing = True
|
||||||
|
|
||||||
|
# cause the child to write directly to stdout immediately post-fork
|
||||||
|
os.register_at_fork(after_in_child=lambda: print(f"hi from forked child pid {os.getpid()}"))
|
||||||
|
|
||||||
|
# in passing cases, stop spewing when the controller is exiting to prevent fatal errors on final flush
|
||||||
|
atexit.register(self.stop_spew)
|
||||||
|
|
||||||
|
self._spew_thread = Thread(target=self.spew, daemon=True)
|
||||||
|
self._spew_thread.start()
|
||||||
|
|
||||||
|
def stop_spew(self):
|
||||||
|
self._keep_spewing = False
|
||||||
|
|
||||||
|
def spew(self):
|
||||||
|
# dump a message so we know the callback thread has started
|
||||||
|
self.display.warning("spewstdio STARTING NONPRINTING SPEW ON BACKGROUND THREAD")
|
||||||
|
|
||||||
|
while self._keep_spewing:
|
||||||
|
# dump a non-printing control character directly to stdout to avoid junking up the screen while still
|
||||||
|
# doing lots of writes and flushes.
|
||||||
|
sys.stdout.write('\x1b[K')
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
self.display.warning("spewstdio STOPPING SPEW")
|
@ -0,0 +1,5 @@
|
|||||||
|
[all]
|
||||||
|
local-[1:10]
|
||||||
|
|
||||||
|
[all:vars]
|
||||||
|
ansible_connection=local
|
@ -0,0 +1,11 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
"""Run a command using a PTY."""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
if sys.version_info < (3, 10):
|
||||||
|
import vendored_pty as pty
|
||||||
|
else:
|
||||||
|
import pty
|
||||||
|
|
||||||
|
sys.exit(1 if pty.spawn(sys.argv[1:]) else 0)
|
@ -0,0 +1,20 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -eu
|
||||||
|
|
||||||
|
echo "testing for stdio deadlock on forked workers (10s timeout)..."
|
||||||
|
|
||||||
|
# Enable a callback that trips deadlocks on forked-child stdout, time out after 10s; forces running
|
||||||
|
# in a pty, since that tends to be much slower than raw file I/O and thus more likely to trigger the deadlock.
|
||||||
|
# Redirect stdout to /dev/null since it's full of non-printable garbage we don't want to display unless it failed
|
||||||
|
ANSIBLE_CALLBACKS_ENABLED=spewstdio SPEWSTDIO_ENABLED=1 python run-with-pty.py timeout 10s ansible-playbook -i hosts -f 5 test.yml > stdout.txt && RC=$? || RC=$?
|
||||||
|
|
||||||
|
if [ $RC != 0 ]; then
|
||||||
|
echo "failed; likely stdout deadlock. dumping raw output (may be very large)"
|
||||||
|
cat stdout.txt
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
grep -q -e "spewstdio STARTING NONPRINTING SPEW ON BACKGROUND THREAD" stdout.txt || (echo "spewstdio callback was not enabled"; exit 1)
|
||||||
|
|
||||||
|
echo "PASS"
|
@ -0,0 +1,5 @@
|
|||||||
|
- hosts: all
|
||||||
|
gather_facts: no
|
||||||
|
tasks:
|
||||||
|
- debug:
|
||||||
|
msg: yo
|
@ -0,0 +1,189 @@
|
|||||||
|
# Vendored copy of https://github.com/python/cpython/blob/3680ebed7f3e529d01996dd0318601f9f0d02b4b/Lib/pty.py
|
||||||
|
# PSF License (see licenses/PSF-license.txt or https://opensource.org/licenses/Python-2.0)
|
||||||
|
"""Pseudo terminal utilities."""
|
||||||
|
|
||||||
|
# Bugs: No signal handling. Doesn't set slave termios and window size.
|
||||||
|
# Only tested on Linux, FreeBSD, and macOS.
|
||||||
|
# See: W. Richard Stevens. 1992. Advanced Programming in the
|
||||||
|
# UNIX Environment. Chapter 19.
|
||||||
|
# Author: Steen Lumholt -- with additions by Guido.
|
||||||
|
|
||||||
|
from select import select
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tty
|
||||||
|
|
||||||
|
# names imported directly for test mocking purposes
|
||||||
|
from os import close, waitpid
|
||||||
|
from tty import setraw, tcgetattr, tcsetattr
|
||||||
|
|
||||||
|
__all__ = ["openpty", "fork", "spawn"]
|
||||||
|
|
||||||
|
STDIN_FILENO = 0
|
||||||
|
STDOUT_FILENO = 1
|
||||||
|
STDERR_FILENO = 2
|
||||||
|
|
||||||
|
CHILD = 0
|
||||||
|
|
||||||
|
def openpty():
|
||||||
|
"""openpty() -> (master_fd, slave_fd)
|
||||||
|
Open a pty master/slave pair, using os.openpty() if possible."""
|
||||||
|
|
||||||
|
try:
|
||||||
|
return os.openpty()
|
||||||
|
except (AttributeError, OSError):
|
||||||
|
pass
|
||||||
|
master_fd, slave_name = _open_terminal()
|
||||||
|
slave_fd = slave_open(slave_name)
|
||||||
|
return master_fd, slave_fd
|
||||||
|
|
||||||
|
def master_open():
|
||||||
|
"""master_open() -> (master_fd, slave_name)
|
||||||
|
Open a pty master and return the fd, and the filename of the slave end.
|
||||||
|
Deprecated, use openpty() instead."""
|
||||||
|
|
||||||
|
try:
|
||||||
|
master_fd, slave_fd = os.openpty()
|
||||||
|
except (AttributeError, OSError):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
slave_name = os.ttyname(slave_fd)
|
||||||
|
os.close(slave_fd)
|
||||||
|
return master_fd, slave_name
|
||||||
|
|
||||||
|
return _open_terminal()
|
||||||
|
|
||||||
|
def _open_terminal():
|
||||||
|
"""Open pty master and return (master_fd, tty_name)."""
|
||||||
|
for x in 'pqrstuvwxyzPQRST':
|
||||||
|
for y in '0123456789abcdef':
|
||||||
|
pty_name = '/dev/pty' + x + y
|
||||||
|
try:
|
||||||
|
fd = os.open(pty_name, os.O_RDWR)
|
||||||
|
except OSError:
|
||||||
|
continue
|
||||||
|
return (fd, '/dev/tty' + x + y)
|
||||||
|
raise OSError('out of pty devices')
|
||||||
|
|
||||||
|
def slave_open(tty_name):
|
||||||
|
"""slave_open(tty_name) -> slave_fd
|
||||||
|
Open the pty slave and acquire the controlling terminal, returning
|
||||||
|
opened filedescriptor.
|
||||||
|
Deprecated, use openpty() instead."""
|
||||||
|
|
||||||
|
result = os.open(tty_name, os.O_RDWR)
|
||||||
|
try:
|
||||||
|
from fcntl import ioctl, I_PUSH
|
||||||
|
except ImportError:
|
||||||
|
return result
|
||||||
|
try:
|
||||||
|
ioctl(result, I_PUSH, "ptem")
|
||||||
|
ioctl(result, I_PUSH, "ldterm")
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
return result
|
||||||
|
|
||||||
|
def fork():
|
||||||
|
"""fork() -> (pid, master_fd)
|
||||||
|
Fork and make the child a session leader with a controlling terminal."""
|
||||||
|
|
||||||
|
try:
|
||||||
|
pid, fd = os.forkpty()
|
||||||
|
except (AttributeError, OSError):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
if pid == CHILD:
|
||||||
|
try:
|
||||||
|
os.setsid()
|
||||||
|
except OSError:
|
||||||
|
# os.forkpty() already set us session leader
|
||||||
|
pass
|
||||||
|
return pid, fd
|
||||||
|
|
||||||
|
master_fd, slave_fd = openpty()
|
||||||
|
pid = os.fork()
|
||||||
|
if pid == CHILD:
|
||||||
|
# Establish a new session.
|
||||||
|
os.setsid()
|
||||||
|
os.close(master_fd)
|
||||||
|
|
||||||
|
# Slave becomes stdin/stdout/stderr of child.
|
||||||
|
os.dup2(slave_fd, STDIN_FILENO)
|
||||||
|
os.dup2(slave_fd, STDOUT_FILENO)
|
||||||
|
os.dup2(slave_fd, STDERR_FILENO)
|
||||||
|
if slave_fd > STDERR_FILENO:
|
||||||
|
os.close(slave_fd)
|
||||||
|
|
||||||
|
# Explicitly open the tty to make it become a controlling tty.
|
||||||
|
tmp_fd = os.open(os.ttyname(STDOUT_FILENO), os.O_RDWR)
|
||||||
|
os.close(tmp_fd)
|
||||||
|
else:
|
||||||
|
os.close(slave_fd)
|
||||||
|
|
||||||
|
# Parent and child process.
|
||||||
|
return pid, master_fd
|
||||||
|
|
||||||
|
def _writen(fd, data):
|
||||||
|
"""Write all the data to a descriptor."""
|
||||||
|
while data:
|
||||||
|
n = os.write(fd, data)
|
||||||
|
data = data[n:]
|
||||||
|
|
||||||
|
def _read(fd):
|
||||||
|
"""Default read function."""
|
||||||
|
return os.read(fd, 1024)
|
||||||
|
|
||||||
|
def _copy(master_fd, master_read=_read, stdin_read=_read):
|
||||||
|
"""Parent copy loop.
|
||||||
|
Copies
|
||||||
|
pty master -> standard output (master_read)
|
||||||
|
standard input -> pty master (stdin_read)"""
|
||||||
|
fds = [master_fd, STDIN_FILENO]
|
||||||
|
while fds:
|
||||||
|
rfds, _wfds, _xfds = select(fds, [], [])
|
||||||
|
|
||||||
|
if master_fd in rfds:
|
||||||
|
# Some OSes signal EOF by returning an empty byte string,
|
||||||
|
# some throw OSErrors.
|
||||||
|
try:
|
||||||
|
data = master_read(master_fd)
|
||||||
|
except OSError:
|
||||||
|
data = b""
|
||||||
|
if not data: # Reached EOF.
|
||||||
|
return # Assume the child process has exited and is
|
||||||
|
# unreachable, so we clean up.
|
||||||
|
else:
|
||||||
|
os.write(STDOUT_FILENO, data)
|
||||||
|
|
||||||
|
if STDIN_FILENO in rfds:
|
||||||
|
data = stdin_read(STDIN_FILENO)
|
||||||
|
if not data:
|
||||||
|
fds.remove(STDIN_FILENO)
|
||||||
|
else:
|
||||||
|
_writen(master_fd, data)
|
||||||
|
|
||||||
|
def spawn(argv, master_read=_read, stdin_read=_read):
|
||||||
|
"""Create a spawned process."""
|
||||||
|
if isinstance(argv, str):
|
||||||
|
argv = (argv,)
|
||||||
|
sys.audit('pty.spawn', argv)
|
||||||
|
|
||||||
|
pid, master_fd = fork()
|
||||||
|
if pid == CHILD:
|
||||||
|
os.execlp(argv[0], *argv)
|
||||||
|
|
||||||
|
try:
|
||||||
|
mode = tcgetattr(STDIN_FILENO)
|
||||||
|
setraw(STDIN_FILENO)
|
||||||
|
restore = True
|
||||||
|
except tty.error: # This is the same as termios.error
|
||||||
|
restore = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
_copy(master_fd, master_read, stdin_read)
|
||||||
|
finally:
|
||||||
|
if restore:
|
||||||
|
tcsetattr(STDIN_FILENO, tty.TCSAFLUSH, mode)
|
||||||
|
|
||||||
|
close(master_fd)
|
||||||
|
return waitpid(pid, 0)[1]
|
Loading…
Reference in New Issue