cmd/tailscaled, ipn/ipnlocal, wgengine: shutdown tailscaled if wgdevice is closed

Tailscaled becomes inoperative if the Tailscale Tunnel wintun adapter is abruptly removed.
wireguard-go closes the device in case of a read error, but tailscaled keeps running.
This adds detection of a closed WireGuard device, triggering a graceful shutdown of tailscaled.
It is then restarted by the tailscaled watchdog service process.

Fixes #11222

Signed-off-by: Nick Khyl <nickk@tailscale.com>
pull/11239/head
Nick Khyl 3 months ago committed by Nick Khyl
parent b42b9817b0
commit 7ef1fb113d

@ -432,13 +432,26 @@ func startIPNServer(ctx context.Context, logf logger.Logf, logID logid.PublicID,
if sigPipe != nil { if sigPipe != nil {
signal.Ignore(sigPipe) signal.Ignore(sigPipe)
} }
wgEngineCreated := make(chan struct{})
go func() { go func() {
select { var wgEngineClosed <-chan struct{}
case s := <-interrupt: wgEngineCreated := wgEngineCreated // local shadow
logf("tailscaled got signal %v; shutting down", s) for {
cancel() select {
case <-ctx.Done(): case s := <-interrupt:
// continue logf("tailscaled got signal %v; shutting down", s)
cancel()
return
case <-wgEngineClosed:
logf("wgengine has been closed; shutting down")
cancel()
return
case <-wgEngineCreated:
wgEngineClosed = sys.Engine.Get().Done()
wgEngineCreated = nil
case <-ctx.Done():
return
}
} }
}() }()
@ -464,6 +477,7 @@ func startIPNServer(ctx context.Context, logf logger.Logf, logID logid.PublicID,
if err == nil { if err == nil {
logf("got LocalBackend in %v", time.Since(t0).Round(time.Millisecond)) logf("got LocalBackend in %v", time.Since(t0).Round(time.Millisecond))
srv.SetLocalBackend(lb) srv.SetLocalBackend(lb)
close(wgEngineCreated)
return return
} }
lbErr.Store(err) // before the following cancel lbErr.Store(err) // before the following cancel

@ -678,7 +678,7 @@ func (b *LocalBackend) Shutdown() {
} }
b.ctxCancel() b.ctxCancel()
b.e.Close() b.e.Close()
b.e.Wait() <-b.e.Done()
} }
func stripKeysFromPrefs(p ipn.PrefsView) ipn.PrefsView { func stripKeysFromPrefs(p ipn.PrefsView) ipn.PrefsView {

@ -92,7 +92,7 @@ func (m *windowsManager) openInterfaceKey(pfx winutil.RegistryPathPrefix) (regis
func (m *windowsManager) muteKeyNotFoundIfClosing(err error) error { func (m *windowsManager) muteKeyNotFoundIfClosing(err error) error {
m.mu.Lock() m.mu.Lock()
defer m.mu.Unlock() defer m.mu.Unlock()
if !m.closing || (err != windows.ERROR_FILE_NOT_FOUND && err != windows.ERROR_PATH_NOT_FOUND) { if !m.closing || (!errors.Is(err, windows.ERROR_FILE_NOT_FOUND) && !errors.Is(err, windows.ERROR_PATH_NOT_FOUND)) {
return err return err
} }

@ -425,6 +425,21 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
} }
}() }()
go func() {
select {
case <-e.wgdev.Wait():
e.mu.Lock()
closing := e.closing
e.mu.Unlock()
if !closing {
e.logf("Closing the engine because the WireGuard device has been closed...")
e.Close()
}
case <-e.waitCh:
// continue
}
}()
e.logf("Bringing WireGuard device up...") e.logf("Bringing WireGuard device up...")
if err := e.wgdev.Up(); err != nil { if err := e.wgdev.Up(); err != nil {
return nil, fmt.Errorf("wgdev.Up: %w", err) return nil, fmt.Errorf("wgdev.Up: %w", err)
@ -1112,8 +1127,8 @@ func (e *userspaceEngine) Close() {
} }
} }
func (e *userspaceEngine) Wait() { func (e *userspaceEngine) Done() <-chan struct{} {
<-e.waitCh return e.waitCh
} }
func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) { func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) {

@ -150,8 +150,8 @@ func (e *watchdogEngine) PeerForIP(ip netip.Addr) (ret PeerForIP, ok bool) {
return ret, ok return ret, ok
} }
func (e *watchdogEngine) Wait() { func (e *watchdogEngine) Done() <-chan struct{} {
e.wrap.Wait() return e.wrap.Done()
} }
func (e *watchdogEngine) InstallCaptureHook(cb capture.Callback) { func (e *watchdogEngine) InstallCaptureHook(cb capture.Callback) {

@ -89,10 +89,11 @@ type Engine interface {
// new Engine. // new Engine.
Close() Close()
// Wait waits until the Engine's Close method is called or the // Done returns a channel that is closed when the Engine's
// engine aborts with an error. You don't have to call this. // Close method is called, the engine aborts with an error,
// TODO: return an error? // or it shuts down due to the closure of the underlying device.
Wait() // You don't have to call this.
Done() <-chan struct{}
// SetNetworkMap informs the engine of the latest network map // SetNetworkMap informs the engine of the latest network map
// from the server. The network map's DERPMap field should be // from the server. The network map's DERPMap field should be

Loading…
Cancel
Save