From f62e678df8e1d4a3fd3a41f8c847c6b0a3605ac8 Mon Sep 17 00:00:00 2001 From: Nick Khyl Date: Thu, 2 May 2024 18:33:13 -0500 Subject: [PATCH] net/dns/resolver, control/controlknobs, tailcfg: use UserDial instead of SystemDial to dial DNS servers Now that tsdial.Dialer.UserDial has been updated to honor the configured routes and dial external network addresses without going through Tailscale, while also being able to dial a node/subnet router on the tailnet, we can start using UserDial to forward DNS requests. This is primarily needed for DNS over TCP when forwarding requests to internal DNS servers, but we also update getKnownDoHClientForProvider to use it. Updates tailscale/corp#18725 Signed-off-by: Nick Khyl --- control/controlknobs/controlknobs.go | 8 +++++++ net/dns/resolver/forwarder.go | 35 ++++++++++++++++------------ tailcfg/tailcfg.go | 3 ++- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/control/controlknobs/controlknobs.go b/control/controlknobs/controlknobs.go index 2f80ba38c..0b86c2d3b 100644 --- a/control/controlknobs/controlknobs.go +++ b/control/controlknobs/controlknobs.go @@ -76,6 +76,11 @@ type Knobs struct { // AppCStoreRoutes is whether the node should store RouteInfo to StateStore // if it's an app connector. AppCStoreRoutes atomic.Bool + + // UserDialUseRoutes is whether tsdial.Dialer.UserDial should use routes to determine + // how to dial the destination address. When true, it also makes the DNS forwarder + // use UserDial instead of SystemDial when dialing resolvers. + UserDialUseRoutes atomic.Bool } // UpdateFromNodeAttributes updates k (if non-nil) based on the provided self @@ -101,6 +106,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) { seamlessKeyRenewal = has(tailcfg.NodeAttrSeamlessKeyRenewal) probeUDPLifetime = has(tailcfg.NodeAttrProbeUDPLifetime) appCStoreRoutes = has(tailcfg.NodeAttrStoreAppCRoutes) + userDialUseRoutes = has(tailcfg.NodeAttrUserDialUseRoutes) ) if has(tailcfg.NodeAttrOneCGNATEnable) { @@ -124,6 +130,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) { k.SeamlessKeyRenewal.Store(seamlessKeyRenewal) k.ProbeUDPLifetime.Store(probeUDPLifetime) k.AppCStoreRoutes.Store(appCStoreRoutes) + k.UserDialUseRoutes.Store(userDialUseRoutes) } // AsDebugJSON returns k as something that can be marshalled with json.Marshal @@ -148,5 +155,6 @@ func (k *Knobs) AsDebugJSON() map[string]any { "SeamlessKeyRenewal": k.SeamlessKeyRenewal.Load(), "ProbeUDPLifetime": k.ProbeUDPLifetime.Load(), "AppCStoreRoutes": k.AppCStoreRoutes.Load(), + "UserDialUseRoutes": k.UserDialUseRoutes.Load(), } } diff --git a/net/dns/resolver/forwarder.go b/net/dns/resolver/forwarder.go index 5822915e8..24d7bebe5 100644 --- a/net/dns/resolver/forwarder.go +++ b/net/dns/resolver/forwarder.go @@ -391,20 +391,8 @@ func (f *forwarder) getKnownDoHClientForProvider(urlBase string) (c *http.Client if err != nil { return nil, false } - // NOTE: use f.dialer.SystemDial so we close connections on a link - // change; on mobile devices when switching between WiFi and cellular, - // we need to ensure we don't retain a connection on the old interface - // or we can block DNS resolution. - // - // NOTE: if we ever support arbitrary user-defined DoH providers, this - // isn't sufficient; we'd need a dialer that dial a DoH server on the - // internet, without going through Tailscale (as SystemDial does), but - // also can dial a node on the tailnet (e.g. a PiHole). - // - // As of the time of writing (2024-02-11), this isn't a problem because - // we only support a restricted set of public DoH providers that aren't - // on a user's tailnet. - dialer := dnscache.Dialer(f.dialer.SystemDial, &dnscache.Resolver{ + + dialer := dnscache.Dialer(f.getDialerType(), &dnscache.Resolver{ SingleHost: dohURL.Hostname(), SingleHostStaticResult: allIPs, Logf: f.logf, @@ -699,6 +687,23 @@ func (f *forwarder) sendUDP(ctx context.Context, fq *forwardQuery, rr resolverAn return out, nil } +func (f *forwarder) getDialerType() dnscache.DialContextFunc { + if f.controlKnobs != nil && f.controlKnobs.UserDialUseRoutes.Load() { + // It is safe to use UserDial as it dials external servers without going through Tailscale + // and closes connections on interface change in the same way as SystemDial does, + // thus preventing DNS resolution issues when switching between WiFi and cellular, + // but can also dial an internal DNS server on the Tailnet or via a subnet router. + // + // TODO(nickkhyl): Update tsdial.Dialer to reuse the bart.Table we create in net/tstun.Wrapper + // to avoid having two bart tables in memory, especially on iOS. Once that's done, + // we can get rid of the nodeAttr/control knob and always use UserDial for DNS. + // + // See https://github.com/tailscale/tailscale/issues/12027. + return f.dialer.UserDial + } + return f.dialer.SystemDial +} + func (f *forwarder) sendTCP(ctx context.Context, fq *forwardQuery, rr resolverAndDelay) (ret []byte, err error) { ipp, ok := rr.name.IPPort() if !ok { @@ -717,7 +722,7 @@ func (f *forwarder) sendTCP(ctx context.Context, fq *forwardQuery, rr resolverAn ctx, cancel := context.WithTimeout(ctx, tcpQueryTimeout) defer cancel() - conn, err := f.dialer.SystemDial(ctx, tcpFam, ipp.String()) + conn, err := f.getDialerType()(ctx, tcpFam, ipp.String()) if err != nil { return nil, err } diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index 29028d3c8..9d91632e9 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -2263,7 +2263,8 @@ const ( NodeAttrSuggestExitNodeUI NodeCapability = "suggest-exit-node-ui" // NodeAttrUserDialUseRoutes makes UserDial use either the peer dialer or the system dialer, - // depending on the destination address and the configured routes. + // depending on the destination address and the configured routes. When present, it also makes + // the DNS forwarder use UserDial instead of SystemDial when dialing resolvers. NodeAttrUserDialUseRoutes NodeCapability = "user-dial-routes" )