From 55d0e6d3a8f2622355d9dde1c71c4932731fb319 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Tue, 16 Sep 2025 20:32:28 -0700 Subject: [PATCH] net/dns/recursive: remove recursive DNS resolver It doesn't really pull its weight: it adds 577 KB to the binary and is rarely useful. Also, we now have static IPs and other connectivity paths coming soon enough. Updates #5853 Updates #1278 Updates tailscale/corp#32168 Change-Id: If336fed00a9c9ae9745419e6d81f7de6da6f7275 Signed-off-by: Brad Fitzpatrick --- cmd/k8s-operator/depaware.txt | 6 +- cmd/tailscale/depaware.txt | 12 +- cmd/tailscaled/depaware.txt | 8 +- cmd/tsidp/depaware.txt | 6 +- net/dns/recursive/recursive.go | 622 ----------------------- net/dns/recursive/recursive_test.go | 742 ---------------------------- net/dnsfallback/dnsfallback.go | 155 +----- tsnet/depaware.txt | 6 +- 8 files changed, 15 insertions(+), 1542 deletions(-) delete mode 100644 net/dns/recursive/recursive.go delete mode 100644 net/dns/recursive/recursive_test.go diff --git a/cmd/k8s-operator/depaware.txt b/cmd/k8s-operator/depaware.txt index faf7b2f83..e65977875 100644 --- a/cmd/k8s-operator/depaware.txt +++ b/cmd/k8s-operator/depaware.txt @@ -168,7 +168,6 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ L github.com/mdlayher/netlink/nltest from github.com/google/nftables L github.com/mdlayher/sdnotify from tailscale.com/util/systemd L 💣 github.com/mdlayher/socket from github.com/mdlayher/netlink+ - github.com/miekg/dns from tailscale.com/net/dns/recursive 💣 github.com/mitchellh/go-ps from tailscale.com/safesocket github.com/modern-go/concurrent from github.com/json-iterator/go 💣 github.com/modern-go/reflect2 from github.com/json-iterator/go @@ -847,7 +846,6 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ tailscale.com/net/connstats from tailscale.com/net/tstun+ tailscale.com/net/dns from tailscale.com/ipn/ipnlocal+ tailscale.com/net/dns/publicdns from tailscale.com/net/dns+ - tailscale.com/net/dns/recursive from tailscale.com/net/dnsfallback tailscale.com/net/dns/resolvconffile from tailscale.com/cmd/k8s-operator+ tailscale.com/net/dns/resolver from tailscale.com/net/dns+ tailscale.com/net/dnscache from tailscale.com/control/controlclient+ @@ -1026,8 +1024,8 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ golang.org/x/net/internal/iana from golang.org/x/net/icmp+ golang.org/x/net/internal/socket from golang.org/x/net/icmp+ golang.org/x/net/internal/socks from golang.org/x/net/proxy - golang.org/x/net/ipv4 from github.com/miekg/dns+ - golang.org/x/net/ipv6 from github.com/miekg/dns+ + golang.org/x/net/ipv4 from github.com/prometheus-community/pro-bing+ + golang.org/x/net/ipv6 from github.com/prometheus-community/pro-bing+ golang.org/x/net/proxy from tailscale.com/net/netns D golang.org/x/net/route from tailscale.com/net/netmon+ golang.org/x/net/websocket from tailscale.com/k8s-operator/sessionrecording/ws diff --git a/cmd/tailscale/depaware.txt b/cmd/tailscale/depaware.txt index c86af7ea7..ae4a7bd4d 100644 --- a/cmd/tailscale/depaware.txt +++ b/cmd/tailscale/depaware.txt @@ -48,7 +48,6 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep L 💣 github.com/mdlayher/netlink/nlenc from github.com/jsimonetti/rtnetlink+ L github.com/mdlayher/netlink/nltest from github.com/google/nftables L 💣 github.com/mdlayher/socket from github.com/mdlayher/netlink - github.com/miekg/dns from tailscale.com/net/dns/recursive 💣 github.com/mitchellh/go-ps from tailscale.com/cmd/tailscale/cli+ github.com/peterbourgon/ff/v3 from github.com/peterbourgon/ff/v3/ffcli+ github.com/peterbourgon/ff/v3/ffcli from tailscale.com/cmd/tailscale/cli+ @@ -121,7 +120,6 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep tailscale.com/metrics from tailscale.com/derp+ tailscale.com/net/bakedroots from tailscale.com/net/tlsdial tailscale.com/net/captivedetection from tailscale.com/net/netcheck - tailscale.com/net/dns/recursive from tailscale.com/net/dnsfallback tailscale.com/net/dnscache from tailscale.com/control/controlhttp+ tailscale.com/net/dnsfallback from tailscale.com/control/controlhttp+ tailscale.com/net/netaddr from tailscale.com/ipn+ @@ -192,8 +190,8 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep tailscale.com/util/quarantine from tailscale.com/cmd/tailscale/cli tailscale.com/util/rands from tailscale.com/tsweb tailscale.com/util/set from tailscale.com/derp+ - tailscale.com/util/singleflight from tailscale.com/net/dnscache+ - tailscale.com/util/slicesx from tailscale.com/net/dns/recursive+ + tailscale.com/util/singleflight from tailscale.com/net/dnscache + tailscale.com/util/slicesx from tailscale.com/client/systray+ L tailscale.com/util/stringsx from tailscale.com/client/systray tailscale.com/util/syspolicy from tailscale.com/feature/syspolicy tailscale.com/util/syspolicy/internal from tailscale.com/util/syspolicy/setting+ @@ -250,8 +248,8 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep golang.org/x/net/internal/iana from golang.org/x/net/icmp+ golang.org/x/net/internal/socket from golang.org/x/net/icmp+ golang.org/x/net/internal/socks from golang.org/x/net/proxy - golang.org/x/net/ipv4 from github.com/miekg/dns+ - golang.org/x/net/ipv6 from github.com/miekg/dns+ + golang.org/x/net/ipv4 from golang.org/x/net/icmp+ + golang.org/x/net/ipv6 from golang.org/x/net/icmp+ golang.org/x/net/proxy from tailscale.com/net/netns D golang.org/x/net/route from tailscale.com/net/netmon+ golang.org/x/oauth2 from golang.org/x/oauth2/clientcredentials @@ -337,7 +335,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep crypto/sha3 from crypto/internal/fips140hash crypto/sha512 from crypto/ecdsa+ crypto/subtle from crypto/cipher+ - crypto/tls from github.com/miekg/dns+ + crypto/tls from golang.org/x/net/http2+ crypto/tls/internal/fips140tls from crypto/tls crypto/x509 from crypto/tls+ D crypto/x509/internal/macos from crypto/x509 diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index d4e1f13bf..4482ad125 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -149,7 +149,6 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de L github.com/mdlayher/netlink/nltest from github.com/google/nftables L github.com/mdlayher/sdnotify from tailscale.com/util/systemd L 💣 github.com/mdlayher/socket from github.com/mdlayher/netlink+ - github.com/miekg/dns from tailscale.com/net/dns/recursive 💣 github.com/mitchellh/go-ps from tailscale.com/safesocket L github.com/pierrec/lz4/v4 from github.com/u-root/uio/uio L github.com/pierrec/lz4/v4/internal/lz4block from github.com/pierrec/lz4/v4+ @@ -321,7 +320,6 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/net/connstats from tailscale.com/net/tstun+ tailscale.com/net/dns from tailscale.com/cmd/tailscaled+ tailscale.com/net/dns/publicdns from tailscale.com/net/dns+ - tailscale.com/net/dns/recursive from tailscale.com/net/dnsfallback tailscale.com/net/dns/resolvconffile from tailscale.com/net/dns+ tailscale.com/net/dns/resolver from tailscale.com/net/dns+ tailscale.com/net/dnscache from tailscale.com/control/controlclient+ @@ -433,7 +431,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/util/ringlog from tailscale.com/wgengine/magicsock tailscale.com/util/set from tailscale.com/derp+ tailscale.com/util/singleflight from tailscale.com/control/controlclient+ - tailscale.com/util/slicesx from tailscale.com/net/dns/recursive+ + tailscale.com/util/slicesx from tailscale.com/appc+ tailscale.com/util/syspolicy from tailscale.com/feature/syspolicy tailscale.com/util/syspolicy/internal from tailscale.com/util/syspolicy/setting+ tailscale.com/util/syspolicy/internal/loggerx from tailscale.com/util/syspolicy/internal/metrics+ @@ -504,8 +502,8 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de golang.org/x/net/internal/iana from golang.org/x/net/icmp+ golang.org/x/net/internal/socket from golang.org/x/net/icmp+ golang.org/x/net/internal/socks from golang.org/x/net/proxy - golang.org/x/net/ipv4 from github.com/miekg/dns+ - golang.org/x/net/ipv6 from github.com/miekg/dns+ + golang.org/x/net/ipv4 from github.com/prometheus-community/pro-bing+ + golang.org/x/net/ipv6 from github.com/prometheus-community/pro-bing+ golang.org/x/net/proxy from tailscale.com/net/netns D golang.org/x/net/route from tailscale.com/net/netmon+ golang.org/x/sync/errgroup from github.com/mdlayher/socket+ diff --git a/cmd/tsidp/depaware.txt b/cmd/tsidp/depaware.txt index 0aafff8e1..7db7849b7 100644 --- a/cmd/tsidp/depaware.txt +++ b/cmd/tsidp/depaware.txt @@ -132,7 +132,6 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar L github.com/mdlayher/netlink/nltest from github.com/google/nftables L github.com/mdlayher/sdnotify from tailscale.com/util/systemd L 💣 github.com/mdlayher/socket from github.com/mdlayher/netlink+ - github.com/miekg/dns from tailscale.com/net/dns/recursive 💣 github.com/mitchellh/go-ps from tailscale.com/safesocket D github.com/prometheus-community/pro-bing from tailscale.com/wgengine/netstack L 💣 github.com/safchain/ethtool from tailscale.com/doctor/ethtool+ @@ -276,7 +275,6 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar tailscale.com/net/connstats from tailscale.com/net/tstun+ tailscale.com/net/dns from tailscale.com/ipn/ipnlocal+ tailscale.com/net/dns/publicdns from tailscale.com/net/dns+ - tailscale.com/net/dns/recursive from tailscale.com/net/dnsfallback tailscale.com/net/dns/resolvconffile from tailscale.com/net/dns+ tailscale.com/net/dns/resolver from tailscale.com/net/dns+ tailscale.com/net/dnscache from tailscale.com/control/controlclient+ @@ -455,8 +453,8 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar golang.org/x/net/internal/iana from golang.org/x/net/icmp+ golang.org/x/net/internal/socket from golang.org/x/net/icmp+ golang.org/x/net/internal/socks from golang.org/x/net/proxy - golang.org/x/net/ipv4 from github.com/miekg/dns+ - golang.org/x/net/ipv6 from github.com/miekg/dns+ + golang.org/x/net/ipv4 from github.com/prometheus-community/pro-bing+ + golang.org/x/net/ipv6 from github.com/prometheus-community/pro-bing+ golang.org/x/net/proxy from tailscale.com/net/netns D golang.org/x/net/route from tailscale.com/net/netmon+ golang.org/x/sync/errgroup from github.com/mdlayher/socket+ diff --git a/net/dns/recursive/recursive.go b/net/dns/recursive/recursive.go deleted file mode 100644 index fd865e37a..000000000 --- a/net/dns/recursive/recursive.go +++ /dev/null @@ -1,622 +0,0 @@ -// Copyright (c) Tailscale Inc & AUTHORS -// SPDX-License-Identifier: BSD-3-Clause - -// Package recursive implements a simple recursive DNS resolver. -package recursive - -import ( - "context" - "errors" - "fmt" - "net" - "net/netip" - "slices" - "strings" - "time" - - "github.com/miekg/dns" - "tailscale.com/envknob" - "tailscale.com/net/netns" - "tailscale.com/types/logger" - "tailscale.com/util/dnsname" - "tailscale.com/util/mak" - "tailscale.com/util/multierr" - "tailscale.com/util/slicesx" -) - -const ( - // maxDepth is how deep from the root nameservers we'll recurse when - // resolving; passing this limit will instead return an error. - // - // maxDepth must be at least 20 to resolve "console.aws.amazon.com", - // which is a domain with a moderately complicated DNS setup. The - // current value of 30 was chosen semi-arbitrarily to ensure that we - // have about 50% headroom. - maxDepth = 30 - // numStartingServers is the number of root nameservers that we use as - // initial candidates for our recursion. - numStartingServers = 3 - // udpQueryTimeout is the amount of time we wait for a UDP response - // from a nameserver before falling back to a TCP connection. - udpQueryTimeout = 5 * time.Second - - // These constants aren't typed in the DNS package, so we create typed - // versions here to avoid having to do repeated type casts. - qtypeA dns.Type = dns.Type(dns.TypeA) - qtypeAAAA dns.Type = dns.Type(dns.TypeAAAA) -) - -var ( - // ErrMaxDepth is returned when recursive resolving exceeds the maximum - // depth limit for this package. - ErrMaxDepth = fmt.Errorf("exceeded max depth %d when resolving", maxDepth) - - // ErrAuthoritativeNoResponses is the error returned when an - // authoritative nameserver indicates that there are no responses to - // the given query. - ErrAuthoritativeNoResponses = errors.New("authoritative server returned no responses") - - // ErrNoResponses is returned when our resolution process completes - // with no valid responses from any nameserver, but no authoritative - // server explicitly returned NXDOMAIN. - ErrNoResponses = errors.New("no responses to query") -) - -var rootServersV4 = []netip.Addr{ - netip.MustParseAddr("198.41.0.4"), // a.root-servers.net - netip.MustParseAddr("170.247.170.2"), // b.root-servers.net - netip.MustParseAddr("192.33.4.12"), // c.root-servers.net - netip.MustParseAddr("199.7.91.13"), // d.root-servers.net - netip.MustParseAddr("192.203.230.10"), // e.root-servers.net - netip.MustParseAddr("192.5.5.241"), // f.root-servers.net - netip.MustParseAddr("192.112.36.4"), // g.root-servers.net - netip.MustParseAddr("198.97.190.53"), // h.root-servers.net - netip.MustParseAddr("192.36.148.17"), // i.root-servers.net - netip.MustParseAddr("192.58.128.30"), // j.root-servers.net - netip.MustParseAddr("193.0.14.129"), // k.root-servers.net - netip.MustParseAddr("199.7.83.42"), // l.root-servers.net - netip.MustParseAddr("202.12.27.33"), // m.root-servers.net -} - -var rootServersV6 = []netip.Addr{ - netip.MustParseAddr("2001:503:ba3e::2:30"), // a.root-servers.net - netip.MustParseAddr("2801:1b8:10::b"), // b.root-servers.net - netip.MustParseAddr("2001:500:2::c"), // c.root-servers.net - netip.MustParseAddr("2001:500:2d::d"), // d.root-servers.net - netip.MustParseAddr("2001:500:a8::e"), // e.root-servers.net - netip.MustParseAddr("2001:500:2f::f"), // f.root-servers.net - netip.MustParseAddr("2001:500:12::d0d"), // g.root-servers.net - netip.MustParseAddr("2001:500:1::53"), // h.root-servers.net - netip.MustParseAddr("2001:7fe::53"), // i.root-servers.net - netip.MustParseAddr("2001:503:c27::2:30"), // j.root-servers.net - netip.MustParseAddr("2001:7fd::1"), // k.root-servers.net - netip.MustParseAddr("2001:500:9f::42"), // l.root-servers.net - netip.MustParseAddr("2001:dc3::35"), // m.root-servers.net -} - -var debug = envknob.RegisterBool("TS_DEBUG_RECURSIVE_DNS") - -// Resolver is a recursive DNS resolver that is designed for looking up A and AAAA records. -type Resolver struct { - // Dialer is used to create outbound connections. If nil, a zero - // net.Dialer will be used instead. - Dialer netns.Dialer - - // Logf is the logging function to use; if none is specified, then logs - // will be dropped. - Logf logger.Logf - - // NoIPv6, if set, will prevent this package from querying for AAAA - // records and will avoid contacting nameservers over IPv6. - NoIPv6 bool - - // Test mocks - testQueryHook func(name dnsname.FQDN, nameserver netip.Addr, protocol string, qtype dns.Type) (*dns.Msg, error) - testExchangeHook func(nameserver netip.Addr, network string, msg *dns.Msg) (*dns.Msg, error) - rootServers []netip.Addr - timeNow func() time.Time - - // Caching - // NOTE(andrew): if we make resolution parallel, this needs a mutex - queryCache map[dnsQuery]dnsMsgWithExpiry - - // Possible future additions: - // - Additional nameservers? From the system maybe? - // - NoIPv4 for IPv4 - // - DNS-over-HTTPS or DNS-over-TLS support -} - -// queryState stores all state during the course of a single query -type queryState struct { - // rootServers are the root nameservers to start from - rootServers []netip.Addr - - // TODO: metrics? -} - -type dnsQuery struct { - nameserver netip.Addr - name dnsname.FQDN - qtype dns.Type -} - -func (q dnsQuery) String() string { - return fmt.Sprintf("dnsQuery{nameserver:%q,name:%q,qtype:%v}", q.nameserver.String(), q.name, q.qtype) -} - -type dnsMsgWithExpiry struct { - *dns.Msg - expiresAt time.Time -} - -func (r *Resolver) now() time.Time { - if r.timeNow != nil { - return r.timeNow() - } - return time.Now() -} - -func (r *Resolver) logf(format string, args ...any) { - if r.Logf == nil { - return - } - r.Logf(format, args...) -} - -func (r *Resolver) depthlogf(depth int, format string, args ...any) { - if r.Logf == nil || !debug() { - return - } - prefix := fmt.Sprintf("[%d] %s", depth, strings.Repeat(" ", depth)) - r.Logf(prefix+format, args...) -} - -var defaultDialer net.Dialer - -func (r *Resolver) dialer() netns.Dialer { - if r.Dialer != nil { - return r.Dialer - } - - return &defaultDialer -} - -func (r *Resolver) newState() *queryState { - var rootServers []netip.Addr - if len(r.rootServers) > 0 { - rootServers = r.rootServers - } else { - // Select a random subset of root nameservers to start from, since if - // we don't get responses from those, something else has probably gone - // horribly wrong. - roots4 := slices.Clone(rootServersV4) - slicesx.Shuffle(roots4) - roots4 = roots4[:numStartingServers] - - var roots6 []netip.Addr - if !r.NoIPv6 { - roots6 = slices.Clone(rootServersV6) - slicesx.Shuffle(roots6) - roots6 = roots6[:numStartingServers] - } - - // Interleave the root servers so that we try to contact them over - // IPv4, then IPv6, IPv4, IPv6, etc. - rootServers = slicesx.Interleave(roots4, roots6) - } - - return &queryState{ - rootServers: rootServers, - } -} - -// Resolve will perform a recursive DNS resolution for the provided name, -// starting at a randomly-chosen root DNS server, and return the A and AAAA -// responses as a slice of netip.Addrs along with the minimum TTL for the -// returned records. -func (r *Resolver) Resolve(ctx context.Context, name string) (addrs []netip.Addr, minTTL time.Duration, err error) { - dnsName, err := dnsname.ToFQDN(name) - if err != nil { - return nil, 0, err - } - - qstate := r.newState() - - r.logf("querying IPv4 addresses for: %q", name) - addrs4, minTTL4, err4 := r.resolveRecursiveFromRoot(ctx, qstate, 0, dnsName, qtypeA) - - var ( - addrs6 []netip.Addr - minTTL6 time.Duration - err6 error - ) - if !r.NoIPv6 { - r.logf("querying IPv6 addresses for: %q", name) - addrs6, minTTL6, err6 = r.resolveRecursiveFromRoot(ctx, qstate, 0, dnsName, qtypeAAAA) - } - - if err4 != nil && err6 != nil { - if err4 == err6 { - return nil, 0, err4 - } - - return nil, 0, multierr.New(err4, err6) - } - if err4 != nil { - return addrs6, minTTL6, nil - } else if err6 != nil { - return addrs4, minTTL4, nil - } - - minTTL = minTTL4 - if minTTL6 < minTTL { - minTTL = minTTL6 - } - - addrs = append(addrs4, addrs6...) - if len(addrs) == 0 { - return nil, 0, ErrNoResponses - } - - slicesx.Shuffle(addrs) - return addrs, minTTL, nil -} - -func (r *Resolver) resolveRecursiveFromRoot( - ctx context.Context, - qstate *queryState, - depth int, - name dnsname.FQDN, // what we're querying - qtype dns.Type, -) ([]netip.Addr, time.Duration, error) { - r.depthlogf(depth, "resolving %q from root (type: %v)", name, qtype) - - var depthError bool - for _, server := range qstate.rootServers { - addrs, minTTL, err := r.resolveRecursive(ctx, qstate, depth, name, server, qtype) - if err == nil { - return addrs, minTTL, err - } else if errors.Is(err, ErrAuthoritativeNoResponses) { - return nil, 0, ErrAuthoritativeNoResponses - } else if errors.Is(err, ErrMaxDepth) { - depthError = true - } - } - - if depthError { - return nil, 0, ErrMaxDepth - } - return nil, 0, ErrNoResponses -} - -func (r *Resolver) resolveRecursive( - ctx context.Context, - qstate *queryState, - depth int, - name dnsname.FQDN, // what we're querying - nameserver netip.Addr, - qtype dns.Type, -) ([]netip.Addr, time.Duration, error) { - if depth == maxDepth { - r.depthlogf(depth, "not recursing past maximum depth") - return nil, 0, ErrMaxDepth - } - - // Ask this nameserver for an answer. - resp, err := r.queryNameserver(ctx, depth, name, nameserver, qtype) - if err != nil { - return nil, 0, err - } - - // If we get an actual answer from the nameserver, then return it. - var ( - answers []netip.Addr - cnames []dnsname.FQDN - minTTL = 24 * 60 * 60 // 24 hours in seconds - ) - for _, answer := range resp.Answer { - if crec, ok := answer.(*dns.CNAME); ok { - cnameFQDN, err := dnsname.ToFQDN(crec.Target) - if err != nil { - r.logf("bad CNAME %q returned: %v", crec.Target, err) - continue - } - - cnames = append(cnames, cnameFQDN) - continue - } - - addr := addrFromRecord(answer) - if !addr.IsValid() { - r.logf("[unexpected] invalid record in %T answer", answer) - } else if addr.Is4() && qtype != qtypeA { - r.logf("[unexpected] got IPv4 answer but qtype=%v", qtype) - } else if addr.Is6() && qtype != qtypeAAAA { - r.logf("[unexpected] got IPv6 answer but qtype=%v", qtype) - } else { - answers = append(answers, addr) - minTTL = min(minTTL, int(answer.Header().Ttl)) - } - } - - if len(answers) > 0 { - r.depthlogf(depth, "got answers for %q: %v", name, answers) - return answers, time.Duration(minTTL) * time.Second, nil - } - - r.depthlogf(depth, "no answers for %q", name) - - // If we have a non-zero number of CNAMEs, then try resolving those - // (from the root again) and return the first one that succeeds. - // - // TODO: return the union of all responses? - // TODO: parallelism? - if len(cnames) > 0 { - r.depthlogf(depth, "got CNAME responses for %q: %v", name, cnames) - } - var cnameDepthError bool - for _, cname := range cnames { - answers, minTTL, err := r.resolveRecursiveFromRoot(ctx, qstate, depth+1, cname, qtype) - if err == nil { - return answers, minTTL, nil - } else if errors.Is(err, ErrAuthoritativeNoResponses) { - return nil, 0, ErrAuthoritativeNoResponses - } else if errors.Is(err, ErrMaxDepth) { - cnameDepthError = true - } - } - - // If this is an authoritative response, then we know that continuing - // to look further is not going to result in any answers and we should - // bail out. - if resp.MsgHdr.Authoritative { - // If we failed to recurse into a CNAME due to a depth limit, - // propagate that here. - if cnameDepthError { - return nil, 0, ErrMaxDepth - } - - r.depthlogf(depth, "got authoritative response with no answers; stopping") - return nil, 0, ErrAuthoritativeNoResponses - } - - r.depthlogf(depth, "got %d NS responses and %d ADDITIONAL responses for %q", len(resp.Ns), len(resp.Extra), name) - - // No CNAMEs and no answers; see if we got any AUTHORITY responses, - // which indicate which nameservers to query next. - var authorities []dnsname.FQDN - for _, rr := range resp.Ns { - ns, ok := rr.(*dns.NS) - if !ok { - continue - } - - nsName, err := dnsname.ToFQDN(ns.Ns) - if err != nil { - r.logf("unexpected bad NS name %q: %v", ns.Ns, err) - continue - } - - authorities = append(authorities, nsName) - } - - // Also check for "glue" records, which are IP addresses provided by - // the DNS server for authority responses; these are required when the - // authority server is a subdomain of what's being resolved. - glueRecords := make(map[dnsname.FQDN][]netip.Addr) - for _, rr := range resp.Extra { - name, err := dnsname.ToFQDN(rr.Header().Name) - if err != nil { - r.logf("unexpected bad Name %q in Extra addr: %v", rr.Header().Name, err) - continue - } - - if addr := addrFromRecord(rr); addr.IsValid() { - glueRecords[name] = append(glueRecords[name], addr) - } else { - r.logf("unexpected bad Extra %T addr", rr) - } - } - - // Try authorities with glue records first, to minimize the number of - // additional DNS queries that we need to make. - authoritiesGlue, authoritiesNoGlue := slicesx.Partition(authorities, func(aa dnsname.FQDN) bool { - return len(glueRecords[aa]) > 0 - }) - - authorityDepthError := false - - r.depthlogf(depth, "authorities with glue records for recursion: %v", authoritiesGlue) - for _, authority := range authoritiesGlue { - for _, nameserver := range glueRecords[authority] { - answers, minTTL, err := r.resolveRecursive(ctx, qstate, depth+1, name, nameserver, qtype) - if err == nil { - return answers, minTTL, nil - } else if errors.Is(err, ErrAuthoritativeNoResponses) { - return nil, 0, ErrAuthoritativeNoResponses - } else if errors.Is(err, ErrMaxDepth) { - authorityDepthError = true - } - } - } - - r.depthlogf(depth, "authorities with no glue records for recursion: %v", authoritiesNoGlue) - for _, authority := range authoritiesNoGlue { - // First, resolve the IP for the authority server from the - // root, querying for both IPv4 and IPv6 addresses regardless - // of what the current question type is. - // - // TODO: check for infinite recursion; it'll get caught by our - // recursion depth, but we want to bail early. - for _, authorityQtype := range []dns.Type{qtypeAAAA, qtypeA} { - answers, _, err := r.resolveRecursiveFromRoot(ctx, qstate, depth+1, authority, authorityQtype) - if err != nil { - r.depthlogf(depth, "error querying authority %q: %v", authority, err) - continue - } - r.depthlogf(depth, "resolved authority %q (type %v) to: %v", authority, authorityQtype, answers) - - // Now, query this authority for the final address. - for _, nameserver := range answers { - answers, minTTL, err := r.resolveRecursive(ctx, qstate, depth+1, name, nameserver, qtype) - if err == nil { - return answers, minTTL, nil - } else if errors.Is(err, ErrAuthoritativeNoResponses) { - return nil, 0, ErrAuthoritativeNoResponses - } else if errors.Is(err, ErrMaxDepth) { - authorityDepthError = true - } - } - } - } - - if authorityDepthError { - return nil, 0, ErrMaxDepth - } - return nil, 0, ErrNoResponses -} - -// queryNameserver sends a query for "name" to the nameserver "nameserver" for -// records of type "qtype", trying both UDP and TCP connections as -// appropriate. -func (r *Resolver) queryNameserver( - ctx context.Context, - depth int, - name dnsname.FQDN, // what we're querying - nameserver netip.Addr, // destination of query - qtype dns.Type, -) (*dns.Msg, error) { - // TODO(andrew): we should QNAME minimisation here to avoid sending the - // full name to intermediate/root nameservers. See: - // https://www.rfc-editor.org/rfc/rfc7816 - - // Handle the case where UDP is blocked by adding an explicit timeout - // for the UDP portion of this query. - udpCtx, udpCtxCancel := context.WithTimeout(ctx, udpQueryTimeout) - defer udpCtxCancel() - - msg, err := r.queryNameserverProto(udpCtx, depth, name, nameserver, "udp", qtype) - if err == nil { - return msg, nil - } - - msg, err2 := r.queryNameserverProto(ctx, depth, name, nameserver, "tcp", qtype) - if err2 == nil { - return msg, nil - } - - return nil, multierr.New(err, err2) -} - -// queryNameserverProto sends a query for "name" to the nameserver "nameserver" -// for records of type "qtype" over the provided protocol (either "udp" -// or "tcp"), and returns the DNS response or an error. -func (r *Resolver) queryNameserverProto( - ctx context.Context, - depth int, - name dnsname.FQDN, // what we're querying - nameserver netip.Addr, // destination of query - protocol string, - qtype dns.Type, -) (resp *dns.Msg, err error) { - if r.testQueryHook != nil { - return r.testQueryHook(name, nameserver, protocol, qtype) - } - - now := r.now() - nameserverStr := nameserver.String() - - cacheKey := dnsQuery{ - nameserver: nameserver, - name: name, - qtype: qtype, - } - cacheEntry, ok := r.queryCache[cacheKey] - if ok && cacheEntry.expiresAt.Before(now) { - r.depthlogf(depth, "using cached response from %s about %q (type: %v)", nameserverStr, name, qtype) - return cacheEntry.Msg, nil - } - - var network string - if nameserver.Is4() { - network = protocol + "4" - } else { - network = protocol + "6" - } - - // Prepare a message asking for an appropriately-typed record - // for the name we're querying. - m := new(dns.Msg) - m.SetEdns0(1232, false /* no DNSSEC */) - m.SetQuestion(name.WithTrailingDot(), uint16(qtype)) - - // Allow mocking out the network components with our exchange hook. - if r.testExchangeHook != nil { - resp, err = r.testExchangeHook(nameserver, network, m) - } else { - // Dial the current nameserver using our dialer. - var nconn net.Conn - nconn, err = r.dialer().DialContext(ctx, network, net.JoinHostPort(nameserverStr, "53")) - if err != nil { - return nil, err - } - - var c dns.Client // TODO: share? - conn := &dns.Conn{ - Conn: nconn, - UDPSize: c.UDPSize, - } - - // Send the DNS request to the current nameserver. - r.depthlogf(depth, "asking %s over %s about %q (type: %v)", nameserverStr, protocol, name, qtype) - resp, _, err = c.ExchangeWithConnContext(ctx, m, conn) - } - if err != nil { - return nil, err - } - - // If the message was truncated and we're using UDP, re-run with TCP. - if resp.MsgHdr.Truncated && protocol == "udp" { - r.depthlogf(depth, "response message truncated; re-running query with TCP") - resp, err = r.queryNameserverProto(ctx, depth, name, nameserver, "tcp", qtype) - if err != nil { - return nil, err - } - } - - // Find minimum expiry for all records in this message. - var minTTL int - for _, rr := range resp.Answer { - minTTL = min(minTTL, int(rr.Header().Ttl)) - } - for _, rr := range resp.Ns { - minTTL = min(minTTL, int(rr.Header().Ttl)) - } - for _, rr := range resp.Extra { - minTTL = min(minTTL, int(rr.Header().Ttl)) - } - - mak.Set(&r.queryCache, cacheKey, dnsMsgWithExpiry{ - Msg: resp, - expiresAt: now.Add(time.Duration(minTTL) * time.Second), - }) - return resp, nil -} - -func addrFromRecord(rr dns.RR) netip.Addr { - switch v := rr.(type) { - case *dns.A: - ip, ok := netip.AddrFromSlice(v.A) - if !ok || !ip.Is4() { - return netip.Addr{} - } - return ip - case *dns.AAAA: - ip, ok := netip.AddrFromSlice(v.AAAA) - if !ok || !ip.Is6() { - return netip.Addr{} - } - return ip - } - return netip.Addr{} -} diff --git a/net/dns/recursive/recursive_test.go b/net/dns/recursive/recursive_test.go deleted file mode 100644 index d47e4cebf..000000000 --- a/net/dns/recursive/recursive_test.go +++ /dev/null @@ -1,742 +0,0 @@ -// Copyright (c) Tailscale Inc & AUTHORS -// SPDX-License-Identifier: BSD-3-Clause - -package recursive - -import ( - "context" - "errors" - "flag" - "fmt" - "net" - "net/netip" - "reflect" - "strings" - "testing" - "time" - - "slices" - - "github.com/miekg/dns" - "tailscale.com/envknob" - "tailscale.com/tstest" -) - -const testDomain = "tailscale.com" - -// Recursively resolving the AWS console requires being able to handle CNAMEs, -// glue records, falling back from UDP to TCP for oversize queries, and more; -// it's a great integration test for DNS resolution and they can handle the -// traffic :) -const complicatedTestDomain = "console.aws.amazon.com" - -var flagNetworkAccess = flag.Bool("enable-network-access", false, "run tests that need external network access") - -func init() { - envknob.Setenv("TS_DEBUG_RECURSIVE_DNS", "true") -} - -func newResolver(tb testing.TB) *Resolver { - clock := tstest.NewClock(tstest.ClockOpts{ - Step: 50 * time.Millisecond, - }) - return &Resolver{ - Logf: tb.Logf, - timeNow: clock.Now, - } -} - -func TestResolve(t *testing.T) { - if !*flagNetworkAccess { - t.SkipNow() - } - - ctx := context.Background() - r := newResolver(t) - addrs, minTTL, err := r.Resolve(ctx, testDomain) - if err != nil { - t.Fatal(err) - } - - t.Logf("addrs: %+v", addrs) - t.Logf("minTTL: %v", minTTL) - if len(addrs) < 1 { - t.Fatalf("expected at least one address") - } - - if minTTL <= 10*time.Second || minTTL >= 24*time.Hour { - t.Errorf("invalid minimum TTL: %v", minTTL) - } - - var has4, has6 bool - for _, addr := range addrs { - has4 = has4 || addr.Is4() - has6 = has6 || addr.Is6() - } - - if !has4 { - t.Errorf("expected at least one IPv4 address") - } - if !has6 { - t.Errorf("expected at least one IPv6 address") - } -} - -func TestResolveComplicated(t *testing.T) { - if !*flagNetworkAccess { - t.SkipNow() - } - - ctx := context.Background() - r := newResolver(t) - addrs, minTTL, err := r.Resolve(ctx, complicatedTestDomain) - if err != nil { - t.Fatal(err) - } - - t.Logf("addrs: %+v", addrs) - t.Logf("minTTL: %v", minTTL) - if len(addrs) < 1 { - t.Fatalf("expected at least one address") - } - - if minTTL <= 10*time.Second || minTTL >= 24*time.Hour { - t.Errorf("invalid minimum TTL: %v", minTTL) - } -} - -func TestResolveNoIPv6(t *testing.T) { - if !*flagNetworkAccess { - t.SkipNow() - } - - r := newResolver(t) - r.NoIPv6 = true - - addrs, _, err := r.Resolve(context.Background(), testDomain) - if err != nil { - t.Fatal(err) - } - - t.Logf("addrs: %+v", addrs) - if len(addrs) < 1 { - t.Fatalf("expected at least one address") - } - - for _, addr := range addrs { - if addr.Is6() { - t.Errorf("got unexpected IPv6 address: %v", addr) - } - } -} - -func TestResolveFallbackToTCP(t *testing.T) { - var udpCalls, tcpCalls int - hook := func(nameserver netip.Addr, network string, req *dns.Msg) (*dns.Msg, error) { - if strings.HasPrefix(network, "udp") { - t.Logf("got %q query; returning truncated result", network) - udpCalls++ - resp := &dns.Msg{} - resp.SetReply(req) - resp.Truncated = true - return resp, nil - } - - t.Logf("got %q query; returning real result", network) - tcpCalls++ - resp := &dns.Msg{} - resp.SetReply(req) - resp.Answer = append(resp.Answer, &dns.A{ - Hdr: dns.RR_Header{ - Name: req.Question[0].Name, - Rrtype: req.Question[0].Qtype, - Class: dns.ClassINET, - Ttl: 300, - }, - A: net.IPv4(1, 2, 3, 4), - }) - return resp, nil - } - - r := newResolver(t) - r.testExchangeHook = hook - - ctx := context.Background() - resp, err := r.queryNameserverProto(ctx, 0, "tailscale.com", netip.MustParseAddr("9.9.9.9"), "udp", dns.Type(dns.TypeA)) - if err != nil { - t.Fatal(err) - } - - if len(resp.Answer) < 1 { - t.Fatalf("no answers in response: %v", resp) - } - rrA, ok := resp.Answer[0].(*dns.A) - if !ok { - t.Fatalf("invalid RR type: %T", resp.Answer[0]) - } - if !rrA.A.Equal(net.IPv4(1, 2, 3, 4)) { - t.Errorf("wanted A response 1.2.3.4, got: %v", rrA.A) - } - if tcpCalls != 1 { - t.Errorf("got %d, want 1 TCP calls", tcpCalls) - } - if udpCalls != 1 { - t.Errorf("got %d, want 1 UDP calls", udpCalls) - } - - // Verify that we're cached and re-run to fetch from the cache. - if len(r.queryCache) < 1 { - t.Errorf("wanted entries in the query cache") - } - - resp2, err := r.queryNameserverProto(ctx, 0, "tailscale.com", netip.MustParseAddr("9.9.9.9"), "udp", dns.Type(dns.TypeA)) - if err != nil { - t.Fatal(err) - } - if !reflect.DeepEqual(resp, resp2) { - t.Errorf("expected equal responses; old=%+v new=%+v", resp, resp2) - } - - // We didn't make any more network requests since we loaded from the cache. - if tcpCalls != 1 { - t.Errorf("got %d, want 1 TCP calls", tcpCalls) - } - if udpCalls != 1 { - t.Errorf("got %d, want 1 UDP calls", udpCalls) - } -} - -func dnsIPRR(name string, addr netip.Addr) dns.RR { - if addr.Is4() { - return &dns.A{ - Hdr: dns.RR_Header{ - Name: name, - Rrtype: dns.TypeA, - Class: dns.ClassINET, - Ttl: 300, - }, - A: net.IP(addr.AsSlice()), - } - } - - return &dns.AAAA{ - Hdr: dns.RR_Header{ - Name: name, - Rrtype: dns.TypeAAAA, - Class: dns.ClassINET, - Ttl: 300, - }, - AAAA: net.IP(addr.AsSlice()), - } -} - -func cnameRR(name, target string) dns.RR { - return &dns.CNAME{ - Hdr: dns.RR_Header{ - Name: name, - Rrtype: dns.TypeCNAME, - Class: dns.ClassINET, - Ttl: 300, - }, - Target: target, - } -} - -func nsRR(name, target string) dns.RR { - return &dns.NS{ - Hdr: dns.RR_Header{ - Name: name, - Rrtype: dns.TypeNS, - Class: dns.ClassINET, - Ttl: 300, - }, - Ns: target, - } -} - -type mockReply struct { - name string - qtype dns.Type - resp *dns.Msg -} - -type replyMock struct { - tb testing.TB - replies map[netip.Addr][]mockReply -} - -func (r *replyMock) exchangeHook(nameserver netip.Addr, network string, req *dns.Msg) (*dns.Msg, error) { - if len(req.Question) != 1 { - r.tb.Fatalf("unsupported multiple or empty question: %v", req.Question) - } - question := req.Question[0] - - replies := r.replies[nameserver] - if len(replies) == 0 { - r.tb.Fatalf("no configured replies for nameserver: %v", nameserver) - } - - for _, reply := range replies { - if reply.name == question.Name && reply.qtype == dns.Type(question.Qtype) { - return reply.resp.Copy(), nil - } - } - - r.tb.Fatalf("no replies found for query %q of type %v to %v", question.Name, question.Qtype, nameserver) - panic("unreachable") -} - -// responses for mocking, shared between the following tests -var ( - rootServerAddr = netip.MustParseAddr("198.41.0.4") // a.root-servers.net. - comNSAddr = netip.MustParseAddr("192.5.6.30") // a.gtld-servers.net. - - // DNS response from the root nameservers for a .com nameserver - comRecord = &dns.Msg{ - Ns: []dns.RR{nsRR("com.", "a.gtld-servers.net.")}, - Extra: []dns.RR{dnsIPRR("a.gtld-servers.net.", comNSAddr)}, - } - - // Random Amazon nameservers that we use in glue records - amazonNS = netip.MustParseAddr("205.251.192.197") - amazonNSv6 = netip.MustParseAddr("2600:9000:5306:1600::1") - - // Nameservers for the tailscale.com domain - tailscaleNameservers = &dns.Msg{ - Ns: []dns.RR{ - nsRR("tailscale.com.", "ns-197.awsdns-24.com."), - nsRR("tailscale.com.", "ns-557.awsdns-05.net."), - nsRR("tailscale.com.", "ns-1558.awsdns-02.co.uk."), - nsRR("tailscale.com.", "ns-1359.awsdns-41.org."), - }, - Extra: []dns.RR{ - dnsIPRR("ns-197.awsdns-24.com.", amazonNS), - }, - } -) - -func TestBasicRecursion(t *testing.T) { - mock := &replyMock{ - tb: t, - replies: map[netip.Addr][]mockReply{ - // Query to the root server returns the .com server + a glue record - rootServerAddr: { - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: comRecord}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: comRecord}, - }, - - // Query to the ".com" server return the nameservers for tailscale.com - comNSAddr: { - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: tailscaleNameservers}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: tailscaleNameservers}, - }, - - // Query to the actual nameserver works. - amazonNS: { - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{ - dnsIPRR("tailscale.com.", netip.MustParseAddr("13.248.141.131")), - dnsIPRR("tailscale.com.", netip.MustParseAddr("76.223.15.28")), - }, - }}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{ - dnsIPRR("tailscale.com.", netip.MustParseAddr("2600:9000:a602:b1e6:86d:8165:5e8c:295b")), - dnsIPRR("tailscale.com.", netip.MustParseAddr("2600:9000:a51d:27c1:1530:b9ef:2a6:b9e5")), - }, - }}, - }, - }, - } - - r := newResolver(t) - r.testExchangeHook = mock.exchangeHook - r.rootServers = []netip.Addr{rootServerAddr} - - // Query for tailscale.com, verify we get the right responses - ctx := context.Background() - addrs, minTTL, err := r.Resolve(ctx, "tailscale.com") - if err != nil { - t.Fatal(err) - } - wantAddrs := []netip.Addr{ - netip.MustParseAddr("13.248.141.131"), - netip.MustParseAddr("76.223.15.28"), - netip.MustParseAddr("2600:9000:a602:b1e6:86d:8165:5e8c:295b"), - netip.MustParseAddr("2600:9000:a51d:27c1:1530:b9ef:2a6:b9e5"), - } - slices.SortFunc(addrs, func(x, y netip.Addr) int { return strings.Compare(x.String(), y.String()) }) - slices.SortFunc(wantAddrs, func(x, y netip.Addr) int { return strings.Compare(x.String(), y.String()) }) - - if !reflect.DeepEqual(addrs, wantAddrs) { - t.Errorf("got addrs=%+v; want %+v", addrs, wantAddrs) - } - - const wantMinTTL = 5 * time.Minute - if minTTL != wantMinTTL { - t.Errorf("got minTTL=%+v; want %+v", minTTL, wantMinTTL) - } -} - -func TestNoAnswers(t *testing.T) { - mock := &replyMock{ - tb: t, - replies: map[netip.Addr][]mockReply{ - // Query to the root server returns the .com server + a glue record - rootServerAddr: { - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: comRecord}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: comRecord}, - }, - - // Query to the ".com" server return the nameservers for tailscale.com - comNSAddr: { - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: tailscaleNameservers}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: tailscaleNameservers}, - }, - - // Query to the actual nameserver returns no responses, authoritatively. - amazonNS: { - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{}, - }}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{}, - }}, - }, - }, - } - - r := &Resolver{ - Logf: t.Logf, - testExchangeHook: mock.exchangeHook, - rootServers: []netip.Addr{rootServerAddr}, - } - - // Query for tailscale.com, verify we get the right responses - _, _, err := r.Resolve(context.Background(), "tailscale.com") - if err == nil { - t.Fatalf("got no error, want error") - } - if !errors.Is(err, ErrAuthoritativeNoResponses) { - t.Fatalf("got err=%v, want %v", err, ErrAuthoritativeNoResponses) - } -} - -func TestRecursionCNAME(t *testing.T) { - mock := &replyMock{ - tb: t, - replies: map[netip.Addr][]mockReply{ - // Query to the root server returns the .com server + a glue record - rootServerAddr: { - {name: "subdomain.otherdomain.com.", qtype: dns.Type(dns.TypeA), resp: comRecord}, - {name: "subdomain.otherdomain.com.", qtype: dns.Type(dns.TypeAAAA), resp: comRecord}, - - {name: "subdomain.tailscale.com.", qtype: dns.Type(dns.TypeA), resp: comRecord}, - {name: "subdomain.tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: comRecord}, - }, - - // Query to the ".com" server return the nameservers for tailscale.com - comNSAddr: { - {name: "subdomain.otherdomain.com.", qtype: dns.Type(dns.TypeA), resp: tailscaleNameservers}, - {name: "subdomain.otherdomain.com.", qtype: dns.Type(dns.TypeAAAA), resp: tailscaleNameservers}, - - {name: "subdomain.tailscale.com.", qtype: dns.Type(dns.TypeA), resp: tailscaleNameservers}, - {name: "subdomain.tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: tailscaleNameservers}, - }, - - // Query to the actual nameserver works. - amazonNS: { - {name: "subdomain.otherdomain.com.", qtype: dns.Type(dns.TypeA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{cnameRR("subdomain.otherdomain.com.", "subdomain.tailscale.com.")}, - }}, - {name: "subdomain.otherdomain.com.", qtype: dns.Type(dns.TypeAAAA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{cnameRR("subdomain.otherdomain.com.", "subdomain.tailscale.com.")}, - }}, - - {name: "subdomain.tailscale.com.", qtype: dns.Type(dns.TypeA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{dnsIPRR("tailscale.com.", netip.MustParseAddr("13.248.141.131"))}, - }}, - {name: "subdomain.tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{dnsIPRR("tailscale.com.", netip.MustParseAddr("2600:9000:a602:b1e6:86d:8165:5e8c:295b"))}, - }}, - }, - }, - } - - r := &Resolver{ - Logf: t.Logf, - testExchangeHook: mock.exchangeHook, - rootServers: []netip.Addr{rootServerAddr}, - } - - // Query for tailscale.com, verify we get the right responses - addrs, minTTL, err := r.Resolve(context.Background(), "subdomain.otherdomain.com") - if err != nil { - t.Fatal(err) - } - wantAddrs := []netip.Addr{ - netip.MustParseAddr("13.248.141.131"), - netip.MustParseAddr("2600:9000:a602:b1e6:86d:8165:5e8c:295b"), - } - slices.SortFunc(addrs, func(x, y netip.Addr) int { return strings.Compare(x.String(), y.String()) }) - slices.SortFunc(wantAddrs, func(x, y netip.Addr) int { return strings.Compare(x.String(), y.String()) }) - - if !reflect.DeepEqual(addrs, wantAddrs) { - t.Errorf("got addrs=%+v; want %+v", addrs, wantAddrs) - } - - const wantMinTTL = 5 * time.Minute - if minTTL != wantMinTTL { - t.Errorf("got minTTL=%+v; want %+v", minTTL, wantMinTTL) - } -} - -func TestRecursionNoGlue(t *testing.T) { - coukNS := netip.MustParseAddr("213.248.216.1") - coukRecord := &dns.Msg{ - Ns: []dns.RR{nsRR("com.", "dns1.nic.uk.")}, - Extra: []dns.RR{dnsIPRR("dns1.nic.uk.", coukNS)}, - } - - intermediateNS := netip.MustParseAddr("205.251.193.66") // g-ns-322.awsdns-02.co.uk. - intermediateRecord := &dns.Msg{ - Ns: []dns.RR{nsRR("awsdns-02.co.uk.", "g-ns-322.awsdns-02.co.uk.")}, - Extra: []dns.RR{dnsIPRR("g-ns-322.awsdns-02.co.uk.", intermediateNS)}, - } - - const amazonNameserver = "ns-1558.awsdns-02.co.uk." - tailscaleNameservers := &dns.Msg{ - Ns: []dns.RR{ - nsRR("tailscale.com.", amazonNameserver), - }, - } - - tailscaleResponses := []mockReply{ - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{dnsIPRR("tailscale.com.", netip.MustParseAddr("13.248.141.131"))}, - }}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{dnsIPRR("tailscale.com.", netip.MustParseAddr("2600:9000:a602:b1e6:86d:8165:5e8c:295b"))}, - }}, - } - - mock := &replyMock{ - tb: t, - replies: map[netip.Addr][]mockReply{ - rootServerAddr: { - // Query to the root server returns the .com server + a glue record - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: comRecord}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: comRecord}, - - // Querying the .co.uk nameserver returns the .co.uk nameserver + a glue record. - {name: amazonNameserver, qtype: dns.Type(dns.TypeA), resp: coukRecord}, - {name: amazonNameserver, qtype: dns.Type(dns.TypeAAAA), resp: coukRecord}, - }, - - // Queries to the ".com" server return the nameservers - // for tailscale.com, which don't contain a glue - // record. - comNSAddr: { - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: tailscaleNameservers}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: tailscaleNameservers}, - }, - - // Queries to the ".co.uk" nameserver returns the - // address of the intermediate Amazon nameserver. - coukNS: { - {name: amazonNameserver, qtype: dns.Type(dns.TypeA), resp: intermediateRecord}, - {name: amazonNameserver, qtype: dns.Type(dns.TypeAAAA), resp: intermediateRecord}, - }, - - // Queries to the intermediate nameserver returns an - // answer for the final Amazon nameserver. - intermediateNS: { - {name: amazonNameserver, qtype: dns.Type(dns.TypeA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{dnsIPRR(amazonNameserver, amazonNS)}, - }}, - {name: amazonNameserver, qtype: dns.Type(dns.TypeAAAA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{dnsIPRR(amazonNameserver, amazonNSv6)}, - }}, - }, - - // Queries to the actual nameserver work and return - // responses to the query. - amazonNS: tailscaleResponses, - amazonNSv6: tailscaleResponses, - }, - } - - r := newResolver(t) - r.testExchangeHook = mock.exchangeHook - r.rootServers = []netip.Addr{rootServerAddr} - - // Query for tailscale.com, verify we get the right responses - addrs, minTTL, err := r.Resolve(context.Background(), "tailscale.com") - if err != nil { - t.Fatal(err) - } - wantAddrs := []netip.Addr{ - netip.MustParseAddr("13.248.141.131"), - netip.MustParseAddr("2600:9000:a602:b1e6:86d:8165:5e8c:295b"), - } - slices.SortFunc(addrs, func(x, y netip.Addr) int { return strings.Compare(x.String(), y.String()) }) - slices.SortFunc(wantAddrs, func(x, y netip.Addr) int { return strings.Compare(x.String(), y.String()) }) - - if !reflect.DeepEqual(addrs, wantAddrs) { - t.Errorf("got addrs=%+v; want %+v", addrs, wantAddrs) - } - - const wantMinTTL = 5 * time.Minute - if minTTL != wantMinTTL { - t.Errorf("got minTTL=%+v; want %+v", minTTL, wantMinTTL) - } -} - -func TestRecursionLimit(t *testing.T) { - mock := &replyMock{ - tb: t, - replies: map[netip.Addr][]mockReply{}, - } - - // Fill out a CNAME chain equal to our recursion limit; we won't get - // this far since each CNAME is more than 1 level "deep", but this - // ensures that we have more than the limit. - for i := range maxDepth + 1 { - curr := fmt.Sprintf("%d-tailscale.com.", i) - - tailscaleNameservers := &dns.Msg{ - Ns: []dns.RR{nsRR(curr, "ns-197.awsdns-24.com.")}, - Extra: []dns.RR{dnsIPRR("ns-197.awsdns-24.com.", amazonNS)}, - } - - // Query to the root server returns the .com server + a glue record - mock.replies[rootServerAddr] = append(mock.replies[rootServerAddr], - mockReply{name: curr, qtype: dns.Type(dns.TypeA), resp: comRecord}, - mockReply{name: curr, qtype: dns.Type(dns.TypeAAAA), resp: comRecord}, - ) - - // Query to the ".com" server return the nameservers for NN-tailscale.com - mock.replies[comNSAddr] = append(mock.replies[comNSAddr], - mockReply{name: curr, qtype: dns.Type(dns.TypeA), resp: tailscaleNameservers}, - mockReply{name: curr, qtype: dns.Type(dns.TypeAAAA), resp: tailscaleNameservers}, - ) - - // Queries to the nameserver return a CNAME for the n+1th server. - next := fmt.Sprintf("%d-tailscale.com.", i+1) - mock.replies[amazonNS] = append(mock.replies[amazonNS], - mockReply{ - name: curr, - qtype: dns.Type(dns.TypeA), - resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{cnameRR(curr, next)}, - }, - }, - mockReply{ - name: curr, - qtype: dns.Type(dns.TypeAAAA), - resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{cnameRR(curr, next)}, - }, - }, - ) - } - - r := newResolver(t) - r.testExchangeHook = mock.exchangeHook - r.rootServers = []netip.Addr{rootServerAddr} - - // Query for the first node in the chain, 0-tailscale.com, and verify - // we get a max-depth error. - ctx := context.Background() - _, _, err := r.Resolve(ctx, "0-tailscale.com") - if err == nil { - t.Fatal("expected error, got nil") - } else if !errors.Is(err, ErrMaxDepth) { - t.Fatalf("got err=%v, want ErrMaxDepth", err) - } -} - -func TestInvalidResponses(t *testing.T) { - mock := &replyMock{ - tb: t, - replies: map[netip.Addr][]mockReply{ - // Query to the root server returns the .com server + a glue record - rootServerAddr: { - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: comRecord}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: comRecord}, - }, - - // Query to the ".com" server return the nameservers for tailscale.com - comNSAddr: { - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: tailscaleNameservers}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: tailscaleNameservers}, - }, - - // Query to the actual nameserver returns an invalid IP address - amazonNS: { - {name: "tailscale.com.", qtype: dns.Type(dns.TypeA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - Answer: []dns.RR{&dns.A{ - Hdr: dns.RR_Header{ - Name: "tailscale.com.", - Rrtype: dns.TypeA, - Class: dns.ClassINET, - Ttl: 300, - }, - // Note: this is an IPv6 addr in an IPv4 response - A: net.IP(netip.MustParseAddr("2600:9000:a51d:27c1:1530:b9ef:2a6:b9e5").AsSlice()), - }}, - }}, - {name: "tailscale.com.", qtype: dns.Type(dns.TypeAAAA), resp: &dns.Msg{ - MsgHdr: dns.MsgHdr{Authoritative: true}, - // This an IPv4 response to an IPv6 query - Answer: []dns.RR{&dns.A{ - Hdr: dns.RR_Header{ - Name: "tailscale.com.", - Rrtype: dns.TypeA, - Class: dns.ClassINET, - Ttl: 300, - }, - A: net.IP(netip.MustParseAddr("13.248.141.131").AsSlice()), - }}, - }}, - }, - }, - } - - r := &Resolver{ - Logf: t.Logf, - testExchangeHook: mock.exchangeHook, - rootServers: []netip.Addr{rootServerAddr}, - } - - // Query for tailscale.com, verify we get no responses since the - // addresses are invalid. - _, _, err := r.Resolve(context.Background(), "tailscale.com") - if err == nil { - t.Fatalf("got no error, want error") - } - if !errors.Is(err, ErrAuthoritativeNoResponses) { - t.Fatalf("got err=%v, want %v", err, ErrAuthoritativeNoResponses) - } -} - -// TODO(andrew): test for more edge cases that aren't currently covered: -// * Nameservers that cross between IPv4 and IPv6 -// * Authoritative no replies after following CNAME -// * Authoritative no replies after following non-glue NS record -// * Error querying non-glue NS record followed by success diff --git a/net/dnsfallback/dnsfallback.go b/net/dnsfallback/dnsfallback.go index 8e53c3b29..9843d46f9 100644 --- a/net/dnsfallback/dnsfallback.go +++ b/net/dnsfallback/dnsfallback.go @@ -22,35 +22,20 @@ import ( "net/url" "os" "reflect" - "slices" "sync/atomic" "time" "tailscale.com/atomicfile" - "tailscale.com/envknob" "tailscale.com/health" - "tailscale.com/net/dns/recursive" "tailscale.com/net/netmon" "tailscale.com/net/netns" "tailscale.com/net/tlsdial" "tailscale.com/net/tshttpproxy" "tailscale.com/tailcfg" "tailscale.com/types/logger" - "tailscale.com/util/clientmetric" - "tailscale.com/util/singleflight" "tailscale.com/util/slicesx" ) -var ( - optRecursiveResolver = envknob.RegisterOptBool("TS_DNSFALLBACK_RECURSIVE_RESOLVER") - disableRecursiveResolver = envknob.RegisterBool("TS_DNSFALLBACK_DISABLE_RECURSIVE_RESOLVER") // legacy pre-1.52 env knob name -) - -type resolveResult struct { - addrs []netip.Addr - minTTL time.Duration -} - // MakeLookupFunc creates a function that can be used to resolve hostnames // (e.g. as a LookupIPFallback from dnscache.Resolver). // The netMon parameter is optional; if non-nil it's used to do faster interface lookups. @@ -68,145 +53,13 @@ type fallbackResolver struct { logf logger.Logf netMon *netmon.Monitor // or nil healthTracker *health.Tracker // or nil - sf singleflight.Group[string, resolveResult] // for tests waitForCompare bool } func (fr *fallbackResolver) Lookup(ctx context.Context, host string) ([]netip.Addr, error) { - // If they've explicitly disabled the recursive resolver with the legacy - // TS_DNSFALLBACK_DISABLE_RECURSIVE_RESOLVER envknob or not set the - // newer TS_DNSFALLBACK_RECURSIVE_RESOLVER to true, then don't use the - // recursive resolver. (tailscale/corp#15261) In the future, we might - // change the default (the opt.Bool being unset) to mean enabled. - if disableRecursiveResolver() || !optRecursiveResolver().EqualBool(true) { - return lookup(ctx, host, fr.logf, fr.healthTracker, fr.netMon) - } - - addrsCh := make(chan []netip.Addr, 1) - - // Run the recursive resolver in the background so we can - // compare the results. For tests, we also allow waiting for the - // comparison to complete; normally, we do this entirely asynchronously - // so as not to block the caller. - var done chan struct{} - if fr.waitForCompare { - done = make(chan struct{}) - go func() { - defer close(done) - fr.compareWithRecursive(ctx, addrsCh, host) - }() - } else { - go fr.compareWithRecursive(ctx, addrsCh, host) - } - - addrs, err := lookup(ctx, host, fr.logf, fr.healthTracker, fr.netMon) - if err != nil { - addrsCh <- nil - return nil, err - } - - addrsCh <- slices.Clone(addrs) - if fr.waitForCompare { - select { - case <-done: - case <-ctx.Done(): - } - } - return addrs, nil -} - -// compareWithRecursive is responsible for comparing the DNS resolution -// performed via the "normal" path (bootstrap DNS requests to the DERP servers) -// with DNS resolution performed with our in-process recursive DNS resolver. -// -// It will select on addrsCh to read exactly one set of addrs (returned by the -// "normal" path) and compare against the results returned by the recursive -// resolver. If ctx is canceled, then it will abort. -func (fr *fallbackResolver) compareWithRecursive( - ctx context.Context, - addrsCh <-chan []netip.Addr, - host string, -) { - logf := logger.WithPrefix(fr.logf, "recursive: ") - - // Ensure that we catch panics while we're testing this - // code path; this should never panic, but we don't - // want to take down the process by having the panic - // propagate to the top of the goroutine's stack and - // then terminate. - defer func() { - if r := recover(); r != nil { - logf("bootstrap DNS: recovered panic: %v", r) - metricRecursiveErrors.Add(1) - } - }() - - // Don't resolve the same host multiple times - // concurrently; if we end up in a tight loop, this can - // take up a lot of CPU. - var didRun bool - result, err, _ := fr.sf.Do(host, func() (resolveResult, error) { - didRun = true - resolver := &recursive.Resolver{ - Dialer: netns.NewDialer(logf, fr.netMon), - Logf: logf, - } - addrs, minTTL, err := resolver.Resolve(ctx, host) - if err != nil { - logf("error using recursive resolver: %v", err) - metricRecursiveErrors.Add(1) - return resolveResult{}, err - } - return resolveResult{addrs, minTTL}, nil - }) - - // The singleflight function handled errors; return if - // there was one. Additionally, don't bother doing the - // comparison if we waited on another singleflight - // caller; the results are likely to be the same, so - // rather than spam the logs we can just exit and let - // the singleflight call that did execute do the - // comparison. - // - // Returning here is safe because the addrsCh channel - // is buffered, so the main function won't block even - // if we never read from it. - if err != nil || !didRun { - return - } - - addrs, minTTL := result.addrs, result.minTTL - compareAddr := func(a, b netip.Addr) int { return a.Compare(b) } - slices.SortFunc(addrs, compareAddr) - - // Wait for a response from the main function; try this once before we - // check whether the context is canceled since selects are - // nondeterministic. - var oldAddrs []netip.Addr - select { - case oldAddrs = <-addrsCh: - // All good; continue - default: - // Now block. - select { - case oldAddrs = <-addrsCh: - case <-ctx.Done(): - return - } - } - slices.SortFunc(oldAddrs, compareAddr) - - matches := slices.Equal(addrs, oldAddrs) - - logf("bootstrap DNS comparison: matches=%v oldAddrs=%v addrs=%v minTTL=%v", matches, oldAddrs, addrs, minTTL) - - if matches { - metricRecursiveMatches.Add(1) - } else { - metricRecursiveMismatches.Add(1) - } + return lookup(ctx, host, fr.logf, fr.healthTracker, fr.netMon) } func lookup(ctx context.Context, host string, logf logger.Logf, ht *health.Tracker, netMon *netmon.Monitor) ([]netip.Addr, error) { @@ -428,9 +281,3 @@ func SetCachePath(path string, logf logger.Logf) { cachedDERPMap.Store(dm) logf("[v2] dnsfallback: SetCachePath loaded cached DERP map") } - -var ( - metricRecursiveMatches = clientmetric.NewCounter("dnsfallback_recursive_matches") - metricRecursiveMismatches = clientmetric.NewCounter("dnsfallback_recursive_mismatches") - metricRecursiveErrors = clientmetric.NewCounter("dnsfallback_recursive_errors") -) diff --git a/tsnet/depaware.txt b/tsnet/depaware.txt index b3e2b7f0e..c115332fa 100644 --- a/tsnet/depaware.txt +++ b/tsnet/depaware.txt @@ -132,7 +132,6 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware) L github.com/mdlayher/netlink/nltest from github.com/google/nftables L github.com/mdlayher/sdnotify from tailscale.com/util/systemd LA 💣 github.com/mdlayher/socket from github.com/mdlayher/netlink+ - github.com/miekg/dns from tailscale.com/net/dns/recursive LDW 💣 github.com/mitchellh/go-ps from tailscale.com/safesocket DI github.com/prometheus-community/pro-bing from tailscale.com/wgengine/netstack L 💣 github.com/safchain/ethtool from tailscale.com/doctor/ethtool+ @@ -272,7 +271,6 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware) tailscale.com/net/connstats from tailscale.com/net/tstun+ tailscale.com/net/dns from tailscale.com/ipn/ipnlocal+ tailscale.com/net/dns/publicdns from tailscale.com/net/dns+ - tailscale.com/net/dns/recursive from tailscale.com/net/dnsfallback tailscale.com/net/dns/resolvconffile from tailscale.com/net/dns+ tailscale.com/net/dns/resolver from tailscale.com/net/dns+ tailscale.com/net/dnscache from tailscale.com/control/controlclient+ @@ -448,8 +446,8 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware) golang.org/x/net/internal/iana from golang.org/x/net/icmp+ golang.org/x/net/internal/socket from golang.org/x/net/icmp+ LDW golang.org/x/net/internal/socks from golang.org/x/net/proxy - golang.org/x/net/ipv4 from github.com/miekg/dns+ - golang.org/x/net/ipv6 from github.com/miekg/dns+ + golang.org/x/net/ipv4 from github.com/prometheus-community/pro-bing+ + golang.org/x/net/ipv6 from github.com/prometheus-community/pro-bing+ LDW golang.org/x/net/proxy from tailscale.com/net/netns DI golang.org/x/net/route from tailscale.com/net/netmon+ golang.org/x/sync/errgroup from github.com/mdlayher/socket+