From aa37aece9c007de64042ba2f882b4f0c703b2c1b Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Wed, 29 Jun 2022 19:32:41 -0700 Subject: [PATCH] ipn/ipnlocal, net/dns*, util/cloudenv: add AWS DNS support And remove the GCP special-casing from ipn/ipnlocal; do it only in the forwarder for *.internal. Fixes #4980 Fixes #4981 Change-Id: I5c481e96d91f3d51d274a80fbd37c38f16dfa5cb Signed-off-by: Brad Fitzpatrick --- cmd/tailscaled/depaware.txt | 2 +- ipn/ipnlocal/dnsconfig_test.go | 38 +--------------- ipn/ipnlocal/local.go | 17 +------- net/dns/resolver/forwarder.go | 79 ++++++++++++++++++++++------------ net/dns/resolver/tsdns.go | 4 ++ net/dnscache/dnscache.go | 5 ++- util/cloudenv/cloudenv.go | 40 +++++++++++++++++ 7 files changed, 103 insertions(+), 82 deletions(-) diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index f5e9ce10b..e48597ee6 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -260,7 +260,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/types/structs from tailscale.com/control/controlclient+ tailscale.com/types/views from tailscale.com/ipn/ipnlocal+ tailscale.com/util/clientmetric from tailscale.com/control/controlclient+ - tailscale.com/util/cloudenv from tailscale.com/ipn/ipnlocal+ + tailscale.com/util/cloudenv from tailscale.com/net/dns/resolver+ LW tailscale.com/util/cmpver from tailscale.com/net/dns+ 💣 tailscale.com/util/deephash from tailscale.com/ipn/ipnlocal+ tailscale.com/util/dnsname from tailscale.com/hostinfo+ diff --git a/ipn/ipnlocal/dnsconfig_test.go b/ipn/ipnlocal/dnsconfig_test.go index b4c92dbbf..2a8cabfdf 100644 --- a/ipn/ipnlocal/dnsconfig_test.go +++ b/ipn/ipnlocal/dnsconfig_test.go @@ -306,42 +306,6 @@ func TestDNSConfigForNetmap(t *testing.T) { Routes: map[dnsname.FQDN][]*dnstype.Resolver{}, }, }, - { - name: "google_cloud", - nm: &netmap.NetworkMap{ - DNS: tailcfg.DNSConfig{}, - }, - cloud: cloudenv.GCP, - prefs: &ipn.Prefs{ - CorpDNS: true, - }, - want: &dns.Config{ - Hosts: map[dnsname.FQDN][]netaddr.IP{}, - Routes: map[dnsname.FQDN][]*dnstype.Resolver{ - "internal.": []*dnstype.Resolver{{Addr: cloudenv.GoogleMetadataAndDNSIP}}, - }, - }, - }, - { - name: "google_cloud_with_exiting_internal", - nm: &netmap.NetworkMap{ - DNS: tailcfg.DNSConfig{ - Routes: map[string][]*dnstype.Resolver{ - ".internal": []*dnstype.Resolver{{Addr: "1.2.3.4"}}, - }, - }, - }, - cloud: cloudenv.GCP, - prefs: &ipn.Prefs{ - CorpDNS: true, - }, - want: &dns.Config{ - Hosts: map[dnsname.FQDN][]netaddr.IP{}, - Routes: map[dnsname.FQDN][]*dnstype.Resolver{ - "internal.": []*dnstype.Resolver{{Addr: "1.2.3.4"}}, - }, - }, - }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -350,7 +314,7 @@ func TestDNSConfigForNetmap(t *testing.T) { verOS = "linux" } var log tstest.MemLogger - got := dnsConfigForNetmap(tt.nm, tt.prefs, log.Logf, verOS, tt.cloud) + got := dnsConfigForNetmap(tt.nm, tt.prefs, log.Logf, verOS) if !reflect.DeepEqual(got, tt.want) { gotj, _ := json.MarshalIndent(got, "", "\t") wantj, _ := json.MarshalIndent(tt.want, "", "\t") diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 99817caea..e9431a39a 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -48,7 +48,6 @@ import ( "tailscale.com/types/persist" "tailscale.com/types/preftype" "tailscale.com/types/views" - "tailscale.com/util/cloudenv" "tailscale.com/util/deephash" "tailscale.com/util/dnsname" "tailscale.com/util/multierr" @@ -2222,7 +2221,7 @@ func (b *LocalBackend) authReconfig() { } rcfg := b.routerConfig(cfg, prefs, oneCGNATRoute) - dcfg := dnsConfigForNetmap(nm, prefs, b.logf, version.OS(), cloudenv.Get()) + dcfg := dnsConfigForNetmap(nm, prefs, b.logf, version.OS()) err = b.e.Reconfig(cfg, rcfg, dcfg, nm.Debug) if err == wgengine.ErrNoChanges { @@ -2238,7 +2237,7 @@ func (b *LocalBackend) authReconfig() { // // The versionOS is a Tailscale-style version ("iOS", "macOS") and not // a runtime.GOOS. -func dnsConfigForNetmap(nm *netmap.NetworkMap, prefs *ipn.Prefs, logf logger.Logf, versionOS string, cloud cloudenv.Cloud) *dns.Config { +func dnsConfigForNetmap(nm *netmap.NetworkMap, prefs *ipn.Prefs, logf logger.Logf, versionOS string) *dns.Config { dcfg := &dns.Config{ Routes: map[dnsname.FQDN][]*dnstype.Resolver{}, Hosts: map[dnsname.FQDN][]netaddr.IP{}, @@ -2327,18 +2326,6 @@ func dnsConfigForNetmap(nm *netmap.NetworkMap, prefs *ipn.Prefs, logf logger.Log } } - // If we're running on Google Cloud Platform, add a DNS route for its - // *.internal DNS names to its metadata DNS IP, unless the tailnet already - // defines one. This is especially important on their standard VM images - // that don't included systemd-resolved, so we were effectively breaking - // their *.internal DNS names previously when the tailnet had explicit DNS - // servers set ("override local DNS" checked). - if cloud == cloudenv.GCP { - if _, ok := dcfg.Routes["internal."]; !ok { - dcfg.Routes["internal."] = []*dnstype.Resolver{{Addr: cloudenv.GoogleMetadataAndDNSIP}} - } - } - addDefault := func(resolvers []*dnstype.Resolver) { for _, r := range resolvers { dcfg.DefaultResolvers = append(dcfg.DefaultResolvers, r) diff --git a/net/dns/resolver/forwarder.go b/net/dns/resolver/forwarder.go index 79030fed4..b747fb153 100644 --- a/net/dns/resolver/forwarder.go +++ b/net/dns/resolver/forwarder.go @@ -198,6 +198,16 @@ type forwarder struct { // routes are per-suffix resolvers to use, with // the most specific routes first. routes []route + // cloudHostFallback are last resort resolvers to use if no per-suffix + // resolver matches. These are only populated on cloud hosts where the + // platform provides a well-known recursive resolver. + // + // That is, if we're running on GCP or AWS where there's always a well-known + // IP of a recursive resolver, return that rather than having callers return + // errNoUpstreams. This fixes both normal 100.100.100.100 resolution when + // /etc/resolv.conf is missing/corrupt, and the peerapi ExitDNS stub + // resolver lookup. + cloudHostFallback []resolverAndDelay } func init() { @@ -297,18 +307,52 @@ func resolversWithDelays(resolvers []*dnstype.Resolver) []resolverAndDelay { return rr } +var ( + cloudResolversOnce sync.Once + cloudResolversLazy []resolverAndDelay +) + +func cloudResolvers() []resolverAndDelay { + cloudResolversOnce.Do(func() { + if ip := cloudenv.Get().ResolverIP(); ip != "" { + cloudResolver := []*dnstype.Resolver{{Addr: ip}} + cloudResolversLazy = resolversWithDelays(cloudResolver) + } + }) + return cloudResolversLazy +} + // setRoutes sets the routes to use for DNS forwarding. It's called by // Resolver.SetConfig on reconfig. // // The memory referenced by routesBySuffix should not be modified. func (f *forwarder) setRoutes(routesBySuffix map[dnsname.FQDN][]*dnstype.Resolver) { routes := make([]route, 0, len(routesBySuffix)) + + cloudHostFallback := cloudResolvers() for suffix, rs := range routesBySuffix { - routes = append(routes, route{ - Suffix: suffix, - Resolvers: resolversWithDelays(rs), - }) + if suffix == "." && len(rs) == 0 && len(cloudHostFallback) > 0 { + routes = append(routes, route{ + Suffix: suffix, + Resolvers: cloudHostFallback, + }) + } else { + routes = append(routes, route{ + Suffix: suffix, + Resolvers: resolversWithDelays(rs), + }) + } + } + + if cloudenv.Get().HasInternalTLD() && len(cloudHostFallback) > 0 { + if _, ok := routesBySuffix["internal."]; !ok { + routes = append(routes, route{ + Suffix: "internal.", + Resolvers: cloudHostFallback, + }) + } } + // Sort from longest prefix to shortest. sort.Slice(routes, func(i, j int) bool { return routes[i].Suffix.NumLabels() > routes[j].Suffix.NumLabels() @@ -317,6 +361,7 @@ func (f *forwarder) setRoutes(routesBySuffix map[dnsname.FQDN][]*dnstype.Resolve f.mu.Lock() defer f.mu.Unlock() f.routes = routes + f.cloudHostFallback = cloudHostFallback } var stdNetPacketListener packetListener = new(net.ListenConfig) @@ -561,38 +606,18 @@ func (f *forwarder) sendUDP(ctx context.Context, fq *forwardQuery, rr resolverAn return out, nil } -// gcpResolverFallback is the fallback resolver for Google Cloud. -var gcpResolverFallback = []resolverAndDelay{{name: &dnstype.Resolver{Addr: cloudenv.GoogleMetadataAndDNSIP}}} - // resolvers returns the resolvers to use for domain. func (f *forwarder) resolvers(domain dnsname.FQDN) []resolverAndDelay { f.mu.Lock() routes := f.routes + cloudHostFallback := f.cloudHostFallback f.mu.Unlock() - var ret []resolverAndDelay - var matchedSuffix dnsname.FQDN for _, route := range routes { if route.Suffix == "." || route.Suffix.Contains(domain) { - ret = route.Resolvers - matchedSuffix = route.Suffix - break + return route.Resolvers } } - - if len(ret) == 0 && cloudenv.Get() == cloudenv.GCP && (matchedSuffix == "" || matchedSuffix == ".") { - // If we're running on GCP where there's always a well-known IP of a - // recursive resolver, return that rather than having callers return - // errNoUpstreams. This fixes both normal 100.100.100.100 resolution - // when /etc/resolv.conf is missing/corrupt, and the peerapi ExitDNS - // stub resolver lookup. - // - // But we only do this if no route matched (matchedSuffix == "") or - // if we had no resolvers for the top-level route (matchedSuffix == "."). - // If they had an explicit empty route that we matched, don't do the auto - // fallback in that case. - ret = gcpResolverFallback - } - return ret + return cloudHostFallback // or nil if no fallback } // forwardQuery is information and state about a forwarded DNS query that's diff --git a/net/dns/resolver/tsdns.go b/net/dns/resolver/tsdns.go index 38d6ec73f..05df02484 100644 --- a/net/dns/resolver/tsdns.go +++ b/net/dns/resolver/tsdns.go @@ -31,6 +31,7 @@ import ( "tailscale.com/types/dnstype" "tailscale.com/types/logger" "tailscale.com/util/clientmetric" + "tailscale.com/util/cloudenv" "tailscale.com/util/dnsname" "tailscale.com/wgengine/monitor" ) @@ -97,6 +98,9 @@ func (c *Config) WriteToBufioWriter(w *bufio.Writer) { if arpa > 0 { fmt.Fprintf(w, "+%darpa", arpa) } + if c := cloudenv.Get(); c != "" { + fmt.Fprintf(w, ", cloud=%q", string(c)) + } w.WriteString("}") } diff --git a/net/dnscache/dnscache.go b/net/dnscache/dnscache.go index 15ebf1580..37b48bc97 100644 --- a/net/dnscache/dnscache.go +++ b/net/dnscache/dnscache.go @@ -119,14 +119,15 @@ func (r *Resolver) cloudHostResolver() (v *net.Resolver, ok bool) { // which supports net.Resolver.PreferGo on Windows. return nil, false } - if cloudenv.Get() != cloudenv.GCP { + ip := cloudenv.Get().ResolverIP() + if ip == "" { return nil, false } return &net.Resolver{ PreferGo: true, Dial: func(ctx context.Context, network, address string) (net.Conn, error) { var d net.Dialer - return d.DialContext(ctx, network, net.JoinHostPort(cloudenv.GoogleMetadataAndDNSIP, "53")) + return d.DialContext(ctx, network, net.JoinHostPort(ip, "53")) }, }, true } diff --git a/util/cloudenv/cloudenv.go b/util/cloudenv/cloudenv.go index cc921ac5e..5ba8f6e6e 100644 --- a/util/cloudenv/cloudenv.go +++ b/util/cloudenv/cloudenv.go @@ -6,6 +6,9 @@ package cloudenv import ( + "os" + "runtime" + "strings" "sync/atomic" gcpmetadata "cloud.google.com/go/compute/metadata" @@ -15,14 +18,41 @@ import ( // It's also the *.internal DNS server, and proxies to 8.8.8.8. const GoogleMetadataAndDNSIP = "169.254.169.254" +// AWSResolverIP is the IP address of the AWS DNS server. +// See https://docs.aws.amazon.com/vpc/latest/userguide/vpc-dns.html +const AWSResolverIP = "169.254.169.253" + // Cloud is a recognize cloud environment with properties that // Tailscale can specialize for in places. type Cloud string const ( GCP = Cloud("gcp") // Google Cloud + AWS = Cloud("aws") // Amazon Web Services (EC2 in particular) ) +// ResolverIP returns the cloud host's recursive DNS server or the +// empty string if not available. +func (c Cloud) ResolverIP() string { + switch c { + case GCP: + return GoogleMetadataAndDNSIP + case AWS: + return AWSResolverIP + } + return "" +} + +// HasInternalTLD reports whether c is a cloud environment +// whose ResolverIP serves *.internal records. +func (c Cloud) HasInternalTLD() bool { + switch c { + case GCP, AWS: + return true + } + return false +} + var cloudAtomic atomic.Value // of Cloud // Get returns the current cloud, or the empty string if unknown. @@ -37,6 +67,16 @@ func Get() Cloud { } func getCloud() Cloud { + // TODO(bradfitz): also detect AWS on Windows, etc. Just try to hit the metadata server + // and see if it's there? But it might be turned off. Do some small-timeout DNS request + // to 169.254.169.253 and see if it replies? But which DNS request? + if runtime.GOOS == "linux" { + biosVendorB, _ := os.ReadFile("/sys/class/dmi/id/bios_vendor") + biosVendor := strings.TrimSpace(string(biosVendorB)) + if biosVendor == "Amazon EC2" || strings.HasSuffix(biosVendor, ".amazon") { + return AWS + } + } if gcpmetadata.OnGCE() { return GCP }