You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tailscale/net/dns/manager.go

570 lines
16 KiB
Go

// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package dns
import (
"bufio"
"context"
"encoding/binary"
"errors"
"io"
"net"
"net/netip"
"runtime"
"sync/atomic"
"time"
"golang.org/x/exp/slices"
"tailscale.com/health"
"tailscale.com/net/dns/resolver"
"tailscale.com/net/packet"
"tailscale.com/net/tsaddr"
"tailscale.com/net/tsdial"
"tailscale.com/net/tstun"
"tailscale.com/types/dnstype"
"tailscale.com/types/ipproto"
"tailscale.com/types/logger"
"tailscale.com/util/clientmetric"
"tailscale.com/util/dnsname"
"tailscale.com/wgengine/monitor"
)
var (
magicDNSIP = tsaddr.TailscaleServiceIP()
magicDNSIPv6 = tsaddr.TailscaleServiceIPv6()
)
var (
errFullQueue = errors.New("request queue full")
)
// maxActiveQueries returns the maximal number of DNS requests that be
// can running.
// If EnqueueRequest is called when this many requests are already pending,
// the request will be dropped to avoid blocking the caller.
func maxActiveQueries() int32 {
if runtime.GOOS == "ios" {
// For memory paranoia reasons on iOS, match the
// historical Tailscale 1.x..1.8 behavior for now
// (just before the 1.10 release).
return 64
}
// But for other platforms, allow more burstiness:
return 256
}
// We use file-ignore below instead of ignore because on some platforms,
// the lint exception is necessary and on others it is not,
// and plain ignore complains if the exception is unnecessary.
// reconfigTimeout is the time interval within which Manager.{Up,Down} should complete.
//
// This is particularly useful because certain conditions can cause indefinite hangs
// (such as improper dbus auth followed by contextless dbus.Object.Call).
// Such operations should be wrapped in a timeout context.
const reconfigTimeout = time.Second
type response struct {
pkt []byte
to netip.AddrPort // response destination (request source)
}
// Manager manages system DNS settings.
type Manager struct {
logf logger.Logf
// When netstack is not used, Manager implements magic DNS.
// In this case, responses tracks completed DNS requests
// which need a response, and NextPacket() synthesizes a
// fake IP+UDP header to finish assembling the response.
//
// TODO(tom): Rip out once all platforms use netstack.
responses chan response
activeQueriesAtomic int32
ctx context.Context // good until Down
ctxCancel context.CancelFunc // closes ctx
resolver *resolver.Resolver
os OSConfigurator
}
// NewManagers created a new manager from the given config.
func NewManager(logf logger.Logf, oscfg OSConfigurator, linkMon *monitor.Mon, dialer *tsdial.Dialer, linkSel resolver.ForwardLinkSelector) *Manager {
if dialer == nil {
panic("nil Dialer")
}
logf = logger.WithPrefix(logf, "dns: ")
m := &Manager{
logf: logf,
resolver: resolver.New(logf, linkMon, linkSel, dialer),
os: oscfg,
responses: make(chan response),
}
m.ctx, m.ctxCancel = context.WithCancel(context.Background())
m.logf("using %T", m.os)
return m
}
// Resolver returns the Manager's DNS Resolver.
func (m *Manager) Resolver() *resolver.Resolver { return m.resolver }
func (m *Manager) Set(cfg Config) error {
m.logf("Set: %v", logger.ArgWriter(func(w *bufio.Writer) {
cfg.WriteToBufioWriter(w)
}))
rcfg, ocfg, err := m.compileConfig(cfg)
if err != nil {
return err
}
m.logf("Resolvercfg: %v", logger.ArgWriter(func(w *bufio.Writer) {
rcfg.WriteToBufioWriter(w)
}))
m.logf("OScfg: %+v", ocfg)
if err := m.resolver.SetConfig(rcfg); err != nil {
return err
}
if err := m.os.SetDNS(ocfg); err != nil {
health.SetDNSOSHealth(err)
return err
}
health.SetDNSOSHealth(nil)
return nil
}
// compileHostEntries creates a list of single-label resolutions possible
// from the configured hosts and search domains.
// The entries are compiled in the order of the search domains, then the hosts.
// The returned list is sorted by the first hostname in each entry.
func compileHostEntries(cfg Config) (hosts []*HostEntry) {
didLabel := make(map[string]bool, len(cfg.Hosts))
for _, sd := range cfg.SearchDomains {
for h, ips := range cfg.Hosts {
if !sd.Contains(h) || h.NumLabels() != (sd.NumLabels()+1) {
continue
}
ipHosts := []string{string(h.WithTrailingDot())}
if label := dnsname.FirstLabel(string(h)); !didLabel[label] {
didLabel[label] = true
ipHosts = append(ipHosts, label)
}
for _, ip := range ips {
if cfg.OnlyIPv6 && ip.Is4() {
continue
}
hosts = append(hosts, &HostEntry{
Addr: ip,
Hosts: ipHosts,
})
// Only add IPv4 or IPv6 per host, like we do in the resolver.
break
}
}
}
slices.SortFunc(hosts, func(a, b *HostEntry) bool {
if len(a.Hosts) == 0 {
return false
}
if len(b.Hosts) == 0 {
return true
}
return a.Hosts[0] < b.Hosts[0]
})
return hosts
}
// compileConfig converts cfg into a quad-100 resolver configuration
// and an OS-level configuration.
func (m *Manager) compileConfig(cfg Config) (rcfg resolver.Config, ocfg OSConfig, err error) {
// The internal resolver always gets MagicDNS hosts and
// authoritative suffixes, even if we don't propagate MagicDNS to
// the OS.
rcfg.Hosts = cfg.Hosts
routes := map[dnsname.FQDN][]*dnstype.Resolver{} // assigned conditionally to rcfg.Routes below.
for suffix, resolvers := range cfg.Routes {
if len(resolvers) == 0 {
rcfg.LocalDomains = append(rcfg.LocalDomains, suffix)
} else {
routes[suffix] = resolvers
}
}
// Similarly, the OS always gets search paths.
ocfg.SearchDomains = cfg.SearchDomains
if runtime.GOOS == "windows" {
ocfg.Hosts = compileHostEntries(cfg)
}
// Deal with trivial configs first.
switch {
case !cfg.needsOSResolver():
// Set search domains, but nothing else. This also covers the
// case where cfg is entirely zero, in which case these
// configs clear all Tailscale DNS settings.
return rcfg, ocfg, nil
case cfg.hasDefaultIPResolversOnly() && !cfg.hasHostsWithoutSplitDNSRoutes():
// Trivial CorpDNS configuration, just override the OS resolver.
//
// If there are hosts (ExtraRecords) that are not covered by an existing
// SplitDNS route, then we don't go into this path so that we fall into
// the next case and send the extra record hosts queries through
// 100.100.100.100 instead where we can answer them.
//
// TODO: for OSes that support it, pass IP:port and DoH
// addresses directly to OS.
// https://github.com/tailscale/tailscale/issues/1666
ocfg.Nameservers = toIPsOnly(cfg.DefaultResolvers)
return rcfg, ocfg, nil
case cfg.hasDefaultResolvers():
// Default resolvers plus other stuff always ends up proxying
// through quad-100.
rcfg.Routes = routes
rcfg.Routes["."] = cfg.DefaultResolvers
ocfg.Nameservers = []netip.Addr{cfg.serviceIP()}
return rcfg, ocfg, nil
}
// From this point on, we're figuring out split DNS
// configurations. The possible cases don't return directly any
// more, because as a final step we have to handle the case where
// the OS can't do split DNS.
// Workaround for
// https://github.com/tailscale/corp/issues/1662. Even though
// Windows natively supports split DNS, it only configures linux
// containers using whatever the primary is, and doesn't apply
// NRPT rules to DNS traffic coming from WSL.
//
// In order to make WSL work okay when the host Windows is using
// Tailscale, we need to set up quad-100 as a "full proxy"
// resolver, regardless of whether Windows itself can do split
// DNS. We still make Windows do split DNS itself when it can, but
// quad-100 will still have the full split configuration as well,
// and so can service WSL requests correctly.
//
// This bool is used in a couple of places below to implement this
// workaround.
isWindows := runtime.GOOS == "windows"
if cfg.singleResolverSet() != nil && m.os.SupportsSplitDNS() && !isWindows {
// Split DNS configuration requested, where all split domains
// go to the same resolvers. We can let the OS do it.
ocfg.Nameservers = toIPsOnly(cfg.singleResolverSet())
ocfg.MatchDomains = cfg.matchDomains()
return rcfg, ocfg, nil
}
// Split DNS configuration with either multiple upstream routes,
// or routes + MagicDNS, or just MagicDNS, or on an OS that cannot
// split-DNS. Install a split config pointing at quad-100.
rcfg.Routes = routes
ocfg.Nameservers = []netip.Addr{cfg.serviceIP()}
var baseCfg *OSConfig // base config; non-nil if/when known
// Even though Apple devices can do split DNS, they don't provide a way to
// selectively answer ExtraRecords, and ignore other DNS traffic. As a
// workaround, we read the existing default resolver configuration and use
// that as the forwarder for all DNS traffic that quad-100 doesn't handle.
const isApple = runtime.GOOS == "darwin" || runtime.GOOS == "ios"
if isApple || !m.os.SupportsSplitDNS() {
// If the OS can't do native split-dns, read out the underlying
// resolver config and blend it into our config.
cfg, err := m.os.GetBaseConfig()
if err == nil {
baseCfg = &cfg
} else if isApple && err == ErrGetBaseConfigNotSupported {
// This is currently (2022-10-13) expected on certain iOS and macOS
// builds.
} else {
health.SetDNSOSHealth(err)
return resolver.Config{}, OSConfig{}, err
}
}
if baseCfg == nil || isApple && len(baseCfg.Nameservers) == 0 {
// If there was no base config, or if we're on Apple and the base
// config is empty, then we need to fallback to SplitDNS mode.
ocfg.MatchDomains = cfg.matchDomains()
} else {
var defaultRoutes []*dnstype.Resolver
for _, ip := range baseCfg.Nameservers {
defaultRoutes = append(defaultRoutes, &dnstype.Resolver{Addr: ip.String()})
}
rcfg.Routes["."] = defaultRoutes
ocfg.SearchDomains = append(ocfg.SearchDomains, baseCfg.SearchDomains...)
}
return rcfg, ocfg, nil
}
// toIPsOnly returns only the IP portion of dnstype.Resolver.
// Only safe to use if the resolvers slice has been cleared of
// DoH or custom-port entries with something like hasDefaultIPResolversOnly.
func toIPsOnly(resolvers []*dnstype.Resolver) (ret []netip.Addr) {
for _, r := range resolvers {
if ipp, ok := r.IPPort(); ok && ipp.Port() == 53 {
ret = append(ret, ipp.Addr())
}
}
return ret
}
// EnqueuePacket is the legacy path for handling magic DNS traffic, and is
// called with a DNS request payload.
//
// TODO(tom): Rip out once all platforms use netstack.
func (m *Manager) EnqueuePacket(bs []byte, proto ipproto.Proto, from, to netip.AddrPort) error {
if to.Port() != 53 || proto != ipproto.UDP {
return nil
}
if n := atomic.AddInt32(&m.activeQueriesAtomic, 1); n > maxActiveQueries() {
atomic.AddInt32(&m.activeQueriesAtomic, -1)
metricDNSQueryErrorQueue.Add(1)
return errFullQueue
}
go func() {
resp, err := m.resolver.Query(m.ctx, bs, from)
if err != nil {
atomic.AddInt32(&m.activeQueriesAtomic, -1)
m.logf("dns query: %v", err)
return
}
select {
case <-m.ctx.Done():
return
case m.responses <- response{resp, from}:
}
}()
return nil
}
// NextPacket is the legacy path for obtaining DNS results in response to
// magic DNS queries. It blocks until a response is available.
//
// TODO(tom): Rip out once all platforms use netstack.
func (m *Manager) NextPacket() ([]byte, error) {
var resp response
select {
case <-m.ctx.Done():
return nil, net.ErrClosed
case resp = <-m.responses:
// continue
}
// Unused space is needed further down the stack. To avoid extra
// allocations/copying later on, we allocate such space here.
const offset = tstun.PacketStartOffset
var buf []byte
switch {
case resp.to.Addr().Is4():
h := packet.UDP4Header{
IP4Header: packet.IP4Header{
Src: magicDNSIP,
Dst: resp.to.Addr(),
},
SrcPort: 53,
DstPort: resp.to.Port(),
}
hlen := h.Len()
buf = make([]byte, offset+hlen+len(resp.pkt))
copy(buf[offset+hlen:], resp.pkt)
h.Marshal(buf[offset:])
case resp.to.Addr().Is6():
h := packet.UDP6Header{
IP6Header: packet.IP6Header{
Src: magicDNSIPv6,
Dst: resp.to.Addr(),
},
SrcPort: 53,
DstPort: resp.to.Port(),
}
hlen := h.Len()
buf = make([]byte, offset+hlen+len(resp.pkt))
copy(buf[offset+hlen:], resp.pkt)
h.Marshal(buf[offset:])
}
atomic.AddInt32(&m.activeQueriesAtomic, -1)
return buf, nil
}
// Query executes a DNS query received from the given address. The query is
// provided in bs as a wire-encoded DNS query without any transport header.
// This method is called for requests arriving over UDP and TCP.
func (m *Manager) Query(ctx context.Context, bs []byte, from netip.AddrPort) ([]byte, error) {
select {
case <-m.ctx.Done():
return nil, net.ErrClosed
default:
// continue
}
if n := atomic.AddInt32(&m.activeQueriesAtomic, 1); n > maxActiveQueries() {
atomic.AddInt32(&m.activeQueriesAtomic, -1)
metricDNSQueryErrorQueue.Add(1)
return nil, errFullQueue
}
defer atomic.AddInt32(&m.activeQueriesAtomic, -1)
return m.resolver.Query(ctx, bs, from)
}
const (
// RFC 7766 6.2 recommends connection reuse & request pipelining
// be undertaken, and the connection be closed by the server
// using an idle timeout on the order of seconds.
idleTimeoutTCP = 45 * time.Second
// The RFCs don't specify the max size of a TCP-based DNS query,
// but we want to keep this reasonable. Given payloads are typically
// much larger and all known client send a single query, I've arbitrarily
// chosen 2k.
maxReqSizeTCP = 2048
)
// dnsTCPSession services DNS requests sent over TCP.
type dnsTCPSession struct {
m *Manager
conn net.Conn
srcAddr netip.AddrPort
readClosing chan struct{}
responses chan []byte // DNS replies pending writing
ctx context.Context
closeCtx context.CancelFunc
}
func (s *dnsTCPSession) handleWrites() {
defer s.conn.Close()
defer close(s.responses)
defer s.closeCtx()
for {
select {
case <-s.readClosing:
return // connection closed or timeout, teardown time
case resp := <-s.responses:
s.conn.SetWriteDeadline(time.Now().Add(idleTimeoutTCP))
if err := binary.Write(s.conn, binary.BigEndian, uint16(len(resp))); err != nil {
s.m.logf("tcp write (len): %v", err)
return
}
if _, err := s.conn.Write(resp); err != nil {
s.m.logf("tcp write (response): %v", err)
return
}
}
}
}
func (s *dnsTCPSession) handleQuery(q []byte) {
resp, err := s.m.Query(s.ctx, q, s.srcAddr)
if err != nil {
s.m.logf("tcp query: %v", err)
return
}
select {
case <-s.ctx.Done():
case s.responses <- resp:
}
}
func (s *dnsTCPSession) handleReads() {
defer close(s.readClosing)
for {
select {
case <-s.ctx.Done():
return
default:
s.conn.SetReadDeadline(time.Now().Add(idleTimeoutTCP))
var reqLen uint16
if err := binary.Read(s.conn, binary.BigEndian, &reqLen); err != nil {
if err == io.EOF || err == io.ErrClosedPipe {
return // connection closed nominally, we gucci
}
s.m.logf("tcp read (len): %v", err)
return
}
if int(reqLen) > maxReqSizeTCP {
s.m.logf("tcp request too large (%d > %d)", reqLen, maxReqSizeTCP)
return
}
buf := make([]byte, int(reqLen))
if _, err := io.ReadFull(s.conn, buf); err != nil {
s.m.logf("tcp read (payload): %v", err)
return
}
select {
case <-s.ctx.Done():
return
default:
go s.handleQuery(buf)
}
}
}
}
// HandleTCPConn implements magicDNS over TCP, taking a connection and
// servicing DNS requests sent down it.
func (m *Manager) HandleTCPConn(conn net.Conn, srcAddr netip.AddrPort) {
s := dnsTCPSession{
m: m,
conn: conn,
srcAddr: srcAddr,
responses: make(chan []byte),
readClosing: make(chan struct{}),
}
s.ctx, s.closeCtx = context.WithCancel(m.ctx)
go s.handleReads()
s.handleWrites()
}
func (m *Manager) Down() error {
m.ctxCancel()
if err := m.os.Close(); err != nil {
return err
}
m.resolver.Close()
return nil
}
func (m *Manager) FlushCaches() error {
return flushCaches()
}
// Cleanup restores the system DNS configuration to its original state
// in case the Tailscale daemon terminated without closing the router.
// No other state needs to be instantiated before this runs.
func Cleanup(logf logger.Logf, interfaceName string) {
oscfg, err := NewOSConfigurator(logf, interfaceName)
if err != nil {
logf("creating dns cleanup: %v", err)
return
}
dns := NewManager(logf, oscfg, nil, &tsdial.Dialer{Logf: logf}, nil)
if err := dns.Down(); err != nil {
logf("dns down: %v", err)
}
}
var (
metricDNSQueryErrorQueue = clientmetric.NewCounter("dns_query_local_error_queue")
)