@ -36,6 +36,7 @@ import (
"tailscale.com/tailcfg"
"tailscale.com/types/logger"
"tailscale.com/util/clientmetric"
"tailscale.com/util/singleflight"
"tailscale.com/util/slicesx"
)
@ -44,76 +45,165 @@ var (
disableRecursiveResolver = envknob . RegisterBool ( "TS_DNSFALLBACK_DISABLE_RECURSIVE_RESOLVER" ) // legacy pre-1.52 env knob name
)
type resolveResult struct {
addrs [ ] netip . Addr
minTTL time . Duration
}
// MakeLookupFunc creates a function that can be used to resolve hostnames
// (e.g. as a LookupIPFallback from dnscache.Resolver).
// The netMon parameter is optional; if non-nil it's used to do faster interface lookups.
func MakeLookupFunc ( logf logger . Logf , netMon * netmon . Monitor ) func ( ctx context . Context , host string ) ( [ ] netip . Addr , error ) {
return func ( ctx context . Context , host string ) ( [ ] netip . Addr , error ) {
// If they've explicitly disabled the recursive resolver with the legacy
// TS_DNSFALLBACK_DISABLE_RECURSIVE_RESOLVER envknob or not set the
// newer TS_DNSFALLBACK_RECURSIVE_RESOLVER to true, then don't use the
// recursive resolver. (tailscale/corp#15261) In the future, we might
// change the default (the opt.Bool being unset) to mean enabled.
if disableRecursiveResolver ( ) || ! optRecursiveResolver ( ) . EqualBool ( true ) {
return lookup ( ctx , host , logf , netMon )
}
fr := & fallbackResolver {
logf : logf ,
netMon : netMon ,
}
return fr . Lookup
}
addrsCh := make ( chan [ ] netip . Addr , 1 )
// fallbackResolver contains the state and configuration for a DNS resolution
// function.
type fallbackResolver struct {
logf logger . Logf
netMon * netmon . Monitor // or nil
sf singleflight . Group [ string , resolveResult ]
// Run the recursive resolver in the background so we can
// compare the results.
go func ( ) {
logf := logger . WithPrefix ( logf , "recursive: " )
// Ensure that we catch panics while we're testing this
// code path; this should never panic, but we don't
// want to take down the process by having the panic
// propagate to the top of the goroutine's stack and
// then terminate.
defer func ( ) {
if r := recover ( ) ; r != nil {
logf ( "bootstrap DNS: recovered panic: %v" , r )
metricRecursiveErrors . Add ( 1 )
}
} ( )
resolver := recursive . Resolver {
Dialer : netns . NewDialer ( logf , netMon ) ,
Logf : logf ,
}
addrs , minTTL , err := resolver . Resolve ( ctx , host )
if err != nil {
logf ( "error using recursive resolver: %v" , err )
metricRecursiveErrors . Add ( 1 )
return
}
// for tests
waitForCompare bool
}
compareAddr := func ( a , b netip . Addr ) int { return a . Compare ( b ) }
slices . SortFunc ( addrs , compareAddr )
func ( fr * fallbackResolver ) Lookup ( ctx context . Context , host string ) ( [ ] netip . Addr , error ) {
// If they've explicitly disabled the recursive resolver with the legacy
// TS_DNSFALLBACK_DISABLE_RECURSIVE_RESOLVER envknob or not set the
// newer TS_DNSFALLBACK_RECURSIVE_RESOLVER to true, then don't use the
// recursive resolver. (tailscale/corp#15261) In the future, we might
// change the default (the opt.Bool being unset) to mean enabled.
if disableRecursiveResolver ( ) || ! optRecursiveResolver ( ) . EqualBool ( true ) {
return lookup ( ctx , host , fr . logf , fr . netMon )
}
// Wait for a response from the main function
oldAddrs := <- addrsCh
slices . SortFunc ( oldAddrs , compareAddr )
addrsCh := make ( chan [ ] netip . Addr , 1 )
matches := slices . Equal ( addrs , oldAddrs )
// Run the recursive resolver in the background so we can
// compare the results. For tests, we also allow waiting for the
// comparison to complete; normally, we do this entirely asynchronously
// so as not to block the caller.
var done chan struct { }
if fr . waitForCompare {
done = make ( chan struct { } )
go func ( ) {
defer close ( done )
fr . compareWithRecursive ( ctx , addrsCh , host )
} ( )
} else {
go fr . compareWithRecursive ( ctx , addrsCh , host )
}
logf ( "bootstrap DNS comparison: matches=%v oldAddrs=%v addrs=%v minTTL=%v" , matches , oldAddrs , addrs , minTTL )
addrs , err := lookup ( ctx , host , fr . logf , fr . netMon )
if err != nil {
addrsCh <- nil
return nil , err
}
if matches {
metricRecursiveMatches . Add ( 1 )
} else {
metricRecursiveMismatches . Add ( 1 )
}
} ( )
addrsCh <- slices . Clone ( addrs )
if fr . waitForCompare {
select {
case <- done :
case <- ctx . Done ( ) :
}
}
return addrs , nil
}
addrs , err := lookup ( ctx , host , logf , netMon )
// compareWithRecursive is responsible for comparing the DNS resolution
// performed via the "normal" path (bootstrap DNS requests to the DERP servers)
// with DNS resolution performed with our in-process recursive DNS resolver.
//
// It will select on addrsCh to read exactly one set of addrs (returned by the
// "normal" path) and compare against the results returned by the recursive
// resolver. If ctx is canceled, then it will abort.
func ( fr * fallbackResolver ) compareWithRecursive (
ctx context . Context ,
addrsCh <- chan [ ] netip . Addr ,
host string ,
) {
logf := logger . WithPrefix ( fr . logf , "recursive: " )
// Ensure that we catch panics while we're testing this
// code path; this should never panic, but we don't
// want to take down the process by having the panic
// propagate to the top of the goroutine's stack and
// then terminate.
defer func ( ) {
if r := recover ( ) ; r != nil {
logf ( "bootstrap DNS: recovered panic: %v" , r )
metricRecursiveErrors . Add ( 1 )
}
} ( )
// Don't resolve the same host multiple times
// concurrently; if we end up in a tight loop, this can
// take up a lot of CPU.
var didRun bool
result , err , _ := fr . sf . Do ( host , func ( ) ( resolveResult , error ) {
didRun = true
resolver := & recursive . Resolver {
Dialer : netns . NewDialer ( logf , fr . netMon ) ,
Logf : logf ,
}
addrs , minTTL , err := resolver . Resolve ( ctx , host )
if err != nil {
addrsCh <- nil
return nil , err
logf ( "error using recursive resolver: %v" , err )
metricRecursiveErrors . Add ( 1 )
return resolveResult { } , err
}
return resolveResult { addrs , minTTL } , nil
} )
// The singleflight function handled errors; return if
// there was one. Additionally, don't bother doing the
// comparison if we waited on another singleflight
// caller; the results are likely to be the same, so
// rather than spam the logs we can just exit and let
// the singleflight call that did execute do the
// comparison.
//
// Returning here is safe because the addrsCh channel
// is buffered, so the main function won't block even
// if we never read from it.
if err != nil || ! didRun {
return
}
addrs , minTTL := result . addrs , result . minTTL
compareAddr := func ( a , b netip . Addr ) int { return a . Compare ( b ) }
slices . SortFunc ( addrs , compareAddr )
// Wait for a response from the main function; try this once before we
// check whether the context is canceled since selects are
// nondeterministic.
var oldAddrs [ ] netip . Addr
select {
case oldAddrs = <- addrsCh :
// All good; continue
default :
// Now block.
select {
case oldAddrs = <- addrsCh :
case <- ctx . Done ( ) :
return
}
}
slices . SortFunc ( oldAddrs , compareAddr )
matches := slices . Equal ( addrs , oldAddrs )
logf ( "bootstrap DNS comparison: matches=%v oldAddrs=%v addrs=%v minTTL=%v" , matches , oldAddrs , addrs , minTTL )
addrsCh <- slices . Clone ( addrs )
return addrs , nil
if matches {
metricRecursiveMatches . Add ( 1 )
} else {
metricRecursiveMismatches . Add ( 1 )
}
}