Fix concurrency issues in controlclient, ipn, types/logger (#456)

Signed-off-by: Dmytro Shynkevych <dmytro@tailscale.com>
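The common shape of these fixes: any field that can be written while c.mu or b.mu is held must not be re-read through the shared struct after the lock is released; instead, each method snapshots what it needs while locked and works on the snapshot afterwards. A minimal, self-contained sketch of the pattern (hypothetical names, not code from this commit):

    package main

    import (
        "fmt"
        "sync"
    )

    // client mirrors the shape of controlclient.Direct: mu guards logID.
    type client struct {
        mu    sync.Mutex
        logID string // written by other goroutines while mu is held
    }

    // racyRead re-reads the shared field after releasing the lock;
    // the race detector flags this if another goroutine writes logID.
    func (c *client) racyRead() string {
        c.mu.Lock()
        c.mu.Unlock()
        return c.logID // read outside the critical section: racy
    }

    // safeRead snapshots the field while locked and uses the copy after.
    func (c *client) safeRead() string {
        c.mu.Lock()
        logID := c.logID
        c.mu.Unlock()
        return logID
    }

    func main() {
        c := &client{logID: "abc123"}
        fmt.Println(c.safeRead())
        _ = c.racyRead
    }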

@@ -92,6 +92,7 @@ type Direct struct {
     authKey      string
     tryingNewKey wgcfg.PrivateKey
     expiry       *time.Time
+    // hostinfo is mutated in-place while mu is held.
     hostinfo     *tailcfg.Hostinfo // always non-nil
     endpoints    []string
     localPort    uint16 // or zero to mean auto
@@ -262,6 +263,8 @@ func (c *Direct) doLogin(ctx context.Context, t *oauth2.Token, flags LoginFlags,
     tryingNewKey := c.tryingNewKey
     serverKey := c.serverKey
     authKey := c.authKey
+    hostinfo := c.hostinfo
+    backendLogID := hostinfo.BackendLogID
     expired := c.expiry != nil && !c.expiry.IsZero() && c.expiry.Before(c.timeNow())
     c.mu.Unlock()
@@ -318,7 +321,7 @@ func (c *Direct) doLogin(ctx context.Context, t *oauth2.Token, flags LoginFlags,
     if tryingNewKey == (wgcfg.PrivateKey{}) {
         log.Fatalf("tryingNewKey is empty, give up")
     }
-    if c.hostinfo.BackendLogID == "" {
+    if backendLogID == "" {
         err = errors.New("hostinfo: BackendLogID missing")
         return regen, url, err
     }
@@ -326,7 +329,7 @@ func (c *Direct) doLogin(ctx context.Context, t *oauth2.Token, flags LoginFlags,
         Version:    1,
         OldNodeKey: tailcfg.NodeKey(oldNodeKey),
         NodeKey:    tailcfg.NodeKey(tryingNewKey.Public()),
-        Hostinfo:   c.hostinfo,
+        Hostinfo:   hostinfo,
         Followup:   url,
     }
     c.logf("RegisterReq: onode=%v node=%v fup=%v",
@@ -453,11 +456,12 @@ func (c *Direct) PollNetMap(ctx context.Context, maxPolls int, cb func(*NetworkM
     serverURL := c.serverURL
     serverKey := c.serverKey
     hostinfo := c.hostinfo
+    backendLogID := hostinfo.BackendLogID
     localPort := c.localPort
     ep := append([]string(nil), c.endpoints...)
     c.mu.Unlock()

-    if hostinfo.BackendLogID == "" {
+    if backendLogID == "" {
         return errors.New("hostinfo: BackendLogID missing")
     }

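Note the distinction the direct.go hunks draw: hostinfo points to a struct that is mutated in place while mu is held, so even after copying the pointer under the lock, reading hostinfo.BackendLogID after Unlock would still race; the scalar is therefore snapshotted too. A sketch of that difference, with hypothetical trimmed-down types:

    package main

    import "sync"

    // Stand-ins for tailcfg.Hostinfo and controlclient.Direct.
    type Hostinfo struct{ BackendLogID string }

    type Direct struct {
        mu       sync.Mutex
        hostinfo *Hostinfo // the pointee is mutated in-place while mu is held
    }

    func (c *Direct) doLogin() string {
        c.mu.Lock()
        hostinfo := c.hostinfo                // copies the pointer; pointee is still shared
        backendLogID := hostinfo.BackendLogID // copies the value; safe to use unlocked
        c.mu.Unlock()

        // hostinfo.BackendLogID here would read through the shared pointer
        // and race with in-place writers; backendLogID cannot.
        return backendLogID
    }

    func main() {
        c := &Direct{hostinfo: &Hostinfo{BackendLogID: "log-id"}}
        _ = c.doLogin()
    }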
@@ -63,8 +63,10 @@ type LocalBackend struct {
     stateKey     StateKey
     prefs        *Prefs
     state        State
-    hiCache      *tailcfg.Hostinfo
-    netMapCache  *controlclient.NetworkMap
+    // hostinfo is mutated in-place while mu is held.
+    hostinfo *tailcfg.Hostinfo
+    // netMap is not mutated in-place once set.
+    netMap *controlclient.NetworkMap
     engineStatus EngineStatus
     endpoints    []string
     blocked      bool
@@ -106,11 +108,6 @@ func NewLocalBackend(logf logger.Logf, logid string, store StateStore, e wgengin
     }
     b.statusChanged = sync.NewCond(&b.statusLock)

-    if b.portpoll != nil {
-        go b.portpoll.Run(ctx)
-        go b.readPoller()
-    }

     return b, nil
 }
@@ -146,11 +143,11 @@ func (b *LocalBackend) UpdateStatus(sb *ipnstate.StatusBuilder) {
     // TODO: hostinfo, and its networkinfo
     // TODO: EngineStatus copy (and deprecate it?)
-    if b.netMapCache != nil {
-        for id, up := range b.netMapCache.UserProfiles {
+    if b.netMap != nil {
+        for id, up := range b.netMap.UserProfiles {
             sb.AddUser(id, up)
         }
-        for _, p := range b.netMapCache.Peers {
+        for _, p := range b.netMap.Peers {
             var lastSeen time.Time
             if p.LastSeen != nil {
                 lastSeen = *p.LastSeen
@@ -184,104 +181,17 @@ func (b *LocalBackend) SetDecompressor(fn func() (controlclient.Decompressor, er
     b.newDecompressor = fn
 }

-// Start applies the configuration specified in opts, and starts the
-// state machine.
-//
-// TODO(danderson): this function is trying to do too many things at
-// once: it loads state, or imports it, or updates prefs sometimes,
-// contains some settings that are one-shot things done by `tailscale
-// up` because we had nowhere else to put them, and there's no clear
-// guarantee that switching from one user's state to another is
-// actually a supported operation (it should be, but it's very unclear
-// from the following whether or not that is a safe transition).
-func (b *LocalBackend) Start(opts Options) error {
-    if opts.Prefs == nil && opts.StateKey == "" {
-        return errors.New("no state key or prefs provided")
-    }
-    if opts.Prefs != nil {
-        b.logf("Start: %v", opts.Prefs.Pretty())
-    } else {
-        b.logf("Start")
-    }
-    hi := controlclient.NewHostinfo()
-    hi.BackendLogID = b.backendLogID
-    hi.FrontendLogID = opts.FrontendLogID
-    b.mu.Lock()
-    if b.c != nil {
-        // TODO(apenwarr): avoid the need to reinit controlclient.
-        // This will trigger a full relogin/reconfigure cycle every
-        // time a Handle reconnects to the backend. Ideally, we
-        // would send the new Prefs and everything would get back
-        // into sync with the minimal changes. But that's not how it
-        // is right now, which is a sign that the code is still too
-        // complicated.
-        b.c.Shutdown()
-    }
-    if b.hiCache != nil {
-        hi.Services = b.hiCache.Services // keep any previous session and netinfo
-        hi.NetInfo = b.hiCache.NetInfo
-    }
-    b.hiCache = hi
-    b.state = NoState
-    if err := b.loadStateLocked(opts.StateKey, opts.Prefs, opts.LegacyConfigPath); err != nil {
-        b.mu.Unlock()
-        return fmt.Errorf("loading requested state: %v", err)
-    }
-    b.serverURL = b.prefs.ControlURL
-    hi.RoutableIPs = append(hi.RoutableIPs, b.prefs.AdvertiseRoutes...)
-    hi.RequestTags = append(hi.RequestTags, b.prefs.AdvertiseTags...)
-    b.notify = opts.Notify
-    b.netMapCache = nil
-    persist := b.prefs.Persist
-    b.mu.Unlock()
-    b.updateFilter(nil)
-    var err error
-    if persist == nil {
-        // let controlclient initialize it
-        persist = &controlclient.Persist{}
-    }
-    cli, err := controlclient.New(controlclient.Options{
-        Logf:            logger.WithPrefix(b.logf, "control: "),
-        Persist:         *persist,
-        ServerURL:       b.serverURL,
-        AuthKey:         opts.AuthKey,
-        Hostinfo:        hi,
-        KeepAlive:       true,
-        NewDecompressor: b.newDecompressor,
-        HTTPTestClient:  opts.HTTPTestClient,
-    })
-    if err != nil {
-        return err
-    }
-    b.mu.Lock()
-    b.c = cli
-    endpoints := b.endpoints
-    b.mu.Unlock()
-    if endpoints != nil {
-        cli.UpdateEndpoints(0, endpoints)
-    }
-    cli.SetStatusFunc(func(newSt controlclient.Status) {
-        if newSt.LoginFinished != nil {
+// setClientStatus is the callback invoked by the control client whenever it posts a new status.
+// Among other things, this is where we update the netmap, packet filters, DNS and DERP maps.
+func (b *LocalBackend) setClientStatus(st controlclient.Status) {
+    if st.LoginFinished != nil {
         // Auth completed, unblock the engine
         b.blockEngineUpdates(false)
         b.authReconfig()
         b.send(Notify{LoginFinished: &empty.Message{}})
     }
-    if newSt.Persist != nil {
-        persist := *newSt.Persist // copy
+    if st.Persist != nil {
+        persist := *st.Persist // copy
         b.mu.Lock()
         b.prefs.Persist = &persist
@@ -296,12 +206,12 @@ func (b *LocalBackend) Start(opts Options) error {
         }
         b.send(Notify{Prefs: prefs})
     }
-    if newSt.NetMap != nil {
+    if st.NetMap != nil {
         // Netmap is unchanged only when the diff is empty.
         changed := true
         b.mu.Lock()
-        if b.netMapCache != nil {
-            diff := newSt.NetMap.ConciseDiffFrom(b.netMapCache)
+        if b.netMap != nil {
+            diff := st.NetMap.ConciseDiffFrom(b.netMap)
             if strings.TrimSpace(diff) == "" {
                 changed = false
                 b.logf("netmap diff: (none)")
@@ -310,39 +220,39 @@ func (b *LocalBackend) Start(opts Options) error {
             }
         }
         disableDERP := b.prefs != nil && b.prefs.DisableDERP
-        b.netMapCache = newSt.NetMap
+        b.netMap = st.NetMap
         b.mu.Unlock()

-        b.send(Notify{NetMap: newSt.NetMap})
+        b.send(Notify{NetMap: st.NetMap})

         // There is nothing to update if the map hasn't changed.
         if changed {
-            b.updateFilter(newSt.NetMap)
-            b.updateDNSMap(newSt.NetMap)
+            b.updateFilter(st.NetMap)
+            b.updateDNSMap(st.NetMap)
         }
         if disableDERP {
             b.e.SetDERPMap(nil)
         } else {
-            b.e.SetDERPMap(newSt.NetMap.DERPMap)
+            b.e.SetDERPMap(st.NetMap.DERPMap)
         }
     }
-    if newSt.URL != "" {
-        b.logf("Received auth URL: %.20v...", newSt.URL)
+    if st.URL != "" {
+        b.logf("Received auth URL: %.20v...", st.URL)
         b.mu.Lock()
         interact := b.interact
-        b.authURL = newSt.URL
+        b.authURL = st.URL
         b.mu.Unlock()
         if interact > 0 {
             b.popBrowserAuthNow()
         }
     }
-    if newSt.Err != "" {
+    if st.Err != "" {
         // TODO(crawshaw): display in the UI.
-        b.logf("Received error: %v", newSt.Err)
+        b.logf("Received error: %v", st.Err)
         return
     }
-    if newSt.NetMap != nil {
+    if st.NetMap != nil {
         b.mu.Lock()
         if b.state == NeedsLogin {
             b.prefs.WantRunning = true
@@ -353,15 +263,17 @@ func (b *LocalBackend) Start(opts Options) error {
         b.SetPrefs(prefs)
     }
     b.stateMachine()
-    })
+}

-    b.e.SetStatusCallback(func(s *wgengine.Status, err error) {
+// setWgengineStatus is the callback invoked by the wireguard engine whenever it posts a new status.
+// This updates the endpoints both in the backend and in the control client.
+func (b *LocalBackend) setWgengineStatus(s *wgengine.Status, err error) {
     if err != nil {
         b.logf("wgengine status error: %#v", err)
         return
     }
     if s == nil {
-        b.logf("weird: non-error wgengine update with status=nil: %v", s)
+        b.logf("[unexpected] non-error wgengine update with status=nil: %v", s)
         return
     }
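Extracting the two closures into the named methods setClientStatus and setWgengineStatus shrinks Start and stops the callbacks from capturing Start's locals; later hunks pass them around as method values. A tiny illustration of the method-value mechanics (hypothetical types, not this codebase's API):

    package main

    import "fmt"

    type Status struct{ URL string }

    type backend struct{ name string }

    // setClientStatus can be passed around as the method value
    // b.setClientStatus, which captures only the receiver b.
    func (b *backend) setClientStatus(st Status) {
        fmt.Println(b.name, "got status URL:", st.URL)
    }

    func main() {
        b := &backend{name: "node"}
        cb := b.setClientStatus // a func(Status), like cli.SetStatusFunc(b.setClientStatus)
        cb(Status{URL: "https://login.example.com"})
    }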
@@ -383,8 +295,106 @@ func (b *LocalBackend) Start(opts Options) error {
     b.statusLock.Unlock()
     b.send(Notify{Engine: &es})
-    })
+}
+
+// Start applies the configuration specified in opts, and starts the
+// state machine.
+//
+// TODO(danderson): this function is trying to do too many things at
+// once: it loads state, or imports it, or updates prefs sometimes,
+// contains some settings that are one-shot things done by `tailscale
+// up` because we had nowhere else to put them, and there's no clear
+// guarantee that switching from one user's state to another is
+// actually a supported operation (it should be, but it's very unclear
+// from the following whether or not that is a safe transition).
+func (b *LocalBackend) Start(opts Options) error {
+    if opts.Prefs == nil && opts.StateKey == "" {
+        return errors.New("no state key or prefs provided")
+    }
+    if opts.Prefs != nil {
+        b.logf("Start: %v", opts.Prefs.Pretty())
+    } else {
+        b.logf("Start")
+    }
+
+    hostinfo := controlclient.NewHostinfo()
+    hostinfo.BackendLogID = b.backendLogID
+    hostinfo.FrontendLogID = opts.FrontendLogID
+
+    b.mu.Lock()
+    if b.c != nil {
+        // TODO(apenwarr): avoid the need to reinit controlclient.
+        // This will trigger a full relogin/reconfigure cycle every
+        // time a Handle reconnects to the backend. Ideally, we
+        // would send the new Prefs and everything would get back
+        // into sync with the minimal changes. But that's not how it
+        // is right now, which is a sign that the code is still too
+        // complicated.
+        b.c.Shutdown()
+    }
+
+    if b.hostinfo != nil {
+        hostinfo.Services = b.hostinfo.Services // keep any previous session and netinfo
+        hostinfo.NetInfo = b.hostinfo.NetInfo
+    }
+    b.hostinfo = hostinfo
+    b.state = NoState
+
+    if err := b.loadStateLocked(opts.StateKey, opts.Prefs, opts.LegacyConfigPath); err != nil {
+        b.mu.Unlock()
+        return fmt.Errorf("loading requested state: %v", err)
+    }
+
+    b.serverURL = b.prefs.ControlURL
+    hostinfo.RoutableIPs = append(hostinfo.RoutableIPs, b.prefs.AdvertiseRoutes...)
+    hostinfo.RequestTags = append(hostinfo.RequestTags, b.prefs.AdvertiseTags...)
+    b.notify = opts.Notify
+    b.netMap = nil
+    persist := b.prefs.Persist
+    b.mu.Unlock()
+
+    b.updateFilter(nil)
+
+    var err error
+    if persist == nil {
+        // let controlclient initialize it
+        persist = &controlclient.Persist{}
+    }
+    cli, err := controlclient.New(controlclient.Options{
+        Logf:            logger.WithPrefix(b.logf, "control: "),
+        Persist:         *persist,
+        ServerURL:       b.serverURL,
+        AuthKey:         opts.AuthKey,
+        Hostinfo:        hostinfo,
+        KeepAlive:       true,
+        NewDecompressor: b.newDecompressor,
+        HTTPTestClient:  opts.HTTPTestClient,
+    })
+    if err != nil {
+        return err
+    }
+
+    // At this point, we have finished using hostinfo without synchronization,
+    // so it is safe to start readPoller, which concurrently writes to it.
+    if b.portpoll != nil {
+        go b.portpoll.Run(b.ctx)
+        go b.readPoller()
+    }
+
+    b.mu.Lock()
+    b.c = cli
+    endpoints := b.endpoints
+    b.mu.Unlock()
+
+    if endpoints != nil {
+        cli.UpdateEndpoints(0, endpoints)
+    }
+
+    cli.SetStatusFunc(b.setClientStatus)
+    b.e.SetStatusCallback(b.setWgengineStatus)
     b.e.SetNetInfoCallback(b.setNetInfo)

     b.mu.Lock()
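The other half of the fix: readPoller writes b.hostinfo concurrently, so the goroutines that used to be launched in NewLocalBackend now start only after Start has finished touching hostinfo without the lock. A condensed sketch of that initialize-then-spawn ordering (hypothetical fields):

    package main

    import (
        "sync"
        "time"
    )

    type backend struct {
        mu       sync.Mutex
        services []string // stands in for the Hostinfo state readPoller writes
    }

    // start initializes state without the lock (no other goroutine exists
    // yet), and only then spawns the concurrent writer — the same reason
    // readPoller moved from NewLocalBackend into Start.
    func (b *backend) start() {
        b.services = []string{"tcp/22"} // unsynchronized init: safe, nothing else is running

        go func() {
            b.mu.Lock()
            b.services = append(b.services, "tcp/80") // all later writes take mu
            b.mu.Unlock()
        }()
    }

    func main() {
        b := &backend{}
        b.start()
        time.Sleep(10 * time.Millisecond) // crude wait; a real program would synchronize
    }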
@@ -477,13 +487,11 @@ func (b *LocalBackend) readPoller() {
         }

         b.mu.Lock()
-        if b.hiCache == nil {
-            // TODO(bradfitz): it's a little weird that this port poller
-            // is started (by NewLocalBackend) before the Start call.
-            b.hiCache = new(tailcfg.Hostinfo)
+        if b.hostinfo == nil {
+            b.hostinfo = new(tailcfg.Hostinfo)
         }
-        b.hiCache.Services = sl
-        hi := b.hiCache
+        b.hostinfo.Services = sl
+        hi := b.hostinfo
         b.mu.Unlock()

         b.doSetHostinfoFilterServices(hi)
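readPoller keeps the in-place mutation of b.hostinfo under mu, then releases the lock before the doSetHostinfoFilterServices call, so slower downstream work never runs with the lock held. Roughly this shape, with a hypothetical slowPublish standing in for the real call:

    package main

    import (
        "fmt"
        "sync"
        "time"
    )

    type backend struct {
        mu       sync.Mutex
        services []string
    }

    func (b *backend) setServices(sl []string) {
        b.mu.Lock()
        b.services = sl // in-place mutation, protected by mu
        snapshot := b.services
        b.mu.Unlock()

        // Called without the lock so other goroutines aren't blocked
        // while we do slow work, mirroring doSetHostinfoFilterServices.
        slowPublish(snapshot)
    }

    func slowPublish(sl []string) {
        time.Sleep(time.Millisecond)
        fmt.Println("published:", sl)
    }

    func main() {
        b := &backend{}
        b.setServices([]string{"tcp/22"})
    }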
@@ -617,13 +625,23 @@ func (b *LocalBackend) StartLoginInteractive() {
 // FakeExpireAfter implements Backend.
 func (b *LocalBackend) FakeExpireAfter(x time.Duration) {
     b.logf("FakeExpireAfter: %v", x)
-    if b.netMapCache != nil {
-        e := b.netMapCache.Expiry
-        if e.IsZero() || time.Until(e) > x {
-            b.netMapCache.Expiry = time.Now().Add(x)
-        }
-        b.send(Notify{NetMap: b.netMapCache})
-    }
+    b.mu.Lock()
+    defer b.mu.Unlock()
+
+    if b.netMap == nil {
+        return
+    }
+
+    // This function is called very rarely, so we prefer to make
+    // a full copy of the netmap rather than modify it in place.
+    mapCopy := *b.netMap
+    e := mapCopy.Expiry
+    if e.IsZero() || time.Until(e) > x {
+        mapCopy.Expiry = time.Now().Add(x)
+    }
+
+    b.netMap = &mapCopy
+    b.send(Notify{NetMap: b.netMap})
 }

 func (b *LocalBackend) parseWgStatus(s *wgengine.Status) (ret EngineStatus) {
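This rewrite leans on the new invariant on b.netMap ("not mutated in-place once set"): writers swap in a fresh copy, so any reader holding the old pointer keeps a consistent snapshot. A hedged sketch of the copy-on-write step (hypothetical, single-field NetworkMap):

    package main

    import (
        "sync"
        "time"
    )

    type NetworkMap struct{ Expiry time.Time }

    type backend struct {
        mu     sync.Mutex
        netMap *NetworkMap // never mutated in-place once set
    }

    // expireIn replaces the map wholesale instead of writing through the
    // old pointer, so concurrent readers of the previous snapshot are safe.
    func (b *backend) expireIn(d time.Duration) {
        b.mu.Lock()
        defer b.mu.Unlock()
        if b.netMap == nil {
            return
        }
        mapCopy := *b.netMap // shallow copy: fields are replaced, not mutated
        mapCopy.Expiry = time.Now().Add(d)
        b.netMap = &mapCopy
    }

    func main() {
        b := &backend{netMap: &NetworkMap{}}
        b.expireIn(time.Minute)
    }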
@@ -680,13 +698,13 @@ func (b *LocalBackend) SetPrefs(new *Prefs) {
             b.logf("Failed to save new controlclient state: %v", err)
         }
     }
-    oldHi := b.hiCache
+    oldHi := b.hostinfo
     newHi := oldHi.Clone()
     newHi.RoutableIPs = append([]wgcfg.CIDR(nil), b.prefs.AdvertiseRoutes...)
     if h := new.Hostname; h != "" {
         newHi.Hostname = h
     }
-    b.hiCache = newHi
+    b.hostinfo = newHi
     b.mu.Unlock()

     b.logf("SetPrefs: %v", new.Pretty())
@@ -695,15 +713,15 @@ func (b *LocalBackend) SetPrefs(new *Prefs) {
         b.doSetHostinfoFilterServices(newHi)
     }

-    b.updateFilter(b.netMapCache)
+    b.updateFilter(b.netMap)
     // TODO(dmytro): when Prefs gain an EnableTailscaleDNS toggle, updateDNSMap here.

     turnDERPOff := new.DisableDERP && !old.DisableDERP
     turnDERPOn := !new.DisableDERP && old.DisableDERP
     if turnDERPOff {
         b.e.SetDERPMap(nil)
-    } else if turnDERPOn && b.netMapCache != nil {
-        b.e.SetDERPMap(b.netMapCache.DERPMap)
+    } else if turnDERPOn && b.netMap != nil {
+        b.e.SetDERPMap(b.netMap.DERPMap)
     }

     if old.WantRunning != new.WantRunning {
@@ -741,7 +759,7 @@ func (b *LocalBackend) doSetHostinfoFilterServices(hi *tailcfg.Hostinfo) {
 // NetMap returns the latest cached network map received from
 // controlclient, or nil if no network map was received yet.
 func (b *LocalBackend) NetMap() *controlclient.NetworkMap {
-    return b.netMapCache
+    return b.netMap
 }

 // blockEngineUpdate sets b.blocked to block, while holding b.mu. Its
@@ -762,7 +780,7 @@ func (b *LocalBackend) authReconfig() {
     b.mu.Lock()
     blocked := b.blocked
     uc := b.prefs
-    nm := b.netMapCache
+    nm := b.netMap
     b.mu.Unlock()

     if blocked {
@@ -939,7 +957,7 @@ func (b *LocalBackend) nextState() State {
     b.assertClientLocked()
     var (
         c           = b.c
-        netMap      = b.netMapCache
+        netMap      = b.netMap
         state       = b.state
         wantRunning = b.prefs.WantRunning
     )
@@ -1037,13 +1055,13 @@ func (b *LocalBackend) Logout() {
     b.mu.Lock()
     b.assertClientLocked()
     c := b.c
-    b.netMapCache = nil
+    b.netMap = nil
     b.mu.Unlock()

     c.Logout()

     b.mu.Lock()
-    b.netMapCache = nil
+    b.netMap = nil
     b.mu.Unlock()

     b.stateMachine()
@@ -1056,13 +1074,13 @@ func (b *LocalBackend) assertClientLocked() {
     }
 }

-// setNetInfo sets b.hiCache.NetInfo to ni, and passes ni along to the
+// setNetInfo sets b.hostinfo.NetInfo to ni, and passes ni along to the
 // controlclient, if one exists.
 func (b *LocalBackend) setNetInfo(ni *tailcfg.NetInfo) {
     b.mu.Lock()
     c := b.c
-    if b.hiCache != nil {
-        b.hiCache.NetInfo = ni.Clone()
+    if b.hostinfo != nil {
+        b.hostinfo.NetInfo = ni.Clone()
     }
     b.mu.Unlock()

@@ -127,18 +127,23 @@ func RateLimitedFn(logf Logf, f time.Duration, burst int, maxCache int) Logf {
 // since the last time this identical line was logged.
 func LogOnChange(logf Logf, maxInterval time.Duration, timeNow func() time.Time) Logf {
     var (
+        mu          sync.Mutex
         sLastLogged string
         tLastLogged = timeNow()
     )

     return func(format string, args ...interface{}) {
         s := fmt.Sprintf(format, args...)

+        mu.Lock()
         if s == sLastLogged && timeNow().Sub(tLastLogged) < maxInterval {
+            mu.Unlock()
             return
         }
         sLastLogged = s
         tLastLogged = timeNow()
+        mu.Unlock()

         logf(s)
     }

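Here the captured closure state (sLastLogged, tLastLogged) gets a mutex, since a Logf may be shared across goroutines; note the lock is dropped before calling logf, so the callback never runs with it held. The same pattern in a standalone, simplified form (hypothetical dedupingPrinter, not the real API):

    package main

    import (
        "fmt"
        "sync"
    )

    // dedupingPrinter returns a print function that is safe for concurrent
    // use: the captured last-seen string is only touched under mu.
    func dedupingPrinter() func(string) {
        var (
            mu   sync.Mutex
            last string
        )
        return func(s string) {
            mu.Lock()
            if s == last {
                mu.Unlock()
                return
            }
            last = s
            mu.Unlock()
            fmt.Println(s) // called outside the lock, like logf(s) above
        }
    }

    func main() {
        p := dedupingPrinter()
        p("hello")
        p("hello") // suppressed as a duplicate
        p("world")
    }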
@@ -9,6 +9,7 @@ import (
     "bytes"
     "fmt"
     "log"
+    "sync"
     "testing"
     "time"
 )
@@ -117,3 +118,31 @@ func TestArgWriter(t *testing.T) {
         t.Errorf("got %q; want %q", got, want)
     }
 }
+
+func TestSynchronization(t *testing.T) {
+    timeNow := testTimer(1 * time.Second)
+    tests := []struct {
+        name string
+        logf Logf
+    }{
+        {"RateLimitedFn", RateLimitedFn(t.Logf, 1*time.Minute, 2, 50)},
+        {"LogOnChange", LogOnChange(t.Logf, 5*time.Second, timeNow)},
+    }
+
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            var wg sync.WaitGroup
+            wg.Add(2)
+
+            f := func() {
+                tt.logf("1 2 3 4 5")
+                wg.Done()
+            }
+
+            go f()
+            go f()
+
+            wg.Wait()
+        })
+    }
+}
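This test is only meaningful under the race detector: two goroutines invoke the same Logf, and any unsynchronized access to the captured state shows up as a reported data race rather than a failed assertion. Assuming the package path types/logger from the commit title, something like

    go test -race ./types/logger

run from the repository root exercises it; without -race it passes even against the old, unsynchronized code.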
