diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 416a1a1fb..4cefde066 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -198,6 +198,14 @@ type LocalBackend struct { // dialPlan is any dial plan that we've received from the control // server during a previous connection; it is cleared on logout. dialPlan atomic.Pointer[tailcfg.ControlDialPlan] + + // tkaSyncLock is used to make tkaSyncIfNeeded an exclusive + // section. This is needed to stop two map-responses in quick succession + // from racing each other through TKA sync logic / RPCs. + // + // tkaSyncLock MUST be taken before mu (or inversely, mu must not be held + // at the moment that tkaSyncLock is taken). + tkaSyncLock sync.Mutex } // clientGen is a func that creates a control plane client. @@ -775,9 +783,12 @@ func (b *LocalBackend) setClientStatus(st controlclient.Status) { } } if st.NetMap != nil { - if err := b.tkaSyncIfNeededLocked(st.NetMap); err != nil { + b.mu.Unlock() // respect locking rules for tkaSyncIfNeeded + if err := b.tkaSyncIfNeeded(st.NetMap); err != nil { b.logf("[v1] TKA sync error: %v", err) } + b.mu.Lock() + if !envknob.TKASkipSignatureCheck() { b.tkaFilterNetmapLocked(st.NetMap) } diff --git a/ipn/ipnlocal/network-lock.go b/ipn/ipnlocal/network-lock.go index 3c0d25911..98ecef8e8 100644 --- a/ipn/ipnlocal/network-lock.go +++ b/ipn/ipnlocal/network-lock.go @@ -39,6 +39,8 @@ type tkaState struct { // tkaFilterNetmapLocked checks the signatures on each node key, dropping // nodes from the netmap who's signature does not verify. +// +// b.mu must be held. func (b *LocalBackend) tkaFilterNetmapLocked(nm *netmap.NetworkMap) { if !envknob.UseWIPCode() { return // Feature-flag till network-lock is in Alpha. @@ -70,7 +72,7 @@ func (b *LocalBackend) tkaFilterNetmapLocked(nm *netmap.NetworkMap) { nm.Peers = peers } -// tkaSyncIfNeededLocked examines TKA info reported from the control plane, +// tkaSyncIfNeeded examines TKA info reported from the control plane, // performing the steps necessary to synchronize local tka state. // // There are 4 scenarios handled here: @@ -85,13 +87,19 @@ func (b *LocalBackend) tkaFilterNetmapLocked(nm *netmap.NetworkMap) { // - Everything up to date: All other cases. // ∴ no action necessary. // -// b.mu must be held. b.mu will be stepped out of (and back in) during network -// RPCs. -func (b *LocalBackend) tkaSyncIfNeededLocked(nm *netmap.NetworkMap) error { +// tkaSyncIfNeeded immediately takes b.takeSyncLock which is held throughout, +// and may take b.mu as required. +func (b *LocalBackend) tkaSyncIfNeeded(nm *netmap.NetworkMap) error { if !envknob.UseWIPCode() { // If the feature flag is not enabled, pretend we don't exist. return nil } + + b.tkaSyncLock.Lock() // take tkaSyncLock to make this function an exclusive section. + defer b.tkaSyncLock.Unlock() + b.mu.Lock() // take mu to protect access to synchronized fields. + defer b.mu.Unlock() + ourNodeKey := b.prefs.Persist.PrivateNodeKey.Public() isEnabled := b.tka != nil @@ -158,6 +166,8 @@ func toSyncOffer(head string, ancestors []string) (tka.SyncOffer, error) { // tkaSyncLocked synchronizes TKA state with control. b.mu must be held // and tka must be initialized. b.mu will be stepped out of (and back into) // during network RPCs. +// +// b.mu must be held. func (b *LocalBackend) tkaSyncLocked(ourNodeKey key.NodePublic) error { offer, err := b.tka.authority.SyncOffer(b.tka.storage) if err != nil { diff --git a/ipn/ipnlocal/network-lock_test.go b/ipn/ipnlocal/network-lock_test.go index a8840c9ee..738073f9e 100644 --- a/ipn/ipnlocal/network-lock_test.go +++ b/ipn/ipnlocal/network-lock_test.go @@ -127,12 +127,10 @@ func TestTKAEnablementFlow(t *testing.T) { }, } - b.mu.Lock() - err = b.tkaSyncIfNeededLocked(&netmap.NetworkMap{ + err = b.tkaSyncIfNeeded(&netmap.NetworkMap{ TKAEnabled: true, TKAHead: a1.Head(), }) - b.mu.Unlock() if err != nil { t.Errorf("tkaSyncIfNeededLocked() failed: %v", err) } @@ -228,12 +226,10 @@ func TestTKADisablementFlow(t *testing.T) { // Test that the wrong disablement secret does not shut down the authority. returnWrongSecret = true - b.mu.Lock() - err = b.tkaSyncIfNeededLocked(&netmap.NetworkMap{ + err = b.tkaSyncIfNeeded(&netmap.NetworkMap{ TKAEnabled: false, TKAHead: authority.Head(), }) - b.mu.Unlock() if err != nil { t.Errorf("tkaSyncIfNeededLocked() failed: %v", err) } @@ -243,12 +239,10 @@ func TestTKADisablementFlow(t *testing.T) { // Test the correct disablement secret shuts down the authority. returnWrongSecret = false - b.mu.Lock() - err = b.tkaSyncIfNeededLocked(&netmap.NetworkMap{ + err = b.tkaSyncIfNeeded(&netmap.NetworkMap{ TKAEnabled: false, TKAHead: authority.Head(), }) - b.mu.Unlock() if err != nil { t.Errorf("tkaSyncIfNeededLocked() failed: %v", err) } @@ -468,12 +462,10 @@ func TestTKASync(t *testing.T) { } // Finally, lets trigger a sync. - b.mu.Lock() - err = b.tkaSyncIfNeededLocked(&netmap.NetworkMap{ + err = b.tkaSyncIfNeeded(&netmap.NetworkMap{ TKAEnabled: true, TKAHead: controlAuthority.Head(), }) - b.mu.Unlock() if err != nil { t.Errorf("tkaSyncIfNeededLocked() failed: %v", err) }