From 8602061f32de5588c2eb506ff5e139c9c2260b28 Mon Sep 17 00:00:00 2001
From: Tom DNetto
Date: Mon, 3 Oct 2022 16:07:34 -0700
Subject: [PATCH] ipn/ipnlocal,tka: Fix bugs found by integration testing

* tka.State.staticValidateCheckpoint could call methods on a contained key
  prior to calling StaticValidate on that key
* Remove broken backoff / RPC retry logic from tka methods in ipn/ipnlocal,
  to be fixed at a later time
* Fix NetworkLockModify() which would attempt to take b.mu twice and
  deadlock, remove now-unused dependence on netmap
* Add methods on ipnlocal.LocalBackend to be used in integration tests
* Use TAILSCALE_USE_WIP_CODE as the feature flag so it can be manipulated
  in tests

Signed-off-by: Tom DNetto
---
 ipn/ipnlocal/network-lock.go      | 194 ++++++++++++------------------
 ipn/ipnlocal/network-lock_test.go |   6 +-
 tka/state.go                      |   4 +
 3 files changed, 82 insertions(+), 122 deletions(-)

diff --git a/ipn/ipnlocal/network-lock.go b/ipn/ipnlocal/network-lock.go
index 6dcb92ba7..400bd2908 100644
--- a/ipn/ipnlocal/network-lock.go
+++ b/ipn/ipnlocal/network-lock.go
@@ -16,10 +16,8 @@ import (
 	"path/filepath"
 	"time"
 
-	"golang.org/x/exp/slices"
 	"tailscale.com/envknob"
 	"tailscale.com/ipn/ipnstate"
-	"tailscale.com/logtail/backoff"
 	"tailscale.com/tailcfg"
 	"tailscale.com/tka"
 	"tailscale.com/types/key"
@@ -27,7 +25,7 @@ import (
 	"tailscale.com/types/tkatype"
 )
 
-var networkLockAvailable = envknob.RegisterBool("TS_EXPERIMENTAL_NETWORK_LOCK")
+// TODO(tom): RPC retry/backoff was broken and has been removed. Fix?
 
 var (
 	errMissingNetmap = errors.New("missing netmap: verify that you are logged in")
@@ -90,7 +88,7 @@ func (b *LocalBackend) tkaFilterNetmapLocked(nm *netmap.NetworkMap) {
 // b.mu must be held. b.mu will be stepped out of (and back in) during network
 // RPCs.
 func (b *LocalBackend) tkaSyncIfNeededLocked(nm *netmap.NetworkMap) error {
-	if !networkLockAvailable() {
+	if !envknob.UseWIPCode() {
 		// If the feature flag is not enabled, pretend we don't exist.
 		return nil
 	}
@@ -213,7 +211,7 @@ func (b *LocalBackend) tkaSyncLocked(ourNodeKey key.NodePublic) error {
 	// copy of all forks that clients had.
 	b.mu.Unlock()
-	sendResp, err := b.tkaDoSyncSend(ourNodeKey, toSendAUMs)
+	sendResp, err := b.tkaDoSyncSend(ourNodeKey, toSendAUMs, false)
 	b.mu.Lock()
 	if err != nil {
 		return fmt.Errorf("send RPC: %v", err)
 	}
@@ -274,7 +272,7 @@ func (b *LocalBackend) tkaBootstrapFromGenesisLocked(g tkatype.MarshaledAUM) err
 // CanSupportNetworkLock returns nil if tailscaled is able to operate
 // a local tailnet key authority (and hence enforce network lock).
 func (b *LocalBackend) CanSupportNetworkLock() error {
-	if !networkLockAvailable() {
+	if !envknob.UseWIPCode() {
 		return errors.New("this feature is not yet complete, a later release may support this functionality")
 	}
 
@@ -382,6 +380,26 @@ func (b *LocalBackend) NetworkLockInit(keys []tka.Key) error {
 	return err
 }
 
+// Only use is in tests.
+func (b *LocalBackend) NetworkLockVerifySignatureForTest(nks tkatype.MarshaledSignature, nodeKey key.NodePublic) error {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	if b.tka == nil {
+		return errNetworkLockNotActive
+	}
+	return b.tka.authority.NodeKeyAuthorized(nodeKey, nks)
+}
+
+// Only use is in tests.
+func (b *LocalBackend) NetworkLockKeyTrustedForTest(keyID tkatype.KeyID) bool {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	if b.tka == nil {
+		panic("network lock not initialized")
+	}
+	return b.tka.authority.KeyTrusted(keyID)
+}
+
 // NetworkLockModify adds and/or removes keys in the tailnet's key authority.
 func (b *LocalBackend) NetworkLockModify(addKeys, removeKeys []tka.Key) (err error) {
 	defer func() {
@@ -399,10 +417,6 @@ func (b *LocalBackend) NetworkLockModify(addKeys, removeKeys []tka.Key) (err err
 	if b.tka == nil {
 		return errNetworkLockNotActive
 	}
-	nm := b.NetMap()
-	if nm == nil {
-		return errMissingNetmap
-	}
 
 	updater := b.tka.authority.NewUpdater(b.nlPrivKey)
 
@@ -426,73 +440,27 @@ func (b *LocalBackend) NetworkLockModify(addKeys, removeKeys []tka.Key) (err err
 		return nil
 	}
 
-	head, err := b.sendAUMsLocked(aums, true)
+	ourNodeKey := b.prefs.Persist.PrivateNodeKey.Public()
+	b.mu.Unlock()
+	resp, err := b.tkaDoSyncSend(ourNodeKey, aums, true)
+	b.mu.Lock()
 	if err != nil {
 		return err
 	}
 
+	var controlHead tka.AUMHash
+	if err := controlHead.UnmarshalText([]byte(resp.Head)); err != nil {
+		return err
+	}
+
 	lastHead := aums[len(aums)-1].Hash()
-	if !slices.Equal(head[:], lastHead[:]) {
+	if controlHead != lastHead {
 		return errors.New("central tka head differs from submitted AUM, try again")
 	}
 
 	return nil
 }
 
-func (b *LocalBackend) sendAUMsLocked(aums []tka.AUM, interactive bool) (head tka.AUMHash, err error) {
-	// Submitting AUMs may block, so release the lock
-	b.mu.Unlock()
-	defer b.mu.Lock()
-
-	mAUMs := make([]tkatype.MarshaledAUM, len(aums))
-	for i := range aums {
-		mAUMs[i] = aums[i].Serialize()
-	}
-
-	var req bytes.Buffer
-	if err := json.NewEncoder(&req).Encode(tailcfg.TKASyncSendRequest{
-		MissingAUMs: mAUMs,
-		Interactive: interactive,
-	}); err != nil {
-		return head, err
-	}
-
-	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
-	defer cancel()
-	bo := backoff.NewBackoff("tka-submit", b.logf, 5*time.Second)
-	var res *http.Response
-	for {
-		if err := ctx.Err(); err != nil {
-			return head, err
-		}
-		req, err := http.NewRequestWithContext(ctx, "GET", "https://unused/machine/tka/sync/send", &req)
-		if err != nil {
-			return head, err
-		}
-		res, err = b.DoNoiseRequest(req)
-		bo.BackOff(ctx, err)
-		if err == nil {
-			break
-		}
-	}
-	defer res.Body.Close()
-
-	if res.StatusCode != 200 {
-		body, _ := io.ReadAll(res.Body)
-		return head, fmt.Errorf("submit status %d: %s", res.StatusCode, string(body))
-	}
-	a := new(tailcfg.TKASyncSendResponse)
-	if err := json.NewDecoder(res.Body).Decode(a); err != nil {
-		return head, err
-	}
-
-	if err := head.UnmarshalText([]byte(a.Head)); err != nil {
-		return head, err
-	}
-
-	return head, nil
-}
-
 func signNodeKey(nodeInfo tailcfg.TKASignInfo, signer key.NLPrivate) (*tka.NodeKeySignature, error) {
 	p, err := nodeInfo.NodePublic.MarshalBinary()
 	if err != nil {
@@ -524,34 +492,27 @@ func (b *LocalBackend) tkaInitBegin(ourNodeKey key.NodePublic, aum tka.AUM) (*ta
 
 	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
 	defer cancel()
-	bo := backoff.NewBackoff("tka-init-begin", b.logf, 5*time.Second)
-	for {
-		if err := ctx.Err(); err != nil {
-			return nil, fmt.Errorf("ctx: %w", err)
-		}
-		req, err := http.NewRequestWithContext(ctx, "GET", "https://unused/machine/tka/init/begin", &req)
-		if err != nil {
-			return nil, fmt.Errorf("req: %w", err)
-		}
-		res, err := b.DoNoiseRequest(req)
-		if err != nil {
-			bo.BackOff(ctx, err)
-			continue
-		}
-		if res.StatusCode != 200 {
-			body, _ := io.ReadAll(res.Body)
-			res.Body.Close()
-			return nil, fmt.Errorf("request returned (%d): %s", res.StatusCode, string(body))
-		}
-		a := new(tailcfg.TKAInitBeginResponse)
-		err = json.NewDecoder(res.Body).Decode(a)
+	req2, err := http.NewRequestWithContext(ctx, "GET", "https://unused/machine/tka/init/begin", &req)
+	if err != nil {
+		return nil, fmt.Errorf("req: %w", err)
+	}
+	res, err := b.DoNoiseRequest(req2)
+	if err != nil {
+		return nil, fmt.Errorf("resp: %w", err)
+	}
+	if res.StatusCode != 200 {
+		body, _ := io.ReadAll(res.Body)
 		res.Body.Close()
-		if err != nil {
-			return nil, fmt.Errorf("decoding JSON: %w", err)
-		}
-
-		return a, nil
+		return nil, fmt.Errorf("request returned (%d): %s", res.StatusCode, string(body))
 	}
+	a := new(tailcfg.TKAInitBeginResponse)
+	err = json.NewDecoder(res.Body).Decode(a)
+	res.Body.Close()
+	if err != nil {
+		return nil, fmt.Errorf("decoding JSON: %w", err)
+	}
+
+	return a, nil
 }
 
 func (b *LocalBackend) tkaInitFinish(ourNodeKey key.NodePublic, nks map[tailcfg.NodeID]tkatype.MarshaledSignature) (*tailcfg.TKAInitFinishResponse, error) {
@@ -566,34 +527,28 @@ func (b *LocalBackend) tkaInitFinish(ourNodeKey key.NodePublic, nks map[tailcfg.
 
 	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
 	defer cancel()
-	bo := backoff.NewBackoff("tka-init-finish", b.logf, 5*time.Second)
-	for {
-		if err := ctx.Err(); err != nil {
-			return nil, fmt.Errorf("ctx: %w", err)
-		}
-		req, err := http.NewRequestWithContext(ctx, "GET", "https://unused/machine/tka/init/finish", &req)
-		if err != nil {
-			return nil, fmt.Errorf("req: %w", err)
-		}
-		res, err := b.DoNoiseRequest(req)
-		if err != nil {
-			bo.BackOff(ctx, err)
-			continue
-		}
-		if res.StatusCode != 200 {
-			body, _ := io.ReadAll(res.Body)
-			res.Body.Close()
-			return nil, fmt.Errorf("request returned (%d): %s", res.StatusCode, string(body))
-		}
-		a := new(tailcfg.TKAInitFinishResponse)
-		err = json.NewDecoder(res.Body).Decode(a)
-		res.Body.Close()
-		if err != nil {
-			return nil, fmt.Errorf("decoding JSON: %w", err)
-		}
-
-		return a, nil
+	req2, err := http.NewRequestWithContext(ctx, "GET", "https://unused/machine/tka/init/finish", &req)
+	if err != nil {
+		return nil, fmt.Errorf("req: %w", err)
+	}
+	res, err := b.DoNoiseRequest(req2)
+	if err != nil {
+		return nil, fmt.Errorf("resp: %w", err)
 	}
+	if res.StatusCode != 200 {
+		body, _ := io.ReadAll(res.Body)
+		res.Body.Close()
+		return nil, fmt.Errorf("request returned (%d): %s", res.StatusCode, string(body))
+	}
+	a := new(tailcfg.TKAInitFinishResponse)
+	err = json.NewDecoder(res.Body).Decode(a)
+	res.Body.Close()
+	if err != nil {
+		return nil, fmt.Errorf("decoding JSON: %w", err)
+	}
+
+	return a, nil
 }
 
 // tkaFetchBootstrap sends a /machine/tka/bootstrap RPC to the control plane
@@ -707,11 +662,12 @@ func (b *LocalBackend) tkaDoSyncOffer(ourNodeKey key.NodePublic, offer tka.SyncO
 
 // tkaDoSyncSend sends a /machine/tka/sync/send RPC to the control plane
 // over noise. This is the second of two RPCs implementing tka synchronization.
-func (b *LocalBackend) tkaDoSyncSend(ourNodeKey key.NodePublic, aums []tka.AUM) (*tailcfg.TKASyncSendResponse, error) {
+func (b *LocalBackend) tkaDoSyncSend(ourNodeKey key.NodePublic, aums []tka.AUM, interactive bool) (*tailcfg.TKASyncSendResponse, error) {
 	sendReq := tailcfg.TKASyncSendRequest{
 		Version:     tailcfg.CurrentCapabilityVersion,
 		NodeKey:     ourNodeKey,
 		MissingAUMs: make([]tkatype.MarshaledAUM, len(aums)),
+		Interactive: interactive,
 	}
 	for i, a := range aums {
 		sendReq.MissingAUMs[i] = a.Serialize()
diff --git a/ipn/ipnlocal/network-lock_test.go b/ipn/ipnlocal/network-lock_test.go
index 3933d2e33..a8840c9ee 100644
--- a/ipn/ipnlocal/network-lock_test.go
+++ b/ipn/ipnlocal/network-lock_test.go
@@ -64,7 +64,7 @@ func fakeNoiseServer(t *testing.T, handler http.HandlerFunc) (*httptest.Server,
 }
 
 func TestTKAEnablementFlow(t *testing.T) {
-	networkLockAvailable = func() bool { return true } // Enable the feature flag
+	envknob.Setenv("TAILSCALE_USE_WIP_CODE", "1")
 	nodePriv := key.NewNode()
 
 	// Make a fake TKA authority, getting a usable genesis AUM which
@@ -145,7 +145,7 @@ func TestTKAEnablementFlow(t *testing.T) {
 }
 
 func TestTKADisablementFlow(t *testing.T) {
-	networkLockAvailable = func() bool { return true } // Enable the feature flag
+	envknob.Setenv("TAILSCALE_USE_WIP_CODE", "1")
 	temp := t.TempDir()
 	os.Mkdir(filepath.Join(temp, "tka"), 0755)
 	nodePriv := key.NewNode()
@@ -262,7 +262,7 @@ func TestTKADisablementFlow(t *testing.T) {
 }
 
 func TestTKASync(t *testing.T) {
-	networkLockAvailable = func() bool { return true } // Enable the feature flag
+	envknob.Setenv("TAILSCALE_USE_WIP_CODE", "1")
 	someKeyPriv := key.NewNLPrivate()
 	someKey := tka.Key{Kind: tka.Key25519, Public: someKeyPriv.Public().Verifier(), Votes: 1}
 
diff --git a/tka/state.go b/tka/state.go
index 353b68a85..2ab4715da 100644
--- a/tka/state.go
+++ b/tka/state.go
@@ -249,6 +249,10 @@ func (s *State) staticValidateCheckpoint() error {
 		if err := k.StaticValidate(); err != nil {
 			return fmt.Errorf("key[%d]: %v", i, err)
 		}
+	}
+	// NOTE: The max number of keys is constrained (512), so
+	// O(n^2) is fine.
+	for i, k := range s.Keys {
 		for j, k2 := range s.Keys {
 			if i == j {
 				continue
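
A note on the NetworkLockModify fix above: the old code called b.NetMap() while already holding b.mu, and Go's sync.Mutex is not reentrant, so the second Lock deadlocked. The fixed code instead steps out of b.mu around the blocking tkaDoSyncSend RPC and re-acquires it afterwards, the same bracketing tkaSyncLocked uses. Below is a minimal, self-contained sketch of that pattern; the Backend, doRPC, and active names are hypothetical stand-ins for illustration, not the real LocalBackend API.

package main

import (
	"errors"
	"fmt"
	"sync"
)

// Backend is a hypothetical stand-in for ipnlocal.LocalBackend: a single
// mutex guards all state, and some methods must perform blocking RPCs.
type Backend struct {
	mu     sync.Mutex
	active bool // guarded by mu; stand-in for b.tka != nil
}

// doRPC is a placeholder for a blocking control-plane call such as
// tkaDoSyncSend. It must never be invoked with b.mu held.
func (b *Backend) doRPC() (head string, err error) {
	return "head-1", nil
}

// Modify mirrors the fixed NetworkLockModify shape: take the lock once,
// validate state, then step out of the lock for the RPC and back in.
// Calling another mu-taking method while holding mu (as the old code did
// via b.NetMap()) blocks forever, because sync.Mutex is not reentrant.
func (b *Backend) Modify() error {
	b.mu.Lock()
	defer b.mu.Unlock()
	if !b.active {
		return errors.New("not active")
	}

	b.mu.Unlock() // release around the blocking RPC
	head, err := b.doRPC()
	b.mu.Lock() // re-acquire; the deferred Unlock still pairs correctly
	if err != nil {
		return err
	}

	// State may have changed while the lock was released; treat anything
	// read before the gap as stale and re-validate it here.
	if !b.active {
		return errors.New("deactivated during RPC")
	}
	fmt.Println("control head:", head)
	return nil
}

func main() {
	b := &Backend{active: true}
	if err := b.Modify(); err != nil {
		fmt.Println("modify:", err)
	}
}

The tka/state.go hunk applies a similar defensive ordering: keys are now statically validated in a loop of their own before the second, O(n^2) duplicate-detection loop calls any methods on them, so a malformed key is never operated on prior to validation.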