From 2b448f0696006b76d1d4cfa227472d0153782445 Mon Sep 17 00:00:00 2001 From: Alex Chan Date: Mon, 20 Oct 2025 18:12:25 +0100 Subject: [PATCH] ipn, tka: improve the logging around TKA sync and AUM errors * When we do the TKA sync, log whether TKA is enabled and whether we want it to be enabled. This would help us see if a node is making bootstrap errors. * When we fail to look up an AUM locally, log the ID of the AUM rather than a generic "file does not exist" error. These AUM IDs are cryptographic hashes of the TKA state, which itself just contains public keys and signatures. These IDs aren't sensitive and logging them is safe. Signed-off-by: Alex Chan Updates https://github.com/tailscale/corp/issues/33594 --- ipn/ipnlocal/network-lock.go | 11 ++++++----- tka/builder.go | 2 +- tka/tailchonk.go | 10 +++++----- tka/tka.go | 10 +++++----- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/ipn/ipnlocal/network-lock.go b/ipn/ipnlocal/network-lock.go index 499082445..6acb9fe1d 100644 --- a/ipn/ipnlocal/network-lock.go +++ b/ipn/ipnlocal/network-lock.go @@ -288,8 +288,11 @@ func (b *LocalBackend) tkaSyncIfNeeded(nm *netmap.NetworkMap, prefs ipn.PrefsVie return nil } - if b.tka != nil || nm.TKAEnabled { - b.logf("tkaSyncIfNeeded: enabled=%v, head=%v", nm.TKAEnabled, nm.TKAHead) + isEnabled := b.tka != nil + wantEnabled := nm.TKAEnabled + + if isEnabled || wantEnabled { + b.logf("tkaSyncIfNeeded: isEnabled=%t, wantEnabled=%t, head=%v", isEnabled, wantEnabled, nm.TKAHead) } ourNodeKey, ok := prefs.Persist().PublicNodeKeyOK() @@ -297,8 +300,6 @@ func (b *LocalBackend) tkaSyncIfNeeded(nm *netmap.NetworkMap, prefs ipn.PrefsVie return errors.New("tkaSyncIfNeeded: no node key in prefs") } - isEnabled := b.tka != nil - wantEnabled := nm.TKAEnabled didJustEnable := false if isEnabled != wantEnabled { var ourHead tka.AUMHash @@ -948,7 +949,7 @@ func (b *LocalBackend) NetworkLockLog(maxEntries int) ([]ipnstate.NetworkLockUpd if err == os.ErrNotExist { break } - return out, fmt.Errorf("reading AUM: %w", err) + return out, fmt.Errorf("reading AUM (%v): %w", cursor, err) } update := ipnstate.NetworkLockUpdate{ diff --git a/tka/builder.go b/tka/builder.go index 642f39d77..199cec06d 100644 --- a/tka/builder.go +++ b/tka/builder.go @@ -136,7 +136,7 @@ func (b *UpdateBuilder) Finalize(storage Chonk) ([]AUM, error) { needCheckpoint = false break } - return nil, fmt.Errorf("reading AUM: %v", err) + return nil, fmt.Errorf("reading AUM (%v): %v", cursor, err) } if aum.MessageKind == AUMCheckpoint { diff --git a/tka/tailchonk.go b/tka/tailchonk.go index cb683c273..d01c5826e 100644 --- a/tka/tailchonk.go +++ b/tka/tailchonk.go @@ -596,7 +596,7 @@ func markActiveChain(storage Chonk, verdict map[AUMHash]retainState, minChain in // We've reached the end of the chain we have stored. return h, nil } - return AUMHash{}, fmt.Errorf("reading active chain (retainStateActive) (%d): %w", i, err) + return AUMHash{}, fmt.Errorf("reading active chain (retainStateActive) (%d, %v): %w", i, parent, err) } } @@ -616,7 +616,7 @@ func markActiveChain(storage Chonk, verdict map[AUMHash]retainState, minChain in return AUMHash{}, errors.New("reached genesis AUM without finding an appropriate lastActiveAncestor") } if next, err = storage.AUM(parent); err != nil { - return AUMHash{}, fmt.Errorf("searching for compaction target: %w", err) + return AUMHash{}, fmt.Errorf("searching for compaction target (%v): %w", parent, err) } } @@ -632,7 +632,7 @@ func markActiveChain(storage Chonk, verdict map[AUMHash]retainState, minChain in // We've reached the end of the chain we have stored. break } - return AUMHash{}, fmt.Errorf("reading active chain (retainStateCandidate): %w", err) + return AUMHash{}, fmt.Errorf("reading active chain (retainStateCandidate, %v): %w", parent, err) } } @@ -744,7 +744,7 @@ func markAncestorIntersectionAUMs(storage Chonk, verdict map[AUMHash]retainState if didAdjustCandidateAncestor { var next AUM if next, err = storage.AUM(candidateAncestor); err != nil { - return AUMHash{}, fmt.Errorf("searching for compaction target: %w", err) + return AUMHash{}, fmt.Errorf("searching for compaction target (%v): %w", candidateAncestor, err) } for { @@ -760,7 +760,7 @@ func markAncestorIntersectionAUMs(storage Chonk, verdict map[AUMHash]retainState return AUMHash{}, errors.New("reached genesis AUM without finding an appropriate candidateAncestor") } if next, err = storage.AUM(parent); err != nil { - return AUMHash{}, fmt.Errorf("searching for compaction target: %w", err) + return AUMHash{}, fmt.Errorf("searching for compaction target (%v): %w", parent, err) } } } diff --git a/tka/tka.go b/tka/tka.go index 234c87fe1..c37c39754 100644 --- a/tka/tka.go +++ b/tka/tka.go @@ -102,14 +102,14 @@ func computeChainCandidates(storage Chonk, lastKnownOldest *AUMHash, maxIter int iterAgain = false for j := range candidates { - parent, hasParent := candidates[j].Oldest.Parent() + parentHash, hasParent := candidates[j].Oldest.Parent() if hasParent { - parent, err := storage.AUM(parent) + parent, err := storage.AUM(parentHash) if err != nil { if err == os.ErrNotExist { continue } - return nil, fmt.Errorf("reading parent: %v", err) + return nil, fmt.Errorf("reading parent %s: %v", parentHash, err) } candidates[j].Oldest = parent if lastKnownOldest != nil && *lastKnownOldest == parent.Hash() { @@ -210,7 +210,7 @@ func fastForwardWithAdvancer( } nextAUM, err := storage.AUM(*startState.LastAUMHash) if err != nil { - return AUM{}, State{}, fmt.Errorf("reading next: %v", err) + return AUM{}, State{}, fmt.Errorf("reading next (%v): %v", *startState.LastAUMHash, err) } curs := nextAUM @@ -297,7 +297,7 @@ func computeStateAt(storage Chonk, maxIter int, wantHash AUMHash) (State, error) // If we got here, the current state is dependent on the previous. // Keep iterating backwards till thats not the case. if curs, err = storage.AUM(parent); err != nil { - return State{}, fmt.Errorf("reading parent: %v", err) + return State{}, fmt.Errorf("reading parent (%v): %v", parent, err) } }