From 37b63eff1c1a705d4268d017af354690117c7cf9 Mon Sep 17 00:00:00 2001 From: Alex Chan Date: Wed, 29 Oct 2025 11:09:28 +0000 Subject: [PATCH] ipn/ipnlocal: use an in-memory TKA store if FS is unavailable This requires making the internals of LocalBackend a bit more generic, and implementing the `tka.CompactableChonk` interface for `tka.Mem`. Signed-off-by: Alex Chan Updates https://github.com/tailscale/corp/issues/33599 (cherry picked from commit 1723cb83ed95db76fa933348e8d9df7d9fcb960d) --- cmd/tailscale/cli/up.go | 1 + health/healthmsg/healthmsg.go | 11 ++-- ipn/ipnlocal/network-lock.go | 54 +++++++++++------- tka/tailchonk.go | 92 +++++++++++++++++++++++++++++- tka/tailchonk_test.go | 37 ++++++++++++ tstest/chonktest/tailchonk_test.go | 6 ++ 6 files changed, 174 insertions(+), 27 deletions(-) diff --git a/cmd/tailscale/cli/up.go b/cmd/tailscale/cli/up.go index 91a6b6087..61cade8de 100644 --- a/cmd/tailscale/cli/up.go +++ b/cmd/tailscale/cli/up.go @@ -818,6 +818,7 @@ func upWorthyWarning(s string) bool { strings.Contains(s, healthmsg.WarnAcceptRoutesOff) || strings.Contains(s, healthmsg.LockedOut) || strings.Contains(s, healthmsg.WarnExitNodeUsage) || + strings.Contains(s, healthmsg.InMemoryTailnetLockState) || strings.Contains(strings.ToLower(s), "update available: ") } diff --git a/health/healthmsg/healthmsg.go b/health/healthmsg/healthmsg.go index 238410373..5ea1c736d 100644 --- a/health/healthmsg/healthmsg.go +++ b/health/healthmsg/healthmsg.go @@ -8,9 +8,10 @@ package healthmsg const ( - WarnAcceptRoutesOff = "Some peers are advertising routes but --accept-routes is false" - TailscaleSSHOnBut = "Tailscale SSH enabled, but " // + ... something from caller - LockedOut = "this node is locked out; it will not have connectivity until it is signed. For more info, see https://tailscale.com/s/locked-out" - WarnExitNodeUsage = "The following issues on your machine will likely make usage of exit nodes impossible" - DisableRPFilter = "Please set rp_filter=2 instead of rp_filter=1; see https://github.com/tailscale/tailscale/issues/3310" + WarnAcceptRoutesOff = "Some peers are advertising routes but --accept-routes is false" + TailscaleSSHOnBut = "Tailscale SSH enabled, but " // + ... something from caller + LockedOut = "this node is locked out; it will not have connectivity until it is signed. For more info, see https://tailscale.com/s/locked-out" + WarnExitNodeUsage = "The following issues on your machine will likely make usage of exit nodes impossible" + DisableRPFilter = "Please set rp_filter=2 instead of rp_filter=1; see https://github.com/tailscale/tailscale/issues/3310" + InMemoryTailnetLockState = "Tailnet Lock state is only being stored in-memory. Set --statedir to store state on disk, which is more secure. See https://tailscale.com/kb/1226/tailnet-lock#tailnet-lock-state" ) diff --git a/ipn/ipnlocal/network-lock.go b/ipn/ipnlocal/network-lock.go index 499082445..c769e242d 100644 --- a/ipn/ipnlocal/network-lock.go +++ b/ipn/ipnlocal/network-lock.go @@ -23,6 +23,7 @@ import ( "slices" "time" + "tailscale.com/health" "tailscale.com/health/healthmsg" "tailscale.com/ipn" "tailscale.com/ipn/ipnstate" @@ -54,7 +55,7 @@ var ( type tkaState struct { profile ipn.ProfileID authority *tka.Authority - storage *tka.FS + storage tka.CompactableChonk filtered []ipnstate.TKAPeer } @@ -75,7 +76,7 @@ func (b *LocalBackend) initTKALocked() error { root := b.TailscaleVarRoot() if root == "" { b.tka = nil - b.logf("network-lock unavailable; no state directory") + b.logf("cannot fetch existing TKA state; no state directory for network-lock") return nil } @@ -90,6 +91,7 @@ func (b *LocalBackend) initTKALocked() error { if err != nil { return fmt.Errorf("initializing tka: %v", err) } + if err := authority.Compact(storage, tkaCompactionDefaults); err != nil { b.logf("tka compaction failed: %v", err) } @@ -105,6 +107,16 @@ func (b *LocalBackend) initTKALocked() error { return nil } +// noNetworkLockStateDirWarnable is a Warnable to warn the user that Tailnet Lock data +// (in particular, the list of AUMs in the TKA state) is being stored in memory and will +// be lost when tailscaled restarts. +var noNetworkLockStateDirWarnable = health.Register(&health.Warnable{ + Code: "no-tailnet-lock-state-dir", + Title: "No statedir for Tailnet Lock", + Severity: health.SeverityMedium, + Text: health.StaticMessage(healthmsg.InMemoryTailnetLockState), +}) + // tkaFilterNetmapLocked checks the signatures on each node key, dropping // nodes from the netmap whose signature does not verify. // @@ -442,7 +454,7 @@ func (b *LocalBackend) tkaSyncLocked(ourNodeKey key.NodePublic) error { // b.mu must be held & TKA must be initialized. func (b *LocalBackend) tkaApplyDisablementLocked(secret []byte) error { if b.tka.authority.ValidDisablement(secret) { - if err := os.RemoveAll(b.chonkPathLocked()); err != nil { + if err := b.tka.storage.RemoveAll(); err != nil { return err } b.tka = nil @@ -486,19 +498,21 @@ func (b *LocalBackend) tkaBootstrapFromGenesisLocked(g tkatype.MarshaledAUM, per } } - chonkDir := b.chonkPathLocked() - if err := os.Mkdir(filepath.Dir(chonkDir), 0755); err != nil && !os.IsExist(err) { - return fmt.Errorf("creating chonk root dir: %v", err) - } - if err := os.Mkdir(chonkDir, 0755); err != nil && !os.IsExist(err) { - return fmt.Errorf("mkdir: %v", err) - } - - chonk, err := tka.ChonkDir(chonkDir) - if err != nil { - return fmt.Errorf("chonk: %v", err) + root := b.TailscaleVarRoot() + var storage tka.CompactableChonk + if root == "" { + b.health.SetUnhealthy(noNetworkLockStateDirWarnable, nil) + b.logf("network-lock using in-memory storage; no state directory") + storage = &tka.Mem{} + } else { + chonkDir := b.chonkPathLocked() + chonk, err := tka.ChonkDir(chonkDir) + if err != nil { + return fmt.Errorf("chonk: %v", err) + } + storage = chonk } - authority, err := tka.Bootstrap(chonk, genesis) + authority, err := tka.Bootstrap(storage, genesis) if err != nil { return fmt.Errorf("tka bootstrap: %v", err) } @@ -506,7 +520,7 @@ func (b *LocalBackend) tkaBootstrapFromGenesisLocked(g tkatype.MarshaledAUM, per b.tka = &tkaState{ profile: b.pm.CurrentProfile().ID(), authority: authority, - storage: chonk, + storage: storage, } return nil } @@ -519,10 +533,6 @@ func (b *LocalBackend) CanSupportNetworkLock() error { return nil } - if b.TailscaleVarRoot() == "" { - return errors.New("network-lock is not supported in this configuration, try setting --statedir") - } - // There's a var root (aka --statedir), so if network lock gets // initialized we have somewhere to store our AUMs. That's all // we need. @@ -642,6 +652,7 @@ func tkaStateFromPeer(p tailcfg.NodeView) ipnstate.TKAPeer { // needing signatures is returned as a response. // The Finish RPC submits signatures for all these nodes, at which point // Control has everything it needs to atomically enable network lock. +// TODO(alexc): Only with persistent backend func (b *LocalBackend) NetworkLockInit(keys []tka.Key, disablementValues [][]byte, supportDisablement []byte) error { if err := b.CanSupportNetworkLock(); err != nil { return err @@ -762,7 +773,7 @@ func (b *LocalBackend) NetworkLockForceLocalDisable() error { return fmt.Errorf("saving prefs: %w", err) } - if err := os.RemoveAll(b.chonkPathLocked()); err != nil { + if err := b.tka.storage.RemoveAll(); err != nil { return fmt.Errorf("deleting TKA state: %w", err) } b.tka = nil @@ -771,6 +782,7 @@ func (b *LocalBackend) NetworkLockForceLocalDisable() error { // NetworkLockSign signs the given node-key and submits it to the control plane. // rotationPublic, if specified, must be an ed25519 public key. +// TODO(alexc): in-memory only func (b *LocalBackend) NetworkLockSign(nodeKey key.NodePublic, rotationPublic []byte) error { ourNodeKey, sig, err := func(nodeKey key.NodePublic, rotationPublic []byte) (key.NodePublic, tka.NodeKeySignature, error) { b.mu.Lock() diff --git a/tka/tailchonk.go b/tka/tailchonk.go index 3c50e5efa..f20eb5920 100644 --- a/tka/tailchonk.go +++ b/tka/tailchonk.go @@ -10,6 +10,7 @@ import ( "errors" "fmt" "log" + "maps" "os" "path/filepath" "slices" @@ -57,6 +58,10 @@ type Chonk interface { // as a hint to pick the correct chain in the event that the Chonk stores // multiple distinct chains. LastActiveAncestor() (*AUMHash, error) + + // RemoveAll permanently and completely clears the TKA state. This should + // be called when the user disables Tailnet Lock. + RemoveAll() error } // CompactableChonk implementation are extensions of Chonk, which are @@ -78,12 +83,21 @@ type CompactableChonk interface { } // Mem implements in-memory storage of TKA state, suitable for -// tests. +// tests or cases where filesystem storage is unavailable. // // Mem implements the Chonk interface. +// +// Mem is thread-safe. type Mem struct { mu sync.RWMutex aums map[AUMHash]AUM + commitTimes map[AUMHash]time.Time + + // parentIndex is a map of AUMs to the AUMs for which they are + // the parent. + // + // For example, if parent index is {1 -> {2, 3, 4}}, that means + // that AUMs 2, 3, 4 all have aum.PrevAUMHash = 1. parentIndex map[AUMHash][]AUMHash lastActiveAncestor *AUMHash @@ -152,12 +166,14 @@ func (c *Mem) CommitVerifiedAUMs(updates []AUM) error { if c.aums == nil { c.parentIndex = make(map[AUMHash][]AUMHash, 64) c.aums = make(map[AUMHash]AUM, 64) + c.commitTimes = make(map[AUMHash]time.Time, 64) } updateLoop: for _, aum := range updates { aumHash := aum.Hash() c.aums[aumHash] = aum + c.commitTimes[aumHash] = time.Now() parent, ok := aum.Parent() if ok { @@ -173,6 +189,71 @@ updateLoop: return nil } +// RemoveAll permanently and completely clears the TKA state. +func (c *Mem) RemoveAll() error { + c.mu.Lock() + defer c.mu.Unlock() + c.aums = nil + c.commitTimes = nil + c.parentIndex = nil + c.lastActiveAncestor = nil + return nil +} + +// AllAUMs returns all AUMs stored in the chonk. +func (c *Mem) AllAUMs() ([]AUMHash, error) { + c.mu.RLock() + defer c.mu.RUnlock() + + return slices.Collect(maps.Keys(c.aums)), nil +} + +// CommitTime returns the time at which the AUM was committed. +// +// If the AUM does not exist, then os.ErrNotExist is returned. +func (c *Mem) CommitTime(h AUMHash) (time.Time, error) { + c.mu.RLock() + defer c.mu.RUnlock() + + t, ok := c.commitTimes[h] + if ok { + return t, nil + } else { + return time.Time{}, os.ErrNotExist + } +} + +// PurgeAUMs marks the specified AUMs for deletion from storage. +func (c *Mem) PurgeAUMs(hashes []AUMHash) error { + c.mu.Lock() + defer c.mu.Unlock() + + for _, h := range hashes { + // Remove the deleted AUM from the list of its parents' children. + // + // However, we leave the list of this AUM's children in parentIndex, + // so we can find them later in ChildAUMs(). + if aum, ok := c.aums[h]; ok { + parent, hasParent := aum.Parent() + if hasParent { + c.parentIndex[parent] = slices.DeleteFunc( + c.parentIndex[parent], + func(other AUMHash) bool { return bytes.Equal(h[:], other[:]) }, + ) + if len(c.parentIndex[parent]) == 0 { + delete(c.parentIndex, parent) + } + } + } + + // Delete this AUM from the list of AUMs and commit times. + delete(c.aums, h) + delete(c.commitTimes, h) + } + + return nil +} + // FS implements filesystem storage of TKA state. // // FS implements the Chonk interface. @@ -184,6 +265,10 @@ type FS struct { // ChonkDir returns an implementation of Chonk which uses the // given directory to store TKA state. func ChonkDir(dir string) (*FS, error) { + if err := os.MkdirAll(dir, 0755); err != nil && !os.IsExist(err) { + return nil, fmt.Errorf("creating chonk root dir: %v", err) + } + stat, err := os.Stat(dir) if err != nil { return nil, err @@ -376,6 +461,11 @@ func (c *FS) Heads() ([]AUM, error) { return out, nil } +// RemoveAll permanently and completely clears the TKA state. +func (c *FS) RemoveAll() error { + return os.RemoveAll(c.base) +} + // AllAUMs returns all AUMs stored in the chonk. func (c *FS) AllAUMs() ([]AUMHash, error) { c.mu.RLock() diff --git a/tka/tailchonk_test.go b/tka/tailchonk_test.go index 7816d2dc1..70b7dc9a7 100644 --- a/tka/tailchonk_test.go +++ b/tka/tailchonk_test.go @@ -127,6 +127,43 @@ func TestTailchonkFS_IgnoreTempFile(t *testing.T) { } } +// If we use a non-existent directory with filesystem Chonk storage, +// it's automatically created. +func TestTailchonkFS_CreateChonkDir(t *testing.T) { + base := filepath.Join(t.TempDir(), "a", "b", "c") + + chonk, err := ChonkDir(base) + if err != nil { + t.Fatalf("ChonkDir: %v", err) + } + + aum := AUM{MessageKind: AUMNoOp} + must.Do(chonk.CommitVerifiedAUMs([]AUM{aum})) + + got, err := chonk.AUM(aum.Hash()) + if err != nil { + t.Errorf("Chonk.AUM: %v", err) + } + if diff := cmp.Diff(got, aum); diff != "" { + t.Errorf("wrong AUM; (-got+want):%v", diff) + } + + if _, err := os.Stat(base); err != nil { + t.Errorf("os.Stat: %v", err) + } +} + +// You can't use a file as the root of your filesystem Chonk storage. +func TestTailchonkFS_CannotUseFile(t *testing.T) { + base := filepath.Join(t.TempDir(), "tka_storage.txt") + must.Do(os.WriteFile(base, []byte("this won't work"), 0644)) + + _, err := ChonkDir(base) + if err == nil { + t.Fatal("ChonkDir succeeded; expected an error") + } +} + func TestMarkActiveChain(t *testing.T) { type aumTemplate struct { AUM AUM diff --git a/tstest/chonktest/tailchonk_test.go b/tstest/chonktest/tailchonk_test.go index ce6b04324..6dfab798e 100644 --- a/tstest/chonktest/tailchonk_test.go +++ b/tstest/chonktest/tailchonk_test.go @@ -39,6 +39,12 @@ func TestImplementsCompactableChonk(t *testing.T) { name string newChonk func(t *testing.T) tka.CompactableChonk }{ + { + name: "Mem", + newChonk: func(t *testing.T) tka.CompactableChonk { + return &tka.Mem{} + }, + }, { name: "FS", newChonk: func(t *testing.T) tka.CompactableChonk {