ipn/ipnlocal: use an in-memory TKA store if FS is unavailable

This requires making the internals of LocalBackend a bit more generic,
and implementing the `tka.CompactableChonk` interface for `tka.Mem`.

Signed-off-by: Alex Chan <alexc@tailscale.com>

Updates https://github.com/tailscale/corp/issues/33599
pull/17941/head
Alex Chan 1 month ago committed by Alex Chan
parent d01081683c
commit 1723cb83ed

@ -818,6 +818,7 @@ func upWorthyWarning(s string) bool {
strings.Contains(s, healthmsg.WarnAcceptRoutesOff) ||
strings.Contains(s, healthmsg.LockedOut) ||
strings.Contains(s, healthmsg.WarnExitNodeUsage) ||
strings.Contains(s, healthmsg.InMemoryTailnetLockState) ||
strings.Contains(strings.ToLower(s), "update available: ")
}

@ -8,9 +8,10 @@
package healthmsg
const (
WarnAcceptRoutesOff = "Some peers are advertising routes but --accept-routes is false"
TailscaleSSHOnBut = "Tailscale SSH enabled, but " // + ... something from caller
LockedOut = "this node is locked out; it will not have connectivity until it is signed. For more info, see https://tailscale.com/s/locked-out"
WarnExitNodeUsage = "The following issues on your machine will likely make usage of exit nodes impossible"
DisableRPFilter = "Please set rp_filter=2 instead of rp_filter=1; see https://github.com/tailscale/tailscale/issues/3310"
WarnAcceptRoutesOff = "Some peers are advertising routes but --accept-routes is false"
TailscaleSSHOnBut = "Tailscale SSH enabled, but " // + ... something from caller
LockedOut = "this node is locked out; it will not have connectivity until it is signed. For more info, see https://tailscale.com/s/locked-out"
WarnExitNodeUsage = "The following issues on your machine will likely make usage of exit nodes impossible"
DisableRPFilter = "Please set rp_filter=2 instead of rp_filter=1; see https://github.com/tailscale/tailscale/issues/3310"
InMemoryTailnetLockState = "Tailnet Lock state is only being stored in-memory. Set --statedir to store state on disk, which is more secure. See https://tailscale.com/kb/1226/tailnet-lock#tailnet-lock-state"
)

@ -23,6 +23,7 @@ import (
"slices"
"time"
"tailscale.com/health"
"tailscale.com/health/healthmsg"
"tailscale.com/ipn"
"tailscale.com/ipn/ipnstate"
@ -54,7 +55,7 @@ var (
type tkaState struct {
profile ipn.ProfileID
authority *tka.Authority
storage *tka.FS
storage tka.CompactableChonk
filtered []ipnstate.TKAPeer
}
@ -75,7 +76,7 @@ func (b *LocalBackend) initTKALocked() error {
root := b.TailscaleVarRoot()
if root == "" {
b.tka = nil
b.logf("network-lock unavailable; no state directory")
b.logf("cannot fetch existing TKA state; no state directory for network-lock")
return nil
}
@ -90,6 +91,7 @@ func (b *LocalBackend) initTKALocked() error {
if err != nil {
return fmt.Errorf("initializing tka: %v", err)
}
if err := authority.Compact(storage, tkaCompactionDefaults); err != nil {
b.logf("tka compaction failed: %v", err)
}
@ -105,6 +107,16 @@ func (b *LocalBackend) initTKALocked() error {
return nil
}
// noNetworkLockStateDirWarnable is a Warnable to warn the user that Tailnet Lock data
// (in particular, the list of AUMs in the TKA state) is being stored in memory and will
// be lost when tailscaled restarts.
var noNetworkLockStateDirWarnable = health.Register(&health.Warnable{
Code: "no-tailnet-lock-state-dir",
Title: "No statedir for Tailnet Lock",
Severity: health.SeverityMedium,
Text: health.StaticMessage(healthmsg.InMemoryTailnetLockState),
})
// tkaFilterNetmapLocked checks the signatures on each node key, dropping
// nodes from the netmap whose signature does not verify.
//
@ -447,7 +459,7 @@ func (b *LocalBackend) tkaSyncLocked(ourNodeKey key.NodePublic) error {
// b.mu must be held & TKA must be initialized.
func (b *LocalBackend) tkaApplyDisablementLocked(secret []byte) error {
if b.tka.authority.ValidDisablement(secret) {
if err := os.RemoveAll(b.chonkPathLocked()); err != nil {
if err := b.tka.storage.RemoveAll(); err != nil {
return err
}
b.tka = nil
@ -491,19 +503,21 @@ func (b *LocalBackend) tkaBootstrapFromGenesisLocked(g tkatype.MarshaledAUM, per
}
}
chonkDir := b.chonkPathLocked()
if err := os.Mkdir(filepath.Dir(chonkDir), 0755); err != nil && !os.IsExist(err) {
return fmt.Errorf("creating chonk root dir: %v", err)
}
if err := os.Mkdir(chonkDir, 0755); err != nil && !os.IsExist(err) {
return fmt.Errorf("mkdir: %v", err)
}
chonk, err := tka.ChonkDir(chonkDir)
if err != nil {
return fmt.Errorf("chonk: %v", err)
root := b.TailscaleVarRoot()
var storage tka.CompactableChonk
if root == "" {
b.health.SetUnhealthy(noNetworkLockStateDirWarnable, nil)
b.logf("network-lock using in-memory storage; no state directory")
storage = &tka.Mem{}
} else {
chonkDir := b.chonkPathLocked()
chonk, err := tka.ChonkDir(chonkDir)
if err != nil {
return fmt.Errorf("chonk: %v", err)
}
storage = chonk
}
authority, err := tka.Bootstrap(chonk, genesis)
authority, err := tka.Bootstrap(storage, genesis)
if err != nil {
return fmt.Errorf("tka bootstrap: %v", err)
}
@ -511,7 +525,7 @@ func (b *LocalBackend) tkaBootstrapFromGenesisLocked(g tkatype.MarshaledAUM, per
b.tka = &tkaState{
profile: b.pm.CurrentProfile().ID(),
authority: authority,
storage: chonk,
storage: storage,
}
return nil
}
@ -524,10 +538,6 @@ func (b *LocalBackend) CanSupportNetworkLock() error {
return nil
}
if b.TailscaleVarRoot() == "" {
return errors.New("network-lock is not supported in this configuration, try setting --statedir")
}
// There's a var root (aka --statedir), so if network lock gets
// initialized we have somewhere to store our AUMs. That's all
// we need.
@ -647,6 +657,7 @@ func tkaStateFromPeer(p tailcfg.NodeView) ipnstate.TKAPeer {
// needing signatures is returned as a response.
// The Finish RPC submits signatures for all these nodes, at which point
// Control has everything it needs to atomically enable network lock.
// TODO(alexc): Only with persistent backend
func (b *LocalBackend) NetworkLockInit(keys []tka.Key, disablementValues [][]byte, supportDisablement []byte) error {
if err := b.CanSupportNetworkLock(); err != nil {
return err
@ -767,7 +778,7 @@ func (b *LocalBackend) NetworkLockForceLocalDisable() error {
return fmt.Errorf("saving prefs: %w", err)
}
if err := os.RemoveAll(b.chonkPathLocked()); err != nil {
if err := b.tka.storage.RemoveAll(); err != nil {
return fmt.Errorf("deleting TKA state: %w", err)
}
b.tka = nil
@ -776,6 +787,7 @@ func (b *LocalBackend) NetworkLockForceLocalDisable() error {
// NetworkLockSign signs the given node-key and submits it to the control plane.
// rotationPublic, if specified, must be an ed25519 public key.
// TODO(alexc): in-memory only
func (b *LocalBackend) NetworkLockSign(nodeKey key.NodePublic, rotationPublic []byte) error {
ourNodeKey, sig, err := func(nodeKey key.NodePublic, rotationPublic []byte) (key.NodePublic, tka.NodeKeySignature, error) {
b.mu.Lock()

@ -10,6 +10,7 @@ import (
"errors"
"fmt"
"log"
"maps"
"os"
"path/filepath"
"slices"
@ -57,6 +58,10 @@ type Chonk interface {
// as a hint to pick the correct chain in the event that the Chonk stores
// multiple distinct chains.
LastActiveAncestor() (*AUMHash, error)
// RemoveAll permanently and completely clears the TKA state. This should
// be called when the user disables Tailnet Lock.
RemoveAll() error
}
// CompactableChonk implementation are extensions of Chonk, which are
@ -78,12 +83,21 @@ type CompactableChonk interface {
}
// Mem implements in-memory storage of TKA state, suitable for
// tests.
// tests or cases where filesystem storage is unavailable.
//
// Mem implements the Chonk interface.
//
// Mem is thread-safe.
type Mem struct {
mu sync.RWMutex
aums map[AUMHash]AUM
commitTimes map[AUMHash]time.Time
// parentIndex is a map of AUMs to the AUMs for which they are
// the parent.
//
// For example, if parent index is {1 -> {2, 3, 4}}, that means
// that AUMs 2, 3, 4 all have aum.PrevAUMHash = 1.
parentIndex map[AUMHash][]AUMHash
lastActiveAncestor *AUMHash
@ -152,12 +166,14 @@ func (c *Mem) CommitVerifiedAUMs(updates []AUM) error {
if c.aums == nil {
c.parentIndex = make(map[AUMHash][]AUMHash, 64)
c.aums = make(map[AUMHash]AUM, 64)
c.commitTimes = make(map[AUMHash]time.Time, 64)
}
updateLoop:
for _, aum := range updates {
aumHash := aum.Hash()
c.aums[aumHash] = aum
c.commitTimes[aumHash] = time.Now()
parent, ok := aum.Parent()
if ok {
@ -173,6 +189,71 @@ updateLoop:
return nil
}
// RemoveAll permanently and completely clears the TKA state.
func (c *Mem) RemoveAll() error {
c.mu.Lock()
defer c.mu.Unlock()
c.aums = nil
c.commitTimes = nil
c.parentIndex = nil
c.lastActiveAncestor = nil
return nil
}
// AllAUMs returns all AUMs stored in the chonk.
func (c *Mem) AllAUMs() ([]AUMHash, error) {
c.mu.RLock()
defer c.mu.RUnlock()
return slices.Collect(maps.Keys(c.aums)), nil
}
// CommitTime returns the time at which the AUM was committed.
//
// If the AUM does not exist, then os.ErrNotExist is returned.
func (c *Mem) CommitTime(h AUMHash) (time.Time, error) {
c.mu.RLock()
defer c.mu.RUnlock()
t, ok := c.commitTimes[h]
if ok {
return t, nil
} else {
return time.Time{}, os.ErrNotExist
}
}
// PurgeAUMs marks the specified AUMs for deletion from storage.
func (c *Mem) PurgeAUMs(hashes []AUMHash) error {
c.mu.Lock()
defer c.mu.Unlock()
for _, h := range hashes {
// Remove the deleted AUM from the list of its parents' children.
//
// However, we leave the list of this AUM's children in parentIndex,
// so we can find them later in ChildAUMs().
if aum, ok := c.aums[h]; ok {
parent, hasParent := aum.Parent()
if hasParent {
c.parentIndex[parent] = slices.DeleteFunc(
c.parentIndex[parent],
func(other AUMHash) bool { return bytes.Equal(h[:], other[:]) },
)
if len(c.parentIndex[parent]) == 0 {
delete(c.parentIndex, parent)
}
}
}
// Delete this AUM from the list of AUMs and commit times.
delete(c.aums, h)
delete(c.commitTimes, h)
}
return nil
}
// FS implements filesystem storage of TKA state.
//
// FS implements the Chonk interface.
@ -184,6 +265,10 @@ type FS struct {
// ChonkDir returns an implementation of Chonk which uses the
// given directory to store TKA state.
func ChonkDir(dir string) (*FS, error) {
if err := os.MkdirAll(dir, 0755); err != nil && !os.IsExist(err) {
return nil, fmt.Errorf("creating chonk root dir: %v", err)
}
stat, err := os.Stat(dir)
if err != nil {
return nil, err
@ -376,6 +461,11 @@ func (c *FS) Heads() ([]AUM, error) {
return out, nil
}
// RemoveAll permanently and completely clears the TKA state.
func (c *FS) RemoveAll() error {
return os.RemoveAll(c.base)
}
// AllAUMs returns all AUMs stored in the chonk.
func (c *FS) AllAUMs() ([]AUMHash, error) {
c.mu.RLock()

@ -127,6 +127,43 @@ func TestTailchonkFS_IgnoreTempFile(t *testing.T) {
}
}
// If we use a non-existent directory with filesystem Chonk storage,
// it's automatically created.
func TestTailchonkFS_CreateChonkDir(t *testing.T) {
base := filepath.Join(t.TempDir(), "a", "b", "c")
chonk, err := ChonkDir(base)
if err != nil {
t.Fatalf("ChonkDir: %v", err)
}
aum := AUM{MessageKind: AUMNoOp}
must.Do(chonk.CommitVerifiedAUMs([]AUM{aum}))
got, err := chonk.AUM(aum.Hash())
if err != nil {
t.Errorf("Chonk.AUM: %v", err)
}
if diff := cmp.Diff(got, aum); diff != "" {
t.Errorf("wrong AUM; (-got+want):%v", diff)
}
if _, err := os.Stat(base); err != nil {
t.Errorf("os.Stat: %v", err)
}
}
// You can't use a file as the root of your filesystem Chonk storage.
func TestTailchonkFS_CannotUseFile(t *testing.T) {
base := filepath.Join(t.TempDir(), "tka_storage.txt")
must.Do(os.WriteFile(base, []byte("this won't work"), 0644))
_, err := ChonkDir(base)
if err == nil {
t.Fatal("ChonkDir succeeded; expected an error")
}
}
func TestMarkActiveChain(t *testing.T) {
type aumTemplate struct {
AUM AUM

@ -39,6 +39,12 @@ func TestImplementsCompactableChonk(t *testing.T) {
name string
newChonk func(t *testing.T) tka.CompactableChonk
}{
{
name: "Mem",
newChonk: func(t *testing.T) tka.CompactableChonk {
return &tka.Mem{}
},
},
{
name: "FS",
newChonk: func(t *testing.T) tka.CompactableChonk {

Loading…
Cancel
Save