From 8ea90ba80d640c7197fa80097bd247ea78108a66 Mon Sep 17 00:00:00 2001 From: Irbe Krumina Date: Tue, 6 Jan 2026 12:29:46 +0100 Subject: [PATCH] cmd/tailscaled,ipn/{ipnlocal,store/kubestore}: don't create attestation keys for stores that are not bound to a node (#18322) Ensure that hardware attestation keys are not added to tailscaled state stores that are Kubernetes Secrets or AWS SSM as those Tailscale devices should be able to be recreated on different nodes, for example, when moving Pods between nodes. Updates tailscale/tailscale#18302 Signed-off-by: Irbe Krumina --- cmd/tailscaled/tailscaled.go | 49 +++++++++++++-- cmd/tailscaled/tailscaled_test.go | 53 ++++++++++++++++ ipn/store/kubestore/store_kube.go | 80 ++++++++++++++++++++---- ipn/store/kubestore/store_kube_test.go | 84 ++++++++++++++++++++++++++ 4 files changed, 251 insertions(+), 15 deletions(-) diff --git a/cmd/tailscaled/tailscaled.go b/cmd/tailscaled/tailscaled.go index 6abe0cb79..7c19ebb42 100644 --- a/cmd/tailscaled/tailscaled.go +++ b/cmd/tailscaled/tailscaled.go @@ -209,7 +209,10 @@ func main() { flag.BoolVar(&args.disableLogs, "no-logs-no-support", false, "disable log uploads; this also disables any technical support") flag.StringVar(&args.confFile, "config", "", "path to config file, or 'vm:user-data' to use the VM's user-data (EC2)") if buildfeatures.HasTPM { - flag.Var(&args.hardwareAttestation, "hardware-attestation", "use hardware-backed keys to bind node identity to this device when supported by the OS and hardware. Uses TPM 2.0 on Linux and Windows; SecureEnclave on macOS and iOS; and Keystore on Android") + flag.Var(&args.hardwareAttestation, "hardware-attestation", `use hardware-backed keys to bind node identity to this device when supported +by the OS and hardware. Uses TPM 2.0 on Linux and Windows; SecureEnclave on +macOS and iOS; and Keystore on Android. 
Only supported for Tailscale nodes that +store state on filesystem.`) } if f, ok := hookRegisterOutboundProxyFlags.GetOk(); ok { f() @@ -905,13 +908,18 @@ func applyIntegrationTestEnvKnob() { func handleTPMFlags() { switch { case args.hardwareAttestation.v: - if _, err := key.NewEmptyHardwareAttestationKey(); err == key.ErrUnsupported { + if err := canUseHardwareAttestation(); err != nil { log.SetFlags(0) - log.Fatalf("--hardware-attestation is not supported on this platform or in this build of tailscaled") + log.Fatal(err) } case !args.hardwareAttestation.set: policyHWAttestation, _ := policyclient.Get().GetBoolean(pkey.HardwareAttestation, false) - args.hardwareAttestation.v = policyHWAttestation + if err := canUseHardwareAttestation(); err != nil { + log.Printf("[unexpected] policy requires hardware attestation, but device does not support it: %v", err) + args.hardwareAttestation.v = false + } else { + args.hardwareAttestation.v = policyHWAttestation + } } switch { @@ -929,6 +937,39 @@ func handleTPMFlags() { } } +// canUseHardwareAttestation returns an error if hardware attestation can't be +// enabled, either due to availability or compatibility with other settings. +func canUseHardwareAttestation() error { + if _, err := key.NewEmptyHardwareAttestationKey(); err == key.ErrUnsupported { + return errors.New("--hardware-attestation is not supported on this platform or in this build of tailscaled") + } + // Hardware attestation keys are TPM-bound and cannot be migrated between + // machines. Disable when using portable state stores like kube: or arn: + // where state may be loaded on a different machine. 
+ if args.statepath != "" && isPortableStore(args.statepath) { + return errors.New("--hardware-attestation cannot be used with portable state stores (kube:, arn:) because TPM-bound keys cannot be migrated between machines") + } + return nil +} + +// isPortableStore reports whether the given state path refers to a portable +// state store where state may be loaded on different machines. +// All stores apart from file store and TPM store are portable. +func isPortableStore(path string) bool { + if store.HasKnownProviderPrefix(path) && !strings.HasPrefix(path, store.TPMPrefix) { + return true + } + // In most cases Kubernetes Secret and AWS SSM stores would have been caught + // by the earlier check - but that check relies on those stores having been + // registered. This additional check is here to ensure that if we ever + // produce a faulty build that failed to register some store, users who + // upgraded to that don't get hardware keys generated. + if strings.HasPrefix(path, "kube:") || strings.HasPrefix(path, "arn:") { + return true + } + return false +} + // canEncryptState returns an error if state encryption can't be enabled, // either due to availability or compatibility with other settings. 
func canEncryptState() error { diff --git a/cmd/tailscaled/tailscaled_test.go b/cmd/tailscaled/tailscaled_test.go index 1188ad35f..36327cccc 100644 --- a/cmd/tailscaled/tailscaled_test.go +++ b/cmd/tailscaled/tailscaled_test.go @@ -88,3 +88,56 @@ func TestStateStoreError(t *testing.T) { } }) } + +func TestIsPortableStore(t *testing.T) { + tests := []struct { + name string + path string + want bool + }{ + { + name: "kube_store", + path: "kube:my-secret", + want: true, + }, + { + name: "aws_arn_store", + path: "arn:aws:ssm:us-east-1:123456789012:parameter/tailscale/state", + want: true, + }, + { + name: "tpm_store", + path: "tpmseal:/var/lib/tailscale/tailscaled.state", + want: false, + }, + { + name: "local_file_store", + path: "/var/lib/tailscale/tailscaled.state", + want: false, + }, + { + name: "empty_path", + path: "", + want: false, + }, + { + name: "mem_store", + path: "mem:", + want: true, + }, + { + name: "windows_file_store", + path: `C:\ProgramData\Tailscale\server-state.conf`, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isPortableStore(tt.path) + if got != tt.want { + t.Errorf("isPortableStore(%q) = %v, want %v", tt.path, got, tt.want) + } + }) + } +} diff --git a/ipn/store/kubestore/store_kube.go b/ipn/store/kubestore/store_kube.go index f48237c05..ba45409ed 100644 --- a/ipn/store/kubestore/store_kube.go +++ b/ipn/store/kubestore/store_kube.go @@ -6,8 +6,8 @@ package kubestore import ( "context" + "encoding/json" "fmt" - "log" "net" "net/http" "os" @@ -57,6 +57,8 @@ type Store struct { certShareMode string // 'ro', 'rw', or empty podName string + logf logger.Logf + // memory holds the latest tailscale state. Writes write state to a kube // Secret and memory, Reads read from memory. 
memory mem.Store @@ -96,6 +98,7 @@ func newWithClient(logf logger.Logf, c kubeclient.Client, secretName string) (*S canPatch: canPatch, secretName: secretName, podName: os.Getenv("POD_NAME"), + logf: logf, } if envknob.IsCertShareReadWriteMode() { s.certShareMode = "rw" @@ -113,11 +116,11 @@ func newWithClient(logf logger.Logf, c kubeclient.Client, secretName string) (*S if err := s.loadCerts(context.Background(), sel); err != nil { // We will attempt to again retrieve the certs from Secrets when a request for an HTTPS endpoint // is received. - log.Printf("[unexpected] error loading TLS certs: %v", err) + s.logf("[unexpected] error loading TLS certs: %v", err) } } if s.certShareMode == "ro" { - go s.runCertReload(context.Background(), logf) + go s.runCertReload(context.Background()) } return s, nil } @@ -147,7 +150,7 @@ func (s *Store) WriteState(id ipn.StateKey, bs []byte) (err error) { // of a Tailscale Kubernetes node's state Secret. func (s *Store) WriteTLSCertAndKey(domain string, cert, key []byte) (err error) { if s.certShareMode == "ro" { - log.Printf("[unexpected] TLS cert and key write in read-only mode") + s.logf("[unexpected] TLS cert and key write in read-only mode") } if err := dnsname.ValidHostname(domain); err != nil { return fmt.Errorf("invalid domain name %q: %w", domain, err) @@ -258,11 +261,11 @@ func (s *Store) updateSecret(data map[string][]byte, secretName string) (err err defer func() { if err != nil { if err := s.client.Event(ctx, eventTypeWarning, reasonTailscaleStateUpdateFailed, err.Error()); err != nil { - log.Printf("kubestore: error creating tailscaled state update Event: %v", err) + s.logf("kubestore: error creating tailscaled state update Event: %v", err) } } else { if err := s.client.Event(ctx, eventTypeNormal, reasonTailscaleStateUpdated, "Successfully updated tailscaled state Secret"); err != nil { - log.Printf("kubestore: error creating tailscaled state Event: %v", err) + s.logf("kubestore: error creating tailscaled state Event: 
%v", err) } } cancel() @@ -342,17 +345,72 @@ func (s *Store) loadState() (err error) { return ipn.ErrStateNotExist } if err := s.client.Event(ctx, eventTypeWarning, reasonTailscaleStateLoadFailed, err.Error()); err != nil { - log.Printf("kubestore: error creating Event: %v", err) + s.logf("kubestore: error creating Event: %v", err) } return err } if err := s.client.Event(ctx, eventTypeNormal, reasonTailscaleStateLoaded, "Successfully loaded tailscaled state from Secret"); err != nil { - log.Printf("kubestore: error creating Event: %v", err) + s.logf("kubestore: error creating Event: %v", err) + } + data, err := s.maybeStripAttestationKeyFromProfile(secret.Data) + if err != nil { + return fmt.Errorf("error attempting to strip attestation data from state Secret: %w", err) } - s.memory.LoadFromMap(secret.Data) + s.memory.LoadFromMap(data) return nil } +// maybeStripAttestationKeyFromProfile removes the hardware attestation key +// field from the serialized Tailscale profile. This is done to recover from a bug +// introduced in 1.92, where node-bound hardware attestation keys were added to +// Tailscale states stored in Kubernetes Secrets. +// See https://github.com/tailscale/tailscale/issues/18302 +// TODO(irbekrm): it would be good if we could somehow determine when we no +// longer need to run this check.
+func (s *Store) maybeStripAttestationKeyFromProfile(data map[string][]byte) (map[string][]byte, error) { + prefsKey := extractPrefsKey(data) + prefsBytes, ok := data[prefsKey] + if !ok { + return data, nil + } + var prefs map[string]any + if err := json.Unmarshal(prefsBytes, &prefs); err != nil { + s.logf("[unexpected]: kube store: failed to unmarshal prefs data") + // don't error as in most cases the state won't have the attestation key + return data, nil + } + + config, ok := prefs["Config"].(map[string]any) + if !ok { + return data, nil + } + if _, hasKey := config["AttestationKey"]; !hasKey { + return data, nil + } + s.logf("kube store: found redundant attestation key, deleting") + delete(config, "AttestationKey") + prefsBytes, err := json.Marshal(prefs) + if err != nil { + return nil, fmt.Errorf("[unexpected] kube store: failed to marshal profile after removing attestation key: %v", err) + } + data[prefsKey] = prefsBytes + if err := s.updateSecret(map[string][]byte{prefsKey: prefsBytes}, s.secretName); err != nil { + // don't error out - this might have been a temporary kube API server + // connection issue. The key will be removed from the in-memory cache + // and we'll retry updating the Secret on the next restart. + s.logf("kube store: error updating Secret after stripping AttestationKey: %v", err) + } + return data, nil +} + +const currentProfileKey = "_current-profile" + +// extractPrefsKey returns the key at which Tailscale prefs are stored in the +// provided Secret data. +func extractPrefsKey(data map[string][]byte) string { + return string(data[currentProfileKey]) +} + // runCertReload relists and reloads all TLS certs for endpoints shared by this // node from Secrets other than the state Secret to ensure that renewed certs get eventually loaded.
// It is not critical to reload a cert immediately after @@ -361,7 +419,7 @@ func (s *Store) loadState() (err error) { // Note that if shared certs are not found in memory on an HTTPS request, we // do a Secret lookup, so this mechanism does not need to ensure that newly // added Ingresses' certs get loaded. -func (s *Store) runCertReload(ctx context.Context, logf logger.Logf) { +func (s *Store) runCertReload(ctx context.Context) { ticker := time.NewTicker(time.Hour * 24) defer ticker.Stop() for { @@ -371,7 +429,7 @@ func (s *Store) runCertReload(ctx context.Context, logf logger.Logf) { case <-ticker.C: sel := s.certSecretSelector() if err := s.loadCerts(ctx, sel); err != nil { - logf("[unexpected] error reloading TLS certs: %v", err) + s.logf("[unexpected] error reloading TLS certs: %v", err) } } } diff --git a/ipn/store/kubestore/store_kube_test.go b/ipn/store/kubestore/store_kube_test.go index 8c8e5e870..44a4bbb7f 100644 --- a/ipn/store/kubestore/store_kube_test.go +++ b/ipn/store/kubestore/store_kube_test.go @@ -20,6 +20,90 @@ import ( "tailscale.com/kube/kubetypes" ) +func TestKubernetesPodMigrationWithTPMAttestationKey(t *testing.T) { + stateWithAttestationKey := `{ + "Config": { + "NodeID": "nSTABLE123456", + "AttestationKey": { + "tpmPrivate": "c2Vuc2l0aXZlLXRwbS1kYXRhLXRoYXQtb25seS13b3Jrcy1vbi1vcmlnaW5hbC1ub2Rl", + "tpmPublic": "cHVibGljLXRwbS1kYXRhLWZvci1hdHRlc3RhdGlvbi1rZXk=" + } + } + }` + + secretData := map[string][]byte{ + "profile-abc123": []byte(stateWithAttestationKey), + "_current-profile": []byte("profile-abc123"), + } + + client := &kubeclient.FakeClient{ + GetSecretImpl: func(ctx context.Context, name string) (*kubeapi.Secret, error) { + return &kubeapi.Secret{Data: secretData}, nil + }, + CheckSecretPermissionsImpl: func(ctx context.Context, name string) (bool, bool, error) { + return true, true, nil + }, + JSONPatchResourceImpl: func(ctx context.Context, name, resourceType string, patches []kubeclient.JSONPatch) error { + for _, p := range 
patches { + if p.Op == "add" && p.Path == "/data" { + secretData = p.Value.(map[string][]byte) + } + } + return nil + }, + } + + store := &Store{ + client: client, + canPatch: true, + secretName: "ts-state", + memory: mem.Store{}, + logf: t.Logf, + } + + if err := store.loadState(); err != nil { + t.Fatalf("loadState failed: %v", err) + } + + // Verify we can read the state from the store + stateBytes, err := store.ReadState("profile-abc123") + if err != nil { + t.Fatalf("ReadState failed: %v", err) + } + + // The state should be readable as JSON + var state map[string]json.RawMessage + if err := json.Unmarshal(stateBytes, &state); err != nil { + t.Fatalf("failed to unmarshal state: %v", err) + } + + // Verify the Config field exists + configRaw, ok := state["Config"] + if !ok { + t.Fatal("Config field not found in state") + } + + // Parse the Config to verify fields are preserved + var config map[string]json.RawMessage + if err := json.Unmarshal(configRaw, &config); err != nil { + t.Fatalf("failed to unmarshal Config: %v", err) + } + + // The AttestationKey should be stripped by the kubestore + if _, hasAttestation := config["AttestationKey"]; hasAttestation { + t.Error("AttestationKey should be stripped from state loaded by kubestore") + } + + // Verify other fields are preserved + var nodeID string + if err := json.Unmarshal(config["NodeID"], &nodeID); err != nil { + t.Fatalf("failed to unmarshal NodeID: %v", err) + } + if nodeID != "nSTABLE123456" { + t.Errorf("NodeID mismatch: got %q, want %q", nodeID, "nSTABLE123456") + } +} + func TestWriteState(t *testing.T) { tests := []struct { name string