cmd/tailscaled,ipn/{ipnlocal,store/kubestore}: disable hardware attestation by default, don't use it with non-filesystem state stores (#18342)

* ipn/ipnlocal: don't fail profile unmarshal due to attestation keys (#18335)

Soft-fail on initial unmarshal and try again, ignoring the
AttestationKey. This helps in cases where something about the
attestation key storage (usually a TPM) is messed up. The old key will
be lost, but at least the node can start again.

Updates #18302
Updates #15830

Signed-off-by: Andrew Lytvynov <awly@tailscale.com>
(cherry picked from commit 2e77b75e96)

* cmd/tailscaled: disable state encryption / attestation by default (#18336)

TPM-based features have been incredibly painful due to the heterogeneous
devices in the wild, and many situations in which the TPM "changes" (is
reset or replaced). All of this leads to a lot of customer issues.

We hoped to iron out all the kinks and get all users to benefit from
state encryption and hardware attestation without manually opting in,
but the long tail of kinks is just too long.

This change disables TPM-based features on Windows and Linux by default.
Node state should get auto-decrypted on update, and old attestation keys
will be removed.

There's also tailscaled-on-macOS, but it won't have TPM or Keychain
bindings anyway.

Updates #18302
Updates #15830

Signed-off-by: Andrew Lytvynov <awly@tailscale.com>
(cherry picked from commit 68617bb82e)

* cmd/tailscaled,ipn/{ipnlocal,store/kubestore}: don't create attestation keys for stores that are not bound to a node (#18322)

Ensure that hardware attestation keys are not added to tailscaled
state stores that are Kubernetes Secrets or AWS SSM as those Tailscale
devices should be able to be recreated on different nodes, for example,
when moving Pods between nodes.

Updates tailscale/tailscale#18302

Signed-off-by: Irbe Krumina <irbekrm@gmail.com>
(cherry picked from commit 8ea90ba80d)

---------

Signed-off-by: Andrew Lytvynov <awly@tailscale.com>
Signed-off-by: Irbe Krumina <irbekrm@gmail.com>
Co-authored-by: Andrew Lytvynov <awly@tailscale.com>
pull/18345/head
Irbe Krumina 7 days ago committed by GitHub
parent 4adbd14ab5
commit e89382897a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -209,7 +209,10 @@ func main() {
flag.BoolVar(&args.disableLogs, "no-logs-no-support", false, "disable log uploads; this also disables any technical support")
flag.StringVar(&args.confFile, "config", "", "path to config file, or 'vm:user-data' to use the VM's user-data (EC2)")
if buildfeatures.HasTPM {
flag.Var(&args.hardwareAttestation, "hardware-attestation", "use hardware-backed keys to bind node identity to this device when supported by the OS and hardware. Uses TPM 2.0 on Linux and Windows; SecureEnclave on macOS and iOS; and Keystore on Android")
flag.Var(&args.hardwareAttestation, "hardware-attestation", `use hardware-backed keys to bind node identity to this device when supported
by the OS and hardware. Uses TPM 2.0 on Linux and Windows; SecureEnclave on
macOS and iOS; and Keystore on Android. Only supported for Tailscale nodes that
store state on filesystem.`)
}
if f, ok := hookRegisterOutboundProxyFlags.GetOk(); ok {
f()
@ -904,17 +907,17 @@ func applyIntegrationTestEnvKnob() {
func handleTPMFlags() {
switch {
case args.hardwareAttestation.v:
if _, err := key.NewEmptyHardwareAttestationKey(); err == key.ErrUnsupported {
if err := canUseHardwareAttestation(); err != nil {
log.SetFlags(0)
log.Fatalf("--hardware-attestation is not supported on this platform or in this build of tailscaled")
log.Fatal(err)
}
case !args.hardwareAttestation.set:
policyHWAttestation, _ := policyclient.Get().GetBoolean(pkey.HardwareAttestation, feature.HardwareAttestationAvailable())
if !policyHWAttestation {
break
}
if feature.TPMAvailable() {
args.hardwareAttestation.v = true
policyHWAttestation, _ := policyclient.Get().GetBoolean(pkey.HardwareAttestation, false)
if err := canUseHardwareAttestation(); err != nil {
log.Printf("[unexpected] policy requires hardware attestation, but device does not support it: %v", err)
args.hardwareAttestation.v = false
} else {
args.hardwareAttestation.v = policyHWAttestation
}
}
@ -926,18 +929,46 @@ func handleTPMFlags() {
log.Fatal(err)
}
case !args.encryptState.set:
policyEncrypt, _ := policyclient.Get().GetBoolean(pkey.EncryptState, feature.TPMAvailable())
if !policyEncrypt {
// Default disabled, no need to validate.
return
}
// Default enabled if available.
if err := canEncryptState(); err == nil {
policyEncrypt, _ := policyclient.Get().GetBoolean(pkey.EncryptState, false)
if err := canEncryptState(); policyEncrypt && err == nil {
args.encryptState.v = true
}
}
}
// canUseHardwareAttestation returns an error if hardware attestation can't be
// enabled, either due to availability or compatibility with other settings.
// It returns nil when neither check objects.
func canUseHardwareAttestation() error {
	// Only key.ErrUnsupported counts as "unavailable"; any other error from
	// the probe is deliberately not treated as a blocker here. errors.Is is
	// used so that a wrapped ErrUnsupported is recognized as well.
	if _, err := key.NewEmptyHardwareAttestationKey(); errors.Is(err, key.ErrUnsupported) {
		return errors.New("--hardware-attestation is not supported on this platform or in this build of tailscaled")
	}
	// Hardware attestation keys are TPM-bound and cannot be migrated between
	// machines. Disable when using portable state stores like kube: or arn:
	// where state may be loaded on a different machine.
	if args.statepath != "" && isPortableStore(args.statepath) {
		return errors.New("--hardware-attestation cannot be used with portable state stores (kube:, arn:) because TPM-bound keys cannot be migrated between machines")
	}
	return nil
}
// isPortableStore reports whether path names a state store whose contents may
// end up being loaded on a machine other than the one that wrote them.
// Every store other than the file store and the TPM store is considered
// portable.
func isPortableStore(path string) bool {
	registered := store.HasKnownProviderPrefix(path)
	tpmBacked := strings.HasPrefix(path, store.TPMPrefix)
	if registered && !tpmBacked {
		return true
	}
	// Kubernetes Secret and AWS SSM paths are normally caught above, but that
	// relies on those stores having been registered. Matching their prefixes
	// directly makes sure that a faulty build which failed to register a store
	// still does not generate hardware keys for users who upgraded to it.
	return strings.HasPrefix(path, "kube:") || strings.HasPrefix(path, "arn:")
}
// canEncryptState returns an error if state encryption can't be enabled,
// either due to availability or compatibility with other settings.
func canEncryptState() error {

@ -88,3 +88,56 @@ func TestStateStoreError(t *testing.T) {
}
})
}
// TestIsPortableStore runs isPortableStore over a table of state paths and
// checks the portable / non-portable verdict for each.
func TestIsPortableStore(t *testing.T) {
	cases := []struct {
		name string
		path string
		want bool
	}{
		{name: "kube_store", path: "kube:my-secret", want: true},
		{name: "aws_arn_store", path: "arn:aws:ssm:us-east-1:123456789012:parameter/tailscale/state", want: true},
		{name: "tpm_store", path: "tpmseal:/var/lib/tailscale/tailscaled.state", want: false},
		{name: "local_file_store", path: "/var/lib/tailscale/tailscaled.state", want: false},
		{name: "empty_path", path: "", want: false},
		{name: "mem_store", path: "mem:", want: true},
		{name: "windows_file_store", path: `C:\ProgramData\Tailscale\server-state.conf`, want: false},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := isPortableStore(tc.path); got != tc.want {
				t.Errorf("isPortableStore(%q) = %v, want %v", tc.path, got, tc.want)
			}
		})
	}
}

@ -2488,7 +2488,7 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error {
// neither UpdatePrefs or reconciliation should change Persist
newPrefs.Persist = b.pm.CurrentPrefs().Persist().AsStruct()
if buildfeatures.HasTPM {
if buildfeatures.HasTPM && b.HardwareAttested() {
if genKey, ok := feature.HookGenerateAttestationKeyIfEmpty.GetOk(); ok {
newKey, err := genKey(newPrefs.Persist, logf)
if err != nil {
@ -2500,6 +2500,12 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error {
}
}
}
// Remove any existing attestation key if HardwareAttested is false.
if !b.HardwareAttested() && newPrefs.Persist != nil && newPrefs.Persist.AttestationKey != nil && !newPrefs.Persist.AttestationKey.IsZero() {
newPrefs.Persist.AttestationKey = nil
prefsChanged = true
prefsChangedWhy = append(prefsChangedWhy, "removeAttestationKey")
}
if prefsChanged {
logf("updated prefs: %v, reason: %v", newPrefs.Pretty(), prefsChangedWhy)

@ -5,10 +5,12 @@ package ipnlocal
import (
"cmp"
"crypto"
"crypto/rand"
"encoding/json"
"errors"
"fmt"
"io"
"runtime"
"slices"
"strings"
@ -59,6 +61,9 @@ type profileManager struct {
// extHost is the bridge between [profileManager] and the registered [ipnext.Extension]s.
// It may be nil in tests. A nil pointer is a valid, no-op host.
extHost *ExtensionHost
// Override for key.NewEmptyHardwareAttestationKey used for testing.
newEmptyHardwareAttestationKey func() (key.HardwareAttestationKey, error)
}
// SetExtensionHost sets the [ExtensionHost] for the [profileManager].
@ -660,13 +665,23 @@ func (pm *profileManager) loadSavedPrefs(k ipn.StateKey) (ipn.PrefsView, error)
// if supported by the platform, create an empty hardware attestation key to use when deserializing
// to avoid type exceptions from json.Unmarshaling into an interface{}.
hw, _ := key.NewEmptyHardwareAttestationKey()
hw, _ := pm.newEmptyHardwareAttestationKey()
savedPrefs.Persist = &persist.Persist{
AttestationKey: hw,
}
if err := ipn.PrefsFromBytes(bs, savedPrefs); err != nil {
return ipn.PrefsView{}, fmt.Errorf("parsing saved prefs: %v", err)
// Try loading again, this time ignoring the AttestationKey contents.
// If that succeeds, there's something wrong with the underlying
// attestation key mechanism (most likely the TPM changed), but we
// should at least proceed with client startup.
origErr := err
savedPrefs.Persist.AttestationKey = &noopAttestationKey{}
if err := ipn.PrefsFromBytes(bs, savedPrefs); err != nil {
return ipn.PrefsView{}, fmt.Errorf("parsing saved prefs: %w", err)
} else {
pm.logf("failed to parse savedPrefs with attestation key (error: %v) but parsing without the attestation key succeeded; will proceed without using the old attestation key", origErr)
}
}
pm.logf("using backend prefs for %q: %v", k, savedPrefs.Pretty())
@ -912,11 +927,12 @@ func newProfileManagerWithGOOS(store ipn.StateStore, logf logger.Logf, ht *healt
metricProfileCount.Set(int64(len(knownProfiles)))
pm := &profileManager{
goos: goos,
store: store,
knownProfiles: knownProfiles,
logf: logf,
health: ht,
goos: goos,
store: store,
knownProfiles: knownProfiles,
logf: logf,
health: ht,
newEmptyHardwareAttestationKey: key.NewEmptyHardwareAttestationKey,
}
var initialProfile ipn.LoginProfileView
@ -985,3 +1001,21 @@ var (
metricMigrationError = clientmetric.NewCounter("profiles_migration_error")
metricMigrationSuccess = clientmetric.NewCounter("profiles_migration_success")
)
// noopAttestationKey is a key.HardwareAttestationKey that always successfully
// unmarshals as a zero key. It carries no key material: it reports itself as
// zero, and its Public and Sign methods panic.
type noopAttestationKey struct{}

// Public always panics; callers are expected to check IsZero first.
func (n noopAttestationKey) Public() crypto.PublicKey {
	panic("noopAttestationKey.Public should not be called; missing IsZero check somewhere?")
}

// Sign always panics; callers are expected to check IsZero first.
func (n noopAttestationKey) Sign(rand io.Reader, digest []byte, opts crypto.SignerOpts) (signature []byte, err error) {
	panic("noopAttestationKey.Sign should not be called; missing IsZero check somewhere?")
}

// MarshalJSON encodes the key as JSON null. An empty byte slice is not valid
// JSON, and encoding/json rejects it with "unexpected end of JSON input" when
// a Marshaler returns it, so a valid literal is returned instead.
func (n noopAttestationKey) MarshalJSON() ([]byte, error) { return []byte("null"), nil }

// UnmarshalJSON accepts any input and discards it.
func (n noopAttestationKey) UnmarshalJSON([]byte) error { return nil }

// Close is a no-op.
func (n noopAttestationKey) Close() error { return nil }

// Clone returns the receiver; the type has no state to copy.
func (n noopAttestationKey) Clone() key.HardwareAttestationKey { return n }

// IsZero always reports true.
func (n noopAttestationKey) IsZero() bool { return true }

@ -4,6 +4,7 @@
package ipnlocal
import (
"errors"
"fmt"
"os/user"
"strconv"
@ -1147,3 +1148,40 @@ func TestProfileStateChangeCallback(t *testing.T) {
})
}
}
func TestProfileBadAttestationKey(t *testing.T) {
store := new(mem.Store)
pm, err := newProfileManagerWithGOOS(store, t.Logf, health.NewTracker(eventbustest.NewBus(t)), "linux")
if err != nil {
t.Fatal(err)
}
fk := new(failingHardwareAttestationKey)
pm.newEmptyHardwareAttestationKey = func() (key.HardwareAttestationKey, error) {
return fk, nil
}
sk := ipn.StateKey(t.Name())
if err := pm.store.WriteState(sk, []byte(`{"Config": {"AttestationKey": {}}}`)); err != nil {
t.Fatal(err)
}
prefs, err := pm.loadSavedPrefs(sk)
if err != nil {
t.Fatal(err)
}
ak := prefs.Persist().AsStruct().AttestationKey
if _, ok := ak.(noopAttestationKey); !ok {
t.Errorf("loaded attestation key of type %T, want noopAttestationKey", ak)
}
if !fk.unmarshalCalled {
t.Error("UnmarshalJSON was not called on failingHardwareAttestationKey")
}
}
// failingHardwareAttestationKey is a test double that embeds
// noopAttestationKey but overrides UnmarshalJSON to always fail, recording
// that the call happened.
type failingHardwareAttestationKey struct {
	noopAttestationKey
	// unmarshalCalled is set to true the first time UnmarshalJSON runs.
	unmarshalCalled bool
}

// UnmarshalJSON notes that it was invoked and then returns an error
// regardless of the input.
func (f *failingHardwareAttestationKey) UnmarshalJSON(_ []byte) error {
	f.unmarshalCalled = true
	return errors.New("failed to unmarshal attestation key!")
}

@ -6,8 +6,8 @@ package kubestore
import (
"context"
"encoding/json"
"fmt"
"log"
"net"
"net/http"
"os"
@ -57,6 +57,8 @@ type Store struct {
certShareMode string // 'ro', 'rw', or empty
podName string
logf logger.Logf
// memory holds the latest tailscale state. Writes write state to a kube
// Secret and memory, Reads read from memory.
memory mem.Store
@ -96,6 +98,7 @@ func newWithClient(logf logger.Logf, c kubeclient.Client, secretName string) (*S
canPatch: canPatch,
secretName: secretName,
podName: os.Getenv("POD_NAME"),
logf: logf,
}
if envknob.IsCertShareReadWriteMode() {
s.certShareMode = "rw"
@ -113,11 +116,11 @@ func newWithClient(logf logger.Logf, c kubeclient.Client, secretName string) (*S
if err := s.loadCerts(context.Background(), sel); err != nil {
// We will attempt to again retrieve the certs from Secrets when a request for an HTTPS endpoint
// is received.
log.Printf("[unexpected] error loading TLS certs: %v", err)
s.logf("[unexpected] error loading TLS certs: %v", err)
}
}
if s.certShareMode == "ro" {
go s.runCertReload(context.Background(), logf)
go s.runCertReload(context.Background())
}
return s, nil
}
@ -147,7 +150,7 @@ func (s *Store) WriteState(id ipn.StateKey, bs []byte) (err error) {
// of a Tailscale Kubernetes node's state Secret.
func (s *Store) WriteTLSCertAndKey(domain string, cert, key []byte) (err error) {
if s.certShareMode == "ro" {
log.Printf("[unexpected] TLS cert and key write in read-only mode")
s.logf("[unexpected] TLS cert and key write in read-only mode")
}
if err := dnsname.ValidHostname(domain); err != nil {
return fmt.Errorf("invalid domain name %q: %w", domain, err)
@ -258,11 +261,11 @@ func (s *Store) updateSecret(data map[string][]byte, secretName string) (err err
defer func() {
if err != nil {
if err := s.client.Event(ctx, eventTypeWarning, reasonTailscaleStateUpdateFailed, err.Error()); err != nil {
log.Printf("kubestore: error creating tailscaled state update Event: %v", err)
s.logf("kubestore: error creating tailscaled state update Event: %v", err)
}
} else {
if err := s.client.Event(ctx, eventTypeNormal, reasonTailscaleStateUpdated, "Successfully updated tailscaled state Secret"); err != nil {
log.Printf("kubestore: error creating tailscaled state Event: %v", err)
s.logf("kubestore: error creating tailscaled state Event: %v", err)
}
}
cancel()
@ -342,17 +345,72 @@ func (s *Store) loadState() (err error) {
return ipn.ErrStateNotExist
}
if err := s.client.Event(ctx, eventTypeWarning, reasonTailscaleStateLoadFailed, err.Error()); err != nil {
log.Printf("kubestore: error creating Event: %v", err)
s.logf("kubestore: error creating Event: %v", err)
}
return err
}
if err := s.client.Event(ctx, eventTypeNormal, reasonTailscaleStateLoaded, "Successfully loaded tailscaled state from Secret"); err != nil {
log.Printf("kubestore: error creating Event: %v", err)
s.logf("kubestore: error creating Event: %v", err)
}
data, err := s.maybeStripAttestationKeyFromProfile(secret.Data)
if err != nil {
return fmt.Errorf("error attempting to strip attestation data from state Secret: %w", err)
}
s.memory.LoadFromMap(secret.Data)
s.memory.LoadFromMap(data)
return nil
}
// maybeStripAttestationKeyFromProfile removes the hardware attestation key
// field from serialized Tailscale profile. This is done to recover from a bug
// introduced in 1.92, where node-bound hardware attestation keys were added to
// Tailscale states stored in Kubernetes Secrets.
// See https://github.com/tailscale/tailscale/issues/18302
//
// data is the state Secret's data. When an attestation key is found, the
// profile entry in data is replaced in place with the stripped profile and the
// Secret is updated on a best-effort basis. The returned map is data itself.
// A non-nil error is returned only if re-encoding the stripped profile fails.
//
// TODO(irbekrm): it would be good if we could somehow determine when we no
// longer need to run this check.
func (s *Store) maybeStripAttestationKeyFromProfile(data map[string][]byte) (map[string][]byte, error) {
	prefsKey := extractPrefsKey(data)
	prefsBytes, ok := data[prefsKey]
	if !ok {
		return data, nil
	}

	// Decode only as deep as needed and keep every other value as raw JSON.
	// Decoding into map[string]any would turn all JSON numbers into float64,
	// which can silently alter large integers when the profile is written
	// back.
	var prefs map[string]json.RawMessage
	if err := json.Unmarshal(prefsBytes, &prefs); err != nil {
		s.logf("[unexpected]: kube store: failed to unmarshal prefs data: %v", err)
		// don't error as in most cases the state won't have the attestation key
		return data, nil
	}
	var config map[string]json.RawMessage
	if err := json.Unmarshal(prefs["Config"], &config); err != nil {
		// Config is absent or is not a JSON object, so there is nothing to strip.
		return data, nil
	}
	if _, hasKey := config["AttestationKey"]; !hasKey {
		return data, nil
	}

	s.logf("kube store: found redundant attestation key, deleting")
	delete(config, "AttestationKey")
	configBytes, err := json.Marshal(config)
	if err != nil {
		return nil, fmt.Errorf("[unexpected] kube store: failed to marshal profile after removing attestation key: %w", err)
	}
	prefs["Config"] = configBytes
	prefsBytes, err = json.Marshal(prefs)
	if err != nil {
		return nil, fmt.Errorf("[unexpected] kube store: failed to marshal profile after removing attestation key: %w", err)
	}
	data[prefsKey] = prefsBytes

	if err := s.updateSecret(map[string][]byte{prefsKey: prefsBytes}, s.secretName); err != nil {
		// don't error out - this might have been a temporary kube API server
		// connection issue. The key will be removed from the in-memory cache
		// and we'll retry updating the Secret on the next restart.
		s.logf("kube store: error updating Secret after stripping AttestationKey: %v", err)
	}
	return data, nil
}
// currentProfileKey is the entry in the Secret data whose value names the
// entry that holds the Tailscale prefs.
const currentProfileKey = "_current-profile"

// extractPrefsKey returns the key at which Tailscale prefs are stored in the
// provided Secret data. It returns the empty string when data has no
// currentProfileKey entry.
func extractPrefsKey(data map[string][]byte) string {
	if name, found := data[currentProfileKey]; found {
		return string(name)
	}
	return ""
}
// runCertReload relists and reloads all TLS certs for endpoints shared by this
// node from Secrets other than the state Secret to ensure that renewed certs get eventually loaded.
// It is not critical to reload a cert immediately after
@ -361,7 +419,7 @@ func (s *Store) loadState() (err error) {
// Note that if shared certs are not found in memory on an HTTPS request, we
// do a Secret lookup, so this mechanism does not need to ensure that newly
// added Ingresses' certs get loaded.
func (s *Store) runCertReload(ctx context.Context, logf logger.Logf) {
func (s *Store) runCertReload(ctx context.Context) {
ticker := time.NewTicker(time.Hour * 24)
defer ticker.Stop()
for {
@ -371,7 +429,7 @@ func (s *Store) runCertReload(ctx context.Context, logf logger.Logf) {
case <-ticker.C:
sel := s.certSecretSelector()
if err := s.loadCerts(ctx, sel); err != nil {
logf("[unexpected] error reloading TLS certs: %v", err)
s.logf("[unexpected] error reloading TLS certs: %v", err)
}
}
}

@ -20,6 +20,90 @@ import (
"tailscale.com/kube/kubetypes"
)
// TestKubernetesPodMigrationWithTPMAttestationKey loads a state Secret whose
// current profile contains an AttestationKey and checks that the state read
// back from the store no longer has that field, while NodeID is preserved.
func TestKubernetesPodMigrationWithTPMAttestationKey(t *testing.T) {
// Serialized profile that includes an AttestationKey alongside a NodeID.
stateWithAttestationKey := `{
"Config": {
"NodeID": "nSTABLE123456",
"AttestationKey": {
"tpmPrivate": "c2Vuc2l0aXZlLXRwbS1kYXRhLXRoYXQtb25seS13b3Jrcy1vbi1vcmlnaW5hbC1ub2Rl",
"tpmPublic": "cHVibGljLXRwbS1kYXRhLWZvci1hdHRlc3RhdGlvbi1rZXk="
}
}
}`
// Secret contents: the profile itself plus the pointer to the current profile.
secretData := map[string][]byte{
"profile-abc123": []byte(stateWithAttestationKey),
"_current-profile": []byte("profile-abc123"),
}
// Fake kube client: serves secretData on reads, reports both permissions as
// granted, and replaces secretData when an "add" patch on "/data" arrives.
client := &kubeclient.FakeClient{
GetSecretImpl: func(ctx context.Context, name string) (*kubeapi.Secret, error) {
return &kubeapi.Secret{Data: secretData}, nil
},
CheckSecretPermissionsImpl: func(ctx context.Context, name string) (bool, bool, error) {
return true, true, nil
},
JSONPatchResourceImpl: func(ctx context.Context, name, resourceType string, patches []kubeclient.JSONPatch) error {
for _, p := range patches {
if p.Op == "add" && p.Path == "/data" {
secretData = p.Value.(map[string][]byte)
}
}
return nil
},
}
// Build the Store directly with the fake client and the test logger.
store := &Store{
client: client,
canPatch: true,
secretName: "ts-state",
memory: mem.Store{},
logf: t.Logf,
}
if err := store.loadState(); err != nil {
t.Fatalf("loadState failed: %v", err)
}
// Verify we can read the state from the store
stateBytes, err := store.ReadState("profile-abc123")
if err != nil {
t.Fatalf("ReadState failed: %v", err)
}
// The state should be readable as JSON
var state map[string]json.RawMessage
if err := json.Unmarshal(stateBytes, &state); err != nil {
t.Fatalf("failed to unmarshal state: %v", err)
}
// Verify the Config field exists
configRaw, ok := state["Config"]
if !ok {
t.Fatal("Config field not found in state")
}
// Parse the Config to verify fields are preserved
var config map[string]json.RawMessage
if err := json.Unmarshal(configRaw, &config); err != nil {
t.Fatalf("failed to unmarshal Config: %v", err)
}
// The AttestationKey should be stripped by the kubestore
if _, hasAttestation := config["AttestationKey"]; hasAttestation {
t.Error("AttestationKey should be stripped from state loaded by kubestore")
}
// Verify other fields are preserved
var nodeID string
if err := json.Unmarshal(config["NodeID"], &nodeID); err != nil {
t.Fatalf("failed to unmarshal NodeID: %v", err)
}
if nodeID != "nSTABLE123456" {
t.Errorf("NodeID mismatch: got %q, want %q", nodeID, "nSTABLE123456")
}
}
func TestWriteState(t *testing.T) {
tests := []struct {
name string

Loading…
Cancel
Save