diff --git a/control/controlclient/map.go b/control/controlclient/map.go index e7db7aae4..5ff3baa19 100644 --- a/control/controlclient/map.go +++ b/control/controlclient/map.go @@ -9,6 +9,7 @@ import ( "log" "net/netip" "sort" + "time" "tailscale.com/envknob" "tailscale.com/tailcfg" @@ -52,6 +53,12 @@ type mapSession struct { lastPopBrowserURL string stickyDebug tailcfg.Debug // accumulated opt.Bool values lastTKAInfo *tailcfg.TKAInfo + previouslyExpired map[tailcfg.StableNodeID]bool // to avoid log spam + + // clockDelta stores the delta between the current time and the time + // received from control such that: + // time.Now().Add(clockDelta) == MapResponse.ControlTime + clockDelta time.Duration // netMapBuilding is non-nil during a netmapForResponse call, // containing the value to be returned, once fully populated. @@ -60,11 +67,12 @@ type mapSession struct { func newMapSession(privateNodeKey key.NodePrivate) *mapSession { ms := &mapSession{ - privateNodeKey: privateNodeKey, - logf: logger.Discard, - vlogf: logger.Discard, - lastDNSConfig: new(tailcfg.DNSConfig), - lastUserProfile: map[tailcfg.UserID]tailcfg.UserProfile{}, + privateNodeKey: privateNodeKey, + logf: logger.Discard, + vlogf: logger.Discard, + lastDNSConfig: new(tailcfg.DNSConfig), + lastUserProfile: map[tailcfg.UserID]tailcfg.UserProfile{}, + previouslyExpired: map[tailcfg.StableNodeID]bool{}, } return ms } @@ -85,6 +93,7 @@ func (ms *mapSession) addUserProfile(userID tailcfg.UserID) { // information from prior MapResponse values. func (ms *mapSession) netmapForResponse(resp *tailcfg.MapResponse) *netmap.NetworkMap { undeltaPeers(resp, ms.previousPeers) + ms.flagExpiredPeers(resp) ms.previousPeers = cloneNodes(resp.Peers) // defensive/lazy clone, since this escapes to who knows where for _, up := range resp.UserProfiles { @@ -343,6 +352,83 @@ func undeltaPeers(mapRes *tailcfg.MapResponse, prev []*tailcfg.Node) { mapRes.PeersRemoved = nil } +// For extra defense-in-depth, when we're testing expired nodes we check +// ControlTime against this 'epoch' (set to the approximate time that this code +// was written) such that if control (or Headscale, etc.) sends a ControlTime +// that's sufficiently far in the past, we can safely ignore it. +var flagExpiredPeersEpoch = time.Unix(1673373066, 0) + +// If the offset between the current time and the time received from control is +// larger than this, we store an offset in our mapSession to adjust future +// clock timings. +const minClockDelta = 1 * time.Minute + +// flagExpiredPeers updates mapRes.Peers, mutating all peers that have expired, +// taking into account any clock skew detected by using the ControlTime field +// in the MapResponse. We don't actually remove expired peers from the Peers +// array; instead, we clear some fields of the Node object, and set +// Node.Expired so other parts of the codebase can provide more clear error +// messages when attempting to e.g. ping an expired node. +// +// This is additionally a defense-in-depth against something going wrong with +// control such that we start seeing expired peers with a valid Endpoints or +// DERP field. +func (ms *mapSession) flagExpiredPeers(mapRes *tailcfg.MapResponse) { + localNow := clockNow() + + // If we have a ControlTime field, update our delta. + if mapRes.ControlTime != nil && !mapRes.ControlTime.IsZero() { + delta := mapRes.ControlTime.Sub(localNow) + if delta.Abs() > minClockDelta { + ms.logf("[v1] netmap: flagExpiredPeers: setting clock delta to %v", delta) + ms.clockDelta = delta + } else { + ms.clockDelta = 0 + } + } + + // Adjust our current time by any saved delta to adjust for clock skew. + controlNow := localNow.Add(ms.clockDelta) + if controlNow.Before(flagExpiredPeersEpoch) { + ms.logf("netmap: flagExpiredPeers: [unexpected] delta-adjusted current time is before hardcoded epoch; skipping") + return + } + + for _, peer := range mapRes.Peers { + // Nodes that don't expire have KeyExpiry set to the zero time; + // skip those and peers that are already marked as expired + // (e.g. from control). + if peer.KeyExpiry.IsZero() || peer.KeyExpiry.After(controlNow) { + delete(ms.previouslyExpired, peer.StableID) + continue + } else if peer.Expired { + continue + } + + if !ms.previouslyExpired[peer.StableID] { + ms.logf("[v1] netmap: flagExpiredPeers: clearing expired peer %v", peer.StableID) + ms.previouslyExpired[peer.StableID] = true + } + + // Actually mark the node as expired + peer.Expired = true + + // Control clears the Endpoints and DERP fields of expired + // nodes; do so here as well. The Expired bool is the correct + // thing to set, but this replicates the previous behaviour. + // + // NOTE: this is insufficient to actually break connectivity, + // since we discover endpoints via DERP, and due to DERP return + // path optimization. + peer.Endpoints = nil + peer.DERP = "" + + // Defense-in-depth: break the node's public key as well, in + // case something tries to communicate. + peer.Key = key.NodePublicWithBadOldPrefix(peer.Key) + } +} + // ptrCopy returns a pointer to a newly allocated shallow copy of *v. func ptrCopy[T any](v *T) *T { if v == nil { diff --git a/control/controlclient/map_test.go b/control/controlclient/map_test.go index 07e0caa3b..18146ca57 100644 --- a/control/controlclient/map_test.go +++ b/control/controlclient/map_test.go @@ -308,26 +308,141 @@ func TestUndeltaPeers(t *testing.T) { } } +func TestFlagExpiredPeers(t *testing.T) { + n := func(id tailcfg.NodeID, name string, expiry time.Time, mod ...func(*tailcfg.Node)) *tailcfg.Node { + n := &tailcfg.Node{ID: id, Name: name, KeyExpiry: expiry} + for _, f := range mod { + f(n) + } + return n + } + + now := time.Unix(1673373129, 0) + + oldClockNow := clockNow + clockNow = func() time.Time { return now } + t.Cleanup(func() { clockNow = oldClockNow }) + + timeInPast := now.Add(-1 * time.Hour) + timeInFuture := now.Add(1 * time.Hour) + + timeBeforeEpoch := flagExpiredPeersEpoch.Add(-1 * time.Second) + if now.Before(timeBeforeEpoch) { + panic("current time in test cannot be before epoch") + } + + var expiredKey key.NodePublic + if err := expiredKey.UnmarshalText([]byte("nodekey:6da774d5d7740000000000000000000000000000000000000000000000000000")); err != nil { + panic(err) + } + + tests := []struct { + name string + mapRes *tailcfg.MapResponse + want []*tailcfg.Node + }{ + { + name: "no_expiry", + mapRes: &tailcfg.MapResponse{ + ControlTime: &now, + Peers: []*tailcfg.Node{ + n(1, "foo", timeInFuture), + n(2, "bar", timeInFuture), + }, + }, + want: []*tailcfg.Node{ + n(1, "foo", timeInFuture), + n(2, "bar", timeInFuture), + }, + }, + { + name: "expiry", + mapRes: &tailcfg.MapResponse{ + ControlTime: &now, + Peers: []*tailcfg.Node{ + n(1, "foo", timeInFuture), + n(2, "bar", timeInPast), + }, + }, + want: []*tailcfg.Node{ + n(1, "foo", timeInFuture), + n(2, "bar", timeInPast, func(n *tailcfg.Node) { + n.Expired = true + n.Key = expiredKey + }), + }, + }, + { + name: "bad_ControlTime", + mapRes: &tailcfg.MapResponse{ + // ControlTime here is intentionally before our hardcoded epoch + ControlTime: &timeBeforeEpoch, + + Peers: []*tailcfg.Node{ + n(1, "foo", timeInFuture), + n(2, "bar", timeBeforeEpoch.Add(-1*time.Hour)), // before ControlTime + }, + }, + want: []*tailcfg.Node{ + n(1, "foo", timeInFuture), + n(2, "bar", timeBeforeEpoch.Add(-1*time.Hour)), // should have expired, but ControlTime is before epoch + }, + }, + { + name: "tagged_node", + mapRes: &tailcfg.MapResponse{ + ControlTime: &now, + Peers: []*tailcfg.Node{ + n(1, "foo", timeInFuture), + n(2, "bar", time.Time{}), // tagged node; zero expiry + }, + }, + want: []*tailcfg.Node{ + n(1, "foo", timeInFuture), + n(2, "bar", time.Time{}), // not expired + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ms := newTestMapSession(t) + ms.flagExpiredPeers(tt.mapRes) + if !reflect.DeepEqual(tt.mapRes.Peers, tt.want) { + t.Errorf("wrong results\n got: %s\nwant: %s", formatNodes(tt.mapRes.Peers), formatNodes(tt.want)) + } + }) + } +} + func formatNodes(nodes []*tailcfg.Node) string { var sb strings.Builder for i, n := range nodes { if i > 0 { sb.WriteString(", ") } - var extra string + fmt.Fprintf(&sb, "(%d, %q", n.ID, n.Name) + if n.Online != nil { - extra += fmt.Sprintf(", online=%v", *n.Online) + fmt.Fprintf(&sb, ", online=%v", *n.Online) } if n.LastSeen != nil { - extra += fmt.Sprintf(", lastSeen=%v", n.LastSeen.Unix()) + fmt.Fprintf(&sb, ", lastSeen=%v", n.LastSeen.Unix()) + } + if n.Key != (key.NodePublic{}) { + fmt.Fprintf(&sb, ", key=%v", n.Key.String()) + } + if n.Expired { + fmt.Fprintf(&sb, ", expired=true") } - fmt.Fprintf(&sb, "(%d, %q%s)", n.ID, n.Name, extra) + sb.WriteString(")") } return sb.String() } func newTestMapSession(t *testing.T) *mapSession { - return newMapSession(key.NewNode()) + ms := newMapSession(key.NewNode()) + ms.logf = t.Logf + return ms } func TestNetmapForResponse(t *testing.T) { diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index ffcd2173a..5b7acc549 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -255,6 +255,12 @@ type Node struct { // DataPlaneAuditLogID is the per-node logtail ID used for data plane audit logging. DataPlaneAuditLogID string `json:",omitempty"` + + // Expired is whether this node's key has expired. Control may send + // this; clients are only allowed to set this from false to true. On + // the client, this is calculated client-side based on a timestamp sent + // from control, to avoid clock skew issues. + Expired bool `json:",omitempty"` } // DisplayName returns the user-facing name for a node which should @@ -1628,7 +1634,8 @@ func (n *Node) Equal(n2 *Node) bool { n.ComputedName == n2.ComputedName && n.computedHostIfDifferent == n2.computedHostIfDifferent && n.ComputedNameWithHost == n2.ComputedNameWithHost && - eqStrings(n.Tags, n2.Tags) + eqStrings(n.Tags, n2.Tags) && + n.Expired == n2.Expired } func eqBoolPtr(a, b *bool) bool { diff --git a/tailcfg/tailcfg_clone.go b/tailcfg/tailcfg_clone.go index 098f42489..b502581a3 100644 --- a/tailcfg/tailcfg_clone.go +++ b/tailcfg/tailcfg_clone.go @@ -97,6 +97,7 @@ var _NodeCloneNeedsRegeneration = Node(struct { computedHostIfDifferent string ComputedNameWithHost string DataPlaneAuditLogID string + Expired bool }{}) // Clone makes a deep copy of Hostinfo. diff --git a/tailcfg/tailcfg_test.go b/tailcfg/tailcfg_test.go index 12dc07351..08cc6148a 100644 --- a/tailcfg/tailcfg_test.go +++ b/tailcfg/tailcfg_test.go @@ -334,7 +334,7 @@ func TestNodeEqual(t *testing.T) { "Capabilities", "UnsignedPeerAPIOnly", "ComputedName", "computedHostIfDifferent", "ComputedNameWithHost", - "DataPlaneAuditLogID", + "DataPlaneAuditLogID", "Expired", } if have := fieldsOf(reflect.TypeOf(Node{})); !reflect.DeepEqual(have, nodeHandles) { t.Errorf("Node.Equal check might be out of sync\nfields: %q\nhandled: %q\n", @@ -514,6 +514,11 @@ func TestNodeEqual(t *testing.T) { &Node{}, false, }, + { + &Node{Expired: true}, + &Node{}, + false, + }, } for i, tt := range tests { got := tt.a.Equal(tt.b) diff --git a/tailcfg/tailcfg_view.go b/tailcfg/tailcfg_view.go index eb8d3bb02..850d9edba 100644 --- a/tailcfg/tailcfg_view.go +++ b/tailcfg/tailcfg_view.go @@ -175,6 +175,7 @@ func (v NodeView) UnsignedPeerAPIOnly() bool { return v.ж.UnsignedPeerA func (v NodeView) ComputedName() string { return v.ж.ComputedName } func (v NodeView) ComputedNameWithHost() string { return v.ж.ComputedNameWithHost } func (v NodeView) DataPlaneAuditLogID() string { return v.ж.DataPlaneAuditLogID } +func (v NodeView) Expired() bool { return v.ж.Expired } func (v NodeView) Equal(v2 NodeView) bool { return v.ж.Equal(v2.ж) } // A compilation failure here means this code must be regenerated, with the command at the top of this file. @@ -207,6 +208,7 @@ var _NodeViewNeedsRegeneration = Node(struct { computedHostIfDifferent string ComputedNameWithHost string DataPlaneAuditLogID string + Expired bool }{}) // View returns a readonly view of Hostinfo. diff --git a/types/key/node.go b/types/key/node.go index b71245711..f47d5a95b 100644 --- a/types/key/node.go +++ b/types/key/node.go @@ -193,6 +193,23 @@ func NodePublicFromRaw32(raw mem.RO) NodePublic { return ret } +// badOldPrefix is a nodekey/discokey prefix that, when base64'd, serializes +// with a "bad01" ("bad ol'", ~"bad old") prefix. It's used for expired node +// keys so when we debug a customer issue, the "bad01" can jump out to us. See: +// +// https://github.com/tailscale/tailscale/issues/6932 +var badOldPrefix = []byte{109, 167, 116, 213, 215, 116} + +// NodePublicWithBadOldPrefix returns a copy of k with its leading public key +// bytes mutated such that it base64's to a ShortString of [bad01] ("bad ol'" +// [expired node key]). +func NodePublicWithBadOldPrefix(k NodePublic) NodePublic { + var buf [32]byte + k.AppendTo(buf[:0]) + copy(buf[:], badOldPrefix) + return NodePublicFromRaw32(mem.B(buf[:])) +} + // IsZero reports whether k is the zero value. func (k NodePublic) IsZero() bool { return k == NodePublic{} diff --git a/util/deephash/deephash_test.go b/util/deephash/deephash_test.go index 089e2b62d..ce783ec46 100644 --- a/util/deephash/deephash_test.go +++ b/util/deephash/deephash_test.go @@ -576,7 +576,7 @@ func TestGetTypeHasher(t *testing.T) { { name: "tailcfg.Node", val: &tailcfg.Node{}, - out: "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + out: "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", }, } for _, tt := range tests {