control/controlclient, tailcfg: add Node.Expired field, set for expired nodes

Nodes that are expired, taking into account the time delta calculated
from MapResponse.ControlTime have the newly-added Expired boolean set.
For additional defense-in-depth, also replicate what control does and
clear the Endpoints and DERP fields, and additionally set the node key
to a bogus value.

Updates #6932

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: Ia2bd6b56064416feee28aef5699ca7090940662a
pull/6940/head
Andrew Dunham 1 year ago
parent 22ebb25e83
commit 1e67947cfa

@ -9,6 +9,7 @@ import (
"log"
"net/netip"
"sort"
"time"
"tailscale.com/envknob"
"tailscale.com/tailcfg"
@ -52,6 +53,12 @@ type mapSession struct {
lastPopBrowserURL string
stickyDebug tailcfg.Debug // accumulated opt.Bool values
lastTKAInfo *tailcfg.TKAInfo
previouslyExpired map[tailcfg.StableNodeID]bool // to avoid log spam
// clockDelta stores the delta between the current time and the time
// received from control such that:
// time.Now().Add(clockDelta) == MapResponse.ControlTime
clockDelta time.Duration
// netMapBuilding is non-nil during a netmapForResponse call,
// containing the value to be returned, once fully populated.
@ -60,11 +67,12 @@ type mapSession struct {
func newMapSession(privateNodeKey key.NodePrivate) *mapSession {
ms := &mapSession{
privateNodeKey: privateNodeKey,
logf: logger.Discard,
vlogf: logger.Discard,
lastDNSConfig: new(tailcfg.DNSConfig),
lastUserProfile: map[tailcfg.UserID]tailcfg.UserProfile{},
privateNodeKey: privateNodeKey,
logf: logger.Discard,
vlogf: logger.Discard,
lastDNSConfig: new(tailcfg.DNSConfig),
lastUserProfile: map[tailcfg.UserID]tailcfg.UserProfile{},
previouslyExpired: map[tailcfg.StableNodeID]bool{},
}
return ms
}
@ -85,6 +93,7 @@ func (ms *mapSession) addUserProfile(userID tailcfg.UserID) {
// information from prior MapResponse values.
func (ms *mapSession) netmapForResponse(resp *tailcfg.MapResponse) *netmap.NetworkMap {
undeltaPeers(resp, ms.previousPeers)
ms.flagExpiredPeers(resp)
ms.previousPeers = cloneNodes(resp.Peers) // defensive/lazy clone, since this escapes to who knows where
for _, up := range resp.UserProfiles {
@ -343,6 +352,83 @@ func undeltaPeers(mapRes *tailcfg.MapResponse, prev []*tailcfg.Node) {
mapRes.PeersRemoved = nil
}
// For extra defense-in-depth, when we're testing expired nodes we check
// ControlTime against this 'epoch' (set to the approximate time that this code
// was written) such that if control (or Headscale, etc.) sends a ControlTime
// that's sufficiently far in the past, we can safely ignore it.
var flagExpiredPeersEpoch = time.Unix(1673373066, 0)
// If the offset between the current time and the time received from control is
// larger than this, we store an offset in our mapSession to adjust future
// clock timings.
const minClockDelta = 1 * time.Minute
// flagExpiredPeers updates mapRes.Peers, mutating all peers that have expired,
// taking into account any clock skew detected by using the ControlTime field
// in the MapResponse. We don't actually remove expired peers from the Peers
// array; instead, we clear some fields of the Node object, and set
// Node.Expired so other parts of the codebase can provide more clear error
// messages when attempting to e.g. ping an expired node.
//
// This is additionally a defense-in-depth against something going wrong with
// control such that we start seeing expired peers with a valid Endpoints or
// DERP field.
func (ms *mapSession) flagExpiredPeers(mapRes *tailcfg.MapResponse) {
localNow := clockNow()
// If we have a ControlTime field, update our delta.
if mapRes.ControlTime != nil && !mapRes.ControlTime.IsZero() {
delta := mapRes.ControlTime.Sub(localNow)
if delta.Abs() > minClockDelta {
ms.logf("[v1] netmap: flagExpiredPeers: setting clock delta to %v", delta)
ms.clockDelta = delta
} else {
ms.clockDelta = 0
}
}
// Adjust our current time by any saved delta to adjust for clock skew.
controlNow := localNow.Add(ms.clockDelta)
if controlNow.Before(flagExpiredPeersEpoch) {
ms.logf("netmap: flagExpiredPeers: [unexpected] delta-adjusted current time is before hardcoded epoch; skipping")
return
}
for _, peer := range mapRes.Peers {
// Nodes that don't expire have KeyExpiry set to the zero time;
// skip those and peers that are already marked as expired
// (e.g. from control).
if peer.KeyExpiry.IsZero() || peer.KeyExpiry.After(controlNow) {
delete(ms.previouslyExpired, peer.StableID)
continue
} else if peer.Expired {
continue
}
if !ms.previouslyExpired[peer.StableID] {
ms.logf("[v1] netmap: flagExpiredPeers: clearing expired peer %v", peer.StableID)
ms.previouslyExpired[peer.StableID] = true
}
// Actually mark the node as expired
peer.Expired = true
// Control clears the Endpoints and DERP fields of expired
// nodes; do so here as well. The Expired bool is the correct
// thing to set, but this replicates the previous behaviour.
//
// NOTE: this is insufficient to actually break connectivity,
// since we discover endpoints via DERP, and due to DERP return
// path optimization.
peer.Endpoints = nil
peer.DERP = ""
// Defense-in-depth: break the node's public key as well, in
// case something tries to communicate.
peer.Key = key.NodePublicWithBadOldPrefix(peer.Key)
}
}
// ptrCopy returns a pointer to a newly allocated shallow copy of *v.
func ptrCopy[T any](v *T) *T {
if v == nil {

@ -308,26 +308,141 @@ func TestUndeltaPeers(t *testing.T) {
}
}
func TestFlagExpiredPeers(t *testing.T) {
n := func(id tailcfg.NodeID, name string, expiry time.Time, mod ...func(*tailcfg.Node)) *tailcfg.Node {
n := &tailcfg.Node{ID: id, Name: name, KeyExpiry: expiry}
for _, f := range mod {
f(n)
}
return n
}
now := time.Unix(1673373129, 0)
oldClockNow := clockNow
clockNow = func() time.Time { return now }
t.Cleanup(func() { clockNow = oldClockNow })
timeInPast := now.Add(-1 * time.Hour)
timeInFuture := now.Add(1 * time.Hour)
timeBeforeEpoch := flagExpiredPeersEpoch.Add(-1 * time.Second)
if now.Before(timeBeforeEpoch) {
panic("current time in test cannot be before epoch")
}
var expiredKey key.NodePublic
if err := expiredKey.UnmarshalText([]byte("nodekey:6da774d5d7740000000000000000000000000000000000000000000000000000")); err != nil {
panic(err)
}
tests := []struct {
name string
mapRes *tailcfg.MapResponse
want []*tailcfg.Node
}{
{
name: "no_expiry",
mapRes: &tailcfg.MapResponse{
ControlTime: &now,
Peers: []*tailcfg.Node{
n(1, "foo", timeInFuture),
n(2, "bar", timeInFuture),
},
},
want: []*tailcfg.Node{
n(1, "foo", timeInFuture),
n(2, "bar", timeInFuture),
},
},
{
name: "expiry",
mapRes: &tailcfg.MapResponse{
ControlTime: &now,
Peers: []*tailcfg.Node{
n(1, "foo", timeInFuture),
n(2, "bar", timeInPast),
},
},
want: []*tailcfg.Node{
n(1, "foo", timeInFuture),
n(2, "bar", timeInPast, func(n *tailcfg.Node) {
n.Expired = true
n.Key = expiredKey
}),
},
},
{
name: "bad_ControlTime",
mapRes: &tailcfg.MapResponse{
// ControlTime here is intentionally before our hardcoded epoch
ControlTime: &timeBeforeEpoch,
Peers: []*tailcfg.Node{
n(1, "foo", timeInFuture),
n(2, "bar", timeBeforeEpoch.Add(-1*time.Hour)), // before ControlTime
},
},
want: []*tailcfg.Node{
n(1, "foo", timeInFuture),
n(2, "bar", timeBeforeEpoch.Add(-1*time.Hour)), // should have expired, but ControlTime is before epoch
},
},
{
name: "tagged_node",
mapRes: &tailcfg.MapResponse{
ControlTime: &now,
Peers: []*tailcfg.Node{
n(1, "foo", timeInFuture),
n(2, "bar", time.Time{}), // tagged node; zero expiry
},
},
want: []*tailcfg.Node{
n(1, "foo", timeInFuture),
n(2, "bar", time.Time{}), // not expired
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ms := newTestMapSession(t)
ms.flagExpiredPeers(tt.mapRes)
if !reflect.DeepEqual(tt.mapRes.Peers, tt.want) {
t.Errorf("wrong results\n got: %s\nwant: %s", formatNodes(tt.mapRes.Peers), formatNodes(tt.want))
}
})
}
}
func formatNodes(nodes []*tailcfg.Node) string {
var sb strings.Builder
for i, n := range nodes {
if i > 0 {
sb.WriteString(", ")
}
var extra string
fmt.Fprintf(&sb, "(%d, %q", n.ID, n.Name)
if n.Online != nil {
extra += fmt.Sprintf(", online=%v", *n.Online)
fmt.Fprintf(&sb, ", online=%v", *n.Online)
}
if n.LastSeen != nil {
extra += fmt.Sprintf(", lastSeen=%v", n.LastSeen.Unix())
fmt.Fprintf(&sb, ", lastSeen=%v", n.LastSeen.Unix())
}
if n.Key != (key.NodePublic{}) {
fmt.Fprintf(&sb, ", key=%v", n.Key.String())
}
if n.Expired {
fmt.Fprintf(&sb, ", expired=true")
}
fmt.Fprintf(&sb, "(%d, %q%s)", n.ID, n.Name, extra)
sb.WriteString(")")
}
return sb.String()
}
func newTestMapSession(t *testing.T) *mapSession {
return newMapSession(key.NewNode())
ms := newMapSession(key.NewNode())
ms.logf = t.Logf
return ms
}
func TestNetmapForResponse(t *testing.T) {

@ -255,6 +255,12 @@ type Node struct {
// DataPlaneAuditLogID is the per-node logtail ID used for data plane audit logging.
DataPlaneAuditLogID string `json:",omitempty"`
// Expired is whether this node's key has expired. Control may send
// this; clients are only allowed to set this from false to true. On
// the client, this is calculated client-side based on a timestamp sent
// from control, to avoid clock skew issues.
Expired bool `json:",omitempty"`
}
// DisplayName returns the user-facing name for a node which should
@ -1628,7 +1634,8 @@ func (n *Node) Equal(n2 *Node) bool {
n.ComputedName == n2.ComputedName &&
n.computedHostIfDifferent == n2.computedHostIfDifferent &&
n.ComputedNameWithHost == n2.ComputedNameWithHost &&
eqStrings(n.Tags, n2.Tags)
eqStrings(n.Tags, n2.Tags) &&
n.Expired == n2.Expired
}
func eqBoolPtr(a, b *bool) bool {

@ -97,6 +97,7 @@ var _NodeCloneNeedsRegeneration = Node(struct {
computedHostIfDifferent string
ComputedNameWithHost string
DataPlaneAuditLogID string
Expired bool
}{})
// Clone makes a deep copy of Hostinfo.

@ -334,7 +334,7 @@ func TestNodeEqual(t *testing.T) {
"Capabilities",
"UnsignedPeerAPIOnly",
"ComputedName", "computedHostIfDifferent", "ComputedNameWithHost",
"DataPlaneAuditLogID",
"DataPlaneAuditLogID", "Expired",
}
if have := fieldsOf(reflect.TypeOf(Node{})); !reflect.DeepEqual(have, nodeHandles) {
t.Errorf("Node.Equal check might be out of sync\nfields: %q\nhandled: %q\n",
@ -514,6 +514,11 @@ func TestNodeEqual(t *testing.T) {
&Node{},
false,
},
{
&Node{Expired: true},
&Node{},
false,
},
}
for i, tt := range tests {
got := tt.a.Equal(tt.b)

@ -175,6 +175,7 @@ func (v NodeView) UnsignedPeerAPIOnly() bool { return v.ж.UnsignedPeerA
func (v NodeView) ComputedName() string { return v.ж.ComputedName }
func (v NodeView) ComputedNameWithHost() string { return v.ж.ComputedNameWithHost }
func (v NodeView) DataPlaneAuditLogID() string { return v.ж.DataPlaneAuditLogID }
func (v NodeView) Expired() bool { return v.ж.Expired }
func (v NodeView) Equal(v2 NodeView) bool { return v.ж.Equal(v2.ж) }
// A compilation failure here means this code must be regenerated, with the command at the top of this file.
@ -207,6 +208,7 @@ var _NodeViewNeedsRegeneration = Node(struct {
computedHostIfDifferent string
ComputedNameWithHost string
DataPlaneAuditLogID string
Expired bool
}{})
// View returns a readonly view of Hostinfo.

@ -193,6 +193,23 @@ func NodePublicFromRaw32(raw mem.RO) NodePublic {
return ret
}
// badOldPrefix is a nodekey/discokey prefix that, when base64'd, serializes
// with a "bad01" ("bad ol'", ~"bad old") prefix. It's used for expired node
// keys so when we debug a customer issue, the "bad01" can jump out to us. See:
//
// https://github.com/tailscale/tailscale/issues/6932
var badOldPrefix = []byte{109, 167, 116, 213, 215, 116}
// NodePublicWithBadOldPrefix returns a copy of k with its leading public key
// bytes mutated such that it base64's to a ShortString of [bad01] ("bad ol'"
// [expired node key]).
func NodePublicWithBadOldPrefix(k NodePublic) NodePublic {
var buf [32]byte
k.AppendTo(buf[:0])
copy(buf[:], badOldPrefix)
return NodePublicFromRaw32(mem.B(buf[:]))
}
// IsZero reports whether k is the zero value.
func (k NodePublic) IsZero() bool {
return k == NodePublic{}

@ -576,7 +576,7 @@ func TestGetTypeHasher(t *testing.T) {
{
name: "tailcfg.Node",
val: &tailcfg.Node{},
out: "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
out: "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\tn\x88\xf1\xff\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
},
}
for _, tt := range tests {

Loading…
Cancel
Save