mirror of https://github.com/tailscale/tailscale/
ipn/ipnlocal: move handling of expired nodes to LocalBackend
In order to be able to synthesize a new NetMap when a node expires, have LocalBackend start a timer when receiving a new NetMap that fires slightly after the next node expires. Additionally, move the logic that updates expired nodes into LocalBackend so it runs on every netmap (whether received from controlclient or self-triggered).

Updates #6932

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: I833390e16ad188983eac29eb34cc7574f555f2f3

pull/6969/head
parent
6d85a94767
commit
3a018e51bb
@ -0,0 +1,122 @@
|
||||
// Copyright (c) 2023 Tailscale Inc & AUTHORS All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package ipnlocal
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"tailscale.com/syncs"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/key"
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/types/netmap"
|
||||
)
|
||||
|
||||
// For extra defense-in-depth, when we're testing expired nodes we check
|
||||
// ControlTime against this 'epoch' (set to the approximate time that this code
|
||||
// was written) such that if control (or Headscale, etc.) sends a ControlTime
|
||||
// that's sufficiently far in the past, we can safely ignore it.
|
||||
var flagExpiredPeersEpoch = time.Unix(1673373066, 0) // 2023-01-10 17:51:06 UTC
|
||||
|
||||
// If the offset between the current time and the time received from control is
|
||||
// larger than this, we store an offset in our expiryManager to adjust future
|
||||
// clock timings.
|
||||
const minClockDelta = time.Minute
|
||||
|
||||
// expiryManager tracks the state of expired nodes and the delta from the
// current clock time to the time returned from control, and allows mutating a
// netmap to mark peers as expired based on the current delta-adjusted time.
type expiryManager struct {
	// previouslyExpired stores nodes that have already expired so we can
	// only log on state transitions.
	previouslyExpired map[tailcfg.StableNodeID]bool

	// clockDelta stores the delta between the current time and the time
	// received from control such that:
	//    time.Now().Add(clockDelta) == MapResponse.ControlTime
	//
	// onControlTime stores zero here when the measured delta does not
	// exceed minClockDelta.
	clockDelta syncs.AtomicValue[time.Duration]

	// logf is the log function used for all messages from this type.
	logf logger.Logf
	// timeNow returns the current local time. newExpiryManager sets it to
	// time.Now; it is a field so that it can be replaced with a fixed clock.
	timeNow func() time.Time
}
|
||||
|
||||
func newExpiryManager(logf logger.Logf) *expiryManager {
|
||||
return &expiryManager{
|
||||
previouslyExpired: map[tailcfg.StableNodeID]bool{},
|
||||
logf: logf,
|
||||
timeNow: time.Now,
|
||||
}
|
||||
}
|
||||
|
||||
// onControlTime is called whenever we receive a new timestamp from the control
|
||||
// server to store the delta.
|
||||
func (em *expiryManager) onControlTime(t time.Time) {
|
||||
localNow := em.timeNow()
|
||||
delta := t.Sub(localNow)
|
||||
if delta.Abs() > minClockDelta {
|
||||
em.logf("[v1] netmap: flagExpiredPeers: setting clock delta to %v", delta)
|
||||
em.clockDelta.Store(delta)
|
||||
} else {
|
||||
em.clockDelta.Store(0)
|
||||
}
|
||||
}
|
||||
|
||||
// flagExpiredPeers updates mapRes.Peers, mutating all peers that have expired,
|
||||
// taking into account any clock skew detected by using the ControlTime field
|
||||
// in the MapResponse. We don't actually remove expired peers from the Peers
|
||||
// array; instead, we clear some fields of the Node object, and set
|
||||
// Node.Expired so other parts of the codebase can provide more clear error
|
||||
// messages when attempting to e.g. ping an expired node.
|
||||
//
|
||||
// This is additionally a defense-in-depth against something going wrong with
|
||||
// control such that we start seeing expired peers with a valid Endpoints or
|
||||
// DERP field.
|
||||
//
|
||||
// This function is safe to call concurrently with onControlTime but not
|
||||
// concurrently with any other call to flagExpiredPeers.
|
||||
func (em *expiryManager) flagExpiredPeers(netmap *netmap.NetworkMap) {
|
||||
localNow := em.timeNow()
|
||||
|
||||
// Adjust our current time by any saved delta to adjust for clock skew.
|
||||
controlNow := localNow.Add(em.clockDelta.Load())
|
||||
if controlNow.Before(flagExpiredPeersEpoch) {
|
||||
em.logf("netmap: flagExpiredPeers: [unexpected] delta-adjusted current time is before hardcoded epoch; skipping")
|
||||
return
|
||||
}
|
||||
|
||||
for _, peer := range netmap.Peers {
|
||||
// Nodes that don't expire have KeyExpiry set to the zero time;
|
||||
// skip those and peers that are already marked as expired
|
||||
// (e.g. from control).
|
||||
if peer.KeyExpiry.IsZero() || peer.KeyExpiry.After(controlNow) {
|
||||
delete(em.previouslyExpired, peer.StableID)
|
||||
continue
|
||||
} else if peer.Expired {
|
||||
continue
|
||||
}
|
||||
|
||||
if !em.previouslyExpired[peer.StableID] {
|
||||
em.logf("[v1] netmap: flagExpiredPeers: clearing expired peer %v", peer.StableID)
|
||||
em.previouslyExpired[peer.StableID] = true
|
||||
}
|
||||
|
||||
// Actually mark the node as expired
|
||||
peer.Expired = true
|
||||
|
||||
// Control clears the Endpoints and DERP fields of expired
|
||||
// nodes; do so here as well. The Expired bool is the correct
|
||||
// thing to set, but this replicates the previous behaviour.
|
||||
//
|
||||
// NOTE: this is insufficient to actually break connectivity,
|
||||
// since we discover endpoints via DERP, and due to DERP return
|
||||
// path optimization.
|
||||
peer.Endpoints = nil
|
||||
peer.DERP = ""
|
||||
|
||||
// Defense-in-depth: break the node's public key as well, in
|
||||
// case something tries to communicate.
|
||||
peer.Key = key.NodePublicWithBadOldPrefix(peer.Key)
|
||||
}
|
||||
}
|
@ -0,0 +1,150 @@
|
||||
// Copyright (c) 2023 Tailscale Inc & AUTHORS All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package ipnlocal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/key"
|
||||
"tailscale.com/types/netmap"
|
||||
)
|
||||
|
||||
func TestFlagExpiredPeers(t *testing.T) {
|
||||
n := func(id tailcfg.NodeID, name string, expiry time.Time, mod ...func(*tailcfg.Node)) *tailcfg.Node {
|
||||
n := &tailcfg.Node{ID: id, Name: name, KeyExpiry: expiry}
|
||||
for _, f := range mod {
|
||||
f(n)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
now := time.Unix(1673373129, 0)
|
||||
|
||||
timeInPast := now.Add(-1 * time.Hour)
|
||||
timeInFuture := now.Add(1 * time.Hour)
|
||||
|
||||
timeBeforeEpoch := flagExpiredPeersEpoch.Add(-1 * time.Second)
|
||||
if now.Before(timeBeforeEpoch) {
|
||||
panic("current time in test cannot be before epoch")
|
||||
}
|
||||
|
||||
var expiredKey key.NodePublic
|
||||
if err := expiredKey.UnmarshalText([]byte("nodekey:6da774d5d7740000000000000000000000000000000000000000000000000000")); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
controlTime *time.Time
|
||||
netmap *netmap.NetworkMap
|
||||
want []*tailcfg.Node
|
||||
}{
|
||||
{
|
||||
name: "no_expiry",
|
||||
controlTime: &now,
|
||||
netmap: &netmap.NetworkMap{
|
||||
Peers: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeInFuture),
|
||||
},
|
||||
},
|
||||
want: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeInFuture),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "expiry",
|
||||
controlTime: &now,
|
||||
netmap: &netmap.NetworkMap{
|
||||
Peers: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeInPast),
|
||||
},
|
||||
},
|
||||
want: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeInPast, func(n *tailcfg.Node) {
|
||||
n.Expired = true
|
||||
n.Key = expiredKey
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "bad_ControlTime",
|
||||
// controlTime here is intentionally before our hardcoded epoch
|
||||
controlTime: &timeBeforeEpoch,
|
||||
|
||||
netmap: &netmap.NetworkMap{
|
||||
Peers: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeBeforeEpoch.Add(-1*time.Hour)), // before ControlTime
|
||||
},
|
||||
},
|
||||
want: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", timeBeforeEpoch.Add(-1*time.Hour)), // should have expired, but ControlTime is before epoch
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tagged_node",
|
||||
controlTime: &now,
|
||||
netmap: &netmap.NetworkMap{
|
||||
Peers: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", time.Time{}), // tagged node; zero expiry
|
||||
},
|
||||
},
|
||||
want: []*tailcfg.Node{
|
||||
n(1, "foo", timeInFuture),
|
||||
n(2, "bar", time.Time{}), // not expired
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
em := newExpiryManager(t.Logf)
|
||||
em.timeNow = func() time.Time { return now }
|
||||
|
||||
if tt.controlTime != nil {
|
||||
em.onControlTime(*tt.controlTime)
|
||||
}
|
||||
em.flagExpiredPeers(tt.netmap)
|
||||
if !reflect.DeepEqual(tt.netmap.Peers, tt.want) {
|
||||
t.Errorf("wrong results\n got: %s\nwant: %s", formatNodes(tt.netmap.Peers), formatNodes(tt.want))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func formatNodes(nodes []*tailcfg.Node) string {
|
||||
var sb strings.Builder
|
||||
for i, n := range nodes {
|
||||
if i > 0 {
|
||||
sb.WriteString(", ")
|
||||
}
|
||||
fmt.Fprintf(&sb, "(%d, %q", n.ID, n.Name)
|
||||
|
||||
if n.Online != nil {
|
||||
fmt.Fprintf(&sb, ", online=%v", *n.Online)
|
||||
}
|
||||
if n.LastSeen != nil {
|
||||
fmt.Fprintf(&sb, ", lastSeen=%v", n.LastSeen.Unix())
|
||||
}
|
||||
if n.Key != (key.NodePublic{}) {
|
||||
fmt.Fprintf(&sb, ", key=%v", n.Key.String())
|
||||
}
|
||||
if n.Expired {
|
||||
fmt.Fprintf(&sb, ", expired=true")
|
||||
}
|
||||
sb.WriteString(")")
|
||||
}
|
||||
return sb.String()
|
||||
}
|
Loading…
Reference in New Issue