diff --git a/.github/workflows/natlab-integrationtest.yml b/.github/workflows/natlab-integrationtest.yml index 99d58717b..bc39266dc 100644 --- a/.github/workflows/natlab-integrationtest.yml +++ b/.github/workflows/natlab-integrationtest.yml @@ -18,7 +18,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install qemu run: | - sudo rm /var/lib/man-db/auto-update + sudo rm -f /var/lib/man-db/auto-update sudo apt-get -y update sudo apt-get -y remove man-db sudo apt-get install -y qemu-system-x86 qemu-utils diff --git a/tstest/integration/nat/nat_test.go b/tstest/integration/nat/nat_test.go index 15f126985..f01d02f33 100644 --- a/tstest/integration/nat/nat_test.go +++ b/tstest/integration/nat/nat_test.go @@ -133,6 +133,16 @@ func easyAnd6(c *vnet.Config) *vnet.Node { vnet.EasyNAT)) } +func easyNoControlDiscoRotate(c *vnet.Config) *vnet.Node { + n := c.NumNodes() + 1 + nw := c.AddNetwork( + fmt.Sprintf("2.%d.%d.%d", n, n, n), // public IP + fmt.Sprintf("192.168.%d.1/24", n), + vnet.EasyNAT) + nw.SetPostConnectControlBlackhole(true) + return c.AddNode(vnet.RotateDisco, vnet.PreICMPPing, nw) +} + func v6AndBlackholedIPv4(c *vnet.Config) *vnet.Node { n := c.NumNodes() + 1 nw := c.AddNetwork( @@ -364,7 +374,9 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute { var clients []*vnet.NodeAgentClient for _, n := range nodes { - clients = append(clients, nt.vnet.NodeAgentClient(n)) + client := nt.vnet.NodeAgentClient(n) + n.SetClient(client) + clients = append(clients, client) } sts := make([]*ipnstate.Status, len(nodes)) @@ -415,7 +427,27 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute { return "" } - pingRes, err := ping(ctx, clients[0], sts[1].Self.TailscaleIPs[0]) + preICMPPing := false + for _, node := range c.Nodes() { + node.Network().PostConnectedToControl() + if err := node.PostConnectedToControl(ctx); err != nil { + t.Fatalf("post control error: %s", err) + } + if node.PreICMPPing() { + preICMPPing = true + } + } + + // Should we send traffic across the nodes before starting disco? + // For nodes that rotated disco keys after control going away. + if preICMPPing { + _, err := ping(ctx, clients[0], sts[1].Self.TailscaleIPs[0], tailcfg.PingICMP) + if err != nil { + t.Fatalf("ICMP ping failure: %v", err) + } + } + + pingRes, err := ping(ctx, clients[0], sts[1].Self.TailscaleIPs[0], tailcfg.PingDisco) if err != nil { t.Fatalf("ping failure: %v", err) } @@ -450,13 +482,13 @@ const ( routeNil pingRoute = "nil" // *ipnstate.PingResult is nil ) -func ping(ctx context.Context, c *vnet.NodeAgentClient, target netip.Addr) (*ipnstate.PingResult, error) { +func ping(ctx context.Context, c *vnet.NodeAgentClient, target netip.Addr, pType tailcfg.PingType) (*ipnstate.PingResult, error) { n := 0 var res *ipnstate.PingResult anyPong := false for n < 10 { n++ - pr, err := c.PingWithOpts(ctx, target, tailcfg.PingDisco, tailscale.PingOpts{}) + pr, err := c.PingWithOpts(ctx, target, pType, tailscale.PingOpts{}) if err != nil { if anyPong { return res, nil @@ -526,6 +558,18 @@ func TestEasyEasy(t *testing.T) { nt.want(routeDirect) } +func TestEasyEasyNoControlDiscoRotate(t *testing.T) { + nt := newNatTest(t) + nt.runTest(easyNoControlDiscoRotate, easy) + nt.want(routeDirect) +} + +func TestTwoEasyNoControlDiscoRotate(t *testing.T) { + nt := newNatTest(t) + nt.runTest(easyNoControlDiscoRotate, easyNoControlDiscoRotate) + nt.want(routeDirect) +} + // Issue tailscale/corp#26438: use learned DERP route as send path of last // resort // diff --git a/tstest/natlab/vnet/conf.go b/tstest/natlab/vnet/conf.go index 07b181540..7dcb40274 100644 --- a/tstest/natlab/vnet/conf.go +++ b/tstest/natlab/vnet/conf.go @@ -5,6 +5,7 @@ package vnet import ( "cmp" + "context" "fmt" "iter" "net/netip" @@ -114,6 +115,10 @@ func (c *Config) AddNode(opts ...any) *Node { switch o { case HostFirewall: n.hostFW = true + case RotateDisco: + n.rotateDisco = true + case PreICMPPing: + n.preICMPPing = true case VerboseSyslog: n.verboseSyslog = true default: @@ -137,6 +142,8 @@ type NodeOption string const ( HostFirewall NodeOption = "HostFirewall" + RotateDisco NodeOption = "RotateDisco" + PreICMPPing NodeOption = "PreICMPPing" VerboseSyslog NodeOption = "VerboseSyslog" ) @@ -197,12 +204,15 @@ func (c *Config) AddNetwork(opts ...any) *Network { // Node is the configuration of a node in the virtual network. type Node struct { - err error - num int // 1-based node number - n *node // nil until NewServer called + err error + num int // 1-based node number + n *node // nil until NewServer called + client *NodeAgentClient env []TailscaledEnv hostFW bool + rotateDisco bool + preICMPPing bool verboseSyslog bool // TODO(bradfitz): this is halfway converted to supporting multiple NICs @@ -243,6 +253,26 @@ func (n *Node) SetVerboseSyslog(v bool) { n.verboseSyslog = v } +func (n *Node) SetClient(c *NodeAgentClient) { + n.client = c +} + +func (n *Node) PostConnectedToControl(ctx context.Context) error { + if n.rotateDisco { + if err := n.client.DebugAction(ctx, "rotate-disco-key"); err != nil { + return err + } + } + return nil +} + +// PreICMPPing indicates the need of the node to have an ICMP Ping sent before +// the disco ping. This is important for the nodes having rotated their +// disco keys while control is down. +func (n *Node) PreICMPPing() bool { + return n.preICMPPing +} + // IsV6Only reports whether this node is only connected to IPv6 networks. func (n *Node) IsV6Only() bool { for _, net := range n.nets { @@ -275,10 +305,12 @@ type Network struct { wanIP6 netip.Prefix // global unicast router in host bits; CIDR is /64 delegated to LAN - wanIP4 netip.Addr // IPv4 WAN IP, if any - lanIP4 netip.Prefix - nodes []*Node - breakWAN4 bool // whether to break WAN IPv4 connectivity + wanIP4 netip.Addr // IPv4 WAN IP, if any + lanIP4 netip.Prefix + nodes []*Node + breakWAN4 bool // whether to break WAN IPv4 connectivity + postConnectBreakControl bool // whether to break control connectivity after nodes have connected + network *network svcs set.Set[NetworkService] @@ -310,6 +342,10 @@ func (n *Network) SetBlackholedIPv4(v bool) { n.breakWAN4 = v } +func (n *Network) SetPostConnectControlBlackhole(v bool) { + n.postConnectBreakControl = v +} + func (n *Network) CanV4() bool { return n.lanIP4.IsValid() || n.wanIP4.IsValid() } @@ -325,6 +361,10 @@ func (n *Network) CanTakeMoreNodes() bool { return len(n.nodes) < 150 } +func (n *Network) PostConnectedToControl() { + n.network.BreakControl(n.postConnectBreakControl) +} + // NetworkService is a service that can be added to a network. type NetworkService string @@ -390,6 +430,8 @@ func (s *Server) initFromConfig(c *Config) error { } netOfConf[conf] = n s.networks.Add(n) + + conf.network = n if conf.wanIP4.IsValid() { if conf.wanIP4.Is6() { return fmt.Errorf("invalid IPv6 address in wanIP") diff --git a/tstest/natlab/vnet/vnet.go b/tstest/natlab/vnet/vnet.go index 49d47f029..092452bd4 100644 --- a/tstest/natlab/vnet/vnet.go +++ b/tstest/natlab/vnet/vnet.go @@ -518,6 +518,7 @@ type network struct { wanIP4 netip.Addr // router's LAN IPv4, if any lanIP4 netip.Prefix // router's LAN IP + CIDR (e.g. 192.168.2.1/24) breakWAN4 bool // break WAN IPv4 connectivity + breakControl bool // break control connectivity latency time.Duration // latency applied to interface writes lossRate float64 // probability of dropping a packet (0.0 to 1.0) nodesByIP4 map[netip.Addr]*node // by LAN IPv4 @@ -578,6 +579,10 @@ func (n *network) MACOfIP(ip netip.Addr) (_ MAC, ok bool) { return MAC{}, false } +func (n *network) BreakControl(v bool) { + n.breakControl = v +} + type node struct { mac MAC num int // 1-based node number @@ -1263,7 +1268,8 @@ func (n *network) HandleEthernetPacketForRouter(ep EthernetPacket) { } if toForward && n.s.shouldInterceptTCP(packet) { - if flow.dst.Is4() && n.breakWAN4 { + if (flow.dst.Is4() && n.breakWAN4) || + (fakeControl.Match(flow.dst) && n.breakControl) { // Blackhole the packet. return } diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index aafb7b626..3c157e78f 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -4304,6 +4304,7 @@ func (c *Conn) HandleDiscoKeyAdvertisement(node tailcfg.NodeView, update packet. // If the key did not change, count it and return. if oldDiscoKey.Compare(discoKey) == 0 { metricTSMPDiscoKeyAdvertisementUnchanged.Add(1) + c.logf("magicsock: disco key did not change for node %v", nodeKey.ShortString()) return } c.discoInfoForKnownPeerLocked(discoKey)