diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index 99850e32d..7caca8136 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -10,7 +10,6 @@ import ( crand "crypto/rand" "crypto/tls" "encoding/binary" - "encoding/json" "errors" "fmt" "io/ioutil" @@ -26,7 +25,6 @@ import ( "time" "unsafe" - "github.com/google/go-cmp/cmp" "github.com/tailscale/wireguard-go/device" "github.com/tailscale/wireguard-go/tun/tuntest" "golang.org/x/crypto/nacl/box" @@ -996,148 +994,6 @@ func testTwoDevicePing(t *testing.T, d *devices) { ping1(t) ping2(t) }) - - // TODO: Remove this once the following tests are reliable. - if run, _ := strconv.ParseBool(os.Getenv("RUN_CURSED_TESTS")); !run { - t.Skip("skipping following tests because RUN_CURSED_TESTS is not set.") - } - - pingSeq := func(t *testing.T, count int, totalTime time.Duration, strict bool) { - msg := func(i int) []byte { - b := tuntest.Ping(net.ParseIP("1.0.0.2"), net.ParseIP("1.0.0.1")) - b[len(b)-1] = byte(i) // set seq num - return b - } - - // Space out ping transmissions so that the overall - // transmission happens in totalTime. - // - // We do this because the packet spray logic in magicsock is - // time-based to allow for reliable NAT traversal. However, - // for the packet spraying test further down, there needs to - // be at least 1 sprayed packet that is not the handshake, in - // case the handshake gets eaten by the race resolution logic. - // - // This is an inherent "race by design" in our current - // magicsock+wireguard-go codebase: sometimes, racing - // handshakes will result in a sub-optimal path for a few - // hundred milliseconds, until a subsequent spray corrects the - // issue. In order for the test to reflect that magicsock - // works as designed, we have to space out packet transmission - // here. - interPacketGap := totalTime / time.Duration(count) - if interPacketGap < 1*time.Millisecond { - interPacketGap = 0 - } - - for i := 0; i < count; i++ { - b := msg(i) - m1.tun.Outbound <- b - time.Sleep(interPacketGap) - } - - for i := 0; i < count; i++ { - b := msg(i) - select { - case msgRecv := <-m2.tun.Inbound: - if !bytes.Equal(b, msgRecv) { - if strict { - t.Errorf("return ping %d did not transit correctly: %s", i, cmp.Diff(b, msgRecv)) - } - } - case <-time.After(pingTimeout): - if strict { - t.Errorf("return ping %d did not transit", i) - } - } - } - } - - t.Run("ping 1.0.0.1 x50", func(t *testing.T) { - setT(t) - defer setT(outerT) - pingSeq(t, 50, 0, true) - }) - - // Add DERP relay. - derpEp := "127.3.3.40:1" - ep0 := cfgs[0].Peers[0].Endpoints - ep0 = derpEp + "," + ep0 - cfgs[0].Peers[0].Endpoints = ep0 - ep1 := cfgs[1].Peers[0].Endpoints - ep1 = derpEp + "," + ep1 - cfgs[1].Peers[0].Endpoints = ep1 - if err := m1.Reconfig(&cfgs[0]); err != nil { - t.Fatal(err) - } - if err := m2.Reconfig(&cfgs[1]); err != nil { - t.Fatal(err) - } - - t.Run("add DERP", func(t *testing.T) { - setT(t) - defer setT(outerT) - pingSeq(t, 20, 0, true) - }) - - // Disable real route. - cfgs[0].Peers[0].Endpoints = derpEp - cfgs[1].Peers[0].Endpoints = derpEp - if err := m1.Reconfig(&cfgs[0]); err != nil { - t.Fatal(err) - } - if err := m2.Reconfig(&cfgs[1]); err != nil { - t.Fatal(err) - } - time.Sleep(250 * time.Millisecond) // TODO remove - - t.Run("all traffic over DERP", func(t *testing.T) { - setT(t) - defer setT(outerT) - defer func() { - if t.Failed() || true { - logf("cfg0: %v", stringifyConfig(cfgs[0])) - logf("cfg1: %v", stringifyConfig(cfgs[1])) - } - }() - pingSeq(t, 20, 0, true) - }) - - m1.dev.RemoveAllPeers() - m2.dev.RemoveAllPeers() - - // Give one peer a non-DERP endpoint. We expect the other to - // accept it via roamAddr. - cfgs[0].Peers[0].Endpoints = ep0 - if ep2 := cfgs[1].Peers[0].Endpoints; len(ep2) != 1 { - t.Errorf("unexpected peer endpoints in dev2: %v", ep2) - } - if err := m2.Reconfig(&cfgs[1]); err != nil { - t.Fatal(err) - } - if err := m1.Reconfig(&cfgs[0]); err != nil { - t.Fatal(err) - } - // Dear future human debugging a test failure here: this test is - // flaky, and very infrequently will drop 1-2 of the 50 ping - // packets. This does not affect normal operation of tailscaled, - // but makes this test fail. - // - // TODO(danderson): finish root-causing and de-flake this test. - t.Run("one real route is enough thanks to spray", func(t *testing.T) { - setT(t) - defer setT(outerT) - pingSeq(t, 50, 700*time.Millisecond, false) - - cfg, err := wgcfg.DeviceConfig(m2.dev) - if err != nil { - t.Fatal(err) - } - ep2 := cfg.Peers[0].Endpoints - if len(ep2) != 2 { - t.Error("handshake spray failed to find real route") - } - }) } // TestAddrSet tests addrSet appendDests and updateDst. @@ -1362,14 +1218,6 @@ func TestDiscoStringLogRace(t *testing.T) { wg.Wait() } -func stringifyConfig(cfg wgcfg.Config) string { - j, err := json.Marshal(cfg) - if err != nil { - panic(err) - } - return string(j) -} - func Test32bitAlignment(t *testing.T) { var de discoEndpoint