From 635e4c74352b26078ec7a693de779d33105a4264 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Wed, 10 Feb 2021 11:49:30 -0800 Subject: [PATCH] wgengine/magicsock: increase legacy ping timeout again I based my estimate of the required timeout on locally observed behavior. But CI machines are worse than my local machine. 16s was enough to reduce flakiness but not eliminate it. Bump it up again. Signed-off-by: Josh Bleecher Snyder --- util/cibuild/cibuild.go | 13 +++++++++++++ wgengine/magicsock/magicsock_test.go | 13 +++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 util/cibuild/cibuild.go diff --git a/util/cibuild/cibuild.go b/util/cibuild/cibuild.go new file mode 100644 index 000000000..b2d4af20c --- /dev/null +++ b/util/cibuild/cibuild.go @@ -0,0 +1,13 @@ +// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cibuild reports runtime CI information. +package cibuild + +import "os" + +// On reports whether the current binary is executing on a CI system. +func On() bool { + return os.Getenv("GITHUB_ACTIONS") != "" +} diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index a4e3ed96d..520fbe590 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -44,6 +44,7 @@ import ( "tailscale.com/types/netmap" "tailscale.com/types/nettype" "tailscale.com/types/wgkey" + "tailscale.com/util/cibuild" "tailscale.com/wgengine/filter" "tailscale.com/wgengine/tstun" "tailscale.com/wgengine/wgcfg" @@ -929,10 +930,14 @@ func testTwoDevicePing(t *testing.T, d *devices) { // In the normal case, pings succeed immediately. // However, in the case of a handshake race, we need to retry. - // Typical retries take 5s. With very bad luck, we can need to retry - // multiple times. 
Give ourselves enough time for three retries - // plus a bit of processing time. - const pingTimeout = 16 * time.Second + // With very bad luck, we can need to retry multiple times. + allowedRetries := 3 + if cibuild.On() { + // Allow extra retries on small/flaky/loaded CI machines. + allowedRetries *= 2 + } + // Retries take 5s each. Add 1s for some processing time. + pingTimeout := 5*time.Second*time.Duration(allowedRetries) + time.Second ping1 := func(t *testing.T) { msg2to1 := tuntest.Ping(net.ParseIP("1.0.0.1"), net.ParseIP("1.0.0.2"))