diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6a0e4a5ed..c7dec1ff8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -41,6 +41,14 @@ concurrency: jobs: race-root-integration: runs-on: ubuntu-22.04 + strategy: + fail-fast: false # don't abort the entire matrix if one element fails + matrix: + include: + - shard: '1/4' + - shard: '2/4' + - shard: '3/4' + - shard: '4/4' steps: - name: checkout uses: actions/checkout@v4 @@ -48,6 +56,8 @@ jobs: run: ./tool/go build -o /tmp/testwrapper ./cmd/testwrapper - name: integration tests as root run: PATH=$PWD/tool:$PATH /tmp/testwrapper -exec "sudo -E" -race ./tstest/integration/ + env: + TS_TEST_SHARD: ${{ matrix.shard }} test: strategy: diff --git a/cmd/testwrapper/testwrapper.go b/cmd/testwrapper/testwrapper.go index bab62f48d..a4bc64995 100644 --- a/cmd/testwrapper/testwrapper.go +++ b/cmd/testwrapper/testwrapper.go @@ -85,6 +85,9 @@ func runTests(ctx context.Context, attempt int, pt *packageTests, goTestArgs, te fmt.Println("running", strings.Join(args, " ")) } cmd := exec.CommandContext(ctx, "go", args...) 
+ if len(pt.Tests) > 0 { + cmd.Env = append(os.Environ(), "TS_TEST_SHARD=") // clear test shard; run all tests we say to run + } r, err := cmd.StdoutPipe() if err != nil { log.Printf("error creating stdout pipe: %v", err) diff --git a/tstest/integration/integration_test.go b/tstest/integration/integration_test.go index 10d8735e8..fb2ef44f2 100644 --- a/tstest/integration/integration_test.go +++ b/tstest/integration/integration_test.go @@ -71,6 +71,7 @@ func TestMain(m *testing.M) { // Tests that tailscaled starts up in TUN mode, and also without data races: // https://github.com/tailscale/tailscale/issues/7894 func TestTUNMode(t *testing.T) { + tstest.Shard(t) if os.Getuid() != 0 { t.Skip("skipping when not root") } @@ -90,6 +91,7 @@ func TestTUNMode(t *testing.T) { } func TestOneNodeUpNoAuth(t *testing.T) { + tstest.Shard(t) t.Parallel() env := newTestEnv(t) n1 := newTestNode(t, env) @@ -107,6 +109,7 @@ func TestOneNodeUpNoAuth(t *testing.T) { } func TestOneNodeExpiredKey(t *testing.T) { + tstest.Shard(t) t.Parallel() env := newTestEnv(t) n1 := newTestNode(t, env) @@ -143,6 +146,7 @@ func TestOneNodeExpiredKey(t *testing.T) { } func TestControlKnobs(t *testing.T) { + tstest.Shard(t) t.Parallel() env := newTestEnv(t) n1 := newTestNode(t, env) @@ -173,6 +177,7 @@ func TestControlKnobs(t *testing.T) { } func TestCollectPanic(t *testing.T) { + tstest.Shard(t) t.Parallel() env := newTestEnv(t) n := newTestNode(t, env) @@ -203,6 +208,7 @@ func TestCollectPanic(t *testing.T) { } func TestControlTimeLogLine(t *testing.T) { + tstest.Shard(t) t.Parallel() env := newTestEnv(t) env.LogCatcher.StoreRawJSON() @@ -226,6 +232,7 @@ func TestControlTimeLogLine(t *testing.T) { // test Issue 2321: Start with UpdatePrefs should save prefs to disk func TestStateSavedOnStart(t *testing.T) { + tstest.Shard(t) t.Parallel() env := newTestEnv(t) n1 := newTestNode(t, env) @@ -262,6 +269,7 @@ func TestStateSavedOnStart(t *testing.T) { } func TestOneNodeUpAuth(t *testing.T) { + tstest.Shard(t) 
t.Parallel() env := newTestEnv(t, configureControl(func(control *testcontrol.Server) { control.RequireAuth = true @@ -305,6 +313,7 @@ func TestOneNodeUpAuth(t *testing.T) { } func TestTwoNodes(t *testing.T) { + tstest.Shard(t) flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/3598") t.Parallel() env := newTestEnv(t) @@ -354,6 +363,7 @@ func TestTwoNodes(t *testing.T) { // tests two nodes where the first gets a incremental MapResponse (with only // PeersRemoved set) saying that the second node disappeared. func TestIncrementalMapUpdatePeersRemoved(t *testing.T) { + tstest.Shard(t) flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/3598") t.Parallel() env := newTestEnv(t) @@ -438,6 +448,7 @@ func TestIncrementalMapUpdatePeersRemoved(t *testing.T) { } func TestNodeAddressIPFields(t *testing.T) { + tstest.Shard(t) flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/7008") t.Parallel() env := newTestEnv(t) @@ -465,6 +476,7 @@ func TestNodeAddressIPFields(t *testing.T) { } func TestAddPingRequest(t *testing.T) { + tstest.Shard(t) t.Parallel() env := newTestEnv(t) n1 := newTestNode(t, env) @@ -517,6 +529,7 @@ func TestAddPingRequest(t *testing.T) { } func TestC2NPingRequest(t *testing.T) { + tstest.Shard(t) t.Parallel() env := newTestEnv(t) n1 := newTestNode(t, env) @@ -587,6 +600,7 @@ func TestC2NPingRequest(t *testing.T) { // Issue 2434: when "down" (WantRunning false), tailscaled shouldn't // be connected to control. func TestNoControlConnWhenDown(t *testing.T) { + tstest.Shard(t) t.Parallel() env := newTestEnv(t) n1 := newTestNode(t, env) @@ -628,6 +642,7 @@ func TestNoControlConnWhenDown(t *testing.T) { // Issue 2137: make sure Windows tailscaled works with the CLI alone, // without the GUI to kick off a Start. 
func TestOneNodeUpWindowsStyle(t *testing.T) { + tstest.Shard(t) t.Parallel() env := newTestEnv(t) n1 := newTestNode(t, env) @@ -646,6 +661,7 @@ func TestOneNodeUpWindowsStyle(t *testing.T) { // TestNATPing creates two nodes, n1 and n2, sets up masquerades for both and // tries to do bi-directional pings between them. func TestNATPing(t *testing.T) { + tstest.Shard(t) t.Parallel() for _, v6 := range []bool{false, true} { env := newTestEnv(t) @@ -773,6 +789,7 @@ func TestNATPing(t *testing.T) { } func TestLogoutRemovesAllPeers(t *testing.T) { + tstest.Shard(t) t.Parallel() env := newTestEnv(t) // Spin up some nodes. diff --git a/tstest/tstest.go b/tstest/tstest.go index 7ccba8004..cff8726a6 100644 --- a/tstest/tstest.go +++ b/tstest/tstest.go @@ -6,6 +6,10 @@ package tstest import ( "context" + "os" + "strconv" + "strings" + "sync/atomic" "testing" "time" @@ -46,3 +50,27 @@ func WaitFor(maxWait time.Duration, try func() error) error { } return err } + +var testNum atomic.Int32 + +// Shard skips t if the TS_TEST_SHARD environment variable is set to "n/m" and +// this test's execution number in the process, mod m, is not equal to n-1. +// That is, to run with 4 shards, set TS_TEST_SHARD=1/4, ..., TS_TEST_SHARD=4/4 +// for the four jobs. A value with no "/" or a zero n or m disables skipping. +func Shard(t testing.TB) { + e := os.Getenv("TS_TEST_SHARD") + a, b, ok := strings.Cut(e, "/") + if !ok { + return + } + wantShard, _ := strconv.ParseInt(a, 10, 32) + shards, _ := strconv.ParseInt(b, 10, 32) + if wantShard == 0 || shards == 0 { + return + } + + shard := ((testNum.Add(1) - 1) % int32(shards)) + 1 + if shard != int32(wantShard) { + t.Skipf("skipping shard %d/%d (process has TS_TEST_SHARD=%q)", shard, shards, e) + } +}