diff --git a/tstest/integration/vms/vms_test.go b/tstest/integration/vms/vms_test.go index 25c612879..236b258e1 100644 --- a/tstest/integration/vms/vms_test.go +++ b/tstest/integration/vms/vms_test.go @@ -7,6 +7,7 @@ package vms import ( + "context" "crypto/sha256" "encoding/hex" "flag" @@ -31,6 +32,7 @@ import ( expect "github.com/google/goexpect" "github.com/pkg/sftp" "golang.org/x/crypto/ssh" + "golang.org/x/sync/semaphore" "inet.af/netaddr" "tailscale.com/net/interfaces" "tailscale.com/tstest" @@ -40,7 +42,8 @@ import ( const securePassword = "hunter2" -var runVMTests = flag.Bool("run-vm-tests", false, "if set, run expensive (10G+ ram) VM based integration tests") +var runVMTests = flag.Bool("run-vm-tests", false, "if set, run expensive VM based integration tests") +var vmRamLimit = flag.Int("ram-limit", 4096, "the maximum number of megabytes of ram that can be used for VMs, must be greater than or equal to 1024") var distroRex *regexValue = func() *regexValue { result := ®exValue{r: regexp.MustCompile(`.*`)} flag.Var(result, "distro-regex", "The regex that matches what distros should be run") @@ -259,9 +262,8 @@ func mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) func() { if err != nil { t.Fatalf("can't find cache dir: %v", err) } - cdir = filepath.Join(cdir, "within", "mkvm") + cdir = filepath.Join(cdir, "tailscale", "vm-test") os.MkdirAll(filepath.Join(cdir, "qcow2"), 0755) - os.MkdirAll(filepath.Join(cdir, "seed"), 0755) port := 23100 + n @@ -280,6 +282,7 @@ func mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) func() { "-drive", driveArg, "-cdrom", filepath.Join(tdir, d.name, "seed", "seed.iso"), "-vnc", fmt.Sprintf(":%d", n), + "-smbios", "type=1,serial=ds=nocloud;h=" + d.name, } t.Logf("running: qemu-system-x86_64 %s", strings.Join(args, " ")) @@ -378,7 +381,7 @@ func TestVMIntegrationEndToEnd(t *testing.T) { var ( ipMu sync.Mutex - ipMap = []ipMapping{} + ipMap = map[string]ipMapping{} ) mux := http.NewServeMux() @@ -398,7 +401,8 @@ func TestVMIntegrationEndToEnd(t *testing.T) { if err != nil { log.Panicf("bad port: %v", port) } - ipMap = append(ipMap, ipMapping{r.UserAgent(), port, host}) + distro := r.UserAgent() + ipMap[distro] = ipMapping{distro, port, host} t.Logf("%s: %v", name, host) }) @@ -424,140 +428,129 @@ func TestVMIntegrationEndToEnd(t *testing.T) { loginServer := fmt.Sprintf("http://%s", ln.Addr()) t.Logf("loginServer: %s", loginServer) - var numDistros = 0 + tstest.FixLogs(t) + defer tstest.UnfixLogs(t) - cancels := make(chan func(), len(distros)) + ramsem := semaphore.NewWeighted(int64(*vmRamLimit)) - t.Run("mkvm", func(t *testing.T) { + t.Run("do", func(t *testing.T) { for n, distro := range distros { n, distro := n, distro if rex.MatchString(distro.name) { t.Logf("%s matches %s", distro.name, rex) - numDistros++ } else { continue } t.Run(distro.name, func(t *testing.T) { + ctx, done := context.WithCancel(context.Background()) + defer done() + + if distro.name == "opensuse-leap-15-1" { + t.Skip("OpenSUSE Leap 15.1's cloud-init image just doesn't work for some reason, see https://github.com/tailscale/tailscale/issues/1988") + } + t.Parallel() + err := ramsem.Acquire(ctx, int64(distro.mem)) + if err != nil { + t.Fatalf("can't acquire ram semaphore: %v", err) + } + defer ramsem.Release(int64(distro.mem)) + cancel := mkVM(t, n, distro, string(pubkey), loginServer, dir) - cancels <- cancel + defer cancel() + var ipm ipMapping + + t.Run("wait-for-start", func(t *testing.T) { + waiter := time.NewTicker(time.Second) + defer waiter.Stop() + var ok bool + for { + <-waiter.C + ipMu.Lock() + if ipm, ok = ipMap[distro.name]; ok { + ipMu.Unlock() + break + } + ipMu.Unlock() + } + }) + + testDistro(t, loginServer, signer, ipm) }) } }) +} - close(cancels) - for cancel := range cancels { - //lint:ignore SA9001 They do actually get ran - defer cancel() - - if len(cancels) == 0 { - t.Log("all VMs started") +func testDistro(t *testing.T, loginServer string, signer ssh.Signer, ipm ipMapping) { + t.Helper() + port := ipm.port + hostport := fmt.Sprintf("127.0.0.1:%d", port) + ccfg := &ssh.ClientConfig{ + User: "root", + Auth: []ssh.AuthMethod{ssh.PublicKeys(signer), ssh.Password(securePassword)}, + HostKeyCallback: ssh.InsecureIgnoreHostKey(), + } + + // NOTE(Xe): This deadline loop helps to make things a bit faster, centos + // sometimes is slow at starting its sshd and will sometimes randomly kill + // SSH sessions on transition to multi-user.target. I don't know why they + // don't use socket activation. + const maxRetries = 5 + var working bool + for i := 0; i < maxRetries; i++ { + cli, err := ssh.Dial("tcp", hostport, ccfg) + if err == nil { + working = true + cli.Close() break } - } - t.Run("wait-for-vms", func(t *testing.T) { - t.Log("waiting for VMs to register") - waiter := time.NewTicker(time.Second) - defer waiter.Stop() - n := 0 - for { - <-waiter.C - ipMu.Lock() - if len(ipMap) == numDistros { - ipMu.Unlock() - break - } else { - if n%30 == 0 { - t.Logf("ipMap: %d", len(ipMap)) - t.Logf("distros: %d", numDistros) - } - } - n++ - ipMu.Unlock() - } - }) - - ipMu.Lock() - defer ipMu.Unlock() - t.Run("join-net", func(t *testing.T) { - for _, ipm := range ipMap { - ipm := ipm - port := ipm.port - t.Run(ipm.name, func(t *testing.T) { - tstest.FixLogs(t) - t.Parallel() - - hostport := fmt.Sprintf("127.0.0.1:%d", port) - - // NOTE(Xe): This retry loop helps to make things a bit faster, centos sometimes is slow at starting its sshd. I don't know why they don't use socket activation. - const maxRetries = 5 - var working bool - for i := 0; i < maxRetries; i++ { - conn, err := net.Dial("tcp", hostport) - if err == nil { - working = true - conn.Close() - break - } - - time.Sleep(5 * time.Second) - } - - if !working { - t.Fatalf("can't connect to %s, tried %d times", hostport, maxRetries) - } + time.Sleep(10 * time.Second) + } - t.Logf("about to ssh into 127.0.0.1:%d", port) - cli, err := ssh.Dial("tcp", hostport, &ssh.ClientConfig{ - User: "root", - Auth: []ssh.AuthMethod{ssh.PublicKeys(signer), ssh.Password(securePassword)}, - HostKeyCallback: ssh.InsecureIgnoreHostKey(), - }) - if err != nil { - t.Fatal(err) - } - copyBinaries(t, cli) + if !working { + t.Fatalf("can't connect to %s, tried %d times", hostport, maxRetries) + } - timeout := 5 * time.Minute + t.Logf("about to ssh into 127.0.0.1:%d", port) + cli, err := ssh.Dial("tcp", hostport, ccfg) + if err != nil { + t.Fatal(err) + } + copyBinaries(t, cli) - e, _, err := expect.SpawnSSH(cli, timeout, expect.Verbose(true), expect.VerboseWriter(log.Writer())) - if err != nil { - t.Fatalf("%d: can't register a shell session: %v", port, err) - } - defer e.Close() + timeout := 5 * time.Minute - t.Log("opened session") + e, _, err := expect.SpawnSSH(cli, timeout, expect.Verbose(true), expect.VerboseWriter(log.Writer())) + if err != nil { + t.Fatalf("%d: can't register a shell session: %v", port, err) + } + defer e.Close() - _, _, err = e.Expect(regexp.MustCompile(`(\#)`), timeout) - if err != nil { - t.Fatalf("%d: can't get a shell: %v", port, err) - } - t.Logf("got shell for %d", port) - err = e.Send("systemctl start tailscaled.service\n") - if err != nil { - t.Fatalf("can't send command to start tailscaled: %v", err) - } - _, _, err = e.Expect(regexp.MustCompile(`(\#)`), timeout) - if err != nil { - t.Fatalf("%d: can't get a shell: %v", port, err) - } - err = e.Send(fmt.Sprintf("sudo tailscale up --login-server %s\n", loginServer)) - if err != nil { - t.Fatalf("%d: can't send tailscale up command: %v", port, err) - } - _, _, err = e.Expect(regexp.MustCompile(`Success.`), timeout) - if err != nil { - t.Fatalf("not successful: %v", err) - } - }) - } - }) + t.Log("opened session") - if numNodes := cs.NumNodes(); numNodes != len(ipMap) { - t.Errorf("wanted %d nodes, got: %d", len(ipMap), numNodes) + _, _, err = e.Expect(regexp.MustCompile(`(\#)`), timeout) + if err != nil { + t.Fatalf("%d: can't get a shell: %v", port, err) + } + t.Logf("got shell for %d", port) + err = e.Send("systemctl start tailscaled.service\n") + if err != nil { + t.Fatalf("can't send command to start tailscaled: %v", err) + } + _, _, err = e.Expect(regexp.MustCompile(`(\#)`), timeout) + if err != nil { + t.Fatalf("%d: can't get a shell: %v", port, err) + } + err = e.Send(fmt.Sprintf("sudo tailscale up --login-server %s\n", loginServer)) + if err != nil { + t.Fatalf("%d: can't send tailscale up command: %v", port, err) + } + _, _, err = e.Expect(regexp.MustCompile(`Success.`), timeout) + if err != nil { + t.Fatalf("not successful: %v", err) } }