tstest/integration/vms: print qemu console output, fix printing issues

Fix a few test printing issues when tests fail.

Qemu console output is super useful when something is wrong in the
harness and we cannot even bring up the tests.
Also useful for figuring out where all the time goes in tests.

A little noisy, but not too noisy as long as you're only running one VM
as part of the tests, which is my plan.

Signed-off-by: David Crawshaw <crawshaw@tailscale.com>
Branch: pull/2755/head
Author: David Crawshaw (committed by David Crawshaw)
Commit: 8668103f06
Parent: 1a9fba5b04

@@ -8,9 +8,9 @@
 package vms

 import (
+	"bytes"
 	"crypto/sha256"
 	"encoding/hex"
-	"errors"
 	"fmt"
 	"io"
 	"net"
@@ -18,11 +18,10 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"regexp"
 	"strconv"
 	"strings"
-	"syscall"
 	"testing"
-	"time"

 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/aws/session"
@@ -33,10 +32,26 @@
 	"tailscale.com/types/logger"
 )

+type vmInstance struct {
+	d       Distro
+	cmd     *exec.Cmd
+	done    chan struct{}
+	doneErr error // not written until done is closed
+}
+
+func (vm *vmInstance) running() bool {
+	select {
+	case <-vm.done:
+		return false
+	default:
+		return true
+	}
+}
+
 // mkVM makes a KVM-accelerated virtual machine and prepares it for introduction
 // to the testcontrol server. The function it returns is for killing the virtual
 // machine when it is time for it to die.
-func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) {
+func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) *vmInstance {
 	t.Helper()

 	cdir, err := os.UserCacheDir()
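
An aside on the vmInstance type added above: it is the basis for the liveness checks used later in this diff. A goroutine (added further down, in mkVM) runs cmd.Wait and closes done, so running() can poll the process with a non-blocking select instead of signaling it. A standalone sketch of the same pattern, with illustrative names and `sleep` as a stand-in command:

package main

import (
	"fmt"
	"os/exec"
	"time"
)

// proc mirrors the vmInstance pattern: Wait in one goroutine, close a
// channel on exit, poll liveness with a non-blocking select.
type proc struct {
	cmd     *exec.Cmd
	done    chan struct{}
	doneErr error // not written until done is closed
}

func start(name string, arg ...string) (*proc, error) {
	p := &proc{cmd: exec.Command(name, arg...), done: make(chan struct{})}
	if err := p.cmd.Start(); err != nil {
		return nil, err
	}
	go func() {
		p.doneErr = p.cmd.Wait()
		close(p.done) // doneErr is safely visible to anyone who saw done close
	}()
	return p, nil
}

func (p *proc) running() bool {
	select {
	case <-p.done:
		return false
	default:
		return true
	}
}

func main() {
	p, err := start("sleep", "1")
	if err != nil {
		panic(err)
	}
	for p.running() {
		time.Sleep(200 * time.Millisecond)
	}
	fmt.Println("exited:", p.doneErr)
}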
@@ -51,7 +66,7 @@ func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) {
 		t.Fatal(err)
 	}

-	mkLayeredQcow(t, tdir, d, h.fetchDistro(t, d))
+	mkLayeredQcow(t, tdir, d, fetchDistro(t, d))
 	mkSeed(t, d, sshKey, hostURL, tdir, port)

 	driveArg := fmt.Sprintf("file=%s,if=virtio", filepath.Join(tdir, d.Name+".qcow2"))
@@ -65,6 +80,7 @@ func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) {
 		"-drive", driveArg,
 		"-cdrom", filepath.Join(tdir, d.Name, "seed", "seed.iso"),
 		"-smbios", "type=1,serial=ds=nocloud;h=" + d.Name,
+		"-nographic",
 	}

 	if *useVNC {
@@ -82,33 +98,58 @@ func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) {
 	t.Logf("running: qemu-system-x86_64 %s", strings.Join(args, " "))

 	cmd := exec.Command("qemu-system-x86_64", args...)
-	cmd.Stdout = logger.FuncWriter(t.Logf)
-	cmd.Stderr = logger.FuncWriter(t.Logf)
-	err = cmd.Start()
-	if err != nil {
+	cmd.Stdout = &qemuLog{f: t.Logf}
+	cmd.Stderr = &qemuLog{f: t.Logf}
+	if err := cmd.Start(); err != nil {
 		t.Fatal(err)
 	}

-	time.Sleep(time.Second)
-
-	// NOTE(Xe): In Unix if you do a kill with signal number 0, the kernel will do
-	// all of the access checking for the process (existence, permissions, etc) but
-	// nothing else. This is a way to ensure that qemu's process is active.
-	if err := cmd.Process.Signal(syscall.Signal(0)); err != nil {
-		t.Fatalf("qemu is not running: %v", err)
+	vm := &vmInstance{
+		cmd:  cmd,
+		d:    d,
+		done: make(chan struct{}),
 	}
+	go func() {
+		vm.doneErr = cmd.Wait()
+		close(vm.done)
+	}()

 	t.Cleanup(func() {
-		err := cmd.Process.Kill()
+		err := vm.cmd.Process.Kill()
 		if err != nil {
-			t.Errorf("can't kill %s (%d): %v", d.Name, cmd.Process.Pid, err)
+			t.Logf("can't kill %s (%d): %v", d.Name, cmd.Process.Pid, err)
 		}

-		cmd.Wait()
+		<-vm.done
 	})
+
+	return vm
+}
+
+type qemuLog struct {
+	buf []byte
+	f   logger.Logf
+}
+
+func (w *qemuLog) Write(p []byte) (int, error) {
+	if !*verboseQemu {
+		return len(p), nil
+	}
+	w.buf = append(w.buf, p...)
+	if i := bytes.LastIndexByte(w.buf, '\n'); i > 0 {
+		j := i
+		if w.buf[j-1] == '\r' {
+			j--
+		}
+		buf := ansiEscCodeRE.ReplaceAll(w.buf[:j], nil)
+		w.buf = w.buf[i+1:]
+		w.f("qemu console: %q", buf)
+	}
+	return len(p), nil
+}
+
+var ansiEscCodeRE = regexp.MustCompile("\x1b" + `\[[0-?]*[ -/]*[@-~]`)

 // fetchFromS3 fetches a distribution image from Amazon S3 or reports whether
 // it is unable to. It can fail to fetch from S3 if there is either no AWS
 // configuration (in ~/.aws/credentials) or if the `-no-s3` flag is passed. In
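
An aside on the qemuLog writer introduced in the hunk above: it is a line-buffered adapter for logger.Logf. It accumulates writes, flushes only through the last complete line (holding back any unterminated tail), trims the CR of a CRLF ending, and strips ANSI escape sequences so the guest's terminal control codes don't litter the test log. The same mechanism in a self-contained form, with illustrative names and plain fmt output instead of logger.Logf:

package main

import (
	"bytes"
	"fmt"
	"regexp"
)

// Same pattern as ansiEscCodeRE above: ESC [ params intermediates final.
var ansiRE = regexp.MustCompile("\x1b" + `\[[0-?]*[ -/]*[@-~]`)

// lineLog buffers writes and logs only complete lines, like qemuLog.
type lineLog struct {
	buf []byte
	f   func(format string, args ...interface{})
}

func (w *lineLog) Write(p []byte) (int, error) {
	w.buf = append(w.buf, p...)
	if i := bytes.LastIndexByte(w.buf, '\n'); i > 0 {
		j := i
		if w.buf[j-1] == '\r' {
			j-- // drop the CR of a CRLF line ending
		}
		line := ansiRE.ReplaceAll(w.buf[:j], nil)
		w.buf = w.buf[i+1:] // keep the unterminated tail for the next Write
		w.f("console: %q", line)
	}
	return len(p), nil
}

func main() {
	w := &lineLog{f: func(format string, args ...interface{}) {
		fmt.Printf(format+"\n", args...)
	}}
	w.Write([]byte("boot"))                     // partial line: nothing logged yet
	w.Write([]byte("ed\x1b[32m ok\x1b[0m\r\n")) // prints: console: "booted ok"
}

Routing this through t.Logf, as the real code does, keeps qemu's console output interleaved with the test's own log lines.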
@@ -161,7 +202,7 @@ func fetchFromS3(t *testing.T, fout *os.File, d Distro) bool {

 // fetchDistro fetches a distribution from the internet if it doesn't already exist locally. It
 // also validates the sha256 sum from a known good hash.
-func (h *Harness) fetchDistro(t *testing.T, resultDistro Distro) string {
+func fetchDistro(t *testing.T, resultDistro Distro) string {
 	t.Helper()

 	cdir, err := os.UserCacheDir()
@@ -170,66 +211,60 @@ func fetchDistro(t *testing.T, resultDistro Distro) string {
 	}
 	cdir = filepath.Join(cdir, "tailscale", "vm-test")

-	if strings.HasPrefix(resultDistro.Name, "nixos") {
-		return h.makeNixOSImage(t, resultDistro, cdir)
-	}
-
 	qcowPath := filepath.Join(cdir, "qcow2", resultDistro.SHA256Sum)

-	_, err = os.Stat(qcowPath)
-	if err == nil {
+	if _, err = os.Stat(qcowPath); err == nil {
 		hash := checkCachedImageHash(t, resultDistro, cdir)
-		if hash != resultDistro.SHA256Sum {
-			t.Logf("hash for %s (%s) doesn't match expected %s, re-downloading", resultDistro.Name, qcowPath, resultDistro.SHA256Sum)
-			err = errors.New("some fake non-nil error to force a redownload")
-
-			if err := os.Remove(qcowPath); err != nil {
-				t.Fatalf("can't delete wrong cached image: %v", err)
-			}
+		if hash == resultDistro.SHA256Sum {
+			return qcowPath
 		}
+		t.Logf("hash for %s (%s) doesn't match expected %s, re-downloading", resultDistro.Name, qcowPath, resultDistro.SHA256Sum)
+		if err := os.Remove(qcowPath); err != nil {
+			t.Fatalf("can't delete wrong cached image: %v", err)
+		}
 	}

-	if err != nil {
-		t.Logf("downloading distro image %s to %s", resultDistro.URL, qcowPath)
-		fout, err := os.Create(qcowPath)
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if !fetchFromS3(t, fout, resultDistro) {
-			resp, err := http.Get(resultDistro.URL)
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			if resp.StatusCode != http.StatusOK {
-				resp.Body.Close()
-				t.Fatalf("%s replied %s", resultDistro.URL, resp.Status)
-			}
-
-			_, err = io.Copy(fout, resp.Body)
-			if err != nil {
-				t.Fatalf("download of %s failed: %v", resultDistro.URL, err)
-			}
-
-			resp.Body.Close()
-			err = fout.Close()
-			if err != nil {
-				t.Fatalf("can't close fout: %v", err)
-			}
-
-			hash := checkCachedImageHash(t, resultDistro, cdir)
-
-			if hash != resultDistro.SHA256Sum {
-				t.Fatalf("hash mismatch, want: %s, got: %s", resultDistro.SHA256Sum, hash)
-			}
-		}
-	}
+	t.Logf("downloading distro image %s to %s", resultDistro.URL, qcowPath)
+	if err := os.MkdirAll(filepath.Dir(qcowPath), 0777); err != nil {
+		t.Fatal(err)
+	}
+	fout, err := os.Create(qcowPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !fetchFromS3(t, fout, resultDistro) {
+		resp, err := http.Get(resultDistro.URL)
+		if err != nil {
+			t.Fatalf("can't fetch qcow2 for %s (%s): %v", resultDistro.Name, resultDistro.URL, err)
+		}
+		if resp.StatusCode != http.StatusOK {
+			resp.Body.Close()
+			t.Fatalf("%s replied %s", resultDistro.URL, resp.Status)
+		}
+
+		if _, err = io.Copy(fout, resp.Body); err != nil {
+			t.Fatalf("download of %s failed: %v", resultDistro.URL, err)
+		}
+
+		resp.Body.Close()
+		err = fout.Close()
+		if err != nil {
+			t.Fatalf("can't close fout: %v", err)
+		}
+
+		hash := checkCachedImageHash(t, resultDistro, cdir)
+
+		if hash != resultDistro.SHA256Sum {
+			t.Fatalf("hash mismatch for %s, want: %s, got: %s", resultDistro.URL, resultDistro.SHA256Sum, hash)
+		}
+	}

 	return qcowPath
 }

-func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash string) {
+func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) string {
 	t.Helper()

 	qcowPath := filepath.Join(cacheDir, "qcow2", d.SHA256Sum)
@@ -238,6 +273,7 @@ func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash string) {
 	if err != nil {
 		t.Fatal(err)
 	}
+	defer fin.Close()

 	hasher := sha256.New()
 	if _, err := io.Copy(hasher, fin); err != nil {
@@ -248,10 +284,7 @@ func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash string) {
 	if hash != d.SHA256Sum {
 		t.Fatalf("hash mismatch, got: %q, want: %q", hash, d.SHA256Sum)
 	}
-
-	gotHash = hash
-
-	return
+	return hash
 }

 func (h *Harness) copyBinaries(t *testing.T, d Distro, conn *ssh.Client) {
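
An aside on the caching scheme these hunks preserve: fetchDistro's cache is content-addressed, so the cached file is named by its expected SHA-256, a stale or corrupt image fails the re-hash and is deleted before a fresh download, and the fresh download is itself verified before use. Boiled down, the verify-or-refetch pattern looks like this (a sketch; cachedFetch and fileSHA256 are invented helpers, not code from this commit):

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
	"path/filepath"
)

// fileSHA256 returns the hex SHA-256 of the file at path.
func fileSHA256(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}
	return hex.EncodeToString(h.Sum(nil)), nil
}

// cachedFetch returns a path whose contents hash to wantHex, calling fetch
// to (re)populate the cache entry when it is missing or corrupt.
func cachedFetch(dir, wantHex string, fetch func(io.Writer) error) (string, error) {
	path := filepath.Join(dir, wantHex) // the expected hash is the cache key
	if got, err := fileSHA256(path); err == nil {
		if got == wantHex {
			return path, nil // verified cache hit
		}
		os.Remove(path) // stale or corrupt entry: refetch
	}
	f, err := os.Create(path)
	if err != nil {
		return "", err
	}
	if err := fetch(f); err != nil {
		f.Close()
		return "", err
	}
	if err := f.Close(); err != nil {
		return "", err
	}
	got, err := fileSHA256(path)
	if err != nil {
		return "", err
	}
	if got != wantHex {
		return "", fmt.Errorf("hash mismatch: got %s, want %s", got, wantHex)
	}
	return path, nil
}

func main() {
	body := []byte("hello, qcow2\n")
	sum := sha256.Sum256(body)
	path, err := cachedFetch(os.TempDir(), hex.EncodeToString(sum[:]), func(w io.Writer) error {
		_, err := w.Write(body) // stand-in for the S3/HTTP download
		return err
	})
	fmt.Println(path, err)
}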

@@ -41,34 +41,35 @@ func retry(t *testing.T, fn func() error) {
 }

 func (h *Harness) testPing(t *testing.T, ipAddr netaddr.IP, cli *ssh.Client) {
-	var outp []byte
-	var err error
-
 	retry(t, func() error {
 		sess := getSession(t, cli)
-
-		outp, err = sess.CombinedOutput(fmt.Sprintf("tailscale ping -c 1 %s", ipAddr))
-		return err
+		cmd := fmt.Sprintf("tailscale ping --verbose %s", ipAddr)
+		outp, err := sess.CombinedOutput(cmd)
+		if err == nil && !bytes.Contains(outp, []byte("pong")) {
+			err = fmt.Errorf("%s: no pong", cmd)
+		}
+		if err != nil {
+			return fmt.Errorf("%s : %v, output: %s", cmd, err, outp)
+		}
+		t.Logf("%s", outp)
+		return nil
 	})

-	if !bytes.Contains(outp, []byte("pong")) {
-		t.Log(string(outp))
-		t.Fatal("no pong")
-	}
-
 	retry(t, func() error {
 		sess := getSession(t, cli)
 		// NOTE(Xe): the ping command is inconsistent across distros. Joy.
-		pingCmd := fmt.Sprintf("sh -c 'ping -c 1 %[1]s || ping -6 -c 1 %[1]s || ping6 -c 1 %[1]s\n'", ipAddr)
-		t.Logf("running %q", pingCmd)
-		outp, err = sess.CombinedOutput(pingCmd)
+		cmd := fmt.Sprintf("sh -c 'ping -c 1 %[1]s || ping -6 -c 1 %[1]s || ping6 -c 1 %[1]s\n'", ipAddr)
+		t.Logf("running %q", cmd)
+		outp, err := sess.CombinedOutput(cmd)
+		if err == nil && !bytes.Contains(outp, []byte("bytes")) {
+			err = fmt.Errorf("%s: wanted output to contain %q, it did not", cmd, "bytes")
+		}
+		if err != nil {
+			err = fmt.Errorf("%s: %v, output: %s", cmd, err, outp)
+		}
 		return err
 	})
-
-	if !bytes.Contains(outp, []byte("bytes")) {
-		t.Log(string(outp))
-		t.Fatalf("wanted output to contain %q, it did not", "bytes")
-	}
 }

 func getSession(t *testing.T, cli *ssh.Client) *ssh.Session {
@@ -111,7 +112,6 @@ func (h *Harness) testOutgoingTCP(t *testing.T, ipAddr netaddr.IP, cli *ssh.Client) {
 	// sess.Stdout = logger.FuncWriter(t.Logf)
 	// sess.Run("sysctl -a")

-	var outp []byte
 	retry(t, func() error {
 		var err error
 		sess := getSession(t, cli)
@@ -121,15 +121,15 @@ func (h *Harness) testOutgoingTCP(t *testing.T, ipAddr netaddr.IP, cli *ssh.Client) {
 		}
 		cmd := fmt.Sprintf("curl -v %s -s -f http://%s\n", v6Arg, net.JoinHostPort(ipAddr.String(), port))
 		t.Logf("running: %s", cmd)
-		outp, err = sess.CombinedOutput(cmd)
+		outp, err := sess.CombinedOutput(cmd)
+		if msg := string(bytes.TrimSpace(outp)); err == nil && !strings.Contains(msg, sendmsg) {
+			err = fmt.Errorf("wanted %q, got: %q", sendmsg, msg)
+		}
 		if err != nil {
-			t.Log(string(outp))
+			err = fmt.Errorf("%v, output: %s", err, outp)
 		}
 		return err
 	})
-	if msg := string(bytes.TrimSpace(outp)); !strings.Contains(msg, sendmsg) {
-		t.Fatalf("wanted %q, got: %q", sendmsg, msg)
-	}

 	<-ctx.Done()
 }
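
Both rewrites above move the assertions into the retried closure, so a transient failure (a VM still booting, a dropped ping) triggers another attempt instead of failing the test outright, and the failing command plus its output ride along in the returned error. The retry helper's body is not part of this diff; a plausible shape for it, assuming only the signature from the hunk header (attempt count and delay are invented):

package vms // illustrative; the real helper lives in this package

import (
	"testing"
	"time"
)

// retry reruns fn until it returns nil, failing the test after the last
// attempt. The budget and backoff here are assumptions, not the commit's.
func retry(t *testing.T, fn func() error) {
	t.Helper()
	const maxAttempts = 5
	var err error
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		if err = fn(); err == nil {
			return
		}
		t.Logf("attempt %d/%d failed: %v", attempt, maxAttempts, err)
		time.Sleep(time.Second)
	}
	t.Fatalf("all %d attempts failed, last error: %v", maxAttempts, err)
}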

@@ -30,7 +30,6 @@ import (
 	"golang.org/x/sync/semaphore"
 	"inet.af/netaddr"
 	"tailscale.com/tstest"
-	"tailscale.com/tstest/integration"
 	"tailscale.com/types/logger"
 )

@@ -45,6 +44,7 @@
 	vmRamLimit        = flag.Int("ram-limit", 4096, "the maximum number of megabytes of ram that can be used for VMs, must be greater than or equal to 1024")
 	useVNC            = flag.Bool("use-vnc", false, "if set, display guest vms over VNC")
 	verboseLogcatcher = flag.Bool("verbose-logcatcher", false, "if set, spew logcatcher to t.Logf (spamtastic)")
+	verboseQemu       = flag.Bool("verbose-qemu", true, "if set, print qemu console to t.Logf")
 	distroRex         = func() *regexValue {
 		result := &regexValue{r: regexp.MustCompile(`.*`)}
 		flag.Var(result, "distro-regex", "The regex that matches what distros should be run")
@@ -57,22 +57,18 @@ func TestDownloadImages(t *testing.T) {
 		t.Skip("not running integration tests (need --run-vm-tests)")
 	}

-	bins := integration.BuildTestBinaries(t)
-
 	for _, d := range Distros {
 		distro := d
 		t.Run(distro.Name, func(t *testing.T) {
+			t.Parallel()
 			if !distroRex.Unwrap().MatchString(distro.Name) {
 				t.Skipf("distro name %q doesn't match regex: %s", distro.Name, distroRex)
 			}

 			if strings.HasPrefix(distro.Name, "nixos") {
 				t.Skip("NixOS is built on the fly, no need to download it")
 			}
-
-			t.Parallel()
-
-			(&Harness{bins: bins}).fetchDistro(t, distro)
+			fetchDistro(t, distro)
 		})
 	}
 }
@@ -270,23 +266,36 @@ func testOneDistribution(t *testing.T, n int, distro Distro) {
 	}
 	t.Cleanup(func() { ramsem.sem.Release(int64(distro.MemoryMegs)) })

-	h.mkVM(t, n, distro, h.pubKey, h.loginServerURL, dir)
+	vm := h.mkVM(t, n, distro, h.pubKey, h.loginServerURL, dir)
 	var ipm ipMapping

 	t.Run("wait-for-start", func(t *testing.T) {
+		for i := 0; i < 100; i++ {
+			if vm.running() {
+				break
+			}
+			time.Sleep(100 * time.Millisecond)
+		}
+		if !vm.running() {
+			t.Fatal("vm not running")
+		}
+
 		waiter := time.NewTicker(time.Second)
 		defer waiter.Stop()
 		for {
 			var ok bool
 			h.ipMu.Lock()
-			if ipm, ok = h.ipMap[distro.Name]; ok {
-				h.ipMu.Unlock()
-				break
-			}
+			ipm, ok = h.ipMap[distro.Name]
 			h.ipMu.Unlock()
+			if ok {
+				break
+			}
+			if !vm.running() {
+				t.Fatal("vm not running")
+			}
 			<-waiter.C
 		}
 	})

 	h.testDistro(t, distro, ipm)
 }
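
The new wait-for-start body polls two conditions at once: that qemu is still alive (vm.running) and that the logcatcher has recorded an IP for the distro. Factored out, the shape is a bounded poll-or-fail loop; a generic sketch (waitUntil is an invented helper, the commit inlines this logic):

package vms // illustrative

import (
	"testing"
	"time"
)

// waitUntil polls cond once per tick until it reports true, failing the
// test if alive turns false or the attempt budget runs out.
func waitUntil(t *testing.T, alive, cond func() bool) {
	t.Helper()
	waiter := time.NewTicker(time.Second)
	defer waiter.Stop()
	for i := 0; i < 100; i++ {
		if cond() {
			return
		}
		if !alive() {
			t.Fatal("process exited while waiting")
		}
		<-waiter.C
	}
	t.Fatal("timed out waiting for condition")
}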
@@ -377,11 +386,10 @@ func (h *Harness) testDistro(t *testing.T, d Distro, ipm ipMapping) {
 			outp, err = sess.CombinedOutput("tailscale status")
 			if err == nil {
 				t.Logf("tailscale status: %s", outp)
 				if !strings.Contains(string(outp), "100.64.0.1") {
-					t.Log(string(outp))
 					t.Fatal("can't find tester IP")
 				}
 				return
 			}

 			time.Sleep(dur)
