From 622dc7b093218155c1f27a1eb62bc9e708955a98 Mon Sep 17 00:00:00 2001 From: Christine Dodrill Date: Tue, 8 Jun 2021 12:47:24 -0400 Subject: [PATCH] tstest/integration/vms: download images from s3 (#2035) This makes integration tests pull pristine VM images from Amazon S3 if they don't exist on disk. If S3 is disabled with the `-no-s3` flag or an AWS session cannot be created, it will fall back to grabbing the image from the public internet. The VM images on the public internet are known to be updated without warning, which changes their SHA256 checksums. This is not ideal for a test that we want to be able to fire and forget, then run reliably for a very long time. This requires an AWS profile to be configured at the default path. The S3 bucket is rigged so that the requester pays. The VM images are currently about 6.9 gigabytes. Please keep this in mind when running these tests on your machine. Documentation was added to the integration test folder to aid others in running these tests on their machine. Some wording in the logs of the tests was altered. 
Updates #1988 Signed-off-by: Christine Dodrill --- go.mod | 1 + go.sum | 5 ++ tstest/integration/vms/README.md | 98 +++++++++++++++++++++++ tstest/integration/vms/vms_test.go | 122 ++++++++++++++++++++++------- 4 files changed, 197 insertions(+), 29 deletions(-) create mode 100644 tstest/integration/vms/README.md diff --git a/go.mod b/go.mod index 0f3fd421b..482483745 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.16 require ( github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect + github.com/aws/aws-sdk-go v1.38.52 // indirect github.com/coreos/go-iptables v0.6.0 github.com/frankban/quicktest v1.13.0 github.com/gliderlabs/ssh v0.3.2 diff --git a/go.sum b/go.sum index 6abcb32f0..14f2c4f12 100644 --- a/go.sum +++ b/go.sum @@ -55,6 +55,8 @@ github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmV github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= +github.com/aws/aws-sdk-go v1.38.52 h1:7NKcUyTG/CyDX835kq04DDNe8vXaJhbGW8ThemHb18A= +github.com/aws/aws-sdk-go v1.38.52/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= @@ -308,6 +310,9 @@ github.com/jingyugao/rowserrcheck v0.0.0-20191204022205-72ab7603b68a/go.mod h1:x github.com/jirfag/go-printf-func-name v0.0.0-20191110105641-45db9963cdd3/go.mod h1:HEWGJkRDzjJY2sqdDwxccsGicWEf9BQOZsq2tV+xzM0= github.com/jirfag/go-printf-func-name 
v0.0.0-20200119135958-7558a9eaa5af h1:KA9BjwUk7KlCh6S9EAGWBt1oExIUv9WyNCiRz5amv48= github.com/jirfag/go-printf-func-name v0.0.0-20200119135958-7558a9eaa5af/go.mod h1:HEWGJkRDzjJY2sqdDwxccsGicWEf9BQOZsq2tV+xzM0= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= github.com/jmoiron/sqlx v1.2.1-0.20190826204134-d7d95172beb5/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= diff --git a/tstest/integration/vms/README.md b/tstest/integration/vms/README.md new file mode 100644 index 000000000..881d47ec6 --- /dev/null +++ b/tstest/integration/vms/README.md @@ -0,0 +1,98 @@ +# End-to-End VM-based Integration Testing + +This test spins up a bunch of common linux distributions and then tries to get +them to connect to a +[`testcontrol`](https://pkg.go.dev/tailscale.com/tstest/integration/testcontrol) +server. + +## Running + +This test currently only runs on Linux. 
+ +This test depends on the following command line tools: + +- [qemu](https://www.qemu.org/) +- [cdrkit](https://en.wikipedia.org/wiki/Cdrkit) +- [openssh](https://www.openssh.com/) + +This test also requires the following: + +- about 10 GB of temporary storage +- about 10 GB of cached VM images +- at least 4 GB of ram for virtual machines +- hardware virtualization support + ([KVM](https://www.linux-kvm.org/page/Main_Page)) enabled in the BIOS +- the `kvm` module to be loaded (`modprobe kvm`) +- the user running these tests must have access to `/dev/kvm` (being in the + `kvm` group should suffice) + +This optionally requires an AWS profile to be configured at the [default +path](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html). +The S3 bucket is set so that the requester pays. Please keep this in mind when +running these tests on your machine. If you are uncomfortable with the cost from +downloading from S3, you should pass the `-no-s3` flag to disable downloads from +S3. However keep in mind that some distributions do not use stable URLs for each +individual image artifact, so there may be spurious test failures as a result. + +If you are using [Nix](https://nixos.org), you can run all of the tests with the +correct command line tools using this command: + +```console +$ nix-shell -p openssh -p go -p qemu -p cdrkit --run "go test . --run-vm-tests --v --timeout 30m" +``` + +Keep the timeout high for the first run, especially if you are not downloading +VM images from S3. The mirrors we pull images from have download rate limits and +will take a while to download. + +Because of the hardware requirements of this test, this test will not run +without the `--run-vm-tests` flag set. + +## Other Fun Flags + +This test's behavior is customized with command line flags. 
+ +### Don't Download Images From S3 + +If you pass the `-no-s3` flag to `go test`, the S3 step will be skipped in favor +of downloading the images directly from upstream sources, which may cause the +test to fail in odd places. + +### Distribution Picking + +This test runs on a large number of distributions. By default it tries to run +everything, which may or may not be ideal for you. If you only want to test a +subset of distributions, you can use the `--distro-regex` flag to match a subset +of distributions using a [regular expression](https://golang.org/pkg/regexp/) +like this: + +```console +$ go test -run-vm-tests -distro-regex centos +``` + +This would run all tests on all versions of CentOS. + +```console +$ go test -run-vm-tests -distro-regex '(debian|ubuntu)' +``` + +This would run all tests on all versions of Debian and Ubuntu. + +### Ram Limiting + +This test uses a lot of memory. In order to avoid making machines run out of +memory running this test, a semaphore is used to limit how many megabytes of ram +are being used at once. By default this semaphore is set to 4096 MB of ram +(about 4 gigabytes). You can customize this with the `--ram-limit` flag: + +```console +$ go test --run-vm-tests --ram-limit 2048 +$ go test --run-vm-tests --ram-limit 65536 +``` + +The first example will set the limit to 2048 MB of ram (about 2 gigabytes). The +second example will set the limit to 65536 MB of ram (about 64 gigabytes). +Please be careful with this flag; improper usage of it is known to cause the +Linux out-of-memory killer to engage. Try to keep it within 50-75% of your +machine's available ram (there is some overhead involved with the +virtualization) to be on the safe side. 
diff --git a/tstest/integration/vms/vms_test.go b/tstest/integration/vms/vms_test.go index 8e56b1d4d..a1f0f2208 100644 --- a/tstest/integration/vms/vms_test.go +++ b/tstest/integration/vms/vms_test.go @@ -30,6 +30,10 @@ import ( "text/template" "time" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/aws/aws-sdk-go/service/s3/s3manager" expect "github.com/google/goexpect" "github.com/pkg/sftp" "golang.org/x/crypto/ssh" @@ -41,15 +45,21 @@ import ( "tailscale.com/tstest/integration/testcontrol" ) -const securePassword = "hunter2" +const ( + securePassword = "hunter2" + bucketName = "tailscale-integration-vm-images" +) -var runVMTests = flag.Bool("run-vm-tests", false, "if set, run expensive VM based integration tests") -var vmRamLimit = flag.Int("ram-limit", 4096, "the maximum number of megabytes of ram that can be used for VMs, must be greater than or equal to 1024") -var distroRex *regexValue = func() *regexValue { - result := &regexValue{r: regexp.MustCompile(`.*`)} - flag.Var(result, "distro-regex", "The regex that matches what distros should be run") - return result -}() +var ( + runVMTests = flag.Bool("run-vm-tests", false, "if set, run expensive VM based integration tests") + noS3 = flag.Bool("no-s3", false, "if set, always download images from the public internet (risks breaking)") + vmRamLimit = flag.Int("ram-limit", 4096, "the maximum number of megabytes of ram that can be used for VMs, must be greater than or equal to 1024") + distroRex = func() *regexValue { + result := &regexValue{r: regexp.MustCompile(`.*`)} + flag.Var(result, "distro-regex", "The regex that matches what distros should be run") + return result + }() +) type Distro struct { name string // amazon-linux @@ -134,6 +144,56 @@ var distros = []Distro{ {"ubuntu-21-04", "https://cloud-images.ubuntu.com/hirsute/20210603/hirsute-server-cloudimg-amd64.img", "bf07f36fc99ff521d3426e7d257e28f0c81feebc9780b0c4f4e25ae594ff4d3b", 
512, "apt"}, } +// fetchFromS3 fetches a distribution image from Amazon S3 or reports whether +// it is unable to. It can fail to fetch from S3 if there is either no AWS +// configuration (in ~/.aws/credentials) or if the `-no-s3` flag is passed. In +// that case the test will fall back to downloading distribution images from the +// public internet. +// +// Like fetching from HTTP, the test will fail if an error is encountered during +// the downloading process. +// +// This function writes the distribution image to fout and then closes it. If it +// returns false, fout has not been written to and is left open for the caller. +func fetchFromS3(t *testing.T, fout *os.File, d Distro) bool { + t.Helper() + + if *noS3 { + t.Log("you asked to not use S3, not using S3") + return false + } + + sess, err := session.NewSession(&aws.Config{ + Region: aws.String("us-east-1"), + }) + if err != nil { + t.Logf("can't make AWS session: %v", err) + return false + } + + dler := s3manager.NewDownloader(sess, func(d *s3manager.Downloader) { + d.PartSize = 64 * 1024 * 1024 // 64MB per part + }) + + t.Logf("fetching s3://%s/%s", bucketName, d.sha256sum) + + _, err = dler.Download(fout, &s3.GetObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(d.sha256sum), + }) + if err != nil { + fout.Close() + t.Fatalf("can't get s3://%s/%s: %v", bucketName, d.sha256sum, err) + } + + err = fout.Close() + if err != nil { + t.Fatalf("can't close fout: %v", err) + } + + return true +} + // fetchDistro fetches a distribution from the internet if it doesn't already exist locally. It // also validates the sha256 sum from a known good hash. 
func fetchDistro(t *testing.T, resultDistro Distro) { @@ -166,31 +226,29 @@ func fetchDistro(t *testing.T, resultDistro Distro) { if err != nil { t.Fatal(err) } - resp, err := http.Get(resultDistro.url) - if err != nil { - t.Fatalf("can't fetch qcow2 for %s (%s): %v", resultDistro.name, resultDistro.url, err) - } - if resp.StatusCode != http.StatusOK { - resp.Body.Close() - t.Fatalf("%s replied %s", resultDistro.url, resp.Status) - } + if !fetchFromS3(t, fout, resultDistro) { + resp, err := http.Get(resultDistro.url) + if err != nil { + t.Fatalf("can't fetch qcow2 for %s (%s): %v", resultDistro.name, resultDistro.url, err) + } - _, err = io.Copy(fout, resp.Body) - resp.Body.Close() - if err != nil { - t.Fatalf("download of %s failed: %v", resultDistro.url, err) - } + if resp.StatusCode != http.StatusOK { + resp.Body.Close() + t.Fatalf("%s replied %s", resultDistro.url, resp.Status) + } - err = fout.Close() - if err != nil { - t.Fatalf("can't close fout: %v", err) - } + _, err = io.Copy(fout, resp.Body) + resp.Body.Close() + if err != nil { + t.Fatalf("download of %s failed: %v", resultDistro.url, err) + } - hash := checkCachedImageHash(t, resultDistro, cdir) + hash := checkCachedImageHash(t, resultDistro, cdir) - if hash != resultDistro.sha256sum { - t.Fatalf("hash mismatch, want: %s, got: %s", resultDistro.sha256sum, hash) + if hash != resultDistro.sha256sum { + t.Fatalf("hash mismatch, want: %s, got: %s", resultDistro.sha256sum, hash) + } } } } @@ -209,7 +267,13 @@ func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash stri if _, err := io.Copy(hasher, fin); err != nil { t.Fatal(err) } - gotHash = hex.EncodeToString(hasher.Sum(nil)) + hash := hex.EncodeToString(hasher.Sum(nil)) + + if hash != d.sha256sum { + t.Fatalf("hash mismatch, got: %q, want: %q", hash, d.sha256sum) + } + + gotHash = hash return }