tsnet: split bytes and routes metrics tests

Updates #13420

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
mpminardi/derp-experiments
Kristoffer Dalby 1 year ago committed by Kristoffer Dalby
parent 06d929f9ac
commit e55899386b

@ -936,16 +936,136 @@ func sendData(logf func(format string, args ...any), ctx context.Context, bytesC
return nil return nil
} }
func TestUserMetrics(t *testing.T) { func TestUserMetricsByteCounters(t *testing.T) {
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/13420") flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/13420")
tstest.ResourceCheck(t) tstest.ResourceCheck(t)
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel() defer cancel()
controlURL, c := startControl(t) controlURL, _ := startControl(t)
s1, s1ip, s1PubKey := startServer(t, ctx, controlURL, "s1") s1, s1ip, _ := startServer(t, ctx, controlURL, "s1")
s2, s2ip, _ := startServer(t, ctx, controlURL, "s2") s2, s2ip, _ := startServer(t, ctx, controlURL, "s2")
lc1, err := s1.LocalClient()
if err != nil {
t.Fatal(err)
}
lc2, err := s2.LocalClient()
if err != nil {
t.Fatal(err)
}
// Force an update to the netmap to ensure that the metrics are up-to-date.
s1.lb.DebugForceNetmapUpdate()
s2.lb.DebugForceNetmapUpdate()
// Wait for both nodes to have a peer in their netmap.
waitForCondition(t, "waiting for netmaps to contain peer", 90*time.Second, func() bool {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
status1, err := lc1.Status(ctx)
if err != nil {
t.Logf("getting status: %s", err)
return false
}
status2, err := lc2.Status(ctx)
if err != nil {
t.Logf("getting status: %s", err)
return false
}
return len(status1.Peers()) > 0 && len(status2.Peers()) > 0
})
// ping to make sure the connection is up.
res, err := lc2.Ping(ctx, s1ip, tailcfg.PingICMP)
if err != nil {
t.Fatalf("pinging: %s", err)
}
t.Logf("ping success: %#+v", res)
mustDirect(t, t.Logf, lc1, lc2)
// 1 megabytes
bytesToSend := 1 * 1024 * 1024
// This asserts generates some traffic, it is factored out
// of TestUDPConn.
start := time.Now()
err = sendData(t.Logf, ctx, bytesToSend, s1, s2, s1ip, s2ip)
if err != nil {
t.Fatalf("Failed to send packets: %v", err)
}
t.Logf("Sent %d bytes from s1 to s2 in %s", bytesToSend, time.Since(start).String())
ctxLc, cancelLc := context.WithTimeout(context.Background(), 5*time.Second)
defer cancelLc()
metrics1, err := lc1.UserMetrics(ctxLc)
if err != nil {
t.Fatal(err)
}
parsedMetrics1, err := parseMetrics(metrics1)
if err != nil {
t.Fatal(err)
}
// Allow the metrics for the bytes sent to be off by 15%.
bytesSentTolerance := 1.15
t.Logf("Metrics1:\n%s\n", metrics1)
// Verify that the amount of data recorded in bytes is higher or equal to the data sent
inboundBytes1 := parsedMetrics1[`tailscaled_inbound_bytes_total{path="direct_ipv4"}`]
if inboundBytes1 < float64(bytesToSend) {
t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4"}: expected higher (or equal) than %d, got: %f`, bytesToSend, inboundBytes1)
}
// But ensure that it is not too much higher than the data sent.
if inboundBytes1 > float64(bytesToSend)*bytesSentTolerance {
t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, inboundBytes1)
}
metrics2, err := lc2.UserMetrics(ctx)
if err != nil {
t.Fatal(err)
}
parsedMetrics2, err := parseMetrics(metrics2)
if err != nil {
t.Fatal(err)
}
t.Logf("Metrics2:\n%s\n", metrics2)
// Verify that the amount of data recorded in bytes is higher or equal than the data sent.
outboundBytes2 := parsedMetrics2[`tailscaled_outbound_bytes_total{path="direct_ipv4"}`]
if outboundBytes2 < float64(bytesToSend) {
t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4"}: expected higher (or equal) than %d, got: %f`, bytesToSend, outboundBytes2)
}
// But ensure that it is not too much higher than the data sent.
if outboundBytes2 > float64(bytesToSend)*bytesSentTolerance {
t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, outboundBytes2)
}
}
func TestUserMetricsRouteGauges(t *testing.T) {
// Windows does not seem to support or report back routes when running in
// userspace via tsnet. So, we skip this check on Windows.
// TODO(kradalby): Figure out if this is correct.
if runtime.GOOS == "windows" {
t.Skipf("skipping on windows")
}
flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/13420")
tstest.ResourceCheck(t)
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel()
controlURL, c := startControl(t)
s1, _, s1PubKey := startServer(t, ctx, controlURL, "s1")
s2, _, _ := startServer(t, ctx, controlURL, "s2")
s1.lb.EditPrefs(&ipn.MaskedPrefs{ s1.lb.EditPrefs(&ipn.MaskedPrefs{
Prefs: ipn.Prefs{ Prefs: ipn.Prefs{
AdvertiseRoutes: []netip.Prefix{ AdvertiseRoutes: []netip.Prefix{
@ -973,24 +1093,11 @@ func TestUserMetrics(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
// ping to make sure the connection is up.
res, err := lc2.Ping(ctx, s1ip, tailcfg.PingICMP)
if err != nil {
t.Fatalf("pinging: %s", err)
}
t.Logf("ping success: %#+v", res)
ht := s1.lb.HealthTracker()
ht.SetUnhealthy(testWarnable, health.Args{"Text": "Hello world 1"})
// Force an update to the netmap to ensure that the metrics are up-to-date. // Force an update to the netmap to ensure that the metrics are up-to-date.
s1.lb.DebugForceNetmapUpdate() s1.lb.DebugForceNetmapUpdate()
s2.lb.DebugForceNetmapUpdate() s2.lb.DebugForceNetmapUpdate()
wantRoutes := float64(2) wantRoutes := float64(2)
if runtime.GOOS == "windows" {
wantRoutes = 0
}
// Wait for the routes to be propagated to node 1 to ensure // Wait for the routes to be propagated to node 1 to ensure
// that the metrics are up-to-date. // that the metrics are up-to-date.
@ -1002,31 +1109,11 @@ func TestUserMetrics(t *testing.T) {
t.Logf("getting status: %s", err) t.Logf("getting status: %s", err)
return false return false
} }
if runtime.GOOS == "windows" {
// Windows does not seem to support or report back routes when running in
// userspace via tsnet. So, we skip this check on Windows.
// TODO(kradalby): Figure out if this is correct.
return true
}
// Wait for the primary routes to reach our desired routes, which is wantRoutes + 1, because // Wait for the primary routes to reach our desired routes, which is wantRoutes + 1, because
// the PrimaryRoutes list will contain a exit node route, which the metric does not count. // the PrimaryRoutes list will contain a exit node route, which the metric does not count.
return status1.Self.PrimaryRoutes != nil && status1.Self.PrimaryRoutes.Len() == int(wantRoutes)+1 return status1.Self.PrimaryRoutes != nil && status1.Self.PrimaryRoutes.Len() == int(wantRoutes)+1
}) })
mustDirect(t, t.Logf, lc1, lc2)
// 1 megabytes
bytesToSend := 1 * 1024 * 1024
// This asserts generates some traffic, it is factored out
// of TestUDPConn.
start := time.Now()
err = sendData(t.Logf, ctx, bytesToSend, s1, s2, s1ip, s2ip)
if err != nil {
t.Fatalf("Failed to send packets: %v", err)
}
t.Logf("Sent %d bytes from s1 to s2 in %s", bytesToSend, time.Since(start).String())
ctxLc, cancelLc := context.WithTimeout(context.Background(), 5*time.Second) ctxLc, cancelLc := context.WithTimeout(context.Background(), 5*time.Second)
defer cancelLc() defer cancelLc()
metrics1, err := lc1.UserMetrics(ctxLc) metrics1, err := lc1.UserMetrics(ctxLc)
@ -1039,9 +1126,6 @@ func TestUserMetrics(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
// Allow the metrics for the bytes sent to be off by 15%.
bytesSentTolerance := 1.15
t.Logf("Metrics1:\n%s\n", metrics1) t.Logf("Metrics1:\n%s\n", metrics1)
// The node is advertising 4 routes: // The node is advertising 4 routes:
@ -1059,17 +1143,6 @@ func TestUserMetrics(t *testing.T) {
t.Errorf("metrics1, tailscaled_approved_routes: got %v, want %v", got, want) t.Errorf("metrics1, tailscaled_approved_routes: got %v, want %v", got, want)
} }
// Verify that the amount of data recorded in bytes is higher or equal to the data sent
inboundBytes1 := parsedMetrics1[`tailscaled_inbound_bytes_total{path="direct_ipv4"}`]
if inboundBytes1 < float64(bytesToSend) {
t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4"}: expected higher (or equal) than %d, got: %f`, bytesToSend, inboundBytes1)
}
// But ensure that it is not too much higher than the data sent.
if inboundBytes1 > float64(bytesToSend)*bytesSentTolerance {
t.Errorf(`metrics1, tailscaled_inbound_bytes_total{path="direct_ipv4"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, inboundBytes1)
}
metrics2, err := lc2.UserMetrics(ctx) metrics2, err := lc2.UserMetrics(ctx)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -1091,17 +1164,6 @@ func TestUserMetrics(t *testing.T) {
if got, want := parsedMetrics2["tailscaled_approved_routes"], 0.0; got != want { if got, want := parsedMetrics2["tailscaled_approved_routes"], 0.0; got != want {
t.Errorf("metrics2, tailscaled_approved_routes: got %v, want %v", got, want) t.Errorf("metrics2, tailscaled_approved_routes: got %v, want %v", got, want)
} }
// Verify that the amount of data recorded in bytes is higher or equal than the data sent.
outboundBytes2 := parsedMetrics2[`tailscaled_outbound_bytes_total{path="direct_ipv4"}`]
if outboundBytes2 < float64(bytesToSend) {
t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4"}: expected higher (or equal) than %d, got: %f`, bytesToSend, outboundBytes2)
}
// But ensure that it is not too much higher than the data sent.
if outboundBytes2 > float64(bytesToSend)*bytesSentTolerance {
t.Errorf(`metrics2, tailscaled_outbound_bytes_total{path="direct_ipv4"}: expected lower than %f, got: %f`, float64(bytesToSend)*bytesSentTolerance, outboundBytes2)
}
} }
func waitForCondition(t *testing.T, msg string, waitTime time.Duration, f func() bool) { func waitForCondition(t *testing.T, msg string, waitTime time.Duration, f func() bool) {

Loading…
Cancel
Save