From 2d96215d977e9dd823034ff000adbfd1268a4c2b Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Wed, 17 Feb 2021 22:11:59 -0800 Subject: [PATCH] wgengine/router: make Linux delRoute idempotent, cidrDiff fail late as possible This makes cidrDiff do as much as possible before failing, and makes a delete of an already-deleted rule be a no-op. We should never do this ourselves, but other things on the system can, and this should help us recover a bit. Also adds the start of root-requiring tests. TODO: hook into wgengine/monitor and notice when routes are changed behind our back, and invalidate our routes map and re-read from kernel (via the ip command) at least on the next reconfig call. Updates tailscale/corp#1338 Signed-off-by: Brad Fitzpatrick --- wgengine/router/router_linux.go | 63 +++++++++++++++++++++++-- wgengine/router/router_linux_test.go | 70 ++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 5 deletions(-) diff --git a/wgengine/router/router_linux.go b/wgengine/router/router_linux.go index ec6359e11..b700efccc 100644 --- a/wgengine/router/router_linux.go +++ b/wgengine/router/router_linux.go @@ -478,7 +478,42 @@ func (r *linuxRouter) delRoute(cidr netaddr.IPPrefix) error { if r.ipRuleAvailable { args = append(args, "table", tailscaleRouteTable) } - return r.cmd.run(args...) + err := r.cmd.run(args...) + if err != nil { + ok, err := r.hasRoute(cidr) + if err != nil { + r.logf("warning: error checking whether %v even exists after error deleting it: %v", err) + } else { + if !ok { + r.logf("warning: tried to delete route %v but it was already gone; ignoring error", cidr) + return nil + } + } + } + return err +} + +func dashFam(ip netaddr.IP) string { + if ip.Is6() { + return "-6" + } + return "-4" +} + +func (r *linuxRouter) hasRoute(cidr netaddr.IPPrefix) (bool, error) { + args := []string{ + "ip", dashFam(cidr.IP), "route", "show", + normalizeCIDR(cidr), + "dev", r.tunname, + } + if r.ipRuleAvailable { + args = append(args, "table", tailscaleRouteTable) + } + out, err := r.cmd.output(args...) + if err != nil { + return false, err + } + return len(out) > 0, nil } // upInterface brings up the tunnel interface. @@ -987,25 +1022,43 @@ func cidrDiff(kind string, old map[netaddr.IPPrefix]bool, new []netaddr.IPPrefix ret[cidr] = true } + var delFail []error for cidr := range old { if newMap[cidr] { continue } if err := del(cidr); err != nil { logf("%s del failed: %v", kind, err) - return ret, err + delFail = append(delFail, err) + } else { + delete(ret, cidr) } - delete(ret, cidr) } + if len(delFail) == 1 { + return ret, delFail[0] + } + if len(delFail) > 0 { + return ret, fmt.Errorf("%d delete %s failures; first was: %w", len(delFail), kind, delFail[0]) + } + + var addFail []error for cidr := range newMap { if old[cidr] { continue } if err := add(cidr); err != nil { logf("%s add failed: %v", kind, err) - return ret, err + addFail = append(addFail, err) + } else { + ret[cidr] = true } - ret[cidr] = true + } + + if len(addFail) == 1 { + return ret, addFail[0] + } + if len(addFail) > 0 { + return ret, fmt.Errorf("%d add %s failures; first was: %w", len(addFail), kind, addFail[0]) } return ret, nil diff --git a/wgengine/router/router_linux_test.go b/wgengine/router/router_linux_test.go index 8298c6d07..7bacb2e2f 100644 --- a/wgengine/router/router_linux_test.go +++ b/wgengine/router/router_linux_test.go @@ -5,14 +5,18 @@ package router import ( + "bytes" "errors" "fmt" "math/rand" + "os" "sort" "strings" + "sync/atomic" "testing" "github.com/google/go-cmp/cmp" + "github.com/tailscale/wireguard-go/tun" "inet.af/netaddr" ) @@ -595,3 +599,69 @@ func (o *fakeOS) output(args ...string) ([]byte, error) { } return []byte(strings.Join(ret, "\n")), nil } + +var tunTestNum int64 + +func createTestTUN(t *testing.T) tun.Device { + const minimalMTU = 1280 + tunName := fmt.Sprintf("tuntest%d", atomic.AddInt64(&tunTestNum, 1)) + tun, err := tun.CreateTUN(tunName, minimalMTU) + if err != nil { + t.Fatalf("CreateTUN(%q): %v", tunName, err) + } + return tun +} + +func TestDelRouteIdempotent(t *testing.T) { + if os.Getuid() != 0 { + t.Skip("test requires root") + } + tun := createTestTUN(t) + defer tun.Close() + + var logOutput bytes.Buffer + logf := func(format string, args ...interface{}) { + fmt.Fprintf(&logOutput, format, args...) + if !bytes.HasSuffix(logOutput.Bytes(), []byte("\n")) { + logOutput.WriteByte('\n') + } + } + + r, err := newUserspaceRouter(logf, nil, tun) + if err != nil { + t.Fatal(err) + } + lr := r.(*linuxRouter) + if err := lr.upInterface(); err != nil { + t.Fatal(err) + } + for _, s := range []string{ + "192.0.2.0/24", // RFC 5737 + "2001:DB8::/32", // RFC 3849 + } { + cidr := netaddr.MustParseIPPrefix(s) + if err := lr.addRoute(cidr); err != nil { + t.Fatal(err) + } + for i := 0; i < 2; i++ { + if err := lr.delRoute(cidr); err != nil { + t.Fatalf("delRoute(i=%d): %v", i, err) + } + } + } + + wantSubs := map[string]int{ + "warning: tried to delete route 192.0.2.0/24 but it was already gone; ignoring error": 1, + "warning: tried to delete route 2001:db8::/32 but it was already gone; ignoring error": 1, + } + out := logOutput.String() + for sub, want := range wantSubs { + got := strings.Count(out, sub) + if got != want { + t.Errorf("log output substring %q occurred %d time; want %d", sub, got, want) + } + } + if t.Failed() { + t.Logf("Log output:\n%s", out) + } +}