wgengine/router: make Linux delRoute idempotent, cidrDiff fail late as possible

This makes cidrDiff do as much as possible before failing, and makes a
delete of an already-deleted rule be a no-op. We should never do this
ourselves, but other things on the system can, and this should help us
recover a bit.

Also adds the start of root-requiring tests.

TODO: hook into wgengine/monitor and notice when routes are changed
behind our back, and invalidate our routes map and re-read from
kernel (via the ip command) at least on the next reconfig call.

Updates tailscale/corp#1338

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
pull/1304/head
Brad Fitzpatrick 4 years ago committed by Brad Fitzpatrick
parent 6a2c6541da
commit 2d96215d97

@ -478,7 +478,42 @@ func (r *linuxRouter) delRoute(cidr netaddr.IPPrefix) error {
if r.ipRuleAvailable {
args = append(args, "table", tailscaleRouteTable)
}
return r.cmd.run(args...)
err := r.cmd.run(args...)
if err != nil {
ok, err := r.hasRoute(cidr)
if err != nil {
r.logf("warning: error checking whether %v even exists after error deleting it: %v", err)
} else {
if !ok {
r.logf("warning: tried to delete route %v but it was already gone; ignoring error", cidr)
return nil
}
}
}
return err
}
func dashFam(ip netaddr.IP) string {
if ip.Is6() {
return "-6"
}
return "-4"
}
func (r *linuxRouter) hasRoute(cidr netaddr.IPPrefix) (bool, error) {
args := []string{
"ip", dashFam(cidr.IP), "route", "show",
normalizeCIDR(cidr),
"dev", r.tunname,
}
if r.ipRuleAvailable {
args = append(args, "table", tailscaleRouteTable)
}
out, err := r.cmd.output(args...)
if err != nil {
return false, err
}
return len(out) > 0, nil
}
// upInterface brings up the tunnel interface.
@ -987,26 +1022,44 @@ func cidrDiff(kind string, old map[netaddr.IPPrefix]bool, new []netaddr.IPPrefix
ret[cidr] = true
}
var delFail []error
for cidr := range old {
if newMap[cidr] {
continue
}
if err := del(cidr); err != nil {
logf("%s del failed: %v", kind, err)
return ret, err
}
delFail = append(delFail, err)
} else {
delete(ret, cidr)
}
}
if len(delFail) == 1 {
return ret, delFail[0]
}
if len(delFail) > 0 {
return ret, fmt.Errorf("%d delete %s failures; first was: %w", len(delFail), kind, delFail[0])
}
var addFail []error
for cidr := range newMap {
if old[cidr] {
continue
}
if err := add(cidr); err != nil {
logf("%s add failed: %v", kind, err)
return ret, err
}
addFail = append(addFail, err)
} else {
ret[cidr] = true
}
}
if len(addFail) == 1 {
return ret, addFail[0]
}
if len(addFail) > 0 {
return ret, fmt.Errorf("%d add %s failures; first was: %w", len(addFail), kind, addFail[0])
}
return ret, nil
}

@ -5,14 +5,18 @@
package router
import (
"bytes"
"errors"
"fmt"
"math/rand"
"os"
"sort"
"strings"
"sync/atomic"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/tailscale/wireguard-go/tun"
"inet.af/netaddr"
)
@ -595,3 +599,69 @@ func (o *fakeOS) output(args ...string) ([]byte, error) {
}
return []byte(strings.Join(ret, "\n")), nil
}
var tunTestNum int64
func createTestTUN(t *testing.T) tun.Device {
const minimalMTU = 1280
tunName := fmt.Sprintf("tuntest%d", atomic.AddInt64(&tunTestNum, 1))
tun, err := tun.CreateTUN(tunName, minimalMTU)
if err != nil {
t.Fatalf("CreateTUN(%q): %v", tunName, err)
}
return tun
}
func TestDelRouteIdempotent(t *testing.T) {
if os.Getuid() != 0 {
t.Skip("test requires root")
}
tun := createTestTUN(t)
defer tun.Close()
var logOutput bytes.Buffer
logf := func(format string, args ...interface{}) {
fmt.Fprintf(&logOutput, format, args...)
if !bytes.HasSuffix(logOutput.Bytes(), []byte("\n")) {
logOutput.WriteByte('\n')
}
}
r, err := newUserspaceRouter(logf, nil, tun)
if err != nil {
t.Fatal(err)
}
lr := r.(*linuxRouter)
if err := lr.upInterface(); err != nil {
t.Fatal(err)
}
for _, s := range []string{
"192.0.2.0/24", // RFC 5737
"2001:DB8::/32", // RFC 3849
} {
cidr := netaddr.MustParseIPPrefix(s)
if err := lr.addRoute(cidr); err != nil {
t.Fatal(err)
}
for i := 0; i < 2; i++ {
if err := lr.delRoute(cidr); err != nil {
t.Fatalf("delRoute(i=%d): %v", i, err)
}
}
}
wantSubs := map[string]int{
"warning: tried to delete route 192.0.2.0/24 but it was already gone; ignoring error": 1,
"warning: tried to delete route 2001:db8::/32 but it was already gone; ignoring error": 1,
}
out := logOutput.String()
for sub, want := range wantSubs {
got := strings.Count(out, sub)
if got != want {
t.Errorf("log output substring %q occurred %d time; want %d", sub, got, want)
}
}
if t.Failed() {
t.Logf("Log output:\n%s", out)
}
}

Loading…
Cancel
Save