// checkIPRule runs the ipRuleAvailable check exactly once.
var checkIPRule sync.Once

// ipRuleAvailable is true if and only if the 'ip rule' command works.
// If it doesn't, we have to use SO_BINDTODEVICE on our sockets instead.
var ipRuleAvailable bool

// defaultRouteInterface returns the name of the network interface that owns
// the default route, not including any tailscale interfaces. We only use
// this in SO_BINDTODEVICE mode.
//
// The answer comes from /proc/net/route. The first line of that file is
// skipped as a header; every following row is split on whitespace and
// columns 0, 1 and 7 are read as the interface name, the destination and
// the netmask. The first row whose destination and netmask are both
// "00000000", and whose interface name does not start with "tailscale" or
// "wg", wins.
//
// An error is returned if the file cannot be read or if no row qualifies.
func defaultRouteInterface() (string, error) {
	b, err := ioutil.ReadFile("/proc/net/route")
	if err != nil {
		return "", err
	}

	// strings.Split always returns at least one element for a non-empty
	// separator, so slicing off the header with [1:] cannot panic.
	rows := strings.Split(string(b), "\n")[1:]
	for _, row := range rows {
		fields := strings.Fields(row)
		if len(fields) < 8 {
			// The file ends with a newline, so the final element is an
			// empty string with zero fields. Skip it, and any other
			// truncated row, instead of indexing out of range.
			continue
		}
		ifc := fields[0]
		dest := fields[1]
		netmask := fields[7]

		if strings.HasPrefix(ifc, "tailscale") ||
			strings.HasPrefix(ifc, "wg") {
			continue
		}
		if dest == "00000000" && netmask == "00000000" {
			// default route
			return ifc, nil // interface name
		}
	}

	return "", errors.New("no default routes found")
}
+ v, _ := os.Executable() + switch filepath.Base(v) { + case "tailscale": + for _, arg := range os.Args { + if arg == "netcheck" { + return true + } + } + case "tailscaled": + for _, arg := range os.Args { + if arg == "-fake" || arg == "--fake" { + return true + } + } + } + + return false +} + // control marks c as necessary to dial in a separate network namespace. // // It's intentionally the same signature as net.Dialer.Control // and net.ListenConfig.Control. func control(network, address string, c syscall.RawConn) error { + checkIPRule.Do(func() { + _, err := exec.Command("ip", "rule").Output() + ipRuleAvailable = (err == nil) + }) + if skipPrivileged.Get() { // We can't set socket marks without CAP_NET_ADMIN on linux, // skip as requested. return nil } - var controlErr error - err := c.Control(func(fd uintptr) { - controlErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_MARK, tailscaleBypassMark) - }) - // Before returning some fatal error, skip it in some cases. - if (err != nil || controlErr != nil) && os.Getuid() != 0 { - v, _ := os.Executable() - switch filepath.Base(v) { - case "tailscale": - for _, arg := range os.Args { - if arg == "netcheck" { - return nil - } - } - case "tailscaled": - for _, arg := range os.Args { - if arg == "-fake" || arg == "--fake" { - return nil - } + if ipRuleAvailable { + var controlErr error + err := c.Control(func(fd uintptr) { + controlErr = unix.SetsockoptInt(int(fd), + unix.SOL_SOCKET, unix.SO_MARK, + tailscaleBypassMark) + }) + if (err != nil || controlErr != nil) && ignoreErrors() { + return nil + } + if err != nil { + return fmt.Errorf("setting socket mark1: %w", err) + } + if controlErr != nil { + return fmt.Errorf("setting socket mark2: %w", controlErr) + } + } else { + var controlErr error + err := c.Control(func(fd uintptr) { + ifc, err := defaultRouteInterface() + if err != nil { + // Make sure we bind to *some* interface, + // or we could get a routing loop. 
+ // "lo" is always wrong, but if we don't have + // a default route anyway, it doesn't matter. + ifc = "lo" } + controlErr = unix.SetsockoptString(int(fd), + unix.SOL_SOCKET, unix.SO_BINDTODEVICE, ifc) + }) + if (err != nil || controlErr != nil) && ignoreErrors() { + return nil + } + if err != nil { + return fmt.Errorf("setting SO_BINDTODEVICE 1: %w", err) + } + if controlErr != nil { + return fmt.Errorf("setting SO_BINDTODEVICE 2: %w", controlErr) } - } - if err != nil { - return fmt.Errorf("setting socket mark: %w", err) - } - if controlErr != nil { - return fmt.Errorf("setting socket mark: %w", controlErr) } return nil } diff --git a/wgengine/router/router_linux.go b/wgengine/router/router_linux.go index 2b998326d..1fa82909f 100644 --- a/wgengine/router/router_linux.go +++ b/wgengine/router/router_linux.go @@ -70,6 +70,7 @@ type netfilterRunner interface { type linuxRouter struct { logf func(fmt string, args ...interface{}) + ipRuleAvailable bool tunname string addrs map[netaddr.IPPrefix]bool routes map[netaddr.IPPrefix]bool @@ -96,12 +97,16 @@ func newUserspaceRouter(logf logger.Logf, _ *device.Device, tunDev tun.Device) ( } func newUserspaceRouterAdvanced(logf logger.Logf, tunname string, netfilter netfilterRunner, cmd commandRunner) (Router, error) { + _, err := exec.Command("ip", "rule").Output() + ipRuleAvailable := (err == nil) + return &linuxRouter{ - logf: logf, - tunname: tunname, - netfilterMode: NetfilterOff, - ipt4: netfilter, - cmd: cmd, + logf: logf, + ipRuleAvailable: ipRuleAvailable, + tunname: tunname, + netfilterMode: NetfilterOff, + ipt4: netfilter, + cmd: cmd, }, nil } @@ -109,10 +114,10 @@ func (r *linuxRouter) Up() error { if err := r.delLegacyNetfilter(); err != nil { return err } - if err := r.setNetfilterMode(NetfilterOff); err != nil { + if err := r.addIPRules(); err != nil { return err } - if err := r.addBypassRule(); err != nil { + if err := r.setNetfilterMode(NetfilterOff); err != nil { return err } if err := r.upInterface(); err != 
nil { @@ -126,7 +131,7 @@ func (r *linuxRouter) down() error { if err := r.downInterface(); err != nil { return err } - if err := r.delBypassRule(); err != nil { + if err := r.delIPRules(); err != nil { return err } if err := r.setNetfilterMode(NetfilterOff); err != nil { @@ -477,24 +482,30 @@ func (r *linuxRouter) delLoopbackRule(addr netaddr.IP) error { // interface. Fails if the route already exists, or if adding the // route fails. func (r *linuxRouter) addRoute(cidr netaddr.IPPrefix) error { - return r.cmd.run( + args := []string{ "ip", "route", "add", normalizeCIDR(cidr), "dev", r.tunname, - "table", "88", - ) + } + if r.ipRuleAvailable { + args = append(args, "table", "88") + } + return r.cmd.run(args...) } // delRoute removes the route for cidr pointing to the tunnel // interface. Fails if the route doesn't exist, or if removing the // route fails. func (r *linuxRouter) delRoute(cidr netaddr.IPPrefix) error { - return r.cmd.run( + args := []string{ "ip", "route", "del", normalizeCIDR(cidr), "dev", r.tunname, - "table", "88", - ) + } + if r.ipRuleAvailable { + args = append(args, "table", "88") + } + return r.cmd.run(args...) } // addSubnetRule adds a netfilter rule that allows traffic to flow @@ -540,13 +551,17 @@ func (r *linuxRouter) downInterface() error { return r.cmd.run("ip", "link", "set", "dev", r.tunname, "down") } -// addBypassRule adds the policy routing rule that avoids tailscaled +// addIPRules adds the policy routing rule that avoids tailscaled // routing loops. If the rule exists and appears to be a // tailscale-managed rule, it is gracefully replaced. -func (r *linuxRouter) addBypassRule() error { +func (r *linuxRouter) addIPRules() error { + if !r.ipRuleAvailable { + return nil + } + // Clear out old rules. After that, any error adding a rule is fatal, // because there should be no reason we add a duplicate. 
- if err := r.delBypassRule(); err != nil { + if err := r.delIPRules(); err != nil { return err } @@ -611,7 +626,11 @@ func (r *linuxRouter) addBypassRule() error { // delBypassrule removes the policy routing rules that avoid // tailscaled routing loops, if it exists. -func (r *linuxRouter) delBypassRule() error { +func (r *linuxRouter) delIPRules() error { + if !r.ipRuleAvailable { + return nil + } + // Error codes: 'ip rule' returns error code 2 if the rule is a // duplicate (add) or not found (del). It returns a different code // for syntax errors. This is also true of busybox.