// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package filter is a stateful packet filter. package filter import ( "fmt" "net/netip" "sync" "time" "go4.org/netipx" "tailscale.com/envknob" "tailscale.com/net/flowtrack" "tailscale.com/net/netaddr" "tailscale.com/net/packet" "tailscale.com/tstime/rate" "tailscale.com/types/ipproto" "tailscale.com/types/logger" ) // Filter is a stateful packet filter. type Filter struct { logf logger.Logf // local is the set of IPs prefixes that we know to be "local" to // this node. All packets coming in over tailscale must have a // destination within local, regardless of the policy filter // below. local *netipx.IPSet // logIPs is the set of IPs that are allowed to appear in flow // logs. If a packet is to or from an IP not in logIPs, it will // never be logged. logIPs *netipx.IPSet // matches4 and matches6 are lists of match->action rules // applied to all packets arriving over tailscale // tunnels. Matches are checked in order, and processing stops // at the first matching rule. The default policy if no rules // match is to drop the packet. matches4 matches matches6 matches // cap4 and cap6 are the subsets of the matches that are about // capability grants, partitioned by source IP address family. cap4, cap6 matches // state is the connection tracking state attached to this // filter. It is used to allow incoming traffic that is a response // to an outbound connection that this node made, even if those // incoming packets don't get accepted by matches above. state *filterState shieldsUp bool } // filterState is a state cache of past seen packets. type filterState struct { mu sync.Mutex lru *flowtrack.Cache // from flowtrack.Tuple -> nil } // lruMax is the size of the LRU cache in filterState. const lruMax = 512 // Response is a verdict from the packet filter. type Response int const ( Drop Response = iota // do not continue processing packet. DropSilently // do not continue processing packet, but also don't log Accept // continue processing packet. noVerdict // no verdict yet, continue running filter ) func (r Response) String() string { switch r { case Drop: return "Drop" case DropSilently: return "DropSilently" case Accept: return "Accept" case noVerdict: return "noVerdict" default: return "???" } } func (r Response) IsDrop() bool { return r == Drop || r == DropSilently } // RunFlags controls the filter's debug log verbosity at runtime. type RunFlags int const ( LogDrops RunFlags = 1 << iota // write dropped packet info to logf LogAccepts // write accepted packet info to logf HexdumpDrops // print packet hexdump when logging drops HexdumpAccepts // print packet hexdump when logging accepts ) // NewAllowAllForTest returns a packet filter that accepts // everything. Use in tests only, as it permits some kinds of spoofing // attacks to reach the OS network stack. func NewAllowAllForTest(logf logger.Logf) *Filter { any4 := netip.PrefixFrom(netaddr.IPv4(0, 0, 0, 0), 0) any6 := netip.PrefixFrom(netip.AddrFrom16([16]byte{}), 0) ms := []Match{ { IPProto: []ipproto.Proto{ipproto.TCP, ipproto.UDP, ipproto.ICMPv4}, Srcs: []netip.Prefix{any4}, Dsts: []NetPortRange{ { Net: any4, Ports: PortRange{ First: 0, Last: 65535, }, }, }, }, { IPProto: []ipproto.Proto{ipproto.TCP, ipproto.UDP, ipproto.ICMPv6}, Srcs: []netip.Prefix{any6}, Dsts: []NetPortRange{ { Net: any6, Ports: PortRange{ First: 0, Last: 65535, }, }, }, }, } var sb netipx.IPSetBuilder sb.AddPrefix(any4) sb.AddPrefix(any6) ipSet, _ := sb.IPSet() return New(ms, ipSet, ipSet, nil, logf) } // NewAllowNone returns a packet filter that rejects everything. func NewAllowNone(logf logger.Logf, logIPs *netipx.IPSet) *Filter { return New(nil, &netipx.IPSet{}, logIPs, nil, logf) } // NewShieldsUpFilter returns a packet filter that rejects incoming connections. // // If shareStateWith is non-nil, the returned filter shares state with the previous one, // as long as the previous one was also a shields up filter. func NewShieldsUpFilter(localNets *netipx.IPSet, logIPs *netipx.IPSet, shareStateWith *Filter, logf logger.Logf) *Filter { // Don't permit sharing state with a prior filter that wasn't a shields-up filter. if shareStateWith != nil && !shareStateWith.shieldsUp { shareStateWith = nil } f := New(nil, localNets, logIPs, shareStateWith, logf) f.shieldsUp = true return f } // New creates a new packet filter. The filter enforces that incoming // packets must be destined to an IP in localNets, and must be allowed // by matches. If shareStateWith is non-nil, the returned filter // shares state with the previous one, to enable changing rules at // runtime without breaking existing stateful flows. func New(matches []Match, localNets *netipx.IPSet, logIPs *netipx.IPSet, shareStateWith *Filter, logf logger.Logf) *Filter { var state *filterState if shareStateWith != nil { state = shareStateWith.state } else { state = &filterState{ lru: &flowtrack.Cache{MaxEntries: lruMax}, } } f := &Filter{ logf: logf, matches4: matchesFamily(matches, netip.Addr.Is4), matches6: matchesFamily(matches, netip.Addr.Is6), cap4: capMatchesFunc(matches, netip.Addr.Is4), cap6: capMatchesFunc(matches, netip.Addr.Is6), local: localNets, logIPs: logIPs, state: state, } return f } // matchesFamily returns the subset of ms for which keep(srcNet.IP) // and keep(dstNet.IP) are both true. func matchesFamily(ms matches, keep func(netip.Addr) bool) matches { var ret matches for _, m := range ms { var retm Match retm.IPProto = m.IPProto for _, src := range m.Srcs { if keep(src.Addr()) { retm.Srcs = append(retm.Srcs, src) } } for _, dst := range m.Dsts { if keep(dst.Net.Addr()) { retm.Dsts = append(retm.Dsts, dst) } } if len(retm.Srcs) > 0 && len(retm.Dsts) > 0 { ret = append(ret, retm) } } return ret } // capMatchesFunc returns a copy of the subset of ms for which keep(srcNet.IP) // and the match is a capability grant. func capMatchesFunc(ms matches, keep func(netip.Addr) bool) matches { var ret matches for _, m := range ms { if len(m.Caps) == 0 { continue } retm := Match{Caps: m.Caps} for _, src := range m.Srcs { if keep(src.Addr()) { retm.Srcs = append(retm.Srcs, src) } } if len(retm.Srcs) > 0 { ret = append(ret, retm) } } return ret } func maybeHexdump(flag RunFlags, b []byte) string { if flag == 0 { return "" } return packet.Hexdump(b) + "\n" } // TODO(apenwarr): use a bigger bucket for specifically TCP SYN accept logging? // Logging is a quick way to record every newly opened TCP connection, but // we have to be cautious about flooding the logs vs letting people use // flood protection to hide their traffic. We could use a rate limiter in // the actual *filter* for SYN accepts, perhaps. var acceptBucket = rate.NewLimiter(rate.Every(10*time.Second), 3) var dropBucket = rate.NewLimiter(rate.Every(5*time.Second), 10) // NOTE(Xe): This func init is used to detect // TS_DEBUG_FILTER_RATE_LIMIT_LOGS=all, and if it matches, to // effectively disable the limits on the log rate by setting the limit // to 1 millisecond. This should capture everything. func init() { if envknob.String("TS_DEBUG_FILTER_RATE_LIMIT_LOGS") != "all" { return } acceptBucket = rate.NewLimiter(rate.Every(time.Millisecond), 10) dropBucket = rate.NewLimiter(rate.Every(time.Millisecond), 10) } func (f *Filter) logRateLimit(runflags RunFlags, q *packet.Parsed, dir direction, r Response, why string) { if !f.loggingAllowed(q) { return } if r == Drop && omitDropLogging(q, dir) { return } var verdict string if r == Drop && (runflags&LogDrops) != 0 && dropBucket.Allow() { verdict = "Drop" runflags &= HexdumpDrops } else if r == Accept && (runflags&LogAccepts) != 0 && acceptBucket.Allow() { verdict = "Accept" runflags &= HexdumpAccepts } // Note: it is crucial that q.String() be called only if {accept,drop}Bucket.Allow() passes, // since it causes an allocation. if verdict != "" { b := q.Buffer() f.logf("%s: %s %d %s\n%s", verdict, q.String(), len(b), why, maybeHexdump(runflags, b)) } } // dummyPacket is a 20-byte slice of garbage, to pass the filter // pre-check when evaluating synthesized packets. var dummyPacket = []byte{ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, } // CheckTCP determines whether TCP traffic from srcIP to dstIP:dstPort // is allowed. func (f *Filter) CheckTCP(srcIP, dstIP netip.Addr, dstPort uint16) Response { pkt := &packet.Parsed{} pkt.Decode(dummyPacket) // initialize private fields switch { case (srcIP.Is4() && dstIP.Is6()) || (srcIP.Is6() && srcIP.Is4()): // Mismatched address families, no filters will // match. return Drop case srcIP.Is4(): pkt.IPVersion = 4 case srcIP.Is6(): pkt.IPVersion = 6 default: panic("unreachable") } pkt.Src = netip.AddrPortFrom(srcIP, 0) pkt.Dst = netip.AddrPortFrom(dstIP, dstPort) pkt.IPProto = ipproto.TCP pkt.TCPFlags = packet.TCPSyn return f.RunIn(pkt, 0) } // AppendCaps appends to base the capabilities that srcIP has talking // to dstIP. func (f *Filter) AppendCaps(base []string, srcIP, dstIP netip.Addr) []string { ret := base var mm matches switch { case srcIP.Is4(): mm = f.cap4 case srcIP.Is6(): mm = f.cap6 } for _, m := range mm { if !ipInList(srcIP, m.Srcs) { continue } for _, cm := range m.Caps { if cm.Cap != "" && cm.Dst.Contains(dstIP) { ret = append(ret, cm.Cap) } } } return ret } // ShieldsUp reports whether this is a "shields up" (block everything // incoming) filter. func (f *Filter) ShieldsUp() bool { return f.shieldsUp } // RunIn determines whether this node is allowed to receive q from a // Tailscale peer. func (f *Filter) RunIn(q *packet.Parsed, rf RunFlags) Response { dir := in r := f.pre(q, rf, dir) if r == Accept || r == Drop { // already logged return r } var why string switch q.IPVersion { case 4: r, why = f.runIn4(q) case 6: r, why = f.runIn6(q) default: r, why = Drop, "not-ip" } f.logRateLimit(rf, q, dir, r, why) return r } // RunOut determines whether this node is allowed to send q to a // Tailscale peer. func (f *Filter) RunOut(q *packet.Parsed, rf RunFlags) Response { dir := out r := f.pre(q, rf, dir) if r == Drop || r == Accept { // already logged return r } r, why := f.runOut(q) f.logRateLimit(rf, q, dir, r, why) return r } func (f *Filter) runIn4(q *packet.Parsed) (r Response, why string) { // A compromised peer could try to send us packets for // destinations we didn't explicitly advertise. This check is to // prevent that. if !f.local.Contains(q.Dst.Addr()) { return Drop, "destination not allowed" } switch q.IPProto { case ipproto.ICMPv4: if q.IsEchoResponse() || q.IsError() { // ICMP responses are allowed. // TODO(apenwarr): consider using conntrack state. // We could choose to reject all packets that aren't // related to an existing ICMP-Echo, TCP, or UDP // session. return Accept, "icmp response ok" } else if f.matches4.matchIPsOnly(q) { // If any port is open to an IP, allow ICMP to it. return Accept, "icmp ok" } case ipproto.TCP: // For TCP, we want to allow *outgoing* connections, // which means we want to allow return packets on those // connections. To make this restriction work, we need to // allow non-SYN packets (continuation of an existing session) // to arrive. This should be okay since a new incoming session // can't be initiated without first sending a SYN. // It happens to also be much faster. // TODO(apenwarr): Skip the rest of decoding in this path? if !q.IsTCPSyn() { return Accept, "tcp non-syn" } if f.matches4.match(q) { return Accept, "tcp ok" } case ipproto.UDP, ipproto.SCTP: t := flowtrack.Tuple{Proto: q.IPProto, Src: q.Src, Dst: q.Dst} f.state.mu.Lock() _, ok := f.state.lru.Get(t) f.state.mu.Unlock() if ok { return Accept, "cached" } if f.matches4.match(q) { return Accept, "ok" } case ipproto.TSMP: return Accept, "tsmp ok" default: if f.matches4.matchProtoAndIPsOnlyIfAllPorts(q) { return Accept, "otherproto ok" } return Drop, "Unknown proto" } return Drop, "no rules matched" } func (f *Filter) runIn6(q *packet.Parsed) (r Response, why string) { // A compromised peer could try to send us packets for // destinations we didn't explicitly advertise. This check is to // prevent that. if !f.local.Contains(q.Dst.Addr()) { return Drop, "destination not allowed" } switch q.IPProto { case ipproto.ICMPv6: if q.IsEchoResponse() || q.IsError() { // ICMP responses are allowed. // TODO(apenwarr): consider using conntrack state. // We could choose to reject all packets that aren't // related to an existing ICMP-Echo, TCP, or UDP // session. return Accept, "icmp response ok" } else if f.matches6.matchIPsOnly(q) { // If any port is open to an IP, allow ICMP to it. return Accept, "icmp ok" } case ipproto.TCP: // For TCP, we want to allow *outgoing* connections, // which means we want to allow return packets on those // connections. To make this restriction work, we need to // allow non-SYN packets (continuation of an existing session) // to arrive. This should be okay since a new incoming session // can't be initiated without first sending a SYN. // It happens to also be much faster. // TODO(apenwarr): Skip the rest of decoding in this path? if q.IPProto == ipproto.TCP && !q.IsTCPSyn() { return Accept, "tcp non-syn" } if f.matches6.match(q) { return Accept, "tcp ok" } case ipproto.UDP, ipproto.SCTP: t := flowtrack.Tuple{Proto: q.IPProto, Src: q.Src, Dst: q.Dst} f.state.mu.Lock() _, ok := f.state.lru.Get(t) f.state.mu.Unlock() if ok { return Accept, "cached" } if f.matches6.match(q) { return Accept, "ok" } case ipproto.TSMP: return Accept, "tsmp ok" default: if f.matches6.matchProtoAndIPsOnlyIfAllPorts(q) { return Accept, "otherproto ok" } return Drop, "Unknown proto" } return Drop, "no rules matched" } // runIn runs the output-specific part of the filter logic. func (f *Filter) runOut(q *packet.Parsed) (r Response, why string) { switch q.IPProto { case ipproto.UDP, ipproto.SCTP: tuple := flowtrack.Tuple{ Proto: q.IPProto, Src: q.Dst, Dst: q.Src, // src/dst reversed } f.state.mu.Lock() f.state.lru.Add(tuple, nil) f.state.mu.Unlock() } return Accept, "ok out" } // direction is whether a packet was flowing into this machine, or // flowing out. type direction int const ( in direction = iota // from Tailscale peer to local machine out // from local machine to Tailscale peer ) func (d direction) String() string { switch d { case in: return "in" case out: return "out" default: return fmt.Sprintf("[??dir=%d]", int(d)) } } var gcpDNSAddr = netaddr.IPv4(169, 254, 169, 254) // pre runs the direction-agnostic filter logic. dir is only used for // logging. func (f *Filter) pre(q *packet.Parsed, rf RunFlags, dir direction) Response { if len(q.Buffer()) == 0 { // wireguard keepalive packet, always permit. return Accept } if len(q.Buffer()) < 20 { f.logRateLimit(rf, q, dir, Drop, "too short") return Drop } if q.Dst.Addr().IsMulticast() { f.logRateLimit(rf, q, dir, Drop, "multicast") return Drop } if q.Dst.Addr().IsLinkLocalUnicast() && q.Dst.Addr() != gcpDNSAddr { f.logRateLimit(rf, q, dir, Drop, "link-local-unicast") return Drop } switch q.IPProto { case ipproto.Unknown: // Unknown packets are dangerous; always drop them. f.logRateLimit(rf, q, dir, Drop, "unknown") return Drop case ipproto.Fragment: // Fragments after the first always need to be passed through. // Very small fragments are considered Junk by Parsed. f.logRateLimit(rf, q, dir, Accept, "fragment") return Accept } return noVerdict } // loggingAllowed reports whether p can appear in logs at all. func (f *Filter) loggingAllowed(p *packet.Parsed) bool { return f.logIPs.Contains(p.Src.Addr()) && f.logIPs.Contains(p.Dst.Addr()) } // omitDropLogging reports whether packet p, which has already been // deemed a packet to Drop, should bypass the [rate-limited] logging. // We don't want to log scary & spammy reject warnings for packets // that are totally normal, like IPv6 route announcements. func omitDropLogging(p *packet.Parsed, dir direction) bool { if dir != out { return false } return p.Dst.Addr().IsMulticast() || (p.Dst.Addr().IsLinkLocalUnicast() && p.Dst.Addr() != gcpDNSAddr) || p.IPProto == ipproto.IGMP }