From c6162c2a9440785e46d5718637e05a990b40ef5a Mon Sep 17 00:00:00 2001 From: Andrew Dunham Date: Tue, 20 Sep 2022 15:31:49 -0400 Subject: [PATCH] net/netcheck: add check for captive portal (#5593) This doesn't change any behaviour for now, other than maybe running a full netcheck more often. The intent is to start gathering data on captive portals, and additionally, seeing this in the 'tailscale netcheck' command should provide a bit of additional information to users. Updates #1634 Change-Id: I6ba08f9c584dc0200619fa97f9fde1a319f25c76 Signed-off-by: Andrew Dunham --- cmd/tailscale/cli/netcheck.go | 3 + net/netcheck/netcheck.go | 146 ++++++++++++++++++ net/netcheck/netcheck_test.go | 59 +++++++ tstest/integration/integration_test.go | 1 + tstest/integration/testcontrol/testcontrol.go | 3 + 5 files changed, 212 insertions(+) diff --git a/cmd/tailscale/cli/netcheck.go b/cmd/tailscale/cli/netcheck.go index 37d2c4fc5..794b8800d 100644 --- a/cmd/tailscale/cli/netcheck.go +++ b/cmd/tailscale/cli/netcheck.go @@ -133,6 +133,9 @@ func printReport(dm *tailcfg.DERPMap, report *netcheck.Report) error { printf("\t* MappingVariesByDestIP: %v\n", report.MappingVariesByDestIP) printf("\t* HairPinning: %v\n", report.HairPinning) printf("\t* PortMapping: %v\n", portMapping(report)) + if report.CaptivePortal != "" { + printf("\t* CaptivePortal: %v\n", report.CaptivePortal) + } // When DERP latency checking failed, // magicsock will try to pick the DERP server that diff --git a/net/netcheck/netcheck.go b/net/netcheck/netcheck.go index 34fff68d1..e89ad65b3 100644 --- a/net/netcheck/netcheck.go +++ b/net/netcheck/netcheck.go @@ -13,6 +13,7 @@ import ( "fmt" "io" "log" + "math/rand" "net" "net/http" "net/netip" @@ -112,6 +113,10 @@ type Report struct { GlobalV4 string // ip:port of global IPv4 GlobalV6 string // [ip]:port of global IPv6 + // CaptivePortal is set when we think there's a captive portal that is + // intercepting HTTP traffic. + CaptivePortal opt.Bool + // TODO: update Clone when adding new fields } @@ -175,6 +180,10 @@ type Client struct { // If nil, portmap discovery is not done. PortMapper *portmapper.Client // lazily initialized on first use + // For tests + testEnoughRegions int + testCaptivePortalDelay time.Duration + mu sync.Mutex // guards following nextFull bool // do a full region scan, even if last != nil prev map[time.Time]*Report // some previous reports @@ -192,6 +201,9 @@ type STUNConn interface { } func (c *Client) enoughRegions() int { + if c.testEnoughRegions > 0 { + return c.testEnoughRegions + } if c.Verbose { // Abuse verbose a bit here so netcheck can show all region latencies // in verbose mode. @@ -200,6 +212,14 @@ func (c *Client) enoughRegions() int { return 3 } +func (c *Client) captivePortalDelay() time.Duration { + if c.testCaptivePortalDelay > 0 { + return c.testCaptivePortalDelay + } + // Chosen semi-arbitrarily + return 200 * time.Millisecond +} + func (c *Client) logf(format string, a ...any) { if c.Logf != nil { c.Logf(format, a...) @@ -783,13 +803,35 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report, } c.curState = rs last := c.last + + // Even if we're doing a non-incremental update, we may want to try our + // preferred DERP region for captive portal detection. Save that, if we + // have it. + var preferredDERP int + if last != nil { + preferredDERP = last.PreferredDERP + } + now := c.timeNow() + + doFull := false if c.nextFull || now.Sub(c.lastFull) > 5*time.Minute { + doFull = true + } + // If the last report had a captive portal and reported no UDP access, + // it's possible that we didn't get a useful netcheck due to the + // captive portal blocking us. If so, make this report a full + // (non-incremental) one. + if !doFull && last != nil { + doFull = !last.UDP && last.CaptivePortal.EqualBool(true) + } + if doFull { last = nil // causes makeProbePlan below to do a full (initial) plan c.nextFull = false c.lastFull = now metricNumGetReportFull.Add(1) } + rs.incremental = last != nil c.mu.Unlock() @@ -874,6 +916,48 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report, plan := makeProbePlan(dm, ifState, last) + // If we're doing a full probe, also check for a captive portal. We + // delay by a bit to wait for UDP STUN to finish, to avoid the probe if + // it's unnecessary. + captivePortalDone := syncs.ClosedChan() + captivePortalStop := func() {} + if !rs.incremental { + // NOTE(andrew): we can't simply add this goroutine to the + // `NewWaitGroupChan` below, since we don't wait for that + // waitgroup to finish when exiting this function and thus get + // a data race. + ch := make(chan struct{}) + captivePortalDone = ch + + tmr := time.AfterFunc(c.captivePortalDelay(), func() { + defer close(ch) + found, err := c.checkCaptivePortal(ctx, dm, preferredDERP) + if err != nil { + c.logf("[v1] checkCaptivePortal: %v", err) + return + } + rs.report.CaptivePortal.Set(found) + }) + + captivePortalStop = func() { + // Don't cancel our captive portal check if we're + // explicitly doing a verbose netcheck. + if c.Verbose { + return + } + + if tmr.Stop() { + // Stopped successfully; need to close the + // signal channel ourselves. + close(ch) + return + } + + // Did not stop; do nothing and it'll finish by itself + // and close the signal channel. + } + } + wg := syncs.NewWaitGroupChan() wg.Add(len(plan)) for _, probeSet := range plan { @@ -894,9 +978,17 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report, case <-stunTimer.C: case <-ctx.Done(): case <-wg.DoneChan(): + // All of our probes finished, so if we have >0 responses, we + // stop our captive portal check. + if rs.anyUDP() { + captivePortalStop() + } case <-rs.stopProbeCh: // Saw enough regions. c.vlogf("saw enough regions; not waiting for rest") + // We can stop the captive portal check since we know that we + // got a bunch of STUN responses. + captivePortalStop() } rs.waitHairCheck(ctx) @@ -965,6 +1057,9 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report, wg.Wait() } + // Wait for captive portal check before finishing the report. + <-captivePortalDone + return c.finishAndStoreReport(rs, dm), nil } @@ -979,6 +1074,54 @@ func (c *Client) finishAndStoreReport(rs *reportState, dm *tailcfg.DERPMap) *Rep return report } +var noRedirectClient = &http.Client{ + // No redirects allowed + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return http.ErrUseLastResponse + }, + + // Remaining fields are the same as the default client. + Transport: http.DefaultClient.Transport, + Jar: http.DefaultClient.Jar, + Timeout: http.DefaultClient.Timeout, +} + +// checkCaptivePortal reports whether or not we think the system is behind a +// captive portal, detected by making a request to a URL that we know should +// return a "204 No Content" response and checking if that's what we get. +// +// The boolean return is whether we think we have a captive portal. +func (c *Client) checkCaptivePortal(ctx context.Context, dm *tailcfg.DERPMap, preferredDERP int) (bool, error) { + defer noRedirectClient.CloseIdleConnections() + + // If we have a preferred DERP region with more than one node, try + // that; otherwise, pick a random one not marked as "Avoid". + if preferredDERP == 0 || dm.Regions[preferredDERP] == nil || + (preferredDERP != 0 && len(dm.Regions[preferredDERP].Nodes) == 0) { + rids := make([]int, 0, len(dm.Regions)) + for id, reg := range dm.Regions { + if reg == nil || reg.Avoid || len(reg.Nodes) == 0 { + continue + } + rids = append(rids, id) + } + preferredDERP = rids[rand.Intn(len(rids))] + } + + node := dm.Regions[preferredDERP].Nodes[0] + req, err := http.NewRequestWithContext(ctx, "GET", "http://"+node.HostName+"/generate_204", nil) + if err != nil { + return false, err + } + r, err := noRedirectClient.Do(req) + if err != nil { + return false, err + } + c.logf("[v2] checkCaptivePortal url=%q status_code=%d", req.URL.String(), r.StatusCode) + + return r.StatusCode != 204, nil +} + // runHTTPOnlyChecks is the netcheck done by environments that can // only do HTTP requests, such as ws/wasm. func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *reportState, dm *tailcfg.DERPMap) error { @@ -1200,6 +1343,9 @@ func (c *Client) logConciseReport(r *Report, dm *tailcfg.DERPMap) { if r.GlobalV6 != "" { fmt.Fprintf(w, " v6a=%v", r.GlobalV6) } + if r.CaptivePortal != "" { + fmt.Fprintf(w, " captiveportal=%v", r.CaptivePortal) + } fmt.Fprintf(w, " derp=%v", r.PreferredDERP) if r.PreferredDERP != 0 { fmt.Fprintf(w, " derpdist=") diff --git a/net/netcheck/netcheck_test.go b/net/netcheck/netcheck_test.go index 946d7c2fb..270ad3224 100644 --- a/net/netcheck/netcheck_test.go +++ b/net/netcheck/netcheck_test.go @@ -9,11 +9,13 @@ import ( "context" "fmt" "net" + "net/http" "net/netip" "reflect" "sort" "strconv" "strings" + "sync/atomic" "testing" "time" @@ -115,6 +117,9 @@ func TestWorksWhenUDPBlocked(t *testing.T) { // OS IPv6 test is irrelevant here, accept whatever the current // machine has. want.OSHasIPv6 = r.OSHasIPv6 + // Captive portal test is irrelevant; accept what the current report + // has. + want.CaptivePortal = r.CaptivePortal if !reflect.DeepEqual(r, want) { t.Errorf("mismatch\n got: %+v\nwant: %+v\n", r, want) @@ -661,3 +666,57 @@ func TestSortRegions(t *testing.T) { t.Errorf("got %v; want %v", got, want) } } + +func TestNoCaptivePortalWhenUDP(t *testing.T) { + // Override noRedirectClient to handle the /generate_204 endpoint + var generate204Called atomic.Bool + tr := RoundTripFunc(func(req *http.Request) *http.Response { + if !strings.HasSuffix(req.URL.String(), "/generate_204") { + panic("bad URL: " + req.URL.String()) + } + generate204Called.Store(true) + return &http.Response{ + StatusCode: http.StatusNoContent, + Header: make(http.Header), + } + }) + + oldTransport := noRedirectClient.Transport + t.Cleanup(func() { noRedirectClient.Transport = oldTransport }) + noRedirectClient.Transport = tr + + stunAddr, cleanup := stuntest.Serve(t) + defer cleanup() + + c := &Client{ + Logf: t.Logf, + UDPBindAddr: "127.0.0.1:0", + testEnoughRegions: 1, + + // Set the delay long enough that we have time to cancel it + // when our STUN probe succeeds. + testCaptivePortalDelay: 10 * time.Second, + } + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + r, err := c.GetReport(ctx, stuntest.DERPMapOf(stunAddr.String())) + if err != nil { + t.Fatal(err) + } + + // Should not have called our captive portal function. + if generate204Called.Load() { + t.Errorf("captive portal check called; expected no call") + } + if r.CaptivePortal != "" { + t.Errorf("got CaptivePortal=%q, want empty", r.CaptivePortal) + } +} + +type RoundTripFunc func(req *http.Request) *http.Response + +func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return f(req), nil +} diff --git a/tstest/integration/integration_test.go b/tstest/integration/integration_test.go index de3162f75..f43fa26b5 100644 --- a/tstest/integration/integration_test.go +++ b/tstest/integration/integration_test.go @@ -810,6 +810,7 @@ func (n *testNode) StartDaemonAsIPNGOOS(ipnGOOS string) *Daemon { "HTTPS_PROXY="+n.env.TrafficTrapServer.URL, "TS_DEBUG_TAILSCALED_IPN_GOOS="+ipnGOOS, "TS_LOGS_DIR="+t.TempDir(), + "TS_NETCHECK_GENERATE_204_URL="+n.env.ControlServer.URL+"/generate_204", ) cmd.Stderr = &nodeOutputParser{n: n} if *verboseTailscaled { diff --git a/tstest/integration/testcontrol/testcontrol.go b/tstest/integration/testcontrol/testcontrol.go index 3c53a97eb..bc90bf0f7 100644 --- a/tstest/integration/testcontrol/testcontrol.go +++ b/tstest/integration/testcontrol/testcontrol.go @@ -200,6 +200,9 @@ func (s *Server) logf(format string, a ...any) { func (s *Server) initMux() { s.mux = http.NewServeMux() s.mux.HandleFunc("/", s.serveUnhandled) + s.mux.HandleFunc("/generate_204", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNoContent) + }) s.mux.HandleFunc("/key", s.serveKey) s.mux.HandleFunc("/machine/", s.serveMachine) }