diff --git a/.github/workflows/request-dataplane-review.yml b/.github/workflows/request-dataplane-review.yml index 7ae5668c3..58f6d3d0b 100644 --- a/.github/workflows/request-dataplane-review.yml +++ b/.github/workflows/request-dataplane-review.yml @@ -2,6 +2,7 @@ name: request-dataplane-review on: pull_request: + types: [ opened, synchronize, reopened, ready_for_review ] paths: - ".github/workflows/request-dataplane-review.yml" - "**/*derp*" @@ -10,6 +11,7 @@ on: jobs: request-dataplane-review: + if: github.event.pull_request.draft == false name: Request Dataplane Review runs-on: ubuntu-latest steps: diff --git a/VERSION.txt b/VERSION.txt index 6979a6c06..95784efdd 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -1.91.0 +1.93.0 diff --git a/cmd/cigocacher/cigocacher.go b/cmd/cigocacher/cigocacher.go new file mode 100644 index 000000000..b38df4c2b --- /dev/null +++ b/cmd/cigocacher/cigocacher.go @@ -0,0 +1,308 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +// cigocacher is an opinionated-to-Tailscale client for gocached. It connects +// at a URL like "https://ci-gocached-azure-1.corp.ts.net:31364", but that is +// stored in a GitHub actions variable so that its hostname can be updated for +// all branches at the same time in sync with the actual infrastructure. +// +// It authenticates using GitHub OIDC tokens, and all HTTP errors are ignored +// so that its failure mode is just that builds get slower and fall back to +// disk-only cache. +package main + +import ( + "bytes" + "context" + jsonv1 "encoding/json" + "errors" + "flag" + "fmt" + "io" + "log" + "net" + "net/http" + "os" + "path/filepath" + "strings" + "sync/atomic" + "time" + + "github.com/bradfitz/go-tool-cache/cacheproc" + "github.com/bradfitz/go-tool-cache/cachers" +) + +func main() { + var ( + auth = flag.Bool("auth", false, "auth with cigocached and exit, printing the access token as output") + token = flag.String("token", "", "the cigocached access token to use, as created using --auth") + cigocachedURL = flag.String("cigocached-url", "", "optional cigocached URL (scheme, host, and port). 
empty means to not use one.") + verbose = flag.Bool("verbose", false, "enable verbose logging") + ) + flag.Parse() + + if *auth { + if *cigocachedURL == "" { + log.Print("--cigocached-url is empty, skipping auth") + return + } + tk, err := fetchAccessToken(httpClient(), os.Getenv("ACTIONS_ID_TOKEN_REQUEST_URL"), os.Getenv("ACTIONS_ID_TOKEN_REQUEST_TOKEN"), *cigocachedURL) + if err != nil { + log.Printf("error fetching access token, skipping auth: %v", err) + return + } + fmt.Println(tk) + return + } + + d, err := os.UserCacheDir() + if err != nil { + log.Fatal(err) + } + d = filepath.Join(d, "go-cacher") + log.Printf("Defaulting to cache dir %v ...", d) + if err := os.MkdirAll(d, 0750); err != nil { + log.Fatal(err) + } + + c := &cigocacher{ + disk: &cachers.DiskCache{Dir: d}, + verbose: *verbose, + } + if *cigocachedURL != "" { + log.Printf("Using cigocached at %s", *cigocachedURL) + c.gocached = &gocachedClient{ + baseURL: *cigocachedURL, + cl: httpClient(), + accessToken: *token, + verbose: *verbose, + } + } + var p *cacheproc.Process + p = &cacheproc.Process{ + Close: func() error { + log.Printf("gocacheprog: closing; %d gets (%d hits, %d misses, %d errors); %d puts (%d errors)", + p.Gets.Load(), p.GetHits.Load(), p.GetMisses.Load(), p.GetErrors.Load(), p.Puts.Load(), p.PutErrors.Load()) + return c.close() + }, + Get: c.get, + Put: c.put, + } + + if err := p.Run(); err != nil { + log.Fatal(err) + } +} + +func httpClient() *http.Client { + return &http.Client{ + Transport: &http.Transport{ + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { + host, port, err := net.SplitHostPort(addr) + if err == nil { + // This does not run in a tailnet. We serve corp.ts.net + // TLS certs, and override DNS resolution to lookup the + // private IP for the VM by its hostname. 
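+				// For example, "ci-gocached-azure-1.corp.ts.net:31364" (the
+				// hostname from the package doc) is dialed as
+				// "ci-gocached-azure-1:31364" on the same port.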
+ if vm, ok := strings.CutSuffix(host, ".corp.ts.net"); ok { + addr = net.JoinHostPort(vm, port) + } + } + var d net.Dialer + return d.DialContext(ctx, network, addr) + }, + }, + } +} + +type cigocacher struct { + disk *cachers.DiskCache + gocached *gocachedClient + verbose bool + + getNanos atomic.Int64 // total nanoseconds spent in gets + putNanos atomic.Int64 // total nanoseconds spent in puts + getHTTP atomic.Int64 // HTTP get requests made + getHTTPBytes atomic.Int64 // HTTP get bytes transferred + getHTTPHits atomic.Int64 // HTTP get hits + getHTTPMisses atomic.Int64 // HTTP get misses + getHTTPErrors atomic.Int64 // HTTP get errors ignored on best-effort basis + getHTTPNanos atomic.Int64 // total nanoseconds spent in HTTP gets + putHTTP atomic.Int64 // HTTP put requests made + putHTTPBytes atomic.Int64 // HTTP put bytes transferred + putHTTPErrors atomic.Int64 // HTTP put errors ignored on best-effort basis + putHTTPNanos atomic.Int64 // total nanoseconds spent in HTTP puts +} + +func (c *cigocacher) get(ctx context.Context, actionID string) (outputID, diskPath string, err error) { + t0 := time.Now() + defer func() { + c.getNanos.Add(time.Since(t0).Nanoseconds()) + }() + if c.gocached == nil { + return c.disk.Get(ctx, actionID) + } + + outputID, diskPath, err = c.disk.Get(ctx, actionID) + if err == nil && outputID != "" { + return outputID, diskPath, nil + } + + c.getHTTP.Add(1) + t0HTTP := time.Now() + defer func() { + c.getHTTPNanos.Add(time.Since(t0HTTP).Nanoseconds()) + }() + outputID, res, err := c.gocached.get(ctx, actionID) + if err != nil { + c.getHTTPErrors.Add(1) + return "", "", nil + } + if outputID == "" || res == nil { + c.getHTTPMisses.Add(1) + return "", "", nil + } + + defer res.Body.Close() + + // TODO(tomhjp): make sure we timeout if cigocached disappears, but for some + // reason, this seemed to tank network performance. + // ctx, cancel := context.WithTimeout(ctx, httpTimeout(res.ContentLength)) + // defer cancel() + diskPath, err = c.disk.Put(ctx, actionID, outputID, res.ContentLength, res.Body) + if err != nil { + return "", "", fmt.Errorf("error filling disk cache from HTTP: %w", err) + } + + c.getHTTPHits.Add(1) + c.getHTTPBytes.Add(res.ContentLength) + return outputID, diskPath, nil +} + +func (c *cigocacher) put(ctx context.Context, actionID, outputID string, size int64, r io.Reader) (diskPath string, err error) { + t0 := time.Now() + defer func() { + c.putNanos.Add(time.Since(t0).Nanoseconds()) + }() + if c.gocached == nil { + return c.disk.Put(ctx, actionID, outputID, size, r) + } + + c.putHTTP.Add(1) + var diskReader, httpReader io.Reader + tee := &bestEffortTeeReader{r: r} + if size == 0 { + // Special case the empty file so NewRequest sets "Content-Length: 0", + // as opposed to thinking we didn't set it and not being able to sniff its size + // from the type. + diskReader, httpReader = bytes.NewReader(nil), bytes.NewReader(nil) + } else { + pr, pw := io.Pipe() + defer pw.Close() + // The diskReader is in the driving seat. We will try to forward data + // to httpReader as well, but only best-effort. + diskReader = tee + tee.w = pw + httpReader = pr + } + httpErrCh := make(chan error) + go func() { + // TODO(tomhjp): make sure we timeout if cigocached disappears, but for some + // reason, this seemed to tank network performance. 
+ // ctx, cancel := context.WithTimeout(ctx, httpTimeout(size)) + // defer cancel() + t0HTTP := time.Now() + defer func() { + c.putHTTPNanos.Add(time.Since(t0HTTP).Nanoseconds()) + }() + httpErrCh <- c.gocached.put(ctx, actionID, outputID, size, httpReader) + }() + + diskPath, err = c.disk.Put(ctx, actionID, outputID, size, diskReader) + if err != nil { + return "", fmt.Errorf("error writing to disk cache: %w", errors.Join(err, tee.err)) + } + + select { + case err := <-httpErrCh: + if err != nil { + c.putHTTPErrors.Add(1) + } else { + c.putHTTPBytes.Add(size) + } + case <-ctx.Done(): + } + + return diskPath, nil +} + +func (c *cigocacher) close() error { + log.Printf("cigocacher HTTP stats: %d gets (%.1fMiB, %.2fs, %d hits, %d misses, %d errors ignored); %d puts (%.1fMiB, %.2fs, %d errors ignored)", + c.getHTTP.Load(), float64(c.getHTTPBytes.Load())/float64(1<<20), float64(c.getHTTPNanos.Load())/float64(time.Second), c.getHTTPHits.Load(), c.getHTTPMisses.Load(), c.getHTTPErrors.Load(), + c.putHTTP.Load(), float64(c.putHTTPBytes.Load())/float64(1<<20), float64(c.putHTTPNanos.Load())/float64(time.Second), c.putHTTPErrors.Load()) + if !c.verbose || c.gocached == nil { + return nil + } + + stats, err := c.gocached.fetchStats() + if err != nil { + log.Printf("error fetching gocached stats: %v", err) + } else { + log.Printf("gocached session stats: %s", stats) + } + + return nil +} + +func fetchAccessToken(cl *http.Client, idTokenURL, idTokenRequestToken, gocachedURL string) (string, error) { + req, err := http.NewRequest("GET", idTokenURL+"&audience=gocached", nil) + if err != nil { + return "", err + } + req.Header.Set("Authorization", "Bearer "+idTokenRequestToken) + resp, err := cl.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + type idTokenResp struct { + Value string `json:"value"` + } + var idToken idTokenResp + if err := jsonv1.NewDecoder(resp.Body).Decode(&idToken); err != nil { + return "", err + } + + req, _ = http.NewRequest("POST", gocachedURL+"/auth/exchange-token", strings.NewReader(`{"jwt":"`+idToken.Value+`"}`)) + req.Header.Set("Content-Type", "application/json") + resp, err = cl.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + type accessTokenResp struct { + AccessToken string `json:"access_token"` + } + var accessToken accessTokenResp + if err := jsonv1.NewDecoder(resp.Body).Decode(&accessToken); err != nil { + return "", err + } + + return accessToken.AccessToken, nil +} + +type bestEffortTeeReader struct { + r io.Reader + w io.WriteCloser + err error +} + +func (t *bestEffortTeeReader) Read(p []byte) (int, error) { + n, err := t.r.Read(p) + if n > 0 && t.w != nil { + if _, err := t.w.Write(p[:n]); err != nil { + t.err = errors.Join(err, t.w.Close()) + t.w = nil + } + } + return n, err +} diff --git a/cmd/cigocacher/http.go b/cmd/cigocacher/http.go new file mode 100644 index 000000000..57d3bfb45 --- /dev/null +++ b/cmd/cigocacher/http.go @@ -0,0 +1,115 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +package main + +import ( + "context" + "fmt" + "io" + "log" + "net/http" +) + +type gocachedClient struct { + baseURL string // base URL of the cacher server, like "http://localhost:31364". + cl *http.Client // http.Client to use. + accessToken string // Bearer token to use in the Authorization header. + verbose bool +} + +// drainAndClose reads and throws away a small bounded amount of data. 
This is a +// best-effort attempt to allow connection reuse; Go's HTTP/1 Transport won't +// reuse a TCP connection unless you fully consume HTTP responses. +func drainAndClose(body io.ReadCloser) { + io.CopyN(io.Discard, body, 4<<10) + body.Close() +} + +func tryReadErrorMessage(res *http.Response) []byte { + msg, _ := io.ReadAll(io.LimitReader(res.Body, 4<<10)) + return msg +} + +func (c *gocachedClient) get(ctx context.Context, actionID string) (outputID string, resp *http.Response, err error) { + // TODO(tomhjp): make sure we timeout if cigocached disappears, but for some + // reason, this seemed to tank network performance. + // // Set a generous upper limit on the time we'll wait for a response. We'll + // // shorten this deadline later once we know the content length. + // ctx, cancel := context.WithTimeout(ctx, time.Minute) + // defer cancel() + req, _ := http.NewRequestWithContext(ctx, "GET", c.baseURL+"/action/"+actionID, nil) + req.Header.Set("Want-Object", "1") // opt in to single roundtrip protocol + if c.accessToken != "" { + req.Header.Set("Authorization", "Bearer "+c.accessToken) + } + + res, err := c.cl.Do(req) + if err != nil { + return "", nil, err + } + defer func() { + if resp == nil { + drainAndClose(res.Body) + } + }() + if res.StatusCode == http.StatusNotFound { + return "", nil, nil + } + if res.StatusCode != http.StatusOK { + msg := tryReadErrorMessage(res) + if c.verbose { + log.Printf("error GET /action/%s: %v, %s", actionID, res.Status, msg) + } + return "", nil, fmt.Errorf("unexpected GET /action/%s status %v", actionID, res.Status) + } + + outputID = res.Header.Get("Go-Output-Id") + if outputID == "" { + return "", nil, fmt.Errorf("missing Go-Output-Id header in response") + } + if res.ContentLength == -1 { + return "", nil, fmt.Errorf("no Content-Length from server") + } + return outputID, res, nil +} + +func (c *gocachedClient) put(ctx context.Context, actionID, outputID string, size int64, body io.Reader) error { + req, _ := http.NewRequestWithContext(ctx, "PUT", c.baseURL+"/"+actionID+"/"+outputID, body) + req.ContentLength = size + if c.accessToken != "" { + req.Header.Set("Authorization", "Bearer "+c.accessToken) + } + res, err := c.cl.Do(req) + if err != nil { + if c.verbose { + log.Printf("error PUT /%s/%s: %v", actionID, outputID, err) + } + return err + } + defer res.Body.Close() + if res.StatusCode != http.StatusNoContent { + msg := tryReadErrorMessage(res) + if c.verbose { + log.Printf("error PUT /%s/%s: %v, %s", actionID, outputID, res.Status, msg) + } + return fmt.Errorf("unexpected PUT /%s/%s status %v", actionID, outputID, res.Status) + } + + return nil +} + +func (c *gocachedClient) fetchStats() (string, error) { + req, _ := http.NewRequest("GET", c.baseURL+"/session/stats", nil) + req.Header.Set("Authorization", "Bearer "+c.accessToken) + resp, err := c.cl.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + b, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + return string(b), nil +} diff --git a/cmd/derper/depaware.txt b/cmd/derper/depaware.txt index 0a75ac43e..9c720fa60 100644 --- a/cmd/derper/depaware.txt +++ b/cmd/derper/depaware.txt @@ -2,6 +2,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa filippo.io/edwards25519 from github.com/hdevalence/ed25519consensus filippo.io/edwards25519/field from filippo.io/edwards25519 + github.com/axiomhq/hyperloglog from tailscale.com/derp/derpserver github.com/beorn7/perks/quantile from 
github.com/prometheus/client_golang/prometheus 💣 github.com/cespare/xxhash/v2 from github.com/prometheus/client_golang/prometheus github.com/coder/websocket from tailscale.com/cmd/derper+ @@ -9,6 +10,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa github.com/coder/websocket/internal/util from github.com/coder/websocket github.com/coder/websocket/internal/xsync from github.com/coder/websocket W 💣 github.com/dblohm7/wingoes from tailscale.com/util/winutil + github.com/dgryski/go-metro from github.com/axiomhq/hyperloglog github.com/fxamacker/cbor/v2 from tailscale.com/tka github.com/go-json-experiment/json from tailscale.com/types/opt+ github.com/go-json-experiment/json/internal from github.com/go-json-experiment/json+ @@ -30,9 +32,9 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa github.com/prometheus/client_model/go from github.com/prometheus/client_golang/prometheus+ github.com/prometheus/common/expfmt from github.com/prometheus/client_golang/prometheus+ github.com/prometheus/common/model from github.com/prometheus/client_golang/prometheus+ - LD github.com/prometheus/procfs from github.com/prometheus/client_golang/prometheus - LD github.com/prometheus/procfs/internal/fs from github.com/prometheus/procfs - LD github.com/prometheus/procfs/internal/util from github.com/prometheus/procfs + L github.com/prometheus/procfs from github.com/prometheus/client_golang/prometheus + L github.com/prometheus/procfs/internal/fs from github.com/prometheus/procfs + L github.com/prometheus/procfs/internal/util from github.com/prometheus/procfs W 💣 github.com/tailscale/go-winio from tailscale.com/safesocket W 💣 github.com/tailscale/go-winio/internal/fs from github.com/tailscale/go-winio W 💣 github.com/tailscale/go-winio/internal/socket from github.com/tailscale/go-winio @@ -72,7 +74,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa google.golang.org/protobuf/reflect/protoregistry from google.golang.org/protobuf/encoding/prototext+ google.golang.org/protobuf/runtime/protoiface from google.golang.org/protobuf/internal/impl+ google.golang.org/protobuf/runtime/protoimpl from github.com/prometheus/client_model/go+ - google.golang.org/protobuf/types/known/timestamppb from github.com/prometheus/client_golang/prometheus+ + 💣 google.golang.org/protobuf/types/known/timestamppb from github.com/prometheus/client_golang/prometheus+ tailscale.com from tailscale.com/version 💣 tailscale.com/atomicfile from tailscale.com/cmd/derper+ tailscale.com/client/local from tailscale.com/derp/derpserver diff --git a/cmd/k8s-operator/api-server-proxy-pg_test.go b/cmd/k8s-operator/api-server-proxy-pg_test.go index dfef63f22..dee505723 100644 --- a/cmd/k8s-operator/api-server-proxy-pg_test.go +++ b/cmd/k8s-operator/api-server-proxy-pg_test.go @@ -182,9 +182,7 @@ func TestAPIServerProxyReconciler(t *testing.T) { expectEqual(t, fc, certSecretRoleBinding(pg, ns, defaultDomain)) // Simulate certs being issued; should observe AdvertiseServices config change. - if err := populateTLSSecret(t.Context(), fc, pgName, defaultDomain); err != nil { - t.Fatalf("populating TLS Secret: %v", err) - } + populateTLSSecret(t, fc, pgName, defaultDomain) expectReconciled(t, r, "", pgName) expectedCfg.AdvertiseServices = []string{"svc:" + pgName} @@ -247,9 +245,7 @@ func TestAPIServerProxyReconciler(t *testing.T) { expectMissing[rbacv1.RoleBinding](t, fc, ns, defaultDomain) // Check we get the new hostname in the status once ready. 
- if err := populateTLSSecret(t.Context(), fc, pgName, updatedDomain); err != nil { - t.Fatalf("populating TLS Secret: %v", err) - } + populateTLSSecret(t, fc, pgName, updatedDomain) mustUpdate(t, fc, "operator-ns", "test-pg-0", func(s *corev1.Secret) { s.Data["profile-foo"] = []byte(`{"AdvertiseServices":["svc:test-pg"],"Config":{"NodeID":"node-foo"}}`) }) diff --git a/cmd/k8s-operator/depaware.txt b/cmd/k8s-operator/depaware.txt index 16ad089f3..c76a4236e 100644 --- a/cmd/k8s-operator/depaware.txt +++ b/cmd/k8s-operator/depaware.txt @@ -71,8 +71,9 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ github.com/klauspost/compress/fse from github.com/klauspost/compress/huff0 github.com/klauspost/compress/huff0 from github.com/klauspost/compress/zstd github.com/klauspost/compress/internal/cpuinfo from github.com/klauspost/compress/huff0+ + 💣 github.com/klauspost/compress/internal/le from github.com/klauspost/compress/huff0+ github.com/klauspost/compress/internal/snapref from github.com/klauspost/compress/zstd - github.com/klauspost/compress/zstd from tailscale.com/util/zstdframe+ + github.com/klauspost/compress/zstd from tailscale.com/util/zstdframe github.com/klauspost/compress/zstd/internal/xxhash from github.com/klauspost/compress/zstd github.com/mailru/easyjson/buffer from github.com/mailru/easyjson/jwriter 💣 github.com/mailru/easyjson/jlexer from github.com/go-openapi/swag @@ -94,6 +95,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ github.com/prometheus/client_golang/prometheus/collectors from sigs.k8s.io/controller-runtime/pkg/internal/controller/metrics+ github.com/prometheus/client_golang/prometheus/internal from github.com/prometheus/client_golang/prometheus+ github.com/prometheus/client_golang/prometheus/promhttp from sigs.k8s.io/controller-runtime/pkg/metrics/server+ + github.com/prometheus/client_golang/prometheus/promhttp/internal from github.com/prometheus/client_golang/prometheus/promhttp github.com/prometheus/client_model/go from github.com/prometheus/client_golang/prometheus+ github.com/prometheus/common/expfmt from github.com/prometheus/client_golang/prometheus+ github.com/prometheus/common/model from github.com/prometheus/client_golang/prometheus+ @@ -180,10 +182,10 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ google.golang.org/protobuf/reflect/protoregistry from github.com/golang/protobuf/proto+ google.golang.org/protobuf/runtime/protoiface from github.com/golang/protobuf/proto+ google.golang.org/protobuf/runtime/protoimpl from github.com/golang/protobuf/proto+ - google.golang.org/protobuf/types/descriptorpb from github.com/google/gnostic-models/openapiv3+ - google.golang.org/protobuf/types/gofeaturespb from google.golang.org/protobuf/reflect/protodesc - google.golang.org/protobuf/types/known/anypb from github.com/google/gnostic-models/compiler+ - google.golang.org/protobuf/types/known/timestamppb from github.com/prometheus/client_golang/prometheus+ + 💣 google.golang.org/protobuf/types/descriptorpb from github.com/google/gnostic-models/openapiv3+ + 💣 google.golang.org/protobuf/types/gofeaturespb from google.golang.org/protobuf/reflect/protodesc + 💣 google.golang.org/protobuf/types/known/anypb from github.com/google/gnostic-models/compiler+ + 💣 google.golang.org/protobuf/types/known/timestamppb from github.com/prometheus/client_golang/prometheus+ gopkg.in/evanphx/json-patch.v4 from k8s.io/client-go/testing gopkg.in/inf.v0 from k8s.io/apimachinery/pkg/api/resource 
gopkg.in/yaml.v3 from github.com/go-openapi/swag+ diff --git a/cmd/k8s-operator/deploy/crds/tailscale.com_recorders.yaml b/cmd/k8s-operator/deploy/crds/tailscale.com_recorders.yaml index 0f3dcfcca..48db3ef4b 100644 --- a/cmd/k8s-operator/deploy/crds/tailscale.com_recorders.yaml +++ b/cmd/k8s-operator/deploy/crds/tailscale.com_recorders.yaml @@ -68,6 +68,11 @@ spec: Corresponds to --ui tsrecorder flag https://tailscale.com/kb/1246/tailscale-ssh-session-recording#deploy-a-recorder-node. Required if S3 storage is not set up, to ensure that recordings are accessible. type: boolean + replicas: + description: Replicas specifies how many instances of tsrecorder to run. Defaults to 1. + type: integer + format: int32 + minimum: 0 statefulSet: description: |- Configuration parameters for the Recorder's StatefulSet. The operator @@ -1683,6 +1688,9 @@ spec: items: type: string pattern: ^tag:[a-zA-Z][a-zA-Z0-9-]*$ + x-kubernetes-validations: + - rule: '!(self.replicas > 1 && (!has(self.storage) || !has(self.storage.s3)))' + message: S3 storage must be used when deploying multiple Recorder replicas status: description: |- RecorderStatus describes the status of the recorder. This is set diff --git a/cmd/k8s-operator/deploy/manifests/operator.yaml b/cmd/k8s-operator/deploy/manifests/operator.yaml index c5da367e0..2757f09e5 100644 --- a/cmd/k8s-operator/deploy/manifests/operator.yaml +++ b/cmd/k8s-operator/deploy/manifests/operator.yaml @@ -3348,6 +3348,11 @@ spec: Corresponds to --ui tsrecorder flag https://tailscale.com/kb/1246/tailscale-ssh-session-recording#deploy-a-recorder-node. Required if S3 storage is not set up, to ensure that recordings are accessible. type: boolean + replicas: + description: Replicas specifies how many instances of tsrecorder to run. Defaults to 1. + format: int32 + minimum: 0 + type: integer statefulSet: description: |- Configuration parameters for the Recorder's StatefulSet. The operator @@ -4964,6 +4969,9 @@ spec: type: string type: array type: object + x-kubernetes-validations: + - message: S3 storage must be used when deploying multiple Recorder replicas + rule: '!(self.replicas > 1 && (!has(self.storage) || !has(self.storage.s3)))' status: description: |- RecorderStatus describes the status of the recorder. 
This is set diff --git a/cmd/k8s-operator/ingress-for-pg.go b/cmd/k8s-operator/ingress-for-pg.go index 4d8311805..460a1914e 100644 --- a/cmd/k8s-operator/ingress-for-pg.go +++ b/cmd/k8s-operator/ingress-for-pg.go @@ -29,6 +29,7 @@ import ( "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "tailscale.com/internal/client/tailscale" "tailscale.com/ipn" "tailscale.com/ipn/ipnstate" @@ -504,10 +505,7 @@ func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string, logger.Infof("Ensuring that Tailscale Service %q configuration is cleaned up", hostname) serviceName := tailcfg.ServiceName("svc:" + hostname) svc, err := r.tsClient.GetVIPService(ctx, serviceName) - if err != nil { - if isErrorTailscaleServiceNotFound(err) { - return false, nil - } + if err != nil && !isErrorTailscaleServiceNotFound(err) { return false, fmt.Errorf("error getting Tailscale Service: %w", err) } @@ -713,10 +711,15 @@ func (r *HAIngressReconciler) cleanupTailscaleService(ctx context.Context, svc * } if len(o.OwnerRefs) == 1 { logger.Infof("Deleting Tailscale Service %q", svc.Name) - return false, r.tsClient.DeleteVIPService(ctx, svc.Name) + if err = r.tsClient.DeleteVIPService(ctx, svc.Name); err != nil && !isErrorTailscaleServiceNotFound(err) { + return false, err + } + + return false, nil } + o.OwnerRefs = slices.Delete(o.OwnerRefs, ix, ix+1) - logger.Infof("Deleting Tailscale Service %q", svc.Name) + logger.Infof("Creating/Updating Tailscale Service %q", svc.Name) json, err := json.Marshal(o) if err != nil { return false, fmt.Errorf("error marshalling updated Tailscale Service owner reference: %w", err) diff --git a/cmd/k8s-operator/ingress-for-pg_test.go b/cmd/k8s-operator/ingress-for-pg_test.go index 77e5ecb37..5cc806ad1 100644 --- a/cmd/k8s-operator/ingress-for-pg_test.go +++ b/cmd/k8s-operator/ingress-for-pg_test.go @@ -25,6 +25,7 @@ import ( "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "tailscale.com/internal/client/tailscale" "tailscale.com/ipn" "tailscale.com/ipn/ipnstate" @@ -67,7 +68,7 @@ func TestIngressPGReconciler(t *testing.T) { // Verify initial reconciliation expectReconciled(t, ingPGR, "default", "test-ingress") - populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net") + populateTLSSecret(t, fc, "test-pg", "my-svc.ts.net") expectReconciled(t, ingPGR, "default", "test-ingress") verifyServeConfig(t, fc, "svc:my-svc", false) verifyTailscaleService(t, ft, "svc:my-svc", []string{"tcp:443"}) @@ -89,7 +90,7 @@ func TestIngressPGReconciler(t *testing.T) { expectReconciled(t, ingPGR, "default", "test-ingress") // Verify Tailscale Service uses custom tags - tsSvc, err := ft.GetVIPService(context.Background(), "svc:my-svc") + tsSvc, err := ft.GetVIPService(t.Context(), "svc:my-svc") if err != nil { t.Fatalf("getting Tailscale Service: %v", err) } @@ -134,7 +135,7 @@ func TestIngressPGReconciler(t *testing.T) { // Verify second Ingress reconciliation expectReconciled(t, ingPGR, "default", "my-other-ingress") - populateTLSSecret(context.Background(), fc, "test-pg", "my-other-svc.ts.net") + populateTLSSecret(t, fc, "test-pg", "my-other-svc.ts.net") expectReconciled(t, ingPGR, "default", "my-other-ingress") verifyServeConfig(t, fc, "svc:my-other-svc", false) verifyTailscaleService(t, ft, "svc:my-other-svc", []string{"tcp:443"}) @@ -151,14 +152,14 @@ func TestIngressPGReconciler(t *testing.T) { verifyTailscaledConfig(t, fc, 
"test-pg", []string{"svc:my-svc", "svc:my-other-svc"}) // Delete second Ingress - if err := fc.Delete(context.Background(), ing2); err != nil { + if err := fc.Delete(t.Context(), ing2); err != nil { t.Fatalf("deleting second Ingress: %v", err) } expectReconciled(t, ingPGR, "default", "my-other-ingress") // Verify second Ingress cleanup cm := &corev1.ConfigMap{} - if err := fc.Get(context.Background(), types.NamespacedName{ + if err := fc.Get(t.Context(), types.NamespacedName{ Name: "test-pg-ingress-config", Namespace: "operator-ns", }, cm); err != nil { @@ -199,7 +200,7 @@ func TestIngressPGReconciler(t *testing.T) { expectEqual(t, fc, certSecretRoleBinding(pg, "operator-ns", "my-svc.ts.net")) // Delete the first Ingress and verify cleanup - if err := fc.Delete(context.Background(), ing); err != nil { + if err := fc.Delete(t.Context(), ing); err != nil { t.Fatalf("deleting Ingress: %v", err) } @@ -207,7 +208,7 @@ func TestIngressPGReconciler(t *testing.T) { // Verify the ConfigMap was cleaned up cm = &corev1.ConfigMap{} - if err := fc.Get(context.Background(), types.NamespacedName{ + if err := fc.Get(t.Context(), types.NamespacedName{ Name: "test-pg-second-ingress-config", Namespace: "operator-ns", }, cm); err != nil { @@ -228,6 +229,47 @@ func TestIngressPGReconciler(t *testing.T) { expectMissing[corev1.Secret](t, fc, "operator-ns", "my-svc.ts.net") expectMissing[rbacv1.Role](t, fc, "operator-ns", "my-svc.ts.net") expectMissing[rbacv1.RoleBinding](t, fc, "operator-ns", "my-svc.ts.net") + + // Create a third ingress + ing3 := &networkingv1.Ingress{ + TypeMeta: metav1.TypeMeta{Kind: "Ingress", APIVersion: "networking.k8s.io/v1"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "my-other-ingress", + Namespace: "default", + UID: types.UID("5678-UID"), + Annotations: map[string]string{ + "tailscale.com/proxy-group": "test-pg", + }, + }, + Spec: networkingv1.IngressSpec{ + IngressClassName: ptr.To("tailscale"), + DefaultBackend: &networkingv1.IngressBackend{ + Service: &networkingv1.IngressServiceBackend{ + Name: "test", + Port: networkingv1.ServiceBackendPort{ + Number: 8080, + }, + }, + }, + TLS: []networkingv1.IngressTLS{ + {Hosts: []string{"my-other-svc.tailnetxyz.ts.net"}}, + }, + }, + } + + mustCreate(t, fc, ing3) + expectReconciled(t, ingPGR, ing3.Namespace, ing3.Name) + + // Delete the service from "control" + ft.vipServices = make(map[tailcfg.ServiceName]*tailscale.VIPService) + + // Delete the ingress and confirm we don't get stuck due to the VIP service not existing. 
+ if err = fc.Delete(t.Context(), ing3); err != nil { + t.Fatalf("deleting Ingress: %v", err) + } + + expectReconciled(t, ingPGR, ing3.Namespace, ing3.Name) + expectMissing[networkingv1.Ingress](t, fc, ing3.Namespace, ing3.Name) } func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) { @@ -262,7 +304,7 @@ func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) { // Verify initial reconciliation expectReconciled(t, ingPGR, "default", "test-ingress") - populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net") + populateTLSSecret(t, fc, "test-pg", "my-svc.ts.net") expectReconciled(t, ingPGR, "default", "test-ingress") verifyServeConfig(t, fc, "svc:my-svc", false) verifyTailscaleService(t, ft, "svc:my-svc", []string{"tcp:443"}) @@ -273,13 +315,13 @@ func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) { ing.Spec.TLS[0].Hosts[0] = "updated-svc" }) expectReconciled(t, ingPGR, "default", "test-ingress") - populateTLSSecret(context.Background(), fc, "test-pg", "updated-svc.ts.net") + populateTLSSecret(t, fc, "test-pg", "updated-svc.ts.net") expectReconciled(t, ingPGR, "default", "test-ingress") verifyServeConfig(t, fc, "svc:updated-svc", false) verifyTailscaleService(t, ft, "svc:updated-svc", []string{"tcp:443"}) verifyTailscaledConfig(t, fc, "test-pg", []string{"svc:updated-svc"}) - _, err := ft.GetVIPService(context.Background(), tailcfg.ServiceName("svc:my-svc")) + _, err := ft.GetVIPService(context.Background(), "svc:my-svc") if err == nil { t.Fatalf("svc:my-svc not cleaned up") } @@ -500,7 +542,7 @@ func TestIngressPGReconciler_HTTPEndpoint(t *testing.T) { // Verify initial reconciliation with HTTP enabled expectReconciled(t, ingPGR, "default", "test-ingress") - populateTLSSecret(context.Background(), fc, "test-pg", "my-svc.ts.net") + populateTLSSecret(t, fc, "test-pg", "my-svc.ts.net") expectReconciled(t, ingPGR, "default", "test-ingress") verifyTailscaleService(t, ft, "svc:my-svc", []string{"tcp:80", "tcp:443"}) verifyServeConfig(t, fc, "svc:my-svc", true) @@ -717,7 +759,9 @@ func TestOwnerAnnotations(t *testing.T) { } } -func populateTLSSecret(ctx context.Context, c client.Client, pgName, domain string) error { +func populateTLSSecret(t *testing.T, c client.Client, pgName, domain string) { + t.Helper() + secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: domain, @@ -736,10 +780,12 @@ func populateTLSSecret(ctx context.Context, c client.Client, pgName, domain stri }, } - _, err := createOrUpdate(ctx, c, "operator-ns", secret, func(s *corev1.Secret) { + _, err := createOrUpdate(t.Context(), c, "operator-ns", secret, func(s *corev1.Secret) { s.Data = secret.Data }) - return err + if err != nil { + t.Fatalf("failed to populate TLS secret: %v", err) + } } func verifyTailscaleService(t *testing.T, ft *fakeTSClient, serviceName string, wantPorts []string) { diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index 6b545a827..816fea566 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -44,10 +44,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager/signals" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" - "tailscale.com/envknob" "tailscale.com/client/local" "tailscale.com/client/tailscale" + "tailscale.com/envknob" "tailscale.com/hostinfo" "tailscale.com/ipn" "tailscale.com/ipn/store/kubestore" diff --git a/cmd/k8s-operator/tsrecorder.go b/cmd/k8s-operator/tsrecorder.go index c922f78fe..bfb01fa86 100644 --- a/cmd/k8s-operator/tsrecorder.go 
+++ b/cmd/k8s-operator/tsrecorder.go @@ -12,6 +12,7 @@ import ( "fmt" "net/http" "slices" + "strconv" "strings" "sync" @@ -29,6 +30,7 @@ import ( "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "tailscale.com/client/tailscale" tsoperator "tailscale.com/k8s-operator" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" @@ -69,13 +71,13 @@ func (r *RecorderReconciler) logger(name string) *zap.SugaredLogger { return r.log.With("Recorder", name) } -func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) { +func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { logger := r.logger(req.Name) logger.Debugf("starting reconcile") defer logger.Debugf("reconcile finished") tsr := new(tsapi.Recorder) - err = r.Get(ctx, req.NamespacedName, tsr) + err := r.Get(ctx, req.NamespacedName, tsr) if apierrors.IsNotFound(err) { logger.Debugf("Recorder not found, assuming it was deleted") return reconcile.Result{}, nil @@ -98,7 +100,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques } tsr.Finalizers = slices.Delete(tsr.Finalizers, ix, ix+1) - if err := r.Update(ctx, tsr); err != nil { + if err = r.Update(ctx, tsr); err != nil { return reconcile.Result{}, err } return reconcile.Result{}, nil @@ -110,10 +112,11 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques if !apiequality.Semantic.DeepEqual(oldTSRStatus, &tsr.Status) { // An error encountered here should get returned by the Reconcile function. if updateErr := r.Client.Status().Update(ctx, tsr); updateErr != nil { - err = errors.Join(err, updateErr) + return reconcile.Result{}, errors.Join(err, updateErr) } } - return reconcile.Result{}, err + + return reconcile.Result{}, nil } if !slices.Contains(tsr.Finalizers, FinalizerName) { @@ -123,12 +126,12 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques // operation is underway. logger.Infof("ensuring Recorder is set up") tsr.Finalizers = append(tsr.Finalizers, FinalizerName) - if err := r.Update(ctx, tsr); err != nil { + if err = r.Update(ctx, tsr); err != nil { return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderCreationFailed, reasonRecorderCreationFailed) } } - if err := r.validate(ctx, tsr); err != nil { + if err = r.validate(ctx, tsr); err != nil { message := fmt.Sprintf("Recorder is invalid: %s", err) r.recorder.Eventf(tsr, corev1.EventTypeWarning, reasonRecorderInvalid, message) return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderInvalid, message) @@ -160,19 +163,29 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco gaugeRecorderResources.Set(int64(r.recorders.Len())) r.mu.Unlock() - if err := r.ensureAuthSecretCreated(ctx, tsr); err != nil { + if err := r.ensureAuthSecretsCreated(ctx, tsr); err != nil { return fmt.Errorf("error creating secrets: %w", err) } - // State Secret is precreated so we can use the Recorder CR as its owner ref. - sec := tsrStateSecret(tsr, r.tsNamespace) - if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sec, func(s *corev1.Secret) { - s.ObjectMeta.Labels = sec.ObjectMeta.Labels - s.ObjectMeta.Annotations = sec.ObjectMeta.Annotations - }); err != nil { - return fmt.Errorf("error creating state Secret: %w", err) + + // State Secrets are pre-created so we can use the Recorder CR as its owner ref. 
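+	// Each state Secret is named "<recorder>-<ordinal>", lining up with the
+	// StatefulSet pod that uses it via TS_STATE=kube:$(POD_NAME).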
+ var replicas int32 = 1 + if tsr.Spec.Replicas != nil { + replicas = *tsr.Spec.Replicas + } + + for replica := range replicas { + sec := tsrStateSecret(tsr, r.tsNamespace, replica) + _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sec, func(s *corev1.Secret) { + s.ObjectMeta.Labels = sec.ObjectMeta.Labels + s.ObjectMeta.Annotations = sec.ObjectMeta.Annotations + }) + if err != nil { + return fmt.Errorf("error creating state Secret %q: %w", sec.Name, err) + } } + sa := tsrServiceAccount(tsr, r.tsNamespace) - if _, err := createOrMaybeUpdate(ctx, r.Client, r.tsNamespace, sa, func(s *corev1.ServiceAccount) error { + _, err := createOrMaybeUpdate(ctx, r.Client, r.tsNamespace, sa, func(s *corev1.ServiceAccount) error { // Perform this check within the update function to make sure we don't // have a race condition between the previous check and the update. if err := saOwnedByRecorder(s, tsr); err != nil { @@ -183,54 +196,68 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco s.ObjectMeta.Annotations = sa.ObjectMeta.Annotations return nil - }); err != nil { + }) + if err != nil { return fmt.Errorf("error creating ServiceAccount: %w", err) } + role := tsrRole(tsr, r.tsNamespace) - if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, role, func(r *rbacv1.Role) { + _, err = createOrUpdate(ctx, r.Client, r.tsNamespace, role, func(r *rbacv1.Role) { r.ObjectMeta.Labels = role.ObjectMeta.Labels r.ObjectMeta.Annotations = role.ObjectMeta.Annotations r.Rules = role.Rules - }); err != nil { + }) + if err != nil { return fmt.Errorf("error creating Role: %w", err) } + roleBinding := tsrRoleBinding(tsr, r.tsNamespace) - if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, roleBinding, func(r *rbacv1.RoleBinding) { + _, err = createOrUpdate(ctx, r.Client, r.tsNamespace, roleBinding, func(r *rbacv1.RoleBinding) { r.ObjectMeta.Labels = roleBinding.ObjectMeta.Labels r.ObjectMeta.Annotations = roleBinding.ObjectMeta.Annotations r.RoleRef = roleBinding.RoleRef r.Subjects = roleBinding.Subjects - }); err != nil { + }) + if err != nil { return fmt.Errorf("error creating RoleBinding: %w", err) } + ss := tsrStatefulSet(tsr, r.tsNamespace, r.loginServer) - if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) { + _, err = createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) { s.ObjectMeta.Labels = ss.ObjectMeta.Labels s.ObjectMeta.Annotations = ss.ObjectMeta.Annotations s.Spec = ss.Spec - }); err != nil { + }) + if err != nil { return fmt.Errorf("error creating StatefulSet: %w", err) } // ServiceAccount name may have changed, in which case we need to clean up // the previous ServiceAccount. RoleBinding will already be updated to point // to the new ServiceAccount. - if err := r.maybeCleanupServiceAccounts(ctx, tsr, sa.Name); err != nil { + if err = r.maybeCleanupServiceAccounts(ctx, tsr, sa.Name); err != nil { return fmt.Errorf("error cleaning up ServiceAccounts: %w", err) } + // If we have scaled the recorder down, we will have dangling state secrets + // that we need to clean up. 
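+	// For example, scaling a Recorder named "test" from 3 replicas down to 1
+	// leaves "test-1", "test-2", "test-auth-1", and "test-auth-2" dangling.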
+ if err = r.maybeCleanupSecrets(ctx, tsr); err != nil { + return fmt.Errorf("error cleaning up Secrets: %w", err) + } + var devices []tsapi.RecorderTailnetDevice + for replica := range replicas { + dev, ok, err := r.getDeviceInfo(ctx, tsr.Name, replica) + switch { + case err != nil: + return fmt.Errorf("failed to get device info: %w", err) + case !ok: + logger.Debugf("no Tailscale hostname known yet, waiting for Recorder pod to finish auth") + continue + } - device, ok, err := r.getDeviceInfo(ctx, tsr.Name) - if err != nil { - return fmt.Errorf("failed to get device info: %w", err) + devices = append(devices, dev) } - if !ok { - logger.Debugf("no Tailscale hostname known yet, waiting for Recorder pod to finish auth") - return nil - } - - devices = append(devices, device) tsr.Status.Devices = devices @@ -257,22 +284,89 @@ func saOwnedByRecorder(sa *corev1.ServiceAccount, tsr *tsapi.Recorder) error { func (r *RecorderReconciler) maybeCleanupServiceAccounts(ctx context.Context, tsr *tsapi.Recorder, currentName string) error { logger := r.logger(tsr.Name) - // List all ServiceAccounts owned by this Recorder. + options := []client.ListOption{ + client.InNamespace(r.tsNamespace), + client.MatchingLabels(tsrLabels("recorder", tsr.Name, nil)), + } + sas := &corev1.ServiceAccountList{} - if err := r.List(ctx, sas, client.InNamespace(r.tsNamespace), client.MatchingLabels(labels("recorder", tsr.Name, nil))); err != nil { + if err := r.List(ctx, sas, options...); err != nil { return fmt.Errorf("error listing ServiceAccounts for cleanup: %w", err) } - for _, sa := range sas.Items { - if sa.Name == currentName { + + for _, serviceAccount := range sas.Items { + if serviceAccount.Name == currentName { + continue + } + + err := r.Delete(ctx, &serviceAccount) + switch { + case apierrors.IsNotFound(err): + logger.Debugf("ServiceAccount %s not found, likely already deleted", serviceAccount.Name) + continue + case err != nil: + return fmt.Errorf("error deleting ServiceAccount %s: %w", serviceAccount.Name, err) + } + } + + return nil +} + +func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tsr *tsapi.Recorder) error { + options := []client.ListOption{ + client.InNamespace(r.tsNamespace), + client.MatchingLabels(tsrLabels("recorder", tsr.Name, nil)), + } + + secrets := &corev1.SecretList{} + if err := r.List(ctx, secrets, options...); err != nil { + return fmt.Errorf("error listing Secrets for cleanup: %w", err) + } + + // Get the largest ordinal suffix that we expect. Then we'll go through the list of secrets owned by this + // recorder and remove them. 
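+	// Only secrets whose ordinal suffix is >= the desired replica count are
+	// deleted; lower ordinals still belong to running replicas. Any device
+	// recorded in a doomed secret's prefs is deleted from control first.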
+ var replicas int32 = 1 + if tsr.Spec.Replicas != nil { + replicas = *tsr.Spec.Replicas + } + + for _, secret := range secrets.Items { + parts := strings.Split(secret.Name, "-") + if len(parts) == 0 { + continue + } + + ordinal, err := strconv.ParseUint(parts[len(parts)-1], 10, 32) + if err != nil { + return fmt.Errorf("error parsing secret name %q: %w", secret.Name, err) + } + + if int32(ordinal) < replicas { continue } - if err := r.Delete(ctx, &sa); err != nil { - if apierrors.IsNotFound(err) { - logger.Debugf("ServiceAccount %s not found, likely already deleted", sa.Name) - } else { - return fmt.Errorf("error deleting ServiceAccount %s: %w", sa.Name, err) + + devicePrefs, ok, err := getDevicePrefs(&secret) + if err != nil { + return err + } + + if ok { + var errResp *tailscale.ErrResponse + + r.log.Debugf("deleting device %s", devicePrefs.Config.NodeID) + err = r.tsClient.DeleteDevice(ctx, string(devicePrefs.Config.NodeID)) + switch { + case errors.As(err, &errResp) && errResp.Status == http.StatusNotFound: + // This device has possibly already been deleted in the admin console. So we can ignore this + // and move on to removing the secret. + case err != nil: + return err } } + + if err = r.Delete(ctx, &secret); err != nil { + return err + } } return nil @@ -284,30 +378,38 @@ func (r *RecorderReconciler) maybeCleanupServiceAccounts(ctx context.Context, ts func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Recorder) (bool, error) { logger := r.logger(tsr.Name) - prefs, ok, err := r.getDevicePrefs(ctx, tsr.Name) - if err != nil { - return false, err + var replicas int32 = 1 + if tsr.Spec.Replicas != nil { + replicas = *tsr.Spec.Replicas } - if !ok { - logger.Debugf("state Secret %s-0 not found or does not contain node ID, continuing cleanup", tsr.Name) - r.mu.Lock() - r.recorders.Remove(tsr.UID) - gaugeRecorderResources.Set(int64(r.recorders.Len())) - r.mu.Unlock() - return true, nil - } - - id := string(prefs.Config.NodeID) - logger.Debugf("deleting device %s from control", string(id)) - if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil { - errResp := &tailscale.ErrResponse{} - if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound { - logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id)) - } else { + + for replica := range replicas { + devicePrefs, ok, err := r.getDevicePrefs(ctx, tsr.Name, replica) + if err != nil { + return false, err + } + if !ok { + logger.Debugf("state Secret %s-%d not found or does not contain node ID, continuing cleanup", tsr.Name, replica) + r.mu.Lock() + r.recorders.Remove(tsr.UID) + gaugeRecorderResources.Set(int64(r.recorders.Len())) + r.mu.Unlock() + return true, nil + } + + nodeID := string(devicePrefs.Config.NodeID) + logger.Debugf("deleting device %s from control", nodeID) + if err = r.tsClient.DeleteDevice(ctx, nodeID); err != nil { + errResp := &tailscale.ErrResponse{} + if errors.As(err, errResp) && errResp.Status == http.StatusNotFound { + logger.Debugf("device %s not found, likely because it has already been deleted from control", nodeID) + continue + } + return false, fmt.Errorf("error deleting device: %w", err) } - } else { - logger.Debugf("device %s deleted from control", string(id)) + + logger.Debugf("device %s deleted from control", nodeID) } // Unlike most log entries in the reconcile loop, this will get printed @@ -319,38 +421,46 @@ func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Record 
r.recorders.Remove(tsr.UID) gaugeRecorderResources.Set(int64(r.recorders.Len())) r.mu.Unlock() + return true, nil } -func (r *RecorderReconciler) ensureAuthSecretCreated(ctx context.Context, tsr *tsapi.Recorder) error { - logger := r.logger(tsr.Name) - key := types.NamespacedName{ - Namespace: r.tsNamespace, - Name: tsr.Name, - } - if err := r.Get(ctx, key, &corev1.Secret{}); err == nil { - // No updates, already created the auth key. - logger.Debugf("auth Secret %s already exists", key.Name) - return nil - } else if !apierrors.IsNotFound(err) { - return err +func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tsr *tsapi.Recorder) error { + var replicas int32 = 1 + if tsr.Spec.Replicas != nil { + replicas = *tsr.Spec.Replicas } - // Create the auth key Secret which is going to be used by the StatefulSet - // to authenticate with Tailscale. - logger.Debugf("creating authkey for new Recorder") tags := tsr.Spec.Tags if len(tags) == 0 { tags = tsapi.Tags{"tag:k8s"} } - authKey, err := newAuthKey(ctx, r.tsClient, tags.Stringify()) - if err != nil { - return err - } - logger.Debug("creating a new Secret for the Recorder") - if err := r.Create(ctx, tsrAuthSecret(tsr, r.tsNamespace, authKey)); err != nil { - return err + logger := r.logger(tsr.Name) + + for replica := range replicas { + key := types.NamespacedName{ + Namespace: r.tsNamespace, + Name: fmt.Sprintf("%s-auth-%d", tsr.Name, replica), + } + + err := r.Get(ctx, key, &corev1.Secret{}) + switch { + case err == nil: + logger.Debugf("auth Secret %q already exists", key.Name) + continue + case !apierrors.IsNotFound(err): + return fmt.Errorf("failed to get Secret %q: %w", key.Name, err) + } + + authKey, err := newAuthKey(ctx, r.tsClient, tags.Stringify()) + if err != nil { + return err + } + + if err = r.Create(ctx, tsrAuthSecret(tsr, r.tsNamespace, authKey, replica)); err != nil { + return err + } } return nil @@ -361,6 +471,10 @@ func (r *RecorderReconciler) validate(ctx context.Context, tsr *tsapi.Recorder) return errors.New("must either enable UI or use S3 storage to ensure recordings are accessible") } + if tsr.Spec.Replicas != nil && *tsr.Spec.Replicas > 1 && tsr.Spec.Storage.S3 == nil { + return errors.New("must use S3 storage when using multiple replicas to ensure recordings are accessible") + } + // Check any custom ServiceAccount config doesn't conflict with pre-existing // ServiceAccounts. This check is performed once during validation to ensure // errors are raised early, but also again during any Updates to prevent a race. 
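A note on repetition in this diff: the "var replicas int32 = 1" / "if tsr.Spec.Replicas != nil" defaulting appears in maybeProvision, maybeCleanupSecrets, maybeCleanup, ensureAuthSecretsCreated, tsrStatefulSet, and tsrRole. A minimal sketch of a helper that could centralize it (hypothetical name, not part of this diff):

	package main

	import tsapi "tailscale.com/k8s-operator/apis/v1alpha1"

	// replicaCount returns the desired number of tsrecorder replicas,
	// defaulting to 1 when Spec.Replicas is unset.
	func replicaCount(tsr *tsapi.Recorder) int32 {
		if tsr.Spec.Replicas != nil {
			return *tsr.Spec.Replicas
		}
		return 1
	}

Call sites would then read "replicas := replicaCount(tsr)" instead of restating the default everywhere.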
@@ -394,11 +508,11 @@ func (r *RecorderReconciler) validate(ctx context.Context, tsr *tsapi.Recorder) return nil } -func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string) (*corev1.Secret, error) { +func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string, replica int32) (*corev1.Secret, error) { secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Namespace: r.tsNamespace, - Name: fmt.Sprintf("%s-0", tsrName), + Name: fmt.Sprintf("%s-%d", tsrName, replica), }, } if err := r.Get(ctx, client.ObjectKeyFromObject(secret), secret); err != nil { @@ -412,8 +526,8 @@ func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string) return secret, nil } -func (r *RecorderReconciler) getDevicePrefs(ctx context.Context, tsrName string) (prefs prefs, ok bool, err error) { - secret, err := r.getStateSecret(ctx, tsrName) +func (r *RecorderReconciler) getDevicePrefs(ctx context.Context, tsrName string, replica int32) (prefs prefs, ok bool, err error) { + secret, err := r.getStateSecret(ctx, tsrName, replica) if err != nil || secret == nil { return prefs, false, err } @@ -441,8 +555,8 @@ func getDevicePrefs(secret *corev1.Secret) (prefs prefs, ok bool, err error) { return prefs, ok, nil } -func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string) (d tsapi.RecorderTailnetDevice, ok bool, err error) { - secret, err := r.getStateSecret(ctx, tsrName) +func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string, replica int32) (d tsapi.RecorderTailnetDevice, ok bool, err error) { + secret, err := r.getStateSecret(ctx, tsrName, replica) if err != nil || secret == nil { return tsapi.RecorderTailnetDevice{}, false, err } diff --git a/cmd/k8s-operator/tsrecorder_specs.go b/cmd/k8s-operator/tsrecorder_specs.go index 83d7439db..b4a10f296 100644 --- a/cmd/k8s-operator/tsrecorder_specs.go +++ b/cmd/k8s-operator/tsrecorder_specs.go @@ -12,30 +12,36 @@ import ( corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + tsapi "tailscale.com/k8s-operator/apis/v1alpha1" "tailscale.com/types/ptr" "tailscale.com/version" ) func tsrStatefulSet(tsr *tsapi.Recorder, namespace string, loginServer string) *appsv1.StatefulSet { - return &appsv1.StatefulSet{ + var replicas int32 = 1 + if tsr.Spec.Replicas != nil { + replicas = *tsr.Spec.Replicas + } + + ss := &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ Name: tsr.Name, Namespace: namespace, - Labels: labels("recorder", tsr.Name, tsr.Spec.StatefulSet.Labels), + Labels: tsrLabels("recorder", tsr.Name, tsr.Spec.StatefulSet.Labels), OwnerReferences: tsrOwnerReference(tsr), Annotations: tsr.Spec.StatefulSet.Annotations, }, Spec: appsv1.StatefulSetSpec{ - Replicas: ptr.To[int32](1), + Replicas: ptr.To(replicas), Selector: &metav1.LabelSelector{ - MatchLabels: labels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels), + MatchLabels: tsrLabels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels), }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Name: tsr.Name, Namespace: namespace, - Labels: labels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels), + Labels: tsrLabels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels), Annotations: tsr.Spec.StatefulSet.Pod.Annotations, }, Spec: corev1.PodSpec{ @@ -59,7 +65,7 @@ func tsrStatefulSet(tsr *tsapi.Recorder, namespace string, loginServer string) * ImagePullPolicy: tsr.Spec.StatefulSet.Pod.Container.ImagePullPolicy, Resources: 
tsr.Spec.StatefulSet.Pod.Container.Resources, SecurityContext: tsr.Spec.StatefulSet.Pod.Container.SecurityContext, - Env: env(tsr, loginServer), + Env: tsrEnv(tsr, loginServer), EnvFrom: func() []corev1.EnvFromSource { if tsr.Spec.Storage.S3 == nil || tsr.Spec.Storage.S3.Credentials.Secret.Name == "" { return nil @@ -95,6 +101,28 @@ func tsrStatefulSet(tsr *tsapi.Recorder, namespace string, loginServer string) * }, }, } + + for replica := range replicas { + volumeName := fmt.Sprintf("authkey-%d", replica) + + ss.Spec.Template.Spec.Containers[0].VolumeMounts = append(ss.Spec.Template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{ + Name: volumeName, + ReadOnly: true, + MountPath: fmt.Sprintf("/etc/tailscaled/%s-%d", ss.Name, replica), + }) + + ss.Spec.Template.Spec.Volumes = append(ss.Spec.Template.Spec.Volumes, corev1.Volume{ + Name: volumeName, + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: fmt.Sprintf("%s-auth-%d", tsr.Name, replica), + Items: []corev1.KeyToPath{{Key: "authkey", Path: "authkey"}}, + }, + }, + }) + } + + return ss } func tsrServiceAccount(tsr *tsapi.Recorder, namespace string) *corev1.ServiceAccount { @@ -102,7 +130,7 @@ func tsrServiceAccount(tsr *tsapi.Recorder, namespace string) *corev1.ServiceAcc ObjectMeta: metav1.ObjectMeta{ Name: tsrServiceAccountName(tsr), Namespace: namespace, - Labels: labels("recorder", tsr.Name, nil), + Labels: tsrLabels("recorder", tsr.Name, nil), OwnerReferences: tsrOwnerReference(tsr), Annotations: tsr.Spec.StatefulSet.Pod.ServiceAccount.Annotations, }, @@ -120,11 +148,24 @@ func tsrServiceAccountName(tsr *tsapi.Recorder) string { } func tsrRole(tsr *tsapi.Recorder, namespace string) *rbacv1.Role { + var replicas int32 = 1 + if tsr.Spec.Replicas != nil { + replicas = *tsr.Spec.Replicas + } + + resourceNames := make([]string, 0) + for replica := range replicas { + resourceNames = append(resourceNames, + fmt.Sprintf("%s-%d", tsr.Name, replica), // State secret. + fmt.Sprintf("%s-auth-%d", tsr.Name, replica), // Auth key secret. + ) + } + return &rbacv1.Role{ ObjectMeta: metav1.ObjectMeta{ Name: tsr.Name, Namespace: namespace, - Labels: labels("recorder", tsr.Name, nil), + Labels: tsrLabels("recorder", tsr.Name, nil), OwnerReferences: tsrOwnerReference(tsr), }, Rules: []rbacv1.PolicyRule{ @@ -136,10 +177,7 @@ func tsrRole(tsr *tsapi.Recorder, namespace string) *rbacv1.Role { "patch", "update", }, - ResourceNames: []string{ - tsr.Name, // Contains the auth key. - fmt.Sprintf("%s-0", tsr.Name), // Contains the node state. 
- }, + ResourceNames: resourceNames, }, { APIGroups: []string{""}, @@ -159,7 +197,7 @@ func tsrRoleBinding(tsr *tsapi.Recorder, namespace string) *rbacv1.RoleBinding { ObjectMeta: metav1.ObjectMeta{ Name: tsr.Name, Namespace: namespace, - Labels: labels("recorder", tsr.Name, nil), + Labels: tsrLabels("recorder", tsr.Name, nil), OwnerReferences: tsrOwnerReference(tsr), }, Subjects: []rbacv1.Subject{ @@ -176,12 +214,12 @@ func tsrRoleBinding(tsr *tsapi.Recorder, namespace string) *rbacv1.RoleBinding { } } -func tsrAuthSecret(tsr *tsapi.Recorder, namespace string, authKey string) *corev1.Secret { +func tsrAuthSecret(tsr *tsapi.Recorder, namespace string, authKey string, replica int32) *corev1.Secret { return &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Namespace: namespace, - Name: tsr.Name, - Labels: labels("recorder", tsr.Name, nil), + Name: fmt.Sprintf("%s-auth-%d", tsr.Name, replica), + Labels: tsrLabels("recorder", tsr.Name, nil), OwnerReferences: tsrOwnerReference(tsr), }, StringData: map[string]string{ @@ -190,30 +228,19 @@ func tsrAuthSecret(tsr *tsapi.Recorder, namespace string, authKey string) *corev } } -func tsrStateSecret(tsr *tsapi.Recorder, namespace string) *corev1.Secret { +func tsrStateSecret(tsr *tsapi.Recorder, namespace string, replica int32) *corev1.Secret { return &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-0", tsr.Name), + Name: fmt.Sprintf("%s-%d", tsr.Name, replica), Namespace: namespace, - Labels: labels("recorder", tsr.Name, nil), + Labels: tsrLabels("recorder", tsr.Name, nil), OwnerReferences: tsrOwnerReference(tsr), }, } } -func env(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar { +func tsrEnv(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar { envs := []corev1.EnvVar{ - { - Name: "TS_AUTHKEY", - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: tsr.Name, - }, - Key: "authkey", - }, - }, - }, { Name: "POD_NAME", ValueFrom: &corev1.EnvVarSource{ @@ -231,6 +258,10 @@ func env(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar { }, }, }, + { + Name: "TS_AUTHKEY_FILE", + Value: "/etc/tailscaled/$(POD_NAME)/authkey", + }, { Name: "TS_STATE", Value: "kube:$(POD_NAME)", @@ -280,7 +311,7 @@ func env(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar { return envs } -func labels(app, instance string, customLabels map[string]string) map[string]string { +func tsrLabels(app, instance string, customLabels map[string]string) map[string]string { labels := make(map[string]string, len(customLabels)+3) for k, v := range customLabels { labels[k] = v diff --git a/cmd/k8s-operator/tsrecorder_specs_test.go b/cmd/k8s-operator/tsrecorder_specs_test.go index 49332d09b..0d78129fc 100644 --- a/cmd/k8s-operator/tsrecorder_specs_test.go +++ b/cmd/k8s-operator/tsrecorder_specs_test.go @@ -12,6 +12,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + tsapi "tailscale.com/k8s-operator/apis/v1alpha1" "tailscale.com/types/ptr" ) @@ -23,6 +24,7 @@ func TestRecorderSpecs(t *testing.T) { Name: "test", }, Spec: tsapi.RecorderSpec{ + Replicas: ptr.To[int32](3), StatefulSet: tsapi.RecorderStatefulSet{ Labels: map[string]string{ "ss-label-key": "ss-label-value", @@ -101,10 +103,10 @@ func TestRecorderSpecs(t *testing.T) { } // Pod-level. 
- if diff := cmp.Diff(ss.Labels, labels("recorder", "test", tsr.Spec.StatefulSet.Labels)); diff != "" { + if diff := cmp.Diff(ss.Labels, tsrLabels("recorder", "test", tsr.Spec.StatefulSet.Labels)); diff != "" { t.Errorf("(-got +want):\n%s", diff) } - if diff := cmp.Diff(ss.Spec.Template.Labels, labels("recorder", "test", tsr.Spec.StatefulSet.Pod.Labels)); diff != "" { + if diff := cmp.Diff(ss.Spec.Template.Labels, tsrLabels("recorder", "test", tsr.Spec.StatefulSet.Pod.Labels)); diff != "" { t.Errorf("(-got +want):\n%s", diff) } if diff := cmp.Diff(ss.Spec.Template.Spec.Affinity, tsr.Spec.StatefulSet.Pod.Affinity); diff != "" { @@ -124,7 +126,7 @@ func TestRecorderSpecs(t *testing.T) { } // Container-level. - if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Env, env(tsr, tsLoginServer)); diff != "" { + if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Env, tsrEnv(tsr, tsLoginServer)); diff != "" { t.Errorf("(-got +want):\n%s", diff) } if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Image, tsr.Spec.StatefulSet.Pod.Container.Image); diff != "" { @@ -139,5 +141,17 @@ func TestRecorderSpecs(t *testing.T) { if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Resources, tsr.Spec.StatefulSet.Pod.Container.Resources); diff != "" { t.Errorf("(-got +want):\n%s", diff) } + + if *ss.Spec.Replicas != *tsr.Spec.Replicas { + t.Errorf("expected %d replicas, got %d", *tsr.Spec.Replicas, *ss.Spec.Replicas) + } + + if len(ss.Spec.Template.Spec.Volumes) != int(*tsr.Spec.Replicas)+1 { + t.Errorf("expected %d volumes, got %d", *tsr.Spec.Replicas+1, len(ss.Spec.Template.Spec.Volumes)) + } + + if len(ss.Spec.Template.Spec.Containers[0].VolumeMounts) != int(*tsr.Spec.Replicas)+1 { + t.Errorf("expected %d volume mounts, got %d", *tsr.Spec.Replicas+1, len(ss.Spec.Template.Spec.Containers[0].VolumeMounts)) + } }) } diff --git a/cmd/k8s-operator/tsrecorder_test.go b/cmd/k8s-operator/tsrecorder_test.go index 184af2344..f7ff797b1 100644 --- a/cmd/k8s-operator/tsrecorder_test.go +++ b/cmd/k8s-operator/tsrecorder_test.go @@ -8,6 +8,7 @@ package main import ( "context" "encoding/json" + "fmt" "strings" "testing" @@ -20,9 +21,11 @@ import ( "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + tsoperator "tailscale.com/k8s-operator" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" "tailscale.com/tstest" + "tailscale.com/types/ptr" ) const ( @@ -36,6 +39,9 @@ func TestRecorder(t *testing.T) { Name: "test", Finalizers: []string{"tailscale.com/finalizer"}, }, + Spec: tsapi.RecorderSpec{ + Replicas: ptr.To[int32](3), + }, } fc := fake.NewClientBuilder(). @@ -80,6 +86,15 @@ func TestRecorder(t *testing.T) { }) expectReconciled(t, reconciler, "", tsr.Name) + expectedEvent = "Warning RecorderInvalid Recorder is invalid: must use S3 storage when using multiple replicas to ensure recordings are accessible" + expectEvents(t, fr, []string{expectedEvent}) + + tsr.Spec.Storage.S3 = &tsapi.S3{} + mustUpdate(t, fc, "", "test", func(t *tsapi.Recorder) { + t.Spec = tsr.Spec + }) + expectReconciled(t, reconciler, "", tsr.Name) + // Only check part of this error message, because it's defined in an // external package and may change. 
if err := fc.Get(context.Background(), client.ObjectKey{ @@ -180,33 +195,47 @@ func TestRecorder(t *testing.T) { }) t.Run("populate_node_info_in_state_secret_and_see_it_appear_in_status", func(t *testing.T) { - bytes, err := json.Marshal(map[string]any{ - "Config": map[string]any{ - "NodeID": "nodeid-123", - "UserProfile": map[string]any{ - "LoginName": "test-0.example.ts.net", - }, - }, - }) - if err != nil { - t.Fatal(err) - } const key = "profile-abc" - mustUpdate(t, fc, tsNamespace, "test-0", func(s *corev1.Secret) { - s.Data = map[string][]byte{ - currentProfileKey: []byte(key), - key: bytes, + for replica := range *tsr.Spec.Replicas { + bytes, err := json.Marshal(map[string]any{ + "Config": map[string]any{ + "NodeID": fmt.Sprintf("node-%d", replica), + "UserProfile": map[string]any{ + "LoginName": fmt.Sprintf("test-%d.example.ts.net", replica), + }, + }, + }) + if err != nil { + t.Fatal(err) } - }) + + name := fmt.Sprintf("%s-%d", "test", replica) + mustUpdate(t, fc, tsNamespace, name, func(s *corev1.Secret) { + s.Data = map[string][]byte{ + currentProfileKey: []byte(key), + key: bytes, + } + }) + } expectReconciled(t, reconciler, "", tsr.Name) tsr.Status.Devices = []tsapi.RecorderTailnetDevice{ { - Hostname: "hostname-nodeid-123", + Hostname: "hostname-node-0", TailnetIPs: []string{"1.2.3.4", "::1"}, URL: "https://test-0.example.ts.net", }, + { + Hostname: "hostname-node-1", + TailnetIPs: []string{"1.2.3.4", "::1"}, + URL: "https://test-1.example.ts.net", + }, + { + Hostname: "hostname-node-2", + TailnetIPs: []string{"1.2.3.4", "::1"}, + URL: "https://test-2.example.ts.net", + }, } expectEqual(t, fc, tsr) }) @@ -222,7 +251,7 @@ func TestRecorder(t *testing.T) { if expected := 0; reconciler.recorders.Len() != expected { t.Fatalf("expected %d recorders, got %d", expected, reconciler.recorders.Len()) } - if diff := cmp.Diff(tsClient.deleted, []string{"nodeid-123"}); diff != "" { + if diff := cmp.Diff(tsClient.deleted, []string{"node-0", "node-1", "node-2"}); diff != "" { t.Fatalf("unexpected deleted devices (-got +want):\n%s", diff) } // The fake client does not clean up objects whose owner has been @@ -233,26 +262,38 @@ func TestRecorder(t *testing.T) { func expectRecorderResources(t *testing.T, fc client.WithWatch, tsr *tsapi.Recorder, shouldExist bool) { t.Helper() - auth := tsrAuthSecret(tsr, tsNamespace, "secret-authkey") - state := tsrStateSecret(tsr, tsNamespace) + var replicas int32 = 1 + if tsr.Spec.Replicas != nil { + replicas = *tsr.Spec.Replicas + } + role := tsrRole(tsr, tsNamespace) roleBinding := tsrRoleBinding(tsr, tsNamespace) serviceAccount := tsrServiceAccount(tsr, tsNamespace) statefulSet := tsrStatefulSet(tsr, tsNamespace, tsLoginServer) if shouldExist { - expectEqual(t, fc, auth) - expectEqual(t, fc, state) expectEqual(t, fc, role) expectEqual(t, fc, roleBinding) expectEqual(t, fc, serviceAccount) expectEqual(t, fc, statefulSet, removeResourceReqs) } else { - expectMissing[corev1.Secret](t, fc, auth.Namespace, auth.Name) - expectMissing[corev1.Secret](t, fc, state.Namespace, state.Name) expectMissing[rbacv1.Role](t, fc, role.Namespace, role.Name) expectMissing[rbacv1.RoleBinding](t, fc, roleBinding.Namespace, roleBinding.Name) expectMissing[corev1.ServiceAccount](t, fc, serviceAccount.Namespace, serviceAccount.Name) expectMissing[appsv1.StatefulSet](t, fc, statefulSet.Namespace, statefulSet.Name) } + + for replica := range replicas { + auth := tsrAuthSecret(tsr, tsNamespace, "secret-authkey", replica) + state := tsrStateSecret(tsr, tsNamespace, replica) + + if 
shouldExist { + expectEqual(t, fc, auth) + expectEqual(t, fc, state) + } else { + expectMissing[corev1.Secret](t, fc, auth.Namespace, auth.Name) + expectMissing[corev1.Secret](t, fc, state.Namespace, state.Name) + } + } } diff --git a/cmd/stund/depaware.txt b/cmd/stund/depaware.txt index 7b3d05f94..7b945dd77 100644 --- a/cmd/stund/depaware.txt +++ b/cmd/stund/depaware.txt @@ -14,9 +14,9 @@ tailscale.com/cmd/stund dependencies: (generated by github.com/tailscale/depawar github.com/prometheus/client_model/go from github.com/prometheus/client_golang/prometheus+ github.com/prometheus/common/expfmt from github.com/prometheus/client_golang/prometheus+ github.com/prometheus/common/model from github.com/prometheus/client_golang/prometheus+ - LD github.com/prometheus/procfs from github.com/prometheus/client_golang/prometheus - LD github.com/prometheus/procfs/internal/fs from github.com/prometheus/procfs - LD github.com/prometheus/procfs/internal/util from github.com/prometheus/procfs + L github.com/prometheus/procfs from github.com/prometheus/client_golang/prometheus + L github.com/prometheus/procfs/internal/fs from github.com/prometheus/procfs + L github.com/prometheus/procfs/internal/util from github.com/prometheus/procfs 💣 go4.org/mem from tailscale.com/metrics+ go4.org/netipx from tailscale.com/net/tsaddr google.golang.org/protobuf/encoding/protodelim from github.com/prometheus/common/expfmt @@ -47,7 +47,7 @@ tailscale.com/cmd/stund dependencies: (generated by github.com/tailscale/depawar google.golang.org/protobuf/reflect/protoregistry from google.golang.org/protobuf/encoding/prototext+ google.golang.org/protobuf/runtime/protoiface from google.golang.org/protobuf/internal/impl+ google.golang.org/protobuf/runtime/protoimpl from github.com/prometheus/client_model/go+ - google.golang.org/protobuf/types/known/timestamppb from github.com/prometheus/client_golang/prometheus+ + 💣 google.golang.org/protobuf/types/known/timestamppb from github.com/prometheus/client_golang/prometheus+ tailscale.com from tailscale.com/version tailscale.com/envknob from tailscale.com/tsweb+ tailscale.com/feature from tailscale.com/tsweb diff --git a/cmd/tailscale/cli/serve_v2.go b/cmd/tailscale/cli/serve_v2.go index b60e645f3..89d247be9 100644 --- a/cmd/tailscale/cli/serve_v2.go +++ b/cmd/tailscale/cli/serve_v2.go @@ -478,11 +478,6 @@ func (e *serveEnv) runServeCombined(subcmd serveMode) execFunc { } wantFg := !e.bg.Value && !turnOff if wantFg { - // validate the config before creating a WatchIPNBus session - if err := e.validateConfig(parentSC, srvPort, srvType, svcName); err != nil { - return err - } - // if foreground mode, create a WatchIPNBus session // and use the nested config for all following operations // TODO(marwan-at-work): nested-config validations should happen here or previous to this point. @@ -508,9 +503,6 @@ func (e *serveEnv) runServeCombined(subcmd serveMode) execFunc { // only unset serve when trying to unset with type and port flags. 
err = e.unsetServe(sc, dnsName, srvType, srvPort, mount, magicDNSSuffix) } else { - if err := e.validateConfig(parentSC, srvPort, srvType, svcName); err != nil { - return err - } if forService { e.addServiceToPrefs(ctx, svcName) } @@ -907,66 +899,6 @@ func (e *serveEnv) runServeSetConfig(ctx context.Context, args []string) (err er return e.lc.SetServeConfig(ctx, sc) } -const backgroundExistsMsg = "background configuration already exists, use `tailscale %s --%s=%d off` to remove the existing configuration" - -// validateConfig checks if the serve config is valid to serve the type wanted on the port. -// dnsName is a FQDN or a serviceName (with `svc:` prefix). -func (e *serveEnv) validateConfig(sc *ipn.ServeConfig, port uint16, wantServe serveType, svcName tailcfg.ServiceName) error { - var tcpHandlerForPort *ipn.TCPPortHandler - if svcName != noService { - svc := sc.Services[svcName] - if svc == nil { - return nil - } - if wantServe == serveTypeTUN && (svc.TCP != nil || svc.Web != nil) { - return errors.New("service already has a TCP or Web handler, cannot serve in TUN mode") - } - if svc.Tun && wantServe != serveTypeTUN { - return errors.New("service is already being served in TUN mode") - } - if svc.TCP[port] == nil { - return nil - } - tcpHandlerForPort = svc.TCP[port] - } else { - sc, isFg := sc.FindConfig(port) - if sc == nil { - return nil - } - if isFg { - return errors.New("foreground already exists under this port") - } - if !e.bg.Value { - return fmt.Errorf(backgroundExistsMsg, infoMap[e.subcmd].Name, wantServe.String(), port) - } - tcpHandlerForPort = sc.TCP[port] - } - existingServe := serveFromPortHandler(tcpHandlerForPort) - if wantServe != existingServe { - target := svcName - if target == noService { - target = "machine" - } - return fmt.Errorf("want to serve %q but port is already serving %q for %q", wantServe, existingServe, target) - } - return nil -} - -func serveFromPortHandler(tcp *ipn.TCPPortHandler) serveType { - switch { - case tcp.HTTP: - return serveTypeHTTP - case tcp.HTTPS: - return serveTypeHTTPS - case tcp.TerminateTLS != "": - return serveTypeTLSTerminatedTCP - case tcp.TCPForward != "": - return serveTypeTCP - default: - return -1 - } -} - func (e *serveEnv) setServe(sc *ipn.ServeConfig, dnsName string, srvType serveType, srvPort uint16, mount string, target string, allowFunnel bool, mds string, caps []tailcfg.PeerCapability, proxyProtocol int) error { // update serve config based on the type switch srvType { diff --git a/cmd/tailscale/cli/serve_v2_test.go b/cmd/tailscale/cli/serve_v2_test.go index 491baf9dd..513c0d1ec 100644 --- a/cmd/tailscale/cli/serve_v2_test.go +++ b/cmd/tailscale/cli/serve_v2_test.go @@ -819,26 +819,6 @@ func TestServeDevConfigMutations(t *testing.T) { }, }, }, - { - name: "forground_with_bg_conflict", - steps: []step{ - { - command: cmd("serve --bg --http=3000 localhost:3000"), - want: &ipn.ServeConfig{ - TCP: map[uint16]*ipn.TCPPortHandler{3000: {HTTP: true}}, - Web: map[ipn.HostPort]*ipn.WebServerConfig{ - "foo.test.ts.net:3000": {Handlers: map[string]*ipn.HTTPHandler{ - "/": {Proxy: "http://localhost:3000"}, - }}, - }, - }, - }, - { - command: cmd("serve --http=3000 localhost:3000"), - wantErr: exactErrMsg(fmt.Errorf(backgroundExistsMsg, "serve", "http", 3000)), - }, - }, - }, { name: "advertise_service", initialState: fakeLocalServeClient{ @@ -1067,190 +1047,6 @@ func TestServeDevConfigMutations(t *testing.T) { } } -func TestValidateConfig(t *testing.T) { - tests := [...]struct { - name string - desc string - cfg *ipn.ServeConfig 
- svc tailcfg.ServiceName - servePort uint16 - serveType serveType - bg bgBoolFlag - wantErr bool - }{ - { - name: "nil_config", - desc: "when config is nil, all requests valid", - cfg: nil, - servePort: 3000, - serveType: serveTypeHTTPS, - }, - { - name: "new_bg_tcp", - desc: "no error when config exists but we're adding a new bg tcp port", - cfg: &ipn.ServeConfig{ - TCP: map[uint16]*ipn.TCPPortHandler{ - 443: {HTTPS: true}, - }, - }, - bg: bgBoolFlag{true, false}, - servePort: 10000, - serveType: serveTypeHTTPS, - }, - { - name: "override_bg_tcp", - desc: "no error when overwriting previous port under the same serve type", - cfg: &ipn.ServeConfig{ - TCP: map[uint16]*ipn.TCPPortHandler{ - 443: {TCPForward: "http://localhost:4545"}, - }, - }, - bg: bgBoolFlag{true, false}, - servePort: 443, - serveType: serveTypeTCP, - }, - { - name: "override_bg_tcp", - desc: "error when overwriting previous port under a different serve type", - cfg: &ipn.ServeConfig{ - TCP: map[uint16]*ipn.TCPPortHandler{ - 443: {HTTPS: true}, - }, - }, - bg: bgBoolFlag{true, false}, - servePort: 443, - serveType: serveTypeHTTP, - wantErr: true, - }, - { - name: "new_fg_port", - desc: "no error when serving a new foreground port", - cfg: &ipn.ServeConfig{ - TCP: map[uint16]*ipn.TCPPortHandler{ - 443: {HTTPS: true}, - }, - Foreground: map[string]*ipn.ServeConfig{ - "abc123": { - TCP: map[uint16]*ipn.TCPPortHandler{ - 3000: {HTTPS: true}, - }, - }, - }, - }, - servePort: 4040, - serveType: serveTypeTCP, - }, - { - name: "same_fg_port", - desc: "error when overwriting a previous fg port", - cfg: &ipn.ServeConfig{ - Foreground: map[string]*ipn.ServeConfig{ - "abc123": { - TCP: map[uint16]*ipn.TCPPortHandler{ - 3000: {HTTPS: true}, - }, - }, - }, - }, - servePort: 3000, - serveType: serveTypeTCP, - wantErr: true, - }, - { - name: "new_service_tcp", - desc: "no error when adding a new service port", - cfg: &ipn.ServeConfig{ - Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ - "svc:foo": { - TCP: map[uint16]*ipn.TCPPortHandler{80: {HTTP: true}}, - }, - }, - }, - svc: "svc:foo", - servePort: 8080, - serveType: serveTypeTCP, - }, - { - name: "override_service_tcp", - desc: "no error when overwriting a previous service port", - cfg: &ipn.ServeConfig{ - Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ - "svc:foo": { - TCP: map[uint16]*ipn.TCPPortHandler{ - 443: {TCPForward: "http://localhost:4545"}, - }, - }, - }, - }, - svc: "svc:foo", - servePort: 443, - serveType: serveTypeTCP, - }, - { - name: "override_service_tcp", - desc: "error when overwriting a previous service port with a different serve type", - cfg: &ipn.ServeConfig{ - Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ - "svc:foo": { - TCP: map[uint16]*ipn.TCPPortHandler{ - 443: {HTTPS: true}, - }, - }, - }, - }, - svc: "svc:foo", - servePort: 443, - serveType: serveTypeHTTP, - wantErr: true, - }, - { - name: "override_service_tcp", - desc: "error when setting previous tcp service to tun mode", - cfg: &ipn.ServeConfig{ - Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ - "svc:foo": { - TCP: map[uint16]*ipn.TCPPortHandler{ - 443: {TCPForward: "http://localhost:4545"}, - }, - }, - }, - }, - svc: "svc:foo", - serveType: serveTypeTUN, - wantErr: true, - }, - { - name: "override_service_tun", - desc: "error when setting previous tun service to tcp forwarder", - cfg: &ipn.ServeConfig{ - Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ - "svc:foo": { - Tun: true, - }, - }, - }, - svc: "svc:foo", - serveType: serveTypeTCP, - servePort: 443, - wantErr: 
true, - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - se := serveEnv{bg: tc.bg} - err := se.validateConfig(tc.cfg, tc.servePort, tc.serveType, tc.svc) - if err == nil && tc.wantErr { - t.Fatal("expected an error but got nil") - } - if err != nil && !tc.wantErr { - t.Fatalf("expected no error but got: %v", err) - } - }) - } - -} - func TestSrcTypeFromFlags(t *testing.T) { tests := []struct { name string diff --git a/cmd/tailscale/cli/set.go b/cmd/tailscale/cli/set.go index cb3a07a6f..31662392f 100644 --- a/cmd/tailscale/cli/set.go +++ b/cmd/tailscale/cli/set.go @@ -11,6 +11,7 @@ import ( "net/netip" "os/exec" "runtime" + "slices" "strconv" "strings" @@ -25,6 +26,7 @@ import ( "tailscale.com/types/opt" "tailscale.com/types/ptr" "tailscale.com/types/views" + "tailscale.com/util/set" "tailscale.com/version" ) @@ -43,29 +45,30 @@ Only settings explicitly mentioned will be set. There are no default values.`, } type setArgsT struct { - acceptRoutes bool - acceptDNS bool - exitNodeIP string - exitNodeAllowLANAccess bool - shieldsUp bool - runSSH bool - runWebClient bool - hostname string - advertiseRoutes string - advertiseDefaultRoute bool - advertiseConnector bool - opUser string - acceptedRisks string - profileName string - forceDaemon bool - updateCheck bool - updateApply bool - reportPosture bool - snat bool - statefulFiltering bool - sync bool - netfilterMode string - relayServerPort string + acceptRoutes bool + acceptDNS bool + exitNodeIP string + exitNodeAllowLANAccess bool + shieldsUp bool + runSSH bool + runWebClient bool + hostname string + advertiseRoutes string + advertiseDefaultRoute bool + advertiseConnector bool + opUser string + acceptedRisks string + profileName string + forceDaemon bool + updateCheck bool + updateApply bool + reportPosture bool + snat bool + statefulFiltering bool + sync bool + netfilterMode string + relayServerPort string + relayServerStaticEndpoints string } func newSetFlagSet(goos string, setArgs *setArgsT) *flag.FlagSet { @@ -88,6 +91,7 @@ func newSetFlagSet(goos string, setArgs *setArgsT) *flag.FlagSet { setf.BoolVar(&setArgs.runWebClient, "webclient", false, "expose the web interface for managing this node over Tailscale at port 5252") setf.BoolVar(&setArgs.sync, "sync", false, hidden+"actively sync configuration from the control plane (set to false only for network failure testing)") setf.StringVar(&setArgs.relayServerPort, "relay-server-port", "", "UDP port number (0 will pick a random unused port) for the relay server to bind to, on all interfaces, or empty string to disable relay server functionality") + setf.StringVar(&setArgs.relayServerStaticEndpoints, "relay-server-static-endpoints", "", "static IP:port endpoints to advertise as candidates for relay connections (comma-separated, e.g. 
\"[2001:db8::1]:40000,192.0.2.1:40000\") or empty string to not advertise any static endpoints") ffcomplete.Flag(setf, "exit-node", func(args []string) ([]string, ffcomplete.ShellCompDirective, error) { st, err := localClient.Status(context.Background()) @@ -245,7 +249,22 @@ func runSet(ctx context.Context, args []string) (retErr error) { if err != nil { return fmt.Errorf("failed to set relay server port: %v", err) } - maskedPrefs.Prefs.RelayServerPort = ptr.To(int(uport)) + maskedPrefs.Prefs.RelayServerPort = ptr.To(uint16(uport)) + } + + if setArgs.relayServerStaticEndpoints != "" { + endpointsSet := make(set.Set[netip.AddrPort]) + endpointsSplit := strings.Split(setArgs.relayServerStaticEndpoints, ",") + for _, s := range endpointsSplit { + ap, err := netip.ParseAddrPort(s) + if err != nil { + return fmt.Errorf("failed to set relay server static endpoints: %q is not a valid IP:port", s) + } + endpointsSet.Add(ap) + } + endpoints := endpointsSet.Slice() + slices.SortFunc(endpoints, netip.AddrPort.Compare) + maskedPrefs.Prefs.RelayServerStaticEndpoints = endpoints } checkPrefs := curPrefs.Clone() diff --git a/cmd/tailscale/cli/up.go b/cmd/tailscale/cli/up.go index 7f5b2e6b4..72515400d 100644 --- a/cmd/tailscale/cli/up.go +++ b/cmd/tailscale/cli/up.go @@ -887,6 +887,7 @@ func init() { addPrefFlagMapping("report-posture", "PostureChecking") addPrefFlagMapping("relay-server-port", "RelayServerPort") addPrefFlagMapping("sync", "Sync") + addPrefFlagMapping("relay-server-static-endpoints", "RelayServerStaticEndpoints") } func addPrefFlagMapping(flagName string, prefNames ...string) { diff --git a/cmd/tailscaled/depaware-min.txt b/cmd/tailscaled/depaware-min.txt index e750f86e6..69e6559a0 100644 --- a/cmd/tailscaled/depaware-min.txt +++ b/cmd/tailscaled/depaware-min.txt @@ -16,6 +16,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de github.com/klauspost/compress/fse from github.com/klauspost/compress/huff0 github.com/klauspost/compress/huff0 from github.com/klauspost/compress/zstd github.com/klauspost/compress/internal/cpuinfo from github.com/klauspost/compress/huff0+ + 💣 github.com/klauspost/compress/internal/le from github.com/klauspost/compress/huff0+ github.com/klauspost/compress/internal/snapref from github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd from tailscale.com/util/zstdframe github.com/klauspost/compress/zstd/internal/xxhash from github.com/klauspost/compress/zstd @@ -69,7 +70,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/ipn/ipnstate from tailscale.com/control/controlclient+ tailscale.com/ipn/localapi from tailscale.com/ipn/ipnserver tailscale.com/ipn/store from tailscale.com/cmd/tailscaled - tailscale.com/ipn/store/mem from tailscale.com/ipn/store + tailscale.com/ipn/store/mem from tailscale.com/ipn/store+ tailscale.com/kube/kubetypes from tailscale.com/envknob tailscale.com/log/filelogger from tailscale.com/logpolicy tailscale.com/log/sockstatlog from tailscale.com/ipn/ipnlocal diff --git a/cmd/tailscaled/depaware-minbox.txt b/cmd/tailscaled/depaware-minbox.txt index 17f1a22b2..55a21c426 100644 --- a/cmd/tailscaled/depaware-minbox.txt +++ b/cmd/tailscaled/depaware-minbox.txt @@ -20,6 +20,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de github.com/klauspost/compress/fse from github.com/klauspost/compress/huff0 github.com/klauspost/compress/huff0 from github.com/klauspost/compress/zstd github.com/klauspost/compress/internal/cpuinfo from 
github.com/klauspost/compress/huff0+ + 💣 github.com/klauspost/compress/internal/le from github.com/klauspost/compress/huff0+ github.com/klauspost/compress/internal/snapref from github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd from tailscale.com/util/zstdframe github.com/klauspost/compress/zstd/internal/xxhash from github.com/klauspost/compress/zstd @@ -92,7 +93,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/ipn/ipnstate from tailscale.com/control/controlclient+ tailscale.com/ipn/localapi from tailscale.com/ipn/ipnserver tailscale.com/ipn/store from tailscale.com/cmd/tailscaled - tailscale.com/ipn/store/mem from tailscale.com/ipn/store + tailscale.com/ipn/store/mem from tailscale.com/ipn/store+ tailscale.com/kube/kubetypes from tailscale.com/envknob tailscale.com/licenses from tailscale.com/cmd/tailscale/cli tailscale.com/log/filelogger from tailscale.com/logpolicy diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index d15402092..79f92deb9 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -139,6 +139,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de github.com/klauspost/compress/fse from github.com/klauspost/compress/huff0 github.com/klauspost/compress/huff0 from github.com/klauspost/compress/zstd github.com/klauspost/compress/internal/cpuinfo from github.com/klauspost/compress/huff0+ + 💣 github.com/klauspost/compress/internal/le from github.com/klauspost/compress/huff0+ github.com/klauspost/compress/internal/snapref from github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd from tailscale.com/util/zstdframe github.com/klauspost/compress/zstd/internal/xxhash from github.com/klauspost/compress/zstd diff --git a/cmd/tailscaled/tailscaled.go b/cmd/tailscaled/tailscaled.go index f14cdcff0..d923ca1ed 100644 --- a/cmd/tailscaled/tailscaled.go +++ b/cmd/tailscaled/tailscaled.go @@ -33,12 +33,14 @@ import ( "tailscale.com/feature" "tailscale.com/feature/buildfeatures" _ "tailscale.com/feature/condregister" + "tailscale.com/health" "tailscale.com/hostinfo" "tailscale.com/ipn" "tailscale.com/ipn/conffile" "tailscale.com/ipn/ipnlocal" "tailscale.com/ipn/ipnserver" "tailscale.com/ipn/store" + "tailscale.com/ipn/store/mem" "tailscale.com/logpolicy" "tailscale.com/logtail" "tailscale.com/net/dns" @@ -644,7 +646,16 @@ func getLocalBackend(ctx context.Context, logf logger.Logf, logID logid.PublicID store, err := store.New(logf, statePathOrDefault()) if err != nil { - return nil, fmt.Errorf("store.New: %w", err) + // If we can't create the store (for example if it's TPM-sealed and the + // TPM is reset), create a dummy in-memory store to propagate the error + // to the user. 
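// (Net effect, as a reading of this hunk: rather than refusing to start,
// tailscaled continues on a non-persistent mem.Store and records the
// original store.New error under ipn.StateStoreHealth, so health
// consumers can surface why node state is not being persisted.)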
+ ht, ok := sys.HealthTracker.GetOK() + if !ok { + return nil, fmt.Errorf("store.New: %w", err) + } + logf("store.New failed: %v; starting with in-memory store with a health warning", err) + store = new(mem.Store) + ht.SetUnhealthy(ipn.StateStoreHealth, health.Args{health.ArgError: err.Error()}) } sys.Set(store) diff --git a/cmd/tailscaled/tailscaled_test.go b/cmd/tailscaled/tailscaled_test.go index c50c23759..1188ad35f 100644 --- a/cmd/tailscaled/tailscaled_test.go +++ b/cmd/tailscaled/tailscaled_test.go @@ -4,9 +4,17 @@ package main // import "tailscale.com/cmd/tailscaled" import ( + "os" + "strings" "testing" + "tailscale.com/envknob" + "tailscale.com/ipn" + "tailscale.com/net/netmon" + "tailscale.com/tsd" "tailscale.com/tstest/deptest" + "tailscale.com/types/logid" + "tailscale.com/util/must" ) func TestNothing(t *testing.T) { @@ -38,3 +46,45 @@ func TestDeps(t *testing.T) { }, }.Check(t) } + +func TestStateStoreError(t *testing.T) { + logID, err := logid.NewPrivateID() + if err != nil { + t.Fatal(err) + } + // Don't upload any logs from tests. + envknob.SetNoLogsNoSupport() + + args.statedir = t.TempDir() + args.tunname = "userspace-networking" + + t.Run("new state", func(t *testing.T) { + sys := tsd.NewSystem() + sys.NetMon.Set(must.Get(netmon.New(sys.Bus.Get(), t.Logf))) + lb, err := getLocalBackend(t.Context(), t.Logf, logID.Public(), sys) + if err != nil { + t.Fatal(err) + } + defer lb.Shutdown() + if lb.HealthTracker().IsUnhealthy(ipn.StateStoreHealth) { + t.Errorf("StateStoreHealth is unhealthy on fresh LocalBackend:\n%s", strings.Join(lb.HealthTracker().Strings(), "\n")) + } + }) + t.Run("corrupt state", func(t *testing.T) { + sys := tsd.NewSystem() + sys.NetMon.Set(must.Get(netmon.New(sys.Bus.Get(), t.Logf))) + // Populate the state file with something that will fail to parse to + // trigger an error from store.New. 
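// ("bad json" is just a convenient invalid payload here; any bytes the
// store cannot decode should exercise the same fallback path.)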
+ if err := os.WriteFile(statePathOrDefault(), []byte("bad json"), 0644); err != nil { + t.Fatal(err) + } + lb, err := getLocalBackend(t.Context(), t.Logf, logID.Public(), sys) + if err != nil { + t.Fatal(err) + } + defer lb.Shutdown() + if !lb.HealthTracker().IsUnhealthy(ipn.StateStoreHealth) { + t.Errorf("StateStoreHealth is healthy when state file is corrupt") + } + }) +} diff --git a/cmd/tsidp/depaware.txt b/cmd/tsidp/depaware.txt index 14db7414a..5c6aae512 100644 --- a/cmd/tsidp/depaware.txt +++ b/cmd/tsidp/depaware.txt @@ -36,6 +36,7 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar github.com/klauspost/compress/fse from github.com/klauspost/compress/huff0 github.com/klauspost/compress/huff0 from github.com/klauspost/compress/zstd github.com/klauspost/compress/internal/cpuinfo from github.com/klauspost/compress/huff0+ + 💣 github.com/klauspost/compress/internal/le from github.com/klauspost/compress/huff0+ github.com/klauspost/compress/internal/snapref from github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd from tailscale.com/util/zstdframe github.com/klauspost/compress/zstd/internal/xxhash from github.com/klauspost/compress/zstd diff --git a/cmd/vet/jsontags_allowlist b/cmd/vet/jsontags_allowlist index 060a81b05..9526f44ef 100644 --- a/cmd/vet/jsontags_allowlist +++ b/cmd/vet/jsontags_allowlist @@ -107,7 +107,7 @@ OmitEmptyShouldBeOmitZero tailscale.com/tailcfg.MapResponse.ClientVersion OmitEmptyShouldBeOmitZero tailscale.com/tailcfg.MapResponse.CollectServices OmitEmptyShouldBeOmitZero tailscale.com/tailcfg.MapResponse.ControlDialPlan OmitEmptyShouldBeOmitZero tailscale.com/tailcfg.MapResponse.Debug -OmitEmptyShouldBeOmitZero tailscale.com/tailcfg.MapResponse.DefaultAutoUpdate +OmitEmptyShouldBeOmitZero tailscale.com/tailcfg.MapResponse.DeprecatedDefaultAutoUpdate OmitEmptyShouldBeOmitZero tailscale.com/tailcfg.MapResponse.DERPMap OmitEmptyShouldBeOmitZero tailscale.com/tailcfg.MapResponse.DNSConfig OmitEmptyShouldBeOmitZero tailscale.com/tailcfg.MapResponse.Node diff --git a/control/controlclient/direct.go b/control/controlclient/direct.go index 006a801ef..d5cd6a13e 100644 --- a/control/controlclient/direct.go +++ b/control/controlclient/direct.go @@ -1184,7 +1184,19 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap metricMapResponseKeepAlives.Add(1) continue } - if au, ok := resp.DefaultAutoUpdate.Get(); ok { + + // DefaultAutoUpdate in its CapMap and deprecated top-level field forms. + if self := resp.Node; self != nil { + for _, v := range self.CapMap[tailcfg.NodeAttrDefaultAutoUpdate] { + switch v { + case "true", "false": + c.autoUpdatePub.Publish(AutoUpdate{c.controlClientID, v == "true"}) + default: + c.logf("netmap: [unexpected] unknown %s in CapMap: %q", tailcfg.NodeAttrDefaultAutoUpdate, v) + } + } + } + if au, ok := resp.DeprecatedDefaultAutoUpdate.Get(); ok { c.autoUpdatePub.Publish(AutoUpdate{c.controlClientID, au}) } diff --git a/derp/derpserver/derpserver.go b/derp/derpserver/derpserver.go index 0bbc66780..1879e0c53 100644 --- a/derp/derpserver/derpserver.go +++ b/derp/derpserver/derpserver.go @@ -36,6 +36,7 @@ import ( "sync/atomic" "time" + "github.com/axiomhq/hyperloglog" "go4.org/mem" "golang.org/x/sync/errgroup" "tailscale.com/client/local" @@ -1643,6 +1644,12 @@ type sclient struct { sawSrc map[key.NodePublic]set.Handle bw *lazyBufioWriter + // senderCardinality estimates the number of unique peers that have + // sent packets to this client. 
Owned by sendLoop, protected by + // senderCardinalityMu for reads from other goroutines. + senderCardinalityMu sync.Mutex + senderCardinality *hyperloglog.Sketch + // Guarded by s.mu // // peerStateChange is used by mesh peers (a set of regional @@ -1778,6 +1785,8 @@ func (c *sclient) onSendLoopDone() { func (c *sclient) sendLoop(ctx context.Context) error { defer c.onSendLoopDone() + c.senderCardinality = hyperloglog.New() + jitter := rand.N(5 * time.Second) keepAliveTick, keepAliveTickChannel := c.s.clock.NewTicker(derp.KeepAlive + jitter) defer keepAliveTick.Stop() @@ -2000,6 +2009,11 @@ func (c *sclient) sendPacket(srcKey key.NodePublic, contents []byte) (err error) if withKey { pktLen += key.NodePublicRawLen c.noteSendFromSrc(srcKey) + if c.senderCardinality != nil { + c.senderCardinalityMu.Lock() + c.senderCardinality.Insert(srcKey.AppendTo(nil)) + c.senderCardinalityMu.Unlock() + } } if err = derp.WriteFrameHeader(c.bw.bw(), derp.FrameRecvPacket, uint32(pktLen)); err != nil { return err @@ -2013,6 +2027,17 @@ func (c *sclient) sendPacket(srcKey key.NodePublic, contents []byte) (err error) return err } +// EstimatedUniqueSenders returns an estimate of the number of unique peers +// that have sent packets to this client. +func (c *sclient) EstimatedUniqueSenders() uint64 { + c.senderCardinalityMu.Lock() + defer c.senderCardinalityMu.Unlock() + if c.senderCardinality == nil { + return 0 + } + return c.senderCardinality.Estimate() +} + // noteSendFromSrc notes that we are about to write a packet // from src to sclient. // @@ -2295,7 +2320,8 @@ type BytesSentRecv struct { Sent uint64 Recv uint64 // Key is the public key of the client which sent/received these bytes. - Key key.NodePublic + Key key.NodePublic + UniqueSenders uint64 `json:",omitzero"` } // parseSSOutput parses the output from the specific call to ss in ServeDebugTraffic. 
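// As context for the senderCardinality fields above: hyperloglog keeps a
// fixed-size sketch rather than a set of seen keys, so per-client memory
// stays roughly constant no matter how many distinct senders appear. A
// minimal standalone sketch of the API this change uses (New/Insert/
// Estimate from github.com/axiomhq/hyperloglog, added to go.mod below);
// the loop and key format are illustrative only, not part of the change:
//
//	sk := hyperloglog.New()
//	for i := range 100_000 {
//		sk.Insert(fmt.Appendf(nil, "peer-%d", i))
//	}
//	fmt.Println(sk.Estimate()) // ≈ 100000, within a few percent
//
// Estimate returns an approximation, which is why the tests later in this
// diff accept ranges (8-12 for 10 inserts, 85-115 for 100) rather than
// exact counts.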
@@ -2349,6 +2375,11 @@ func (s *Server) ServeDebugTraffic(w http.ResponseWriter, r *http.Request) { if prev.Sent < next.Sent || prev.Recv < next.Recv { if pkey, ok := s.keyOfAddr[k]; ok { next.Key = pkey + if cs, ok := s.clients[pkey]; ok { + if c := cs.activeClient.Load(); c != nil { + next.UniqueSenders = c.EstimatedUniqueSenders() + } + } if err := enc.Encode(next); err != nil { s.mu.Unlock() return diff --git a/derp/derpserver/derpserver_test.go b/derp/derpserver/derpserver_test.go index 2db5f25bc..1dd86f314 100644 --- a/derp/derpserver/derpserver_test.go +++ b/derp/derpserver/derpserver_test.go @@ -9,6 +9,7 @@ import ( "context" "crypto/x509" "encoding/asn1" + "encoding/binary" "expvar" "fmt" "log" @@ -20,6 +21,7 @@ import ( "testing" "time" + "github.com/axiomhq/hyperloglog" qt "github.com/frankban/quicktest" "go4.org/mem" "golang.org/x/time/rate" @@ -755,6 +757,35 @@ func TestParseSSOutput(t *testing.T) { } } +func TestServeDebugTrafficUniqueSenders(t *testing.T) { + s := New(key.NewNode(), t.Logf) + defer s.Close() + + clientKey := key.NewNode().Public() + c := &sclient{ + key: clientKey, + s: s, + logf: logger.Discard, + senderCardinality: hyperloglog.New(), + } + + for i := 0; i < 5; i++ { + c.senderCardinality.Insert(key.NewNode().Public().AppendTo(nil)) + } + + s.mu.Lock() + cs := &clientSet{} + cs.activeClient.Store(c) + s.clients[clientKey] = cs + s.mu.Unlock() + + estimate := c.EstimatedUniqueSenders() + t.Logf("Estimated unique senders: %d", estimate) + if estimate < 4 || estimate > 6 { + t.Errorf("EstimatedUniqueSenders() = %d, want ~5 (4-6 range)", estimate) + } +} + func TestGetPerClientSendQueueDepth(t *testing.T) { c := qt.New(t) envKey := "TS_DEBUG_DERP_PER_CLIENT_SEND_QUEUE_DEPTH" @@ -780,3 +811,167 @@ func TestGetPerClientSendQueueDepth(t *testing.T) { }) } } + +func TestSenderCardinality(t *testing.T) { + s := New(key.NewNode(), t.Logf) + defer s.Close() + + c := &sclient{ + key: key.NewNode().Public(), + s: s, + logf: logger.WithPrefix(t.Logf, "test client: "), + } + + if got := c.EstimatedUniqueSenders(); got != 0 { + t.Errorf("EstimatedUniqueSenders() before init = %d, want 0", got) + } + + c.senderCardinality = hyperloglog.New() + + if got := c.EstimatedUniqueSenders(); got != 0 { + t.Errorf("EstimatedUniqueSenders() with no senders = %d, want 0", got) + } + + senders := make([]key.NodePublic, 10) + for i := range senders { + senders[i] = key.NewNode().Public() + c.senderCardinality.Insert(senders[i].AppendTo(nil)) + } + + estimate := c.EstimatedUniqueSenders() + t.Logf("Estimated unique senders after 10 inserts: %d", estimate) + + if estimate < 8 || estimate > 12 { + t.Errorf("EstimatedUniqueSenders() = %d, want ~10 (8-12 range)", estimate) + } + + for i := 0; i < 5; i++ { + c.senderCardinality.Insert(senders[i].AppendTo(nil)) + } + + estimate2 := c.EstimatedUniqueSenders() + t.Logf("Estimated unique senders after duplicates: %d", estimate2) + + if estimate2 < 8 || estimate2 > 12 { + t.Errorf("EstimatedUniqueSenders() after duplicates = %d, want ~10 (8-12 range)", estimate2) + } +} + +func TestSenderCardinality100(t *testing.T) { + s := New(key.NewNode(), t.Logf) + defer s.Close() + + c := &sclient{ + key: key.NewNode().Public(), + s: s, + logf: logger.WithPrefix(t.Logf, "test client: "), + senderCardinality: hyperloglog.New(), + } + + numSenders := 100 + for i := 0; i < numSenders; i++ { + c.senderCardinality.Insert(key.NewNode().Public().AppendTo(nil)) + } + + estimate := c.EstimatedUniqueSenders() + t.Logf("Estimated unique senders for 100 actual senders: 
%d", estimate) + + if estimate < 85 || estimate > 115 { + t.Errorf("EstimatedUniqueSenders() = %d, want ~100 (85-115 range)", estimate) + } +} + +func TestSenderCardinalityTracking(t *testing.T) { + s := New(key.NewNode(), t.Logf) + defer s.Close() + + c := &sclient{ + key: key.NewNode().Public(), + s: s, + logf: logger.WithPrefix(t.Logf, "test client: "), + senderCardinality: hyperloglog.New(), + } + + zeroKey := key.NodePublic{} + if zeroKey != (key.NodePublic{}) { + c.senderCardinality.Insert(zeroKey.AppendTo(nil)) + } + + if estimate := c.EstimatedUniqueSenders(); estimate != 0 { + t.Errorf("EstimatedUniqueSenders() after zero key = %d, want 0", estimate) + } + + sender1 := key.NewNode().Public() + sender2 := key.NewNode().Public() + + if sender1 != (key.NodePublic{}) { + c.senderCardinality.Insert(sender1.AppendTo(nil)) + } + if sender2 != (key.NodePublic{}) { + c.senderCardinality.Insert(sender2.AppendTo(nil)) + } + + estimate := c.EstimatedUniqueSenders() + t.Logf("Estimated unique senders after 2 senders: %d", estimate) + + if estimate < 1 || estimate > 3 { + t.Errorf("EstimatedUniqueSenders() = %d, want ~2 (1-3 range)", estimate) + } +} + +func BenchmarkHyperLogLogInsert(b *testing.B) { + hll := hyperloglog.New() + sender := key.NewNode().Public() + senderBytes := sender.AppendTo(nil) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + hll.Insert(senderBytes) + } +} + +func BenchmarkHyperLogLogInsertUnique(b *testing.B) { + hll := hyperloglog.New() + + b.ResetTimer() + + buf := make([]byte, 32) + for i := 0; i < b.N; i++ { + binary.LittleEndian.PutUint64(buf, uint64(i)) + hll.Insert(buf) + } +} + +func BenchmarkHyperLogLogEstimate(b *testing.B) { + hll := hyperloglog.New() + + for i := 0; i < 100; i++ { + hll.Insert(key.NewNode().Public().AppendTo(nil)) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = hll.Estimate() + } +} + +func BenchmarkSenderCardinalityOverhead(b *testing.B) { + hll := hyperloglog.New() + sender := key.NewNode().Public() + + b.Run("WithTracking", func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + if hll != nil { + hll.Insert(sender.AppendTo(nil)) + } + } + }) + + b.Run("WithoutTracking", func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _ = sender.AppendTo(nil) + } + }) +} diff --git a/feature/feature.go b/feature/feature.go index 110b104da..48a4aff43 100644 --- a/feature/feature.go +++ b/feature/feature.go @@ -7,6 +7,8 @@ package feature import ( "errors" "reflect" + + "tailscale.com/util/testenv" ) var ErrUnavailable = errors.New("feature not included in this build") @@ -55,6 +57,19 @@ func (h *Hook[Func]) Set(f Func) { h.ok = true } +// SetForTest sets the hook function for tests, blowing +// away any previous value. It will panic if called from +// non-test code. +// +// It returns a restore function that resets the hook +// to its previous value. +func (h *Hook[Func]) SetForTest(f Func) (restore func()) { + testenv.AssertInTest() + old := *h + h.f, h.ok = f, true + return func() { *h = old } +} + // Get returns the hook function, or panics if it hasn't been set. // Use IsSet to check if it's been set, or use GetOrNil if you're // okay with a nil return value. diff --git a/feature/hooks.go b/feature/hooks.go index a3c6c0395..7e31061a7 100644 --- a/feature/hooks.go +++ b/feature/hooks.go @@ -6,6 +6,8 @@ package feature import ( "net/http" "net/url" + "os" + "sync" "tailscale.com/types/logger" "tailscale.com/types/persist" @@ -15,9 +17,16 @@ import ( // to conditionally initialize. 
var HookCanAutoUpdate Hook[func() bool] +var testAllowAutoUpdate = sync.OnceValue(func() bool { + return os.Getenv("TS_TEST_ALLOW_AUTO_UPDATE") == "1" +}) + // CanAutoUpdate reports whether the current binary is built with auto-update // support and, if so, whether the current platform supports it. func CanAutoUpdate() bool { + if testAllowAutoUpdate() { + return true + } if f, ok := HookCanAutoUpdate.GetOk(); ok { return f() } diff --git a/feature/relayserver/relayserver.go b/feature/relayserver/relayserver.go index 7d12d62e5..4f23ae18e 100644 --- a/feature/relayserver/relayserver.go +++ b/feature/relayserver/relayserver.go @@ -9,6 +9,7 @@ import ( "encoding/json" "fmt" "net/http" + "net/netip" "tailscale.com/disco" "tailscale.com/feature" @@ -23,6 +24,7 @@ import ( "tailscale.com/types/key" "tailscale.com/types/logger" "tailscale.com/types/ptr" + "tailscale.com/types/views" "tailscale.com/util/eventbus" "tailscale.com/wgengine/magicsock" ) @@ -67,7 +69,7 @@ func servePeerRelayDebugSessions(h *localapi.Handler, w http.ResponseWriter, r * // imported. func newExtension(logf logger.Logf, sb ipnext.SafeBackend) (ipnext.Extension, error) { e := &extension{ - newServerFn: func(logf logger.Logf, port int, onlyStaticAddrPorts bool) (relayServer, error) { + newServerFn: func(logf logger.Logf, port uint16, onlyStaticAddrPorts bool) (relayServer, error) { return udprelay.NewServer(logf, port, onlyStaticAddrPorts) }, logf: logger.WithPrefix(logf, featureName+": "), @@ -85,22 +87,24 @@ type relayServer interface { AllocateEndpoint(discoA, discoB key.DiscoPublic) (endpoint.ServerEndpoint, error) GetSessions() []status.ServerSession SetDERPMapView(tailcfg.DERPMapView) + SetStaticAddrPorts(addrPorts views.Slice[netip.AddrPort]) } // extension is an [ipnext.Extension] managing the relay server on platforms // that import this package. type extension struct { - newServerFn func(logf logger.Logf, port int, onlyStaticAddrPorts bool) (relayServer, error) // swappable for tests + newServerFn func(logf logger.Logf, port uint16, onlyStaticAddrPorts bool) (relayServer, error) // swappable for tests logf logger.Logf ec *eventbus.Client respPub *eventbus.Publisher[magicsock.UDPRelayAllocResp] - mu syncs.Mutex // guards the following fields - shutdown bool // true if Shutdown() has been called - rs relayServer // nil when disabled - port *int // ipn.Prefs.RelayServerPort, nil if disabled - derpMapView tailcfg.DERPMapView // latest seen over the eventbus - hasNodeAttrDisableRelayServer bool // [tailcfg.NodeAttrDisableRelayServer] + mu syncs.Mutex // guards the following fields + shutdown bool // true if Shutdown() has been called + rs relayServer // nil when disabled + port *uint16 // ipn.Prefs.RelayServerPort, nil if disabled + staticEndpoints views.Slice[netip.AddrPort] // ipn.Prefs.RelayServerStaticEndpoints + derpMapView tailcfg.DERPMapView // latest seen over the eventbus + hasNodeAttrDisableRelayServer bool // [tailcfg.NodeAttrDisableRelayServer] } // Name implements [ipnext.Extension]. @@ -147,7 +151,12 @@ func (e *extension) onAllocReq(req magicsock.UDPRelayAllocReq) { e.logf("error allocating endpoint: %v", err) return } - e.respPub.Publish(magicsock.UDPRelayAllocResp{ + // Take a defensive stance around publishing from within an + // [*eventbus.SubscribeFunc] by publishing from a separate goroutine. At the + // time of writing (2025-11-21), publishing from within the + // [*eventbus.SubscribeFunc] goroutine is potentially unsafe if publisher + // and subscriber share a lock. 
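// (Concretely, the hazard being avoided: if the goroutine delivering this
// event to the SubscribeFunc holds a lock that the Publish path below also
// needs, publishing synchronously from inside the callback could deadlock.
// Handing the Publish to its own goroutine lets the callback return and
// release whatever it holds first.)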
+ go e.respPub.Publish(magicsock.UDPRelayAllocResp{ ReqRxFromNodeKey: req.RxFromNodeKey, ReqRxFromDiscoKey: req.RxFromDiscoKey, Message: &disco.AllocateUDPRelayEndpointResponse{ @@ -181,6 +190,7 @@ func (e *extension) relayServerShouldBeRunningLocked() bool { // handleRelayServerLifetimeLocked handles the lifetime of [e.rs]. func (e *extension) handleRelayServerLifetimeLocked() { + defer e.handleRelayServerStaticAddrPortsLocked() if !e.relayServerShouldBeRunningLocked() { e.stopRelayServerLocked() return @@ -190,6 +200,13 @@ func (e *extension) handleRelayServerLifetimeLocked() { e.tryStartRelayServerLocked() } +func (e *extension) handleRelayServerStaticAddrPortsLocked() { + if e.rs != nil { + // TODO(jwhited): env var support + e.rs.SetStaticAddrPorts(e.staticEndpoints) + } +} + func (e *extension) selfNodeViewChanged(nodeView tailcfg.NodeView) { e.mu.Lock() defer e.mu.Unlock() @@ -200,6 +217,7 @@ func (e *extension) selfNodeViewChanged(nodeView tailcfg.NodeView) { func (e *extension) profileStateChanged(_ ipn.LoginProfileView, prefs ipn.PrefsView, sameNode bool) { e.mu.Lock() defer e.mu.Unlock() + e.staticEndpoints = prefs.RelayServerStaticEndpoints() newPort, ok := prefs.RelayServerPort().GetOk() enableOrDisableServer := ok != (e.port != nil) portChanged := ok && e.port != nil && newPort != *e.port diff --git a/feature/relayserver/relayserver_test.go b/feature/relayserver/relayserver_test.go index 3d71c55d7..807306c70 100644 --- a/feature/relayserver/relayserver_test.go +++ b/feature/relayserver/relayserver_test.go @@ -5,7 +5,9 @@ package relayserver import ( "errors" + "net/netip" "reflect" + "slices" "testing" "tailscale.com/ipn" @@ -17,15 +19,21 @@ import ( "tailscale.com/types/key" "tailscale.com/types/logger" "tailscale.com/types/ptr" + "tailscale.com/types/views" ) func Test_extension_profileStateChanged(t *testing.T) { - prefsWithPortOne := ipn.Prefs{RelayServerPort: ptr.To(1)} + prefsWithPortOne := ipn.Prefs{RelayServerPort: ptr.To(uint16(1))} prefsWithNilPort := ipn.Prefs{RelayServerPort: nil} + prefsWithPortOneRelayEndpoints := ipn.Prefs{ + RelayServerPort: ptr.To(uint16(1)), + RelayServerStaticEndpoints: []netip.AddrPort{netip.MustParseAddrPort("127.0.0.1:7777")}, + } type fields struct { - port *int - rs relayServer + port *uint16 + staticEndpoints views.Slice[netip.AddrPort] + rs relayServer } type args struct { prefs ipn.PrefsView @@ -35,28 +43,75 @@ func Test_extension_profileStateChanged(t *testing.T) { name string fields fields args args - wantPort *int + wantPort *uint16 wantRelayServerFieldNonNil bool wantRelayServerFieldMutated bool + wantEndpoints []netip.AddrPort }{ { name: "no changes non-nil port previously running", fields: fields{ - port: ptr.To(1), + port: ptr.To(uint16(1)), rs: mockRelayServerNotZeroVal(), }, args: args{ prefs: prefsWithPortOne.View(), sameNode: true, }, - wantPort: ptr.To(1), + wantPort: ptr.To(uint16(1)), + wantRelayServerFieldNonNil: true, + wantRelayServerFieldMutated: false, + }, + { + name: "set addr ports unchanged port previously running", + fields: fields{ + port: ptr.To(uint16(1)), + rs: mockRelayServerNotZeroVal(), + }, + args: args{ + prefs: prefsWithPortOneRelayEndpoints.View(), + sameNode: true, + }, + wantPort: ptr.To(uint16(1)), + wantRelayServerFieldNonNil: true, + wantRelayServerFieldMutated: false, + wantEndpoints: prefsWithPortOneRelayEndpoints.RelayServerStaticEndpoints, + }, + { + name: "set addr ports not previously running", + fields: fields{ + port: nil, + rs: nil, + }, + args: args{ + prefs: 
prefsWithPortOneRelayEndpoints.View(), + sameNode: true, + }, + wantPort: ptr.To(uint16(1)), + wantRelayServerFieldNonNil: true, + wantRelayServerFieldMutated: true, + wantEndpoints: prefsWithPortOneRelayEndpoints.RelayServerStaticEndpoints, + }, + { + name: "clear addr ports unchanged port previously running", + fields: fields{ + port: ptr.To(uint16(1)), + staticEndpoints: views.SliceOf(prefsWithPortOneRelayEndpoints.RelayServerStaticEndpoints), + rs: mockRelayServerNotZeroVal(), + }, + args: args{ + prefs: prefsWithPortOne.View(), + sameNode: true, + }, + wantPort: ptr.To(uint16(1)), wantRelayServerFieldNonNil: true, wantRelayServerFieldMutated: false, + wantEndpoints: nil, }, { name: "prefs port nil", fields: fields{ - port: ptr.To(1), + port: ptr.To(uint16(1)), }, args: args{ prefs: prefsWithNilPort.View(), @@ -69,7 +124,7 @@ func Test_extension_profileStateChanged(t *testing.T) { { name: "prefs port nil previously running", fields: fields{ - port: ptr.To(1), + port: ptr.To(uint16(1)), rs: mockRelayServerNotZeroVal(), }, args: args{ @@ -83,54 +138,54 @@ func Test_extension_profileStateChanged(t *testing.T) { { name: "prefs port changed", fields: fields{ - port: ptr.To(2), + port: ptr.To(uint16(2)), }, args: args{ prefs: prefsWithPortOne.View(), sameNode: true, }, - wantPort: ptr.To(1), + wantPort: ptr.To(uint16(1)), wantRelayServerFieldNonNil: true, wantRelayServerFieldMutated: true, }, { name: "prefs port changed previously running", fields: fields{ - port: ptr.To(2), + port: ptr.To(uint16(2)), rs: mockRelayServerNotZeroVal(), }, args: args{ prefs: prefsWithPortOne.View(), sameNode: true, }, - wantPort: ptr.To(1), + wantPort: ptr.To(uint16(1)), wantRelayServerFieldNonNil: true, wantRelayServerFieldMutated: true, }, { name: "sameNode false", fields: fields{ - port: ptr.To(1), + port: ptr.To(uint16(1)), }, args: args{ prefs: prefsWithPortOne.View(), sameNode: false, }, - wantPort: ptr.To(1), + wantPort: ptr.To(uint16(1)), wantRelayServerFieldNonNil: true, wantRelayServerFieldMutated: true, }, { name: "sameNode false previously running", fields: fields{ - port: ptr.To(1), + port: ptr.To(uint16(1)), rs: mockRelayServerNotZeroVal(), }, args: args{ prefs: prefsWithPortOne.View(), sameNode: false, }, - wantPort: ptr.To(1), + wantPort: ptr.To(uint16(1)), wantRelayServerFieldNonNil: true, wantRelayServerFieldMutated: true, }, @@ -143,7 +198,7 @@ func Test_extension_profileStateChanged(t *testing.T) { prefs: prefsWithPortOne.View(), sameNode: false, }, - wantPort: ptr.To(1), + wantPort: ptr.To(uint16(1)), wantRelayServerFieldNonNil: true, wantRelayServerFieldMutated: true, }, @@ -156,10 +211,11 @@ func Test_extension_profileStateChanged(t *testing.T) { t.Fatal(err) } e := ipne.(*extension) - e.newServerFn = func(logf logger.Logf, port int, onlyStaticAddrPorts bool) (relayServer, error) { + e.newServerFn = func(logf logger.Logf, port uint16, onlyStaticAddrPorts bool) (relayServer, error) { return &mockRelayServer{}, nil } e.port = tt.fields.port + e.staticEndpoints = tt.fields.staticEndpoints e.rs = tt.fields.rs defer e.Shutdown() e.profileStateChanged(ipn.LoginProfileView{}, tt.args.prefs, tt.args.sameNode) @@ -174,24 +230,34 @@ func Test_extension_profileStateChanged(t *testing.T) { if tt.wantRelayServerFieldMutated != !reflect.DeepEqual(tt.fields.rs, e.rs) { t.Errorf("wantRelayServerFieldMutated: %v != !reflect.DeepEqual(tt.fields.rs, e.rs): %v", tt.wantRelayServerFieldMutated, !reflect.DeepEqual(tt.fields.rs, e.rs)) } + if !slices.Equal(tt.wantEndpoints, e.staticEndpoints.AsSlice()) { + 
t.Errorf("wantEndpoints: %v != %v", tt.wantEndpoints, e.staticEndpoints.AsSlice()) + } + if e.rs != nil && !slices.Equal(tt.wantEndpoints, e.rs.(*mockRelayServer).addrPorts.AsSlice()) { + t.Errorf("wantEndpoints: %v != %v", tt.wantEndpoints, e.rs.(*mockRelayServer).addrPorts.AsSlice()) + } }) } } func mockRelayServerNotZeroVal() *mockRelayServer { - return &mockRelayServer{true} + return &mockRelayServer{set: true} } type mockRelayServer struct { - set bool + set bool + addrPorts views.Slice[netip.AddrPort] } -func (mockRelayServer) Close() error { return nil } -func (mockRelayServer) AllocateEndpoint(_, _ key.DiscoPublic) (endpoint.ServerEndpoint, error) { +func (m *mockRelayServer) Close() error { return nil } +func (m *mockRelayServer) AllocateEndpoint(_, _ key.DiscoPublic) (endpoint.ServerEndpoint, error) { return endpoint.ServerEndpoint{}, errors.New("not implemented") } -func (mockRelayServer) GetSessions() []status.ServerSession { return nil } -func (mockRelayServer) SetDERPMapView(tailcfg.DERPMapView) { return } +func (m *mockRelayServer) GetSessions() []status.ServerSession { return nil } +func (m *mockRelayServer) SetDERPMapView(tailcfg.DERPMapView) { return } +func (m *mockRelayServer) SetStaticAddrPorts(aps views.Slice[netip.AddrPort]) { + m.addrPorts = aps +} type mockSafeBackend struct { sys *tsd.System @@ -205,7 +271,7 @@ func Test_extension_handleRelayServerLifetimeLocked(t *testing.T) { tests := []struct { name string shutdown bool - port *int + port *uint16 rs relayServer hasNodeAttrDisableRelayServer bool wantRelayServerFieldNonNil bool @@ -214,7 +280,7 @@ func Test_extension_handleRelayServerLifetimeLocked(t *testing.T) { { name: "want running", shutdown: false, - port: ptr.To(1), + port: ptr.To(uint16(1)), hasNodeAttrDisableRelayServer: false, wantRelayServerFieldNonNil: true, wantRelayServerFieldMutated: true, @@ -222,7 +288,7 @@ func Test_extension_handleRelayServerLifetimeLocked(t *testing.T) { { name: "want running previously running", shutdown: false, - port: ptr.To(1), + port: ptr.To(uint16(1)), rs: mockRelayServerNotZeroVal(), hasNodeAttrDisableRelayServer: false, wantRelayServerFieldNonNil: true, @@ -231,7 +297,7 @@ func Test_extension_handleRelayServerLifetimeLocked(t *testing.T) { { name: "shutdown true", shutdown: true, - port: ptr.To(1), + port: ptr.To(uint16(1)), hasNodeAttrDisableRelayServer: false, wantRelayServerFieldNonNil: false, wantRelayServerFieldMutated: false, @@ -239,7 +305,7 @@ func Test_extension_handleRelayServerLifetimeLocked(t *testing.T) { { name: "shutdown true previously running", shutdown: true, - port: ptr.To(1), + port: ptr.To(uint16(1)), rs: mockRelayServerNotZeroVal(), hasNodeAttrDisableRelayServer: false, wantRelayServerFieldNonNil: false, @@ -288,7 +354,7 @@ func Test_extension_handleRelayServerLifetimeLocked(t *testing.T) { t.Fatal(err) } e := ipne.(*extension) - e.newServerFn = func(logf logger.Logf, port int, onlyStaticAddrPorts bool) (relayServer, error) { + e.newServerFn = func(logf logger.Logf, port uint16, onlyStaticAddrPorts bool) (relayServer, error) { return &mockRelayServer{}, nil } e.shutdown = tt.shutdown diff --git a/feature/tpm/tpm_linux.go b/feature/tpm/tpm_linux.go index 6c8131e8d..3f05c9a8c 100644 --- a/feature/tpm/tpm_linux.go +++ b/feature/tpm/tpm_linux.go @@ -4,6 +4,8 @@ package tpm import ( + "errors" + "github.com/google/go-tpm/tpm2/transport" "github.com/google/go-tpm/tpm2/transport/linuxtpm" ) @@ -13,5 +15,10 @@ func open() (transport.TPMCloser, error) { if err == nil { return tpm, nil } - return 
linuxtpm.Open("/dev/tpm0")
+	errs := []error{err}
+	tpm, err = linuxtpm.Open("/dev/tpm0")
+	if err == nil {
+		return tpm, nil
+	}
+	return nil, errors.Join(append(errs, err)...)
 }
diff --git a/flake.nix b/flake.nix
index fc3a466fc..855ce555b 100644
--- a/flake.nix
+++ b/flake.nix
@@ -151,5 +151,4 @@
     });
   };
 }
-# nix-direnv cache busting line: sha256-sGPgML2YM/XNWfsAdDZvzWHagcydwCmR6nKOHJj5COs=
-
+# nix-direnv cache busting line: sha256-IkodqRYdueML7U2Hh8vRw6Et7+WII+VXuPJ3jZ2xYx8=
diff --git a/go.mod b/go.mod
index 3b4f34b2d..bd6fe441d 100644
--- a/go.mod
+++ b/go.mod
@@ -16,6 +16,8 @@ require (
 	github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.58
 	github.com/aws/aws-sdk-go-v2/service/s3 v1.75.3
 	github.com/aws/aws-sdk-go-v2/service/ssm v1.44.7
+	github.com/axiomhq/hyperloglog v0.0.0-20240319100328-84253e514e02
+	github.com/bradfitz/go-tool-cache v0.0.0-20251113223507-0124e698e0bd
 	github.com/bramvdbogaerde/go-scp v1.4.0
 	github.com/cilium/ebpf v0.15.0
 	github.com/coder/websocket v1.8.12
@@ -60,7 +62,7 @@ require (
 	github.com/jellydator/ttlcache/v3 v3.1.0
 	github.com/jsimonetti/rtnetlink v1.4.0
 	github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
-	github.com/klauspost/compress v1.17.11
+	github.com/klauspost/compress v1.18.0
 	github.com/kortschak/wol v0.0.0-20200729010619-da482cc4850a
 	github.com/mattn/go-colorable v0.1.13
 	github.com/mattn/go-isatty v0.0.20
@@ -74,8 +76,8 @@ require (
 	github.com/pkg/errors v0.9.1
 	github.com/pkg/sftp v1.13.6
 	github.com/prometheus-community/pro-bing v0.4.0
-	github.com/prometheus/client_golang v1.20.5
-	github.com/prometheus/common v0.55.0
+	github.com/prometheus/client_golang v1.23.0
+	github.com/prometheus/common v0.65.0
 	github.com/prometheus/prometheus v0.49.2-0.20240125131847-c3b8ef1694ff
 	github.com/safchain/ethtool v0.3.0
 	github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e
@@ -102,8 +104,8 @@ require (
 	go.uber.org/zap v1.27.0
 	go4.org/mem v0.0.0-20240501181205-ae6ca9944745
 	go4.org/netipx v0.0.0-20231129151722-fdeea329fbba
-	golang.org/x/crypto v0.44.0
-	golang.org/x/exp v0.0.0-20250210185358-939b2ce775ac
+	golang.org/x/crypto v0.45.0
+	golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b
 	golang.org/x/mod v0.30.0
 	golang.org/x/net v0.47.0
 	golang.org/x/oauth2 v0.30.0
@@ -148,6 +150,7 @@ require (
 	github.com/containerd/typeurl/v2 v2.2.3 // indirect
 	github.com/cyphar/filepath-securejoin v0.3.6 // indirect
 	github.com/deckarep/golang-set/v2 v2.8.0 // indirect
+	github.com/dgryski/go-metro v0.0.0-20180109044635-280f6062b5bc // indirect
 	github.com/docker/go-connections v0.5.0 // indirect
 	github.com/docker/go-units v0.5.0 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
@@ -355,8 +358,8 @@ require (
 	github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
 	github.com/polyfloyd/go-errorlint v1.4.8 // indirect
-	github.com/prometheus/client_model v0.6.1
-	github.com/prometheus/procfs v0.15.1 // indirect
+	github.com/prometheus/client_model v0.6.2
+	github.com/prometheus/procfs v0.16.1 // indirect
 	github.com/quasilyte/go-ruleguard v0.4.2 // indirect
 	github.com/quasilyte/gogrep v0.5.0 // indirect
 	github.com/quasilyte/regex/syntax v0.0.0-20210819130434-b3f0c404a727 // indirect
@@ -414,7 +417,7 @@ require (
 	golang.org/x/image v0.27.0 // indirect
 	golang.org/x/text v0.31.0 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
-	google.golang.org/protobuf v1.36.3 // indirect
+	google.golang.org/protobuf v1.36.6 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect diff --git a/go.mod.sri b/go.mod.sri index 76c72f0c9..329fe9405 100644 --- a/go.mod.sri +++ b/go.mod.sri @@ -1 +1 @@ -sha256-sGPgML2YM/XNWfsAdDZvzWHagcydwCmR6nKOHJj5COs= +sha256-IkodqRYdueML7U2Hh8vRw6Et7+WII+VXuPJ3jZ2xYx8= diff --git a/go.sum b/go.sum index f0758f2d4..111c99ac9 100644 --- a/go.sum +++ b/go.sum @@ -170,6 +170,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.33.13 h1:3LXNnmtH3TURctC23hnC0p/39Q5 github.com/aws/aws-sdk-go-v2/service/sts v1.33.13/go.mod h1:7Yn+p66q/jt38qMoVfNvjbm3D89mGBnkwDcijgtih8w= github.com/aws/smithy-go v1.22.2 h1:6D9hW43xKFrRx/tXXfAlIZc4JI+yQe6snnWcQyxSyLQ= github.com/aws/smithy-go v1.22.2/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= +github.com/axiomhq/hyperloglog v0.0.0-20240319100328-84253e514e02 h1:bXAPYSbdYbS5VTy92NIUbeDI1qyggi+JYh5op9IFlcQ= +github.com/axiomhq/hyperloglog v0.0.0-20240319100328-84253e514e02/go.mod h1:k08r+Yj1PRAmuayFiRK6MYuR5Ve4IuZtTfxErMIh0+c= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -186,6 +188,8 @@ github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4= github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= github.com/bombsimon/wsl/v4 v4.2.1 h1:Cxg6u+XDWff75SIFFmNsqnIOgob+Q9hG6y/ioKbRFiM= github.com/bombsimon/wsl/v4 v4.2.1/go.mod h1:Xu/kDxGZTofQcDGCtQe9KCzhHphIe0fDuyWTxER9Feo= +github.com/bradfitz/go-tool-cache v0.0.0-20251113223507-0124e698e0bd h1:1Df3FBmfyUCIQ4eKzAPXIWTfewY89L0fWPWO56zWCyI= +github.com/bradfitz/go-tool-cache v0.0.0-20251113223507-0124e698e0bd/go.mod h1:2+xptBAd0m2kZ1wLO4AYZhldLEFPy+KeGwmnlXLvy+w= github.com/bramvdbogaerde/go-scp v1.4.0 h1:jKMwpwCbcX1KyvDbm/PDJuXcMuNVlLGi0Q0reuzjyKY= github.com/bramvdbogaerde/go-scp v1.4.0/go.mod h1:on2aH5AxaFb2G0N5Vsdy6B0Ml7k9HuHSwfo1y0QzAbQ= github.com/breml/bidichk v0.2.7 h1:dAkKQPLl/Qrk7hnP6P+E0xOodrq8Us7+U0o4UBOAlQY= @@ -269,6 +273,8 @@ github.com/deckarep/golang-set/v2 v2.8.0 h1:swm0rlPCmdWn9mESxKOjWk8hXSqoxOp+Zlfu github.com/deckarep/golang-set/v2 v2.8.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= github.com/denis-tingaikin/go-header v0.5.0 h1:SRdnP5ZKvcO9KKRP1KJrhFR3RrlGuD+42t4429eC9k8= github.com/denis-tingaikin/go-header v0.5.0/go.mod h1:mMenU5bWrok6Wl2UsZjy+1okegmwQ3UgWl4V1D8gjlY= +github.com/dgryski/go-metro v0.0.0-20180109044635-280f6062b5bc h1:8WFBn63wegobsYAX0YjD+8suexZDga5CctH4CCTx2+8= +github.com/dgryski/go-metro v0.0.0-20180109044635-280f6062b5bc/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= github.com/digitalocean/go-smbios v0.0.0-20180907143718-390a4f403a8e h1:vUmf0yezR0y7jJ5pceLHthLaYf4bA5T14B6q39S4q2Q= github.com/digitalocean/go-smbios v0.0.0-20180907143718-390a4f403a8e/go.mod h1:YTIHhz/QFSYnu/EhlF2SpU2Uk+32abacUYA5ZPljz1A= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= @@ -662,8 +668,8 @@ github.com/kisielk/errcheck v1.7.0/go.mod h1:1kLL+jV4e+CFfueBmI1dSK2ADDyQnlrnrY/ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kkHAIKE/contextcheck v1.1.4 h1:B6zAaLhOEEcjvUgIYEqystmnFk1Oemn8bvJhbt0GMb8= github.com/kkHAIKE/contextcheck v1.1.4/go.mod h1:1+i/gWqokIa+dm31mqGLZhZJ7Uh44DJGZVmr6QRBNJg= -github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= 
-github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -840,29 +846,29 @@ github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= -github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= -github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc= +github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= -github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.32.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= -github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= +github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.7.3/go.mod 
h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= -github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/prometheus/prometheus v0.49.2-0.20240125131847-c3b8ef1694ff h1:X1Tly81aZ22DA1fxBdfvR3iw8+yFoUBUHMEd+AX/ZXI= github.com/prometheus/prometheus v0.49.2-0.20240125131847-c3b8ef1694ff/go.mod h1:FvE8dtQ1Ww63IlyKBn1V4s+zMwF9kHkVNkQBR1pM4CU= github.com/puzpuzpuz/xsync v1.5.2 h1:yRAP4wqSOZG+/4pxJ08fPTwrfL0IzE/LKQ/cw509qGY= @@ -1128,8 +1134,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= -golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU= -golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc= +golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -1140,8 +1146,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20250210185358-939b2ce775ac h1:l5+whBCLH3iH2ZNHYLbAe58bo7yrN4mVcnkHDYz5vvs= -golang.org/x/exp v0.0.0-20250210185358-939b2ce775ac/go.mod h1:hH+7mtFmImwwcMvScyxUhjuVHR3HGaDPMn9rMSUUbxo= +golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o= +golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8= golang.org/x/exp/typeparams v0.0.0-20220428152302-39d4317da171/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk= golang.org/x/exp/typeparams v0.0.0-20230203172020-98cc5a0785f9/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk= golang.org/x/exp/typeparams v0.0.0-20240314144324-c7f7c6466f7f h1:phY1HzDcf18Aq9A8KkmRtY9WvOFIxN8wgfvy6Zm1DV8= @@ -1498,8 +1504,8 @@ google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGj google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.36.3 h1:82DV7MYdb8anAVi3qge1wSnMDrnKK7ebr+I0hHRN1BU= -google.golang.org/protobuf v1.36.3/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.6 
h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/ipn/ipn_clone.go b/ipn/ipn_clone.go index 1be716197..4bf78b40b 100644 --- a/ipn/ipn_clone.go +++ b/ipn/ipn_clone.go @@ -64,46 +64,48 @@ func (src *Prefs) Clone() *Prefs { if dst.RelayServerPort != nil { dst.RelayServerPort = ptr.To(*src.RelayServerPort) } + dst.RelayServerStaticEndpoints = append(src.RelayServerStaticEndpoints[:0:0], src.RelayServerStaticEndpoints...) dst.Persist = src.Persist.Clone() return dst } // A compilation failure here means this code must be regenerated, with the command at the top of this file. var _PrefsCloneNeedsRegeneration = Prefs(struct { - ControlURL string - RouteAll bool - ExitNodeID tailcfg.StableNodeID - ExitNodeIP netip.Addr - AutoExitNode ExitNodeExpression - InternalExitNodePrior tailcfg.StableNodeID - ExitNodeAllowLANAccess bool - CorpDNS bool - RunSSH bool - RunWebClient bool - WantRunning bool - LoggedOut bool - ShieldsUp bool - AdvertiseTags []string - Hostname string - NotepadURLs bool - ForceDaemon bool - Egg bool - AdvertiseRoutes []netip.Prefix - AdvertiseServices []string - Sync opt.Bool - NoSNAT bool - NoStatefulFiltering opt.Bool - NetfilterMode preftype.NetfilterMode - OperatorUser string - ProfileName string - AutoUpdate AutoUpdatePrefs - AppConnector AppConnectorPrefs - PostureChecking bool - NetfilterKind string - DriveShares []*drive.Share - RelayServerPort *int - AllowSingleHosts marshalAsTrueInJSON - Persist *persist.Persist + ControlURL string + RouteAll bool + ExitNodeID tailcfg.StableNodeID + ExitNodeIP netip.Addr + AutoExitNode ExitNodeExpression + InternalExitNodePrior tailcfg.StableNodeID + ExitNodeAllowLANAccess bool + CorpDNS bool + RunSSH bool + RunWebClient bool + WantRunning bool + LoggedOut bool + ShieldsUp bool + AdvertiseTags []string + Hostname string + NotepadURLs bool + ForceDaemon bool + Egg bool + AdvertiseRoutes []netip.Prefix + AdvertiseServices []string + Sync opt.Bool + NoSNAT bool + NoStatefulFiltering opt.Bool + NetfilterMode preftype.NetfilterMode + OperatorUser string + ProfileName string + AutoUpdate AutoUpdatePrefs + AppConnector AppConnectorPrefs + PostureChecking bool + NetfilterKind string + DriveShares []*drive.Share + RelayServerPort *uint16 + RelayServerStaticEndpoints []netip.AddrPort + AllowSingleHosts marshalAsTrueInJSON + Persist *persist.Persist }{}) // Clone makes a deep copy of ServeConfig. diff --git a/ipn/ipn_view.go b/ipn/ipn_view.go index d3836416b..4157ec76e 100644 --- a/ipn/ipn_view.go +++ b/ipn/ipn_view.go @@ -441,13 +441,18 @@ func (v PrefsView) DriveShares() views.SliceView[*drive.Share, drive.ShareView] // RelayServerPort is the UDP port number for the relay server to bind to, // on all interfaces. A non-nil zero value signifies a random unused port // should be used. A nil value signifies relay server functionality -// should be disabled. This field is currently experimental, and therefore -// no guarantees are made about its current naming and functionality when -// non-nil/enabled. -func (v PrefsView) RelayServerPort() views.ValuePointer[int] { +// should be disabled. 
+func (v PrefsView) RelayServerPort() views.ValuePointer[uint16] { return views.ValuePointerOf(v.ж.RelayServerPort) } +// RelayServerStaticEndpoints are static IP:port endpoints to advertise as +// candidates for relay connections. Only relevant when RelayServerPort is +// non-nil. +func (v PrefsView) RelayServerStaticEndpoints() views.Slice[netip.AddrPort] { + return views.SliceOf(v.ж.RelayServerStaticEndpoints) +} + // AllowSingleHosts was a legacy field that was always true // for the past 4.5 years. It controlled whether Tailscale // peers got /32 or /128 routes for each other. @@ -468,40 +473,41 @@ func (v PrefsView) Persist() persist.PersistView { return v.ж.Persist.View() } // A compilation failure here means this code must be regenerated, with the command at the top of this file. var _PrefsViewNeedsRegeneration = Prefs(struct { - ControlURL string - RouteAll bool - ExitNodeID tailcfg.StableNodeID - ExitNodeIP netip.Addr - AutoExitNode ExitNodeExpression - InternalExitNodePrior tailcfg.StableNodeID - ExitNodeAllowLANAccess bool - CorpDNS bool - RunSSH bool - RunWebClient bool - WantRunning bool - LoggedOut bool - ShieldsUp bool - AdvertiseTags []string - Hostname string - NotepadURLs bool - ForceDaemon bool - Egg bool - AdvertiseRoutes []netip.Prefix - AdvertiseServices []string - Sync opt.Bool - NoSNAT bool - NoStatefulFiltering opt.Bool - NetfilterMode preftype.NetfilterMode - OperatorUser string - ProfileName string - AutoUpdate AutoUpdatePrefs - AppConnector AppConnectorPrefs - PostureChecking bool - NetfilterKind string - DriveShares []*drive.Share - RelayServerPort *int - AllowSingleHosts marshalAsTrueInJSON - Persist *persist.Persist + ControlURL string + RouteAll bool + ExitNodeID tailcfg.StableNodeID + ExitNodeIP netip.Addr + AutoExitNode ExitNodeExpression + InternalExitNodePrior tailcfg.StableNodeID + ExitNodeAllowLANAccess bool + CorpDNS bool + RunSSH bool + RunWebClient bool + WantRunning bool + LoggedOut bool + ShieldsUp bool + AdvertiseTags []string + Hostname string + NotepadURLs bool + ForceDaemon bool + Egg bool + AdvertiseRoutes []netip.Prefix + AdvertiseServices []string + Sync opt.Bool + NoSNAT bool + NoStatefulFiltering opt.Bool + NetfilterMode preftype.NetfilterMode + OperatorUser string + ProfileName string + AutoUpdate AutoUpdatePrefs + AppConnector AppConnectorPrefs + PostureChecking bool + NetfilterKind string + DriveShares []*drive.Share + RelayServerPort *uint16 + RelayServerStaticEndpoints []netip.AddrPort + AllowSingleHosts marshalAsTrueInJSON + Persist *persist.Persist }{}) // View returns a read-only view of ServeConfig. diff --git a/ipn/ipnlocal/drive.go b/ipn/ipnlocal/drive.go index 7d6dc2427..456cd4544 100644 --- a/ipn/ipnlocal/drive.go +++ b/ipn/ipnlocal/drive.go @@ -433,7 +433,7 @@ func (rbw *responseBodyWrapper) Close() error { // b.Dialer().PeerAPITransport() with metrics tracking. type driveTransport struct { b *LocalBackend - tr *http.Transport + tr http.RoundTripper } func (b *LocalBackend) newDriveTransport() *driveTransport { @@ -443,7 +443,7 @@ func (b *LocalBackend) newDriveTransport() *driveTransport { } } -func (dt *driveTransport) RoundTrip(req *http.Request) (resp *http.Response, err error) { +func (dt *driveTransport) RoundTrip(req *http.Request) (*http.Response, error) { // Some WebDAV clients include origin and refer headers, which peerapi does // not like. Remove them. 
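The ipn_view.go additions above complete the RelayServerPort change from *int to *uint16 and expose the new RelayServerStaticEndpoints slice. The pointer encodes three states: nil disables the relay server, a pointer to zero requests a random unused port, and any other value binds that exact UDP port; using uint16 also makes out-of-range port numbers unrepresentable. A small sketch of a consumer of that encoding; describeRelayPort is hypothetical, not part of the real code:

package main

import "fmt"

// describeRelayPort spells out the three states a *uint16 port pref encodes.
func describeRelayPort(port *uint16) string {
	switch {
	case port == nil:
		return "relay server disabled"
	case *port == 0:
		return "relay server enabled on a random unused UDP port"
	default:
		return fmt.Sprintf("relay server enabled on UDP port %d", *port)
	}
}

func main() {
	p := uint16(40000)
	fmt.Println(describeRelayPort(nil))         // disabled
	fmt.Println(describeRelayPort(new(uint16))) // random port
	fmt.Println(describeRelayPort(&p))          // fixed port
}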
req.Header.Del("origin") @@ -455,42 +455,45 @@ func (dt *driveTransport) RoundTrip(req *http.Request) (resp *http.Response, err req.Body = bw } - defer func() { - contentType := "unknown" - if ct := req.Header.Get("Content-Type"); ct != "" { - contentType = ct - } + resp, err := dt.tr.RoundTrip(req) + if err != nil { + return nil, err + } - dt.b.mu.Lock() - selfNodeKey := dt.b.currentNode().Self().Key().ShortString() - dt.b.mu.Unlock() - n, _, ok := dt.b.WhoIs("tcp", netip.MustParseAddrPort(req.URL.Host)) - shareNodeKey := "unknown" - if ok { - shareNodeKey = string(n.Key().ShortString()) - } + contentType := "unknown" + if ct := req.Header.Get("Content-Type"); ct != "" { + contentType = ct + } - rbw := responseBodyWrapper{ - log: dt.b.logf, - logVerbose: req.Method != httpm.GET && req.Method != httpm.PUT, // other requests like PROPFIND are quite chatty, so we log those at verbose level - method: req.Method, - bytesTx: int64(bw.bytesRead), - selfNodeKey: selfNodeKey, - shareNodeKey: shareNodeKey, - contentType: contentType, - contentLength: resp.ContentLength, - fileExtension: parseDriveFileExtensionForLog(req.URL.Path), - statusCode: resp.StatusCode, - ReadCloser: resp.Body, - } + dt.b.mu.Lock() + selfNodeKey := dt.b.currentNode().Self().Key().ShortString() + dt.b.mu.Unlock() + n, _, ok := dt.b.WhoIs("tcp", netip.MustParseAddrPort(req.URL.Host)) + shareNodeKey := "unknown" + if ok { + shareNodeKey = string(n.Key().ShortString()) + } - if resp.StatusCode >= 400 { - // in case of error response, just log immediately - rbw.logAccess("") - } else { - resp.Body = &rbw - } - }() + rbw := responseBodyWrapper{ + log: dt.b.logf, + logVerbose: req.Method != httpm.GET && req.Method != httpm.PUT, // other requests like PROPFIND are quite chatty, so we log those at verbose level + method: req.Method, + bytesTx: int64(bw.bytesRead), + selfNodeKey: selfNodeKey, + shareNodeKey: shareNodeKey, + contentType: contentType, + contentLength: resp.ContentLength, + fileExtension: parseDriveFileExtensionForLog(req.URL.Path), + statusCode: resp.StatusCode, + ReadCloser: resp.Body, + } + + if resp.StatusCode >= 400 { + // in case of error response, just log immediately + rbw.logAccess("") + } else { + resp.Body = &rbw + } - return dt.tr.RoundTrip(req) + return resp, nil } diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 0ff299399..fbf34aa42 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -14,7 +14,6 @@ import ( "errors" "fmt" "io" - "log" "math" "math/rand/v2" "net" @@ -544,7 +543,7 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo netMon := sys.NetMon.Get() b.sockstatLogger, err = sockstatlog.NewLogger(logpolicy.LogsDir(logf), logf, logID, netMon, sys.HealthTracker.Get(), sys.Bus.Get()) if err != nil { - log.Printf("error setting up sockstat logger: %v", err) + logf("error setting up sockstat logger: %v", err) } // Enable sockstats logs only on non-mobile unstable builds if version.IsUnstableBuild() && !version.IsMobile() && b.sockstatLogger != nil { @@ -877,6 +876,7 @@ func (b *LocalBackend) initPrefsFromConfig(conf *conffile.Config) error { } func (b *LocalBackend) setStaticEndpointsFromConfigLocked(conf *conffile.Config) { + syncs.RequiresMutex(&b.mu) if conf.Parsed.StaticEndpoints == nil && (b.conf == nil || b.conf.Parsed.StaticEndpoints == nil) { return } @@ -895,6 +895,7 @@ func (b *LocalBackend) setStaticEndpointsFromConfigLocked(conf *conffile.Config) } func (b *LocalBackend) setStateLocked(state ipn.State) { + syncs.RequiresMutex(&b.mu) 
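The driveTransport rewrite above replaces a deferred block, which dereferenced resp for StatusCode and Body even when RoundTrip had returned an error and resp was nil, with straight-line code that returns early on error and only then wraps the response. Widening tr from *http.Transport to http.RoundTripper also lets tests substitute a fake transport. A reduced sketch of the same wrapper shape; loggingTransport is illustrative, not the real type:

package main

import (
	"fmt"
	"net/http"
)

// loggingTransport wraps an inner http.RoundTripper the way driveTransport
// does after the refactor: call the inner RoundTripper first, return early
// on error, then decorate the successful response.
type loggingTransport struct {
	inner http.RoundTripper // interface, not *http.Transport, so tests can fake it
}

func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	resp, err := t.inner.RoundTrip(req)
	if err != nil {
		// Nothing to decorate; the defer-based version had to be careful
		// not to touch a nil resp here, the straight-line version cannot
		// even reach the decoration code.
		return nil, err
	}
	fmt.Printf("%s %s -> %d\n", req.Method, req.URL, resp.StatusCode)
	return resp, nil
}

func main() {
	c := &http.Client{Transport: &loggingTransport{inner: http.DefaultTransport}}
	resp, err := c.Get("https://example.com") // needs network; sketch only
	if err == nil {
		resp.Body.Close()
	}
}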
if b.state == state { return } @@ -907,6 +908,7 @@ func (b *LocalBackend) setStateLocked(state ipn.State) { // setConfigLocked uses the provided config to update the backend's prefs // and other state. func (b *LocalBackend) setConfigLocked(conf *conffile.Config) error { + syncs.RequiresMutex(&b.mu) p := b.pm.CurrentPrefs().AsStruct() mp, err := conf.Parsed.ToPrefs() if err != nil { @@ -928,6 +930,7 @@ var assumeNetworkUpdateForTest = envknob.RegisterBool("TS_ASSUME_NETWORK_UP_FOR_ // // b.mu must be held. func (b *LocalBackend) pauseOrResumeControlClientLocked() { + syncs.RequiresMutex(&b.mu) if b.cc == nil { return } @@ -945,12 +948,12 @@ func (b *LocalBackend) pauseOrResumeControlClientLocked() { // down, clients switch over to other replicas whilst the existing connections are kept alive for some period of time. func (b *LocalBackend) DisconnectControl() { b.mu.Lock() - defer b.mu.Unlock() cc := b.resetControlClientLocked() - if cc == nil { - return + b.mu.Unlock() + + if cc != nil { + cc.Shutdown() } - cc.Shutdown() } // linkChange is our network monitor callback, called whenever the network changes. @@ -1205,6 +1208,7 @@ func (b *LocalBackend) Prefs() ipn.PrefsView { } func (b *LocalBackend) sanitizedPrefsLocked() ipn.PrefsView { + syncs.RequiresMutex(&b.mu) return stripKeysFromPrefs(b.pm.CurrentPrefs()) } @@ -1336,6 +1340,7 @@ func (b *LocalBackend) UpdateStatus(sb *ipnstate.StatusBuilder) { } func (b *LocalBackend) populatePeerStatusLocked(sb *ipnstate.StatusBuilder) { + syncs.RequiresMutex(&b.mu) cn := b.currentNode() nm := cn.NetMap() if nm == nil { @@ -1874,6 +1879,8 @@ func (b *LocalBackend) applySysPolicyLocked(prefs *ipn.Prefs) (anyChange bool) { if !buildfeatures.HasSystemPolicy { return false } + syncs.RequiresMutex(&b.mu) + if controlURL, err := b.polc.GetString(pkey.ControlURL, prefs.ControlURL); err == nil && prefs.ControlURL != controlURL { prefs.ControlURL = controlURL anyChange = true @@ -1942,6 +1949,8 @@ func (b *LocalBackend) applyExitNodeSysPolicyLocked(prefs *ipn.Prefs) (anyChange if !buildfeatures.HasUseExitNode { return false } + syncs.RequiresMutex(&b.mu) + if exitNodeIDStr, _ := b.polc.GetString(pkey.ExitNodeID, ""); exitNodeIDStr != "" { exitNodeID := tailcfg.StableNodeID(exitNodeIDStr) @@ -2183,6 +2192,8 @@ func (b *LocalBackend) resolveAutoExitNodeLocked(prefs *ipn.Prefs) (prefsChanged if !buildfeatures.HasUseExitNode { return false } + syncs.RequiresMutex(&b.mu) + // As of 2025-07-08, the only supported auto exit node expression is [ipn.AnyExitNode]. // // However, to maintain forward compatibility with future auto exit node expressions, @@ -2296,6 +2307,8 @@ func (b *LocalBackend) setWgengineStatus(s *wgengine.Status, err error) { // // b.mu must be held. func (b *LocalBackend) setWgengineStatusLocked(s *wgengine.Status) { + syncs.RequiresMutex(&b.mu) + es := b.parseWgStatusLocked(s) cc := b.cc @@ -2409,7 +2422,8 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error { var clientToShutdown controlclient.Client defer func() { if clientToShutdown != nil { - clientToShutdown.Shutdown() + // Shutdown outside of b.mu to avoid deadlocks. + b.goTracker.Go(clientToShutdown.Shutdown) } }() @@ -3747,6 +3761,9 @@ func (b *LocalBackend) StartLoginInteractive(ctx context.Context) error { // the control plane sends us one. Otherwise, the notification will be delivered to all // active [watchSession]s. 
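DisconnectControl above now releases b.mu before calling cc.Shutdown rather than holding the lock for the whole shutdown. A minimal sketch of that detach-under-lock, close-outside-lock pattern; the types here are illustrative:

package main

import "sync"

type conn struct{}

func (*conn) Shutdown() {}

type backend struct {
	mu sync.Mutex
	cc *conn
}

// disconnect detaches the resource while holding the lock, but runs its
// potentially slow or re-entrant Shutdown after releasing the lock, so
// Shutdown can never deadlock against another holder of b.mu.
func (b *backend) disconnect() {
	b.mu.Lock()
	cc := b.cc
	b.cc = nil
	b.mu.Unlock()

	if cc != nil {
		cc.Shutdown()
	}
}

func main() {
	b := &backend{cc: &conn{}}
	b.disconnect()
}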
func (b *LocalBackend) StartLoginInteractiveAs(ctx context.Context, user ipnauth.Actor) error { + if b.health.IsUnhealthy(ipn.StateStoreHealth) { + return errors.New("cannot log in when state store is unhealthy") + } b.mu.Lock() defer b.mu.Unlock() if b.cc == nil { @@ -4309,6 +4326,7 @@ func (b *LocalBackend) EditPrefsAs(mp *ipn.MaskedPrefs, actor ipnauth.Actor) (ip // // b.mu must be held. func (b *LocalBackend) checkEditPrefsAccessLocked(actor ipnauth.Actor, prefs ipn.PrefsView, mp *ipn.MaskedPrefs) error { + syncs.RequiresMutex(&b.mu) var errs []error if mp.RunSSHSet && mp.RunSSH && !envknob.CanSSHD() { @@ -4359,6 +4377,7 @@ func (b *LocalBackend) checkEditPrefsAccessLocked(actor ipnauth.Actor, prefs ipn // // b.mu must be held. func (b *LocalBackend) changeDisablesExitNodeLocked(prefs ipn.PrefsView, change *ipn.MaskedPrefs) bool { + syncs.RequiresMutex(&b.mu) if !buildfeatures.HasUseExitNode { return false } @@ -4400,6 +4419,7 @@ func (b *LocalBackend) changeDisablesExitNodeLocked(prefs ipn.PrefsView, change // // b.mu must be held. func (b *LocalBackend) adjustEditPrefsLocked(prefs ipn.PrefsView, mp *ipn.MaskedPrefs) { + syncs.RequiresMutex(&b.mu) // Zeroing the ExitNodeID via localAPI must also zero the prior exit node. if mp.ExitNodeIDSet && mp.ExitNodeID == "" && !mp.InternalExitNodePriorSet { mp.InternalExitNodePrior = "" @@ -4477,6 +4497,7 @@ func (b *LocalBackend) onEditPrefsLocked(_ ipnauth.Actor, mp *ipn.MaskedPrefs, o // startReconnectTimerLocked sets a timer to automatically set WantRunning to true // after the specified duration. func (b *LocalBackend) startReconnectTimerLocked(d time.Duration) { + syncs.RequiresMutex(&b.mu) if b.reconnectTimer != nil { // Stop may return false if the timer has already fired, // and the function has been called in its own goroutine, @@ -4519,11 +4540,13 @@ func (b *LocalBackend) startReconnectTimerLocked(d time.Duration) { } func (b *LocalBackend) resetAlwaysOnOverrideLocked() { + syncs.RequiresMutex(&b.mu) b.overrideAlwaysOn = false b.stopReconnectTimerLocked() } func (b *LocalBackend) stopReconnectTimerLocked() { + syncs.RequiresMutex(&b.mu) if b.reconnectTimer != nil { // Stop may return false if the timer has already fired, // and the function has been called in its own goroutine, @@ -4539,6 +4562,7 @@ func (b *LocalBackend) stopReconnectTimerLocked() { // b.mu must be held. func (b *LocalBackend) editPrefsLocked(actor ipnauth.Actor, mp *ipn.MaskedPrefs) (ipn.PrefsView, error) { + syncs.RequiresMutex(&b.mu) p0 := b.pm.CurrentPrefs() // Check if the changes in mp are allowed. @@ -5657,6 +5681,7 @@ func (b *LocalBackend) enterStateLocked(newState ipn.State) { } func (b *LocalBackend) hasNodeKeyLocked() bool { + syncs.RequiresMutex(&b.mu) // we can't use b.Prefs(), because it strips the keys, oops! p := b.pm.CurrentPrefs() return p.Valid() && p.Persist().Valid() && !p.Persist().PrivateNodeKey().IsZero() @@ -5677,6 +5702,11 @@ func (b *LocalBackend) NodeKey() key.NodePublic { // // b.mu must be held func (b *LocalBackend) nextStateLocked() ipn.State { + syncs.RequiresMutex(&b.mu) + if b.health.IsUnhealthy(ipn.StateStoreHealth) { + return ipn.NoState + } + var ( cc = b.cc cn = b.currentNode() @@ -5752,6 +5782,8 @@ func (b *LocalBackend) nextStateLocked() ipn.State { // // requires b.mu to be held. func (b *LocalBackend) stateMachineLocked() { + syncs.RequiresMutex(&b.mu) + b.enterStateLocked(b.nextStateLocked()) } @@ -5761,6 +5793,7 @@ func (b *LocalBackend) stateMachineLocked() { // // b.mu must be held. 
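Many of the *Locked helpers in these hunks gain a syncs.RequiresMutex(&b.mu) call, turning the "b.mu must be held" comments into a runtime check. One plausible shape for such an assertion, sketched with sync.Mutex.TryLock; the real syncs.RequiresMutex implementation may well differ:

package main

import "sync"

// requiresMutex panics if mu is not locked by anyone. It is a best-effort
// approximation: TryLock succeeding proves nobody held the lock (a caller
// bug), but a failed TryLock cannot distinguish "held by this goroutine"
// from "held by another goroutine".
func requiresMutex(mu *sync.Mutex) {
	if mu.TryLock() {
		mu.Unlock()
		panic("requiresMutex: mutex was not held")
	}
}

func main() {
	var mu sync.Mutex
	mu.Lock()
	requiresMutex(&mu) // ok: lock is held
	mu.Unlock()
}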
func (b *LocalBackend) stopEngineAndWaitLocked() { + syncs.RequiresMutex(&b.mu) b.logf("stopEngineAndWait...") st, _ := b.e.ResetAndStop() // TODO: what should we do if this returns an error? b.setWgengineStatusLocked(st) @@ -5781,6 +5814,7 @@ func (b *LocalBackend) setControlClientLocked(cc controlclient.Client) { // returned value is non-nil, the caller must call Shutdown on it after // releasing b.mu. func (b *LocalBackend) resetControlClientLocked() controlclient.Client { + syncs.RequiresMutex(&b.mu) if b.cc == nil { return nil } @@ -5807,6 +5841,8 @@ func (b *LocalBackend) resetControlClientLocked() controlclient.Client { // resetAuthURLLocked resets authURL, canceling any pending interactive login. func (b *LocalBackend) resetAuthURLLocked() { + syncs.RequiresMutex(&b.mu) + b.authURL = "" b.authURLTime = time.Time{} b.authActor = nil @@ -5836,6 +5872,8 @@ func (b *LocalBackend) ShouldExposeRemoteWebClient() bool { // // b.mu must be held. func (b *LocalBackend) setWebClientAtomicBoolLocked(nm *netmap.NetworkMap) { + syncs.RequiresMutex(&b.mu) + shouldRun := !nm.HasCap(tailcfg.NodeAttrDisableWebClient) wasRunning := b.webClientAtomicBool.Swap(shouldRun) if wasRunning && !shouldRun { @@ -5848,6 +5886,8 @@ func (b *LocalBackend) setWebClientAtomicBoolLocked(nm *netmap.NetworkMap) { // // b.mu must be held. func (b *LocalBackend) setExposeRemoteWebClientAtomicBoolLocked(prefs ipn.PrefsView) { + syncs.RequiresMutex(&b.mu) + if !buildfeatures.HasWebClient { return } @@ -5976,6 +6016,8 @@ func (b *LocalBackend) RefreshExitNode() { // refreshExitNodeLocked is like RefreshExitNode but requires b.mu be held. func (b *LocalBackend) refreshExitNodeLocked() { + syncs.RequiresMutex(&b.mu) + if b.resolveExitNodeLocked() { b.authReconfigLocked() } @@ -5991,6 +6033,8 @@ func (b *LocalBackend) refreshExitNodeLocked() { // // b.mu must be held. func (b *LocalBackend) resolveExitNodeLocked() (changed bool) { + syncs.RequiresMutex(&b.mu) + if !buildfeatures.HasUseExitNode { return false } @@ -6052,6 +6096,7 @@ func (b *LocalBackend) reconcilePrefsLocked(prefs *ipn.Prefs) (changed bool) { // // b.mu must be held. func (b *LocalBackend) resolveExitNodeInPrefsLocked(prefs *ipn.Prefs) (changed bool) { + syncs.RequiresMutex(&b.mu) if !buildfeatures.HasUseExitNode { return false } @@ -6886,7 +6931,8 @@ func (b *LocalBackend) resetForProfileChangeLocked() error { // Reset the NetworkMap in the engine b.e.SetNetworkMap(new(netmap.NetworkMap)) if prevCC := b.resetControlClientLocked(); prevCC != nil { - defer prevCC.Shutdown() + // Shutdown outside of b.mu to avoid deadlocks. + b.goTracker.Go(prevCC.Shutdown) } // TKA errors should not prevent resetting the backend state. // However, we should still return the error to the caller. @@ -6936,6 +6982,9 @@ func (b *LocalBackend) CurrentProfile() ipn.LoginProfileView { // NewProfile creates and switches to the new profile. func (b *LocalBackend) NewProfile() error { + if b.health.IsUnhealthy(ipn.StateStoreHealth) { + return errors.New("cannot log in when state store is unhealthy") + } b.mu.Lock() defer b.mu.Unlock() @@ -6964,7 +7013,8 @@ func (b *LocalBackend) ResetAuth() error { defer b.mu.Unlock() if prevCC := b.resetControlClientLocked(); prevCC != nil { - defer prevCC.Shutdown() + // Shutdown outside of b.mu to avoid deadlocks. 
+ b.goTracker.Go(prevCC.Shutdown) } if err := b.clearMachineKeyLocked(); err != nil { return err } @@ -7250,7 +7300,12 @@ func (b *LocalBackend) refreshAllowedSuggestions() { } b.allowedSuggestedExitNodesMu.Lock() defer b.allowedSuggestedExitNodesMu.Unlock() - b.allowedSuggestedExitNodes = fillAllowedSuggestions(b.polc) + + var err error + b.allowedSuggestedExitNodes, err = fillAllowedSuggestions(b.polc) + if err != nil { + b.logf("error refreshing allowed suggestions: %v", err) + } } // selectRegionFunc returns a DERP region from the slice of candidate regions. @@ -7262,20 +7317,19 @@ type selectRegionFunc func(views.Slice[int]) int // choice. type selectNodeFunc func(nodes views.Slice[tailcfg.NodeView], last tailcfg.StableNodeID) tailcfg.NodeView -func fillAllowedSuggestions(polc policyclient.Client) set.Set[tailcfg.StableNodeID] { +func fillAllowedSuggestions(polc policyclient.Client) (set.Set[tailcfg.StableNodeID], error) { nodes, err := polc.GetStringArray(pkey.AllowedSuggestedExitNodes, nil) if err != nil { - log.Printf("fillAllowedSuggestions: unable to look up %q policy: %v", pkey.AllowedSuggestedExitNodes, err) - return nil + return nil, fmt.Errorf("fillAllowedSuggestions: unable to look up %q policy: %w", pkey.AllowedSuggestedExitNodes, err) } if nodes == nil { - return nil + return nil, nil } s := make(set.Set[tailcfg.StableNodeID], len(nodes)) for _, n := range nodes { s.Add(tailcfg.StableNodeID(n)) } - return s + return s, nil } // suggestExitNode returns a suggestion for reasonably good exit node based on @@ -7286,6 +7340,9 @@ func suggestExitNode(report *netcheck.Report, nb *nodeBackend, prevSuggestion ta // The traffic-steering feature flag is enabled on this tailnet. return suggestExitNodeUsingTrafficSteering(nb, allowList) default: + // The control plane will always strip the `traffic-steering` + // node attribute if it isn’t enabled for this tailnet, even if + // it is set in the policy file: tailscale/corp#34401 return suggestExitNodeUsingDERP(report, nb, prevSuggestion, selectRegion, selectNode, allowList) } } @@ -7414,6 +7471,16 @@ } } bestCandidates := pickWeighted(pickFrom) + + // We may have an empty list of candidates here, if none of the candidates + // have home DERP info. + // + // We know that candidates is non-empty or we'd already have returned, so if + // we've filtered everything out of bestCandidates, just use candidates. + if len(bestCandidates) == 0 { + bestCandidates = candidates + } + chosen := selectNode(views.SliceOf(bestCandidates), prevSuggestion) if !chosen.Valid() { return res, errors.New("chosen candidate invalid: this is a bug") diff --git a/ipn/ipnlocal/local_test.go b/ipn/ipnlocal/local_test.go index f17fabb60..02997a0e1 100644 --- a/ipn/ipnlocal/local_test.go +++ b/ipn/ipnlocal/local_test.go @@ -4436,6 +4436,14 @@ func deterministicRegionForTest(t testing.TB, want views.Slice[int], use int) se } } +// deterministicNodeForTest returns a deterministic selectNodeFunc, which +// allows us to make stable assertions about which exit node will be chosen +// from a list of possible candidates. +// +// When given a list of candidates, it checks that `use` is in the list and +// returns that. +// +// It also verifies that the `last` argument the returned selectNodeFunc receives equals `wantLast`.
func deterministicNodeForTest(t testing.TB, want views.Slice[tailcfg.StableNodeID], wantLast tailcfg.StableNodeID, use tailcfg.StableNodeID) selectNodeFunc { t.Helper() @@ -4444,6 +4452,16 @@ func deterministicNodeForTest(t testing.TB, want views.Slice[tailcfg.StableNodeI } return func(got views.Slice[tailcfg.NodeView], last tailcfg.StableNodeID) tailcfg.NodeView { + // In the tests, we choose nodes deterministically so we can get + // stable results, but in the real code, we choose nodes randomly. + // + // Call the randomNode function anyway, and ensure it returns + // a sensible result. + view := randomNode(got, last) + if !views.SliceContains(got, view) { + t.Fatalf("randomNode returns an unexpected node") + } + var ret tailcfg.NodeView gotIDs := make([]tailcfg.StableNodeID, got.Len()) @@ -4529,6 +4547,7 @@ func TestSuggestExitNode(t *testing.T) { Longitude: -97.3325, Priority: 100, } + var emptyLocation *tailcfg.Location peer1 := makePeer(1, withExitRoutes(), @@ -4568,6 +4587,18 @@ func TestSuggestExitNode(t *testing.T) { withExitRoutes(), withSuggest(), withLocation(fortWorthLowPriority.View())) + emptyLocationPeer9 := makePeer(9, + withoutDERP(), + withExitRoutes(), + withSuggest(), + withLocation(emptyLocation.View()), + ) + emptyLocationPeer10 := makePeer(10, + withoutDERP(), + withExitRoutes(), + withSuggest(), + withLocation(emptyLocation.View()), + ) selfNode := tailcfg.Node{ Addresses: []netip.Prefix{ @@ -4898,6 +4929,31 @@ func TestSuggestExitNode(t *testing.T) { wantName: "San Jose", wantLocation: sanJose.View(), }, + { + // Regression test for https://github.com/tailscale/tailscale/issues/17661 + name: "exit nodes with no home DERP, randomly selected", + lastReport: &netcheck.Report{ + RegionLatency: map[int]time.Duration{ + 1: 10, + 2: 20, + 3: 10, + }, + PreferredDERP: 1, + }, + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + DERPMap: defaultDERPMap, + Peers: []tailcfg.NodeView{ + emptyLocationPeer9, + emptyLocationPeer10, + }, + }, + wantRegions: []int{1, 2}, + wantName: "peer9", + wantNodes: []tailcfg.StableNodeID{"stable9", "stable10"}, + wantID: "stable9", + useRegion: 1, + }, } for _, tt := range tests { @@ -5173,6 +5229,26 @@ func TestSuggestExitNodeTrafficSteering(t *testing.T) { wantID: "stable3", wantName: "peer3", }, + { + name: "exit-nodes-without-priority-for-suggestions", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest()), + makePeer(2, + withExitRoutes(), + withSuggest()), + makePeer(3, + withExitRoutes(), + withLocationPriority(1)), + }, + }, + wantID: "stable1", + wantName: "peer1", + wantPri: 0, + }, { name: "exit-nodes-with-and-without-priority", netMap: &netmap.NetworkMap{ @@ -5590,7 +5666,10 @@ func TestFillAllowedSuggestions(t *testing.T) { var pol policytest.Config pol.Set(pkey.AllowedSuggestedExitNodes, tt.allowPolicy) - got := fillAllowedSuggestions(pol) + got, err := fillAllowedSuggestions(pol) + if err != nil { + t.Fatal(err) + } if got == nil { if tt.want == nil { return diff --git a/ipn/ipnlocal/network-lock.go b/ipn/ipnlocal/network-lock.go index 78d4d236d..f25c6fa9b 100644 --- a/ipn/ipnlocal/network-lock.go +++ b/ipn/ipnlocal/network-lock.go @@ -368,20 +368,6 @@ func (b *LocalBackend) tkaSyncIfNeeded(nm *netmap.NetworkMap, prefs ipn.PrefsVie return nil } -func toSyncOffer(head string, ancestors []string) (tka.SyncOffer, error) { - var out tka.SyncOffer - if err := out.Head.UnmarshalText([]byte(head)); err != nil { - return tka.SyncOffer{}, 
fmt.Errorf("head.UnmarshalText: %v", err) - } - out.Ancestors = make([]tka.AUMHash, len(ancestors)) - for i, a := range ancestors { - if err := out.Ancestors[i].UnmarshalText([]byte(a)); err != nil { - return tka.SyncOffer{}, fmt.Errorf("ancestor[%d].UnmarshalText: %v", i, err) - } - } - return out, nil -} - // tkaSyncLocked synchronizes TKA state with control. b.mu must be held // and tka must be initialized. b.mu will be stepped out of (and back into) // during network RPCs. @@ -399,7 +385,7 @@ func (b *LocalBackend) tkaSyncLocked(ourNodeKey key.NodePublic) error { if err != nil { return fmt.Errorf("offer RPC: %w", err) } - controlOffer, err := toSyncOffer(offerResp.Head, offerResp.Ancestors) + controlOffer, err := tka.ToSyncOffer(offerResp.Head, offerResp.Ancestors) if err != nil { return fmt.Errorf("control offer: %v", err) } @@ -694,7 +680,7 @@ func (b *LocalBackend) NetworkLockInit(keys []tka.Key, disablementValues [][]byt // Our genesis AUM was accepted but before Control turns on enforcement of // node-key signatures, we need to sign keys for all the existing nodes. - // If we don't get these signatures ahead of time, everyone will loose + // If we don't get these signatures ahead of time, everyone will lose // connectivity because control won't have any signatures to send which // satisfy network-lock checks. sigs := make(map[tailcfg.NodeID]tkatype.MarshaledSignature, len(initResp.NeedSignatures)) @@ -1294,27 +1280,10 @@ func (b *LocalBackend) tkaFetchBootstrap(ourNodeKey key.NodePublic, head tka.AUM return a, nil } -func fromSyncOffer(offer tka.SyncOffer) (head string, ancestors []string, err error) { - headBytes, err := offer.Head.MarshalText() - if err != nil { - return "", nil, fmt.Errorf("head.MarshalText: %v", err) - } - - ancestors = make([]string, len(offer.Ancestors)) - for i, ancestor := range offer.Ancestors { - hash, err := ancestor.MarshalText() - if err != nil { - return "", nil, fmt.Errorf("ancestor[%d].MarshalText: %v", i, err) - } - ancestors[i] = string(hash) - } - return string(headBytes), ancestors, nil -} - // tkaDoSyncOffer sends a /machine/tka/sync/offer RPC to the control plane // over noise. This is the first of two RPCs implementing tka synchronization. func (b *LocalBackend) tkaDoSyncOffer(ourNodeKey key.NodePublic, offer tka.SyncOffer) (*tailcfg.TKASyncOfferResponse, error) { - head, ancestors, err := fromSyncOffer(offer) + head, ancestors, err := tka.FromSyncOffer(offer) if err != nil { return nil, fmt.Errorf("encoding offer: %v", err) } diff --git a/ipn/ipnlocal/network-lock_test.go b/ipn/ipnlocal/network-lock_test.go index 5d22425a1..e5df38bdb 100644 --- a/ipn/ipnlocal/network-lock_test.go +++ b/ipn/ipnlocal/network-lock_test.go @@ -33,6 +33,7 @@ import ( "tailscale.com/tka" "tailscale.com/tsd" "tailscale.com/tstest" + "tailscale.com/tstest/tkatest" "tailscale.com/types/key" "tailscale.com/types/netmap" "tailscale.com/types/persist" @@ -101,7 +102,8 @@ func TestTKAEnablementFlow(t *testing.T) { // our mock server can communicate. 
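toSyncOffer and fromSyncOffer are deleted above in favor of the exported tka.ToSyncOffer and tka.FromSyncOffer. Judging from the removed bodies, the wire encoding is simply the text-marshaled form of each AUM hash (one head string plus ancestor strings). A small round-trip sketch of that encoding, assuming the moved helpers keep the removed semantics:

package main

import (
	"fmt"

	"tailscale.com/tka"
)

func main() {
	// AUM hashes cross the control protocol as their text-marshaled form.
	var h tka.AUMHash // zero value, just to exercise the encoding
	text, err := h.MarshalText()
	if err != nil {
		panic(err)
	}
	var back tka.AUMHash
	if err := back.UnmarshalText(text); err != nil {
		panic(err)
	}
	fmt.Println(string(text), back == h) // round-trips losslessly
}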
nlPriv := key.NewNLPrivate() key := tka.Key{Kind: tka.Key25519, Public: nlPriv.Public().Verifier(), Votes: 2} - a1, genesisAUM, err := tka.Create(tka.ChonkMem(), tka.State{ + chonk := tka.ChonkMem() + a1, genesisAUM, err := tka.Create(chonk, tka.State{ Keys: []tka.Key{key}, DisablementSecrets: [][]byte{bytes.Repeat([]byte{0xa5}, 32)}, }, nlPriv) @@ -113,51 +115,31 @@ defer r.Body.Close() switch r.URL.Path { case "/machine/tka/bootstrap": - body := new(tailcfg.TKABootstrapRequest) - if err := json.NewDecoder(r.Body).Decode(body); err != nil { - t.Fatal(err) - } - if body.Version != tailcfg.CurrentCapabilityVersion { - t.Errorf("bootstrap CapVer = %v, want %v", body.Version, tailcfg.CurrentCapabilityVersion) - } - if body.NodeKey != nodePriv.Public() { - t.Errorf("bootstrap nodeKey=%v, want %v", body.NodeKey, nodePriv.Public()) + resp := tailcfg.TKABootstrapResponse{ + GenesisAUM: genesisAUM.Serialize(), } - if body.Head != "" { - t.Errorf("bootstrap head=%s, want empty hash", body.Head) + req, err := tkatest.HandleTKABootstrap(w, r, resp) + if err != nil { + t.Errorf("HandleTKABootstrap: %v", err) } - - w.WriteHeader(200) - out := tailcfg.TKABootstrapResponse{ - GenesisAUM: genesisAUM.Serialize(), + if req.NodeKey != nodePriv.Public() { + t.Errorf("bootstrap nodeKey=%v, want %v", req.NodeKey, nodePriv.Public()) } - if err := json.NewEncoder(w).Encode(out); err != nil { - t.Fatal(err) + if req.Head != "" { + t.Errorf("bootstrap head=%s, want empty hash", req.Head) } // Sync offer/send endpoints are hit even though the node is up-to-date, // so we implement enough of a fake that the client doesn't explode. case "/machine/tka/sync/offer": - head, err := a1.Head().MarshalText() + err := tkatest.HandleTKASyncOffer(w, r, a1, chonk) if err != nil { - t.Fatal(err) - } - w.WriteHeader(200) - if err := json.NewEncoder(w).Encode(tailcfg.TKASyncOfferResponse{ - Head: string(head), - }); err != nil { - t.Fatal(err) + t.Errorf("HandleTKASyncOffer: %v", err) } case "/machine/tka/sync/send": - head, err := a1.Head().MarshalText() + err := tkatest.HandleTKASyncSend(w, r, a1, chonk) if err != nil { - t.Fatal(err) - } - w.WriteHeader(200) - if err := json.NewEncoder(w).Encode(tailcfg.TKASyncSendResponse{ - Head: string(head), - }); err != nil { - t.Fatal(err) + t.Errorf("HandleTKASyncSend: %v", err) } default: @@ -225,37 +207,28 @@ defer r.Body.Close() switch r.URL.Path { case "/machine/tka/bootstrap": - body := new(tailcfg.TKABootstrapRequest) - if err := json.NewDecoder(r.Body).Decode(body); err != nil { - t.Fatal(err) - } - if body.Version != tailcfg.CurrentCapabilityVersion { - t.Errorf("bootstrap CapVer = %v, want %v", body.Version, tailcfg.CurrentCapabilityVersion) - } - if body.NodeKey != nodePriv.Public() { - t.Errorf("nodeKey=%v, want %v", body.NodeKey, nodePriv.Public()) - } - var head tka.AUMHash - if err := head.UnmarshalText([]byte(body.Head)); err != nil { - t.Fatalf("failed unmarshal of body.Head: %v", err) - } - if head != authority.Head() { - t.Errorf("reported head = %x, want %x", head, authority.Head()) - } - var disablement []byte if returnWrongSecret { disablement = bytes.Repeat([]byte{0x42}, 32) // wrong secret } else { disablement = disablementSecret } - - w.WriteHeader(200) - out := tailcfg.TKABootstrapResponse{ + resp := tailcfg.TKABootstrapResponse{ DisablementSecret: disablement, } - if err := json.NewEncoder(w).Encode(out); err != nil { - t.Fatal(err) + req, err := tkatest.HandleTKABootstrap(w,
r, resp) + if err != nil { + t.Errorf("HandleTKABootstrap: %v", err) + } + if req.NodeKey != nodePriv.Public() { + t.Errorf("nodeKey=%v, want %v", req.NodeKey, nodePriv.Public()) + } + var head tka.AUMHash + if err := head.UnmarshalText([]byte(req.Head)); err != nil { + t.Fatalf("failed unmarshal of body.Head: %v", err) + } + if head != authority.Head() { + t.Errorf("reported head = %x, want %x", head, authority.Head()) } default: @@ -430,76 +403,15 @@ func TestTKASync(t *testing.T) { defer r.Body.Close() switch r.URL.Path { case "/machine/tka/sync/offer": - body := new(tailcfg.TKASyncOfferRequest) - if err := json.NewDecoder(r.Body).Decode(body); err != nil { - t.Fatal(err) - } - t.Logf("got sync offer:\n%+v", body) - nodeOffer, err := toSyncOffer(body.Head, body.Ancestors) + err := tkatest.HandleTKASyncOffer(w, r, controlAuthority, controlStorage) if err != nil { - t.Fatal(err) - } - controlOffer, err := controlAuthority.SyncOffer(controlStorage) - if err != nil { - t.Fatal(err) - } - sendAUMs, err := controlAuthority.MissingAUMs(controlStorage, nodeOffer) - if err != nil { - t.Fatal(err) - } - - head, ancestors, err := fromSyncOffer(controlOffer) - if err != nil { - t.Fatal(err) - } - resp := tailcfg.TKASyncOfferResponse{ - Head: head, - Ancestors: ancestors, - MissingAUMs: make([]tkatype.MarshaledAUM, len(sendAUMs)), - } - for i, a := range sendAUMs { - resp.MissingAUMs[i] = a.Serialize() - } - - t.Logf("responding to sync offer with:\n%+v", resp) - w.WriteHeader(200) - if err := json.NewEncoder(w).Encode(resp); err != nil { - t.Fatal(err) + t.Errorf("HandleTKASyncOffer: %v", err) } case "/machine/tka/sync/send": - body := new(tailcfg.TKASyncSendRequest) - if err := json.NewDecoder(r.Body).Decode(body); err != nil { - t.Fatal(err) - } - t.Logf("got sync send:\n%+v", body) - - var remoteHead tka.AUMHash - if err := remoteHead.UnmarshalText([]byte(body.Head)); err != nil { - t.Fatalf("head unmarshal: %v", err) - } - toApply := make([]tka.AUM, len(body.MissingAUMs)) - for i, a := range body.MissingAUMs { - if err := toApply[i].Unserialize(a); err != nil { - t.Fatalf("decoding missingAUM[%d]: %v", i, err) - } - } - - if len(toApply) > 0 { - if err := controlAuthority.Inform(controlStorage, toApply); err != nil { - t.Fatalf("control.Inform(%+v) failed: %v", toApply, err) - } - } - head, err := controlAuthority.Head().MarshalText() + err := tkatest.HandleTKASyncSend(w, r, controlAuthority, controlStorage) if err != nil { - t.Fatal(err) - } - - w.WriteHeader(200) - if err := json.NewEncoder(w).Encode(tailcfg.TKASyncSendResponse{ - Head: string(head), - }); err != nil { - t.Fatal(err) + t.Errorf("HandleTKASyncSend: %v", err) } default: @@ -608,76 +520,15 @@ func TestTKASyncTriggersCompact(t *testing.T) { defer r.Body.Close() switch r.URL.Path { case "/machine/tka/sync/offer": - body := new(tailcfg.TKASyncOfferRequest) - if err := json.NewDecoder(r.Body).Decode(body); err != nil { - t.Fatal(err) - } - t.Logf("got sync offer:\n%+v", body) - nodeOffer, err := toSyncOffer(body.Head, body.Ancestors) - if err != nil { - t.Fatal(err) - } - controlOffer, err := controlAuthority.SyncOffer(controlStorage) - if err != nil { - t.Fatal(err) - } - sendAUMs, err := controlAuthority.MissingAUMs(controlStorage, nodeOffer) - if err != nil { - t.Fatal(err) - } - - head, ancestors, err := fromSyncOffer(controlOffer) + err := tkatest.HandleTKASyncOffer(w, r, controlAuthority, controlStorage) if err != nil { - t.Fatal(err) - } - resp := tailcfg.TKASyncOfferResponse{ - Head: head, - Ancestors: ancestors, - 
MissingAUMs: make([]tkatype.MarshaledAUM, len(sendAUMs)), - } - for i, a := range sendAUMs { - resp.MissingAUMs[i] = a.Serialize() - } - - t.Logf("responding to sync offer with:\n%+v", resp) - w.WriteHeader(200) - if err := json.NewEncoder(w).Encode(resp); err != nil { - t.Fatal(err) + t.Errorf("HandleTKASyncOffer: %v", err) } case "/machine/tka/sync/send": - body := new(tailcfg.TKASyncSendRequest) - if err := json.NewDecoder(r.Body).Decode(body); err != nil { - t.Fatal(err) - } - t.Logf("got sync send:\n%+v", body) - - var remoteHead tka.AUMHash - if err := remoteHead.UnmarshalText([]byte(body.Head)); err != nil { - t.Fatalf("head unmarshal: %v", err) - } - toApply := make([]tka.AUM, len(body.MissingAUMs)) - for i, a := range body.MissingAUMs { - if err := toApply[i].Unserialize(a); err != nil { - t.Fatalf("decoding missingAUM[%d]: %v", i, err) - } - } - - if len(toApply) > 0 { - if err := controlAuthority.Inform(controlStorage, toApply); err != nil { - t.Fatalf("control.Inform(%+v) failed: %v", toApply, err) - } - } - head, err := controlAuthority.Head().MarshalText() + err := tkatest.HandleTKASyncSend(w, r, controlAuthority, controlStorage) if err != nil { - t.Fatal(err) - } - - w.WriteHeader(200) - if err := json.NewEncoder(w).Encode(tailcfg.TKASyncSendResponse{ - Head: string(head), - }); err != nil { - t.Fatal(err) + t.Errorf("HandleTKASyncSend: %v", err) } default: @@ -1019,29 +870,9 @@ func TestTKASign(t *testing.T) { defer r.Body.Close() switch r.URL.Path { case "/machine/tka/sign": - body := new(tailcfg.TKASubmitSignatureRequest) - if err := json.NewDecoder(r.Body).Decode(body); err != nil { - t.Fatal(err) - } - if body.Version != tailcfg.CurrentCapabilityVersion { - t.Errorf("sign CapVer = %v, want %v", body.Version, tailcfg.CurrentCapabilityVersion) - } - if body.NodeKey != nodePriv.Public() { - t.Errorf("nodeKey = %v, want %v", body.NodeKey, nodePriv.Public()) - } - - var sig tka.NodeKeySignature - if err := sig.Unserialize(body.Signature); err != nil { - t.Fatalf("malformed signature: %v", err) - } - - if err := authority.NodeKeyAuthorized(toSign.Public(), body.Signature); err != nil { - t.Errorf("signature does not verify: %v", err) - } - - w.WriteHeader(200) - if err := json.NewEncoder(w).Encode(tailcfg.TKASubmitSignatureResponse{}); err != nil { - t.Fatal(err) + _, _, err := tkatest.HandleTKASign(w, r, authority) + if err != nil { + t.Errorf("HandleTKASign: %v", err) } default: @@ -1098,23 +929,15 @@ func TestTKAForceDisable(t *testing.T) { defer r.Body.Close() switch r.URL.Path { case "/machine/tka/bootstrap": - body := new(tailcfg.TKABootstrapRequest) - if err := json.NewDecoder(r.Body).Decode(body); err != nil { - t.Fatal(err) - } - if body.Version != tailcfg.CurrentCapabilityVersion { - t.Errorf("bootstrap CapVer = %v, want %v", body.Version, tailcfg.CurrentCapabilityVersion) - } - if body.NodeKey != nodePriv.Public() { - t.Errorf("nodeKey=%v, want %v", body.NodeKey, nodePriv.Public()) - } - - w.WriteHeader(200) - out := tailcfg.TKABootstrapResponse{ + resp := tailcfg.TKABootstrapResponse{ GenesisAUM: genesis.Serialize(), } - if err := json.NewEncoder(w).Encode(out); err != nil { - t.Fatal(err) + req, err := tkatest.HandleTKABootstrap(w, r, resp) + if err != nil { + t.Errorf("HandleTKABootstrap: %v", err) + } + if req.NodeKey != nodePriv.Public() { + t.Errorf("nodeKey=%v, want %v", req.NodeKey, nodePriv.Public()) } default: @@ -1323,37 +1146,16 @@ func TestTKARecoverCompromisedKeyFlow(t *testing.T) { defer r.Body.Close() switch r.URL.Path { case 
"/machine/tka/sync/send": - body := new(tailcfg.TKASyncSendRequest) - if err := json.NewDecoder(r.Body).Decode(body); err != nil { - t.Fatal(err) - } - t.Logf("got sync send:\n%+v", body) - - var remoteHead tka.AUMHash - if err := remoteHead.UnmarshalText([]byte(body.Head)); err != nil { - t.Fatalf("head unmarshal: %v", err) - } - toApply := make([]tka.AUM, len(body.MissingAUMs)) - for i, a := range body.MissingAUMs { - if err := toApply[i].Unserialize(a); err != nil { - t.Fatalf("decoding missingAUM[%d]: %v", i, err) - } + err := tkatest.HandleTKASyncSend(w, r, authority, chonk) + if err != nil { + t.Errorf("HandleTKASyncSend: %v", err) } - // Apply the recovery AUM to an authority to make sure it works. - if err := authority.Inform(chonk, toApply); err != nil { - t.Errorf("recovery AUM could not be applied: %v", err) - } // Make sure the key we removed isn't trusted. if authority.KeyTrusted(compromisedPriv.KeyID()) { t.Error("compromised key was not removed from tka") } - w.WriteHeader(200) - if err := json.NewEncoder(w).Encode(tailcfg.TKASubmitSignatureResponse{}); err != nil { - t.Fatal(err) - } - default: t.Errorf("unhandled endpoint path: %v", r.URL.Path) w.WriteHeader(404) diff --git a/ipn/ipnlocal/serve.go b/ipn/ipnlocal/serve.go index b5118873b..ef4e91545 100644 --- a/ipn/ipnlocal/serve.go +++ b/ipn/ipnlocal/serve.go @@ -292,6 +292,10 @@ func (b *LocalBackend) updateServeTCPPortNetMapAddrListenersLocked(ports []uint1 // SetServeConfig establishes or replaces the current serve config. // ETag is an optional parameter to enforce Optimistic Concurrency Control. // If it is an empty string, then the config will be overwritten. +// +// New foreground config cannot override existing listeners--neither existing +// foreground listeners nor existing background listeners. Background config can +// change as long as the serve type (e.g. HTTP, TCP, etc.) remains the same. func (b *LocalBackend) SetServeConfig(config *ipn.ServeConfig, etag string) error { b.mu.Lock() defer b.mu.Unlock() @@ -307,12 +311,6 @@ func (b *LocalBackend) setServeConfigLocked(config *ipn.ServeConfig, etag string return errors.New("can't reconfigure tailscaled when using a config file; config file is locked") } - if config != nil { - if err := config.CheckValidServicesConfig(); err != nil { - return err - } - } - nm := b.NetMap() if nm == nil { return errors.New("netMap is nil") @@ -340,6 +338,10 @@ func (b *LocalBackend) setServeConfigLocked(config *ipn.ServeConfig, etag string } } + if err := validateServeConfigUpdate(prevConfig, config.View()); err != nil { + return err + } + var bs []byte if config != nil { j, err := json.Marshal(config) @@ -1566,3 +1568,144 @@ func vipServiceHash(logf logger.Logf, services []*tailcfg.VIPService) string { h.Sum(buf[:0]) return hex.EncodeToString(buf[:]) } + +// validateServeConfigUpdate validates changes proposed by incoming serve +// configuration. +func validateServeConfigUpdate(existing, incoming ipn.ServeConfigView) error { + // Error messages returned by this function may be presented to end-users by + // frontends like the CLI. Thus these error messages should provide enough + // information for end-users to diagnose and resolve conflicts. + + if !incoming.Valid() { + return nil + } + + // For Services, TUN mode is mutually exclusive with L4 or L7 handlers. 
+ for svcName, svcCfg := range incoming.Services().All() { + hasTCP := svcCfg.TCP().Len() > 0 + hasWeb := svcCfg.Web().Len() > 0 + if svcCfg.Tun() && (hasTCP || hasWeb) { + return fmt.Errorf("cannot configure TUN mode in combination with TCP or web handlers for %s", svcName) + } + } + + if !existing.Valid() { + return nil + } + + // New foreground listeners must be on open ports. + for sessionID, incomingFg := range incoming.Foreground().All() { + if !existing.Foreground().Has(sessionID) { + // This is a new session. + for port := range incomingFg.TCPs() { + if _, exists := existing.FindTCP(port); exists { + return fmt.Errorf("listener already exists for port %d", port) + } + } + } + } + + // New background listeners cannot overwrite existing foreground listeners. + for port := range incoming.TCP().All() { + if _, exists := existing.FindForegroundTCP(port); exists { + return fmt.Errorf("foreground listener already exists for port %d", port) + } + } + + // Incoming configuration cannot change the serve type in use by a port. + for port, incomingHandler := range incoming.TCP().All() { + existingHandler, exists := existing.FindTCP(port) + if !exists { + continue + } + + existingServeType := serveTypeFromPortHandler(existingHandler) + incomingServeType := serveTypeFromPortHandler(incomingHandler) + if incomingServeType != existingServeType { + return fmt.Errorf("want to serve %q, but port %d is already serving %q", incomingServeType, port, existingServeType) + } + } + + // Validations for Tailscale Services. + for svcName, incomingSvcCfg := range incoming.Services().All() { + existingSvcCfg, exists := existing.Services().GetOk(svcName) + if !exists { + continue + } + + // Incoming configuration cannot change the serve type in use by a port. + for port, incomingHandler := range incomingSvcCfg.TCP().All() { + existingHandler, exists := existingSvcCfg.TCP().GetOk(port) + if !exists { + continue + } + + existingServeType := serveTypeFromPortHandler(existingHandler) + incomingServeType := serveTypeFromPortHandler(incomingHandler) + if incomingServeType != existingServeType { + return fmt.Errorf("want to serve %q, but port %d is already serving %q for %s", incomingServeType, port, existingServeType, svcName) + } + } + + existingHasTCP := existingSvcCfg.TCP().Len() > 0 + existingHasWeb := existingSvcCfg.Web().Len() > 0 + + // A Service cannot turn on TUN mode if TCP or web handlers exist. + if incomingSvcCfg.Tun() && (existingHasTCP || existingHasWeb) { + return fmt.Errorf("cannot turn on TUN mode with existing TCP or web handlers for %s", svcName) + } + + incomingHasTCP := incomingSvcCfg.TCP().Len() > 0 + incomingHasWeb := incomingSvcCfg.Web().Len() > 0 + + // A Service cannot add TCP or web handlers if TUN mode is enabled. + if (incomingHasTCP || incomingHasWeb) && existingSvcCfg.Tun() { + return fmt.Errorf("cannot add TCP or web handlers as TUN mode is enabled for %s", svcName) + } + } + + return nil +} + +// serveType is a high-level descriptor of the kind of serve performed by a TCP +// port handler. 
+type serveType int + +const ( + serveTypeHTTPS serveType = iota + serveTypeHTTP + serveTypeTCP + serveTypeTLSTerminatedTCP +) + +func (s serveType) String() string { + switch s { + case serveTypeHTTP: + return "http" + case serveTypeHTTPS: + return "https" + case serveTypeTCP: + return "tcp" + case serveTypeTLSTerminatedTCP: + return "tls-terminated-tcp" + default: + return "unknownServeType" + } +} + +// serveTypeFromPortHandler is used to get a high-level descriptor of the kind +// of serve being performed by a port handler. +func serveTypeFromPortHandler(ph ipn.TCPPortHandlerView) serveType { + switch { + case ph.HTTP(): + return serveTypeHTTP + case ph.HTTPS(): + return serveTypeHTTPS + case ph.TerminateTLS() != "": + return serveTypeTLSTerminatedTCP + case ph.TCPForward() != "": + return serveTypeTCP + default: + return -1 + } +} diff --git a/ipn/ipnlocal/serve_test.go b/ipn/ipnlocal/serve_test.go index c3e5b2ff9..6ee2181a0 100644 --- a/ipn/ipnlocal/serve_test.go +++ b/ipn/ipnlocal/serve_test.go @@ -388,7 +388,7 @@ func TestServeConfigServices(t *testing.T) { tests := []struct { name string conf *ipn.ServeConfig - expectedErr error + errExpected bool packetDstAddrPort []netip.AddrPort intercepted bool }{ @@ -412,7 +412,7 @@ func TestServeConfigServices(t *testing.T) { }, }, }, - expectedErr: ipn.ErrServiceConfigHasBothTCPAndTun, + errExpected: true, }, { // one correctly configured service with packet should be intercepted @@ -519,13 +519,13 @@ func TestServeConfigServices(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { err := b.SetServeConfig(tt.conf, "") - if err != nil && tt.expectedErr != nil { - if !errors.Is(err, tt.expectedErr) { - t.Fatalf("expected error %v,\n got %v", tt.expectedErr, err) - } - return + if err == nil && tt.errExpected { + t.Fatal("expected error") } if err != nil { + if tt.errExpected { + return + } t.Fatal(err) } for _, addrPort := range tt.packetDstAddrPort { @@ -1454,3 +1454,315 @@ func TestServeHTTPRedirect(t *testing.T) { }) } } + +func TestValidateServeConfigUpdate(t *testing.T) { + tests := []struct { + name, description string + existing, incoming *ipn.ServeConfig + wantError bool + }{ + { + name: "empty existing config", + description: "should be able to update with empty existing config", + existing: &ipn.ServeConfig{}, + incoming: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 8080: {}, + }, + }, + wantError: false, + }, + { + name: "no existing config", + description: "should be able to update with no existing config", + existing: nil, + incoming: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 8080: {}, + }, + }, + wantError: false, + }, + { + name: "empty incoming config", + description: "wiping config should work", + existing: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {}, + }, + }, + incoming: &ipn.ServeConfig{}, + wantError: false, + }, + { + name: "no incoming config", + description: "missing incoming config should not result in an error", + existing: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {}, + }, + }, + incoming: nil, + wantError: false, + }, + { + name: "non-overlapping update", + description: "non-overlapping update should work", + existing: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {}, + }, + }, + incoming: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 8080: {}, + }, + }, + wantError: false, + }, + { + name: "overwriting background port", + description: "should be able to overwrite a background port", + 
existing: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: { + TCPForward: "localhost:8080", + }, + }, + }, + incoming: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: { + TCPForward: "localhost:9999", + }, + }, + }, + wantError: false, + }, + { + name: "broken existing config", + description: "broken existing config should not prevent new config updates", + existing: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + // Broken because HTTPS and TCPForward are mutually exclusive. + 9000: { + HTTPS: true, + TCPForward: "127.0.0.1:9000", + }, + // Broken because foreground and background handlers cannot coexist. + 443: {}, + }, + Foreground: map[string]*ipn.ServeConfig{ + "12345": { + TCP: map[uint16]*ipn.TCPPortHandler{ + // Broken because foreground and background handlers cannot coexist. + 443: {}, + }, + }, + }, + // Broken because Services cannot specify TUN mode and a TCP handler. + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:foo": { + TCP: map[uint16]*ipn.TCPPortHandler{ + 6060: {}, + }, + Tun: true, + }, + }, + }, + incoming: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {}, + }, + }, + wantError: false, + }, + { + name: "services same port as background", + description: "services should be able to use the same port as background listeners", + existing: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {}, + }, + }, + incoming: &ipn.ServeConfig{ + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:foo": { + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {}, + }, + }, + }, + }, + wantError: false, + }, + { + name: "services tun mode", + description: "TUN mode should be mutually exclusive with TCP or web handlers for new Services", + existing: &ipn.ServeConfig{}, + incoming: &ipn.ServeConfig{ + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:foo": { + TCP: map[uint16]*ipn.TCPPortHandler{ + 6060: {}, + }, + Tun: true, + }, + }, + }, + wantError: true, + }, + { + name: "new foreground listener", + description: "new foreground listeners must be on open ports", + existing: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {}, + }, + }, + incoming: &ipn.ServeConfig{ + Foreground: map[string]*ipn.ServeConfig{ + "12345": { + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {}, + }, + }, + }, + }, + wantError: true, + }, + { + name: "new background listener", + description: "new background listers cannot overwrite foreground listeners", + existing: &ipn.ServeConfig{ + Foreground: map[string]*ipn.ServeConfig{ + "12345": { + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {}, + }, + }, + }, + }, + incoming: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: {}, + }, + }, + wantError: true, + }, + { + name: "serve type overwrite", + description: "incoming configuration cannot change the serve type in use by a port", + existing: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: { + HTTP: true, + }, + }, + }, + incoming: &ipn.ServeConfig{ + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: { + TCPForward: "localhost:8080", + }, + }, + }, + wantError: true, + }, + { + name: "serve type overwrite services", + description: "incoming Services configuration cannot change the serve type in use by a port", + existing: &ipn.ServeConfig{ + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:foo": { + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: { + HTTP: true, + }, + }, + }, + }, + }, + incoming: &ipn.ServeConfig{ + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + 
"svc:foo": { + TCP: map[uint16]*ipn.TCPPortHandler{ + 80: { + TCPForward: "localhost:8080", + }, + }, + }, + }, + }, + wantError: true, + }, + { + name: "tun mode with handlers", + description: "Services cannot enable TUN mode if L4 or L7 handlers already exist", + existing: &ipn.ServeConfig{ + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:foo": { + TCP: map[uint16]*ipn.TCPPortHandler{ + 443: { + HTTPS: true, + }, + }, + Web: map[ipn.HostPort]*ipn.WebServerConfig{ + "127.0.0.1:443": { + Handlers: map[string]*ipn.HTTPHandler{}, + }, + }, + }, + }, + }, + incoming: &ipn.ServeConfig{ + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:foo": { + Tun: true, + }, + }, + }, + wantError: true, + }, + { + name: "handlers with tun mode", + description: "Services cannot add L4 or L7 handlers if TUN mode is already enabled", + existing: &ipn.ServeConfig{ + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:foo": { + Tun: true, + }, + }, + }, + incoming: &ipn.ServeConfig{ + Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{ + "svc:foo": { + TCP: map[uint16]*ipn.TCPPortHandler{ + 443: { + HTTPS: true, + }, + }, + Web: map[ipn.HostPort]*ipn.WebServerConfig{ + "127.0.0.1:443": { + Handlers: map[string]*ipn.HTTPHandler{}, + }, + }, + }, + }, + }, + wantError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateServeConfigUpdate(tt.existing.View(), tt.incoming.View()) + if err != nil && !tt.wantError { + t.Error("unexpected error:", err) + } + if err == nil && tt.wantError { + t.Error("expected error, got nil;", tt.description) + } + }) + } +} diff --git a/ipn/localapi/localapi.go b/ipn/localapi/localapi.go index d3503d302..7f249fe53 100644 --- a/ipn/localapi/localapi.go +++ b/ipn/localapi/localapi.go @@ -930,7 +930,10 @@ func (h *Handler) serveLoginInteractive(w http.ResponseWriter, r *http.Request) http.Error(w, "want POST", http.StatusBadRequest) return } - h.b.StartLoginInteractiveAs(r.Context(), h.Actor) + if err := h.b.StartLoginInteractiveAs(r.Context(), h.Actor); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } w.WriteHeader(http.StatusNoContent) return } @@ -949,6 +952,11 @@ func (h *Handler) serveStart(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return } + + if h.b.HealthTracker().IsUnhealthy(ipn.StateStoreHealth) { + http.Error(w, "cannot start backend when state store is unhealthy", http.StatusInternalServerError) + return + } err := h.b.Start(o) if err != nil { // TODO(bradfitz): map error to a good HTTP error diff --git a/ipn/localapi/localapi_test.go b/ipn/localapi/localapi_test.go index 3cf55ccbf..2c64d446f 100644 --- a/ipn/localapi/localapi_test.go +++ b/ipn/localapi/localapi_test.go @@ -25,9 +25,11 @@ import ( "testing" "tailscale.com/client/tailscale/apitype" + "tailscale.com/health" "tailscale.com/ipn" "tailscale.com/ipn/ipnauth" "tailscale.com/ipn/ipnlocal" + "tailscale.com/ipn/ipnstate" "tailscale.com/ipn/store/mem" "tailscale.com/tailcfg" "tailscale.com/tsd" diff --git a/ipn/prefs.go b/ipn/prefs.go index 7f8216c60..9f98465d2 100644 --- a/ipn/prefs.go +++ b/ipn/prefs.go @@ -283,10 +283,13 @@ type Prefs struct { // RelayServerPort is the UDP port number for the relay server to bind to, // on all interfaces. A non-nil zero value signifies a random unused port // should be used. A nil value signifies relay server functionality - // should be disabled. 
This field is currently experimental, and therefore - // no guarantees are made about its current naming and functionality when - // non-nil/enabled. - RelayServerPort *int `json:",omitempty"` + // should be disabled. + RelayServerPort *uint16 `json:",omitempty"` + + // RelayServerStaticEndpoints are static IP:port endpoints to advertise as + // candidates for relay connections. Only relevant when RelayServerPort is + // non-nil. + RelayServerStaticEndpoints []netip.AddrPort `json:",omitempty"` // AllowSingleHosts was a legacy field that was always true // for the past 4.5 years. It controlled whether Tailscale @@ -350,38 +353,39 @@ type AppConnectorPrefs struct { type MaskedPrefs struct { Prefs - ControlURLSet bool `json:",omitempty"` - RouteAllSet bool `json:",omitempty"` - ExitNodeIDSet bool `json:",omitempty"` - ExitNodeIPSet bool `json:",omitempty"` - AutoExitNodeSet bool `json:",omitempty"` - InternalExitNodePriorSet bool `json:",omitempty"` // Internal; can't be set by LocalAPI clients - ExitNodeAllowLANAccessSet bool `json:",omitempty"` - CorpDNSSet bool `json:",omitempty"` - RunSSHSet bool `json:",omitempty"` - RunWebClientSet bool `json:",omitempty"` - WantRunningSet bool `json:",omitempty"` - LoggedOutSet bool `json:",omitempty"` - ShieldsUpSet bool `json:",omitempty"` - AdvertiseTagsSet bool `json:",omitempty"` - HostnameSet bool `json:",omitempty"` - NotepadURLsSet bool `json:",omitempty"` - ForceDaemonSet bool `json:",omitempty"` - EggSet bool `json:",omitempty"` - AdvertiseRoutesSet bool `json:",omitempty"` - AdvertiseServicesSet bool `json:",omitempty"` - SyncSet bool `json:",omitzero"` - NoSNATSet bool `json:",omitempty"` - NoStatefulFilteringSet bool `json:",omitempty"` - NetfilterModeSet bool `json:",omitempty"` - OperatorUserSet bool `json:",omitempty"` - ProfileNameSet bool `json:",omitempty"` - AutoUpdateSet AutoUpdatePrefsMask `json:",omitzero"` - AppConnectorSet bool `json:",omitempty"` - PostureCheckingSet bool `json:",omitempty"` - NetfilterKindSet bool `json:",omitempty"` - DriveSharesSet bool `json:",omitempty"` - RelayServerPortSet bool `json:",omitempty"` + ControlURLSet bool `json:",omitempty"` + RouteAllSet bool `json:",omitempty"` + ExitNodeIDSet bool `json:",omitempty"` + ExitNodeIPSet bool `json:",omitempty"` + AutoExitNodeSet bool `json:",omitempty"` + InternalExitNodePriorSet bool `json:",omitempty"` // Internal; can't be set by LocalAPI clients + ExitNodeAllowLANAccessSet bool `json:",omitempty"` + CorpDNSSet bool `json:",omitempty"` + RunSSHSet bool `json:",omitempty"` + RunWebClientSet bool `json:",omitempty"` + WantRunningSet bool `json:",omitempty"` + LoggedOutSet bool `json:",omitempty"` + ShieldsUpSet bool `json:",omitempty"` + AdvertiseTagsSet bool `json:",omitempty"` + HostnameSet bool `json:",omitempty"` + NotepadURLsSet bool `json:",omitempty"` + ForceDaemonSet bool `json:",omitempty"` + EggSet bool `json:",omitempty"` + AdvertiseRoutesSet bool `json:",omitempty"` + AdvertiseServicesSet bool `json:",omitempty"` + SyncSet bool `json:",omitzero"` + NoSNATSet bool `json:",omitempty"` + NoStatefulFilteringSet bool `json:",omitempty"` + NetfilterModeSet bool `json:",omitempty"` + OperatorUserSet bool `json:",omitempty"` + ProfileNameSet bool `json:",omitempty"` + AutoUpdateSet AutoUpdatePrefsMask `json:",omitzero"` + AppConnectorSet bool `json:",omitempty"` + PostureCheckingSet bool `json:",omitempty"` + NetfilterKindSet bool `json:",omitempty"` + DriveSharesSet bool `json:",omitempty"` + RelayServerPortSet bool `json:",omitempty"` + 
RelayServerStaticEndpointsSet bool `json:",omitzero"` } // SetsInternal reports whether mp has any of the Internal*Set field bools set @@ -621,6 +625,9 @@ func (p *Prefs) pretty(goos string) string { if buildfeatures.HasRelayServer && p.RelayServerPort != nil { fmt.Fprintf(&sb, "relayServerPort=%d ", *p.RelayServerPort) } + if buildfeatures.HasRelayServer && len(p.RelayServerStaticEndpoints) > 0 { + fmt.Fprintf(&sb, "relayServerStaticEndpoints=%v ", p.RelayServerStaticEndpoints) + } if p.Persist != nil { sb.WriteString(p.Persist.Pretty()) } else { @@ -685,7 +692,8 @@ func (p *Prefs) Equals(p2 *Prefs) bool { p.PostureChecking == p2.PostureChecking && slices.EqualFunc(p.DriveShares, p2.DriveShares, drive.SharesEqual) && p.NetfilterKind == p2.NetfilterKind && - compareIntPtrs(p.RelayServerPort, p2.RelayServerPort) + compareUint16Ptrs(p.RelayServerPort, p2.RelayServerPort) && + slices.Equal(p.RelayServerStaticEndpoints, p2.RelayServerStaticEndpoints) } func (au AutoUpdatePrefs) Pretty() string { @@ -705,7 +713,7 @@ func (ap AppConnectorPrefs) Pretty() string { return "" } -func compareIntPtrs(a, b *int) bool { +func compareUint16Ptrs(a, b *uint16) bool { if (a == nil) != (b == nil) { return false } diff --git a/ipn/prefs_test.go b/ipn/prefs_test.go index 7c9c3ef43..aa152843a 100644 --- a/ipn/prefs_test.go +++ b/ipn/prefs_test.go @@ -69,6 +69,7 @@ func TestPrefsEqual(t *testing.T) { "NetfilterKind", "DriveShares", "RelayServerPort", + "RelayServerStaticEndpoints", "AllowSingleHosts", "Persist", } @@ -77,7 +78,7 @@ func TestPrefsEqual(t *testing.T) { have, prefsHandles) } - relayServerPort := func(port int) *int { + relayServerPort := func(port uint16) *uint16 { return &port } nets := func(strs ...string) (ns []netip.Prefix) { @@ -90,6 +91,16 @@ func TestPrefsEqual(t *testing.T) { } return ns } + aps := func(strs ...string) (ret []netip.AddrPort) { + for _, s := range strs { + n, err := netip.ParseAddrPort(s) + if err != nil { + panic(err) + } + ret = append(ret, n) + } + return ret + } tests := []struct { a, b *Prefs want bool @@ -369,6 +380,16 @@ func TestPrefsEqual(t *testing.T) { &Prefs{RelayServerPort: relayServerPort(1)}, false, }, + { + &Prefs{RelayServerStaticEndpoints: aps("[2001:db8::1]:40000", "192.0.2.1:40000")}, + &Prefs{RelayServerStaticEndpoints: aps("[2001:db8::1]:40000", "192.0.2.1:40000")}, + true, + }, + { + &Prefs{RelayServerStaticEndpoints: aps("[2001:db8::1]:40000", "192.0.2.2:40000")}, + &Prefs{RelayServerStaticEndpoints: aps("[2001:db8::1]:40000", "192.0.2.1:40000")}, + false, + }, } for i, tt := range tests { got := tt.a.Equals(tt.b) diff --git a/ipn/serve.go b/ipn/serve.go index 74195191c..1f1557889 100644 --- a/ipn/serve.go +++ b/ipn/serve.go @@ -238,6 +238,20 @@ func (sc *ServeConfig) HasPathHandler() bool { } } + if sc.Services != nil { + for _, serviceConfig := range sc.Services { + if serviceConfig.Web != nil { + for _, webServerConfig := range serviceConfig.Web { + for _, httpHandler := range webServerConfig.Handlers { + if httpHandler.Path != "" { + return true + } + } + } + } + } + } + if sc.Foreground != nil { for _, fgConfig := range sc.Foreground { if fgConfig.HasPathHandler() { @@ -802,6 +816,7 @@ func (v ServeConfigView) FindServiceTCP(svcName tailcfg.ServiceName, port uint16 return svcCfg.TCP().GetOk(port) } +// FindServiceWeb returns the web handler for the service's host-port. 
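+// It reports ok == false if no web config exists for svcName at hp.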
func (v ServeConfigView) FindServiceWeb(svcName tailcfg.ServiceName, hp HostPort) (res WebServerConfigView, ok bool) { if svcCfg, ok := v.Services().GetOk(svcName); ok { if res, ok := svcCfg.Web().GetOk(hp); ok { @@ -815,10 +830,9 @@ func (v ServeConfigView) FindServiceWeb(svcName tailcfg.ServiceName, hp HostPort // prefers a foreground match first followed by a background search if none // existed. func (v ServeConfigView) FindTCP(port uint16) (res TCPPortHandlerView, ok bool) { - for _, conf := range v.Foreground().All() { - if res, ok := conf.TCP().GetOk(port); ok { - return res, ok - } + res, ok = v.FindForegroundTCP(port) + if ok { + return res, ok } return v.TCP().GetOk(port) } @@ -835,6 +849,17 @@ func (v ServeConfigView) FindWeb(hp HostPort) (res WebServerConfigView, ok bool) return v.Web().GetOk(hp) } +// FindForegroundTCP returns the first foreground TCP handler matching the input +// port. +func (v ServeConfigView) FindForegroundTCP(port uint16) (res TCPPortHandlerView, ok bool) { + for _, conf := range v.Foreground().All() { + if res, ok := conf.TCP().GetOk(port); ok { + return res, ok + } + } + return res, false +} + // HasAllowFunnel returns whether this config has at least one AllowFunnel // set in the background or foreground configs. func (v ServeConfigView) HasAllowFunnel() bool { @@ -863,17 +888,6 @@ func (v ServeConfigView) HasFunnelForTarget(target HostPort) bool { return false } -// CheckValidServicesConfig reports whether the ServeConfig has -// invalid service configurations. -func (sc *ServeConfig) CheckValidServicesConfig() error { - for svcName, service := range sc.Services { - if err := service.checkValidConfig(); err != nil { - return fmt.Errorf("invalid service configuration for %q: %w", svcName, err) - } - } - return nil -} - // ServicePortRange returns the list of tailcfg.ProtoPortRange that represents // the proto/ports pairs that are being served by the service. // @@ -911,17 +925,3 @@ func (v ServiceConfigView) ServicePortRange() []tailcfg.ProtoPortRange { } return ranges } - -// ErrServiceConfigHasBothTCPAndTun signals that a service -// in Tun mode cannot also has TCP or Web handlers set. -var ErrServiceConfigHasBothTCPAndTun = errors.New("the VIP Service configuration can not set TUN at the same time as TCP or Web") - -// checkValidConfig checks if the service configuration is valid. -// Currently, the only invalid configuration is when the service is in Tun mode -// and has TCP or Web handlers. 
-func (v *ServiceConfig) checkValidConfig() error { - if v.Tun && (len(v.TCP) > 0 || len(v.Web) > 0) { - return ErrServiceConfigHasBothTCPAndTun - } - return nil -} diff --git a/ipn/serve_test.go b/ipn/serve_test.go index 063ff3a87..5e0f4a43a 100644 --- a/ipn/serve_test.go +++ b/ipn/serve_test.go @@ -117,6 +117,36 @@ func TestHasPathHandler(t *testing.T) { }, want: false, }, + { + name: "with-service-path-handler", + cfg: ServeConfig{ + Services: map[tailcfg.ServiceName]*ServiceConfig{ + "svc:foo": { + Web: map[HostPort]*WebServerConfig{ + "foo.test.ts.net:443": {Handlers: map[string]*HTTPHandler{ + "/": {Path: "/tmp"}, + }}, + }, + }, + }, + }, + want: true, + }, + { + name: "with-service-proxy-handler", + cfg: ServeConfig{ + Services: map[tailcfg.ServiceName]*ServiceConfig{ + "svc:foo": { + Web: map[HostPort]*WebServerConfig{ + "foo.test.ts.net:443": {Handlers: map[string]*HTTPHandler{ + "/": {Proxy: "http://127.0.0.1:3000"}, + }}, + }, + }, + }, + }, + want: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/ipn/store.go b/ipn/store.go index 9da5288c0..2034ae09a 100644 --- a/ipn/store.go +++ b/ipn/store.go @@ -10,6 +10,8 @@ import ( "fmt" "net" "strconv" + + "tailscale.com/health" ) // ErrStateNotExist is returned by StateStore.ReadState when the @@ -60,6 +62,19 @@ const ( TaildropReceivedKey = StateKey("_taildrop-received") ) +// StateStoreHealth is a Warnable set when store.New fails at startup. If +// unhealthy, we block all login attempts and return a health message in status +// responses. +var StateStoreHealth = health.Register(&health.Warnable{ + Code: "state-store-health", + Severity: health.SeverityHigh, + Title: "Tailscale state store failed to initialize", + Text: func(args health.Args) string { + return fmt.Sprintf("State store failed to initialize, Tailscale will not work until this is resolved. See https://tailscale.com/s/state-store-init-error. Error: %s", args[health.ArgError]) + }, + ImpactsConnectivity: true, +}) + // CurrentProfileID returns the StateKey that stores the // current profile ID. The value is a JSON-encoded LoginProfile. // If the userID is empty, the key returned is CurrentProfileStateKey, diff --git a/k8s-operator/api.md b/k8s-operator/api.md index 979d199cb..3a4e692d9 100644 --- a/k8s-operator/api.md +++ b/k8s-operator/api.md @@ -887,7 +887,7 @@ _Appears in:_ - +RecorderSpec describes a tsrecorder instance to be deployed in the cluster @@ -900,6 +900,7 @@ _Appears in:_ | `tags` _[Tags](#tags)_ | Tags that the Tailscale device will be tagged with. Defaults to [tag:k8s].
If you specify custom tags here, make sure you also make the operator
an owner of these tags.
See https://tailscale.com/kb/1236/kubernetes-operator/#setting-up-the-kubernetes-operator.
Tags cannot be changed once a Recorder node has been created.
Tag values must be in form ^tag:[a-zA-Z][a-zA-Z0-9-]*$. | | Pattern: `^tag:[a-zA-Z][a-zA-Z0-9-]*$`
Type: string
| | `enableUI` _boolean_ | Set to true to enable the Recorder UI. The UI lists and plays recorded sessions.
The UI will be served at :443. Defaults to false.
Corresponds to --ui tsrecorder flag https://tailscale.com/kb/1246/tailscale-ssh-session-recording#deploy-a-recorder-node.
Required if S3 storage is not set up, to ensure that recordings are accessible. | | | | `storage` _[Storage](#storage)_ | Configure where to store session recordings. By default, recordings will
be stored in a local ephemeral volume, and will not be persisted past the
lifetime of a specific pod. | | | +| `replicas` _integer_ | Replicas specifies how many instances of tsrecorder to run. Defaults to 1. | | Minimum: 0
|

#### RecorderStatefulSet

diff --git a/k8s-operator/apis/v1alpha1/types_recorder.go b/k8s-operator/apis/v1alpha1/types_recorder.go
index 16a610b26..67cffbf09 100644
--- a/k8s-operator/apis/v1alpha1/types_recorder.go
+++ b/k8s-operator/apis/v1alpha1/types_recorder.go
@@ -44,6 +44,8 @@ type RecorderList struct {
 Items []Recorder `json:"items"`
}
+// RecorderSpec describes a tsrecorder instance to be deployed in the cluster
+// +kubebuilder:validation:XValidation:rule="!(self.replicas > 1 && (!has(self.storage) || !has(self.storage.s3)))",message="S3 storage must be used when deploying multiple Recorder replicas"
type RecorderSpec struct {
 // Configuration parameters for the Recorder's StatefulSet. The operator
 // deploys a StatefulSet for each Recorder resource.
@@ -74,6 +76,11 @@ type RecorderSpec struct {
 // lifetime of a specific pod.
 // +optional
 Storage Storage `json:"storage,omitempty"`
+
+ // Replicas specifies how many instances of tsrecorder to run. Defaults to 1.
+ // +optional
+ // +kubebuilder:validation:Minimum=0
+ Replicas *int32 `json:"replicas,omitzero"`
}

type RecorderStatefulSet struct {
diff --git a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go
index 7492f1e54..ff0f3f6ac 100644
--- a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go
+++ b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go
@@ -1068,6 +1068,11 @@ func (in *RecorderSpec) DeepCopyInto(out *RecorderSpec) {
 copy(*out, *in)
 }
 in.Storage.DeepCopyInto(&out.Storage)
+ if in.Replicas != nil {
+ in, out := &in.Replicas, &out.Replicas
+ *out = new(int32)
+ **out = **in
+ }
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RecorderSpec.
diff --git a/net/packet/tsmp.go b/net/packet/tsmp.go
index 0ea321e84..8fad1d503 100644
--- a/net/packet/tsmp.go
+++ b/net/packet/tsmp.go
@@ -15,7 +15,9 @@ import (
 "fmt"
 "net/netip"

+ "go4.org/mem"
 "tailscale.com/types/ipproto"
+ "tailscale.com/types/key"
)

const minTSMPSize = 7 // the rejected body is 7 bytes
@@ -72,6 +74,9 @@ const (
 // TSMPTypePong is the type byte for a TailscalePongResponse.
 TSMPTypePong TSMPType = 'o'
+
+ // TSMPTypeDiscoAdvertisement is the type byte for sending disco keys.
+ TSMPTypeDiscoAdvertisement TSMPType = 'a'
)

type TailscaleRejectReason byte
@@ -259,3 +264,53 @@ func (h TSMPPongReply) Marshal(buf []byte) error {
 binary.BigEndian.PutUint16(buf[9:11], h.PeerAPIPort)
 return nil
}
+
+// TSMPDiscoKeyAdvertisement is a TSMP message that's used for distributing Disco Keys.
+//
+// On the wire, after the IP header, it's currently 33 bytes:
+// - 'a' (TSMPTypeDiscoAdvertisement)
+// - 32 disco key bytes
+type TSMPDiscoKeyAdvertisement struct {
+ Src, Dst netip.Addr
+ Key key.DiscoPublic
+}
+
+func (ka *TSMPDiscoKeyAdvertisement) Marshal() ([]byte, error) {
+ var iph Header
+ if ka.Src.Is4() {
+ iph = IP4Header{
+ IPProto: ipproto.TSMP,
+ Src: ka.Src,
+ Dst: ka.Dst,
+ }
+ } else {
+ iph = IP6Header{
+ IPProto: ipproto.TSMP,
+ Src: ka.Src,
+ Dst: ka.Dst,
+ }
+ }
+ payload := make([]byte, 0, 33)
+ payload = append(payload, byte(TSMPTypeDiscoAdvertisement))
+ payload = ka.Key.AppendTo(payload)
+ if len(payload) != 33 {
+ // Mostly to safeguard against ourselves changing this in the future.
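+ // (The expected 33 bytes are 1 type byte plus 32 disco key bytes, per the
+ // wire format documented on TSMPDiscoKeyAdvertisement above.)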
+ return []byte{}, fmt.Errorf("expected payload length 33, got %d", len(payload))
+ }
+
+ return Generate(iph, payload), nil
+}
+
+func (pp *Parsed) AsTSMPDiscoAdvertisement() (tka TSMPDiscoKeyAdvertisement, ok bool) {
+ if pp.IPProto != ipproto.TSMP {
+ return
+ }
+ p := pp.Payload()
+ if len(p) < 33 || p[0] != byte(TSMPTypeDiscoAdvertisement) {
+ return
+ }
+ tka.Src = pp.Src.Addr()
+ tka.Key = key.DiscoPublicFromRaw32(mem.B(p[1:33]))
+
+ return tka, true
+}
diff --git a/net/packet/tsmp_test.go b/net/packet/tsmp_test.go
index e261e6a41..d8f1d38d5 100644
--- a/net/packet/tsmp_test.go
+++ b/net/packet/tsmp_test.go
@@ -4,8 +4,14 @@ package packet
 import (
+ "bytes"
+ "encoding/hex"
 "net/netip"
+ "slices"
 "testing"
+
+ "go4.org/mem"
+ "tailscale.com/types/key"
 )

 func TestTailscaleRejectedHeader(t *testing.T) {
@@ -71,3 +77,62 @@ func TestTailscaleRejectedHeader(t *testing.T) {
 }
 }
}
+
+func TestTSMPDiscoKeyAdvertisementMarshal(t *testing.T) {
+ var (
+ // IPv4: Ver(4)Len(5), TOS, Len(53), ID, Flags, TTL(64), Proto(99), Cksum
+ headerV4, _ = hex.DecodeString("45000035000000004063705d")
+ // IPv6: Ver(6)TCFlow, Len(33), NextHdr(99), HopLim(64)
+ headerV6, _ = hex.DecodeString("6000000000216340")
+
+ packetType = []byte{'a'}
+ testKey = bytes.Repeat([]byte{'a'}, 32)
+
+ // IPs
+ srcV4 = netip.MustParseAddr("1.2.3.4")
+ dstV4 = netip.MustParseAddr("4.3.2.1")
+ srcV6 = netip.MustParseAddr("2001:db8::1")
+ dstV6 = netip.MustParseAddr("2001:db8::2")
+ )
+
+ join := func(parts ...[]byte) []byte {
+ return bytes.Join(parts, nil)
+ }
+
+ tests := []struct {
+ name string
+ tka TSMPDiscoKeyAdvertisement
+ want []byte
+ }{
+ {
+ name: "v4Header",
+ tka: TSMPDiscoKeyAdvertisement{
+ Src: srcV4,
+ Dst: dstV4,
+ Key: key.DiscoPublicFromRaw32(mem.B(testKey)),
+ },
+ want: join(headerV4, srcV4.AsSlice(), dstV4.AsSlice(), packetType, testKey),
+ },
+ {
+ name: "v6Header",
+ tka: TSMPDiscoKeyAdvertisement{
+ Src: srcV6,
+ Dst: dstV6,
+ Key: key.DiscoPublicFromRaw32(mem.B(testKey)),
+ },
+ want: join(headerV6, srcV6.AsSlice(), dstV6.AsSlice(), packetType, testKey),
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got, err := tt.tka.Marshal()
+ if err != nil {
+ t.Errorf("error marshalling TSMPDiscoAdvertisement: %s", err)
+ }
+ if !slices.Equal(got, tt.want) {
+ t.Errorf("error marshalling TSMPDiscoAdvertisement, expected: \n%x, \ngot:\n%x", tt.want, got)
+ }
+ })
+ }
+}
diff --git a/net/tstun/wrap.go b/net/tstun/wrap.go
index db4f689bf..6e07c7a3d 100644
--- a/net/tstun/wrap.go
+++ b/net/tstun/wrap.go
@@ -34,6 +34,7 @@ import (
 "tailscale.com/types/logger"
 "tailscale.com/types/netlogfunc"
 "tailscale.com/util/clientmetric"
+ "tailscale.com/util/eventbus"
 "tailscale.com/util/usermetric"
 "tailscale.com/wgengine/filter"
 "tailscale.com/wgengine/netstack/gro"
@@ -209,6 +210,9 @@ type Wrapper struct {
 captureHook syncs.AtomicValue[packet.CaptureCallback]
 metrics *metrics
+
+ eventClient *eventbus.Client
+ discoKeyAdvertisementPub *eventbus.Publisher[DiscoKeyAdvertisement]
 }

 type metrics struct {
@@ -254,15 +258,15 @@ func (w *Wrapper) Start() {
 close(w.startCh)
 }

-func WrapTAP(logf logger.Logf, tdev tun.Device, m *usermetric.Registry) *Wrapper {
- return wrap(logf, tdev, true, m)
+func WrapTAP(logf logger.Logf, tdev tun.Device, m *usermetric.Registry, bus *eventbus.Bus) *Wrapper {
+ return wrap(logf, tdev, true, m, bus)
 }

-func Wrap(logf logger.Logf, tdev tun.Device, m *usermetric.Registry) *Wrapper {
- return wrap(logf, tdev, false, m)
+func Wrap(logf logger.Logf, tdev tun.Device, m
*usermetric.Registry, bus *eventbus.Bus) *Wrapper { + return wrap(logf, tdev, false, m, bus) } -func wrap(logf logger.Logf, tdev tun.Device, isTAP bool, m *usermetric.Registry) *Wrapper { +func wrap(logf logger.Logf, tdev tun.Device, isTAP bool, m *usermetric.Registry, bus *eventbus.Bus) *Wrapper { logf = logger.WithPrefix(logf, "tstun: ") w := &Wrapper{ logf: logf, @@ -283,6 +287,9 @@ func wrap(logf logger.Logf, tdev tun.Device, isTAP bool, m *usermetric.Registry) metrics: registerMetrics(m), } + w.eventClient = bus.Client("net.tstun") + w.discoKeyAdvertisementPub = eventbus.Publish[DiscoKeyAdvertisement](w.eventClient) + w.vectorBuffer = make([][]byte, tdev.BatchSize()) for i := range w.vectorBuffer { w.vectorBuffer[i] = make([]byte, maxBufferSize) @@ -357,6 +364,7 @@ func (t *Wrapper) Close() error { close(t.vectorOutbound) t.outboundMu.Unlock() err = t.tdev.Close() + t.eventClient.Close() }) return err } @@ -1118,6 +1126,11 @@ func (t *Wrapper) injectedRead(res tunInjectedRead, outBuffs [][]byte, sizes []i return n, err } +type DiscoKeyAdvertisement struct { + Src netip.Addr + Key key.DiscoPublic +} + func (t *Wrapper) filterPacketInboundFromWireGuard(p *packet.Parsed, captHook packet.CaptureCallback, pc *peerConfigTable, gro *gro.GRO) (filter.Response, *gro.GRO) { if captHook != nil { captHook(packet.FromPeer, t.now(), p.Buffer(), p.CaptureMeta) @@ -1128,6 +1141,12 @@ func (t *Wrapper) filterPacketInboundFromWireGuard(p *packet.Parsed, captHook pa t.noteActivity() t.injectOutboundPong(p, pingReq) return filter.DropSilently, gro + } else if discoKeyAdvert, ok := p.AsTSMPDiscoAdvertisement(); ok { + t.discoKeyAdvertisementPub.Publish(DiscoKeyAdvertisement{ + Src: discoKeyAdvert.Src, + Key: discoKeyAdvert.Key, + }) + return filter.DropSilently, gro } else if data, ok := p.AsTSMPPong(); ok { if f := t.OnTSMPPongReceived; f != nil { f(data) diff --git a/net/tstun/wrap_test.go b/net/tstun/wrap_test.go index 75cf5afb2..c7d0708df 100644 --- a/net/tstun/wrap_test.go +++ b/net/tstun/wrap_test.go @@ -36,6 +36,8 @@ import ( "tailscale.com/types/netlogtype" "tailscale.com/types/ptr" "tailscale.com/types/views" + "tailscale.com/util/eventbus" + "tailscale.com/util/eventbus/eventbustest" "tailscale.com/util/must" "tailscale.com/util/usermetric" "tailscale.com/wgengine/filter" @@ -170,10 +172,10 @@ func setfilter(logf logger.Logf, tun *Wrapper) { tun.SetFilter(filter.New(matches, nil, ipSet, ipSet, nil, logf)) } -func newChannelTUN(logf logger.Logf, secure bool) (*tuntest.ChannelTUN, *Wrapper) { +func newChannelTUN(logf logger.Logf, bus *eventbus.Bus, secure bool) (*tuntest.ChannelTUN, *Wrapper) { chtun := tuntest.NewChannelTUN() reg := new(usermetric.Registry) - tun := Wrap(logf, chtun.TUN(), reg) + tun := Wrap(logf, chtun.TUN(), reg, bus) if secure { setfilter(logf, tun) } else { @@ -183,10 +185,10 @@ func newChannelTUN(logf logger.Logf, secure bool) (*tuntest.ChannelTUN, *Wrapper return chtun, tun } -func newFakeTUN(logf logger.Logf, secure bool) (*fakeTUN, *Wrapper) { +func newFakeTUN(logf logger.Logf, bus *eventbus.Bus, secure bool) (*fakeTUN, *Wrapper) { ftun := NewFake() reg := new(usermetric.Registry) - tun := Wrap(logf, ftun, reg) + tun := Wrap(logf, ftun, reg, bus) if secure { setfilter(logf, tun) } else { @@ -196,7 +198,8 @@ func newFakeTUN(logf logger.Logf, secure bool) (*fakeTUN, *Wrapper) { } func TestReadAndInject(t *testing.T) { - chtun, tun := newChannelTUN(t.Logf, false) + bus := eventbustest.NewBus(t) + chtun, tun := newChannelTUN(t.Logf, bus, false) defer tun.Close() const size = 
2 // all payloads have this size @@ -221,7 +224,7 @@ func TestReadAndInject(t *testing.T) { } var buf [MaxPacketSize]byte - var seen = make(map[string]bool) + seen := make(map[string]bool) sizes := make([]int, 1) // We expect the same packets back, in no particular order. for i := range len(written) + len(injected) { @@ -257,7 +260,8 @@ func TestReadAndInject(t *testing.T) { } func TestWriteAndInject(t *testing.T) { - chtun, tun := newChannelTUN(t.Logf, false) + bus := eventbustest.NewBus(t) + chtun, tun := newChannelTUN(t.Logf, bus, false) defer tun.Close() written := []string{"w0", "w1"} @@ -316,8 +320,8 @@ func mustHexDecode(s string) []byte { } func TestFilter(t *testing.T) { - - chtun, tun := newChannelTUN(t.Logf, true) + bus := eventbustest.NewBus(t) + chtun, tun := newChannelTUN(t.Logf, bus, true) defer tun.Close() // Reset the metrics before test. These are global @@ -462,7 +466,8 @@ func assertMetricPackets(t *testing.T, metricName string, want, got int64) { } func TestAllocs(t *testing.T) { - ftun, tun := newFakeTUN(t.Logf, false) + bus := eventbustest.NewBus(t) + ftun, tun := newFakeTUN(t.Logf, bus, false) defer tun.Close() buf := [][]byte{{0x00}} @@ -473,14 +478,14 @@ func TestAllocs(t *testing.T) { return } }) - if err != nil { t.Error(err) } } func TestClose(t *testing.T) { - ftun, tun := newFakeTUN(t.Logf, false) + bus := eventbustest.NewBus(t) + ftun, tun := newFakeTUN(t.Logf, bus, false) data := [][]byte{udp4("1.2.3.4", "5.6.7.8", 98, 98)} _, err := ftun.Write(data, 0) @@ -497,7 +502,8 @@ func TestClose(t *testing.T) { func BenchmarkWrite(b *testing.B) { b.ReportAllocs() - ftun, tun := newFakeTUN(b.Logf, true) + bus := eventbustest.NewBus(b) + ftun, tun := newFakeTUN(b.Logf, bus, true) defer tun.Close() packet := [][]byte{udp4("5.6.7.8", "1.2.3.4", 89, 89)} @@ -887,7 +893,8 @@ func TestCaptureHook(t *testing.T) { now := time.Unix(1682085856, 0) - _, w := newFakeTUN(t.Logf, true) + bus := eventbustest.NewBus(t) + _, w := newFakeTUN(t.Logf, bus, true) w.timeNow = func() time.Time { return now } @@ -957,3 +964,30 @@ func TestCaptureHook(t *testing.T) { captured, want) } } + +func TestTSMPDisco(t *testing.T) { + t.Run("IPv6DiscoAdvert", func(t *testing.T) { + src := netip.MustParseAddr("2001:db8::1") + dst := netip.MustParseAddr("2001:db8::2") + discoKey := key.NewDisco() + buf, _ := (&packet.TSMPDiscoKeyAdvertisement{ + Src: src, + Dst: dst, + Key: discoKey.Public(), + }).Marshal() + + var p packet.Parsed + p.Decode(buf) + + tda, ok := p.AsTSMPDiscoAdvertisement() + if !ok { + t.Error("Unable to parse message as TSMPDiscoAdversitement") + } + if tda.Src != src { + t.Errorf("Src address did not match, expected %v, got %v", src, tda.Src) + } + if !reflect.DeepEqual(tda.Key, discoKey.Public()) { + t.Errorf("Key did not match, expected %q, got %q", discoKey.Public(), tda.Key) + } + }) +} diff --git a/net/udprelay/server.go b/net/udprelay/server.go index 7138cec7a..e7ca24960 100644 --- a/net/udprelay/server.go +++ b/net/udprelay/server.go @@ -10,6 +10,7 @@ import ( "bytes" "context" "crypto/rand" + "encoding/binary" "errors" "fmt" "net" @@ -20,6 +21,7 @@ import ( "time" "go4.org/mem" + "golang.org/x/crypto/blake2s" "golang.org/x/net/ipv6" "tailscale.com/disco" "tailscale.com/net/batching" @@ -73,7 +75,9 @@ type Server struct { closeCh chan struct{} netChecker *netcheck.Client - mu sync.Mutex // guards the following fields + mu sync.Mutex // guards the following fields + macSecrets [][blake2s.Size]byte // [0] is most recent, max 2 elements + macSecretRotatedAt time.Time derpMap 
*tailcfg.DERPMap onlyStaticAddrPorts bool // no dynamic addr port discovery when set staticAddrPorts views.Slice[netip.AddrPort] // static ip:port pairs set with [Server.SetStaticAddrPorts] @@ -85,6 +89,8 @@ type Server struct { byDisco map[key.SortedPairOfDiscoPublic]*serverEndpoint } +const macSecretRotationInterval = time.Minute * 2 + const ( minVNI = uint32(1) maxVNI = uint32(1<<24 - 1) @@ -98,22 +104,42 @@ type serverEndpoint struct { // indexing of this array aligns with the following fields, e.g. // discoSharedSecrets[0] is the shared secret to use when sealing // Disco protocol messages for transmission towards discoPubKeys[0]. - discoPubKeys key.SortedPairOfDiscoPublic - discoSharedSecrets [2]key.DiscoShared - handshakeGeneration [2]uint32 // or zero if a handshake has never started for that relay leg - handshakeAddrPorts [2]netip.AddrPort // or zero value if a handshake has never started for that relay leg - boundAddrPorts [2]netip.AddrPort // or zero value if a handshake has never completed for that relay leg - lastSeen [2]time.Time // TODO(jwhited): consider using mono.Time - challenge [2][disco.BindUDPRelayChallengeLen]byte - packetsRx [2]uint64 // num packets received from/sent by each client after they are bound - bytesRx [2]uint64 // num bytes received from/sent by each client after they are bound + discoPubKeys key.SortedPairOfDiscoPublic + discoSharedSecrets [2]key.DiscoShared + inProgressGeneration [2]uint32 // or zero if a handshake has never started, or has just completed + boundAddrPorts [2]netip.AddrPort // or zero value if a handshake has never completed for that relay leg + lastSeen [2]time.Time // TODO(jwhited): consider using mono.Time + packetsRx [2]uint64 // num packets received from/sent by each client after they are bound + bytesRx [2]uint64 // num bytes received from/sent by each client after they are bound lamportID uint64 vni uint32 allocatedAt time.Time } -func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex int, discoMsg disco.Message, serverDisco key.DiscoPublic) (write []byte, to netip.AddrPort) { +func blakeMACFromBindMsg(blakeKey [blake2s.Size]byte, src netip.AddrPort, msg disco.BindUDPRelayEndpointCommon) ([blake2s.Size]byte, error) { + input := make([]byte, 8, 4+4+32+18) // vni + generation + invited party disco key + addr:port + binary.BigEndian.PutUint32(input[0:4], msg.VNI) + binary.BigEndian.PutUint32(input[4:8], msg.Generation) + input = msg.RemoteKey.AppendTo(input) + input, err := src.AppendBinary(input) + if err != nil { + return [blake2s.Size]byte{}, err + } + h, err := blake2s.New256(blakeKey[:]) + if err != nil { + return [blake2s.Size]byte{}, err + } + _, err = h.Write(input) + if err != nil { + return [blake2s.Size]byte{}, err + } + var out [blake2s.Size]byte + h.Sum(out[:0]) + return out, nil +} + +func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex int, discoMsg disco.Message, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) { if senderIndex != 0 && senderIndex != 1 { return nil, netip.AddrPort{} } @@ -144,18 +170,11 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex // Generation must be nonzero, silently drop return nil, netip.AddrPort{} } - if e.handshakeGeneration[senderIndex] == discoMsg.Generation { - // we've seen this generation before, silently drop - return nil, netip.AddrPort{} - } - e.handshakeGeneration[senderIndex] = discoMsg.Generation - e.handshakeAddrPorts[senderIndex] = from + 
e.inProgressGeneration[senderIndex] = discoMsg.Generation m := new(disco.BindUDPRelayEndpointChallenge) m.VNI = e.vni m.Generation = discoMsg.Generation m.RemoteKey = e.discoPubKeys.Get()[otherSender] - rand.Read(e.challenge[senderIndex][:]) - copy(m.Challenge[:], e.challenge[senderIndex][:]) reply := make([]byte, packet.GeneveFixedHeaderLength, 512) gh := packet.GeneveHeader{Control: true, Protocol: packet.GeneveProtocolDisco} gh.VNI.Set(e.vni) @@ -165,6 +184,11 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex } reply = append(reply, disco.Magic...) reply = serverDisco.AppendTo(reply) + mac, err := blakeMACFromBindMsg(macSecrets[0], from, m.BindUDPRelayEndpointCommon) + if err != nil { + return nil, netip.AddrPort{} + } + m.Challenge = mac box := e.discoSharedSecrets[senderIndex].Seal(m.AppendMarshal(nil)) reply = append(reply, box...) return reply, from @@ -174,17 +198,29 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex // silently drop return nil, netip.AddrPort{} } - generation := e.handshakeGeneration[senderIndex] - if generation == 0 || // we have no active handshake - generation != discoMsg.Generation || // mismatching generation for the active handshake - e.handshakeAddrPorts[senderIndex] != from || // mismatching source for the active handshake - !bytes.Equal(e.challenge[senderIndex][:], discoMsg.Challenge[:]) { // mismatching answer for the active handshake + generation := e.inProgressGeneration[senderIndex] + if generation == 0 || // we have no in-progress handshake + generation != discoMsg.Generation { // mismatching generation for the in-progress handshake // silently drop return nil, netip.AddrPort{} } - // Handshake complete. Update the binding for this sender. - e.boundAddrPorts[senderIndex] = from - e.lastSeen[senderIndex] = time.Now() // record last seen as bound time + for _, macSecret := range macSecrets { + mac, err := blakeMACFromBindMsg(macSecret, from, discoMsg.BindUDPRelayEndpointCommon) + if err != nil { + // silently drop + return nil, netip.AddrPort{} + } + // Speed is favored over constant-time comparison here. The sender is + // already authenticated via disco. + if bytes.Equal(mac[:], discoMsg.Challenge[:]) { + // Handshake complete. Update the binding for this sender. 
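+ // Subsequent data packets from this addr:port will be relayed to the
+ // other bound leg.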
+ e.boundAddrPorts[senderIndex] = from + e.lastSeen[senderIndex] = time.Now() // record last seen as bound time + e.inProgressGeneration[senderIndex] = 0 // reset to zero, which indicates there is no in-progress handshake + return nil, netip.AddrPort{} + } + } + // MAC does not match, silently drop return nil, netip.AddrPort{} default: // unexpected message types, silently drop @@ -192,7 +228,7 @@ func (e *serverEndpoint) handleDiscoControlMsg(from netip.AddrPort, senderIndex } } -func (e *serverEndpoint) handleSealedDiscoControlMsg(from netip.AddrPort, b []byte, serverDisco key.DiscoPublic) (write []byte, to netip.AddrPort) { +func (e *serverEndpoint) handleSealedDiscoControlMsg(from netip.AddrPort, b []byte, serverDisco key.DiscoPublic, macSecrets [][blake2s.Size]byte) (write []byte, to netip.AddrPort) { senderRaw, isDiscoMsg := disco.Source(b) if !isDiscoMsg { // Not a Disco message @@ -223,39 +259,29 @@ func (e *serverEndpoint) handleSealedDiscoControlMsg(from netip.AddrPort, b []by return nil, netip.AddrPort{} } - return e.handleDiscoControlMsg(from, senderIndex, discoMsg, serverDisco) + return e.handleDiscoControlMsg(from, senderIndex, discoMsg, serverDisco, macSecrets) } -func (e *serverEndpoint) handlePacket(from netip.AddrPort, gh packet.GeneveHeader, b []byte, serverDisco key.DiscoPublic) (write []byte, to netip.AddrPort) { - if !gh.Control { - if !e.isBound() { - // not a control packet, but serverEndpoint isn't bound - return nil, netip.AddrPort{} - } - switch { - case from == e.boundAddrPorts[0]: - e.lastSeen[0] = time.Now() - e.packetsRx[0]++ - e.bytesRx[0] += uint64(len(b)) - return b, e.boundAddrPorts[1] - case from == e.boundAddrPorts[1]: - e.lastSeen[1] = time.Now() - e.packetsRx[1]++ - e.bytesRx[1] += uint64(len(b)) - return b, e.boundAddrPorts[0] - default: - // unrecognized source - return nil, netip.AddrPort{} - } +func (e *serverEndpoint) handleDataPacket(from netip.AddrPort, b []byte, now time.Time) (write []byte, to netip.AddrPort) { + if !e.isBound() { + // not a control packet, but serverEndpoint isn't bound + return nil, netip.AddrPort{} } - - if gh.Protocol != packet.GeneveProtocolDisco { - // control packet, but not Disco + switch { + case from == e.boundAddrPorts[0]: + e.lastSeen[0] = now + e.packetsRx[0]++ + e.bytesRx[0] += uint64(len(b)) + return b, e.boundAddrPorts[1] + case from == e.boundAddrPorts[1]: + e.lastSeen[1] = now + e.packetsRx[1]++ + e.bytesRx[1] += uint64(len(b)) + return b, e.boundAddrPorts[0] + default: + // unrecognized source return nil, netip.AddrPort{} } - - msg := b[packet.GeneveFixedHeaderLength:] - return e.handleSealedDiscoControlMsg(from, msg, serverDisco) } func (e *serverEndpoint) isExpired(now time.Time, bindLifetime, steadyStateLifetime time.Duration) bool { @@ -283,7 +309,7 @@ func (e *serverEndpoint) isBound() bool { // onlyStaticAddrPorts is true, then dynamic addr:port discovery will be // disabled, and only addr:port's set via [Server.SetStaticAddrPorts] will be // used. -func NewServer(logf logger.Logf, port int, onlyStaticAddrPorts bool) (s *Server, err error) { +func NewServer(logf logger.Logf, port uint16, onlyStaticAddrPorts bool) (s *Server, err error) { s = &Server{ logf: logf, disco: key.NewDisco(), @@ -500,9 +526,9 @@ func trySetUDPSocketOptions(pconn nettype.PacketConn, logf logger.Logf) { // [magicsock.RebindingConn], which would also remove the need for // [singlePacketConn], as [magicsock.RebindingConn] also handles fallback to // single packet syscall operations. 
-func (s *Server) listenOn(port int) error { +func (s *Server) listenOn(port uint16) error { for _, network := range []string{"udp4", "udp6"} { - uc, err := net.ListenUDP(network, &net.UDPAddr{Port: port}) + uc, err := net.ListenUDP(network, &net.UDPAddr{Port: int(port)}) if err != nil { if network == "udp4" { return err @@ -621,7 +647,35 @@ func (s *Server) handlePacket(from netip.AddrPort, b []byte) (write []byte, to n return nil, netip.AddrPort{} } - return e.handlePacket(from, gh, b, s.discoPublic) + now := time.Now() + if gh.Control { + if gh.Protocol != packet.GeneveProtocolDisco { + // control packet, but not Disco + return nil, netip.AddrPort{} + } + msg := b[packet.GeneveFixedHeaderLength:] + s.maybeRotateMACSecretLocked(now) + return e.handleSealedDiscoControlMsg(from, msg, s.discoPublic, s.macSecrets) + } + return e.handleDataPacket(from, b, now) +} + +func (s *Server) maybeRotateMACSecretLocked(now time.Time) { + if !s.macSecretRotatedAt.IsZero() && now.Sub(s.macSecretRotatedAt) < macSecretRotationInterval { + return + } + switch len(s.macSecrets) { + case 0: + s.macSecrets = make([][blake2s.Size]byte, 1, 2) + case 1: + s.macSecrets = append(s.macSecrets, [blake2s.Size]byte{}) + fallthrough + case 2: + s.macSecrets[1] = s.macSecrets[0] + } + rand.Read(s.macSecrets[0][:]) + s.macSecretRotatedAt = now + return } func (s *Server) packetReadLoop(readFromSocket, otherSocket batching.Conn, readFromSocketIsIPv4 bool) { diff --git a/net/udprelay/server_test.go b/net/udprelay/server_test.go index 6c3d61658..582d4cf67 100644 --- a/net/udprelay/server_test.go +++ b/net/udprelay/server_test.go @@ -5,6 +5,7 @@ package udprelay import ( "bytes" + "crypto/rand" "net" "net/netip" "testing" @@ -14,6 +15,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "go4.org/mem" + "golang.org/x/crypto/blake2s" "tailscale.com/disco" "tailscale.com/net/packet" "tailscale.com/types/key" @@ -352,3 +354,117 @@ func TestServer_getNextVNILocked(t *testing.T) { _, err = s.getNextVNILocked() c.Assert(err, qt.IsNil) } + +func Test_blakeMACFromBindMsg(t *testing.T) { + var macSecret [blake2s.Size]byte + rand.Read(macSecret[:]) + src := netip.MustParseAddrPort("[2001:db8::1]:7") + + msgA := disco.BindUDPRelayEndpointCommon{ + VNI: 1, + Generation: 1, + RemoteKey: key.NewDisco().Public(), + Challenge: [32]byte{}, + } + macA, err := blakeMACFromBindMsg(macSecret, src, msgA) + if err != nil { + t.Fatal(err) + } + + msgB := msgA + msgB.VNI++ + macB, err := blakeMACFromBindMsg(macSecret, src, msgB) + if err != nil { + t.Fatal(err) + } + if macA == macB { + t.Fatalf("varying VNI input produced identical mac: %v", macA) + } + + msgC := msgA + msgC.Generation++ + macC, err := blakeMACFromBindMsg(macSecret, src, msgC) + if err != nil { + t.Fatal(err) + } + if macA == macC { + t.Fatalf("varying Generation input produced identical mac: %v", macA) + } + + msgD := msgA + msgD.RemoteKey = key.NewDisco().Public() + macD, err := blakeMACFromBindMsg(macSecret, src, msgD) + if err != nil { + t.Fatal(err) + } + if macA == macD { + t.Fatalf("varying RemoteKey input produced identical mac: %v", macA) + } + + msgE := msgA + msgE.Challenge = [32]byte{0x01} // challenge is not part of the MAC and should be ignored + macE, err := blakeMACFromBindMsg(macSecret, src, msgE) + if err != nil { + t.Fatal(err) + } + if macA != macE { + t.Fatalf("varying Challenge input produced varying mac: %v", macA) + } + + macSecretB := macSecret + macSecretB[0] ^= 0xFF + macF, err := blakeMACFromBindMsg(macSecretB, src, msgA) + 
if err != nil { + t.Fatal(err) + } + if macA == macF { + t.Fatalf("varying macSecret input produced identical mac: %v", macA) + } + + srcB := netip.AddrPortFrom(src.Addr(), src.Port()+1) + macG, err := blakeMACFromBindMsg(macSecret, srcB, msgA) + if err != nil { + t.Fatal(err) + } + if macA == macG { + t.Fatalf("varying src input produced identical mac: %v", macA) + } +} + +func Benchmark_blakeMACFromBindMsg(b *testing.B) { + var macSecret [blake2s.Size]byte + rand.Read(macSecret[:]) + src := netip.MustParseAddrPort("[2001:db8::1]:7") + msg := disco.BindUDPRelayEndpointCommon{ + VNI: 1, + Generation: 1, + RemoteKey: key.NewDisco().Public(), + Challenge: [32]byte{}, + } + b.ReportAllocs() + for b.Loop() { + _, err := blakeMACFromBindMsg(macSecret, src, msg) + if err != nil { + b.Fatal(err) + } + } +} + +func TestServer_maybeRotateMACSecretLocked(t *testing.T) { + s := &Server{} + start := time.Now() + s.maybeRotateMACSecretLocked(start) + qt.Assert(t, len(s.macSecrets), qt.Equals, 1) + macSecret := s.macSecrets[0] + s.maybeRotateMACSecretLocked(start.Add(macSecretRotationInterval - time.Nanosecond)) + qt.Assert(t, len(s.macSecrets), qt.Equals, 1) + qt.Assert(t, s.macSecrets[0], qt.Equals, macSecret) + s.maybeRotateMACSecretLocked(start.Add(macSecretRotationInterval)) + qt.Assert(t, len(s.macSecrets), qt.Equals, 2) + qt.Assert(t, s.macSecrets[1], qt.Equals, macSecret) + qt.Assert(t, s.macSecrets[0], qt.Not(qt.Equals), s.macSecrets[1]) + s.maybeRotateMACSecretLocked(s.macSecretRotatedAt.Add(macSecretRotationInterval)) + qt.Assert(t, macSecret, qt.Not(qt.Equals), s.macSecrets[0]) + qt.Assert(t, macSecret, qt.Not(qt.Equals), s.macSecrets[1]) + qt.Assert(t, s.macSecrets[0], qt.Not(qt.Equals), s.macSecrets[1]) +} diff --git a/net/udprelay/status/status.go b/net/udprelay/status/status.go index 3866efada..9ed9a0d2a 100644 --- a/net/udprelay/status/status.go +++ b/net/udprelay/status/status.go @@ -14,8 +14,9 @@ import ( type ServerStatus struct { // UDPPort is the UDP port number that the peer relay server forwards over, // as configured by the user with 'tailscale set --relay-server-port='. - // If the port has not been configured, UDPPort will be nil. - UDPPort *int + // If the port has not been configured, UDPPort will be nil. A non-nil zero + // value signifies the user has opted for a random unused port. + UDPPort *uint16 // Sessions is a slice of detailed status information about each peer // relay session that this node's peer relay server is involved with. It // may be empty. 
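The udprelay changes above replace per-handshake stored challenges with a stateless construction: the challenge is a keyed BLAKE2s-256 MAC over the bind message fields and the sender's observed addr:port, so the server can verify an answer purely by recomputation. Below is a self-contained sketch of that construction; challengeMAC is an illustrative name, and the input layout mirrors blakeMACFromBindMsg above (VNI, generation, the invited party's disco key, then the addr:port in binary form).

```go
package main

import (
	"crypto/rand"
	"encoding/binary"
	"fmt"
	"net/netip"

	"golang.org/x/crypto/blake2s"
)

// challengeMAC computes a keyed BLAKE2s-256 over the handshake parameters and
// the observed source addr:port. Because the MAC is recomputable from the
// answer message alone, the server need not remember which challenge it sent.
func challengeMAC(secret [blake2s.Size]byte, src netip.AddrPort, vni, generation uint32, remoteKey [32]byte) ([blake2s.Size]byte, error) {
	input := make([]byte, 8, 4+4+32+18) // vni + generation + disco key + addr:port
	binary.BigEndian.PutUint32(input[0:4], vni)
	binary.BigEndian.PutUint32(input[4:8], generation)
	input = append(input, remoteKey[:]...)
	input, err := src.AppendBinary(input)
	if err != nil {
		return [blake2s.Size]byte{}, err
	}
	h, err := blake2s.New256(secret[:]) // keyed (MAC) mode
	if err != nil {
		return [blake2s.Size]byte{}, err
	}
	h.Write(input)
	var out [blake2s.Size]byte
	h.Sum(out[:0])
	return out, nil
}

func main() {
	var secret [blake2s.Size]byte
	rand.Read(secret[:])
	src := netip.MustParseAddrPort("[2001:db8::1]:7")
	var remoteKey [32]byte // a real server would use the invited party's disco key

	// Issue a challenge for generation 1 on VNI 1...
	mac, err := challengeMAC(secret, src, 1, 1, remoteKey)
	if err != nil {
		panic(err)
	}
	// ...and later verify the answer by recomputing and comparing.
	again, _ := challengeMAC(secret, src, 1, 1, remoteKey)
	fmt.Println("answer accepted:", mac == again)
}
```

Since verification needs only the secret, the server keeps at most two of them (current and previous, rotated by maybeRotateMACSecretLocked every two minutes): an answer MACed under either still completes the handshake, and anything older silently fails.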
diff --git a/portlist/portlist_test.go b/portlist/portlist_test.go index 34277fdba..791a8b118 100644 --- a/portlist/portlist_test.go +++ b/portlist/portlist_test.go @@ -5,12 +5,24 @@ package portlist import ( "net" + "runtime" "testing" "tailscale.com/tstest" ) +func maybeSkip(t *testing.T) { + if runtime.GOOS == "linux" { + tstest.SkipOnKernelVersions(t, + "https://github.com/tailscale/tailscale/issues/16966", + "6.6.102", "6.6.103", "6.6.104", + "6.12.42", "6.12.43", "6.12.44", "6.12.45", + ) + } +} + func TestGetList(t *testing.T) { + maybeSkip(t) tstest.ResourceCheck(t) var p Poller @@ -25,6 +37,7 @@ func TestGetList(t *testing.T) { } func TestIgnoreLocallyBoundPorts(t *testing.T) { + maybeSkip(t) tstest.ResourceCheck(t) ln, err := net.Listen("tcp", "127.0.0.1:0") @@ -47,6 +60,8 @@ func TestIgnoreLocallyBoundPorts(t *testing.T) { } func TestPoller(t *testing.T) { + maybeSkip(t) + var p Poller p.IncludeLocalhost = true get := func(t *testing.T) []Port { diff --git a/shell.nix b/shell.nix index ffb28a183..28bdbdafb 100644 --- a/shell.nix +++ b/shell.nix @@ -16,4 +16,4 @@ ) { src = ./.; }).shellNix -# nix-direnv cache busting line: sha256-sGPgML2YM/XNWfsAdDZvzWHagcydwCmR6nKOHJj5COs= +# nix-direnv cache busting line: sha256-IkodqRYdueML7U2Hh8vRw6Et7+WII+VXuPJ3jZ2xYx8= diff --git a/syncs/mutex.go b/syncs/mutex.go index e61d1d1ab..8034e1712 100644 --- a/syncs/mutex.go +++ b/syncs/mutex.go @@ -16,3 +16,8 @@ type Mutex = sync.Mutex // // It's only not a sync.RWMutex when built with the ts_mutex_debug build tag. type RWMutex = sync.RWMutex + +// RequiresMutex declares the caller assumes it has the given +// mutex held. In non-debug builds, it's a no-op and compiles to +// nothing. +func RequiresMutex(mu *sync.Mutex) {} diff --git a/syncs/mutex_debug.go b/syncs/mutex_debug.go index 14b52ffe3..55a9b1231 100644 --- a/syncs/mutex_debug.go +++ b/syncs/mutex_debug.go @@ -15,4 +15,8 @@ type RWMutex struct { sync.RWMutex } +func RequiresMutex(mu *sync.Mutex) { + // TODO: check +} + // TODO(bradfitz): actually track stuff when in debug mode. diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index 41e0a0b28..8468aa09e 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -177,7 +177,8 @@ type CapabilityVersion int // - 128: 2025-10-02: can handle C2N /debug/health. // - 129: 2025-10-04: Fixed sleep/wake deadlock in magicsock when using peer relay (PR #17449) // - 130: 2025-10-06: client can send key.HardwareAttestationPublic and key.HardwareAttestationKeySignature in MapRequest -const CurrentCapabilityVersion CapabilityVersion = 130 +// - 131: 2025-11-25: client respects [NodeAttrDefaultAutoUpdate] +const CurrentCapabilityVersion CapabilityVersion = 131 // ID is an integer ID for a user, node, or login allocated by the // control plane. @@ -2149,12 +2150,14 @@ type MapResponse struct { // or nothing to report. ClientVersion *ClientVersion `json:",omitempty"` - // DefaultAutoUpdate is the default node auto-update setting for this + // DeprecatedDefaultAutoUpdate is the default node auto-update setting for this // tailnet. The node is free to opt-in or out locally regardless of this - // value. This value is only used on first MapResponse from control, the - // auto-update setting doesn't change if the tailnet admin flips the - // default after the node registered. - DefaultAutoUpdate opt.Bool `json:",omitempty"` + // value. Once this value has been set and stored in the client, future + // changes from the control plane are ignored. + // + // Deprecated: use NodeAttrDefaultAutoUpdate instead. 
See + // https://github.com/tailscale/tailscale/issues/11502. + DeprecatedDefaultAutoUpdate opt.Bool `json:"DefaultAutoUpdate,omitempty"` } // DisplayMessage represents a health state of the node from the control plane's @@ -2721,6 +2724,14 @@ const ( // default behavior is to trust the control plane when it claims that a // node is no longer online, but that is not a reliable signal. NodeAttrClientSideReachability = "client-side-reachability" + + // NodeAttrDefaultAutoUpdate advertises the default node auto-update setting + // for this tailnet. The node is free to opt-in or out locally regardless of + // this value. Once this has been set and stored in the client, future + // changes from the control plane are ignored. + // + // The value of the key in [NodeCapMap] is a JSON boolean. + NodeAttrDefaultAutoUpdate NodeCapability = "default-auto-update" ) // SetDNSRequest is a request to add a DNS record. diff --git a/tka/sync.go b/tka/sync.go index e3a858c15..2dbfb7ac4 100644 --- a/tka/sync.go +++ b/tka/sync.go @@ -32,6 +32,41 @@ type SyncOffer struct { Ancestors []AUMHash } +// ToSyncOffer creates a SyncOffer from the fields received in +// a [tailcfg.TKASyncOfferRequest]. +func ToSyncOffer(head string, ancestors []string) (SyncOffer, error) { + var out SyncOffer + if err := out.Head.UnmarshalText([]byte(head)); err != nil { + return SyncOffer{}, fmt.Errorf("head.UnmarshalText: %v", err) + } + out.Ancestors = make([]AUMHash, len(ancestors)) + for i, a := range ancestors { + if err := out.Ancestors[i].UnmarshalText([]byte(a)); err != nil { + return SyncOffer{}, fmt.Errorf("ancestor[%d].UnmarshalText: %v", i, err) + } + } + return out, nil +} + +// FromSyncOffer marshals the fields of a SyncOffer so they can be +// sent in a [tailcfg.TKASyncOfferRequest]. +func FromSyncOffer(offer SyncOffer) (head string, ancestors []string, err error) { + headBytes, err := offer.Head.MarshalText() + if err != nil { + return "", nil, fmt.Errorf("head.MarshalText: %v", err) + } + + ancestors = make([]string, len(offer.Ancestors)) + for i, ancestor := range offer.Ancestors { + hash, err := ancestor.MarshalText() + if err != nil { + return "", nil, fmt.Errorf("ancestor[%d].MarshalText: %v", i, err) + } + ancestors[i] = string(hash) + } + return string(headBytes), ancestors, nil +} + const ( // The starting number of AUMs to skip when listing // ancestors in a SyncOffer. diff --git a/tka/tailchonk.go b/tka/tailchonk.go index a55033bcd..13bdf6aac 100644 --- a/tka/tailchonk.go +++ b/tka/tailchonk.go @@ -193,7 +193,7 @@ updateLoop: for _, aum := range updates { aumHash := aum.Hash() c.aums[aumHash] = aum - c.commitTimes[aumHash] = c.clock.Now() + c.commitTimes[aumHash] = c.now() parent, ok := aum.Parent() if ok { @@ -209,6 +209,16 @@ updateLoop: return nil } +// now returns the current time, optionally using the overridden +// clock if set. +func (c *Mem) now() time.Time { + if c.clock == nil { + return time.Now() + } else { + return c.clock.Now() + } +} + // RemoveAll permanently and completely clears the TKA state. 
func (c *Mem) RemoveAll() error { c.mu.Lock() diff --git a/tsconsensus/tsconsensus_test.go b/tsconsensus/tsconsensus_test.go index 7f89eb48a..796c8f51b 100644 --- a/tsconsensus/tsconsensus_test.go +++ b/tsconsensus/tsconsensus_test.go @@ -17,7 +17,6 @@ import ( "net/netip" "os" "path/filepath" - "runtime" "strings" "sync" "testing" @@ -27,7 +26,6 @@ import ( "github.com/hashicorp/go-hclog" "github.com/hashicorp/raft" "tailscale.com/client/tailscale" - "tailscale.com/cmd/testwrapper/flakytest" "tailscale.com/ipn/store/mem" "tailscale.com/net/netns" "tailscale.com/tailcfg" @@ -115,8 +113,8 @@ func (f *fsm) Restore(rc io.ReadCloser) error { } func testConfig(t *testing.T) { - if runtime.GOOS == "windows" && cibuild.On() { - t.Skip("cmd/natc isn't supported on Windows, so skipping tsconsensus tests on CI for now; see https://github.com/tailscale/tailscale/issues/16340") + if cibuild.On() { + t.Skip("these integration tests don't always work well in CI and that's bad for CI; see https://github.com/tailscale/tailscale/issues/16340 and https://github.com/tailscale/tailscale/issues/18022") } // -race AND Parallel makes things start to take too long. if !racebuild.On { @@ -251,7 +249,6 @@ func warnLogConfig() Config { } func TestStart(t *testing.T) { - flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/15627") testConfig(t) control, controlURL := startControl(t) ctx := context.Background() @@ -372,7 +369,6 @@ func createConsensusCluster(t testing.TB, ctx context.Context, clusterTag string } func TestApply(t *testing.T) { - flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/15627") testConfig(t) ctx := context.Background() clusterTag := "tag:whatever" @@ -437,7 +433,6 @@ func assertCommandsWorkOnAnyNode(t testing.TB, participants []*participant) { } func TestConfig(t *testing.T) { - flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/15627") testConfig(t) ctx := context.Background() clusterTag := "tag:whatever" @@ -477,7 +472,6 @@ func TestConfig(t *testing.T) { } func TestFollowerFailover(t *testing.T) { - flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/15627") testConfig(t) ctx := context.Background() clusterTag := "tag:whatever" @@ -549,7 +543,6 @@ func TestFollowerFailover(t *testing.T) { } func TestRejoin(t *testing.T) { - flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/15627") testConfig(t) ctx := context.Background() clusterTag := "tag:whatever" @@ -585,7 +578,6 @@ func TestRejoin(t *testing.T) { } func TestOnlyTaggedPeersCanDialRaftPort(t *testing.T) { - flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/15627") testConfig(t) ctx := context.Background() clusterTag := "tag:whatever" @@ -643,7 +635,6 @@ func TestOnlyTaggedPeersCanDialRaftPort(t *testing.T) { } func TestOnlyTaggedPeersCanBeDialed(t *testing.T) { - flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/15627") testConfig(t) ctx := context.Background() clusterTag := "tag:whatever" diff --git a/tsnet/depaware.txt b/tsnet/depaware.txt index 7d5ec0a60..825a39e34 100644 --- a/tsnet/depaware.txt +++ b/tsnet/depaware.txt @@ -36,6 +36,7 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware) github.com/klauspost/compress/fse from github.com/klauspost/compress/huff0 github.com/klauspost/compress/huff0 from github.com/klauspost/compress/zstd github.com/klauspost/compress/internal/cpuinfo from github.com/klauspost/compress/huff0+ + 💣 github.com/klauspost/compress/internal/le from github.com/klauspost/compress/huff0+ 
github.com/klauspost/compress/internal/snapref from github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd from tailscale.com/util/zstdframe github.com/klauspost/compress/zstd/internal/xxhash from github.com/klauspost/compress/zstd diff --git a/tstest/integration/integration.go b/tstest/integration/integration.go index 6700205cf..a62173ae3 100644 --- a/tstest/integration/integration.go +++ b/tstest/integration/integration.go @@ -576,6 +576,7 @@ type TestNode struct { stateFile string upFlagGOOS string // if non-empty, sets TS_DEBUG_UP_FLAG_GOOS for cmd/tailscale CLI encryptState bool + allowUpdates bool mu sync.Mutex onLogLine []func([]byte) @@ -840,6 +841,9 @@ func (n *TestNode) StartDaemonAsIPNGOOS(ipnGOOS string) *Daemon { "TS_DISABLE_PORTMAPPER=1", // shouldn't be needed; test is all localhost "TS_DEBUG_LOG_RATE=all", ) + if n.allowUpdates { + cmd.Env = append(cmd.Env, "TS_TEST_ALLOW_AUTO_UPDATE=1") + } if n.env.loopbackPort != nil { cmd.Env = append(cmd.Env, "TS_DEBUG_NETSTACK_LOOPBACK_PORT="+strconv.Itoa(*n.env.loopbackPort)) } @@ -914,7 +918,7 @@ func (n *TestNode) Ping(otherNode *TestNode) error { t := n.env.t ip := otherNode.AwaitIP4().String() t.Logf("Running ping %v (from %v)...", ip, n.AwaitIP4()) - return n.Tailscale("ping", ip).Run() + return n.Tailscale("ping", "--timeout=1s", ip).Run() } // AwaitListening waits for the tailscaled to be serving local clients @@ -1073,6 +1077,46 @@ func (n *TestNode) MustStatus() *ipnstate.Status { return st } +// PublicKey returns the hex-encoded public key of this node, +// e.g. `nodekey:123456abc` +func (n *TestNode) PublicKey() string { + tb := n.env.t + tb.Helper() + cmd := n.Tailscale("status", "--json") + out, err := cmd.CombinedOutput() + if err != nil { + tb.Fatalf("running `tailscale status`: %v, %s", err, out) + } + + type Self struct{ PublicKey string } + type StatusOutput struct{ Self Self } + + var st StatusOutput + if err := json.Unmarshal(out, &st); err != nil { + tb.Fatalf("decoding `tailscale status` JSON: %v\njson:\n%s", err, out) + } + return st.Self.PublicKey +} + +// NLPublicKey returns the hex-encoded network lock public key of +// this node, e.g. `tlpub:123456abc` +func (n *TestNode) NLPublicKey() string { + tb := n.env.t + tb.Helper() + cmd := n.Tailscale("lock", "status", "--json") + out, err := cmd.CombinedOutput() + if err != nil { + tb.Fatalf("running `tailscale lock status`: %v, %s", err, out) + } + st := struct { + PublicKey string `json:"PublicKey"` + }{} + if err := json.Unmarshal(out, &st); err != nil { + tb.Fatalf("decoding `tailscale lock status` JSON: %v\njson:\n%s", err, out) + } + return st.PublicKey +} + // trafficTrap is an HTTP proxy handler to note whether any // HTTP traffic tries to leave localhost from tailscaled. We don't // expect any, so any request triggers a failure. 
diff --git a/tstest/integration/integration_test.go b/tstest/integration/integration_test.go index 9d75cfc29..fc891ad72 100644 --- a/tstest/integration/integration_test.go +++ b/tstest/integration/integration_test.go @@ -22,8 +22,10 @@ import ( "path/filepath" "regexp" "runtime" + "slices" "strconv" "strings" + "sync" "sync/atomic" "testing" "time" @@ -36,6 +38,7 @@ import ( "tailscale.com/cmd/testwrapper/flakytest" "tailscale.com/feature" _ "tailscale.com/feature/clientupdate" + "tailscale.com/health" "tailscale.com/hostinfo" "tailscale.com/ipn" "tailscale.com/net/tsaddr" @@ -1410,14 +1413,27 @@ func TestLogoutRemovesAllPeers(t *testing.T) { wantNode0PeerCount(expectedPeers) // all existing peers and the new node } -func TestAutoUpdateDefaults(t *testing.T) { - if !feature.CanAutoUpdate() { - t.Skip("auto-updates not supported on this platform") - } +func TestAutoUpdateDefaults(t *testing.T) { testAutoUpdateDefaults(t, false) } +func TestAutoUpdateDefaults_cap(t *testing.T) { testAutoUpdateDefaults(t, true) } + +// useCap is whether to use NodeAttrDefaultAutoUpdate (as opposed to the old +// DeprecatedDefaultAutoUpdate top-level MapResponse field). +func testAutoUpdateDefaults(t *testing.T, useCap bool) { + t.Cleanup(feature.HookCanAutoUpdate.SetForTest(func() bool { return true })) + tstest.Shard(t) - tstest.Parallel(t) env := NewTestEnv(t) + var ( + modifyMu sync.Mutex + modifyFirstMapResponse = func(*tailcfg.MapResponse, *tailcfg.MapRequest) {} + ) + env.Control.ModifyFirstMapResponse = func(mr *tailcfg.MapResponse, req *tailcfg.MapRequest) { + modifyMu.Lock() + defer modifyMu.Unlock() + modifyFirstMapResponse(mr, req) + } + checkDefault := func(n *TestNode, want bool) error { enabled, ok := n.diskPrefs().AutoUpdate.Apply.Get() if !ok { @@ -1429,17 +1445,23 @@ func TestAutoUpdateDefaults(t *testing.T) { return nil } - sendAndCheckDefault := func(t *testing.T, n *TestNode, send, want bool) { - t.Helper() - if !env.Control.AddRawMapResponse(n.MustStatus().Self.PublicKey, &tailcfg.MapResponse{ - DefaultAutoUpdate: opt.NewBool(send), - }) { - t.Fatal("failed to send MapResponse to node") - } - if err := tstest.WaitFor(2*time.Second, func() error { - return checkDefault(n, want) - }); err != nil { - t.Fatal(err) + setDefaultAutoUpdate := func(send bool) { + modifyMu.Lock() + defer modifyMu.Unlock() + modifyFirstMapResponse = func(mr *tailcfg.MapResponse, req *tailcfg.MapRequest) { + if mr.Node == nil { + mr.Node = &tailcfg.Node{} + } + if useCap { + if mr.Node.CapMap == nil { + mr.Node.CapMap = make(tailcfg.NodeCapMap) + } + mr.Node.CapMap[tailcfg.NodeAttrDefaultAutoUpdate] = []tailcfg.RawMessage{ + tailcfg.RawMessage(fmt.Sprintf("%t", send)), + } + } else { + mr.DeprecatedDefaultAutoUpdate = opt.NewBool(send) + } } } @@ -1450,29 +1472,54 @@ func TestAutoUpdateDefaults(t *testing.T) { { desc: "tailnet-default-false", run: func(t *testing.T, n *TestNode) { - // First received default "false". - sendAndCheckDefault(t, n, false, false) - // Should not be changed even if sent "true" later. - sendAndCheckDefault(t, n, true, false) + + // First the server sends "false", and client should remember that. + setDefaultAutoUpdate(false) + n.MustUp() + n.AwaitRunning() + checkDefault(n, false) + + // Now we disconnect and change the server to send "true", which + // the client should ignore, having previously remembered + // "false". 
+			n.MustDown()
+			setDefaultAutoUpdate(true) // control sends default "true"
+			n.MustUp()
+			n.AwaitRunning()
+			checkDefault(n, false) // still false
+
 			// But can be changed explicitly by the user.
 			if out, err := n.TailscaleForOutput("set", "--auto-update").CombinedOutput(); err != nil {
 				t.Fatalf("failed to enable auto-update on node: %v\noutput: %s", err, out)
 			}
-			sendAndCheckDefault(t, n, false, true)
+			checkDefault(n, true)
 		},
 	},
 	{
 		desc: "tailnet-default-true",
 		run: func(t *testing.T, n *TestNode) {
-			// First received default "true".
-			sendAndCheckDefault(t, n, true, true)
-			// Should not be changed even if sent "false" later.
-			sendAndCheckDefault(t, n, false, true)
+			// Same as above but starting with default "true".
+
+			// First the server sends "true", and client should remember that.
+			setDefaultAutoUpdate(true)
+			n.MustUp()
+			n.AwaitRunning()
+			checkDefault(n, true)
+
+			// Now we disconnect and change the server to send "false", which
+			// the client should ignore, having previously remembered
+			// "true".
+			n.MustDown()
+			setDefaultAutoUpdate(false) // control sends default "false"
+			n.MustUp()
+			n.AwaitRunning()
+			checkDefault(n, true) // still true
+
 			// But can be changed explicitly by the user.
 			if out, err := n.TailscaleForOutput("set", "--auto-update=false").CombinedOutput(); err != nil {
 				t.Fatalf("failed to disable auto-update on node: %v\noutput: %s", err, out)
 			}
-			sendAndCheckDefault(t, n, true, false)
+			checkDefault(n, false)
 		},
 	},
 	{
@@ -1482,22 +1529,21 @@
 			if out, err := n.TailscaleForOutput("set", "--auto-update=false").CombinedOutput(); err != nil {
 				t.Fatalf("failed to disable auto-update on node: %v\noutput: %s", err, out)
 			}
-			// Defaults sent from control should be ignored.
-			sendAndCheckDefault(t, n, true, false)
-			sendAndCheckDefault(t, n, false, false)
+
+			setDefaultAutoUpdate(true)
+			n.MustUp()
+			n.AwaitRunning()
+			checkDefault(n, false)
 		},
 	},
 }
 
 for _, tt := range tests {
 	t.Run(tt.desc, func(t *testing.T) {
 		n := NewTestNode(t, env)
+		n.allowUpdates = true
 		d := n.StartDaemon()
 		defer d.MustCleanShutdown(t)
-		n.AwaitResponding()
-		n.MustUp()
-		n.AwaitRunning()
-
 		tt.run(t, n)
 	})
 }
@@ -2207,7 +2253,7 @@ func TestC2NDebugNetmap(t *testing.T) {
 	}
 }
 
-func TestNetworkLock(t *testing.T) {
+func TestTailnetLock(t *testing.T) {
 
 	// If you run `tailscale lock log` on a node where Tailnet Lock isn't
 	// enabled, you get an error explaining that.
@@ -2245,4 +2291,112 @@
 		t.Fatalf("stderr: want %q, got %q", wantErr, errBuf.String())
 	}
 	})
+
+	// If you create a tailnet with two signed nodes and one unsigned,
+	// the signed nodes can talk to each other but the unsigned node cannot
+	// talk to anybody.
+	t.Run("node-connectivity", func(t *testing.T) {
+		tstest.Shard(t)
+		t.Parallel()
+
+		env := NewTestEnv(t)
+		env.Control.DefaultNodeCapabilities = &tailcfg.NodeCapMap{
+			tailcfg.CapabilityTailnetLock: []tailcfg.RawMessage{},
+		}
+
+		// Start two nodes which will be our signing nodes.
+		signing1 := NewTestNode(t, env)
+		signing2 := NewTestNode(t, env)
+
+		nodes := []*TestNode{signing1, signing2}
+		for _, n := range nodes {
+			d := n.StartDaemon()
+			defer d.MustCleanShutdown(t)
+
+			n.MustUp()
+			n.AwaitRunning()
+		}
+
+		// Initiate Tailnet Lock with the two signing nodes.
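+		// `lock init` is given the tailnet lock public keys that are trusted
+		// to sign nodes; --gen-disablements additionally generates that many
+		// one-time disablement secrets.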
+ initCmd := signing1.Tailscale("lock", "init", + "--gen-disablements", "10", + "--confirm", + signing1.NLPublicKey(), signing2.NLPublicKey(), + ) + out, err := initCmd.CombinedOutput() + if err != nil { + t.Fatalf("init command failed: %q\noutput=%v", err, string(out)) + } + + // Check that the two signing nodes can ping each other + if err := signing1.Ping(signing2); err != nil { + t.Fatalf("ping signing1 -> signing2: %v", err) + } + if err := signing2.Ping(signing1); err != nil { + t.Fatalf("ping signing2 -> signing1: %v", err) + } + + // Create and start a third node + node3 := NewTestNode(t, env) + d3 := node3.StartDaemon() + defer d3.MustCleanShutdown(t) + node3.MustUp() + node3.AwaitRunning() + + if err := signing1.Ping(node3); err == nil { + t.Fatal("ping signing1 -> node3: expected err, but succeeded") + } + if err := node3.Ping(signing1); err == nil { + t.Fatal("ping node3 -> signing1: expected err, but succeeded") + } + + // Sign node3, and check the nodes can now talk to each other + signCmd := signing1.Tailscale("lock", "sign", node3.PublicKey()) + out, err = signCmd.CombinedOutput() + if err != nil { + t.Fatalf("sign command failed: %q\noutput = %v", err, string(out)) + } + + if err := signing1.Ping(node3); err != nil { + t.Fatalf("ping signing1 -> node3: expected success, got err: %v", err) + } + if err := node3.Ping(signing1); err != nil { + t.Fatalf("ping node3 -> signing1: expected success, got err: %v", err) + } + }) +} + +func TestNodeWithBadStateFile(t *testing.T) { + tstest.Shard(t) + tstest.Parallel(t) + env := NewTestEnv(t) + n1 := NewTestNode(t, env) + if err := os.WriteFile(n1.stateFile, []byte("bad json"), 0644); err != nil { + t.Fatal(err) + } + + d1 := n1.StartDaemon() + n1.AwaitResponding() + + // Make sure the health message shows up in status output. + n1.AwaitBackendState("NoState") + st := n1.MustStatus() + wantHealth := ipn.StateStoreHealth.Text(health.Args{health.ArgError: ""}) + if !slices.ContainsFunc(st.Health, func(m string) bool { return strings.HasPrefix(m, wantHealth) }) { + t.Errorf("Status does not contain expected health message %q\ngot health messages: %q", wantHealth, st.Health) + } + + // Make sure login attempts are rejected. 
+ cmd := n1.Tailscale("up", "--login-server="+n1.env.ControlURL()) + t.Logf("Running %v ...", cmd) + out, err := cmd.CombinedOutput() + if err == nil { + t.Fatalf("up succeeded with output %q", out) + } + wantOut := "cannot start backend when state store is unhealthy" + if !strings.Contains(string(out), wantOut) { + t.Fatalf("got up output:\n%s\nwant:\n%s", string(out), wantOut) + } + + d1.MustCleanShutdown(t) } diff --git a/tstest/integration/tailscaled_deps_test_darwin.go b/tstest/integration/tailscaled_deps_test_darwin.go index 217188f75..9f92839d8 100644 --- a/tstest/integration/tailscaled_deps_test_darwin.go +++ b/tstest/integration/tailscaled_deps_test_darwin.go @@ -27,6 +27,7 @@ import ( _ "tailscale.com/ipn/ipnlocal" _ "tailscale.com/ipn/ipnserver" _ "tailscale.com/ipn/store" + _ "tailscale.com/ipn/store/mem" _ "tailscale.com/logpolicy" _ "tailscale.com/logtail" _ "tailscale.com/net/dns" diff --git a/tstest/integration/tailscaled_deps_test_freebsd.go b/tstest/integration/tailscaled_deps_test_freebsd.go index 217188f75..9f92839d8 100644 --- a/tstest/integration/tailscaled_deps_test_freebsd.go +++ b/tstest/integration/tailscaled_deps_test_freebsd.go @@ -27,6 +27,7 @@ import ( _ "tailscale.com/ipn/ipnlocal" _ "tailscale.com/ipn/ipnserver" _ "tailscale.com/ipn/store" + _ "tailscale.com/ipn/store/mem" _ "tailscale.com/logpolicy" _ "tailscale.com/logtail" _ "tailscale.com/net/dns" diff --git a/tstest/integration/tailscaled_deps_test_linux.go b/tstest/integration/tailscaled_deps_test_linux.go index 217188f75..9f92839d8 100644 --- a/tstest/integration/tailscaled_deps_test_linux.go +++ b/tstest/integration/tailscaled_deps_test_linux.go @@ -27,6 +27,7 @@ import ( _ "tailscale.com/ipn/ipnlocal" _ "tailscale.com/ipn/ipnserver" _ "tailscale.com/ipn/store" + _ "tailscale.com/ipn/store/mem" _ "tailscale.com/logpolicy" _ "tailscale.com/logtail" _ "tailscale.com/net/dns" diff --git a/tstest/integration/tailscaled_deps_test_openbsd.go b/tstest/integration/tailscaled_deps_test_openbsd.go index 217188f75..9f92839d8 100644 --- a/tstest/integration/tailscaled_deps_test_openbsd.go +++ b/tstest/integration/tailscaled_deps_test_openbsd.go @@ -27,6 +27,7 @@ import ( _ "tailscale.com/ipn/ipnlocal" _ "tailscale.com/ipn/ipnserver" _ "tailscale.com/ipn/store" + _ "tailscale.com/ipn/store/mem" _ "tailscale.com/logpolicy" _ "tailscale.com/logtail" _ "tailscale.com/net/dns" diff --git a/tstest/integration/tailscaled_deps_test_windows.go b/tstest/integration/tailscaled_deps_test_windows.go index f3cd5e75b..82f8097c8 100644 --- a/tstest/integration/tailscaled_deps_test_windows.go +++ b/tstest/integration/tailscaled_deps_test_windows.go @@ -37,6 +37,7 @@ import ( _ "tailscale.com/ipn/ipnlocal" _ "tailscale.com/ipn/ipnserver" _ "tailscale.com/ipn/store" + _ "tailscale.com/ipn/store/mem" _ "tailscale.com/logpolicy" _ "tailscale.com/logtail" _ "tailscale.com/net/dns" diff --git a/tstest/integration/testcontrol/testcontrol.go b/tstest/integration/testcontrol/testcontrol.go index f9a33705b..19964c91f 100644 --- a/tstest/integration/testcontrol/testcontrol.go +++ b/tstest/integration/testcontrol/testcontrol.go @@ -33,6 +33,8 @@ import ( "tailscale.com/net/tsaddr" "tailscale.com/syncs" "tailscale.com/tailcfg" + "tailscale.com/tka" + "tailscale.com/tstest/tkatest" "tailscale.com/types/key" "tailscale.com/types/logger" "tailscale.com/types/opt" @@ -79,6 +81,10 @@ type Server struct { ExplicitBaseURL string // e.g. 
"http://127.0.0.1:1234" with no trailing URL HTTPTestServer *httptest.Server // if non-nil, used to get BaseURL + // ModifyFirstMapResponse, if non-nil, is called exactly once per + // MapResponse stream to modify the first MapResponse sent in response to it. + ModifyFirstMapResponse func(*tailcfg.MapResponse, *tailcfg.MapRequest) + initMuxOnce sync.Once mux *http.ServeMux @@ -119,6 +125,10 @@ type Server struct { nodeKeyAuthed set.Set[key.NodePublic] msgToSend map[key.NodePublic]any // value is *tailcfg.PingRequest or entire *tailcfg.MapResponse allExpired bool // All nodes will be told their node key is expired. + + // tkaStorage records the Tailnet Lock state, if any. + // If nil, Tailnet Lock is not enabled in the Tailnet. + tkaStorage tka.CompactableChonk } // BaseURL returns the server's base URL, without trailing slash. @@ -325,6 +335,7 @@ func (s *Server) initMux() { w.WriteHeader(http.StatusNoContent) }) s.mux.HandleFunc("/key", s.serveKey) + s.mux.HandleFunc("/machine/tka/", s.serveTKA) s.mux.HandleFunc("/machine/", s.serveMachine) s.mux.HandleFunc("/ts2021", s.serveNoiseUpgrade) s.mux.HandleFunc("/c2n/", s.serveC2N) @@ -435,7 +446,7 @@ func (s *Server) serveKey(w http.ResponseWriter, r *http.Request) { func (s *Server) serveMachine(w http.ResponseWriter, r *http.Request) { if r.Method != "POST" { - http.Error(w, "POST required", 400) + http.Error(w, "POST required for serveMachine", 400) return } ctx := r.Context() @@ -464,6 +475,9 @@ func (s *Server) SetSubnetRoutes(nodeKey key.NodePublic, routes []netip.Prefix) defer s.mu.Unlock() s.logf("Setting subnet routes for %s: %v", nodeKey.ShortString(), routes) mak.Set(&s.nodeSubnetRoutes, nodeKey, routes) + if node, ok := s.nodes[nodeKey]; ok { + sendUpdate(s.updates[node.ID], updateSelfChanged) + } } // MasqueradePair is a pair of nodes and the IP address that the @@ -854,6 +868,132 @@ func (s *Server) serveRegister(w http.ResponseWriter, r *http.Request, mkey key. w.Write(res) } +func (s *Server) serveTKA(w http.ResponseWriter, r *http.Request) { + if r.Method != "GET" { + http.Error(w, "GET required for serveTKA", 400) + return + } + + switch r.URL.Path { + case "/machine/tka/init/begin": + s.serveTKAInitBegin(w, r) + case "/machine/tka/init/finish": + s.serveTKAInitFinish(w, r) + case "/machine/tka/bootstrap": + s.serveTKABootstrap(w, r) + case "/machine/tka/sync/offer": + s.serveTKASyncOffer(w, r) + case "/machine/tka/sign": + s.serveTKASign(w, r) + default: + s.serveUnhandled(w, r) + } +} + +func (s *Server) serveTKAInitBegin(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + nodes := maps.Values(s.nodes) + genesisAUM, err := tkatest.HandleTKAInitBegin(w, r, nodes) + if err != nil { + go panic(fmt.Sprintf("HandleTKAInitBegin: %v", err)) + } + s.tkaStorage = tka.ChonkMem() + s.tkaStorage.CommitVerifiedAUMs([]tka.AUM{*genesisAUM}) +} + +func (s *Server) serveTKAInitFinish(w http.ResponseWriter, r *http.Request) { + signatures, err := tkatest.HandleTKAInitFinish(w, r) + if err != nil { + go panic(fmt.Sprintf("HandleTKAInitFinish: %v", err)) + } + + s.mu.Lock() + defer s.mu.Unlock() + + // Apply the signatures to each of the nodes. Because s.nodes is keyed + // by public key instead of node ID, we have to do this inefficiently. + // + // We only have small tailnets in the integration tests, so this isn't + // much of an issue. 
+ for nodeID, sig := range signatures { + for _, n := range s.nodes { + if n.ID == nodeID { + n.KeySignature = sig + } + } + } +} + +func (s *Server) serveTKABootstrap(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + if s.tkaStorage == nil { + http.Error(w, "no TKA state when calling serveTKABootstrap", 400) + return + } + + // Find the genesis AUM, which we need to include in the response. + var genesis *tka.AUM + allAUMs, err := s.tkaStorage.AllAUMs() + if err != nil { + http.Error(w, "unable to retrieve all AUMs from TKA state", 500) + return + } + for _, h := range allAUMs { + aum := must.Get(s.tkaStorage.AUM(h)) + if _, hasParent := aum.Parent(); !hasParent { + genesis = &aum + break + } + } + if genesis == nil { + http.Error(w, "unable to find genesis AUM in TKA state", 500) + return + } + + resp := tailcfg.TKABootstrapResponse{ + GenesisAUM: genesis.Serialize(), + } + _, err = tkatest.HandleTKABootstrap(w, r, resp) + if err != nil { + go panic(fmt.Sprintf("HandleTKABootstrap: %v", err)) + } +} + +func (s *Server) serveTKASyncOffer(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + authority, err := tka.Open(s.tkaStorage) + if err != nil { + go panic(fmt.Sprintf("serveTKASyncOffer: tka.Open: %v", err)) + } + + err = tkatest.HandleTKASyncOffer(w, r, authority, s.tkaStorage) + if err != nil { + go panic(fmt.Sprintf("HandleTKASyncOffer: %v", err)) + } +} + +func (s *Server) serveTKASign(w http.ResponseWriter, r *http.Request) { + s.mu.Lock() + defer s.mu.Unlock() + + authority, err := tka.Open(s.tkaStorage) + if err != nil { + go panic(fmt.Sprintf("serveTKASign: tka.Open: %v", err)) + } + + sig, keyBeingSigned, err := tkatest.HandleTKASign(w, r, authority) + if err != nil { + go panic(fmt.Sprintf("HandleTKASign: %v", err)) + } + s.nodes[*keyBeingSigned].KeySignature = *sig + s.updateLocked("TKASign", s.nodeIDsLocked(0)) +} + // updateType indicates why a long-polling map request is being woken // up for an update. type updateType int @@ -990,6 +1130,7 @@ func (s *Server) serveMap(w http.ResponseWriter, r *http.Request, mkey key.Machi // register an updatesCh to get updates. streaming := req.Stream && !req.ReadOnly compress := req.Compress != "" + first := true w.WriteHeader(200) for { @@ -1022,6 +1163,10 @@ func (s *Server) serveMap(w http.ResponseWriter, r *http.Request, mkey key.Machi if allExpired { res.Node.KeyExpiry = time.Now().Add(-1 * time.Minute) } + if f := s.ModifyFirstMapResponse; first && f != nil { + first = false + f(res, req) + } // TODO: add minner if/when needed resBytes, err := json.Marshal(res) if err != nil { @@ -1185,6 +1330,21 @@ func (s *Server) MapResponse(req *tailcfg.MapRequest) (res *tailcfg.MapResponse, v6Prefix, } + // If the server is tracking TKA state, and there's a single TKA head, + // add it to the MapResponse. 
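+	// A client compares this head against its own and starts a TKA sync
+	// when the two differ.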
+	if s.tkaStorage != nil {
+		heads, err := s.tkaStorage.Heads()
+		if err != nil {
+			log.Printf("unable to get TKA heads: %v", err)
+		} else if len(heads) != 1 {
+			log.Printf("unable to get single TKA head, got %v", heads)
+		} else {
+			res.TKAInfo = &tailcfg.TKAInfo{
+				Head: heads[0].Hash().String(),
+			}
+		}
+	}
+
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	res.Node.PrimaryRoutes = s.nodeSubnetRoutes[nk]
diff --git a/tstest/kernel_linux.go b/tstest/kernel_linux.go
new file mode 100644
index 000000000..664fe9bdd
--- /dev/null
+++ b/tstest/kernel_linux.go
@@ -0,0 +1,50 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build linux
+
+package tstest
+
+import (
+	"strconv"
+	"strings"
+
+	"golang.org/x/sys/unix"
+)
+
+// KernelVersion returns the major, minor, and patch version of the Linux kernel.
+// It returns (0, 0, 0) if the version cannot be determined.
+func KernelVersion() (major, minor, patch int) {
+	var uname unix.Utsname
+	if err := unix.Uname(&uname); err != nil {
+		return 0, 0, 0
+	}
+	release := unix.ByteSliceToString(uname.Release[:])
+
+	// Parse version string (e.g., "5.15.0-...")
+	parts := strings.Split(release, ".")
+	if len(parts) < 3 {
+		return 0, 0, 0
+	}
+
+	major, err := strconv.Atoi(parts[0])
+	if err != nil {
+		return 0, 0, 0
+	}
+
+	minor, err = strconv.Atoi(parts[1])
+	if err != nil {
+		return 0, 0, 0
+	}
+
+	// Patch version may have additional info after a hyphen (e.g., "0-76-generic").
+	// Extract just the numeric part before any hyphen.
+	patchStr, _, _ := strings.Cut(parts[2], "-")
+
+	patch, err = strconv.Atoi(patchStr)
+	if err != nil {
+		return 0, 0, 0
+	}
+
+	return major, minor, patch
+}
diff --git a/tstest/kernel_other.go b/tstest/kernel_other.go
new file mode 100644
index 000000000..bf69be6df
--- /dev/null
+++ b/tstest/kernel_other.go
@@ -0,0 +1,11 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build !linux
+
+package tstest
+
+// KernelVersion returns (0, 0, 0) on unsupported platforms.
+func KernelVersion() (major, minor, patch int) {
+	return 0, 0, 0
+}
diff --git a/tstest/tkatest/tkatest.go b/tstest/tkatest/tkatest.go
new file mode 100644
index 000000000..fb157a1a1
--- /dev/null
+++ b/tstest/tkatest/tkatest.go
@@ -0,0 +1,220 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+// Package tkatest has functions for creating a mock control server
+// that responds to TKA endpoints.
+package tkatest
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"iter"
+	"log"
+	"net/http"
+
+	"tailscale.com/tailcfg"
+	"tailscale.com/tka"
+	"tailscale.com/types/key"
+	"tailscale.com/types/tkatype"
+)
+
+func serverError(w http.ResponseWriter, format string, a ...any) error {
+	msg := fmt.Sprintf(format, a...)
+	http.Error(w, msg, 500)
+	log.Printf("returning HTTP 500 error: %v", msg)
+	return errors.New(msg)
+}
+
+func userError(w http.ResponseWriter, format string, a ...any) error {
+	msg := fmt.Sprintf(format, a...)
+	http.Error(w, msg, 400)
+	return errors.New(msg)
+}
+
+// HandleTKAInitBegin handles a request to /machine/tka/init/begin.
+//
+// If the request contains a valid genesis AUM, it sends a response to the
+// client, and returns the AUM to the caller.
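+//
+// The response asks for a signature for every node the server knows
+// about; committing the genesis AUM to storage is left to the caller.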
+func HandleTKAInitBegin(w http.ResponseWriter, r *http.Request, nodes iter.Seq[*tailcfg.Node]) (*tka.AUM, error) {
+	var req *tailcfg.TKAInitBeginRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		return nil, userError(w, "Decode: %v", err)
+	}
+	var aum tka.AUM
+	if err := aum.Unserialize(req.GenesisAUM); err != nil {
+		return nil, userError(w, "invalid genesis AUM: %v", err)
+	}
+	beginResp := tailcfg.TKAInitBeginResponse{}
+	for n := range nodes {
+		beginResp.NeedSignatures = append(
+			beginResp.NeedSignatures,
+			tailcfg.TKASignInfo{
+				NodeID:     n.ID,
+				NodePublic: n.Key,
+			},
+		)
+	}
+
+	w.WriteHeader(200)
+	if err := json.NewEncoder(w).Encode(beginResp); err != nil {
+		return nil, serverError(w, "Encode: %v", err)
+	}
+	return &aum, nil
+}
+
+// HandleTKAInitFinish handles a request to /machine/tka/init/finish.
+//
+// It sends a response to the client, and gives the caller a list of node
+// signatures to apply.
+//
+// This method assumes that the node signatures are valid, and does not
+// verify them with the supplied public key.
+func HandleTKAInitFinish(w http.ResponseWriter, r *http.Request) (map[tailcfg.NodeID]tkatype.MarshaledSignature, error) {
+	var req *tailcfg.TKAInitFinishRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		return nil, userError(w, "Decode: %v", err)
+	}
+
+	w.WriteHeader(200)
+	w.Write([]byte("{}"))
+
+	return req.Signatures, nil
+}
+
+// HandleTKABootstrap handles a request to /machine/tka/bootstrap.
+//
+// If the request is valid, it sends a response to the client, and returns
+// the parsed request to the caller.
+func HandleTKABootstrap(w http.ResponseWriter, r *http.Request, resp tailcfg.TKABootstrapResponse) (*tailcfg.TKABootstrapRequest, error) {
+	req := new(tailcfg.TKABootstrapRequest)
+	if err := json.NewDecoder(r.Body).Decode(req); err != nil {
+		return nil, userError(w, "Decode: %v", err)
+	}
+	if req.Version != tailcfg.CurrentCapabilityVersion {
+		return nil, userError(w, "bootstrap CapVer = %v, want %v", req.Version, tailcfg.CurrentCapabilityVersion)
+	}
+
+	w.WriteHeader(200)
+	if err := json.NewEncoder(w).Encode(resp); err != nil {
+		return nil, serverError(w, "Encode: %v", err)
+	}
+	return req, nil
+}
+
+// HandleTKASyncOffer handles a request to /machine/tka/sync/offer.
+//
+// It compares the node's sync offer against the authority's state and
+// responds with control's own offer plus any AUMs the node is missing.
+func HandleTKASyncOffer(w http.ResponseWriter, r *http.Request, authority *tka.Authority, chonk tka.Chonk) error {
+	body := new(tailcfg.TKASyncOfferRequest)
+	if err := json.NewDecoder(r.Body).Decode(body); err != nil {
+		return userError(w, "Decode: %v", err)
+	}
+
+	log.Printf("got sync offer:\n%+v", body)
+
+	nodeOffer, err := tka.ToSyncOffer(body.Head, body.Ancestors)
+	if err != nil {
+		return userError(w, "ToSyncOffer: %v", err)
+	}
+
+	controlOffer, err := authority.SyncOffer(chonk)
+	if err != nil {
+		return serverError(w, "authority.SyncOffer: %v", err)
+	}
+	sendAUMs, err := authority.MissingAUMs(chonk, nodeOffer)
+	if err != nil {
+		return serverError(w, "authority.MissingAUMs: %v", err)
+	}
+
+	head, ancestors, err := tka.FromSyncOffer(controlOffer)
+	if err != nil {
+		return serverError(w, "FromSyncOffer: %v", err)
+	}
+	resp := tailcfg.TKASyncOfferResponse{
+		Head:        head,
+		Ancestors:   ancestors,
+		MissingAUMs: make([]tkatype.MarshaledAUM, len(sendAUMs)),
+	}
+	for i, a := range sendAUMs {
+		resp.MissingAUMs[i] = a.Serialize()
+	}
+
+	log.Printf("responding to sync offer with:\n%+v", resp)
+	w.WriteHeader(200)
+	if err := json.NewEncoder(w).Encode(resp); err != nil {
+		return serverError(w, "Encode: %v", err)
+	}
+	return nil
+}
+
+// HandleTKASign handles a request to /machine/tka/sign.
+//
+// If the signature request is valid, it sends a response to the client, and
+// gives the caller the signature and public key of the node being signed.
+func HandleTKASign(w http.ResponseWriter, r *http.Request, authority *tka.Authority) (*tkatype.MarshaledSignature, *key.NodePublic, error) {
+	req := new(tailcfg.TKASubmitSignatureRequest)
+	if err := json.NewDecoder(r.Body).Decode(req); err != nil {
+		return nil, nil, userError(w, "Decode: %v", err)
+	}
+	if req.Version != tailcfg.CurrentCapabilityVersion {
+		return nil, nil, userError(w, "sign CapVer = %v, want %v", req.Version, tailcfg.CurrentCapabilityVersion)
+	}
+
+	var sig tka.NodeKeySignature
+	if err := sig.Unserialize(req.Signature); err != nil {
+		return nil, nil, userError(w, "malformed signature: %v", err)
+	}
+	var keyBeingSigned key.NodePublic
+	if err := keyBeingSigned.UnmarshalBinary(sig.Pubkey); err != nil {
+		return nil, nil, userError(w, "malformed signature pubkey: %v", err)
+	}
+	if err := authority.NodeKeyAuthorized(keyBeingSigned, req.Signature); err != nil {
+		return nil, nil, userError(w, "signature does not verify: %v", err)
+	}
+
+	w.WriteHeader(200)
+	if err := json.NewEncoder(w).Encode(tailcfg.TKASubmitSignatureResponse{}); err != nil {
+		return nil, nil, serverError(w, "Encode: %v", err)
+	}
+	return &req.Signature, &keyBeingSigned, nil
+}
+
+// HandleTKASyncSend handles a request to /machine/tka/sync/send.
+//
+// If the request is valid, it adds the new AUMs to the authority, and sends
+// a response to the client with the new head.
+func HandleTKASyncSend(w http.ResponseWriter, r *http.Request, authority *tka.Authority, chonk tka.Chonk) error {
+	body := new(tailcfg.TKASyncSendRequest)
+	if err := json.NewDecoder(r.Body).Decode(body); err != nil {
+		return userError(w, "Decode: %v", err)
+	}
+	log.Printf("got sync send:\n%+v", body)
+
+	var remoteHead tka.AUMHash
+	if err := remoteHead.UnmarshalText([]byte(body.Head)); err != nil {
+		return userError(w, "head unmarshal: %v", err)
+	}
+	toApply := make([]tka.AUM, len(body.MissingAUMs))
+	for i, a := range body.MissingAUMs {
+		if err := toApply[i].Unserialize(a); err != nil {
+			return userError(w, "decoding missingAUM[%d]: %v", i, err)
+		}
+	}
+
+	if len(toApply) > 0 {
+		if err := authority.Inform(chonk, toApply); err != nil {
+			return serverError(w, "control.Inform(%+v) failed: %v", toApply, err)
+		}
+	}
+	head, err := authority.Head().MarshalText()
+	if err != nil {
+		return serverError(w, "head marshal: %v", err)
+	}
+
+	resp := tailcfg.TKASyncSendResponse{
+		Head: string(head),
+	}
+	w.WriteHeader(200)
+	if err := json.NewEncoder(w).Encode(resp); err != nil {
+		return serverError(w, "Encode: %v", err)
+	}
+	return nil
+}
diff --git a/tstest/tstest.go b/tstest/tstest.go
index 169450686..d0828f508 100644
--- a/tstest/tstest.go
+++ b/tstest/tstest.go
@@ -6,6 +6,7 @@ package tstest
 
 import (
 	"context"
+	"fmt"
 	"os"
 	"strconv"
 	"strings"
@@ -93,3 +94,20 @@ func Parallel(t *testing.T) {
 		t.Parallel()
 	}
 }
+
+// SkipOnKernelVersions skips the test if the current
+// kernel version is in the specified list.
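+//
+// A typical guard, mirroring the maybeSkip helper in portlist_test.go
+// above (the issue URL and version list are the caller's own):
+//
+//	if runtime.GOOS == "linux" {
+//		tstest.SkipOnKernelVersions(t,
+//			"https://github.com/tailscale/tailscale/issues/16966",
+//			"6.6.102", "6.6.103", "6.6.104")
+//	}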
+func SkipOnKernelVersions(t testing.TB, issue string, versions ...string) {
+	major, minor, patch := KernelVersion()
+	if major == 0 && minor == 0 && patch == 0 {
+		t.Logf("could not determine kernel version")
+		return
+	}
+
+	current := fmt.Sprintf("%d.%d.%d", major, minor, patch)
+	for _, v := range versions {
+		if v == current {
+			t.Skipf("skipping on kernel version %q - see issue %s", current, issue)
+		}
+	}
+}
diff --git a/tstest/tstest_test.go b/tstest/tstest_test.go
index e988d5d56..ce59bde53 100644
--- a/tstest/tstest_test.go
+++ b/tstest/tstest_test.go
@@ -3,7 +3,10 @@
 
 package tstest
 
-import "testing"
+import (
+	"runtime"
+	"testing"
+)
 
 func TestReplace(t *testing.T) {
 	before := "before"
@@ -22,3 +25,17 @@ func TestReplace(t *testing.T) {
 		t.Errorf("before = %q; want %q", before, "before")
 	}
 }
+
+func TestKernelVersion(t *testing.T) {
+	switch runtime.GOOS {
+	case "linux":
+	default:
+		t.Skipf("skipping test on %s", runtime.GOOS)
+	}
+
+	major, minor, patch := KernelVersion()
+	if major == 0 && minor == 0 && patch == 0 {
+		t.Fatal("KernelVersion returned (0, 0, 0); expected valid version")
+	}
+	t.Logf("Kernel version: %d.%d.%d", major, minor, patch)
+}
diff --git a/types/netmap/nodemut.go b/types/netmap/nodemut.go
index f4de1bf0b..4f93be21c 100644
--- a/types/netmap/nodemut.go
+++ b/types/netmap/nodemut.go
@@ -177,5 +177,5 @@ func mapResponseContainsNonPatchFields(res *tailcfg.MapResponse) bool {
 		// function is called, so it should never be set anyway. But for
 		// completedness, and for tests, check it too:
 		res.PeersChanged != nil ||
-		res.DefaultAutoUpdate != ""
+		res.DeprecatedDefaultAutoUpdate != ""
 }
diff --git a/util/eventbus/bus.go b/util/eventbus/bus.go
index aa6880d01..880e075cc 100644
--- a/util/eventbus/bus.go
+++ b/util/eventbus/bus.go
@@ -120,7 +120,14 @@ func (b *Bus) Close() {
 }
 
 func (b *Bus) pump(ctx context.Context) {
-	var vals queue[PublishedEvent]
+	// Limit how many published events we can buffer in the PublishedEvent queue.
+	//
+	// Subscribers have unbounded DeliveredEvent queues (see tailscale/tailscale#18020),
+	// so this queue doesn't need to be unbounded. Keeping it bounded may also help
+	// catch cases where subscribers stop pumping events completely, such as due to a bug
+	// in [subscribeState.pump], [Subscriber.dispatch], or [SubscriberFunc.dispatch].
+	const maxPublishedEvents = 16
+	vals := queue[PublishedEvent]{capacity: maxPublishedEvents}
 	acceptCh := func() chan PublishedEvent {
 		if vals.Full() {
 			return nil
diff --git a/util/eventbus/bus_test.go b/util/eventbus/bus_test.go
index 61728fbfd..88e11e719 100644
--- a/util/eventbus/bus_test.go
+++ b/util/eventbus/bus_test.go
@@ -9,6 +9,7 @@ import (
 	"fmt"
 	"log"
 	"regexp"
+	"sync"
 	"testing"
 	"testing/synctest"
 	"time"
@@ -593,6 +594,105 @@ func TestRegression(t *testing.T) {
 	})
 }
 
+func TestPublishWithMutex(t *testing.T) {
+	testPublishWithMutex(t, 1024) // arbitrary large number of events
+}
+
+// testPublishWithMutex publishes the specified number of events,
+// acquiring and releasing a mutex around each publish and each
+// subscriber event receive.
+//
+// The test fails if it loses any events or times out due to a deadlock.
+// Unfortunately, a goroutine waiting on a mutex held by a durably blocked
+// goroutine is not itself considered durably blocked, so [synctest] cannot
+// detect this deadlock on its own.
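+//
+// Because subscriber queues are unbounded (see the comment in bus.go
+// above), a subscriber blocked on mu cannot back-pressure the bounded
+// publish queue into a deadlock; this test guards that property.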
+func testPublishWithMutex(t *testing.T, n int) {
+	synctest.Test(t, func(t *testing.T) {
+		b := eventbus.New()
+		defer b.Close()
+
+		c := b.Client("TestClient")
+
+		evts := make([]any, n)
+		for i := range evts {
+			evts[i] = EventA{Counter: i}
+		}
+		exp := expectEvents(t, evts...)
+
+		var mu sync.Mutex
+		eventbus.SubscribeFunc[EventA](c, func(e EventA) {
+			// Acquire the same mutex as the publisher.
+			mu.Lock()
+			mu.Unlock()
+
+			// Mark event as received, so we can check for lost events.
+			exp.Got(e)
+		})
+
+		p := eventbus.Publish[EventA](c)
+		go func() {
+			// Publish events, acquiring the mutex around each publish.
+			for i := range n {
+				mu.Lock()
+				p.Publish(EventA{Counter: i})
+				mu.Unlock()
+			}
+		}()
+
+		synctest.Wait()
+
+		if !exp.Empty() {
+			t.Errorf("missing expected events: %+v", exp.want)
+		}
+	})
+}
+
+func TestPublishFromSubscriber(t *testing.T) {
+	testPublishFromSubscriber(t, 1024) // arbitrary large number of events
+}
+
+// testPublishFromSubscriber publishes the specified number of EventA events.
+// Each EventA causes the subscriber to publish an EventB.
+// The test fails if it loses any events or if a deadlock occurs.
+func testPublishFromSubscriber(t *testing.T, n int) {
+	synctest.Test(t, func(t *testing.T) {
+		b := eventbus.New()
+		defer b.Close()
+
+		c := b.Client("TestClient")
+
+		// Ultimately we expect to receive n EventB events
+		// published as a result of receiving n EventA events.
+		evts := make([]any, n)
+		for i := range evts {
+			evts[i] = EventB{Counter: i}
+		}
+		exp := expectEvents(t, evts...)
+
+		pubA := eventbus.Publish[EventA](c)
+		pubB := eventbus.Publish[EventB](c)
+
+		eventbus.SubscribeFunc[EventA](c, func(e EventA) {
+			// Upon receiving EventA, publish EventB.
+			pubB.Publish(EventB{Counter: e.Counter})
+		})
+		eventbus.SubscribeFunc[EventB](c, func(e EventB) {
+			// Mark EventB as received.
+			exp.Got(e)
+		})
+
+		for i := range n {
+			pubA.Publish(EventA{Counter: i})
+		}
+
+		synctest.Wait()
+
+		if !exp.Empty() {
+			t.Errorf("missing expected events: %+v", exp.want)
+		}
+	})
+}
+
 type queueChecker struct {
 	t    *testing.T
 	want []any
diff --git a/util/eventbus/queue.go b/util/eventbus/queue.go
index a62bf3c62..2589b75ce 100644
--- a/util/eventbus/queue.go
+++ b/util/eventbus/queue.go
@@ -7,18 +7,18 @@ import (
 	"slices"
 )
 
-const maxQueuedItems = 16
-
-// queue is an ordered queue of length up to maxQueuedItems.
+// queue is an ordered queue of length up to capacity,
+// if capacity is non-zero. Otherwise it is unbounded.
 type queue[T any] struct {
-	vals  []T
-	start int
+	vals     []T
+	start    int
+	capacity int // zero means unbounded
 }
 
 // canAppend reports whether a value can be appended to q.vals without
 // shifting values around.
 func (q *queue[T]) canAppend() bool {
-	return cap(q.vals) < maxQueuedItems || len(q.vals) < cap(q.vals)
+	return q.capacity == 0 || cap(q.vals) < q.capacity || len(q.vals) < cap(q.vals)
 }
 
 func (q *queue[T]) Full() bool {
diff --git a/util/execqueue/execqueue.go b/util/execqueue/execqueue.go
index 2ea0c1f2f..87616a6b5 100644
--- a/util/execqueue/execqueue.go
+++ b/util/execqueue/execqueue.go
@@ -39,21 +39,21 @@ func (q *ExecQueue) Add(f func()) {
 
 // RunSync waits for the queue to be drained and then synchronously runs f.
 // It returns an error if the queue is closed before f is run or ctx expires.
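+// With the queue-based implementation below, f runs on the queue's
+// goroutine: RunSync enqueues f and blocks until f has run, the queue
+// shuts down, or ctx expires.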
func (q *ExecQueue) RunSync(ctx context.Context, f func()) error { - for { - if err := q.Wait(ctx); err != nil { - return err - } - q.mu.Lock() - if q.inFlight { - q.mu.Unlock() - continue - } - defer q.mu.Unlock() - if q.closed { - return errors.New("closed") - } - f() + q.mu.Lock() + q.initCtxLocked() + shutdownCtx := q.ctx + q.mu.Unlock() + + ch := make(chan struct{}) + q.Add(f) + q.Add(func() { close(ch) }) + select { + case <-ch: return nil + case <-ctx.Done(): + return ctx.Err() + case <-shutdownCtx.Done(): + return errExecQueueShutdown } } @@ -94,6 +94,8 @@ func (q *ExecQueue) initCtxLocked() { } } +var errExecQueueShutdown = errors.New("execqueue shut down") + // Wait waits for the queue to be empty or shut down. func (q *ExecQueue) Wait(ctx context.Context) error { q.mu.Lock() @@ -104,10 +106,11 @@ func (q *ExecQueue) Wait(ctx context.Context) error { q.doneWaiter = waitCh } closed := q.closed + shutdownCtx := q.ctx q.mu.Unlock() if closed { - return errors.New("execqueue shut down") + return errExecQueueShutdown } if waitCh == nil { return nil @@ -116,8 +119,8 @@ func (q *ExecQueue) Wait(ctx context.Context) error { select { case <-waitCh: return nil - case <-q.ctx.Done(): - return errors.New("execqueue shut down") + case <-shutdownCtx.Done(): + return errExecQueueShutdown case <-ctx.Done(): return ctx.Err() } diff --git a/util/execqueue/execqueue_test.go b/util/execqueue/execqueue_test.go index d10b741f7..1bce69556 100644 --- a/util/execqueue/execqueue_test.go +++ b/util/execqueue/execqueue_test.go @@ -20,3 +20,12 @@ func TestExecQueue(t *testing.T) { t.Errorf("n=%d; want 1", got) } } + +// Test that RunSync doesn't hold q.mu and block Shutdown +// as we saw in tailscale/tailscale#18502 +func TestExecQueueRunSyncLocking(t *testing.T) { + q := &ExecQueue{} + q.RunSync(t.Context(), func() { + q.Shutdown() + }) +} diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index 7ae422906..4e1024886 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -211,7 +211,7 @@ func newMagicStackWithKey(t testing.TB, logf logger.Logf, ln nettype.PacketListe } tun := tuntest.NewChannelTUN() - tsTun := tstun.Wrap(logf, tun.TUN(), ®) + tsTun := tstun.Wrap(logf, tun.TUN(), ®, bus) tsTun.SetFilter(filter.NewAllowAllForTest(logf)) tsTun.Start() @@ -1771,7 +1771,6 @@ func TestEndpointSetsEqual(t *testing.T) { t.Errorf("%q vs %q = %v; want %v", tt.a, tt.b, got, tt.want) } } - } func TestBetterAddr(t *testing.T) { @@ -1915,7 +1914,6 @@ func TestBetterAddr(t *testing.T) { t.Errorf("[%d] betterAddr(%+v, %+v) and betterAddr(%+v, %+v) both unexpectedly true", i, tt.a, tt.b, tt.b, tt.a) } } - } func epFromTyped(eps []tailcfg.Endpoint) (ret []netip.AddrPort) { @@ -3138,7 +3136,6 @@ func TestMaybeRebindOnError(t *testing.T) { t.Errorf("expected at least 5 seconds between %s and %s", lastRebindTime, newTime) } } - }) }) } diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 8ad771fc5..a369fa343 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -51,6 +51,7 @@ import ( "tailscale.com/util/checkchange" "tailscale.com/util/clientmetric" "tailscale.com/util/eventbus" + "tailscale.com/util/execqueue" "tailscale.com/util/mak" "tailscale.com/util/set" "tailscale.com/util/testenv" @@ -98,6 +99,8 @@ type userspaceEngine struct { eventBus *eventbus.Bus eventClient *eventbus.Client + linkChangeQueue execqueue.ExecQueue + logf logger.Logf wgLogger *wglog.Logger // a wireguard-go logging wrapper reqCh chan struct{} @@ 
-320,9 +323,9 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error) var tsTUNDev *tstun.Wrapper if conf.IsTAP { - tsTUNDev = tstun.WrapTAP(logf, conf.Tun, conf.Metrics) + tsTUNDev = tstun.WrapTAP(logf, conf.Tun, conf.Metrics, conf.EventBus) } else { - tsTUNDev = tstun.Wrap(logf, conf.Tun, conf.Metrics) + tsTUNDev = tstun.Wrap(logf, conf.Tun, conf.Metrics, conf.EventBus) } closePool.add(tsTUNDev) @@ -544,7 +547,7 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error) if f, ok := feature.HookProxyInvalidateCache.GetOk(); ok { f() } - e.linkChange(&cd) + e.linkChangeQueue.Add(func() { e.linkChange(&cd) }) }) e.eventClient = ec e.logf("Engine created.") @@ -1288,6 +1291,9 @@ func (e *userspaceEngine) RequestStatus() { func (e *userspaceEngine) Close() { e.eventClient.Close() + // TODO(cmol): Should we wait for it too? + // Same question raised in appconnector.go. + e.linkChangeQueue.Shutdown() e.mu.Lock() if e.closing { e.mu.Unlock() @@ -1430,6 +1436,7 @@ func (e *userspaceEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, size in e.magicConn.Ping(peer, res, size, cb) case "TSMP": e.sendTSMPPing(ip, peer, res, cb) + e.sendTSMPDiscoAdvertisement(ip) case "ICMP": e.sendICMPEchoRequest(ip, peer, res, cb) } @@ -1550,6 +1557,29 @@ func (e *userspaceEngine) sendTSMPPing(ip netip.Addr, peer tailcfg.NodeView, res e.tundev.InjectOutbound(tsmpPing) } +func (e *userspaceEngine) sendTSMPDiscoAdvertisement(ip netip.Addr) { + srcIP, err := e.mySelfIPMatchingFamily(ip) + if err != nil { + e.logf("getting matching node: %s", err) + return + } + tdka := packet.TSMPDiscoKeyAdvertisement{ + Src: srcIP, + Dst: ip, + Key: e.magicConn.DiscoPublicKey(), + } + payload, err := tdka.Marshal() + if err != nil { + e.logf("error generating TSMP Advertisement: %s", err) + metricTSMPDiscoKeyAdvertisementError.Add(1) + } else if err := e.tundev.InjectOutbound(payload); err != nil { + e.logf("error sending TSMP Advertisement: %s", err) + metricTSMPDiscoKeyAdvertisementError.Add(1) + } else { + metricTSMPDiscoKeyAdvertisementSent.Add(1) + } +} + func (e *userspaceEngine) setTSMPPongCallback(data [8]byte, cb func(packet.TSMPPongReply)) { e.mu.Lock() defer e.mu.Unlock() @@ -1716,6 +1746,9 @@ var ( metricNumMajorChanges = clientmetric.NewCounter("wgengine_major_changes") metricNumMinorChanges = clientmetric.NewCounter("wgengine_minor_changes") + + metricTSMPDiscoKeyAdvertisementSent = clientmetric.NewCounter("magicsock_tsmp_disco_key_advertisement_sent") + metricTSMPDiscoKeyAdvertisementError = clientmetric.NewCounter("magicsock_tsmp_disco_key_advertisement_error") ) func (e *userspaceEngine) InstallCaptureHook(cb packet.CaptureCallback) { diff --git a/wgengine/userspace_test.go b/wgengine/userspace_test.go index 89d75b98a..0a1d2924d 100644 --- a/wgengine/userspace_test.go +++ b/wgengine/userspace_test.go @@ -325,6 +325,64 @@ func TestUserspaceEnginePeerMTUReconfig(t *testing.T) { } } +func TestTSMPKeyAdvertisement(t *testing.T) { + var knobs controlknobs.Knobs + + bus := eventbustest.NewBus(t) + ht := health.NewTracker(bus) + reg := new(usermetric.Registry) + e, err := NewFakeUserspaceEngine(t.Logf, 0, &knobs, ht, reg, bus) + if err != nil { + t.Fatal(err) + } + t.Cleanup(e.Close) + ue := e.(*userspaceEngine) + routerCfg := &router.Config{} + nodeKey := nkFromHex("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb") + nm := &netmap.NetworkMap{ + Peers: nodeViews([]*tailcfg.Node{ + { + ID: 1, + Key: nodeKey, + }, + }), + SelfNode: (&tailcfg.Node{ + 
+			StableID:  "TESTCTRL00000001",
+			Name:      "test-node.test.ts.net",
+			Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32"), netip.MustParsePrefix("fd7a:115c:a1e0:ab12:4843:cd96:0:1/128")},
+		}).View(),
+	}
+	cfg := &wgcfg.Config{
+		Peers: []wgcfg.Peer{
+			{
+				PublicKey: nodeKey,
+				AllowedIPs: []netip.Prefix{
+					netip.PrefixFrom(netaddr.IPv4(100, 100, 99, 1), 32),
+				},
+			},
+		},
+	}
+
+	ue.SetNetworkMap(nm)
+	err = ue.Reconfig(cfg, routerCfg, &dns.Config{})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	addr := netip.MustParseAddr("100.100.99.1")
+	previousValue := metricTSMPDiscoKeyAdvertisementSent.Value()
+	ue.sendTSMPDiscoAdvertisement(addr)
+	if val := metricTSMPDiscoKeyAdvertisementSent.Value(); val <= previousValue {
+		errs := metricTSMPDiscoKeyAdvertisementError.Value()
+		t.Errorf("disco key advertisement counter did not increase: was %d, now %d (errors %d)", previousValue, val, errs)
+	}
+	// Remove config to have the engine shut down more consistently.
+	err = ue.Reconfig(&wgcfg.Config{}, &router.Config{}, &dns.Config{})
+	if err != nil {
+		t.Fatal(err)
+	}
+}
+
 func nkFromHex(hex string) key.NodePublic {
 	if len(hex) != 64 {
 		panic(fmt.Sprintf("%q is len %d; want 64", hex, len(hex)))