From 74d4652144f11ace04612496095d658414ab09db Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Fri, 22 Nov 2024 15:41:07 +0000 Subject: [PATCH] cmd/{containerboot,k8s-operator},k8s-operator: new options to expose user metrics (#14035) containerboot: Adds 3 new environment variables for containerboot, `TS_LOCAL_ADDR_PORT` (default `"${POD_IP}:9002"`), `TS_METRICS_ENABLED` (default `false`), and `TS_DEBUG_ADDR_PORT` (default `""`), to configure metrics and debug endpoints. In a follow-up PR, the health check endpoint will be updated to use the `TS_LOCAL_ADDR_PORT` if `TS_HEALTHCHECK_ADDR_PORT` hasn't been set. Users previously only had access to internal debug metrics (which are unstable and not recommended) via passing the `--debug` flag to tailscaled, but can now set `TS_METRICS_ENABLED=true` to expose the stable metrics documented at https://tailscale.com/kb/1482/client-metrics at `/metrics` on the addr/port specified by `TS_LOCAL_ADDR_PORT`. Users can also now configure a debug endpoint more directly via the `TS_DEBUG_ADDR_PORT` environment variable. This is not recommended for production use, but exposes an internal set of debug metrics and pprof endpoints. operator: The `ProxyClass` CRD's `.spec.metrics.enable` field now enables serving the stable user metrics documented at https://tailscale.com/kb/1482/client-metrics at `/metrics` on the same "metrics" container port that debug metrics were previously served on. To smooth the transition for anyone relying on the way the operator previously consumed this field, we also _temporarily_ serve tailscaled's internal debug metrics on the same `/debug/metrics` path as before, until 1.82.0 when debug metrics will be turned off by default even if `.spec.metrics.enable` is set. At that point, anyone who wishes to continue using the internal debug metrics (not recommended) will need to set the new `ProxyClass` field `.spec.statefulSet.pod.tailscaleContainer.debug.enable`. 
Users who wish to opt out of the transitional behaviour, where enabling `.spec.metrics.enable` also enables debug metrics, can set `.spec.statefulSet.pod.tailscaleContainer.debug.enable` to false (recommended). Separately but related, the operator will no longer specify a host port for the "metrics" container port definition. This caused scheduling conflicts when k8s needs to schedule more than one proxy per node, and was not necessary for allowing the pod's port to be exposed to prometheus scrapers. Updates #11292 --------- Co-authored-by: Kristoffer Dalby Signed-off-by: Tom Proctor --- cmd/containerboot/healthz.go | 2 +- cmd/containerboot/main.go | 8 ++ cmd/containerboot/metrics.go | 91 +++++++++++++++++++ cmd/containerboot/settings.go | 22 ++++- cmd/containerboot/tailscaled.go | 6 ++ .../crds/tailscale.com_proxyclasses.yaml | 45 ++++++++- .../deploy/manifests/operator.yaml | 45 ++++++++- cmd/k8s-operator/proxyclass.go | 4 + cmd/k8s-operator/proxyclass_test.go | 53 +++++++++++ cmd/k8s-operator/sts.go | 90 +++++++++++++++--- cmd/k8s-operator/sts_test.go | 74 ++++++++++++--- k8s-operator/api.md | 19 +++- .../apis/v1alpha1/types_proxyclass.go | 27 +++++- .../apis/v1alpha1/zz_generated.deepcopy.go | 20 ++++ 14 files changed, 472 insertions(+), 34 deletions(-) create mode 100644 cmd/containerboot/metrics.go diff --git a/cmd/containerboot/healthz.go b/cmd/containerboot/healthz.go index fb7fccd96..12e7ee9f8 100644 --- a/cmd/containerboot/healthz.go +++ b/cmd/containerboot/healthz.go @@ -39,7 +39,7 @@ func runHealthz(addr string, h *healthz) { log.Fatalf("error listening on the provided health endpoint address %q: %v", addr, err) } mux := http.NewServeMux() - mux.Handle("/healthz", h) + mux.Handle("GET /healthz", h) log.Printf("Running healthcheck endpoint at %s/healthz", addr) hs := &http.Server{Handler: mux} diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go index 17131faae..313e8deb0 100644 --- a/cmd/containerboot/main.go +++ 
b/cmd/containerboot/main.go @@ -178,6 +178,14 @@ func main() { } defer killTailscaled() + if cfg.LocalAddrPort != "" && cfg.MetricsEnabled { + m := &metrics{ + lc: client, + debugEndpoint: cfg.DebugAddrPort, + } + runMetrics(cfg.LocalAddrPort, m) + } + if cfg.EnableForwardingOptimizations { if err := client.SetUDPGROForwarding(bootCtx); err != nil { log.Printf("[unexpected] error enabling UDP GRO forwarding: %v", err) diff --git a/cmd/containerboot/metrics.go b/cmd/containerboot/metrics.go new file mode 100644 index 000000000..e88406f97 --- /dev/null +++ b/cmd/containerboot/metrics.go @@ -0,0 +1,91 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux + +package main + +import ( + "fmt" + "io" + "log" + "net" + "net/http" + + "tailscale.com/client/tailscale" + "tailscale.com/client/tailscale/apitype" +) + +// metrics is a simple metrics HTTP server, if enabled it forwards requests to +// the tailscaled's LocalAPI usermetrics endpoint at /localapi/v0/usermetrics. 
+type metrics struct { + debugEndpoint string + lc *tailscale.LocalClient +} + +func proxy(w http.ResponseWriter, r *http.Request, url string, do func(*http.Request) (*http.Response, error)) { + req, err := http.NewRequestWithContext(r.Context(), r.Method, url, r.Body) + if err != nil { + http.Error(w, fmt.Sprintf("failed to construct request: %s", err), http.StatusInternalServerError) + return + } + req.Header = r.Header.Clone() + + resp, err := do(req) + if err != nil { + http.Error(w, fmt.Sprintf("failed to proxy request: %s", err), http.StatusInternalServerError) + return + } + defer resp.Body.Close() + + w.WriteHeader(resp.StatusCode) + for key, val := range resp.Header { + for _, v := range val { + w.Header().Add(key, v) + } + } + if _, err := io.Copy(w, resp.Body); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + } +} + +func (m *metrics) handleMetrics(w http.ResponseWriter, r *http.Request) { + localAPIURL := "http://" + apitype.LocalAPIHost + "/localapi/v0/usermetrics" + proxy(w, r, localAPIURL, m.lc.DoLocalRequest) +} + +func (m *metrics) handleDebug(w http.ResponseWriter, r *http.Request) { + if m.debugEndpoint == "" { + http.Error(w, "debug endpoint not configured", http.StatusNotFound) + return + } + + debugURL := "http://" + m.debugEndpoint + r.URL.Path + proxy(w, r, debugURL, http.DefaultClient.Do) +} + +// runMetrics runs a simple HTTP metrics endpoint at /metrics, forwarding +// requests to tailscaled's /localapi/v0/usermetrics API. +// +// In 1.78.x and 1.80.x, it also proxies debug paths to tailscaled's debug +// endpoint if configured to ease migration for a breaking change serving user +// metrics instead of debug metrics on the "metrics" port. 
+func runMetrics(addr string, m *metrics) { + ln, err := net.Listen("tcp", addr) + if err != nil { + log.Fatalf("error listening on the provided metrics endpoint address %q: %v", addr, err) + } + + mux := http.NewServeMux() + mux.HandleFunc("GET /metrics", m.handleMetrics) + mux.HandleFunc("/debug/", m.handleDebug) // TODO(tomhjp): Remove for 1.82.0 release. + + log.Printf("Running metrics endpoint at %s/metrics", addr) + ms := &http.Server{Handler: mux} + + go func() { + if err := ms.Serve(ln); err != nil { + log.Fatalf("failed running metrics endpoint: %v", err) + } + }() +} diff --git a/cmd/containerboot/settings.go b/cmd/containerboot/settings.go index 742713e77..c877682b9 100644 --- a/cmd/containerboot/settings.go +++ b/cmd/containerboot/settings.go @@ -67,11 +67,18 @@ type settings struct { PodIP string PodIPv4 string PodIPv6 string - HealthCheckAddrPort string + HealthCheckAddrPort string // TODO(tomhjp): use the local addr/port instead. + LocalAddrPort string + MetricsEnabled bool + DebugAddrPort string EgressSvcsCfgPath string } func configFromEnv() (*settings, error) { + defaultLocalAddrPort := "" + if v, ok := os.LookupEnv("POD_IP"); ok && v != "" { + defaultLocalAddrPort = fmt.Sprintf("%s:9002", v) + } cfg := &settings{ AuthKey: defaultEnvs([]string{"TS_AUTHKEY", "TS_AUTH_KEY"}, ""), Hostname: defaultEnv("TS_HOSTNAME", ""), @@ -98,6 +105,9 @@ func configFromEnv() (*settings, error) { PodIP: defaultEnv("POD_IP", ""), EnableForwardingOptimizations: defaultBool("TS_EXPERIMENTAL_ENABLE_FORWARDING_OPTIMIZATIONS", false), HealthCheckAddrPort: defaultEnv("TS_HEALTHCHECK_ADDR_PORT", ""), + LocalAddrPort: defaultEnv("TS_LOCAL_ADDR_PORT", defaultLocalAddrPort), + MetricsEnabled: defaultBool("TS_METRICS_ENABLED", false), + DebugAddrPort: defaultEnv("TS_DEBUG_ADDR_PORT", ""), EgressSvcsCfgPath: defaultEnv("TS_EGRESS_SERVICES_CONFIG_PATH", ""), } podIPs, ok := os.LookupEnv("POD_IPS") @@ -175,6 +185,16 @@ func (s *settings) validate() error { return fmt.Errorf("error 
parsing TS_HEALTH_CHECK_ADDR_PORT value %q: %w", s.HealthCheckAddrPort, err) } } + if s.LocalAddrPort != "" { + if _, err := netip.ParseAddrPort(s.LocalAddrPort); err != nil { + return fmt.Errorf("error parsing TS_LOCAL_ADDR_PORT value %q: %w", s.LocalAddrPort, err) + } + } + if s.DebugAddrPort != "" { + if _, err := netip.ParseAddrPort(s.DebugAddrPort); err != nil { + return fmt.Errorf("error parsing TS_DEBUG_ADDR_PORT value %q: %w", s.DebugAddrPort, err) + } + } return nil } diff --git a/cmd/containerboot/tailscaled.go b/cmd/containerboot/tailscaled.go index 53fb7e703..d8da49b03 100644 --- a/cmd/containerboot/tailscaled.go +++ b/cmd/containerboot/tailscaled.go @@ -90,6 +90,12 @@ func tailscaledArgs(cfg *settings) []string { if cfg.TailscaledConfigFilePath != "" { args = append(args, "--config="+cfg.TailscaledConfigFilePath) } + // Once enough proxy versions have been released for all the supported + // versions to understand this cfg setting, the operator can stop + // setting TS_TAILSCALED_EXTRA_ARGS for the debug flag. + if cfg.DebugAddrPort != "" && !strings.Contains(cfg.DaemonExtraArgs, cfg.DebugAddrPort) { + args = append(args, "--debug="+cfg.DebugAddrPort) + } if cfg.DaemonExtraArgs != "" { args = append(args, strings.Fields(cfg.DaemonExtraArgs)...) } diff --git a/cmd/k8s-operator/deploy/crds/tailscale.com_proxyclasses.yaml b/cmd/k8s-operator/deploy/crds/tailscale.com_proxyclasses.yaml index 7086138c0..4c24a1633 100644 --- a/cmd/k8s-operator/deploy/crds/tailscale.com_proxyclasses.yaml +++ b/cmd/k8s-operator/deploy/crds/tailscale.com_proxyclasses.yaml @@ -73,7 +73,12 @@ spec: enable: description: |- Setting enable to true will make the proxy serve Tailscale metrics - at :9001/debug/metrics. + at :9002/metrics. + + In 1.78.x and 1.80.x, this field also serves as the default value for + .spec.statefulSet.pod.tailscaleContainer.debug.enable. From 1.82.0, both + fields will independently default to false. + Defaults to false. 
type: boolean statefulSet: @@ -1249,6 +1254,25 @@ spec: description: Configuration for the proxy container running tailscale. type: object properties: + debug: + description: |- + Configuration for enabling extra debug information in the container. + Not recommended for production use. + type: object + properties: + enable: + description: |- + Enable tailscaled's HTTP pprof endpoints at :9001/debug/pprof/ + and internal debug metrics endpoint at :9001/debug/metrics, where + 9001 is a container port named "debug". The endpoints and their responses + may change in backwards incompatible ways in the future, and should not + be considered stable. + + In 1.78.x and 1.80.x, this setting will default to the value of + .spec.metrics.enable, and requests to the "metrics" port matching the + mux pattern /debug/ will be forwarded to the "debug" port. In 1.82.x, + this setting will default to false, and no requests will be proxied. + type: boolean env: description: |- List of environment variables to set in the container. @@ -1553,6 +1577,25 @@ spec: description: Configuration for the proxy init container that enables forwarding. type: object properties: + debug: + description: |- + Configuration for enabling extra debug information in the container. + Not recommended for production use. + type: object + properties: + enable: + description: |- + Enable tailscaled's HTTP pprof endpoints at :9001/debug/pprof/ + and internal debug metrics endpoint at :9001/debug/metrics, where + 9001 is a container port named "debug". The endpoints and their responses + may change in backwards incompatible ways in the future, and should not + be considered stable. + + In 1.78.x and 1.80.x, this setting will default to the value of + .spec.metrics.enable, and requests to the "metrics" port matching the + mux pattern /debug/ will be forwarded to the "debug" port. In 1.82.x, + this setting will default to false, and no requests will be proxied. 
+ type: boolean env: description: |- List of environment variables to set in the container. diff --git a/cmd/k8s-operator/deploy/manifests/operator.yaml b/cmd/k8s-operator/deploy/manifests/operator.yaml index 4035afaba..f764fc09a 100644 --- a/cmd/k8s-operator/deploy/manifests/operator.yaml +++ b/cmd/k8s-operator/deploy/manifests/operator.yaml @@ -540,7 +540,12 @@ spec: enable: description: |- Setting enable to true will make the proxy serve Tailscale metrics - at :9001/debug/metrics. + at :9002/metrics. + + In 1.78.x and 1.80.x, this field also serves as the default value for + .spec.statefulSet.pod.tailscaleContainer.debug.enable. From 1.82.0, both + fields will independently default to false. + Defaults to false. type: boolean required: @@ -1716,6 +1721,25 @@ spec: tailscaleContainer: description: Configuration for the proxy container running tailscale. properties: + debug: + description: |- + Configuration for enabling extra debug information in the container. + Not recommended for production use. + properties: + enable: + description: |- + Enable tailscaled's HTTP pprof endpoints at :9001/debug/pprof/ + and internal debug metrics endpoint at :9001/debug/metrics, where + 9001 is a container port named "debug". The endpoints and their responses + may change in backwards incompatible ways in the future, and should not + be considered stable. + + In 1.78.x and 1.80.x, this setting will default to the value of + .spec.metrics.enable, and requests to the "metrics" port matching the + mux pattern /debug/ will be forwarded to the "debug" port. In 1.82.x, + this setting will default to false, and no requests will be proxied. + type: boolean + type: object env: description: |- List of environment variables to set in the container. @@ -2020,6 +2044,25 @@ spec: tailscaleInitContainer: description: Configuration for the proxy init container that enables forwarding. properties: + debug: + description: |- + Configuration for enabling extra debug information in the container. 
+ Not recommended for production use. + properties: + enable: + description: |- + Enable tailscaled's HTTP pprof endpoints at :9001/debug/pprof/ + and internal debug metrics endpoint at :9001/debug/metrics, where + 9001 is a container port named "debug". The endpoints and their responses + may change in backwards incompatible ways in the future, and should not + be considered stable. + + In 1.78.x and 1.80.x, this setting will default to the value of + .spec.metrics.enable, and requests to the "metrics" port matching the + mux pattern /debug/ will be forwarded to the "debug" port. In 1.82.x, + this setting will default to false, and no requests will be proxied. + type: boolean + type: object env: description: |- List of environment variables to set in the container. diff --git a/cmd/k8s-operator/proxyclass.go b/cmd/k8s-operator/proxyclass.go index 882a9030f..13f217f3c 100644 --- a/cmd/k8s-operator/proxyclass.go +++ b/cmd/k8s-operator/proxyclass.go @@ -160,6 +160,10 @@ func (pcr *ProxyClassReconciler) validate(pc *tsapi.ProxyClass) (violations fiel violations = append(violations, field.TypeInvalid(field.NewPath("spec", "statefulSet", "pod", "tailscaleInitContainer", "image"), tc.Image, err.Error())) } } + + if tc.Debug != nil { + violations = append(violations, field.TypeInvalid(field.NewPath("spec", "statefulSet", "pod", "tailscaleInitContainer", "debug"), tc.Debug, "debug settings cannot be configured on the init container")) + } } } } diff --git a/cmd/k8s-operator/proxyclass_test.go b/cmd/k8s-operator/proxyclass_test.go index eb68811fc..fb17f5fe5 100644 --- a/cmd/k8s-operator/proxyclass_test.go +++ b/cmd/k8s-operator/proxyclass_test.go @@ -135,3 +135,56 @@ func TestProxyClass(t *testing.T) { expectReconciled(t, pcr, "", "test") expectEvents(t, fr, expectedEvents) } + +func TestValidateProxyClass(t *testing.T) { + for name, tc := range map[string]struct { + pc *tsapi.ProxyClass + valid bool + }{ + "empty": { + valid: true, + pc: &tsapi.ProxyClass{}, + }, + 
"debug_enabled_for_main_container": { + valid: true, + pc: &tsapi.ProxyClass{ + Spec: tsapi.ProxyClassSpec{ + StatefulSet: &tsapi.StatefulSet{ + Pod: &tsapi.Pod{ + TailscaleContainer: &tsapi.Container{ + Debug: &tsapi.Debug{ + Enable: true, + }, + }, + }, + }, + }, + }, + }, + "debug_enabled_for_init_container": { + valid: false, + pc: &tsapi.ProxyClass{ + Spec: tsapi.ProxyClassSpec{ + StatefulSet: &tsapi.StatefulSet{ + Pod: &tsapi.Pod{ + TailscaleInitContainer: &tsapi.Container{ + Debug: &tsapi.Debug{ + Enable: true, + }, + }, + }, + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + pcr := &ProxyClassReconciler{} + err := pcr.validate(tc.pc) + valid := err == nil + if valid != tc.valid { + t.Errorf("expected valid=%v, got valid=%v, err=%v", tc.valid, valid, err) + } + }) + } +} diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index bdacec39b..5df476478 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -476,7 +476,7 @@ var proxyYaml []byte //go:embed deploy/manifests/userspace-proxy.yaml var userspaceProxyYaml []byte -func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, proxySecret, tsConfigHash string, configs map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha) (*appsv1.StatefulSet, error) { +func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig, headlessSvc *corev1.Service, proxySecret, tsConfigHash string, _ map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha) (*appsv1.StatefulSet, error) { ss := new(appsv1.StatefulSet) if sts.ServeConfig != nil && sts.ForwardClusterTrafficViaL7IngressProxy != true { // If forwarding cluster traffic via is required we need non-userspace + NET_ADMIN + forwarding if err := yaml.Unmarshal(userspaceProxyYaml, &ss); err != nil { @@ -666,24 +666,42 @@ func mergeStatefulSetLabelsOrAnnots(current, custom map[string]string, managed [ return 
custom } +func debugSetting(pc *tsapi.ProxyClass) bool { + if pc == nil || + pc.Spec.StatefulSet == nil || + pc.Spec.StatefulSet.Pod == nil || + pc.Spec.StatefulSet.Pod.TailscaleContainer == nil || + pc.Spec.StatefulSet.Pod.TailscaleContainer.Debug == nil { + // This default will change to false in 1.82.0. + return pc.Spec.Metrics != nil && pc.Spec.Metrics.Enable + } + + return pc.Spec.StatefulSet.Pod.TailscaleContainer.Debug.Enable +} + func applyProxyClassToStatefulSet(pc *tsapi.ProxyClass, ss *appsv1.StatefulSet, stsCfg *tailscaleSTSConfig, logger *zap.SugaredLogger) *appsv1.StatefulSet { if pc == nil || ss == nil { return ss } - if stsCfg != nil && pc.Spec.Metrics != nil && pc.Spec.Metrics.Enable { - if stsCfg.TailnetTargetFQDN == "" && stsCfg.TailnetTargetIP == "" && !stsCfg.ForwardClusterTrafficViaL7IngressProxy { - enableMetrics(ss) - } else if stsCfg.ForwardClusterTrafficViaL7IngressProxy { + + metricsEnabled := pc.Spec.Metrics != nil && pc.Spec.Metrics.Enable + debugEnabled := debugSetting(pc) + if metricsEnabled || debugEnabled { + isEgress := stsCfg != nil && (stsCfg.TailnetTargetFQDN != "" || stsCfg.TailnetTargetIP != "") + isForwardingL7Ingress := stsCfg != nil && stsCfg.ForwardClusterTrafficViaL7IngressProxy + if isEgress { // TODO (irbekrm): fix this // For Ingress proxies that have been configured with // tailscale.com/experimental-forward-cluster-traffic-via-ingress // annotation, all cluster traffic is forwarded to the // Ingress backend(s). - logger.Info("ProxyClass specifies that metrics should be enabled, but this is currently not supported for Ingress proxies that accept cluster traffic.") - } else { + logger.Info("ProxyClass specifies that metrics should be enabled, but this is currently not supported for egress proxies.") + } else if isForwardingL7Ingress { // TODO (irbekrm): fix this // For egress proxies, currently all cluster traffic is forwarded to the tailnet target. 
logger.Info("ProxyClass specifies that metrics should be enabled, but this is currently not supported for Ingress proxies that accept cluster traffic.") + } else { + enableEndpoints(ss, metricsEnabled, debugEnabled) } } @@ -761,16 +779,58 @@ func applyProxyClassToStatefulSet(pc *tsapi.ProxyClass, ss *appsv1.StatefulSet, return ss } -func enableMetrics(ss *appsv1.StatefulSet) { +func enableEndpoints(ss *appsv1.StatefulSet, metrics, debug bool) { for i, c := range ss.Spec.Template.Spec.Containers { if c.Name == "tailscale" { - // Serve metrics on on :9001/debug/metrics. If - // we didn't specify Pod IP here, the proxy would, in - // some cases, also listen to its Tailscale IP- we don't - // want folks to start relying on this side-effect as a - // feature. - ss.Spec.Template.Spec.Containers[i].Env = append(ss.Spec.Template.Spec.Containers[i].Env, corev1.EnvVar{Name: "TS_TAILSCALED_EXTRA_ARGS", Value: "--debug=$(POD_IP):9001"}) - ss.Spec.Template.Spec.Containers[i].Ports = append(ss.Spec.Template.Spec.Containers[i].Ports, corev1.ContainerPort{Name: "metrics", Protocol: "TCP", HostPort: 9001, ContainerPort: 9001}) + if debug { + ss.Spec.Template.Spec.Containers[i].Env = append(ss.Spec.Template.Spec.Containers[i].Env, + // Serve tailscaled's debug metrics on on + // :9001/debug/metrics. If we didn't specify Pod IP + // here, the proxy would, in some cases, also listen to its + // Tailscale IP- we don't want folks to start relying on this + // side-effect as a feature. + corev1.EnvVar{ + Name: "TS_DEBUG_ADDR_PORT", + Value: "$(POD_IP):9001", + }, + // TODO(tomhjp): Can remove this env var once 1.76.x is no + // longer supported. 
+ corev1.EnvVar{ + Name: "TS_TAILSCALED_EXTRA_ARGS", + Value: "--debug=$(TS_DEBUG_ADDR_PORT)", + }, + ) + + ss.Spec.Template.Spec.Containers[i].Ports = append(ss.Spec.Template.Spec.Containers[i].Ports, + corev1.ContainerPort{ + Name: "debug", + Protocol: "TCP", + ContainerPort: 9001, + }, + ) + } + + if metrics { + ss.Spec.Template.Spec.Containers[i].Env = append(ss.Spec.Template.Spec.Containers[i].Env, + // Serve client metrics on :9002/metrics. + corev1.EnvVar{ + Name: "TS_LOCAL_ADDR_PORT", + Value: "$(POD_IP):9002", + }, + corev1.EnvVar{ + Name: "TS_METRICS_ENABLED", + Value: "true", + }, + ) + ss.Spec.Template.Spec.Containers[i].Ports = append(ss.Spec.Template.Spec.Containers[i].Ports, + corev1.ContainerPort{ + Name: "metrics", + Protocol: "TCP", + ContainerPort: 9002, + }, + ) + } + break } } diff --git a/cmd/k8s-operator/sts_test.go b/cmd/k8s-operator/sts_test.go index 7263c56c3..7986d1b91 100644 --- a/cmd/k8s-operator/sts_test.go +++ b/cmd/k8s-operator/sts_test.go @@ -125,10 +125,26 @@ func Test_applyProxyClassToStatefulSet(t *testing.T) { }, }, } - proxyClassMetrics := &tsapi.ProxyClass{ - Spec: tsapi.ProxyClassSpec{ - Metrics: &tsapi.Metrics{Enable: true}, - }, + + proxyClassWithMetricsDebug := func(metrics bool, debug *bool) *tsapi.ProxyClass { + return &tsapi.ProxyClass{ + Spec: tsapi.ProxyClassSpec{ + Metrics: &tsapi.Metrics{Enable: metrics}, + StatefulSet: func() *tsapi.StatefulSet { + if debug == nil { + return nil + } + + return &tsapi.StatefulSet{ + Pod: &tsapi.Pod{ + TailscaleContainer: &tsapi.Container{ + Debug: &tsapi.Debug{Enable: *debug}, + }, + }, + } + }(), + }, + } } var userspaceProxySS, nonUserspaceProxySS appsv1.StatefulSet @@ -184,7 +200,7 @@ func Test_applyProxyClassToStatefulSet(t *testing.T) { gotSS := applyProxyClassToStatefulSet(proxyClassAllOpts, nonUserspaceProxySS.DeepCopy(), new(tailscaleSTSConfig), zl.Sugar()) if diff := cmp.Diff(gotSS, wantSS); diff != "" { - t.Fatalf("Unexpected result applying ProxyClass with all fields set 
to a StatefulSet for non-userspace proxy (-got +want):\n%s", diff) + t.Errorf("Unexpected result applying ProxyClass with all fields set to a StatefulSet for non-userspace proxy (-got +want):\n%s", diff) } // 2. Test that a ProxyClass with custom labels and annotations for @@ -197,7 +213,7 @@ func Test_applyProxyClassToStatefulSet(t *testing.T) { wantSS.Spec.Template.Annotations = proxyClassJustLabels.Spec.StatefulSet.Pod.Annotations gotSS = applyProxyClassToStatefulSet(proxyClassJustLabels, nonUserspaceProxySS.DeepCopy(), new(tailscaleSTSConfig), zl.Sugar()) if diff := cmp.Diff(gotSS, wantSS); diff != "" { - t.Fatalf("Unexpected result applying ProxyClass with custom labels and annotations to a StatefulSet for non-userspace proxy (-got +want):\n%s", diff) + t.Errorf("Unexpected result applying ProxyClass with custom labels and annotations to a StatefulSet for non-userspace proxy (-got +want):\n%s", diff) } // 3. Test that a ProxyClass with all fields set gets correctly applied @@ -221,7 +237,7 @@ func Test_applyProxyClassToStatefulSet(t *testing.T) { wantSS.Spec.Template.Spec.Containers[0].Image = "ghcr.io/my-repo/tailscale:v0.01testsomething" gotSS = applyProxyClassToStatefulSet(proxyClassAllOpts, userspaceProxySS.DeepCopy(), new(tailscaleSTSConfig), zl.Sugar()) if diff := cmp.Diff(gotSS, wantSS); diff != "" { - t.Fatalf("Unexpected result applying ProxyClass with all options to a StatefulSet for a userspace proxy (-got +want):\n%s", diff) + t.Errorf("Unexpected result applying ProxyClass with all options to a StatefulSet for a userspace proxy (-got +want):\n%s", diff) } // 4. 
Test that a ProxyClass with custom labels and annotations gets correctly applied @@ -233,16 +249,48 @@ func Test_applyProxyClassToStatefulSet(t *testing.T) { wantSS.Spec.Template.Annotations = proxyClassJustLabels.Spec.StatefulSet.Pod.Annotations gotSS = applyProxyClassToStatefulSet(proxyClassJustLabels, userspaceProxySS.DeepCopy(), new(tailscaleSTSConfig), zl.Sugar()) if diff := cmp.Diff(gotSS, wantSS); diff != "" { - t.Fatalf("Unexpected result applying ProxyClass with custom labels and annotations to a StatefulSet for a userspace proxy (-got +want):\n%s", diff) + t.Errorf("Unexpected result applying ProxyClass with custom labels and annotations to a StatefulSet for a userspace proxy (-got +want):\n%s", diff) + } + + // 5. Metrics enabled defaults to enabling both metrics and debug. + wantSS = nonUserspaceProxySS.DeepCopy() + wantSS.Spec.Template.Spec.Containers[0].Env = append(wantSS.Spec.Template.Spec.Containers[0].Env, + corev1.EnvVar{Name: "TS_DEBUG_ADDR_PORT", Value: "$(POD_IP):9001"}, + corev1.EnvVar{Name: "TS_TAILSCALED_EXTRA_ARGS", Value: "--debug=$(TS_DEBUG_ADDR_PORT)"}, + corev1.EnvVar{Name: "TS_LOCAL_ADDR_PORT", Value: "$(POD_IP):9002"}, + corev1.EnvVar{Name: "TS_METRICS_ENABLED", Value: "true"}, + ) + wantSS.Spec.Template.Spec.Containers[0].Ports = []corev1.ContainerPort{ + {Name: "debug", Protocol: "TCP", ContainerPort: 9001}, + {Name: "metrics", Protocol: "TCP", ContainerPort: 9002}, + } + gotSS = applyProxyClassToStatefulSet(proxyClassWithMetricsDebug(true, nil), nonUserspaceProxySS.DeepCopy(), new(tailscaleSTSConfig), zl.Sugar()) + if diff := cmp.Diff(gotSS, wantSS); diff != "" { + t.Errorf("Unexpected result applying ProxyClass with metrics enabled to a StatefulSet (-got +want):\n%s", diff) + } + + // 6. Enable _just_ metrics by explicitly disabling debug. 
+ wantSS = nonUserspaceProxySS.DeepCopy() + wantSS.Spec.Template.Spec.Containers[0].Env = append(wantSS.Spec.Template.Spec.Containers[0].Env, + corev1.EnvVar{Name: "TS_LOCAL_ADDR_PORT", Value: "$(POD_IP):9002"}, + corev1.EnvVar{Name: "TS_METRICS_ENABLED", Value: "true"}, + ) + wantSS.Spec.Template.Spec.Containers[0].Ports = []corev1.ContainerPort{{Name: "metrics", Protocol: "TCP", ContainerPort: 9002}} + gotSS = applyProxyClassToStatefulSet(proxyClassWithMetricsDebug(true, ptr.To(false)), nonUserspaceProxySS.DeepCopy(), new(tailscaleSTSConfig), zl.Sugar()) + if diff := cmp.Diff(gotSS, wantSS); diff != "" { + t.Errorf("Unexpected result applying ProxyClass with metrics enabled to a StatefulSet (-got +want):\n%s", diff) } - // 5. Test that a ProxyClass with metrics enabled gets correctly applied to a StatefulSet. + // 7. Enable _just_ debug without metrics. wantSS = nonUserspaceProxySS.DeepCopy() - wantSS.Spec.Template.Spec.Containers[0].Env = append(wantSS.Spec.Template.Spec.Containers[0].Env, corev1.EnvVar{Name: "TS_TAILSCALED_EXTRA_ARGS", Value: "--debug=$(POD_IP):9001"}) - wantSS.Spec.Template.Spec.Containers[0].Ports = []corev1.ContainerPort{{Name: "metrics", Protocol: "TCP", ContainerPort: 9001, HostPort: 9001}} - gotSS = applyProxyClassToStatefulSet(proxyClassMetrics, nonUserspaceProxySS.DeepCopy(), new(tailscaleSTSConfig), zl.Sugar()) + wantSS.Spec.Template.Spec.Containers[0].Env = append(wantSS.Spec.Template.Spec.Containers[0].Env, + corev1.EnvVar{Name: "TS_DEBUG_ADDR_PORT", Value: "$(POD_IP):9001"}, + corev1.EnvVar{Name: "TS_TAILSCALED_EXTRA_ARGS", Value: "--debug=$(TS_DEBUG_ADDR_PORT)"}, + ) + wantSS.Spec.Template.Spec.Containers[0].Ports = []corev1.ContainerPort{{Name: "debug", Protocol: "TCP", ContainerPort: 9001}} + gotSS = applyProxyClassToStatefulSet(proxyClassWithMetricsDebug(false, ptr.To(true)), nonUserspaceProxySS.DeepCopy(), new(tailscaleSTSConfig), zl.Sugar()) if diff := cmp.Diff(gotSS, wantSS); diff != "" { - t.Fatalf("Unexpected result 
applying ProxyClass with metrics enabled to a StatefulSet (-got +want):\n%s", diff) + t.Errorf("Unexpected result applying ProxyClass with metrics enabled to a StatefulSet (-got +want):\n%s", diff) } } diff --git a/k8s-operator/api.md b/k8s-operator/api.md index 7b1aca314..640d8fb07 100644 --- a/k8s-operator/api.md +++ b/k8s-operator/api.md @@ -146,6 +146,7 @@ _Appears in:_ | `imagePullPolicy` _[PullPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.3/#pullpolicy-v1-core)_ | Image pull policy. One of Always, Never, IfNotPresent. Defaults to Always.
https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#image | | Enum: [Always Never IfNotPresent]
| | `resources` _[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.3/#resourcerequirements-v1-core)_ | Container resource requirements.
By default Tailscale Kubernetes operator does not apply any resource
requirements. The amount of resources required will depend on the
amount of resources the operator needs to parse, usage patterns and
cluster size.
https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#resources | | | | `securityContext` _[SecurityContext](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.3/#securitycontext-v1-core)_ | Container security context.
Security context specified here will override the security context set by the operator.
By default the operator:
- sets 'privileged: true' for the init container
- sets NET_ADMIN capability for tailscale container for proxies that
are created for Services or Connector.
https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#security-context | | | +| `debug` _[Debug](#debug)_ | Configuration for enabling extra debug information in the container.
Not recommended for production use. | | | #### DNSConfig @@ -248,6 +249,22 @@ _Appears in:_ | `nameserver` _[NameserverStatus](#nameserverstatus)_ | Nameserver describes the status of nameserver cluster resources. | | | +#### Debug + + + + + + + +_Appears in:_ +- [Container](#container) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `enable` _boolean_ | Enable tailscaled's HTTP pprof endpoints at :9001/debug/pprof/
and internal debug metrics endpoint at :9001/debug/metrics, where
9001 is a container port named "debug". The endpoints and their responses
may change in backwards incompatible ways in the future, and should not
be considered stable.
In 1.78.x and 1.80.x, this setting will default to the value of
.spec.metrics.enable, and requests to the "metrics" port matching the
mux pattern /debug/ will be forwarded to the "debug" port. In 1.82.x,
this setting will default to false, and no requests will be proxied. | | | + + #### Env @@ -309,7 +326,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `enable` _boolean_ | Setting enable to true will make the proxy serve Tailscale metrics
at :9001/debug/metrics.
Defaults to false. | | | +| `enable` _boolean_ | Setting enable to true will make the proxy serve Tailscale metrics
at :9002/metrics.
In 1.78.x and 1.80.x, this field also serves as the default value for
.spec.statefulSet.pod.tailscaleContainer.debug.enable. From 1.82.0, both
fields will independently default to false.
Defaults to false. | | | #### Name diff --git a/k8s-operator/apis/v1alpha1/types_proxyclass.go b/k8s-operator/apis/v1alpha1/types_proxyclass.go index 0a224b796..7e408cd0a 100644 --- a/k8s-operator/apis/v1alpha1/types_proxyclass.go +++ b/k8s-operator/apis/v1alpha1/types_proxyclass.go @@ -163,7 +163,12 @@ type Pod struct { type Metrics struct { // Setting enable to true will make the proxy serve Tailscale metrics - // at :9001/debug/metrics. + // at :9002/metrics. + // + // In 1.78.x and 1.80.x, this field also serves as the default value for + // .spec.statefulSet.pod.tailscaleContainer.debug.enable. From 1.82.0, both + // fields will independently default to false. + // // Defaults to false. Enable bool `json:"enable"` } @@ -209,6 +214,26 @@ type Container struct { // https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#security-context // +optional SecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"` + // Configuration for enabling extra debug information in the container. + // Not recommended for production use. + // +optional + Debug *Debug `json:"debug,omitempty"` +} + +type Debug struct { + // Enable tailscaled's HTTP pprof endpoints at :9001/debug/pprof/ + // and internal debug metrics endpoint at :9001/debug/metrics, where + // 9001 is a container port named "debug". The endpoints and their responses + // may change in backwards incompatible ways in the future, and should not + // be considered stable. + // + // In 1.78.x and 1.80.x, this setting will default to the value of + // .spec.metrics.enable, and requests to the "metrics" port matching the + // mux pattern /debug/ will be forwarded to the "debug" port. In 1.82.x, + // this setting will default to false, and no requests will be proxied. 
+ // + // +optional + Enable bool `json:"enable"` } type Env struct { diff --git a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go index c2f69dc04..07e46f3f5 100644 --- a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go +++ b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go @@ -163,6 +163,11 @@ func (in *Container) DeepCopyInto(out *Container) { *out = new(corev1.SecurityContext) (*in).DeepCopyInto(*out) } + if in.Debug != nil { + in, out := &in.Debug, &out.Debug + *out = new(Debug) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Container. @@ -281,6 +286,21 @@ func (in *DNSConfigStatus) DeepCopy() *DNSConfigStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Debug) DeepCopyInto(out *Debug) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Debug. +func (in *Debug) DeepCopy() *Debug { + if in == nil { + return nil + } + out := new(Debug) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Env) DeepCopyInto(out *Env) { *out = *in