From 306b85b9a3daabd911fcaeccfb43be5605c5888b Mon Sep 17 00:00:00 2001 From: Maisem Ali Date: Wed, 30 Aug 2023 09:49:11 -0700 Subject: [PATCH] cmd/k8s-operator: add metrics to track usage Updates #502 Signed-off-by: Maisem Ali --- cmd/k8s-operator/ingress.go | 27 ++++++++++++++++++++++++ cmd/k8s-operator/proxy.go | 4 ++++ cmd/k8s-operator/svc.go | 41 +++++++++++++++++++++++++++++++++++++ util/set/slice.go | 3 +++ 4 files changed, 75 insertions(+) diff --git a/cmd/k8s-operator/ingress.go b/cmd/k8s-operator/ingress.go index ff1440cd8..1e042c1f7 100644 --- a/cmd/k8s-operator/ingress.go +++ b/cmd/k8s-operator/ingress.go @@ -9,6 +9,7 @@ import ( "context" "fmt" "strings" + "sync" "go.uber.org/zap" "golang.org/x/exp/slices" @@ -21,6 +22,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "tailscale.com/ipn" "tailscale.com/types/opt" + "tailscale.com/util/clientmetric" + "tailscale.com/util/set" ) type IngressReconciler struct { @@ -29,8 +32,20 @@ type IngressReconciler struct { recorder record.EventRecorder ssr *tailscaleSTSReconciler logger *zap.SugaredLogger + + mu sync.Mutex // protects following + + // managedIngresses is a set of all ingress resources that we're currently + // managing. This is only used for metrics. + managedIngresses set.Slice[types.UID] } +var ( + // gaugeIngressResources tracks the number of ingress resources that we're + // currently managing. + gaugeIngressResources = clientmetric.NewGauge("k8s_ingress_resources") +) + func (a *IngressReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) { logger := a.logger.With("ingress-ns", req.Namespace, "ingress-name", req.Name) logger.Debugf("starting reconcile") @@ -57,6 +72,10 @@ func (a *IngressReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare ix := slices.Index(ing.Finalizers, FinalizerName) if ix < 0 { logger.Debugf("no finalizer, nothing to do") + a.mu.Lock() + defer a.mu.Unlock() + a.managedIngresses.Remove(ing.UID) + gaugeIngressResources.Set(int64(a.managedIngresses.Len())) return nil } @@ -77,6 +96,10 @@ func (a *IngressReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare // cleanup removes the tailscale finalizer, which will make all future // reconciles exit early. logger.Infof("unexposed ingress from tailnet") + a.mu.Lock() + defer a.mu.Unlock() + a.managedIngresses.Remove(ing.UID) + gaugeIngressResources.Set(int64(a.managedIngresses.Len())) return nil } @@ -97,6 +120,10 @@ func (a *IngressReconciler) maybeProvision(ctx context.Context, logger *zap.Suga return fmt.Errorf("failed to add finalizer: %w", err) } } + a.mu.Lock() + a.managedIngresses.Add(ing.UID) + gaugeIngressResources.Set(int64(a.managedIngresses.Len())) + a.mu.Unlock() // magic443 is a fake hostname that we can use to tell containerboot to swap // out with the real hostname once it's known. diff --git a/cmd/k8s-operator/proxy.go b/cmd/k8s-operator/proxy.go index 799cbb033..3040bd173 100644 --- a/cmd/k8s-operator/proxy.go +++ b/cmd/k8s-operator/proxy.go @@ -25,6 +25,7 @@ import ( "tailscale.com/tailcfg" "tailscale.com/tsnet" "tailscale.com/types/logger" + "tailscale.com/util/clientmetric" "tailscale.com/util/set" ) @@ -42,6 +43,8 @@ func addWhoIsToRequest(r *http.Request, who *apitype.WhoIsResponse) *http.Reques return r.WithContext(context.WithValue(r.Context(), whoIsKey{}, who)) } +var counterNumRequestsProxied = clientmetric.NewCounter("k8s_auth_proxy_requests_proxied") + // launchAuthProxy launches the auth proxy, which is a small HTTP server that // authenticates requests using the Tailscale LocalAPI and then proxies them to // the kube-apiserver. @@ -84,6 +87,7 @@ func (h *authProxy) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Error(w, "failed to authenticate caller", http.StatusInternalServerError) return } + counterNumRequestsProxied.Add(1) h.rp.ServeHTTP(w, addWhoIsToRequest(r, who)) } diff --git a/cmd/k8s-operator/svc.go b/cmd/k8s-operator/svc.go index 40eb6a9a9..b99053725 100644 --- a/cmd/k8s-operator/svc.go +++ b/cmd/k8s-operator/svc.go @@ -10,13 +10,17 @@ import ( "fmt" "net/netip" "strings" + "sync" "go.uber.org/zap" "golang.org/x/exp/slices" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "tailscale.com/util/clientmetric" + "tailscale.com/util/set" ) type ServiceReconciler struct { @@ -24,8 +28,26 @@ type ServiceReconciler struct { ssr *tailscaleSTSReconciler logger *zap.SugaredLogger isDefaultLoadBalancer bool + + mu sync.Mutex // protects following + + // managedIngressProxies is a set of all ingress proxies that we're + // currently managing. This is only used for metrics. + managedIngressProxies set.Slice[types.UID] + // managedEgressProxies is a set of all egress proxies that we're currently + // managing. This is only used for metrics. + managedEgressProxies set.Slice[types.UID] } +var ( + // gaugeEgressProxies tracks the number of egress proxies that we're + // currently managing. + gaugeEgressProxies = clientmetric.NewGauge("k8s_egress_proxies") + // gaugeIngressProxies tracks the number of ingress proxies that we're + // currently managing. + gaugeIngressProxies = clientmetric.NewGauge("k8s_ingress_proxies") +) + func childResourceLabels(name, ns, typ string) map[string]string { // You might wonder why we're using owner references, since they seem to be // built for exactly this. Unfortunately, Kubernetes does not support @@ -71,6 +93,12 @@ func (a *ServiceReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare ix := slices.Index(svc.Finalizers, FinalizerName) if ix < 0 { logger.Debugf("no finalizer, nothing to do") + a.mu.Lock() + defer a.mu.Unlock() + a.managedIngressProxies.Remove(svc.UID) + a.managedEgressProxies.Remove(svc.UID) + gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len())) + gaugeEgressProxies.Set(int64(a.managedEgressProxies.Len())) return nil } @@ -91,6 +119,13 @@ func (a *ServiceReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare // cleanup removes the tailscale finalizer, which will make all future // reconciles exit early. logger.Infof("unexposed service from tailnet") + + a.mu.Lock() + defer a.mu.Unlock() + a.managedIngressProxies.Remove(svc.UID) + a.managedEgressProxies.Remove(svc.UID) + gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len())) + gaugeEgressProxies.Set(int64(a.managedEgressProxies.Len())) return nil } @@ -130,11 +165,17 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga ChildResourceLabels: crl, } + a.mu.Lock() if a.shouldExpose(svc) { sts.ClusterTargetIP = svc.Spec.ClusterIP + a.managedIngressProxies.Add(svc.UID) + gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len())) } else if a.hasTailnetTargetAnnotation(svc) { sts.TailnetTargetIP = svc.Annotations[AnnotationTailnetTargetIP] + a.managedEgressProxies.Add(svc.UID) + gaugeEgressProxies.Set(int64(a.managedEgressProxies.Len())) } + a.mu.Unlock() var hsvc *corev1.Service if hsvc, err = a.ssr.Provision(ctx, logger, sts); err != nil { diff --git a/util/set/slice.go b/util/set/slice.go index d52376cf4..fe764b550 100644 --- a/util/set/slice.go +++ b/util/set/slice.go @@ -20,6 +20,9 @@ type Slice[T comparable] struct { // The returned value is only valid until ss is modified again. func (ss *Slice[T]) Slice() views.Slice[T] { return views.SliceOf(ss.slice) } +// Len returns the number of elements in the set. +func (ss *Slice[T]) Len() int { return len(ss.slice) } + // Contains reports whether v is in the set. // The amortized cost is O(1). func (ss *Slice[T]) Contains(v T) bool {