diff --git a/cmd/k8s-operator/connector.go b/cmd/k8s-operator/connector.go
index 7fa311532..f4d518faa 100644
--- a/cmd/k8s-operator/connector.go
+++ b/cmd/k8s-operator/connector.go
@@ -25,6 +25,7 @@ import (
     "k8s.io/client-go/tools/record"
     "sigs.k8s.io/controller-runtime/pkg/client"
     "sigs.k8s.io/controller-runtime/pkg/reconcile"

+    tsoperator "tailscale.com/k8s-operator"
     tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
     "tailscale.com/kube/kubetypes"
@@ -207,6 +208,7 @@ func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logge
         ProxyClassName: proxyClass,
         proxyType:      proxyTypeConnector,
         LoginServer:    a.ssr.loginServer,
+        Tailnet:        cn.Spec.Tailnet,
     }

     if cn.Spec.SubnetRouter != nil && len(cn.Spec.SubnetRouter.AdvertiseRoutes) > 0 {
@@ -276,7 +278,7 @@ func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logge
 }

 func (a *ConnectorReconciler) maybeCleanupConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) (bool, error) {
-    if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(cn.Name, a.tsnamespace, "connector"), proxyTypeConnector); err != nil {
+    if done, err := a.ssr.Cleanup(ctx, cn.Spec.Tailnet, logger, childResourceLabels(cn.Name, a.tsnamespace, "connector"), proxyTypeConnector); err != nil {
         return false, fmt.Errorf("failed to cleanup Connector resources: %w", err)
     } else if !done {
         logger.Debugf("Connector cleanup not done yet, waiting for next reconcile")
diff --git a/cmd/k8s-operator/ingress.go b/cmd/k8s-operator/ingress.go
index 050b03f55..9ef173ece 100644
--- a/cmd/k8s-operator/ingress.go
+++ b/cmd/k8s-operator/ingress.go
@@ -102,7 +102,7 @@ func (a *IngressReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare
         return nil
     }

-    if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(ing.Name, ing.Namespace, "ingress"), proxyTypeIngressResource); err != nil {
+    if done, err := a.ssr.Cleanup(ctx, operatorTailnet, logger, childResourceLabels(ing.Name, ing.Namespace, "ingress"), proxyTypeIngressResource); err != nil {
         return fmt.Errorf("failed to cleanup: %w", err)
     } else if !done {
         logger.Debugf("cleanup not done yet, waiting for next reconcile")
diff --git a/cmd/k8s-operator/proxygroup.go b/cmd/k8s-operator/proxygroup.go
index 946e017a2..3a50ed8fb 100644
--- a/cmd/k8s-operator/proxygroup.go
+++ b/cmd/k8s-operator/proxygroup.go
@@ -49,11 +49,12 @@ import (
 )

 const (
-    reasonProxyGroupCreationFailed = "ProxyGroupCreationFailed"
-    reasonProxyGroupReady          = "ProxyGroupReady"
-    reasonProxyGroupAvailable      = "ProxyGroupAvailable"
-    reasonProxyGroupCreating       = "ProxyGroupCreating"
-    reasonProxyGroupInvalid        = "ProxyGroupInvalid"
+    reasonProxyGroupCreationFailed     = "ProxyGroupCreationFailed"
+    reasonProxyGroupReady              = "ProxyGroupReady"
+    reasonProxyGroupAvailable          = "ProxyGroupAvailable"
+    reasonProxyGroupCreating           = "ProxyGroupCreating"
+    reasonProxyGroupInvalid            = "ProxyGroupInvalid"
+    reasonProxyGroupTailnetUnavailable = "ProxyGroupTailnetUnavailable"

     // Copied from k8s.io/apiserver/pkg/registry/generic/registry/store.go@cccad306d649184bf2a0e319ba830c53f65c445c
     optimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again"
@@ -117,6 +118,23 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
     } else if err != nil {
         return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com ProxyGroup: %w", err)
     }
+
+    tailscaleClient := r.tsClient
+    if pg.Spec.Tailnet != "" {
+        tc, err := clientForTailnet(ctx, r.Client, r.tsNamespace, pg.Spec.Tailnet)
+        if err != nil {
+            oldPGStatus := pg.Status.DeepCopy()
+            nrr := &notReadyReason{
+                reason:  reasonProxyGroupTailnetUnavailable,
+                message: err.Error(),
+            }
+
+            return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, make(map[string][]netip.AddrPort)))
+        }
+
+        tailscaleClient = tc
+    }
+
     if markedForDeletion(pg) {
         logger.Debugf("ProxyGroup is being deleted, cleaning up resources")
         ix := xslices.Index(pg.Finalizers, FinalizerName)
@@ -125,7 +143,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
             return reconcile.Result{}, nil
         }

-        if done, err := r.maybeCleanup(ctx, pg); err != nil {
+        if done, err := r.maybeCleanup(ctx, tailscaleClient, pg); err != nil {
             if strings.Contains(err.Error(), optimisticLockErrorMsg) {
                 logger.Infof("optimistic lock error, retrying: %s", err)
                 return reconcile.Result{}, nil
@@ -144,7 +162,7 @@
     }

     oldPGStatus := pg.Status.DeepCopy()
-    staticEndpoints, nrr, err := r.reconcilePG(ctx, pg, logger)
+    staticEndpoints, nrr, err := r.reconcilePG(ctx, tailscaleClient, pg, logger)

     return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, staticEndpoints))
 }
@@ -152,7 +170,7 @@
 // for deletion. It is separated out from Reconcile to make a clear separation
 // between reconciling the ProxyGroup, and posting the status of its created
 // resources onto the ProxyGroup status field.
-func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (map[string][]netip.AddrPort, *notReadyReason, error) {
+func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (map[string][]netip.AddrPort, *notReadyReason, error) {
     if !slices.Contains(pg.Finalizers, FinalizerName) {
         // This log line is printed exactly once during initial provisioning,
         // because once the finalizer is in place this block gets skipped. So,
@@ -193,7 +211,7 @@ func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, pg *tsapi.ProxyG
         return notReady(reasonProxyGroupInvalid, fmt.Sprintf("invalid ProxyGroup spec: %v", err))
     }

-    staticEndpoints, nrr, err := r.maybeProvision(ctx, pg, proxyClass)
+    staticEndpoints, nrr, err := r.maybeProvision(ctx, tailscaleClient, pg, proxyClass)
     if err != nil {
         return nil, nrr, err
     }
@@ -279,7 +297,7 @@ func (r *ProxyGroupReconciler) validate(ctx context.Context, pg *tsapi.ProxyGrou
     return errors.Join(errs...)
 }

-func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (map[string][]netip.AddrPort, *notReadyReason, error) {
+func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (map[string][]netip.AddrPort, *notReadyReason, error) {
     logger := r.logger(pg.Name)
     r.mu.Lock()
     r.ensureAddedToGaugeForProxyGroup(pg)
@@ -302,7 +320,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
         }
     }

-    staticEndpoints, err := r.ensureConfigSecretsCreated(ctx, pg, proxyClass, svcToNodePorts)
+    staticEndpoints, err := r.ensureConfigSecretsCreated(ctx, tailscaleClient, pg, proxyClass, svcToNodePorts)
     if err != nil {
         var selectorErr *FindStaticEndpointErr
         if errors.As(err, &selectorErr) {
@@ -414,7 +432,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
         return r.notReadyErrf(pg, logger, "error reconciling metrics resources: %w", err)
     }

-    if err := r.cleanupDanglingResources(ctx, pg, proxyClass); err != nil {
+    if err := r.cleanupDanglingResources(ctx, tailscaleClient, pg, proxyClass); err != nil {
         return r.notReadyErrf(pg, logger, "error cleaning up dangling resources: %w", err)
     }

@@ -611,7 +629,7 @@ func (r *ProxyGroupReconciler) ensureNodePortServiceCreated(ctx context.Context,

 // cleanupDanglingResources ensures we don't leak config secrets, state secrets, and
 // tailnet devices when the number of replicas specified is reduced.
-func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg *tsapi.ProxyGroup, pc *tsapi.ProxyClass) error {
+func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, pc *tsapi.ProxyClass) error {
     logger := r.logger(pg.Name)
     metadata, err := r.getNodeMetadata(ctx, pg)
     if err != nil {
@@ -625,7 +643,7 @@ func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg

         // Dangling resource, delete the config + state Secrets, as well as
         // deleting the device from the tailnet.
-        if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil {
+        if err := r.deleteTailnetDevice(ctx, tailscaleClient, m.tsID, logger); err != nil {
             return err
         }
         if err := r.Delete(ctx, m.stateSecret); err != nil && !apierrors.IsNotFound(err) {
@@ -668,7 +686,7 @@ func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg
 // maybeCleanup just deletes the device from the tailnet. All the kubernetes
 // resources linked to a ProxyGroup will get cleaned up via owner references
 // (which we can use because they are all in the same namespace).
-func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.ProxyGroup) (bool, error) {
+func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup) (bool, error) {
     logger := r.logger(pg.Name)

     metadata, err := r.getNodeMetadata(ctx, pg)
@@ -677,7 +695,7 @@ func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.Proxy
     }

     for _, m := range metadata {
-        if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil {
+        if err := r.deleteTailnetDevice(ctx, tailscaleClient, m.tsID, logger); err != nil {
             return false, err
         }
     }
@@ -698,9 +716,9 @@ func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.Proxy
     return true, nil
 }

-func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error {
+func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, tailscaleClient tsClient, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error {
     logger.Debugf("deleting device %s from control", string(id))
-    if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil {
+    if err := tailscaleClient.DeleteDevice(ctx, string(id)); err != nil {
         errResp := &tailscale.ErrResponse{}
         if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
             logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id))
@@ -714,7 +732,13 @@ func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailc
     return nil
 }

-func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass, svcToNodePorts map[string]uint16) (endpoints map[string][]netip.AddrPort, err error) {
+func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(
+    ctx context.Context,
+    tailscaleClient tsClient,
+    pg *tsapi.ProxyGroup,
+    proxyClass *tsapi.ProxyClass,
+    svcToNodePorts map[string]uint16,
+) (endpoints map[string][]netip.AddrPort, err error) {
     logger := r.logger(pg.Name)
     endpoints = make(map[string][]netip.AddrPort, pgReplicas(pg)) // keyed by Service name.
     for i := range pgReplicas(pg) {
@@ -728,7 +752,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
         }

         var existingCfgSecret *corev1.Secret // unmodified copy of secret
-        if err := r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil {
+        if err = r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil {
             logger.Debugf("Secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName())
             existingCfgSecret = cfgSecret.DeepCopy()
         } else if !apierrors.IsNotFound(err) {
@@ -742,7 +766,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
             if len(tags) == 0 {
                 tags = r.defaultTags
             }
-            key, err := newAuthKey(ctx, r.tsClient, tags)
+            key, err := newAuthKey(ctx, tailscaleClient, tags)
             if err != nil {
                 return nil, err
             }
@@ -757,7 +781,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
                 Namespace: r.tsNamespace,
             },
         }
-        if err := r.Get(ctx, client.ObjectKeyFromObject(stateSecret), stateSecret); err != nil && !apierrors.IsNotFound(err) {
+        if err = r.Get(ctx, client.ObjectKeyFromObject(stateSecret), stateSecret); err != nil && !apierrors.IsNotFound(err) {
            return nil, err
         }

diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go
index 2b6d1290e..2919e535c 100644
--- a/cmd/k8s-operator/sts.go
+++ b/cmd/k8s-operator/sts.go
@@ -107,6 +107,7 @@ const (
     letsEncryptStagingEndpoint = "https://acme-staging-v02.api.letsencrypt.org/directory"

     mainContainerName = "tailscale"
+    operatorTailnet   = ""
 )

 var (
@@ -152,6 +153,9 @@ type tailscaleSTSConfig struct {
     // HostnamePrefix specifies the desired prefix for the device's hostname. The hostname will be suffixed with the
     // ordinal number generated by the StatefulSet.
     HostnamePrefix string
+
+    // Tailnet specifies the Tailnet resource to use for producing auth keys.
+    Tailnet string
 }

 type connector struct {
@@ -194,6 +198,16 @@ func IsHTTPSEnabledOnTailnet(tsnetServer tsnetServer) bool {
 // Provision ensures that the StatefulSet for the given service is running and
 // up to date.
 func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig) (*corev1.Service, error) {
+    tailscaleClient := a.tsClient
+    if sts.Tailnet != "" {
+        tc, err := clientForTailnet(ctx, a.Client, a.operatorNamespace, sts.Tailnet)
+        if err != nil {
+            return nil, err
+        }
+
+        tailscaleClient = tc
+    }
+
     // Do full reconcile.
     // TODO (don't create Service for the Connector)
     hsvc, err := a.reconcileHeadlessService(ctx, logger, sts)
@@ -213,7 +227,7 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga
     }
     sts.ProxyClass = proxyClass

-    secretNames, err := a.provisionSecrets(ctx, logger, sts, hsvc)
+    secretNames, err := a.provisionSecrets(ctx, tailscaleClient, logger, sts, hsvc)
     if err != nil {
         return nil, fmt.Errorf("failed to create or get API key secret: %w", err)
     }
@@ -237,7 +251,18 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga
 // Cleanup removes all resources associated that were created by Provision with
 // the given labels. It returns true when all resources have been removed,
 // otherwise it returns false and the caller should retry later.
-func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, logger *zap.SugaredLogger, labels map[string]string, typ string) (done bool, _ error) {
+func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, tailnet string, logger *zap.SugaredLogger, labels map[string]string, typ string) (done bool, _ error) {
+    tailscaleClient := a.tsClient
+    if tailnet != "" {
+        tc, err := clientForTailnet(ctx, a.Client, a.operatorNamespace, tailnet)
+        if err != nil {
+            logger.Errorf("failed to get tailscale client: %v", err)
+            return false, nil
+        }
+
+        tailscaleClient = tc
+    }
+
     // Need to delete the StatefulSet first, and delete it with foreground
     // cascading deletion. That way, the pod that's writing to the Secret will
     // stop running before we start looking at the Secret's contents, and
@@ -279,7 +304,7 @@ func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, logger *zap.Sugare
     for _, dev := range devices {
         if dev.id != "" {
             logger.Debugf("deleting device %s from control", string(dev.id))
-            if err = a.tsClient.DeleteDevice(ctx, string(dev.id)); err != nil {
+            if err = tailscaleClient.DeleteDevice(ctx, string(dev.id)); err != nil {
                 errResp := &tailscale.ErrResponse{}
                 if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
                     logger.Debugf("device %s not found, likely because it has already been deleted from control", string(dev.id))
@@ -360,7 +385,7 @@ func (a *tailscaleSTSReconciler) reconcileHeadlessService(ctx context.Context, l
     return createOrUpdate(ctx, a.Client, a.operatorNamespace, hsvc, func(svc *corev1.Service) { svc.Spec = hsvc.Spec })
 }

-func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) ([]string, error) {
+func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, tailscaleClient tsClient, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) ([]string, error) {
     secretNames := make([]string, stsC.Replicas)

     // Start by ensuring we have Secrets for the desired number of replicas. This will handle both creating and scaling
@@ -403,7 +428,7 @@ func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, logger *z
             if len(tags) == 0 {
                 tags = a.defaultTags
             }
-            authKey, err = newAuthKey(ctx, a.tsClient, tags)
+            authKey, err = newAuthKey(ctx, tailscaleClient, tags)
             if err != nil {
                 return nil, err
             }
@@ -477,7 +502,7 @@ func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, logger *z

     if dev != nil && dev.id != "" {
         var errResp *tailscale.ErrResponse
-        err = a.tsClient.DeleteDevice(ctx, string(dev.id))
+        err = tailscaleClient.DeleteDevice(ctx, string(dev.id))
         switch {
         case errors.As(err, &errResp) && errResp.Status == http.StatusNotFound:
             // This device has possibly already been deleted in the admin console. So we can ignore this
diff --git a/cmd/k8s-operator/svc.go b/cmd/k8s-operator/svc.go
index 5c163e081..ec7bb8080 100644
--- a/cmd/k8s-operator/svc.go
+++ b/cmd/k8s-operator/svc.go
@@ -23,6 +23,7 @@ import (
     "k8s.io/client-go/tools/record"
     "sigs.k8s.io/controller-runtime/pkg/client"
     "sigs.k8s.io/controller-runtime/pkg/reconcile"

+    tsoperator "tailscale.com/k8s-operator"
     tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
     "tailscale.com/kube/kubetypes"
@@ -167,7 +168,7 @@ func (a *ServiceReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare
         proxyTyp = proxyTypeIngressService
     }

-    if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(svc.Name, svc.Namespace, "svc"), proxyTyp); err != nil {
+    if done, err := a.ssr.Cleanup(ctx, operatorTailnet, logger, childResourceLabels(svc.Name, svc.Namespace, "svc"), proxyTyp); err != nil {
         return fmt.Errorf("failed to cleanup: %w", err)
     } else if !done {
         logger.Debugf("cleanup not done yet, waiting for next reconcile")
diff --git a/cmd/k8s-operator/tailnet.go b/cmd/k8s-operator/tailnet.go
new file mode 100644
index 000000000..3226ab023
--- /dev/null
+++ b/cmd/k8s-operator/tailnet.go
@@ -0,0 +1,58 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build !plan9
+
+package main
+
+import (
+    "context"
+    "fmt"
+
+    "golang.org/x/oauth2"
+    "golang.org/x/oauth2/clientcredentials"
+    corev1 "k8s.io/api/core/v1"
+    "sigs.k8s.io/controller-runtime/pkg/client"
+
+    "tailscale.com/internal/client/tailscale"
+    "tailscale.com/ipn"
+    operatorutils "tailscale.com/k8s-operator"
+    tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
+)
+
+func clientForTailnet(ctx context.Context, cl client.Client, namespace, name string) (tsClient, error) {
+    var tn tsapi.Tailnet
+    if err := cl.Get(ctx, client.ObjectKey{Name: name}, &tn); err != nil {
+        return nil, fmt.Errorf("failed to get Tailnet %q: %w", name, err)
+    }
+
+    if !operatorutils.TailnetIsReady(&tn) {
+        return nil, fmt.Errorf("tailnet %q is not ready", name)
+    }
+
+    var secret corev1.Secret
+    if err := cl.Get(ctx, client.ObjectKey{Name: tn.Spec.Credentials.SecretName, Namespace: namespace}, &secret); err != nil {
+        return nil, fmt.Errorf("failed to get Secret %q in namespace %q: %w", tn.Spec.Credentials.SecretName, namespace, err)
+    }
+
+    baseURL := ipn.DefaultControlURL
+    if tn.Spec.LoginURL != "" {
+        baseURL = tn.Spec.LoginURL
+    }
+
+    credentials := clientcredentials.Config{
+        ClientID:     string(secret.Data["client_id"]),
+        ClientSecret: string(secret.Data["client_secret"]),
+        TokenURL:     baseURL + "/api/v2/oauth/token",
+    }
+
+    source := credentials.TokenSource(ctx)
+    httpClient := oauth2.NewClient(ctx, source)
+
+    ts := tailscale.NewClient(defaultTailnet, nil)
+    ts.UserAgent = "tailscale-k8s-operator"
+    ts.HTTPClient = httpClient
+    ts.BaseURL = baseURL
+
+    return ts, nil
+}
diff --git a/cmd/k8s-operator/tsrecorder.go b/cmd/k8s-operator/tsrecorder.go
index bfb01fa86..3e8608bc8 100644
--- a/cmd/k8s-operator/tsrecorder.go
+++ b/cmd/k8s-operator/tsrecorder.go
@@ -42,10 +42,11 @@ import (
 )

 const (
-    reasonRecorderCreationFailed = "RecorderCreationFailed"
-    reasonRecorderCreating       = "RecorderCreating"
-    reasonRecorderCreated        = "RecorderCreated"
-    reasonRecorderInvalid        = "RecorderInvalid"
+    reasonRecorderCreationFailed     = "RecorderCreationFailed"
+    reasonRecorderCreating           = "RecorderCreating"
+    reasonRecorderCreated            = "RecorderCreated"
+    reasonRecorderInvalid            = "RecorderInvalid"
+    reasonRecorderTailnetUnavailable = "RecorderTailnetUnavailable"

     currentProfileKey = "_current-profile"
 )
@@ -84,6 +85,30 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
     } else if err != nil {
         return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com Recorder: %w", err)
     }
+
+    oldTSRStatus := tsr.Status.DeepCopy()
+    setStatusReady := func(tsr *tsapi.Recorder, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) {
+        tsoperator.SetRecorderCondition(tsr, tsapi.RecorderReady, status, reason, message, tsr.Generation, r.clock, logger)
+        if !apiequality.Semantic.DeepEqual(oldTSRStatus, &tsr.Status) {
+            // An error encountered here should get returned by the Reconcile function.
+            if updateErr := r.Client.Status().Update(ctx, tsr); updateErr != nil {
+                return reconcile.Result{}, errors.Join(err, updateErr)
+            }
+        }
+
+        return reconcile.Result{}, nil
+    }
+
+    tailscaleClient := r.tsClient
+    if tsr.Spec.Tailnet != "" {
+        tc, err := clientForTailnet(ctx, r.Client, r.tsNamespace, tsr.Spec.Tailnet)
+        if err != nil {
+            return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderTailnetUnavailable, err.Error())
+        }
+
+        tailscaleClient = tc
+    }
+
     if markedForDeletion(tsr) {
         logger.Debugf("Recorder is being deleted, cleaning up resources")
         ix := xslices.Index(tsr.Finalizers, FinalizerName)
@@ -92,7 +117,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
         return reconcile.Result{}, nil
     }

-    if done, err := r.maybeCleanup(ctx, tsr); err != nil {
+    if done, err := r.maybeCleanup(ctx, tsr, tailscaleClient); err != nil {
         return reconcile.Result{}, err
     } else if !done {
         logger.Debugf("Recorder resource cleanup not yet finished, will retry...")
@@ -106,19 +131,6 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
         return reconcile.Result{}, nil
     }

-    oldTSRStatus := tsr.Status.DeepCopy()
-    setStatusReady := func(tsr *tsapi.Recorder, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) {
-        tsoperator.SetRecorderCondition(tsr, tsapi.RecorderReady, status, reason, message, tsr.Generation, r.clock, logger)
-        if !apiequality.Semantic.DeepEqual(oldTSRStatus, &tsr.Status) {
-            // An error encountered here should get returned by the Reconcile function.
-            if updateErr := r.Client.Status().Update(ctx, tsr); updateErr != nil {
-                return reconcile.Result{}, errors.Join(err, updateErr)
-            }
-        }
-
-        return reconcile.Result{}, nil
-    }
-
     if !slices.Contains(tsr.Finalizers, FinalizerName) {
         // This log line is printed exactly once during initial provisioning,
         // because once the finalizer is in place this block gets skipped. So,
@@ -137,7 +149,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
         return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderInvalid, message)
     }

-    if err = r.maybeProvision(ctx, tsr); err != nil {
+    if err = r.maybeProvision(ctx, tailscaleClient, tsr); err != nil {
         reason := reasonRecorderCreationFailed
         message := fmt.Sprintf("failed creating Recorder: %s", err)
         if strings.Contains(err.Error(), optimisticLockErrorMsg) {
@@ -155,7 +167,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
     return setStatusReady(tsr, metav1.ConditionTrue, reasonRecorderCreated, reasonRecorderCreated)
 }

-func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Recorder) error {
+func (r *RecorderReconciler) maybeProvision(ctx context.Context, tailscaleClient tsClient, tsr *tsapi.Recorder) error {
     logger := r.logger(tsr.Name)

     r.mu.Lock()
@@ -163,7 +175,7 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco
     gaugeRecorderResources.Set(int64(r.recorders.Len()))
     r.mu.Unlock()

-    if err := r.ensureAuthSecretsCreated(ctx, tsr); err != nil {
+    if err := r.ensureAuthSecretsCreated(ctx, tailscaleClient, tsr); err != nil {
         return fmt.Errorf("error creating secrets: %w", err)
     }

@@ -241,13 +253,13 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco

     // If we have scaled the recorder down, we will have dangling state secrets
     // that we need to clean up.
-    if err = r.maybeCleanupSecrets(ctx, tsr); err != nil {
+    if err = r.maybeCleanupSecrets(ctx, tailscaleClient, tsr); err != nil {
         return fmt.Errorf("error cleaning up Secrets: %w", err)
     }

     var devices []tsapi.RecorderTailnetDevice
     for replica := range replicas {
-        dev, ok, err := r.getDeviceInfo(ctx, tsr.Name, replica)
+        dev, ok, err := r.getDeviceInfo(ctx, tailscaleClient, tsr.Name, replica)
         switch {
         case err != nil:
             return fmt.Errorf("failed to get device info: %w", err)
@@ -312,7 +324,7 @@ func (r *RecorderReconciler) maybeCleanupServiceAccounts(ctx context.Context, ts
     return nil
 }

-func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tsr *tsapi.Recorder) error {
+func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tailscaleClient tsClient, tsr *tsapi.Recorder) error {
     options := []client.ListOption{
         client.InNamespace(r.tsNamespace),
         client.MatchingLabels(tsrLabels("recorder", tsr.Name, nil)),
@@ -354,7 +366,7 @@ func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tsr *tsapi

         var errResp *tailscale.ErrResponse
         r.log.Debugf("deleting device %s", devicePrefs.Config.NodeID)
-        err = r.tsClient.DeleteDevice(ctx, string(devicePrefs.Config.NodeID))
+        err = tailscaleClient.DeleteDevice(ctx, string(devicePrefs.Config.NodeID))
         switch {
         case errors.As(err, &errResp) && errResp.Status == http.StatusNotFound:
             // This device has possibly already been deleted in the admin console. So we can ignore this
@@ -375,7 +387,7 @@ func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tsr *tsapi
 // maybeCleanup just deletes the device from the tailnet. All the kubernetes
 // resources linked to a Recorder will get cleaned up via owner references
 // (which we can use because they are all in the same namespace).
-func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Recorder) (bool, error) {
+func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Recorder, tailscaleClient tsClient) (bool, error) {
     logger := r.logger(tsr.Name)

     var replicas int32 = 1
@@ -399,7 +411,7 @@ func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Record

         nodeID := string(devicePrefs.Config.NodeID)
         logger.Debugf("deleting device %s from control", nodeID)
-        if err = r.tsClient.DeleteDevice(ctx, nodeID); err != nil {
+        if err = tailscaleClient.DeleteDevice(ctx, nodeID); err != nil {
            errResp := &tailscale.ErrResponse{}
            if errors.As(err, errResp) && errResp.Status == http.StatusNotFound {
                logger.Debugf("device %s not found, likely because it has already been deleted from control", nodeID)
@@ -425,7 +437,7 @@ func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Record
     return true, nil
 }

-func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tsr *tsapi.Recorder) error {
+func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tailscaleClient tsClient, tsr *tsapi.Recorder) error {
     var replicas int32 = 1
     if tsr.Spec.Replicas != nil {
         replicas = *tsr.Spec.Replicas
@@ -453,7 +465,7 @@ func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tsr *
             return fmt.Errorf("failed to get Secret %q: %w", key.Name, err)
         }

-        authKey, err := newAuthKey(ctx, r.tsClient, tags.Stringify())
+        authKey, err := newAuthKey(ctx, tailscaleClient, tags.Stringify())
         if err != nil {
             return err
         }
@@ -555,7 +567,7 @@ func getDevicePrefs(secret *corev1.Secret) (prefs prefs, ok bool, err error) {
     return prefs, ok, nil
 }

-func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string, replica int32) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
+func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tailscaleClient tsClient, tsrName string, replica int32) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
     secret, err := r.getStateSecret(ctx, tsrName, replica)
     if err != nil || secret == nil {
         return tsapi.RecorderTailnetDevice{}, false, err
@@ -569,7 +581,7 @@ func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string,
     // TODO(tomhjp): The profile info doesn't include addresses, which is why we
     // need the API. Should maybe update tsrecorder to write IPs to the state
     // Secret like containerboot does.
-    device, err := r.tsClient.Device(ctx, string(prefs.Config.NodeID), nil)
+    device, err := tailscaleClient.Device(ctx, string(prefs.Config.NodeID), nil)
     if err != nil {
         return tsapi.RecorderTailnetDevice{}, false, fmt.Errorf("failed to get device info from API: %w", err)
     }
diff --git a/k8s-operator/conditions.go b/k8s-operator/conditions.go
index 4e20cf02d..bce6e39bd 100644
--- a/k8s-operator/conditions.go
+++ b/k8s-operator/conditions.go
@@ -196,3 +196,14 @@ func SvcIsReady(svc *corev1.Service) bool {
     cond := svc.Status.Conditions[idx]
     return cond.Status == metav1.ConditionTrue
 }
+
+func TailnetIsReady(tn *tsapi.Tailnet) bool {
+    idx := xslices.IndexFunc(tn.Status.Conditions, func(cond metav1.Condition) bool {
+        return cond.Type == string(tsapi.TailnetReady)
+    })
+    if idx == -1 {
+        return false
+    }
+    cond := tn.Status.Conditions[idx]
+    return cond.Status == metav1.ConditionTrue && cond.ObservedGeneration == tn.Generation
+}
diff --git a/k8s-operator/reconciler/reconciler.go b/k8s-operator/reconciler/reconciler.go
index b1d9181a6..275179096 100644
--- a/k8s-operator/reconciler/reconciler.go
+++ b/k8s-operator/reconciler/reconciler.go
@@ -1,6 +1,8 @@
 // Copyright (c) Tailscale Inc & AUTHORS
 // SPDX-License-Identifier: BSD-3-Clause

+//go:build !plan9
+
 // Package reconciler provides utilities for working with Kubernetes resources within controller reconciliation
 // loops.
 package reconciler
diff --git a/k8s-operator/reconciler/reconciler_test.go b/k8s-operator/reconciler/reconciler_test.go
index d9b5ee486..573cd4d9d 100644
--- a/k8s-operator/reconciler/reconciler_test.go
+++ b/k8s-operator/reconciler/reconciler_test.go
@@ -1,6 +1,8 @@
 // Copyright (c) Tailscale Inc & AUTHORS
 // SPDX-License-Identifier: BSD-3-Clause

+//go:build !plan9
+
 package reconciler_test

 import (
diff --git a/k8s-operator/reconciler/tailnet/mocks_test.go b/k8s-operator/reconciler/tailnet/mocks_test.go
index 6a0ac42bb..7f3f2ddb9 100644
--- a/k8s-operator/reconciler/tailnet/mocks_test.go
+++ b/k8s-operator/reconciler/tailnet/mocks_test.go
@@ -1,6 +1,8 @@
 // Copyright (c) Tailscale Inc & AUTHORS
 // SPDX-License-Identifier: BSD-3-Clause

+//go:build !plan9
+
 package tailnet_test

 import (
diff --git a/k8s-operator/reconciler/tailnet/tailnet.go b/k8s-operator/reconciler/tailnet/tailnet.go
index 6073a6fcc..f574c2848 100644
--- a/k8s-operator/reconciler/tailnet/tailnet.go
+++ b/k8s-operator/reconciler/tailnet/tailnet.go
@@ -1,6 +1,8 @@
 // Copyright (c) Tailscale Inc & AUTHORS
 // SPDX-License-Identifier: BSD-3-Clause

+//go:build !plan9
+
 // Package tailnet provides reconciliation logic for the Tailnet custom resource definition. It is responsible for
 // ensuring the referenced OAuth credentials are valid and have the required scopes to be able to generate authentication
 // keys, manage devices & manage VIP services.
diff --git a/k8s-operator/reconciler/tailnet/tailnet_test.go b/k8s-operator/reconciler/tailnet/tailnet_test.go
index a5aea7233..c3e4d62cf 100644
--- a/k8s-operator/reconciler/tailnet/tailnet_test.go
+++ b/k8s-operator/reconciler/tailnet/tailnet_test.go
@@ -1,6 +1,8 @@
 // Copyright (c) Tailscale Inc & AUTHORS
 // SPDX-License-Identifier: BSD-3-Clause

+//go:build !plan9
+
 package tailnet_test

 import (