diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go index 86612d1a6..5ebe22e5f 100644 --- a/cmd/containerboot/main.go +++ b/cmd/containerboot/main.go @@ -769,5 +769,5 @@ func tailscaledConfigFilePath() string { log.Fatalf("no tailscaled config file found in %q for current capability version %q", dir, tailcfg.CurrentCapabilityVersion) } log.Printf("Using tailscaled config file %q for capability version %q", maxCompatVer, tailcfg.CurrentCapabilityVersion) - return path.Join(dir, kubeutils.TailscaledConfigFileNameForCap(maxCompatVer)) + return path.Join(dir, kubeutils.TailscaledConfigFileName(maxCompatVer)) } diff --git a/cmd/k8s-operator/deploy/chart/values.yaml b/cmd/k8s-operator/deploy/chart/values.yaml index 43ed382c6..de003f149 100644 --- a/cmd/k8s-operator/deploy/chart/values.yaml +++ b/cmd/k8s-operator/deploy/chart/values.yaml @@ -79,7 +79,8 @@ proxyConfig: defaultTags: "tag:k8s" firewallMode: auto # If defined, this proxy class will be used as the default proxy class for - # service and ingress resources that do not have a proxy class defined. + # service and ingress resources that do not have a proxy class defined. It + # does not apply to Connector and ProxyGroup resources. defaultProxyClass: "" # apiServerProxyConfig allows to configure whether the operator should expose diff --git a/cmd/k8s-operator/deploy/crds/tailscale.com_proxygroups.yaml b/cmd/k8s-operator/deploy/crds/tailscale.com_proxygroups.yaml index 5f3520d26..32e2ab450 100644 --- a/cmd/k8s-operator/deploy/crds/tailscale.com_proxygroups.yaml +++ b/cmd/k8s-operator/deploy/crds/tailscale.com_proxygroups.yaml @@ -63,14 +63,15 @@ spec: description: |- ProxyClass is the name of the ProxyClass custom resource that contains configuration options that should be applied to the resources created - for this ProxyGroup. If unset, and no default ProxyClass is set, the - operator will create resources with the default configuration. + for this ProxyGroup. 
If unset, the operator will create resources with + the default configuration. type: string replicas: description: |- Replicas specifies how many replicas to create the StatefulSet with. Defaults to 2. type: integer + format: int32 tags: description: |- Tags that the Tailscale devices will be tagged with. Defaults to [tag:k8s]. diff --git a/cmd/k8s-operator/deploy/examples/proxygroup.yaml b/cmd/k8s-operator/deploy/examples/proxygroup.yaml new file mode 100644 index 000000000..337d87f0b --- /dev/null +++ b/cmd/k8s-operator/deploy/examples/proxygroup.yaml @@ -0,0 +1,7 @@ +apiVersion: tailscale.com/v1alpha1 +kind: ProxyGroup +metadata: + name: egress-proxies +spec: + type: egress + replicas: 3 diff --git a/cmd/k8s-operator/deploy/manifests/operator.yaml b/cmd/k8s-operator/deploy/manifests/operator.yaml index 25f3b4d1c..e6358708b 100644 --- a/cmd/k8s-operator/deploy/manifests/operator.yaml +++ b/cmd/k8s-operator/deploy/manifests/operator.yaml @@ -2475,13 +2475,14 @@ spec: description: |- ProxyClass is the name of the ProxyClass custom resource that contains configuration options that should be applied to the resources created - for this ProxyGroup. If unset, and no default ProxyClass is set, the - operator will create resources with the default configuration. + for this ProxyGroup. If unset, the operator will create resources with + the default configuration. type: string replicas: description: |- Replicas specifies how many replicas to create the StatefulSet with. Defaults to 2. 
+ format: int32 type: integer tags: description: |- diff --git a/cmd/k8s-operator/egress-services_test.go b/cmd/k8s-operator/egress-services_test.go index 13fa31784..1adde4e90 100644 --- a/cmd/k8s-operator/egress-services_test.go +++ b/cmd/k8s-operator/egress-services_test.go @@ -34,7 +34,7 @@ func TestTailscaleEgressServices(t *testing.T) { UID: types.UID("1234-UID"), }, Spec: tsapi.ProxyGroupSpec{ - Replicas: pointer.To(3), + Replicas: pointer.To[int32](3), Type: tsapi.ProxyGroupTypeEgress, }, } diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index 5255d4f29..f744c9f5e 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -356,12 +356,12 @@ func runReconcilers(opts reconcilerOpts) { } egressSvcFilter := handler.EnqueueRequestsFromMapFunc(egressSvcsHandler) - proxyGroupFilter := handler.EnqueueRequestsFromMapFunc(egressSvcsFromEgressProxyGroup(mgr.GetClient(), opts.log)) + egressProxyGroupFilter := handler.EnqueueRequestsFromMapFunc(egressSvcsFromEgressProxyGroup(mgr.GetClient(), opts.log)) err = builder. ControllerManagedBy(mgr). Named("egress-svcs-reconciler"). Watches(&corev1.Service{}, egressSvcFilter). - Watches(&tsapi.ProxyGroup{}, proxyGroupFilter). + Watches(&tsapi.ProxyGroup{}, egressProxyGroupFilter). Complete(&egressSvcsReconciler{ Client: mgr.GetClient(), tsNamespace: opts.tailscaleNamespace, @@ -457,6 +457,33 @@ func runReconcilers(opts reconcilerOpts) { startlog.Fatalf("could not create Recorder reconciler: %v", err) } + // ProxyGroup reconciler. + ownedByProxyGroupFilter := handler.EnqueueRequestForOwner(mgr.GetScheme(), mgr.GetRESTMapper(), &tsapi.ProxyGroup{}) + proxyClassFilterForProxyGroup := handler.EnqueueRequestsFromMapFunc(proxyClassHandlerForProxyGroup(mgr.GetClient(), startlog)) + err = builder.ControllerManagedBy(mgr). + For(&tsapi.ProxyGroup{}). + Watches(&appsv1.StatefulSet{}, ownedByProxyGroupFilter). + Watches(&corev1.ServiceAccount{}, ownedByProxyGroupFilter). 
+ Watches(&corev1.Secret{}, ownedByProxyGroupFilter). + Watches(&rbacv1.Role{}, ownedByProxyGroupFilter). + Watches(&rbacv1.RoleBinding{}, ownedByProxyGroupFilter). + Watches(&tsapi.ProxyClass{}, proxyClassFilterForProxyGroup). + Complete(&ProxyGroupReconciler{ + recorder: eventRecorder, + Client: mgr.GetClient(), + l: opts.log.Named("proxygroup-reconciler"), + clock: tstime.DefaultClock{}, + tsClient: opts.tsClient, + + tsNamespace: opts.tailscaleNamespace, + proxyImage: opts.proxyImage, + defaultTags: strings.Split(opts.proxyTags, ","), + tsFirewallMode: opts.proxyFirewallMode, + }) + if err != nil { + startlog.Fatalf("could not create ProxyGroup reconciler: %v", err) + } + startlog.Infof("Startup complete, operator running, version: %s", version.Long()) if err := mgr.Start(signals.SetupSignalHandler()); err != nil { startlog.Fatalf("could not start manager: %v", err) @@ -689,6 +716,27 @@ func proxyClassHandlerForConnector(cl client.Client, logger *zap.SugaredLogger) } } +// proxyClassHandlerForProxyGroup returns a handler that, for a given ProxyClass, +// returns a list of reconcile requests for all ProxyGroups whose +// .spec.proxyClass names that ProxyClass. 
+func proxyClassHandlerForProxyGroup(cl client.Client, logger *zap.SugaredLogger) handler.MapFunc { + return func(ctx context.Context, o client.Object) []reconcile.Request { + pgList := new(tsapi.ProxyGroupList) + if err := cl.List(ctx, pgList); err != nil { + logger.Debugf("error listing ProxyGroups for ProxyClass: %v", err) + return nil + } + reqs := make([]reconcile.Request, 0) + proxyClassName := o.GetName() + for _, pg := range pgList.Items { + if pg.Spec.ProxyClass == proxyClassName { + reqs = append(reqs, reconcile.Request{NamespacedName: client.ObjectKeyFromObject(&pg)}) + } + } + return reqs + } +} + // serviceHandlerForIngress returns a handler for Service events for ingress // reconciler that ensures that if the Service associated with an event is of // interest to the reconciler, the associated Ingress(es) gets be reconciled. diff --git a/cmd/k8s-operator/proxygroup.go b/cmd/k8s-operator/proxygroup.go new file mode 100644 index 000000000..f19339059 --- /dev/null +++ b/cmd/k8s-operator/proxygroup.go @@ -0,0 +1,507 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !plan9 + +package main + +import ( + "context" + "crypto/sha256" + "encoding/json" + "fmt" + "net/http" + "slices" + "sync" + + "github.com/pkg/errors" + "go.uber.org/zap" + xslices "golang.org/x/exp/slices" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "tailscale.com/client/tailscale" + "tailscale.com/ipn" + tsoperator "tailscale.com/k8s-operator" + tsapi "tailscale.com/k8s-operator/apis/v1alpha1" + "tailscale.com/kube/kubetypes" + "tailscale.com/tailcfg" + "tailscale.com/tstime" + 
"tailscale.com/types/ptr" + "tailscale.com/util/clientmetric" + "tailscale.com/util/mak" + "tailscale.com/util/set" +) + +const ( + reasonProxyGroupCreationFailed = "ProxyGroupCreationFailed" + reasonProxyGroupReady = "ProxyGroupReady" + reasonProxyGroupCreating = "ProxyGroupCreating" + reasonProxyGroupInvalid = "ProxyGroupInvalid" +) + +var gaugeProxyGroupResources = clientmetric.NewGauge(kubetypes.MetricProxyGroupCount) + +// ProxyGroupReconciler ensures cluster resources for a ProxyGroup definition. +type ProxyGroupReconciler struct { + client.Client + l *zap.SugaredLogger + recorder record.EventRecorder + clock tstime.Clock + tsClient tsClient + + // User-specified defaults from the helm installation. + tsNamespace string + proxyImage string + defaultTags []string + tsFirewallMode string + + mu sync.Mutex // protects following + proxyGroups set.Slice[types.UID] // for proxygroups gauge +} + +func (r *ProxyGroupReconciler) logger(name string) *zap.SugaredLogger { + return r.l.With("ProxyGroup", name) +} + +func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) { + logger := r.logger(req.Name) + logger.Debugf("starting reconcile") + defer logger.Debugf("reconcile finished") + + pg := new(tsapi.ProxyGroup) + err = r.Get(ctx, req.NamespacedName, pg) + if apierrors.IsNotFound(err) { + logger.Debugf("ProxyGroup not found, assuming it was deleted") + return reconcile.Result{}, nil + } else if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com ProxyGroup: %w", err) + } + if markedForDeletion(pg) { + logger.Debugf("ProxyGroup is being deleted, cleaning up resources") + ix := xslices.Index(pg.Finalizers, FinalizerName) + if ix < 0 { + logger.Debugf("no finalizer, nothing to do") + return reconcile.Result{}, nil + } + + if done, err := r.maybeCleanup(ctx, pg); err != nil { + return reconcile.Result{}, err + } else if !done { + logger.Debugf("ProxyGroup resource cleanup not yet 
finished, will retry...") + return reconcile.Result{RequeueAfter: shortRequeue}, nil + } + + pg.Finalizers = slices.Delete(pg.Finalizers, ix, ix+1) + if err := r.Update(ctx, pg); err != nil { + return reconcile.Result{}, err + } + return reconcile.Result{}, nil + } + + oldPGStatus := pg.Status.DeepCopy() + setStatusReady := func(pg *tsapi.ProxyGroup, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) { + tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, status, reason, message, pg.Generation, r.clock, logger) + if !apiequality.Semantic.DeepEqual(oldPGStatus, pg.Status) { + // An error encountered here should get returned by the Reconcile function. + if updateErr := r.Client.Status().Update(ctx, pg); updateErr != nil { + err = errors.Wrap(err, updateErr.Error()) + } + } + return reconcile.Result{}, err + } + + if !slices.Contains(pg.Finalizers, FinalizerName) { + // This log line is printed exactly once during initial provisioning, + // because once the finalizer is in place this block gets skipped. So, + // this is a nice place to log that the high level, multi-reconcile + // operation is underway. 
+ logger.Infof("ensuring ProxyGroup is set up") + pg.Finalizers = append(pg.Finalizers, FinalizerName) + if err := r.Update(ctx, pg); err != nil { + logger.Errorf("error adding finalizer: %w", err) + return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreationFailed, reasonProxyGroupCreationFailed) + } + } + + if err := r.validate(pg); err != nil { + logger.Errorf("error validating ProxyGroup spec: %w", err) + message := fmt.Sprintf("ProxyGroup is invalid: %s", err) + r.recorder.Eventf(pg, corev1.EventTypeWarning, reasonProxyGroupInvalid, message) + return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupInvalid, message) + } + + if err = r.maybeProvision(ctx, pg); err != nil { + logger.Errorf("error provisioning ProxyGroup resources: %w", err) + message := fmt.Sprintf("failed provisioning ProxyGroup: %s", err) + r.recorder.Eventf(pg, corev1.EventTypeWarning, reasonProxyGroupCreationFailed, message) + return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreationFailed, message) + } + + desiredReplicas := int(pgReplicas(pg)) + if len(pg.Status.Devices) < desiredReplicas { + message := fmt.Sprintf("%d/%d ProxyGroup pods running", len(pg.Status.Devices), desiredReplicas) + logger.Debug(message) + return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreating, message) + } + + if len(pg.Status.Devices) > desiredReplicas { + message := fmt.Sprintf("waiting for %d ProxyGroup pods to shut down", len(pg.Status.Devices)-desiredReplicas) + logger.Debug(message) + return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreating, message) + } + + logger.Info("ProxyGroup resources synced") + return setStatusReady(pg, metav1.ConditionTrue, reasonProxyGroupReady, reasonProxyGroupReady) +} + +func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.ProxyGroup) error { + logger := r.logger(pg.Name) + r.mu.Lock() + r.proxyGroups.Add(pg.UID) + gaugeProxyGroupResources.Set(int64(r.proxyGroups.Len())) + 
r.mu.Unlock() + + var proxyClass *tsapi.ProxyClass + if pg.Spec.ProxyClass != "" { + proxyClass = new(tsapi.ProxyClass) + if err := r.Get(ctx, types.NamespacedName{Name: pg.Spec.ProxyClass}, proxyClass); err != nil { + return fmt.Errorf("failed to get ProxyClass: %w", err) + } + if !tsoperator.ProxyClassIsReady(proxyClass) { + logger.Infof("ProxyClass %s specified for the ProxyGroup, but it is not (yet) in a ready state, waiting...", pg.Spec.ProxyClass) + return nil + } + } + + cfgHash, err := r.ensureConfigSecretsCreated(ctx, pg, proxyClass) + if err != nil { + return fmt.Errorf("error provisioning config Secrets: %w", err) + } + // State secrets are precreated so we can use the ProxyGroup CR as their owner ref. + stateSecrets := pgStateSecrets(pg, r.tsNamespace) + for _, sec := range stateSecrets { + if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sec, func(s *corev1.Secret) { + s.ObjectMeta.Labels = sec.ObjectMeta.Labels + s.ObjectMeta.Annotations = sec.ObjectMeta.Annotations + s.ObjectMeta.OwnerReferences = sec.ObjectMeta.OwnerReferences + }); err != nil { + return fmt.Errorf("error provisioning state Secrets: %w", err) + } + } + sa := pgServiceAccount(pg, r.tsNamespace) + if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sa, func(s *corev1.ServiceAccount) { + s.ObjectMeta.Labels = sa.ObjectMeta.Labels + s.ObjectMeta.Annotations = sa.ObjectMeta.Annotations + s.ObjectMeta.OwnerReferences = sa.ObjectMeta.OwnerReferences + }); err != nil { + return fmt.Errorf("error provisioning ServiceAccount: %w", err) + } + role := pgRole(pg, r.tsNamespace) + if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, role, func(r *rbacv1.Role) { + r.ObjectMeta.Labels = role.ObjectMeta.Labels + r.ObjectMeta.Annotations = role.ObjectMeta.Annotations + r.ObjectMeta.OwnerReferences = role.ObjectMeta.OwnerReferences + r.Rules = role.Rules + }); err != nil { + return fmt.Errorf("error provisioning Role: %w", err) + } + roleBinding := pgRoleBinding(pg, r.tsNamespace) 
+ if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, roleBinding, func(r *rbacv1.RoleBinding) { + r.ObjectMeta.Labels = roleBinding.ObjectMeta.Labels + r.ObjectMeta.Annotations = roleBinding.ObjectMeta.Annotations + r.ObjectMeta.OwnerReferences = roleBinding.ObjectMeta.OwnerReferences + r.RoleRef = roleBinding.RoleRef + r.Subjects = roleBinding.Subjects + }); err != nil { + return fmt.Errorf("error provisioning RoleBinding: %w", err) + } + ss := pgStatefulSet(pg, r.tsNamespace, r.proxyImage, r.tsFirewallMode, cfgHash) + ss = applyProxyClassToStatefulSet(proxyClass, ss, nil, logger) + if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) { + s.ObjectMeta.Labels = ss.ObjectMeta.Labels + s.ObjectMeta.Annotations = ss.ObjectMeta.Annotations + s.ObjectMeta.OwnerReferences = ss.ObjectMeta.OwnerReferences + s.Spec = ss.Spec + }); err != nil { + return fmt.Errorf("error provisioning StatefulSet: %w", err) + } + + if err := r.cleanupDanglingResources(ctx, pg); err != nil { + return fmt.Errorf("error cleaning up dangling resources: %w", err) + } + + devices, err := r.getDeviceInfo(ctx, pg) + if err != nil { + return fmt.Errorf("failed to get device info: %w", err) + } + + pg.Status.Devices = devices + + return nil +} + +// cleanupDanglingResources ensures we don't leak config secrets, state secrets, and +// tailnet devices when the number of replicas specified is reduced. +func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg *tsapi.ProxyGroup) error { + logger := r.logger(pg.Name) + metadata, err := r.getNodeMetadata(ctx, pg) + if err != nil { + return err + } + + for _, m := range metadata { + if m.ordinal+1 <= int(pgReplicas(pg)) { + continue + } + + // Dangling resource, delete the config + state Secrets, as well as + // deleting the device from the tailnet. 
+ if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil { + return err + } + if err := r.Delete(ctx, m.stateSecret); err != nil { + if !apierrors.IsNotFound(err) { + return fmt.Errorf("error deleting state Secret %s: %w", m.stateSecret.Name, err) + } + } + configSecret := m.stateSecret.DeepCopy() + configSecret.Name += "-config" + if err := r.Delete(ctx, configSecret); err != nil { + if !apierrors.IsNotFound(err) { + return fmt.Errorf("error deleting config Secret %s: %w", configSecret.Name, err) + } + } + } + + return nil +} + +// maybeCleanup just deletes the device from the tailnet. All the kubernetes +// resources linked to a ProxyGroup will get cleaned up via owner references +// (which we can use because they are all in the same namespace). +func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.ProxyGroup) (bool, error) { + logger := r.logger(pg.Name) + + metadata, err := r.getNodeMetadata(ctx, pg) + if err != nil { + return false, err + } + + for _, m := range metadata { + if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil { + return false, err + } + } + + logger.Infof("cleaned up ProxyGroup resources") + r.mu.Lock() + r.proxyGroups.Remove(pg.UID) + gaugeProxyGroupResources.Set(int64(r.proxyGroups.Len())) + r.mu.Unlock() + return true, nil +} + +func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error { + logger.Debugf("deleting device %s from control", string(id)) + if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil { + errResp := &tailscale.ErrResponse{} + if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound { + logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id)) + } else { + return fmt.Errorf("error deleting device: %w", err) + } + } else { + logger.Debugf("device %s deleted from control", string(id)) + } + + return nil +} + +func (r 
*ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (hash string, err error) { + logger := r.logger(pg.Name) + var allConfigs []tailscaledConfigs + for i := range pgReplicas(pg) { + cfgSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%d-config", pg.Name, i), + Namespace: r.tsNamespace, + Labels: pgSecretLabels(pg.Name, "config"), + OwnerReferences: pgOwnerReference(pg), + }, + } + + var existingCfgSecret *corev1.Secret // unmodified copy of secret + if err := r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil { + logger.Debugf("secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName()) + existingCfgSecret = cfgSecret.DeepCopy() + } else if !apierrors.IsNotFound(err) { + return "", err + } + + var authKey string + if existingCfgSecret == nil { + logger.Debugf("creating authkey for new ProxyGroup proxy") + tags := pg.Spec.Tags.Stringify() + if len(tags) == 0 { + tags = r.defaultTags + } + authKey, err = newAuthKey(ctx, r.tsClient, tags) + if err != nil { + return "", err + } + } + + configs, err := pgTailscaledConfig(pg, proxyClass, i, authKey, existingCfgSecret) + if err != nil { + return "", fmt.Errorf("error creating tailscaled config: %w", err) + } + allConfigs = append(allConfigs, configs) + + for cap, cfg := range configs { + cfgJSON, err := json.Marshal(cfg) + if err != nil { + return "", fmt.Errorf("error marshalling tailscaled config: %w", err) + } + mak.Set(&cfgSecret.StringData, tsoperator.TailscaledConfigFileName(cap), string(cfgJSON)) + } + + if existingCfgSecret != nil { + logger.Debugf("patching the existing ProxyGroup config Secret %s", cfgSecret.Name) + if err := r.Patch(ctx, cfgSecret, client.MergeFrom(existingCfgSecret)); err != nil { + return "", err + } + } else { + logger.Debugf("creating a new config Secret %s for the ProxyGroup", cfgSecret.Name) + if err := r.Create(ctx, cfgSecret); err != nil { + 
return "", err + } + } + } + + sum := sha256.New() + b, err := json.Marshal(allConfigs) + if err != nil { + return "", err + } + if _, err := sum.Write(b); err != nil { + return "", err + } + + return fmt.Sprintf("%x", sum.Sum(nil)), nil +} + +func pgTailscaledConfig(pg *tsapi.ProxyGroup, class *tsapi.ProxyClass, idx int32, authKey string, oldSecret *corev1.Secret) (tailscaledConfigs, error) { + conf := &ipn.ConfigVAlpha{ + Version: "alpha0", + AcceptDNS: "false", + AcceptRoutes: "false", // AcceptRoutes defaults to true + Locked: "false", + Hostname: ptr.To(fmt.Sprintf("%s-%d", pg.Name, idx)), + } + + if pg.Spec.HostnamePrefix != "" { + conf.Hostname = ptr.To(fmt.Sprintf("%s%d", pg.Spec.HostnamePrefix, idx)) + } + + if shouldAcceptRoutes(class) { + conf.AcceptRoutes = "true" + } + + deviceAuthed := false + for _, d := range pg.Status.Devices { + if d.Hostname == *conf.Hostname { + deviceAuthed = true + break + } + } + + if authKey != "" { + conf.AuthKey = &authKey + } else if !deviceAuthed { + key, err := authKeyFromSecret(oldSecret) + if err != nil { + return nil, fmt.Errorf("error retrieving auth key from Secret: %w", err) + } + conf.AuthKey = key + } + capVerConfigs := make(map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha) + capVerConfigs[106] = *conf + return capVerConfigs, nil +} + +func (r *ProxyGroupReconciler) validate(_ *tsapi.ProxyGroup) error { + return nil +} + +// getNodeMetadata gets metadata for all the pods owned by this ProxyGroup by +// querying their state Secrets. It may not return the same number of items as +// specified in the ProxyGroup spec if e.g. it is getting scaled up or down, or +// some pods have failed to write state. +func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.ProxyGroup) (metadata []nodeMetadata, _ error) { + // List all state secrets owned by this ProxyGroup. 
+ secrets := &corev1.SecretList{} + if err := r.List(ctx, secrets, client.InNamespace(r.tsNamespace), client.MatchingLabels(pgSecretLabels(pg.Name, "state"))); err != nil { + return nil, fmt.Errorf("failed to list state Secrets: %w", err) + } + for _, secret := range secrets.Items { + var ordinal int + if _, err := fmt.Sscanf(secret.Name, pg.Name+"-%d", &ordinal); err != nil { + return nil, fmt.Errorf("unexpected secret %s was labelled as owned by the ProxyGroup %s: %w", secret.Name, pg.Name, err) + } + + id, dnsName, ok, err := getNodeMetadata(ctx, &secret) + if err != nil { + return nil, err + } + if !ok { + continue + } + + metadata = append(metadata, nodeMetadata{ + ordinal: ordinal, + stateSecret: &secret, + tsID: id, + dnsName: dnsName, + }) + } + + return metadata, nil +} + +func (r *ProxyGroupReconciler) getDeviceInfo(ctx context.Context, pg *tsapi.ProxyGroup) (devices []tsapi.TailnetDevice, _ error) { + metadata, err := r.getNodeMetadata(ctx, pg) + if err != nil { + return nil, err + } + + for _, m := range metadata { + device, ok, err := getDeviceInfo(ctx, r.tsClient, m.stateSecret) + if err != nil { + return nil, err + } + if !ok { + continue + } + devices = append(devices, tsapi.TailnetDevice{ + Hostname: device.Hostname, + TailnetIPs: device.TailnetIPs, + }) + } + + return devices, nil +} + +type nodeMetadata struct { + ordinal int + stateSecret *corev1.Secret + tsID tailcfg.StableNodeID + dnsName string +} diff --git a/cmd/k8s-operator/proxygroup_specs.go b/cmd/k8s-operator/proxygroup_specs.go new file mode 100644 index 000000000..bf2adcbf5 --- /dev/null +++ b/cmd/k8s-operator/proxygroup_specs.go @@ -0,0 +1,262 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !plan9 + +package main + +import ( + "fmt" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + tsapi "tailscale.com/k8s-operator/apis/v1alpha1" + 
"tailscale.com/types/ptr" +) + +// Returns the base StatefulSet definition for a ProxyGroup. A ProxyClass may be +// applied over the top after. +func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode, cfgHash string) *appsv1.StatefulSet { + return &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: pg.Name, + Namespace: namespace, + Labels: pgLabels(pg.Name, nil), + OwnerReferences: pgOwnerReference(pg), + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(pgReplicas(pg)), + Selector: &metav1.LabelSelector{ + MatchLabels: pgLabels(pg.Name, nil), + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: pg.Name, + Namespace: namespace, + Labels: pgLabels(pg.Name, nil), + DeletionGracePeriodSeconds: ptr.To[int64](10), + Annotations: map[string]string{ + podAnnotationLastSetConfigFileHash: cfgHash, + }, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: pg.Name, + InitContainers: []corev1.Container{ + { + Name: "sysctler", + Image: image, + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.To(true), + }, + Command: []string{ + "/bin/sh", + "-c", + }, + Args: []string{ + "sysctl -w net.ipv4.ip_forward=1 && if sysctl net.ipv6.conf.all.forwarding; then sysctl -w net.ipv6.conf.all.forwarding=1; fi", + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "tailscale", + Image: image, + SecurityContext: &corev1.SecurityContext{ + Capabilities: &corev1.Capabilities{ + Add: []corev1.Capability{ + "NET_ADMIN", + }, + }, + }, + VolumeMounts: func() []corev1.VolumeMount { + var mounts []corev1.VolumeMount + for i := range pgReplicas(pg) { + mounts = append(mounts, corev1.VolumeMount{ + Name: fmt.Sprintf("tailscaledconfig-%d", i), + ReadOnly: true, + MountPath: fmt.Sprintf("/etc/tsconfig/%s-%d", pg.Name, i), + }) + } + + return mounts + }(), + Env: func() []corev1.EnvVar { + envs := []corev1.EnvVar{ + { + Name: "POD_IP", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: 
"status.podIP", + }, + }, + }, + { + Name: "POD_NAME", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + // Secret is named after the pod. + FieldPath: "metadata.name", + }, + }, + }, + { + Name: "TS_KUBE_SECRET", + Value: "$(POD_NAME)", + }, + { + Name: "TS_STATE", + Value: "kube:$(POD_NAME)", + }, + { + Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", + Value: "/etc/tsconfig/$(POD_NAME)", + }, + { + Name: "TS_USERSPACE", + Value: "false", + }, + } + + if tsFirewallMode != "" { + envs = append(envs, corev1.EnvVar{ + Name: "TS_DEBUG_FIREWALL_MODE", + Value: tsFirewallMode, + }) + } + + return envs + }(), + }, + }, + Volumes: func() []corev1.Volume { + var volumes []corev1.Volume + for i := range pgReplicas(pg) { + volumes = append(volumes, corev1.Volume{ + Name: fmt.Sprintf("tailscaledconfig-%d", i), + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: fmt.Sprintf("%s-%d-config", pg.Name, i), + }, + }, + }) + } + + return volumes + }(), + }, + }, + }, + } +} + +func pgServiceAccount(pg *tsapi.ProxyGroup, namespace string) *corev1.ServiceAccount { + return &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: pg.Name, + Namespace: namespace, + Labels: pgLabels(pg.Name, nil), + OwnerReferences: pgOwnerReference(pg), + }, + } +} + +func pgRole(pg *tsapi.ProxyGroup, namespace string) *rbacv1.Role { + return &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: pg.Name, + Namespace: namespace, + Labels: pgLabels(pg.Name, nil), + OwnerReferences: pgOwnerReference(pg), + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{"secrets"}, + Verbs: []string{ + "get", + "patch", + "update", + }, + ResourceNames: func() (secrets []string) { + for i := range pgReplicas(pg) { + secrets = append(secrets, + fmt.Sprintf("%s-%d-config", pg.Name, i), // Config with auth key. + fmt.Sprintf("%s-%d", pg.Name, i), // State. 
+ ) + } + return secrets + }(), + }, + }, + } +} + +func pgRoleBinding(pg *tsapi.ProxyGroup, namespace string) *rbacv1.RoleBinding { + return &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: pg.Name, + Namespace: namespace, + Labels: pgLabels(pg.Name, nil), + OwnerReferences: pgOwnerReference(pg), + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: pg.Name, + Namespace: namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + Kind: "Role", + Name: pg.Name, + }, + } +} + +func pgStateSecrets(pg *tsapi.ProxyGroup, namespace string) (secrets []*corev1.Secret) { + for i := range pgReplicas(pg) { + secrets = append(secrets, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%d", pg.Name, i), + Namespace: namespace, + Labels: pgSecretLabels(pg.Name, "state"), + OwnerReferences: pgOwnerReference(pg), + }, + }) + } + + return secrets +} + +func pgSecretLabels(pgName, typ string) map[string]string { + return pgLabels(pgName, map[string]string{ + labelSecretType: typ, // "config" or "state". 
+ }) +} + +func pgLabels(pgName string, customLabels map[string]string) map[string]string { + l := make(map[string]string, len(customLabels)+3) + for k, v := range customLabels { + l[k] = v + } + + l[LabelManaged] = "true" + l[LabelParentType] = "proxygroup" + l[LabelParentName] = pgName + + return l +} + +func pgOwnerReference(owner *tsapi.ProxyGroup) []metav1.OwnerReference { + return []metav1.OwnerReference{*metav1.NewControllerRef(owner, tsapi.SchemeGroupVersion.WithKind("ProxyGroup"))} +} + +func pgReplicas(pg *tsapi.ProxyGroup) int32 { + if pg.Spec.Replicas != nil { + return *pg.Spec.Replicas + } + + return 2 +} diff --git a/cmd/k8s-operator/proxygroup_test.go b/cmd/k8s-operator/proxygroup_test.go new file mode 100644 index 000000000..402d67949 --- /dev/null +++ b/cmd/k8s-operator/proxygroup_test.go @@ -0,0 +1,226 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build !plan9 + +package main + +import ( + "context" + "encoding/json" + "fmt" + "testing" + + "github.com/google/go-cmp/cmp" + "go.uber.org/zap" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "tailscale.com/client/tailscale" + tsoperator "tailscale.com/k8s-operator" + tsapi "tailscale.com/k8s-operator/apis/v1alpha1" + "tailscale.com/tstest" + "tailscale.com/types/ptr" +) + +const testProxyImage = "tailscale/tailscale:test" + +func TestProxyGroup(t *testing.T) { + pg := &tsapi.ProxyGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Finalizers: []string{"tailscale.com/finalizer"}, + }, + } + + fc := fake.NewClientBuilder(). + WithScheme(tsapi.GlobalScheme). + WithObjects(pg). + WithStatusSubresource(pg). 
+ Build() + tsClient := &fakeTSClient{} + zl, _ := zap.NewDevelopment() + fr := record.NewFakeRecorder(1) + cl := tstest.NewClock(tstest.ClockOpts{}) + reconciler := &ProxyGroupReconciler{ + tsNamespace: tsNamespace, + proxyImage: testProxyImage, + defaultTags: []string{"tag:test-tag"}, + tsFirewallMode: "auto", + Client: fc, + tsClient: tsClient, + recorder: fr, + l: zl.Sugar(), + clock: cl, + } + + t.Run("observe_ProxyGroupCreating_status_reason", func(t *testing.T) { + expectReconciled(t, reconciler, "", pg.Name) + + tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "0/2 ProxyGroup pods running", 0, cl, zl.Sugar()) + expectEqual(t, fc, pg, nil) + if expected := 1; reconciler.proxyGroups.Len() != expected { + t.Fatalf("expected %d recorders, got %d", expected, reconciler.proxyGroups.Len()) + } + expectProxyGroupResources(t, fc, pg, true) + keyReq := tailscale.KeyCapabilities{ + Devices: tailscale.KeyDeviceCapabilities{ + Create: tailscale.KeyDeviceCreateCapabilities{ + Reusable: false, + Ephemeral: false, + Preauthorized: true, + Tags: []string{"tag:test-tag"}, + }, + }, + } + if diff := cmp.Diff(tsClient.KeyRequests(), []tailscale.KeyCapabilities{keyReq, keyReq}); diff != "" { + t.Fatalf("unexpected secrets (-got +want):\n%s", diff) + } + }) + + t.Run("simulate_successful_device_auth", func(t *testing.T) { + addNodeIDToStateSecrets(t, fc, pg) + expectReconciled(t, reconciler, "", pg.Name) + + pg.Status.Devices = []tsapi.TailnetDevice{ + { + Hostname: "hostname-nodeid-0", + TailnetIPs: []string{"1.2.3.4", "::1"}, + }, + { + Hostname: "hostname-nodeid-1", + TailnetIPs: []string{"1.2.3.4", "::1"}, + }, + } + tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionTrue, reasonProxyGroupReady, reasonProxyGroupReady, 0, cl, zl.Sugar()) + expectEqual(t, fc, pg, nil) + expectProxyGroupResources(t, fc, pg, true) + }) + + t.Run("scale_up_to_3", func(t *testing.T) { + pg.Spec.Replicas = 
ptr.To[int32](3) + mustUpdate(t, fc, "", pg.Name, func(p *tsapi.ProxyGroup) { + p.Spec = pg.Spec + }) + expectReconciled(t, reconciler, "", pg.Name) + tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "2/3 ProxyGroup pods running", 0, cl, zl.Sugar()) + expectEqual(t, fc, pg, nil) + + addNodeIDToStateSecrets(t, fc, pg) + expectReconciled(t, reconciler, "", pg.Name) + tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionTrue, reasonProxyGroupReady, reasonProxyGroupReady, 0, cl, zl.Sugar()) + pg.Status.Devices = append(pg.Status.Devices, tsapi.TailnetDevice{ + Hostname: "hostname-nodeid-2", + TailnetIPs: []string{"1.2.3.4", "::1"}, + }) + expectEqual(t, fc, pg, nil) + expectProxyGroupResources(t, fc, pg, true) + }) + + t.Run("scale_down_to_1", func(t *testing.T) { + pg.Spec.Replicas = ptr.To[int32](1) + mustUpdate(t, fc, "", pg.Name, func(p *tsapi.ProxyGroup) { + p.Spec = pg.Spec + }) + expectReconciled(t, reconciler, "", pg.Name) + pg.Status.Devices = pg.Status.Devices[:1] // truncate to only the first device. + expectEqual(t, fc, pg, nil) + + expectProxyGroupResources(t, fc, pg, true) + }) + + t.Run("delete_and_cleanup", func(t *testing.T) { + if err := fc.Delete(context.Background(), pg); err != nil { + t.Fatal(err) + } + + expectReconciled(t, reconciler, "", pg.Name) + + expectMissing[tsapi.Recorder](t, fc, "", pg.Name) + if expected := 0; reconciler.proxyGroups.Len() != expected { + t.Fatalf("expected %d ProxyGroups, got %d", expected, reconciler.proxyGroups.Len()) + } + // 2 nodes should get deleted as part of the scale down, and then finally + // the first node gets deleted with the ProxyGroup cleanup. 
+ if diff := cmp.Diff(tsClient.deleted, []string{"nodeid-1", "nodeid-2", "nodeid-0"}); diff != "" { + t.Fatalf("unexpected deleted devices (-got +want):\n%s", diff) + } + // The fake client does not clean up objects whose owner has been + // deleted, so we can't test for the owned resources getting deleted. + }) +} + +func expectProxyGroupResources(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyGroup, shouldExist bool) { + t.Helper() + + role := pgRole(pg, tsNamespace) + roleBinding := pgRoleBinding(pg, tsNamespace) + serviceAccount := pgServiceAccount(pg, tsNamespace) + statefulSet := pgStatefulSet(pg, tsNamespace, testProxyImage, "auto", "") + + if shouldExist { + expectEqual(t, fc, role, nil) + expectEqual(t, fc, roleBinding, nil) + expectEqual(t, fc, serviceAccount, nil) + expectEqual(t, fc, statefulSet, func(ss *appsv1.StatefulSet) { + ss.Spec.Template.Annotations[podAnnotationLastSetConfigFileHash] = "" + }) + } else { + expectMissing[rbacv1.Role](t, fc, role.Namespace, role.Name) + expectMissing[rbacv1.RoleBinding](t, fc, roleBinding.Namespace, roleBinding.Name) + expectMissing[corev1.ServiceAccount](t, fc, serviceAccount.Namespace, serviceAccount.Name) + expectMissing[appsv1.StatefulSet](t, fc, statefulSet.Namespace, statefulSet.Name) + } + + var expectedSecrets []string + for i := range pgReplicas(pg) { + expectedSecrets = append(expectedSecrets, + fmt.Sprintf("%s-%d", pg.Name, i), + fmt.Sprintf("%s-%d-config", pg.Name, i), + ) + } + expectSecrets(t, fc, expectedSecrets) +} + +func expectSecrets(t *testing.T, fc client.WithWatch, expected []string) { + t.Helper() + + secrets := &corev1.SecretList{} + if err := fc.List(context.Background(), secrets); err != nil { + t.Fatal(err) + } + + var actual []string + for _, secret := range secrets.Items { + actual = append(actual, secret.Name) + } + + if diff := cmp.Diff(actual, expected); diff != "" { + t.Fatalf("unexpected secrets (-got +want):\n%s", diff) + } +} + +func addNodeIDToStateSecrets(t *testing.T, fc 
client.WithWatch, pg *tsapi.ProxyGroup) { + const key = "profile-abc" + for i := range pgReplicas(pg) { + bytes, err := json.Marshal(map[string]any{ + "Config": map[string]any{ + "NodeID": fmt.Sprintf("nodeid-%d", i), + }, + }) + if err != nil { + t.Fatal(err) + } + + mustUpdate(t, fc, tsNamespace, fmt.Sprintf("test-%d", i), func(s *corev1.Secret) { + s.Data = map[string][]byte{ + currentProfileKey: []byte(key), + key: bytes, + } + }) + } +} diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index cc6bdb8fe..19c98100f 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -47,6 +47,7 @@ const ( LabelParentType = "tailscale.com/parent-resource-type" LabelParentName = "tailscale.com/parent-resource" LabelParentNamespace = "tailscale.com/parent-resource-ns" + labelSecretType = "tailscale.com/secret-type" // "config" or "state". // LabelProxyClass can be set by users on Connectors, tailscale // Ingresses and Services that define cluster ingress or cluster egress, @@ -304,7 +305,7 @@ func (a *tailscaleSTSReconciler) reconcileHeadlessService(ctx context.Context, l return createOrUpdate(ctx, a.Client, a.operatorNamespace, hsvc, func(svc *corev1.Service) { svc.Spec = hsvc.Spec }) } -func (a *tailscaleSTSReconciler) createOrGetSecret(ctx context.Context, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) (secretName, hash string, configs tailscaleConfigs, _ error) { +func (a *tailscaleSTSReconciler) createOrGetSecret(ctx context.Context, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) (secretName, hash string, configs tailscaledConfigs, _ error) { secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ // Hardcode a -0 suffix so that in future, if we support @@ -362,7 +363,7 @@ func (a *tailscaleSTSReconciler) createOrGetSecret(ctx context.Context, logger * latest := tailcfg.CapabilityVersion(-1) var latestConfig ipn.ConfigVAlpha for key, val := range configs { - fn := 
tsoperator.TailscaledConfigFileNameForCap(key) + fn := tsoperator.TailscaledConfigFileName(key) b, err := json.Marshal(val) if err != nil { return "", "", nil, fmt.Errorf("error marshalling tailscaled config: %w", err) @@ -672,7 +673,7 @@ func applyProxyClassToStatefulSet(pc *tsapi.ProxyClass, ss *appsv1.StatefulSet, if pc == nil || ss == nil { return ss } - if pc.Spec.Metrics != nil && pc.Spec.Metrics.Enable { + if stsCfg != nil && pc.Spec.Metrics != nil && pc.Spec.Metrics.Enable { if stsCfg.TailnetTargetFQDN == "" && stsCfg.TailnetTargetIP == "" && !stsCfg.ForwardClusterTrafficViaL7IngressProxy { enableMetrics(ss, pc) } else if stsCfg.ForwardClusterTrafficViaL7IngressProxy { @@ -794,7 +795,7 @@ func readAuthKey(secret *corev1.Secret, key string) (*string, error) { // TODO (irbekrm): remove the legacy config once we no longer need to support // versions older than cap94, // https://tailscale.com/kb/1236/kubernetes-operator#operator-and-proxies -func tailscaledConfig(stsC *tailscaleSTSConfig, newAuthkey string, oldSecret *corev1.Secret) (tailscaleConfigs, error) { +func tailscaledConfig(stsC *tailscaleSTSConfig, newAuthkey string, oldSecret *corev1.Secret) (tailscaledConfigs, error) { conf := &ipn.ConfigVAlpha{ Version: "alpha0", AcceptDNS: "false", @@ -884,7 +885,7 @@ type ptrObject[T any] interface { *T } -type tailscaleConfigs map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha +type tailscaledConfigs map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha // hashBytes produces a hash for the provided tailscaled config that is the same across // different invocations of this code. We do not use the @@ -895,7 +896,7 @@ type tailscaleConfigs map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha // thing that changed is operator version (the hash is also exposed to users via // an annotation and might be confusing if it changes without the config having // changed). 
-func tailscaledConfigHash(c tailscaleConfigs) (string, error) { +func tailscaledConfigHash(c tailscaledConfigs) (string, error) { b, err := json.Marshal(c) if err != nil { return "", fmt.Errorf("error marshalling tailscaled configs: %w", err) diff --git a/cmd/k8s-operator/testutils_test.go b/cmd/k8s-operator/testutils_test.go index 457248d57..6b6297cbd 100644 --- a/cmd/k8s-operator/testutils_test.go +++ b/cmd/k8s-operator/testutils_test.go @@ -604,7 +604,7 @@ func (c *fakeTSClient) CreateKey(ctx context.Context, caps tailscale.KeyCapabili func (c *fakeTSClient) Device(ctx context.Context, deviceID string, fields *tailscale.DeviceFieldsOpts) (*tailscale.Device, error) { return &tailscale.Device{ DeviceID: deviceID, - Hostname: "test-device", + Hostname: "hostname-" + deviceID, Addresses: []string{ "1.2.3.4", "::1", diff --git a/cmd/k8s-operator/tsrecorder.go b/cmd/k8s-operator/tsrecorder.go index dfbf96b0b..cfe38c50a 100644 --- a/cmd/k8s-operator/tsrecorder.go +++ b/cmd/k8s-operator/tsrecorder.go @@ -302,9 +302,7 @@ func (r *RecorderReconciler) validate(tsr *tsapi.Recorder) error { return nil } -// getNodeMetadata returns 'ok == true' iff the node ID is found. The dnsName -// is expected to always be non-empty if the node ID is, but not required. 
-func (r *RecorderReconciler) getNodeMetadata(ctx context.Context, tsrName string) (id tailcfg.StableNodeID, dnsName string, ok bool, err error) { +func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string) (*corev1.Secret, error) { secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Namespace: r.tsNamespace, @@ -313,12 +311,27 @@ func (r *RecorderReconciler) getNodeMetadata(ctx context.Context, tsrName string } if err := r.Get(ctx, client.ObjectKeyFromObject(secret), secret); err != nil { if apierrors.IsNotFound(err) { - return "", "", false, nil + return nil, nil } + return nil, fmt.Errorf("error getting state Secret: %w", err) + } + + return secret, nil +} + +func (r *RecorderReconciler) getNodeMetadata(ctx context.Context, tsrName string) (id tailcfg.StableNodeID, dnsName string, ok bool, err error) { + secret, err := r.getStateSecret(ctx, tsrName) + if err != nil || secret == nil { return "", "", false, err } + return getNodeMetadata(ctx, secret) +} + +// getNodeMetadata returns 'ok == true' iff the node ID is found. The dnsName +// is expected to always be non-empty if the node ID is, but not required. +func getNodeMetadata(ctx context.Context, secret *corev1.Secret) (id tailcfg.StableNodeID, dnsName string, ok bool, err error) { // TODO(tomhjp): Should maybe use ipn to parse the following info instead. 
currentProfile, ok := secret.Data[currentProfileKey] if !ok { @@ -338,14 +351,23 @@ func (r *RecorderReconciler) getNodeMetadata(ctx context.Context, tsrName string } func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string) (d tsapi.RecorderTailnetDevice, ok bool, err error) { - nodeID, dnsName, ok, err := r.getNodeMetadata(ctx, tsrName) + secret, err := r.getStateSecret(ctx, tsrName) + if err != nil || secret == nil { + return tsapi.RecorderTailnetDevice{}, false, err + } + + return getDeviceInfo(ctx, r.tsClient, secret) +} + +func getDeviceInfo(ctx context.Context, tsClient tsClient, secret *corev1.Secret) (d tsapi.RecorderTailnetDevice, ok bool, err error) { + nodeID, dnsName, ok, err := getNodeMetadata(ctx, secret) if !ok || err != nil { return tsapi.RecorderTailnetDevice{}, false, err } // TODO(tomhjp): The profile info doesn't include addresses, which is why we // need the API. Should we instead update the profile to include addresses? - device, err := r.tsClient.Device(ctx, string(nodeID), nil) + device, err := tsClient.Device(ctx, string(nodeID), nil) if err != nil { return tsapi.RecorderTailnetDevice{}, false, fmt.Errorf("failed to get device info from API: %w", err) } @@ -370,6 +392,6 @@ type profile struct { } `json:"Config"` } -func markedForDeletion(tsr *tsapi.Recorder) bool { - return !tsr.DeletionTimestamp.IsZero() +func markedForDeletion(obj metav1.Object) bool { + return !obj.GetDeletionTimestamp().IsZero() } diff --git a/cmd/k8s-operator/tsrecorder_test.go b/cmd/k8s-operator/tsrecorder_test.go index a3500f191..bd73e8fb9 100644 --- a/cmd/k8s-operator/tsrecorder_test.go +++ b/cmd/k8s-operator/tsrecorder_test.go @@ -107,7 +107,7 @@ func TestRecorder(t *testing.T) { expectReconciled(t, reconciler, "", tsr.Name) tsr.Status.Devices = []tsapi.RecorderTailnetDevice{ { - Hostname: "test-device", + Hostname: "hostname-nodeid-123", TailnetIPs: []string{"1.2.3.4", "::1"}, URL: "https://test-0.example.ts.net", }, diff --git 
a/k8s-operator/api.md b/k8s-operator/api.md index d343e6395..82a3476ae 100644 --- a/k8s-operator/api.md +++ b/k8s-operator/api.md @@ -526,7 +526,7 @@ _Appears in:_ | `tags` _[Tags](#tags)_ | Tags that the Tailscale devices will be tagged with. Defaults to [tag:k8s].
If you specify custom tags here, make sure you also make the operator
an owner of these tags.
See https://tailscale.com/kb/1236/kubernetes-operator/#setting-up-the-kubernetes-operator.
Tags cannot be changed once a ProxyGroup device has been created.
Tag values must be in form ^tag:[a-zA-Z][a-zA-Z0-9-]*$. | | Pattern: `^tag:[a-zA-Z][a-zA-Z0-9-]*$`
Type: string
| | `replicas` _integer_ | Replicas specifies how many replicas to create the StatefulSet with.
Defaults to 2. | | | | `hostnamePrefix` _[HostnamePrefix](#hostnameprefix)_ | HostnamePrefix is the hostname prefix to use for tailnet devices created
by the ProxyGroup. Each device will have the integer number from its
StatefulSet pod appended to this prefix to form the full hostname.
HostnamePrefix can contain lower case letters, numbers and dashes, it
must not start with a dash and must be between 1 and 62 characters long. | | Pattern: `^[a-z0-9][a-z0-9-]{0,61}$`
Type: string
| -| `proxyClass` _string_ | ProxyClass is the name of the ProxyClass custom resource that contains
configuration options that should be applied to the resources created
for this ProxyGroup. If unset, and no default ProxyClass is set, the
operator will create resources with the default configuration. | | | +| `proxyClass` _string_ | ProxyClass is the name of the ProxyClass custom resource that contains
configuration options that should be applied to the resources created
for this ProxyGroup. If unset, the operator will create resources with
the default configuration. | | | #### ProxyGroupStatus diff --git a/k8s-operator/apis/v1alpha1/types_proxygroup.go b/k8s-operator/apis/v1alpha1/types_proxygroup.go index 92912a779..9b0e4215e 100644 --- a/k8s-operator/apis/v1alpha1/types_proxygroup.go +++ b/k8s-operator/apis/v1alpha1/types_proxygroup.go @@ -54,7 +54,7 @@ type ProxyGroupSpec struct { // Replicas specifies how many replicas to create the StatefulSet with. // Defaults to 2. // +optional - Replicas *int `json:"replicas,omitempty"` + Replicas *int32 `json:"replicas,omitempty"` // HostnamePrefix is the hostname prefix to use for tailnet devices created // by the ProxyGroup. Each device will have the integer number from its @@ -66,8 +66,8 @@ type ProxyGroupSpec struct { // ProxyClass is the name of the ProxyClass custom resource that contains // configuration options that should be applied to the resources created - // for this ProxyGroup. If unset, and no default ProxyClass is set, the - // operator will create resources with the default configuration. + // for this ProxyGroup. If unset, the operator will create resources with + // the default configuration. 
// +optional ProxyClass string `json:"proxyClass,omitempty"` } diff --git a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go index b6b94ce3f..ba4ff40e4 100644 --- a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go +++ b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go @@ -584,7 +584,7 @@ func (in *ProxyGroupSpec) DeepCopyInto(out *ProxyGroupSpec) { } if in.Replicas != nil { in, out := &in.Replicas, &out.Replicas - *out = new(int) + *out = new(int32) **out = **in } } diff --git a/k8s-operator/conditions.go b/k8s-operator/conditions.go index 2b4022c40..702ed2bd3 100644 --- a/k8s-operator/conditions.go +++ b/k8s-operator/conditions.go @@ -93,6 +93,14 @@ func SetRecorderCondition(tsr *tsapi.Recorder, conditionType tsapi.ConditionType tsr.Status.Conditions = conds } +// SetProxyGroupCondition ensures that ProxyGroup status has a condition with the +// given attributes. LastTransitionTime gets set every time condition's status +// changes. 
+func SetProxyGroupCondition(pg *tsapi.ProxyGroup, conditionType tsapi.ConditionType, status metav1.ConditionStatus, reason, message string, gen int64, clock tstime.Clock, logger *zap.SugaredLogger) { + conds := updateCondition(pg.Status.Conditions, conditionType, status, reason, message, gen, clock, logger) + pg.Status.Conditions = conds +} + func updateCondition(conds []metav1.Condition, conditionType tsapi.ConditionType, status metav1.ConditionStatus, reason, message string, gen int64, clock tstime.Clock, logger *zap.SugaredLogger) []metav1.Condition { newCondition := metav1.Condition{ Type: string(conditionType), diff --git a/k8s-operator/utils.go b/k8s-operator/utils.go index 497f31b60..a1f225fe6 100644 --- a/k8s-operator/utils.go +++ b/k8s-operator/utils.go @@ -29,9 +29,9 @@ type Records struct { IP4 map[string][]string `json:"ip4"` } -// TailscaledConfigFileNameForCap returns a tailscaled config file name in +// TailscaledConfigFileName returns a tailscaled config file name in // format expected by containerboot for the given CapVer. -func TailscaledConfigFileNameForCap(cap tailcfg.CapabilityVersion) string { +func TailscaledConfigFileName(cap tailcfg.CapabilityVersion) string { if cap < 95 { return "tailscaled" } diff --git a/kube/kubetypes/metrics.go b/kube/kubetypes/metrics.go index 021c1e26b..b183f1f6f 100644 --- a/kube/kubetypes/metrics.go +++ b/kube/kubetypes/metrics.go @@ -22,4 +22,5 @@ const ( MetricNameserverCount = "k8s_nameserver_resources" MetricRecorderCount = "k8s_recorder_resources" MetricEgressServiceCount = "k8s_egress_service_resources" + MetricProxyGroupCount = "k8s_proxygroup_resources" )