diff --git a/cmd/k8s-operator/deploy/crds/tailscale.com_recorders.yaml b/cmd/k8s-operator/deploy/crds/tailscale.com_recorders.yaml
index 0f3dcfcca..48db3ef4b 100644
--- a/cmd/k8s-operator/deploy/crds/tailscale.com_recorders.yaml
+++ b/cmd/k8s-operator/deploy/crds/tailscale.com_recorders.yaml
@@ -68,6 +68,11 @@ spec:
Corresponds to --ui tsrecorder flag https://tailscale.com/kb/1246/tailscale-ssh-session-recording#deploy-a-recorder-node.
Required if S3 storage is not set up, to ensure that recordings are accessible.
type: boolean
+ replicas:
+ description: Replicas specifies how many instances of tsrecorder to run. Defaults to 1.
+ type: integer
+ format: int32
+ minimum: 0
statefulSet:
description: |-
Configuration parameters for the Recorder's StatefulSet. The operator
@@ -1683,6 +1688,9 @@ spec:
items:
type: string
pattern: ^tag:[a-zA-Z][a-zA-Z0-9-]*$
+ x-kubernetes-validations:
+ - rule: '!(has(self.replicas) && self.replicas > 1 && (!has(self.storage) || !has(self.storage.s3)))'
+ message: S3 storage must be used when deploying multiple Recorder replicas
status:
description: |-
RecorderStatus describes the status of the recorder. This is set
diff --git a/cmd/k8s-operator/deploy/manifests/operator.yaml b/cmd/k8s-operator/deploy/manifests/operator.yaml
index c5da367e0..2757f09e5 100644
--- a/cmd/k8s-operator/deploy/manifests/operator.yaml
+++ b/cmd/k8s-operator/deploy/manifests/operator.yaml
@@ -3348,6 +3348,11 @@ spec:
Corresponds to --ui tsrecorder flag https://tailscale.com/kb/1246/tailscale-ssh-session-recording#deploy-a-recorder-node.
Required if S3 storage is not set up, to ensure that recordings are accessible.
type: boolean
+ replicas:
+ description: Replicas specifies how many instances of tsrecorder to run. Defaults to 1.
+ format: int32
+ minimum: 0
+ type: integer
statefulSet:
description: |-
Configuration parameters for the Recorder's StatefulSet. The operator
@@ -4964,6 +4969,9 @@ spec:
type: string
type: array
type: object
+ x-kubernetes-validations:
+ - message: S3 storage must be used when deploying multiple Recorder replicas
+ rule: '!(has(self.replicas) && self.replicas > 1 && (!has(self.storage) || !has(self.storage.s3)))'
status:
description: |-
RecorderStatus describes the status of the recorder. This is set
diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go
index 6b545a827..816fea566 100644
--- a/cmd/k8s-operator/operator.go
+++ b/cmd/k8s-operator/operator.go
@@ -44,10 +44,10 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
- "tailscale.com/envknob"
"tailscale.com/client/local"
"tailscale.com/client/tailscale"
+ "tailscale.com/envknob"
"tailscale.com/hostinfo"
"tailscale.com/ipn"
"tailscale.com/ipn/store/kubestore"
diff --git a/cmd/k8s-operator/tsrecorder.go b/cmd/k8s-operator/tsrecorder.go
index c922f78fe..bfb01fa86 100644
--- a/cmd/k8s-operator/tsrecorder.go
+++ b/cmd/k8s-operator/tsrecorder.go
@@ -12,6 +12,7 @@ import (
"fmt"
"net/http"
"slices"
+ "strconv"
"strings"
"sync"
@@ -29,6 +30,7 @@ import (
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
+
"tailscale.com/client/tailscale"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
@@ -69,13 +71,13 @@ func (r *RecorderReconciler) logger(name string) *zap.SugaredLogger {
return r.log.With("Recorder", name)
}
-func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) {
+func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
logger := r.logger(req.Name)
logger.Debugf("starting reconcile")
defer logger.Debugf("reconcile finished")
tsr := new(tsapi.Recorder)
- err = r.Get(ctx, req.NamespacedName, tsr)
+ err := r.Get(ctx, req.NamespacedName, tsr)
if apierrors.IsNotFound(err) {
logger.Debugf("Recorder not found, assuming it was deleted")
return reconcile.Result{}, nil
@@ -98,7 +100,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
}
tsr.Finalizers = slices.Delete(tsr.Finalizers, ix, ix+1)
- if err := r.Update(ctx, tsr); err != nil {
+ if err = r.Update(ctx, tsr); err != nil {
return reconcile.Result{}, err
}
return reconcile.Result{}, nil
@@ -110,10 +112,11 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
if !apiequality.Semantic.DeepEqual(oldTSRStatus, &tsr.Status) {
// An error encountered here should get returned by the Reconcile function.
if updateErr := r.Client.Status().Update(ctx, tsr); updateErr != nil {
- err = errors.Join(err, updateErr)
+ return reconcile.Result{}, errors.Join(err, updateErr)
}
}
- return reconcile.Result{}, err
+
+ return reconcile.Result{}, nil
}
if !slices.Contains(tsr.Finalizers, FinalizerName) {
@@ -123,12 +126,12 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
// operation is underway.
logger.Infof("ensuring Recorder is set up")
tsr.Finalizers = append(tsr.Finalizers, FinalizerName)
- if err := r.Update(ctx, tsr); err != nil {
+ if err = r.Update(ctx, tsr); err != nil {
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderCreationFailed, reasonRecorderCreationFailed)
}
}
- if err := r.validate(ctx, tsr); err != nil {
+ if err = r.validate(ctx, tsr); err != nil {
message := fmt.Sprintf("Recorder is invalid: %s", err)
r.recorder.Eventf(tsr, corev1.EventTypeWarning, reasonRecorderInvalid, message)
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderInvalid, message)
@@ -160,19 +163,29 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco
gaugeRecorderResources.Set(int64(r.recorders.Len()))
r.mu.Unlock()
- if err := r.ensureAuthSecretCreated(ctx, tsr); err != nil {
+ if err := r.ensureAuthSecretsCreated(ctx, tsr); err != nil {
return fmt.Errorf("error creating secrets: %w", err)
}
- // State Secret is precreated so we can use the Recorder CR as its owner ref.
- sec := tsrStateSecret(tsr, r.tsNamespace)
- if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sec, func(s *corev1.Secret) {
- s.ObjectMeta.Labels = sec.ObjectMeta.Labels
- s.ObjectMeta.Annotations = sec.ObjectMeta.Annotations
- }); err != nil {
- return fmt.Errorf("error creating state Secret: %w", err)
+
+	// State Secrets are pre-created so we can use the Recorder CR as their owner ref.
+ var replicas int32 = 1
+ if tsr.Spec.Replicas != nil {
+ replicas = *tsr.Spec.Replicas
+ }
+
+ for replica := range replicas {
+ sec := tsrStateSecret(tsr, r.tsNamespace, replica)
+ _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sec, func(s *corev1.Secret) {
+ s.ObjectMeta.Labels = sec.ObjectMeta.Labels
+ s.ObjectMeta.Annotations = sec.ObjectMeta.Annotations
+ })
+ if err != nil {
+ return fmt.Errorf("error creating state Secret %q: %w", sec.Name, err)
+ }
}
+
sa := tsrServiceAccount(tsr, r.tsNamespace)
- if _, err := createOrMaybeUpdate(ctx, r.Client, r.tsNamespace, sa, func(s *corev1.ServiceAccount) error {
+ _, err := createOrMaybeUpdate(ctx, r.Client, r.tsNamespace, sa, func(s *corev1.ServiceAccount) error {
// Perform this check within the update function to make sure we don't
// have a race condition between the previous check and the update.
if err := saOwnedByRecorder(s, tsr); err != nil {
@@ -183,54 +196,68 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco
s.ObjectMeta.Annotations = sa.ObjectMeta.Annotations
return nil
- }); err != nil {
+ })
+ if err != nil {
return fmt.Errorf("error creating ServiceAccount: %w", err)
}
+
role := tsrRole(tsr, r.tsNamespace)
- if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, role, func(r *rbacv1.Role) {
+ _, err = createOrUpdate(ctx, r.Client, r.tsNamespace, role, func(r *rbacv1.Role) {
r.ObjectMeta.Labels = role.ObjectMeta.Labels
r.ObjectMeta.Annotations = role.ObjectMeta.Annotations
r.Rules = role.Rules
- }); err != nil {
+ })
+ if err != nil {
return fmt.Errorf("error creating Role: %w", err)
}
+
roleBinding := tsrRoleBinding(tsr, r.tsNamespace)
- if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, roleBinding, func(r *rbacv1.RoleBinding) {
+ _, err = createOrUpdate(ctx, r.Client, r.tsNamespace, roleBinding, func(r *rbacv1.RoleBinding) {
r.ObjectMeta.Labels = roleBinding.ObjectMeta.Labels
r.ObjectMeta.Annotations = roleBinding.ObjectMeta.Annotations
r.RoleRef = roleBinding.RoleRef
r.Subjects = roleBinding.Subjects
- }); err != nil {
+ })
+ if err != nil {
return fmt.Errorf("error creating RoleBinding: %w", err)
}
+
ss := tsrStatefulSet(tsr, r.tsNamespace, r.loginServer)
- if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) {
+ _, err = createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) {
s.ObjectMeta.Labels = ss.ObjectMeta.Labels
s.ObjectMeta.Annotations = ss.ObjectMeta.Annotations
s.Spec = ss.Spec
- }); err != nil {
+ })
+ if err != nil {
return fmt.Errorf("error creating StatefulSet: %w", err)
}
// ServiceAccount name may have changed, in which case we need to clean up
// the previous ServiceAccount. RoleBinding will already be updated to point
// to the new ServiceAccount.
- if err := r.maybeCleanupServiceAccounts(ctx, tsr, sa.Name); err != nil {
+ if err = r.maybeCleanupServiceAccounts(ctx, tsr, sa.Name); err != nil {
return fmt.Errorf("error cleaning up ServiceAccounts: %w", err)
}
+ // If we have scaled the recorder down, we will have dangling state secrets
+ // that we need to clean up.
+ if err = r.maybeCleanupSecrets(ctx, tsr); err != nil {
+ return fmt.Errorf("error cleaning up Secrets: %w", err)
+ }
+
var devices []tsapi.RecorderTailnetDevice
+ for replica := range replicas {
+ dev, ok, err := r.getDeviceInfo(ctx, tsr.Name, replica)
+ switch {
+ case err != nil:
+ return fmt.Errorf("failed to get device info: %w", err)
+ case !ok:
+ logger.Debugf("no Tailscale hostname known yet, waiting for Recorder pod to finish auth")
+ continue
+ }
- device, ok, err := r.getDeviceInfo(ctx, tsr.Name)
- if err != nil {
- return fmt.Errorf("failed to get device info: %w", err)
+ devices = append(devices, dev)
}
- if !ok {
- logger.Debugf("no Tailscale hostname known yet, waiting for Recorder pod to finish auth")
- return nil
- }
-
- devices = append(devices, device)
tsr.Status.Devices = devices
@@ -257,22 +284,89 @@ func saOwnedByRecorder(sa *corev1.ServiceAccount, tsr *tsapi.Recorder) error {
func (r *RecorderReconciler) maybeCleanupServiceAccounts(ctx context.Context, tsr *tsapi.Recorder, currentName string) error {
logger := r.logger(tsr.Name)
- // List all ServiceAccounts owned by this Recorder.
+ options := []client.ListOption{
+ client.InNamespace(r.tsNamespace),
+ client.MatchingLabels(tsrLabels("recorder", tsr.Name, nil)),
+ }
+
sas := &corev1.ServiceAccountList{}
- if err := r.List(ctx, sas, client.InNamespace(r.tsNamespace), client.MatchingLabels(labels("recorder", tsr.Name, nil))); err != nil {
+ if err := r.List(ctx, sas, options...); err != nil {
return fmt.Errorf("error listing ServiceAccounts for cleanup: %w", err)
}
- for _, sa := range sas.Items {
- if sa.Name == currentName {
+
+ for _, serviceAccount := range sas.Items {
+ if serviceAccount.Name == currentName {
+ continue
+ }
+
+ err := r.Delete(ctx, &serviceAccount)
+ switch {
+ case apierrors.IsNotFound(err):
+ logger.Debugf("ServiceAccount %s not found, likely already deleted", serviceAccount.Name)
+ continue
+ case err != nil:
+ return fmt.Errorf("error deleting ServiceAccount %s: %w", serviceAccount.Name, err)
+ }
+ }
+
+ return nil
+}
+
+func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tsr *tsapi.Recorder) error {
+ options := []client.ListOption{
+ client.InNamespace(r.tsNamespace),
+ client.MatchingLabels(tsrLabels("recorder", tsr.Name, nil)),
+ }
+
+ secrets := &corev1.SecretList{}
+ if err := r.List(ctx, secrets, options...); err != nil {
+ return fmt.Errorf("error listing Secrets for cleanup: %w", err)
+ }
+
+	// Work out how many replicas we expect. Any Secret owned by this recorder whose ordinal suffix is at or
+	// above that count belongs to a replica that has been scaled away, so it (and its device) is removed.
+ var replicas int32 = 1
+ if tsr.Spec.Replicas != nil {
+ replicas = *tsr.Spec.Replicas
+ }
+
+	for _, secret := range secrets.Items {
+		// Replica Secrets are named "<prefix>-<ordinal>". Skip anything else that
+		// carries these labels (for example the pre-replica auth Secret, which
+		// was named after the Recorder itself) instead of failing the reconcile.
+		idx := strings.LastIndexByte(secret.Name, '-')
+		// bitSize 31 keeps the parsed value within the non-negative int32 range.
+		ordinal, err := strconv.ParseUint(secret.Name[idx+1:], 10, 31)
+		if idx < 0 || err != nil {
+			continue
+		}
+
+		if int32(ordinal) < replicas {
continue
}
- if err := r.Delete(ctx, &sa); err != nil {
- if apierrors.IsNotFound(err) {
- logger.Debugf("ServiceAccount %s not found, likely already deleted", sa.Name)
- } else {
- return fmt.Errorf("error deleting ServiceAccount %s: %w", sa.Name, err)
+
+		devicePrefs, ok, err := getDevicePrefs(&secret)
+		if err != nil {
+			return fmt.Errorf("error reading device prefs from Secret %q: %w", secret.Name, err)
+		}
+
+		if ok {
+			// Match on an ErrResponse value, the same target maybeCleanup uses for DeleteDevice errors.
+			var errResp tailscale.ErrResponse
+			r.log.Debugf("deleting device %s", devicePrefs.Config.NodeID)
+			err = r.tsClient.DeleteDevice(ctx, string(devicePrefs.Config.NodeID))
+			switch {
+			case errors.As(err, &errResp) && errResp.Status == http.StatusNotFound:
+				// This device has possibly already been deleted in the admin console. So we can ignore this
+				// and move on to removing the secret.
+			case err != nil:
+				return fmt.Errorf("error deleting device %s: %w", devicePrefs.Config.NodeID, err)
}
}
+
+ if err = r.Delete(ctx, &secret); err != nil {
+ return err
+ }
}
return nil
@@ -284,30 +378,38 @@ func (r *RecorderReconciler) maybeCleanupServiceAccounts(ctx context.Context, ts
func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Recorder) (bool, error) {
logger := r.logger(tsr.Name)
- prefs, ok, err := r.getDevicePrefs(ctx, tsr.Name)
- if err != nil {
- return false, err
+ var replicas int32 = 1
+ if tsr.Spec.Replicas != nil {
+ replicas = *tsr.Spec.Replicas
}
- if !ok {
- logger.Debugf("state Secret %s-0 not found or does not contain node ID, continuing cleanup", tsr.Name)
- r.mu.Lock()
- r.recorders.Remove(tsr.UID)
- gaugeRecorderResources.Set(int64(r.recorders.Len()))
- r.mu.Unlock()
- return true, nil
- }
-
- id := string(prefs.Config.NodeID)
- logger.Debugf("deleting device %s from control", string(id))
- if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil {
- errResp := &tailscale.ErrResponse{}
- if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
- logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id))
- } else {
+
+ for replica := range replicas {
+ devicePrefs, ok, err := r.getDevicePrefs(ctx, tsr.Name, replica)
+ if err != nil {
+ return false, err
+ }
+		if !ok {
+			// This replica has no recorded node ID, so there is no device to
+			// delete for it. Later replicas may still have one, so move on to
+			// the next ordinal rather than returning; the r.recorders
+			// bookkeeping is done once, after the loop.
+			logger.Debugf("state Secret %s-%d not found or does not contain node ID, continuing cleanup", tsr.Name, replica)
+			continue
+		}
+
+ nodeID := string(devicePrefs.Config.NodeID)
+ logger.Debugf("deleting device %s from control", nodeID)
+ if err = r.tsClient.DeleteDevice(ctx, nodeID); err != nil {
+ errResp := &tailscale.ErrResponse{}
+ if errors.As(err, errResp) && errResp.Status == http.StatusNotFound {
+ logger.Debugf("device %s not found, likely because it has already been deleted from control", nodeID)
+ continue
+ }
+
return false, fmt.Errorf("error deleting device: %w", err)
}
- } else {
- logger.Debugf("device %s deleted from control", string(id))
+
+ logger.Debugf("device %s deleted from control", nodeID)
}
// Unlike most log entries in the reconcile loop, this will get printed
@@ -319,38 +421,46 @@ func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Record
r.recorders.Remove(tsr.UID)
gaugeRecorderResources.Set(int64(r.recorders.Len()))
r.mu.Unlock()
+
return true, nil
}
-func (r *RecorderReconciler) ensureAuthSecretCreated(ctx context.Context, tsr *tsapi.Recorder) error {
- logger := r.logger(tsr.Name)
- key := types.NamespacedName{
- Namespace: r.tsNamespace,
- Name: tsr.Name,
- }
- if err := r.Get(ctx, key, &corev1.Secret{}); err == nil {
- // No updates, already created the auth key.
- logger.Debugf("auth Secret %s already exists", key.Name)
- return nil
- } else if !apierrors.IsNotFound(err) {
- return err
+func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tsr *tsapi.Recorder) error {
+ var replicas int32 = 1
+ if tsr.Spec.Replicas != nil {
+ replicas = *tsr.Spec.Replicas
}
- // Create the auth key Secret which is going to be used by the StatefulSet
- // to authenticate with Tailscale.
- logger.Debugf("creating authkey for new Recorder")
tags := tsr.Spec.Tags
if len(tags) == 0 {
tags = tsapi.Tags{"tag:k8s"}
}
- authKey, err := newAuthKey(ctx, r.tsClient, tags.Stringify())
- if err != nil {
- return err
- }
- logger.Debug("creating a new Secret for the Recorder")
- if err := r.Create(ctx, tsrAuthSecret(tsr, r.tsNamespace, authKey)); err != nil {
- return err
+ logger := r.logger(tsr.Name)
+
+ for replica := range replicas {
+ key := types.NamespacedName{
+ Namespace: r.tsNamespace,
+ Name: fmt.Sprintf("%s-auth-%d", tsr.Name, replica),
+ }
+
+ err := r.Get(ctx, key, &corev1.Secret{})
+ switch {
+ case err == nil:
+ logger.Debugf("auth Secret %q already exists", key.Name)
+ continue
+ case !apierrors.IsNotFound(err):
+ return fmt.Errorf("failed to get Secret %q: %w", key.Name, err)
+ }
+
+ authKey, err := newAuthKey(ctx, r.tsClient, tags.Stringify())
+ if err != nil {
+ return err
+ }
+
+ if err = r.Create(ctx, tsrAuthSecret(tsr, r.tsNamespace, authKey, replica)); err != nil {
+ return err
+ }
}
return nil
@@ -361,6 +471,10 @@ func (r *RecorderReconciler) validate(ctx context.Context, tsr *tsapi.Recorder)
return errors.New("must either enable UI or use S3 storage to ensure recordings are accessible")
}
+ if tsr.Spec.Replicas != nil && *tsr.Spec.Replicas > 1 && tsr.Spec.Storage.S3 == nil {
+ return errors.New("must use S3 storage when using multiple replicas to ensure recordings are accessible")
+ }
+
// Check any custom ServiceAccount config doesn't conflict with pre-existing
// ServiceAccounts. This check is performed once during validation to ensure
// errors are raised early, but also again during any Updates to prevent a race.
@@ -394,11 +508,11 @@ func (r *RecorderReconciler) validate(ctx context.Context, tsr *tsapi.Recorder)
return nil
}
-func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string) (*corev1.Secret, error) {
+func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string, replica int32) (*corev1.Secret, error) {
secret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Namespace: r.tsNamespace,
- Name: fmt.Sprintf("%s-0", tsrName),
+ Name: fmt.Sprintf("%s-%d", tsrName, replica),
},
}
if err := r.Get(ctx, client.ObjectKeyFromObject(secret), secret); err != nil {
@@ -412,8 +526,8 @@ func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string)
return secret, nil
}
-func (r *RecorderReconciler) getDevicePrefs(ctx context.Context, tsrName string) (prefs prefs, ok bool, err error) {
- secret, err := r.getStateSecret(ctx, tsrName)
+func (r *RecorderReconciler) getDevicePrefs(ctx context.Context, tsrName string, replica int32) (prefs prefs, ok bool, err error) {
+ secret, err := r.getStateSecret(ctx, tsrName, replica)
if err != nil || secret == nil {
return prefs, false, err
}
@@ -441,8 +555,8 @@ func getDevicePrefs(secret *corev1.Secret) (prefs prefs, ok bool, err error) {
return prefs, ok, nil
}
-func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
- secret, err := r.getStateSecret(ctx, tsrName)
+func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string, replica int32) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
+ secret, err := r.getStateSecret(ctx, tsrName, replica)
if err != nil || secret == nil {
return tsapi.RecorderTailnetDevice{}, false, err
}
diff --git a/cmd/k8s-operator/tsrecorder_specs.go b/cmd/k8s-operator/tsrecorder_specs.go
index 83d7439db..b4a10f296 100644
--- a/cmd/k8s-operator/tsrecorder_specs.go
+++ b/cmd/k8s-operator/tsrecorder_specs.go
@@ -12,30 +12,36 @@ import (
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/types/ptr"
"tailscale.com/version"
)
func tsrStatefulSet(tsr *tsapi.Recorder, namespace string, loginServer string) *appsv1.StatefulSet {
- return &appsv1.StatefulSet{
+ var replicas int32 = 1
+ if tsr.Spec.Replicas != nil {
+ replicas = *tsr.Spec.Replicas
+ }
+
+ ss := &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: tsr.Name,
Namespace: namespace,
- Labels: labels("recorder", tsr.Name, tsr.Spec.StatefulSet.Labels),
+ Labels: tsrLabels("recorder", tsr.Name, tsr.Spec.StatefulSet.Labels),
OwnerReferences: tsrOwnerReference(tsr),
Annotations: tsr.Spec.StatefulSet.Annotations,
},
Spec: appsv1.StatefulSetSpec{
- Replicas: ptr.To[int32](1),
+ Replicas: ptr.To(replicas),
Selector: &metav1.LabelSelector{
- MatchLabels: labels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels),
+ MatchLabels: tsrLabels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels),
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Name: tsr.Name,
Namespace: namespace,
- Labels: labels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels),
+ Labels: tsrLabels("recorder", tsr.Name, tsr.Spec.StatefulSet.Pod.Labels),
Annotations: tsr.Spec.StatefulSet.Pod.Annotations,
},
Spec: corev1.PodSpec{
@@ -59,7 +65,7 @@ func tsrStatefulSet(tsr *tsapi.Recorder, namespace string, loginServer string) *
ImagePullPolicy: tsr.Spec.StatefulSet.Pod.Container.ImagePullPolicy,
Resources: tsr.Spec.StatefulSet.Pod.Container.Resources,
SecurityContext: tsr.Spec.StatefulSet.Pod.Container.SecurityContext,
- Env: env(tsr, loginServer),
+ Env: tsrEnv(tsr, loginServer),
EnvFrom: func() []corev1.EnvFromSource {
if tsr.Spec.Storage.S3 == nil || tsr.Spec.Storage.S3.Credentials.Secret.Name == "" {
return nil
@@ -95,6 +101,28 @@ func tsrStatefulSet(tsr *tsapi.Recorder, namespace string, loginServer string) *
},
},
}
+
+ for replica := range replicas {
+ volumeName := fmt.Sprintf("authkey-%d", replica)
+
+ ss.Spec.Template.Spec.Containers[0].VolumeMounts = append(ss.Spec.Template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{
+ Name: volumeName,
+ ReadOnly: true,
+ MountPath: fmt.Sprintf("/etc/tailscaled/%s-%d", ss.Name, replica),
+ })
+
+ ss.Spec.Template.Spec.Volumes = append(ss.Spec.Template.Spec.Volumes, corev1.Volume{
+ Name: volumeName,
+ VolumeSource: corev1.VolumeSource{
+ Secret: &corev1.SecretVolumeSource{
+ SecretName: fmt.Sprintf("%s-auth-%d", tsr.Name, replica),
+ Items: []corev1.KeyToPath{{Key: "authkey", Path: "authkey"}},
+ },
+ },
+ })
+ }
+
+ return ss
}
func tsrServiceAccount(tsr *tsapi.Recorder, namespace string) *corev1.ServiceAccount {
@@ -102,7 +130,7 @@ func tsrServiceAccount(tsr *tsapi.Recorder, namespace string) *corev1.ServiceAcc
ObjectMeta: metav1.ObjectMeta{
Name: tsrServiceAccountName(tsr),
Namespace: namespace,
- Labels: labels("recorder", tsr.Name, nil),
+ Labels: tsrLabels("recorder", tsr.Name, nil),
OwnerReferences: tsrOwnerReference(tsr),
Annotations: tsr.Spec.StatefulSet.Pod.ServiceAccount.Annotations,
},
@@ -120,11 +148,24 @@ func tsrServiceAccountName(tsr *tsapi.Recorder) string {
}
func tsrRole(tsr *tsapi.Recorder, namespace string) *rbacv1.Role {
+	// An empty ResourceNames list in an RBAC rule matches every object of the
+	// rule's resource type, so always name at least ordinal 0, even at zero replicas.
+	var replicas int32 = 1
+	if tsr.Spec.Replicas != nil {
+		replicas = max(*tsr.Spec.Replicas, 1)
+	}
+	resourceNames := make([]string, 0, 2*replicas)
+	for replica := range replicas {
+		resourceNames = append(resourceNames,
+			fmt.Sprintf("%s-%d", tsr.Name, replica),      // State secret.
+			fmt.Sprintf("%s-auth-%d", tsr.Name, replica), // Auth key secret.
+		)
+	}
return &rbacv1.Role{
ObjectMeta: metav1.ObjectMeta{
Name: tsr.Name,
Namespace: namespace,
- Labels: labels("recorder", tsr.Name, nil),
+ Labels: tsrLabels("recorder", tsr.Name, nil),
OwnerReferences: tsrOwnerReference(tsr),
},
Rules: []rbacv1.PolicyRule{
@@ -136,10 +177,7 @@ func tsrRole(tsr *tsapi.Recorder, namespace string) *rbacv1.Role {
"patch",
"update",
},
- ResourceNames: []string{
- tsr.Name, // Contains the auth key.
- fmt.Sprintf("%s-0", tsr.Name), // Contains the node state.
- },
+ ResourceNames: resourceNames,
},
{
APIGroups: []string{""},
@@ -159,7 +197,7 @@ func tsrRoleBinding(tsr *tsapi.Recorder, namespace string) *rbacv1.RoleBinding {
ObjectMeta: metav1.ObjectMeta{
Name: tsr.Name,
Namespace: namespace,
- Labels: labels("recorder", tsr.Name, nil),
+ Labels: tsrLabels("recorder", tsr.Name, nil),
OwnerReferences: tsrOwnerReference(tsr),
},
Subjects: []rbacv1.Subject{
@@ -176,12 +214,12 @@ func tsrRoleBinding(tsr *tsapi.Recorder, namespace string) *rbacv1.RoleBinding {
}
}
-func tsrAuthSecret(tsr *tsapi.Recorder, namespace string, authKey string) *corev1.Secret {
+func tsrAuthSecret(tsr *tsapi.Recorder, namespace string, authKey string, replica int32) *corev1.Secret {
return &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
- Name: tsr.Name,
- Labels: labels("recorder", tsr.Name, nil),
+ Name: fmt.Sprintf("%s-auth-%d", tsr.Name, replica),
+ Labels: tsrLabels("recorder", tsr.Name, nil),
OwnerReferences: tsrOwnerReference(tsr),
},
StringData: map[string]string{
@@ -190,30 +228,19 @@ func tsrAuthSecret(tsr *tsapi.Recorder, namespace string, authKey string) *corev
}
}
-func tsrStateSecret(tsr *tsapi.Recorder, namespace string) *corev1.Secret {
+func tsrStateSecret(tsr *tsapi.Recorder, namespace string, replica int32) *corev1.Secret {
return &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
- Name: fmt.Sprintf("%s-0", tsr.Name),
+ Name: fmt.Sprintf("%s-%d", tsr.Name, replica),
Namespace: namespace,
- Labels: labels("recorder", tsr.Name, nil),
+ Labels: tsrLabels("recorder", tsr.Name, nil),
OwnerReferences: tsrOwnerReference(tsr),
},
}
}
-func env(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar {
+func tsrEnv(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar {
envs := []corev1.EnvVar{
- {
- Name: "TS_AUTHKEY",
- ValueFrom: &corev1.EnvVarSource{
- SecretKeyRef: &corev1.SecretKeySelector{
- LocalObjectReference: corev1.LocalObjectReference{
- Name: tsr.Name,
- },
- Key: "authkey",
- },
- },
- },
{
Name: "POD_NAME",
ValueFrom: &corev1.EnvVarSource{
@@ -231,6 +258,10 @@ func env(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar {
},
},
},
+ {
+ Name: "TS_AUTHKEY_FILE",
+ Value: "/etc/tailscaled/$(POD_NAME)/authkey",
+ },
{
Name: "TS_STATE",
Value: "kube:$(POD_NAME)",
@@ -280,7 +311,7 @@ func env(tsr *tsapi.Recorder, loginServer string) []corev1.EnvVar {
return envs
}
-func labels(app, instance string, customLabels map[string]string) map[string]string {
+func tsrLabels(app, instance string, customLabels map[string]string) map[string]string {
labels := make(map[string]string, len(customLabels)+3)
for k, v := range customLabels {
labels[k] = v
diff --git a/cmd/k8s-operator/tsrecorder_specs_test.go b/cmd/k8s-operator/tsrecorder_specs_test.go
index 49332d09b..0d78129fc 100644
--- a/cmd/k8s-operator/tsrecorder_specs_test.go
+++ b/cmd/k8s-operator/tsrecorder_specs_test.go
@@ -12,6 +12,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/types/ptr"
)
@@ -23,6 +24,7 @@ func TestRecorderSpecs(t *testing.T) {
Name: "test",
},
Spec: tsapi.RecorderSpec{
+ Replicas: ptr.To[int32](3),
StatefulSet: tsapi.RecorderStatefulSet{
Labels: map[string]string{
"ss-label-key": "ss-label-value",
@@ -101,10 +103,10 @@ func TestRecorderSpecs(t *testing.T) {
}
// Pod-level.
- if diff := cmp.Diff(ss.Labels, labels("recorder", "test", tsr.Spec.StatefulSet.Labels)); diff != "" {
+ if diff := cmp.Diff(ss.Labels, tsrLabels("recorder", "test", tsr.Spec.StatefulSet.Labels)); diff != "" {
t.Errorf("(-got +want):\n%s", diff)
}
- if diff := cmp.Diff(ss.Spec.Template.Labels, labels("recorder", "test", tsr.Spec.StatefulSet.Pod.Labels)); diff != "" {
+ if diff := cmp.Diff(ss.Spec.Template.Labels, tsrLabels("recorder", "test", tsr.Spec.StatefulSet.Pod.Labels)); diff != "" {
t.Errorf("(-got +want):\n%s", diff)
}
if diff := cmp.Diff(ss.Spec.Template.Spec.Affinity, tsr.Spec.StatefulSet.Pod.Affinity); diff != "" {
@@ -124,7 +126,7 @@ func TestRecorderSpecs(t *testing.T) {
}
// Container-level.
- if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Env, env(tsr, tsLoginServer)); diff != "" {
+ if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Env, tsrEnv(tsr, tsLoginServer)); diff != "" {
t.Errorf("(-got +want):\n%s", diff)
}
if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Image, tsr.Spec.StatefulSet.Pod.Container.Image); diff != "" {
@@ -139,5 +141,17 @@ func TestRecorderSpecs(t *testing.T) {
if diff := cmp.Diff(ss.Spec.Template.Spec.Containers[0].Resources, tsr.Spec.StatefulSet.Pod.Container.Resources); diff != "" {
t.Errorf("(-got +want):\n%s", diff)
}
+
+ if *ss.Spec.Replicas != *tsr.Spec.Replicas {
+ t.Errorf("expected %d replicas, got %d", *tsr.Spec.Replicas, *ss.Spec.Replicas)
+ }
+
+ if len(ss.Spec.Template.Spec.Volumes) != int(*tsr.Spec.Replicas)+1 {
+ t.Errorf("expected %d volumes, got %d", *tsr.Spec.Replicas+1, len(ss.Spec.Template.Spec.Volumes))
+ }
+
+ if len(ss.Spec.Template.Spec.Containers[0].VolumeMounts) != int(*tsr.Spec.Replicas)+1 {
+ t.Errorf("expected %d volume mounts, got %d", *tsr.Spec.Replicas+1, len(ss.Spec.Template.Spec.Containers[0].VolumeMounts))
+ }
})
}
diff --git a/cmd/k8s-operator/tsrecorder_test.go b/cmd/k8s-operator/tsrecorder_test.go
index 184af2344..f7ff797b1 100644
--- a/cmd/k8s-operator/tsrecorder_test.go
+++ b/cmd/k8s-operator/tsrecorder_test.go
@@ -8,6 +8,7 @@ package main
import (
"context"
"encoding/json"
+ "fmt"
"strings"
"testing"
@@ -20,9 +21,11 @@ import (
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
+
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/tstest"
+ "tailscale.com/types/ptr"
)
const (
@@ -36,6 +39,9 @@ func TestRecorder(t *testing.T) {
Name: "test",
Finalizers: []string{"tailscale.com/finalizer"},
},
+ Spec: tsapi.RecorderSpec{
+ Replicas: ptr.To[int32](3),
+ },
}
fc := fake.NewClientBuilder().
@@ -80,6 +86,15 @@ func TestRecorder(t *testing.T) {
})
expectReconciled(t, reconciler, "", tsr.Name)
+ expectedEvent = "Warning RecorderInvalid Recorder is invalid: must use S3 storage when using multiple replicas to ensure recordings are accessible"
+ expectEvents(t, fr, []string{expectedEvent})
+
+ tsr.Spec.Storage.S3 = &tsapi.S3{}
+ mustUpdate(t, fc, "", "test", func(t *tsapi.Recorder) {
+ t.Spec = tsr.Spec
+ })
+ expectReconciled(t, reconciler, "", tsr.Name)
+
// Only check part of this error message, because it's defined in an
// external package and may change.
if err := fc.Get(context.Background(), client.ObjectKey{
@@ -180,33 +195,47 @@ func TestRecorder(t *testing.T) {
})
t.Run("populate_node_info_in_state_secret_and_see_it_appear_in_status", func(t *testing.T) {
- bytes, err := json.Marshal(map[string]any{
- "Config": map[string]any{
- "NodeID": "nodeid-123",
- "UserProfile": map[string]any{
- "LoginName": "test-0.example.ts.net",
- },
- },
- })
- if err != nil {
- t.Fatal(err)
- }
const key = "profile-abc"
- mustUpdate(t, fc, tsNamespace, "test-0", func(s *corev1.Secret) {
- s.Data = map[string][]byte{
- currentProfileKey: []byte(key),
- key: bytes,
+ for replica := range *tsr.Spec.Replicas {
+ bytes, err := json.Marshal(map[string]any{
+ "Config": map[string]any{
+ "NodeID": fmt.Sprintf("node-%d", replica),
+ "UserProfile": map[string]any{
+ "LoginName": fmt.Sprintf("test-%d.example.ts.net", replica),
+ },
+ },
+ })
+ if err != nil {
+ t.Fatal(err)
}
- })
+
+ name := fmt.Sprintf("%s-%d", "test", replica)
+ mustUpdate(t, fc, tsNamespace, name, func(s *corev1.Secret) {
+ s.Data = map[string][]byte{
+ currentProfileKey: []byte(key),
+ key: bytes,
+ }
+ })
+ }
expectReconciled(t, reconciler, "", tsr.Name)
tsr.Status.Devices = []tsapi.RecorderTailnetDevice{
{
- Hostname: "hostname-nodeid-123",
+ Hostname: "hostname-node-0",
TailnetIPs: []string{"1.2.3.4", "::1"},
URL: "https://test-0.example.ts.net",
},
+ {
+ Hostname: "hostname-node-1",
+ TailnetIPs: []string{"1.2.3.4", "::1"},
+ URL: "https://test-1.example.ts.net",
+ },
+ {
+ Hostname: "hostname-node-2",
+ TailnetIPs: []string{"1.2.3.4", "::1"},
+ URL: "https://test-2.example.ts.net",
+ },
}
expectEqual(t, fc, tsr)
})
@@ -222,7 +251,7 @@ func TestRecorder(t *testing.T) {
if expected := 0; reconciler.recorders.Len() != expected {
t.Fatalf("expected %d recorders, got %d", expected, reconciler.recorders.Len())
}
- if diff := cmp.Diff(tsClient.deleted, []string{"nodeid-123"}); diff != "" {
+ if diff := cmp.Diff(tsClient.deleted, []string{"node-0", "node-1", "node-2"}); diff != "" {
t.Fatalf("unexpected deleted devices (-got +want):\n%s", diff)
}
// The fake client does not clean up objects whose owner has been
@@ -233,26 +262,38 @@ func TestRecorder(t *testing.T) {
func expectRecorderResources(t *testing.T, fc client.WithWatch, tsr *tsapi.Recorder, shouldExist bool) {
t.Helper()
- auth := tsrAuthSecret(tsr, tsNamespace, "secret-authkey")
- state := tsrStateSecret(tsr, tsNamespace)
+ var replicas int32 = 1
+ if tsr.Spec.Replicas != nil {
+ replicas = *tsr.Spec.Replicas
+ }
+
role := tsrRole(tsr, tsNamespace)
roleBinding := tsrRoleBinding(tsr, tsNamespace)
serviceAccount := tsrServiceAccount(tsr, tsNamespace)
statefulSet := tsrStatefulSet(tsr, tsNamespace, tsLoginServer)
if shouldExist {
- expectEqual(t, fc, auth)
- expectEqual(t, fc, state)
expectEqual(t, fc, role)
expectEqual(t, fc, roleBinding)
expectEqual(t, fc, serviceAccount)
expectEqual(t, fc, statefulSet, removeResourceReqs)
} else {
- expectMissing[corev1.Secret](t, fc, auth.Namespace, auth.Name)
- expectMissing[corev1.Secret](t, fc, state.Namespace, state.Name)
expectMissing[rbacv1.Role](t, fc, role.Namespace, role.Name)
expectMissing[rbacv1.RoleBinding](t, fc, roleBinding.Namespace, roleBinding.Name)
expectMissing[corev1.ServiceAccount](t, fc, serviceAccount.Namespace, serviceAccount.Name)
expectMissing[appsv1.StatefulSet](t, fc, statefulSet.Namespace, statefulSet.Name)
}
+
+ for replica := range replicas {
+ auth := tsrAuthSecret(tsr, tsNamespace, "secret-authkey", replica)
+ state := tsrStateSecret(tsr, tsNamespace, replica)
+
+ if shouldExist {
+ expectEqual(t, fc, auth)
+ expectEqual(t, fc, state)
+ } else {
+ expectMissing[corev1.Secret](t, fc, auth.Namespace, auth.Name)
+ expectMissing[corev1.Secret](t, fc, state.Namespace, state.Name)
+ }
+ }
}
diff --git a/k8s-operator/api.md b/k8s-operator/api.md
index 979d199cb..3a4e692d9 100644
--- a/k8s-operator/api.md
+++ b/k8s-operator/api.md
@@ -887,7 +887,7 @@ _Appears in:_
-
+RecorderSpec describes a tsrecorder instance to be deployed in the cluster
@@ -900,6 +900,7 @@ _Appears in:_
| `tags` _[Tags](#tags)_ | Tags that the Tailscale device will be tagged with. Defaults to [tag:k8s].<br />If you specify custom tags here, make sure you also make the operator<br />an owner of these tags.<br />See https://tailscale.com/kb/1236/kubernetes-operator/#setting-up-the-kubernetes-operator.<br />Tags cannot be changed once a Recorder node has been created.<br />Tag values must be in form ^tag:[a-zA-Z][a-zA-Z0-9-]*$. | | Pattern: `^tag:[a-zA-Z][a-zA-Z0-9-]*$` <br />Type: string <br /> |
| `enableUI` _boolean_ | Set to true to enable the Recorder UI. The UI lists and plays recorded sessions.<br />The UI will be served at :443. Defaults to false.<br />Corresponds to --ui tsrecorder flag https://tailscale.com/kb/1246/tailscale-ssh-session-recording#deploy-a-recorder-node.<br />Required if S3 storage is not set up, to ensure that recordings are accessible. | | |
| `storage` _[Storage](#storage)_ | Configure where to store session recordings. By default, recordings will<br />be stored in a local ephemeral volume, and will not be persisted past the<br />lifetime of a specific pod. | | |
+| `replicas` _integer_ | Replicas specifies how many instances of tsrecorder to run. Defaults to 1. | | Minimum: 0 <br /> |
#### RecorderStatefulSet
diff --git a/k8s-operator/apis/v1alpha1/types_recorder.go b/k8s-operator/apis/v1alpha1/types_recorder.go
index 16a610b26..67cffbf09 100644
--- a/k8s-operator/apis/v1alpha1/types_recorder.go
+++ b/k8s-operator/apis/v1alpha1/types_recorder.go
@@ -44,6 +44,8 @@ type RecorderList struct {
Items []Recorder `json:"items"`
}
+// RecorderSpec describes a tsrecorder instance to be deployed in the cluster
+// +kubebuilder:validation:XValidation:rule="!(self.replicas > 1 && (!has(self.storage) || !has(self.storage.s3)))",message="S3 storage must be used when deploying multiple Recorder replicas"
type RecorderSpec struct {
// Configuration parameters for the Recorder's StatefulSet. The operator
// deploys a StatefulSet for each Recorder resource.
@@ -74,6 +76,11 @@ type RecorderSpec struct {
// lifetime of a specific pod.
// +optional
Storage Storage `json:"storage,omitempty"`
+
+ // Replicas specifies how many instances of tsrecorder to run. Defaults to 1.
+ // +optional
+ // +kubebuilder:validation:Minimum=0
+ Replicas *int32 `json:"replicas,omitzero"`
}
type RecorderStatefulSet struct {
diff --git a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go
index 7492f1e54..ff0f3f6ac 100644
--- a/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go
+++ b/k8s-operator/apis/v1alpha1/zz_generated.deepcopy.go
@@ -1068,6 +1068,11 @@ func (in *RecorderSpec) DeepCopyInto(out *RecorderSpec) {
copy(*out, *in)
}
in.Storage.DeepCopyInto(&out.Storage)
+ if in.Replicas != nil {
+ in, out := &in.Replicas, &out.Replicas
+ *out = new(int32)
+ **out = **in
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RecorderSpec.