mirror of https://github.com/tailscale/tailscale/
cmd/{containerboot,k8s-operator},k8s-operator,kube: add ProxyGroup controller (#13684)
Implements the controller for the new ProxyGroup CRD, designed for running proxies in a high availability configuration. Each proxy gets its own config and state Secret, and its own tailscale node ID. We are currently mounting all of the config secrets into the container, but will stop mounting them and instead read them directly from the kube API once #13578 is implemented. Updates #13406 Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>pull/13713/head
parent
1005cbc1e4
commit
e48cddfbb3
@ -0,0 +1,7 @@
|
||||
apiVersion: tailscale.com/v1alpha1
|
||||
kind: ProxyGroup
|
||||
metadata:
|
||||
name: egress-proxies
|
||||
spec:
|
||||
type: egress
|
||||
replicas: 3
|
@ -0,0 +1,507 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build !plan9
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"go.uber.org/zap"
|
||||
xslices "golang.org/x/exp/slices"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
rbacv1 "k8s.io/api/rbac/v1"
|
||||
apiequality "k8s.io/apimachinery/pkg/api/equality"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/client-go/tools/record"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
||||
"tailscale.com/client/tailscale"
|
||||
"tailscale.com/ipn"
|
||||
tsoperator "tailscale.com/k8s-operator"
|
||||
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
||||
"tailscale.com/kube/kubetypes"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/tstime"
|
||||
"tailscale.com/types/ptr"
|
||||
"tailscale.com/util/clientmetric"
|
||||
"tailscale.com/util/mak"
|
||||
"tailscale.com/util/set"
|
||||
)
|
||||
|
||||
const (
|
||||
reasonProxyGroupCreationFailed = "ProxyGroupCreationFailed"
|
||||
reasonProxyGroupReady = "ProxyGroupReady"
|
||||
reasonProxyGroupCreating = "ProxyGroupCreating"
|
||||
reasonProxyGroupInvalid = "ProxyGroupInvalid"
|
||||
)
|
||||
|
||||
var gaugeProxyGroupResources = clientmetric.NewGauge(kubetypes.MetricProxyGroupCount)
|
||||
|
||||
// ProxyGroupReconciler ensures cluster resources for a ProxyGroup definition.
|
||||
type ProxyGroupReconciler struct {
|
||||
client.Client
|
||||
l *zap.SugaredLogger
|
||||
recorder record.EventRecorder
|
||||
clock tstime.Clock
|
||||
tsClient tsClient
|
||||
|
||||
// User-specified defaults from the helm installation.
|
||||
tsNamespace string
|
||||
proxyImage string
|
||||
defaultTags []string
|
||||
tsFirewallMode string
|
||||
|
||||
mu sync.Mutex // protects following
|
||||
proxyGroups set.Slice[types.UID] // for proxygroups gauge
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) logger(name string) *zap.SugaredLogger {
|
||||
return r.l.With("ProxyGroup", name)
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) {
|
||||
logger := r.logger(req.Name)
|
||||
logger.Debugf("starting reconcile")
|
||||
defer logger.Debugf("reconcile finished")
|
||||
|
||||
pg := new(tsapi.ProxyGroup)
|
||||
err = r.Get(ctx, req.NamespacedName, pg)
|
||||
if apierrors.IsNotFound(err) {
|
||||
logger.Debugf("ProxyGroup not found, assuming it was deleted")
|
||||
return reconcile.Result{}, nil
|
||||
} else if err != nil {
|
||||
return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com ProxyGroup: %w", err)
|
||||
}
|
||||
if markedForDeletion(pg) {
|
||||
logger.Debugf("ProxyGroup is being deleted, cleaning up resources")
|
||||
ix := xslices.Index(pg.Finalizers, FinalizerName)
|
||||
if ix < 0 {
|
||||
logger.Debugf("no finalizer, nothing to do")
|
||||
return reconcile.Result{}, nil
|
||||
}
|
||||
|
||||
if done, err := r.maybeCleanup(ctx, pg); err != nil {
|
||||
return reconcile.Result{}, err
|
||||
} else if !done {
|
||||
logger.Debugf("ProxyGroup resource cleanup not yet finished, will retry...")
|
||||
return reconcile.Result{RequeueAfter: shortRequeue}, nil
|
||||
}
|
||||
|
||||
pg.Finalizers = slices.Delete(pg.Finalizers, ix, ix+1)
|
||||
if err := r.Update(ctx, pg); err != nil {
|
||||
return reconcile.Result{}, err
|
||||
}
|
||||
return reconcile.Result{}, nil
|
||||
}
|
||||
|
||||
oldPGStatus := pg.Status.DeepCopy()
|
||||
setStatusReady := func(pg *tsapi.ProxyGroup, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) {
|
||||
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, status, reason, message, pg.Generation, r.clock, logger)
|
||||
if !apiequality.Semantic.DeepEqual(oldPGStatus, pg.Status) {
|
||||
// An error encountered here should get returned by the Reconcile function.
|
||||
if updateErr := r.Client.Status().Update(ctx, pg); updateErr != nil {
|
||||
err = errors.Wrap(err, updateErr.Error())
|
||||
}
|
||||
}
|
||||
return reconcile.Result{}, err
|
||||
}
|
||||
|
||||
if !slices.Contains(pg.Finalizers, FinalizerName) {
|
||||
// This log line is printed exactly once during initial provisioning,
|
||||
// because once the finalizer is in place this block gets skipped. So,
|
||||
// this is a nice place to log that the high level, multi-reconcile
|
||||
// operation is underway.
|
||||
logger.Infof("ensuring ProxyGroup is set up")
|
||||
pg.Finalizers = append(pg.Finalizers, FinalizerName)
|
||||
if err := r.Update(ctx, pg); err != nil {
|
||||
logger.Errorf("error adding finalizer: %w", err)
|
||||
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreationFailed, reasonProxyGroupCreationFailed)
|
||||
}
|
||||
}
|
||||
|
||||
if err := r.validate(pg); err != nil {
|
||||
logger.Errorf("error validating ProxyGroup spec: %w", err)
|
||||
message := fmt.Sprintf("ProxyGroup is invalid: %s", err)
|
||||
r.recorder.Eventf(pg, corev1.EventTypeWarning, reasonProxyGroupInvalid, message)
|
||||
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupInvalid, message)
|
||||
}
|
||||
|
||||
if err = r.maybeProvision(ctx, pg); err != nil {
|
||||
logger.Errorf("error provisioning ProxyGroup resources: %w", err)
|
||||
message := fmt.Sprintf("failed provisioning ProxyGroup: %s", err)
|
||||
r.recorder.Eventf(pg, corev1.EventTypeWarning, reasonProxyGroupCreationFailed, message)
|
||||
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreationFailed, message)
|
||||
}
|
||||
|
||||
desiredReplicas := int(pgReplicas(pg))
|
||||
if len(pg.Status.Devices) < desiredReplicas {
|
||||
message := fmt.Sprintf("%d/%d ProxyGroup pods running", len(pg.Status.Devices), desiredReplicas)
|
||||
logger.Debug(message)
|
||||
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreating, message)
|
||||
}
|
||||
|
||||
if len(pg.Status.Devices) > desiredReplicas {
|
||||
message := fmt.Sprintf("waiting for %d ProxyGroup pods to shut down", len(pg.Status.Devices)-desiredReplicas)
|
||||
logger.Debug(message)
|
||||
return setStatusReady(pg, metav1.ConditionFalse, reasonProxyGroupCreating, message)
|
||||
}
|
||||
|
||||
logger.Info("ProxyGroup resources synced")
|
||||
return setStatusReady(pg, metav1.ConditionTrue, reasonProxyGroupReady, reasonProxyGroupReady)
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.ProxyGroup) error {
|
||||
logger := r.logger(pg.Name)
|
||||
r.mu.Lock()
|
||||
r.proxyGroups.Add(pg.UID)
|
||||
gaugeProxyGroupResources.Set(int64(r.proxyGroups.Len()))
|
||||
r.mu.Unlock()
|
||||
|
||||
var proxyClass *tsapi.ProxyClass
|
||||
if pg.Spec.ProxyClass != "" {
|
||||
proxyClass = new(tsapi.ProxyClass)
|
||||
if err := r.Get(ctx, types.NamespacedName{Name: pg.Spec.ProxyClass}, proxyClass); err != nil {
|
||||
return fmt.Errorf("failed to get ProxyClass: %w", err)
|
||||
}
|
||||
if !tsoperator.ProxyClassIsReady(proxyClass) {
|
||||
logger.Infof("ProxyClass %s specified for the ProxyGroup, but it is not (yet) in a ready state, waiting...", pg.Spec.ProxyClass)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
cfgHash, err := r.ensureConfigSecretsCreated(ctx, pg, proxyClass)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error provisioning config Secrets: %w", err)
|
||||
}
|
||||
// State secrets are precreated so we can use the ProxyGroup CR as their owner ref.
|
||||
stateSecrets := pgStateSecrets(pg, r.tsNamespace)
|
||||
for _, sec := range stateSecrets {
|
||||
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sec, func(s *corev1.Secret) {
|
||||
s.ObjectMeta.Labels = sec.ObjectMeta.Labels
|
||||
s.ObjectMeta.Annotations = sec.ObjectMeta.Annotations
|
||||
s.ObjectMeta.OwnerReferences = sec.ObjectMeta.OwnerReferences
|
||||
}); err != nil {
|
||||
return fmt.Errorf("error provisioning state Secrets: %w", err)
|
||||
}
|
||||
}
|
||||
sa := pgServiceAccount(pg, r.tsNamespace)
|
||||
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, sa, func(s *corev1.ServiceAccount) {
|
||||
s.ObjectMeta.Labels = sa.ObjectMeta.Labels
|
||||
s.ObjectMeta.Annotations = sa.ObjectMeta.Annotations
|
||||
s.ObjectMeta.OwnerReferences = sa.ObjectMeta.OwnerReferences
|
||||
}); err != nil {
|
||||
return fmt.Errorf("error provisioning ServiceAccount: %w", err)
|
||||
}
|
||||
role := pgRole(pg, r.tsNamespace)
|
||||
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, role, func(r *rbacv1.Role) {
|
||||
r.ObjectMeta.Labels = role.ObjectMeta.Labels
|
||||
r.ObjectMeta.Annotations = role.ObjectMeta.Annotations
|
||||
r.ObjectMeta.OwnerReferences = role.ObjectMeta.OwnerReferences
|
||||
r.Rules = role.Rules
|
||||
}); err != nil {
|
||||
return fmt.Errorf("error provisioning Role: %w", err)
|
||||
}
|
||||
roleBinding := pgRoleBinding(pg, r.tsNamespace)
|
||||
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, roleBinding, func(r *rbacv1.RoleBinding) {
|
||||
r.ObjectMeta.Labels = roleBinding.ObjectMeta.Labels
|
||||
r.ObjectMeta.Annotations = roleBinding.ObjectMeta.Annotations
|
||||
r.ObjectMeta.OwnerReferences = roleBinding.ObjectMeta.OwnerReferences
|
||||
r.RoleRef = roleBinding.RoleRef
|
||||
r.Subjects = roleBinding.Subjects
|
||||
}); err != nil {
|
||||
return fmt.Errorf("error provisioning RoleBinding: %w", err)
|
||||
}
|
||||
ss := pgStatefulSet(pg, r.tsNamespace, r.proxyImage, r.tsFirewallMode, cfgHash)
|
||||
ss = applyProxyClassToStatefulSet(proxyClass, ss, nil, logger)
|
||||
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) {
|
||||
s.ObjectMeta.Labels = ss.ObjectMeta.Labels
|
||||
s.ObjectMeta.Annotations = ss.ObjectMeta.Annotations
|
||||
s.ObjectMeta.OwnerReferences = ss.ObjectMeta.OwnerReferences
|
||||
s.Spec = ss.Spec
|
||||
}); err != nil {
|
||||
return fmt.Errorf("error provisioning StatefulSet: %w", err)
|
||||
}
|
||||
|
||||
if err := r.cleanupDanglingResources(ctx, pg); err != nil {
|
||||
return fmt.Errorf("error cleaning up dangling resources: %w", err)
|
||||
}
|
||||
|
||||
devices, err := r.getDeviceInfo(ctx, pg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get device info: %w", err)
|
||||
}
|
||||
|
||||
pg.Status.Devices = devices
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupDanglingResources ensures we don't leak config secrets, state secrets, and
|
||||
// tailnet devices when the number of replicas specified is reduced.
|
||||
func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg *tsapi.ProxyGroup) error {
|
||||
logger := r.logger(pg.Name)
|
||||
metadata, err := r.getNodeMetadata(ctx, pg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, m := range metadata {
|
||||
if m.ordinal+1 <= int(pgReplicas(pg)) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Dangling resource, delete the config + state Secrets, as well as
|
||||
// deleting the device from the tailnet.
|
||||
if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.Delete(ctx, m.stateSecret); err != nil {
|
||||
if !apierrors.IsNotFound(err) {
|
||||
return fmt.Errorf("error deleting state Secret %s: %w", m.stateSecret.Name, err)
|
||||
}
|
||||
}
|
||||
configSecret := m.stateSecret.DeepCopy()
|
||||
configSecret.Name += "-config"
|
||||
if err := r.Delete(ctx, configSecret); err != nil {
|
||||
if !apierrors.IsNotFound(err) {
|
||||
return fmt.Errorf("error deleting config Secret %s: %w", configSecret.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// maybeCleanup just deletes the device from the tailnet. All the kubernetes
|
||||
// resources linked to a ProxyGroup will get cleaned up via owner references
|
||||
// (which we can use because they are all in the same namespace).
|
||||
func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.ProxyGroup) (bool, error) {
|
||||
logger := r.logger(pg.Name)
|
||||
|
||||
metadata, err := r.getNodeMetadata(ctx, pg)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, m := range metadata {
|
||||
if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
|
||||
logger.Infof("cleaned up ProxyGroup resources")
|
||||
r.mu.Lock()
|
||||
r.proxyGroups.Remove(pg.UID)
|
||||
gaugeProxyGroupResources.Set(int64(r.proxyGroups.Len()))
|
||||
r.mu.Unlock()
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error {
|
||||
logger.Debugf("deleting device %s from control", string(id))
|
||||
if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil {
|
||||
errResp := &tailscale.ErrResponse{}
|
||||
if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
|
||||
logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id))
|
||||
} else {
|
||||
return fmt.Errorf("error deleting device: %w", err)
|
||||
}
|
||||
} else {
|
||||
logger.Debugf("device %s deleted from control", string(id))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (hash string, err error) {
|
||||
logger := r.logger(pg.Name)
|
||||
var allConfigs []tailscaledConfigs
|
||||
for i := range pgReplicas(pg) {
|
||||
cfgSecret := &corev1.Secret{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: fmt.Sprintf("%s-%d-config", pg.Name, i),
|
||||
Namespace: r.tsNamespace,
|
||||
Labels: pgSecretLabels(pg.Name, "config"),
|
||||
OwnerReferences: pgOwnerReference(pg),
|
||||
},
|
||||
}
|
||||
|
||||
var existingCfgSecret *corev1.Secret // unmodified copy of secret
|
||||
if err := r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil {
|
||||
logger.Debugf("secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName())
|
||||
existingCfgSecret = cfgSecret.DeepCopy()
|
||||
} else if !apierrors.IsNotFound(err) {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var authKey string
|
||||
if existingCfgSecret == nil {
|
||||
logger.Debugf("creating authkey for new ProxyGroup proxy")
|
||||
tags := pg.Spec.Tags.Stringify()
|
||||
if len(tags) == 0 {
|
||||
tags = r.defaultTags
|
||||
}
|
||||
authKey, err = newAuthKey(ctx, r.tsClient, tags)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
configs, err := pgTailscaledConfig(pg, proxyClass, i, authKey, existingCfgSecret)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error creating tailscaled config: %w", err)
|
||||
}
|
||||
allConfigs = append(allConfigs, configs)
|
||||
|
||||
for cap, cfg := range configs {
|
||||
cfgJSON, err := json.Marshal(cfg)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("error marshalling tailscaled config: %w", err)
|
||||
}
|
||||
mak.Set(&cfgSecret.StringData, tsoperator.TailscaledConfigFileName(cap), string(cfgJSON))
|
||||
}
|
||||
|
||||
if existingCfgSecret != nil {
|
||||
logger.Debugf("patching the existing ProxyGroup config Secret %s", cfgSecret.Name)
|
||||
if err := r.Patch(ctx, cfgSecret, client.MergeFrom(existingCfgSecret)); err != nil {
|
||||
return "", err
|
||||
}
|
||||
} else {
|
||||
logger.Debugf("creating a new config Secret %s for the ProxyGroup", cfgSecret.Name)
|
||||
if err := r.Create(ctx, cfgSecret); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sum := sha256.New()
|
||||
b, err := json.Marshal(allConfigs)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if _, err := sum.Write(b); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%x", sum.Sum(nil)), nil
|
||||
}
|
||||
|
||||
func pgTailscaledConfig(pg *tsapi.ProxyGroup, class *tsapi.ProxyClass, idx int32, authKey string, oldSecret *corev1.Secret) (tailscaledConfigs, error) {
|
||||
conf := &ipn.ConfigVAlpha{
|
||||
Version: "alpha0",
|
||||
AcceptDNS: "false",
|
||||
AcceptRoutes: "false", // AcceptRoutes defaults to true
|
||||
Locked: "false",
|
||||
Hostname: ptr.To(fmt.Sprintf("%s-%d", pg.Name, idx)),
|
||||
}
|
||||
|
||||
if pg.Spec.HostnamePrefix != "" {
|
||||
conf.Hostname = ptr.To(fmt.Sprintf("%s%d", pg.Spec.HostnamePrefix, idx))
|
||||
}
|
||||
|
||||
if shouldAcceptRoutes(class) {
|
||||
conf.AcceptRoutes = "true"
|
||||
}
|
||||
|
||||
deviceAuthed := false
|
||||
for _, d := range pg.Status.Devices {
|
||||
if d.Hostname == *conf.Hostname {
|
||||
deviceAuthed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if authKey != "" {
|
||||
conf.AuthKey = &authKey
|
||||
} else if !deviceAuthed {
|
||||
key, err := authKeyFromSecret(oldSecret)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error retrieving auth key from Secret: %w", err)
|
||||
}
|
||||
conf.AuthKey = key
|
||||
}
|
||||
capVerConfigs := make(map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha)
|
||||
capVerConfigs[106] = *conf
|
||||
return capVerConfigs, nil
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) validate(_ *tsapi.ProxyGroup) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// getNodeMetadata gets metadata for all the pods owned by this ProxyGroup by
|
||||
// querying their state Secrets. It may not return the same number of items as
|
||||
// specified in the ProxyGroup spec if e.g. it is getting scaled up or down, or
|
||||
// some pods have failed to write state.
|
||||
func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.ProxyGroup) (metadata []nodeMetadata, _ error) {
|
||||
// List all state secrets owned by this ProxyGroup.
|
||||
secrets := &corev1.SecretList{}
|
||||
if err := r.List(ctx, secrets, client.InNamespace(r.tsNamespace), client.MatchingLabels(pgSecretLabels(pg.Name, "state"))); err != nil {
|
||||
return nil, fmt.Errorf("failed to list state Secrets: %w", err)
|
||||
}
|
||||
for _, secret := range secrets.Items {
|
||||
var ordinal int
|
||||
if _, err := fmt.Sscanf(secret.Name, pg.Name+"-%d", &ordinal); err != nil {
|
||||
return nil, fmt.Errorf("unexpected secret %s was labelled as owned by the ProxyGroup %s: %w", secret.Name, pg.Name, err)
|
||||
}
|
||||
|
||||
id, dnsName, ok, err := getNodeMetadata(ctx, &secret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
metadata = append(metadata, nodeMetadata{
|
||||
ordinal: ordinal,
|
||||
stateSecret: &secret,
|
||||
tsID: id,
|
||||
dnsName: dnsName,
|
||||
})
|
||||
}
|
||||
|
||||
return metadata, nil
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) getDeviceInfo(ctx context.Context, pg *tsapi.ProxyGroup) (devices []tsapi.TailnetDevice, _ error) {
|
||||
metadata, err := r.getNodeMetadata(ctx, pg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, m := range metadata {
|
||||
device, ok, err := getDeviceInfo(ctx, r.tsClient, m.stateSecret)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, tsapi.TailnetDevice{
|
||||
Hostname: device.Hostname,
|
||||
TailnetIPs: device.TailnetIPs,
|
||||
})
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
type nodeMetadata struct {
|
||||
ordinal int
|
||||
stateSecret *corev1.Secret
|
||||
tsID tailcfg.StableNodeID
|
||||
dnsName string
|
||||
}
|
@ -0,0 +1,262 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build !plan9
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
rbacv1 "k8s.io/api/rbac/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
||||
"tailscale.com/types/ptr"
|
||||
)
|
||||
|
||||
// Returns the base StatefulSet definition for a ProxyGroup. A ProxyClass may be
|
||||
// applied over the top after.
|
||||
func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode, cfgHash string) *appsv1.StatefulSet {
|
||||
return &appsv1.StatefulSet{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: pg.Name,
|
||||
Namespace: namespace,
|
||||
Labels: pgLabels(pg.Name, nil),
|
||||
OwnerReferences: pgOwnerReference(pg),
|
||||
},
|
||||
Spec: appsv1.StatefulSetSpec{
|
||||
Replicas: ptr.To(pgReplicas(pg)),
|
||||
Selector: &metav1.LabelSelector{
|
||||
MatchLabels: pgLabels(pg.Name, nil),
|
||||
},
|
||||
Template: corev1.PodTemplateSpec{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: pg.Name,
|
||||
Namespace: namespace,
|
||||
Labels: pgLabels(pg.Name, nil),
|
||||
DeletionGracePeriodSeconds: ptr.To[int64](10),
|
||||
Annotations: map[string]string{
|
||||
podAnnotationLastSetConfigFileHash: cfgHash,
|
||||
},
|
||||
},
|
||||
Spec: corev1.PodSpec{
|
||||
ServiceAccountName: pg.Name,
|
||||
InitContainers: []corev1.Container{
|
||||
{
|
||||
Name: "sysctler",
|
||||
Image: image,
|
||||
SecurityContext: &corev1.SecurityContext{
|
||||
Privileged: ptr.To(true),
|
||||
},
|
||||
Command: []string{
|
||||
"/bin/sh",
|
||||
"-c",
|
||||
},
|
||||
Args: []string{
|
||||
"sysctl -w net.ipv4.ip_forward=1 && if sysctl net.ipv6.conf.all.forwarding; then sysctl -w net.ipv6.conf.all.forwarding=1; fi",
|
||||
},
|
||||
},
|
||||
},
|
||||
Containers: []corev1.Container{
|
||||
{
|
||||
Name: "tailscale",
|
||||
Image: image,
|
||||
SecurityContext: &corev1.SecurityContext{
|
||||
Capabilities: &corev1.Capabilities{
|
||||
Add: []corev1.Capability{
|
||||
"NET_ADMIN",
|
||||
},
|
||||
},
|
||||
},
|
||||
VolumeMounts: func() []corev1.VolumeMount {
|
||||
var mounts []corev1.VolumeMount
|
||||
for i := range pgReplicas(pg) {
|
||||
mounts = append(mounts, corev1.VolumeMount{
|
||||
Name: fmt.Sprintf("tailscaledconfig-%d", i),
|
||||
ReadOnly: true,
|
||||
MountPath: fmt.Sprintf("/etc/tsconfig/%s-%d", pg.Name, i),
|
||||
})
|
||||
}
|
||||
|
||||
return mounts
|
||||
}(),
|
||||
Env: func() []corev1.EnvVar {
|
||||
envs := []corev1.EnvVar{
|
||||
{
|
||||
Name: "POD_IP",
|
||||
ValueFrom: &corev1.EnvVarSource{
|
||||
FieldRef: &corev1.ObjectFieldSelector{
|
||||
FieldPath: "status.podIP",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "POD_NAME",
|
||||
ValueFrom: &corev1.EnvVarSource{
|
||||
FieldRef: &corev1.ObjectFieldSelector{
|
||||
// Secret is named after the pod.
|
||||
FieldPath: "metadata.name",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "TS_KUBE_SECRET",
|
||||
Value: "$(POD_NAME)",
|
||||
},
|
||||
{
|
||||
Name: "TS_STATE",
|
||||
Value: "kube:$(POD_NAME)",
|
||||
},
|
||||
{
|
||||
Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR",
|
||||
Value: "/etc/tsconfig/$(POD_NAME)",
|
||||
},
|
||||
{
|
||||
Name: "TS_USERSPACE",
|
||||
Value: "false",
|
||||
},
|
||||
}
|
||||
|
||||
if tsFirewallMode != "" {
|
||||
envs = append(envs, corev1.EnvVar{
|
||||
Name: "TS_DEBUG_FIREWALL_MODE",
|
||||
Value: tsFirewallMode,
|
||||
})
|
||||
}
|
||||
|
||||
return envs
|
||||
}(),
|
||||
},
|
||||
},
|
||||
Volumes: func() []corev1.Volume {
|
||||
var volumes []corev1.Volume
|
||||
for i := range pgReplicas(pg) {
|
||||
volumes = append(volumes, corev1.Volume{
|
||||
Name: fmt.Sprintf("tailscaledconfig-%d", i),
|
||||
VolumeSource: corev1.VolumeSource{
|
||||
Secret: &corev1.SecretVolumeSource{
|
||||
SecretName: fmt.Sprintf("%s-%d-config", pg.Name, i),
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return volumes
|
||||
}(),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func pgServiceAccount(pg *tsapi.ProxyGroup, namespace string) *corev1.ServiceAccount {
|
||||
return &corev1.ServiceAccount{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: pg.Name,
|
||||
Namespace: namespace,
|
||||
Labels: pgLabels(pg.Name, nil),
|
||||
OwnerReferences: pgOwnerReference(pg),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func pgRole(pg *tsapi.ProxyGroup, namespace string) *rbacv1.Role {
|
||||
return &rbacv1.Role{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: pg.Name,
|
||||
Namespace: namespace,
|
||||
Labels: pgLabels(pg.Name, nil),
|
||||
OwnerReferences: pgOwnerReference(pg),
|
||||
},
|
||||
Rules: []rbacv1.PolicyRule{
|
||||
{
|
||||
APIGroups: []string{""},
|
||||
Resources: []string{"secrets"},
|
||||
Verbs: []string{
|
||||
"get",
|
||||
"patch",
|
||||
"update",
|
||||
},
|
||||
ResourceNames: func() (secrets []string) {
|
||||
for i := range pgReplicas(pg) {
|
||||
secrets = append(secrets,
|
||||
fmt.Sprintf("%s-%d-config", pg.Name, i), // Config with auth key.
|
||||
fmt.Sprintf("%s-%d", pg.Name, i), // State.
|
||||
)
|
||||
}
|
||||
return secrets
|
||||
}(),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func pgRoleBinding(pg *tsapi.ProxyGroup, namespace string) *rbacv1.RoleBinding {
|
||||
return &rbacv1.RoleBinding{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: pg.Name,
|
||||
Namespace: namespace,
|
||||
Labels: pgLabels(pg.Name, nil),
|
||||
OwnerReferences: pgOwnerReference(pg),
|
||||
},
|
||||
Subjects: []rbacv1.Subject{
|
||||
{
|
||||
Kind: "ServiceAccount",
|
||||
Name: pg.Name,
|
||||
Namespace: namespace,
|
||||
},
|
||||
},
|
||||
RoleRef: rbacv1.RoleRef{
|
||||
Kind: "Role",
|
||||
Name: pg.Name,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func pgStateSecrets(pg *tsapi.ProxyGroup, namespace string) (secrets []*corev1.Secret) {
|
||||
for i := range pgReplicas(pg) {
|
||||
secrets = append(secrets, &corev1.Secret{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: fmt.Sprintf("%s-%d", pg.Name, i),
|
||||
Namespace: namespace,
|
||||
Labels: pgSecretLabels(pg.Name, "state"),
|
||||
OwnerReferences: pgOwnerReference(pg),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
return secrets
|
||||
}
|
||||
|
||||
func pgSecretLabels(pgName, typ string) map[string]string {
|
||||
return pgLabels(pgName, map[string]string{
|
||||
labelSecretType: typ, // "config" or "state".
|
||||
})
|
||||
}
|
||||
|
||||
func pgLabels(pgName string, customLabels map[string]string) map[string]string {
|
||||
l := make(map[string]string, len(customLabels)+3)
|
||||
for k, v := range customLabels {
|
||||
l[k] = v
|
||||
}
|
||||
|
||||
l[LabelManaged] = "true"
|
||||
l[LabelParentType] = "proxygroup"
|
||||
l[LabelParentName] = pgName
|
||||
|
||||
return l
|
||||
}
|
||||
|
||||
func pgOwnerReference(owner *tsapi.ProxyGroup) []metav1.OwnerReference {
|
||||
return []metav1.OwnerReference{*metav1.NewControllerRef(owner, tsapi.SchemeGroupVersion.WithKind("ProxyGroup"))}
|
||||
}
|
||||
|
||||
func pgReplicas(pg *tsapi.ProxyGroup) int32 {
|
||||
if pg.Spec.Replicas != nil {
|
||||
return *pg.Spec.Replicas
|
||||
}
|
||||
|
||||
return 2
|
||||
}
|
@ -0,0 +1,226 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build !plan9
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"go.uber.org/zap"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
rbacv1 "k8s.io/api/rbac/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/tools/record"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client/fake"
|
||||
"tailscale.com/client/tailscale"
|
||||
tsoperator "tailscale.com/k8s-operator"
|
||||
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
||||
"tailscale.com/tstest"
|
||||
"tailscale.com/types/ptr"
|
||||
)
|
||||
|
||||
const testProxyImage = "tailscale/tailscale:test"
|
||||
|
||||
func TestProxyGroup(t *testing.T) {
|
||||
pg := &tsapi.ProxyGroup{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "test",
|
||||
Finalizers: []string{"tailscale.com/finalizer"},
|
||||
},
|
||||
}
|
||||
|
||||
fc := fake.NewClientBuilder().
|
||||
WithScheme(tsapi.GlobalScheme).
|
||||
WithObjects(pg).
|
||||
WithStatusSubresource(pg).
|
||||
Build()
|
||||
tsClient := &fakeTSClient{}
|
||||
zl, _ := zap.NewDevelopment()
|
||||
fr := record.NewFakeRecorder(1)
|
||||
cl := tstest.NewClock(tstest.ClockOpts{})
|
||||
reconciler := &ProxyGroupReconciler{
|
||||
tsNamespace: tsNamespace,
|
||||
proxyImage: testProxyImage,
|
||||
defaultTags: []string{"tag:test-tag"},
|
||||
tsFirewallMode: "auto",
|
||||
Client: fc,
|
||||
tsClient: tsClient,
|
||||
recorder: fr,
|
||||
l: zl.Sugar(),
|
||||
clock: cl,
|
||||
}
|
||||
|
||||
t.Run("observe_ProxyGroupCreating_status_reason", func(t *testing.T) {
|
||||
expectReconciled(t, reconciler, "", pg.Name)
|
||||
|
||||
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "0/2 ProxyGroup pods running", 0, cl, zl.Sugar())
|
||||
expectEqual(t, fc, pg, nil)
|
||||
if expected := 1; reconciler.proxyGroups.Len() != expected {
|
||||
t.Fatalf("expected %d recorders, got %d", expected, reconciler.proxyGroups.Len())
|
||||
}
|
||||
expectProxyGroupResources(t, fc, pg, true)
|
||||
keyReq := tailscale.KeyCapabilities{
|
||||
Devices: tailscale.KeyDeviceCapabilities{
|
||||
Create: tailscale.KeyDeviceCreateCapabilities{
|
||||
Reusable: false,
|
||||
Ephemeral: false,
|
||||
Preauthorized: true,
|
||||
Tags: []string{"tag:test-tag"},
|
||||
},
|
||||
},
|
||||
}
|
||||
if diff := cmp.Diff(tsClient.KeyRequests(), []tailscale.KeyCapabilities{keyReq, keyReq}); diff != "" {
|
||||
t.Fatalf("unexpected secrets (-got +want):\n%s", diff)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("simulate_successful_device_auth", func(t *testing.T) {
|
||||
addNodeIDToStateSecrets(t, fc, pg)
|
||||
expectReconciled(t, reconciler, "", pg.Name)
|
||||
|
||||
pg.Status.Devices = []tsapi.TailnetDevice{
|
||||
{
|
||||
Hostname: "hostname-nodeid-0",
|
||||
TailnetIPs: []string{"1.2.3.4", "::1"},
|
||||
},
|
||||
{
|
||||
Hostname: "hostname-nodeid-1",
|
||||
TailnetIPs: []string{"1.2.3.4", "::1"},
|
||||
},
|
||||
}
|
||||
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionTrue, reasonProxyGroupReady, reasonProxyGroupReady, 0, cl, zl.Sugar())
|
||||
expectEqual(t, fc, pg, nil)
|
||||
expectProxyGroupResources(t, fc, pg, true)
|
||||
})
|
||||
|
||||
t.Run("scale_up_to_3", func(t *testing.T) {
|
||||
pg.Spec.Replicas = ptr.To[int32](3)
|
||||
mustUpdate(t, fc, "", pg.Name, func(p *tsapi.ProxyGroup) {
|
||||
p.Spec = pg.Spec
|
||||
})
|
||||
expectReconciled(t, reconciler, "", pg.Name)
|
||||
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "2/3 ProxyGroup pods running", 0, cl, zl.Sugar())
|
||||
expectEqual(t, fc, pg, nil)
|
||||
|
||||
addNodeIDToStateSecrets(t, fc, pg)
|
||||
expectReconciled(t, reconciler, "", pg.Name)
|
||||
tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionTrue, reasonProxyGroupReady, reasonProxyGroupReady, 0, cl, zl.Sugar())
|
||||
pg.Status.Devices = append(pg.Status.Devices, tsapi.TailnetDevice{
|
||||
Hostname: "hostname-nodeid-2",
|
||||
TailnetIPs: []string{"1.2.3.4", "::1"},
|
||||
})
|
||||
expectEqual(t, fc, pg, nil)
|
||||
expectProxyGroupResources(t, fc, pg, true)
|
||||
})
|
||||
|
||||
t.Run("scale_down_to_1", func(t *testing.T) {
|
||||
pg.Spec.Replicas = ptr.To[int32](1)
|
||||
mustUpdate(t, fc, "", pg.Name, func(p *tsapi.ProxyGroup) {
|
||||
p.Spec = pg.Spec
|
||||
})
|
||||
expectReconciled(t, reconciler, "", pg.Name)
|
||||
pg.Status.Devices = pg.Status.Devices[:1] // truncate to only the first device.
|
||||
expectEqual(t, fc, pg, nil)
|
||||
|
||||
expectProxyGroupResources(t, fc, pg, true)
|
||||
})
|
||||
|
||||
t.Run("delete_and_cleanup", func(t *testing.T) {
|
||||
if err := fc.Delete(context.Background(), pg); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
expectReconciled(t, reconciler, "", pg.Name)
|
||||
|
||||
expectMissing[tsapi.Recorder](t, fc, "", pg.Name)
|
||||
if expected := 0; reconciler.proxyGroups.Len() != expected {
|
||||
t.Fatalf("expected %d ProxyGroups, got %d", expected, reconciler.proxyGroups.Len())
|
||||
}
|
||||
// 2 nodes should get deleted as part of the scale down, and then finally
|
||||
// the first node gets deleted with the ProxyGroup cleanup.
|
||||
if diff := cmp.Diff(tsClient.deleted, []string{"nodeid-1", "nodeid-2", "nodeid-0"}); diff != "" {
|
||||
t.Fatalf("unexpected deleted devices (-got +want):\n%s", diff)
|
||||
}
|
||||
// The fake client does not clean up objects whose owner has been
|
||||
// deleted, so we can't test for the owned resources getting deleted.
|
||||
})
|
||||
}
|
||||
|
||||
func expectProxyGroupResources(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyGroup, shouldExist bool) {
|
||||
t.Helper()
|
||||
|
||||
role := pgRole(pg, tsNamespace)
|
||||
roleBinding := pgRoleBinding(pg, tsNamespace)
|
||||
serviceAccount := pgServiceAccount(pg, tsNamespace)
|
||||
statefulSet := pgStatefulSet(pg, tsNamespace, testProxyImage, "auto", "")
|
||||
|
||||
if shouldExist {
|
||||
expectEqual(t, fc, role, nil)
|
||||
expectEqual(t, fc, roleBinding, nil)
|
||||
expectEqual(t, fc, serviceAccount, nil)
|
||||
expectEqual(t, fc, statefulSet, func(ss *appsv1.StatefulSet) {
|
||||
ss.Spec.Template.Annotations[podAnnotationLastSetConfigFileHash] = ""
|
||||
})
|
||||
} else {
|
||||
expectMissing[rbacv1.Role](t, fc, role.Namespace, role.Name)
|
||||
expectMissing[rbacv1.RoleBinding](t, fc, roleBinding.Namespace, roleBinding.Name)
|
||||
expectMissing[corev1.ServiceAccount](t, fc, serviceAccount.Namespace, serviceAccount.Name)
|
||||
expectMissing[appsv1.StatefulSet](t, fc, statefulSet.Namespace, statefulSet.Name)
|
||||
}
|
||||
|
||||
var expectedSecrets []string
|
||||
for i := range pgReplicas(pg) {
|
||||
expectedSecrets = append(expectedSecrets,
|
||||
fmt.Sprintf("%s-%d", pg.Name, i),
|
||||
fmt.Sprintf("%s-%d-config", pg.Name, i),
|
||||
)
|
||||
}
|
||||
expectSecrets(t, fc, expectedSecrets)
|
||||
}
|
||||
|
||||
func expectSecrets(t *testing.T, fc client.WithWatch, expected []string) {
|
||||
t.Helper()
|
||||
|
||||
secrets := &corev1.SecretList{}
|
||||
if err := fc.List(context.Background(), secrets); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var actual []string
|
||||
for _, secret := range secrets.Items {
|
||||
actual = append(actual, secret.Name)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(actual, expected); diff != "" {
|
||||
t.Fatalf("unexpected secrets (-got +want):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func addNodeIDToStateSecrets(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyGroup) {
|
||||
const key = "profile-abc"
|
||||
for i := range pgReplicas(pg) {
|
||||
bytes, err := json.Marshal(map[string]any{
|
||||
"Config": map[string]any{
|
||||
"NodeID": fmt.Sprintf("nodeid-%d", i),
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
mustUpdate(t, fc, tsNamespace, fmt.Sprintf("test-%d", i), func(s *corev1.Secret) {
|
||||
s.Data = map[string][]byte{
|
||||
currentProfileKey: []byte(key),
|
||||
key: bytes,
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue