mirror of https://github.com/tailscale/tailscale/
cmd/k8s-operator,k8s-operator/apis: set a readiness condition on egress Services for ProxyGroup (#13746)
Set a readiness condition on ExternalName Services that define a tailnet target to route cluster traffic to via a ProxyGroup's proxies. The condition is set to true if at least one proxy is currently set up to route.

Updates tailscale/tailscale#13406

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
parent
94c79659fa
commit
89ee6bbdae
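
For context before the diff: a consumer of the new condition (or a test) can gate on it with the standard metav1 condition helpers, since Service status conditions are plain metav1.Condition values. A minimal sketch, not part of this change; the helper name is illustrative, and only tsapi.EgressSvcReady comes from this PR:

package main

import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
)

// egressSvcIsReady reports whether the operator currently considers at least
// one ProxyGroup replica ready to route traffic for this egress Service.
func egressSvcIsReady(svc *corev1.Service) bool {
	cond := meta.FindStatusCondition(svc.Status.Conditions, string(tsapi.EgressSvcReady))
	return cond != nil && cond.Status == metav1.ConditionTrue
}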
@@ -0,0 +1,179 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

//go:build !plan9

package main

import (
	"context"
	"errors"
	"fmt"
	"strings"

	"go.uber.org/zap"
	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	discoveryv1 "k8s.io/api/discovery/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
	tsoperator "tailscale.com/k8s-operator"
	tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
	"tailscale.com/tstime"
)

const (
	reasonReadinessCheckFailed     = "ReadinessCheckFailed"
	reasonClusterResourcesNotReady = "ClusterResourcesNotReady"
	reasonNoProxies                = "NoProxiesConfigured"
	reasonNotReady                 = "NotReadyToRouteTraffic"
	reasonReady                    = "ReadyToRouteTraffic"
	reasonPartiallyReady           = "PartiallyReadyToRouteTraffic"

	msgReadyToRouteTemplate = "%d out of %d replicas are ready to route traffic"
)

type egressSvcsReadinessReconciler struct {
	client.Client
	logger      *zap.SugaredLogger
	clock       tstime.Clock
	tsNamespace string
}

// Reconcile reconciles an ExternalName Service that defines a tailnet target to be exposed on a ProxyGroup and sets the
// EgressSvcReady condition on it. The condition gets set to true if at least one of the proxies is currently ready to
// route traffic to the target. It compares proxy Pod IPs with the endpoints set on the EndpointSlice for the egress
// service to determine how many replicas are currently able to route traffic.
func (esrr *egressSvcsReadinessReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
	l := esrr.logger.With("Service", req.NamespacedName)
	defer l.Info("reconcile finished")

	svc := new(corev1.Service)
	if err = esrr.Get(ctx, req.NamespacedName, svc); apierrors.IsNotFound(err) {
		l.Info("Service not found")
		return res, nil
	} else if err != nil {
		return res, fmt.Errorf("failed to get Service: %w", err)
	}
	var (
		reason, msg string
		st          metav1.ConditionStatus = metav1.ConditionUnknown
	)
	oldStatus := svc.Status.DeepCopy()
	defer func() {
		tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, st, reason, msg, esrr.clock, l)
		if !apiequality.Semantic.DeepEqual(oldStatus, svc.Status) {
			err = errors.Join(err, esrr.Status().Update(ctx, svc))
		}
	}()

	crl := egressSvcChildResourceLabels(svc)
	eps, err := getSingleObject[discoveryv1.EndpointSlice](ctx, esrr.Client, esrr.tsNamespace, crl)
	if err != nil {
		err = fmt.Errorf("error getting EndpointSlice: %w", err)
		reason = reasonReadinessCheckFailed
		msg = err.Error()
		return res, err
	}
	if eps == nil {
		l.Infof("EndpointSlice for Service does not yet exist, waiting...")
		reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
		st = metav1.ConditionFalse
		return res, nil
	}
	pg := &tsapi.ProxyGroup{
		ObjectMeta: metav1.ObjectMeta{
			Name: svc.Annotations[AnnotationProxyGroup],
		},
	}
	err = esrr.Get(ctx, client.ObjectKeyFromObject(pg), pg)
	if apierrors.IsNotFound(err) {
		l.Infof("ProxyGroup for Service does not exist, waiting...")
		reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
		st = metav1.ConditionFalse
		return res, nil
	}
	if err != nil {
		err = fmt.Errorf("error retrieving ProxyGroup: %w", err)
		reason = reasonReadinessCheckFailed
		msg = err.Error()
		return res, err
	}
	if !tsoperator.ProxyGroupIsReady(pg) {
		l.Infof("ProxyGroup for Service is not ready, waiting...")
		reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
		st = metav1.ConditionFalse
		return res, nil
	}

	replicas := pgReplicas(pg)
	if replicas == 0 {
		l.Infof("ProxyGroup replicas set to 0")
		reason, msg = reasonNoProxies, reasonNoProxies
		st = metav1.ConditionFalse
		return res, nil
	}
	podLabels := pgLabels(pg.Name, nil)
	var readyReplicas int32
	for i := range replicas {
		podLabels[appsv1.PodIndexLabel] = fmt.Sprintf("%d", i)
		pod, err := getSingleObject[corev1.Pod](ctx, esrr.Client, esrr.tsNamespace, podLabels)
		if err != nil {
			err = fmt.Errorf("error retrieving ProxyGroup Pod: %w", err)
			reason = reasonReadinessCheckFailed
			msg = err.Error()
			return res, err
		}
		if pod == nil {
			l.Infof("[unexpected] ProxyGroup is ready, but replica %d was not found", i)
			reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
			return res, nil
		}
		l.Infof("looking at Pod with IPs %v", pod.Status.PodIPs)
		ready := false
		for _, ep := range eps.Endpoints {
			l.Infof("looking at endpoint with addresses %v", ep.Addresses)
			if endpointReadyForPod(&ep, pod, l) {
				l.Infof("endpoint is ready for Pod")
				ready = true
				break
			}
		}
		if ready {
			readyReplicas++
		}
	}
	msg = fmt.Sprintf(msgReadyToRouteTemplate, readyReplicas, replicas)
	if readyReplicas == 0 {
		reason = reasonNotReady
		st = metav1.ConditionFalse
		return res, nil
	}
	st = metav1.ConditionTrue
	if readyReplicas < replicas {
		reason = reasonPartiallyReady
	} else {
		reason = reasonReady
	}
	return res, nil
}

// endpointReadyForPod returns true if the endpoint is for the Pod's IPv4 address and is ready to serve traffic.
// Endpoint must not be nil.
func endpointReadyForPod(ep *discoveryv1.Endpoint, pod *corev1.Pod, l *zap.SugaredLogger) bool {
	podIP, err := podIPv4(pod)
	if err != nil {
		l.Infof("[unexpected] error retrieving Pod's IPv4 address: %v", err)
		return false
	}
	// Currently we only ever set a single address on an Endpoint and nothing else is meant to modify this.
	if len(ep.Addresses) != 1 {
		return false
	}
	return strings.EqualFold(ep.Addresses[0], podIP) &&
		*ep.Conditions.Ready &&
		*ep.Conditions.Serving &&
		!*ep.Conditions.Terminating
}
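
For orientation between the two files: the reconciler above still has to be registered with the operator's controller manager, which happens elsewhere in this PR. The sketch below only shows the shape of such a registration with controller-runtime's builder (assuming the sigs.k8s.io/controller-runtime/pkg/builder and pkg/manager packages); "mgr", "logger" and "tsNamespace" are assumed inputs, and the real setup likely adds event filters:

// Sketch only, not part of this diff: wire the readiness reconciler to watch
// Services. The actual registration likely filters to egress ExternalName
// Services that reference a ProxyGroup.
func registerEgressSvcsReadinessReconciler(mgr manager.Manager, logger *zap.SugaredLogger, tsNamespace string) error {
	return builder.ControllerManagedBy(mgr).
		Named("egress-svcs-readiness-reconciler").
		For(&corev1.Service{}).
		Complete(&egressSvcsReadinessReconciler{
			Client:      mgr.GetClient(),
			logger:      logger,
			clock:       tstime.DefaultClock{},
			tsNamespace: tsNamespace,
		})
}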
@@ -0,0 +1,169 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

//go:build !plan9

package main

import (
	"fmt"
	"testing"

	"github.com/AlekSi/pointer"
	"go.uber.org/zap"
	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	discoveryv1 "k8s.io/api/discovery/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
	tsoperator "tailscale.com/k8s-operator"
	tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
	"tailscale.com/tstest"
	"tailscale.com/tstime"
)

func TestEgressServiceReadiness(t *testing.T) {
	// We need to pass a ProxyGroup object to WithStatusSubresource because of some quirks in how the fake client
	// works. Without this, the code further down would not be able to update the ProxyGroup status.
	fc := fake.NewClientBuilder().
		WithScheme(tsapi.GlobalScheme).
		WithStatusSubresource(&tsapi.ProxyGroup{}).
		Build()
	zl, _ := zap.NewDevelopment()
	cl := tstest.NewClock(tstest.ClockOpts{})
	rec := &egressSvcsReadinessReconciler{
		tsNamespace: "operator-ns",
		Client:      fc,
		logger:      zl.Sugar(),
		clock:       cl,
	}
	tailnetFQDN := "my-app.tailnetxyz.ts.net"
	egressSvc := &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "my-app",
			Namespace: "dev",
			Annotations: map[string]string{
				AnnotationProxyGroup:        "dev",
				AnnotationTailnetTargetFQDN: tailnetFQDN,
			},
		},
	}
	fakeClusterIPSvc := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Name: "my-app", Namespace: "operator-ns"}}
	l := egressSvcEpsLabels(egressSvc, fakeClusterIPSvc)
	eps := &discoveryv1.EndpointSlice{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "my-app",
			Namespace: "operator-ns",
			Labels:    l,
		},
		AddressType: discoveryv1.AddressTypeIPv4,
	}
	pg := &tsapi.ProxyGroup{
		ObjectMeta: metav1.ObjectMeta{
			Name: "dev",
		},
	}
	mustCreate(t, fc, egressSvc)
	setClusterNotReady(egressSvc, cl, zl.Sugar())
	t.Run("endpointslice_does_not_exist", func(t *testing.T) {
		expectReconciled(t, rec, "dev", "my-app")
		expectEqual(t, fc, egressSvc, nil) // not ready
	})
	t.Run("proxy_group_does_not_exist", func(t *testing.T) {
		mustCreate(t, fc, eps)
		expectReconciled(t, rec, "dev", "my-app")
		expectEqual(t, fc, egressSvc, nil) // still not ready
	})
	t.Run("proxy_group_not_ready", func(t *testing.T) {
		mustCreate(t, fc, pg)
		expectReconciled(t, rec, "dev", "my-app")
		expectEqual(t, fc, egressSvc, nil) // still not ready
	})
	t.Run("no_ready_replicas", func(t *testing.T) {
		setPGReady(pg, cl, zl.Sugar())
		mustUpdateStatus(t, fc, pg.Namespace, pg.Name, func(p *tsapi.ProxyGroup) {
			p.Status = pg.Status
		})
		expectEqual(t, fc, pg, nil)
		for i := range pgReplicas(pg) {
			p := pod(pg, i)
			mustCreate(t, fc, p)
			mustUpdateStatus(t, fc, p.Namespace, p.Name, func(existing *corev1.Pod) {
				existing.Status.PodIPs = p.Status.PodIPs
			})
		}
		expectReconciled(t, rec, "dev", "my-app")
		setNotReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg))
		expectEqual(t, fc, egressSvc, nil) // still not ready
	})
	t.Run("one_ready_replica", func(t *testing.T) {
		setEndpointForReplica(pg, 0, eps)
		mustUpdate(t, fc, eps.Namespace, eps.Name, func(e *discoveryv1.EndpointSlice) {
			e.Endpoints = eps.Endpoints
		})
		setReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg), 1)
		expectReconciled(t, rec, "dev", "my-app")
		expectEqual(t, fc, egressSvc, nil) // partially ready
	})
	t.Run("all_replicas_ready", func(t *testing.T) {
		for i := range pgReplicas(pg) {
			setEndpointForReplica(pg, i, eps)
		}
		mustUpdate(t, fc, eps.Namespace, eps.Name, func(e *discoveryv1.EndpointSlice) {
			e.Endpoints = eps.Endpoints
		})
		setReady(egressSvc, cl, zl.Sugar(), pgReplicas(pg), pgReplicas(pg))
		expectReconciled(t, rec, "dev", "my-app")
		expectEqual(t, fc, egressSvc, nil) // ready
	})
}

func setClusterNotReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger) {
	tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionFalse, reasonClusterResourcesNotReady, reasonClusterResourcesNotReady, cl, l)
}

func setNotReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger, replicas int32) {
	msg := fmt.Sprintf(msgReadyToRouteTemplate, 0, replicas)
	tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionFalse, reasonNotReady, msg, cl, l)
}

func setReady(svc *corev1.Service, cl tstime.Clock, l *zap.SugaredLogger, replicas, readyReplicas int32) {
	reason := reasonPartiallyReady
	if readyReplicas == replicas {
		reason = reasonReady
	}
	msg := fmt.Sprintf(msgReadyToRouteTemplate, readyReplicas, replicas)
	tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, metav1.ConditionTrue, reason, msg, cl, l)
}

func setPGReady(pg *tsapi.ProxyGroup, cl tstime.Clock, l *zap.SugaredLogger) {
	tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionTrue, "foo", "foo", pg.Generation, cl, l)
}

func setEndpointForReplica(pg *tsapi.ProxyGroup, ordinal int32, eps *discoveryv1.EndpointSlice) {
	p := pod(pg, ordinal)
	eps.Endpoints = append(eps.Endpoints, discoveryv1.Endpoint{
		Addresses: []string{p.Status.PodIPs[0].IP},
		Conditions: discoveryv1.EndpointConditions{
			Ready:       pointer.ToBool(true),
			Serving:     pointer.ToBool(true),
			Terminating: pointer.ToBool(false),
		},
	})
}

func pod(pg *tsapi.ProxyGroup, ordinal int32) *corev1.Pod {
	l := pgLabels(pg.Name, nil)
	l[appsv1.PodIndexLabel] = fmt.Sprintf("%d", ordinal)
	ip := fmt.Sprintf("10.0.0.%d", ordinal)
	return &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      fmt.Sprintf("%s-%d", pg.Name, ordinal),
			Namespace: "operator-ns",
			Labels:    l,
		},
		Status: corev1.PodStatus{
			PodIPs: []corev1.PodIP{{IP: ip}},
		},
	}
}
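
One case the subtests above do not cover is a draining endpoint. A hypothetical extra test, not part of this commit, could reuse this file's pod helper to assert that endpointReadyForPod treats a Terminating endpoint as not ready even when its address matches the Pod:

func TestEndpointNotReadyWhenTerminating(t *testing.T) {
	zl, _ := zap.NewDevelopment()
	p := pod(&tsapi.ProxyGroup{ObjectMeta: metav1.ObjectMeta{Name: "dev"}}, 0)
	ep := discoveryv1.Endpoint{
		// The address matches the Pod, but the endpoint is being drained.
		Addresses: []string{p.Status.PodIPs[0].IP},
		Conditions: discoveryv1.EndpointConditions{
			Ready:       pointer.ToBool(true),
			Serving:     pointer.ToBool(true),
			Terminating: pointer.ToBool(true),
		},
	}
	if endpointReadyForPod(&ep, p, zl.Sugar()) {
		t.Fatal("terminating endpoint should not be treated as ready")
	}
}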