@ -7,10 +7,8 @@ package main
import (
"context"
"crypto/tls"
_ "embed"
"fmt"
"net/http"
"os"
"slices"
"strings"
@ -26,7 +24,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/ transpor t"
"k8s.io/client-go/ res t"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
@ -42,6 +40,7 @@ import (
"tailscale.com/hostinfo"
"tailscale.com/ipn"
"tailscale.com/ipn/store/kubestore"
"tailscale.com/tailcfg"
"tailscale.com/tsnet"
"tailscale.com/types/logger"
"tailscale.com/types/opt"
@ -55,13 +54,8 @@ func main() {
tailscale . I_Acknowledge_This_API_Is_Unstable = true
var (
hostname = defaultEnv ( "OPERATOR_HOSTNAME" , "tailscale-operator" )
kubeSecret = defaultEnv ( "OPERATOR_SECRET" , "" )
operatorTags = defaultEnv ( "OPERATOR_INITIAL_TAGS" , "tag:k8s-operator" )
tsNamespace = defaultEnv ( "OPERATOR_NAMESPACE" , "" )
tslogging = defaultEnv ( "OPERATOR_LOGGING" , "info" )
clientIDPath = defaultEnv ( "CLIENT_ID_FILE" , "" )
clientSecretPath = defaultEnv ( "CLIENT_SECRET_FILE" , "" )
image = defaultEnv ( "PROXY_IMAGE" , "tailscale/tailscale:latest" )
priorityClassName = defaultEnv ( "PROXY_PRIORITY_CLASS_NAME" , "" )
tags = defaultEnv ( "PROXY_TAGS" , "tag:k8s" )
@ -79,8 +73,28 @@ func main() {
}
zlog := kzap . NewRaw ( opts ... ) . Sugar ( )
logf . SetLogger ( zapr . NewLogger ( zlog . Desugar ( ) ) )
startlog := zlog . Named ( "startup" )
s , tsClient := initTSNet ( zlog )
restConfig := config . GetConfigOrDie ( )
if shouldRunAuthProxy {
launchAuthProxy ( zlog , restConfig , s )
}
startReconcilers ( zlog , tsNamespace , restConfig , tsClient , image , priorityClassName , tags )
}
// initTSNet initializes the tsnet.Server and logs in to Tailscale. It uses the
// CLIENT_ID_FILE and CLIENT_SECRET_FILE environment variables to authenticate
// with Tailscale.
func initTSNet ( zlog * zap . SugaredLogger ) ( * tsnet . Server , * tailscale . Client ) {
hostinfo . SetApp ( "k8s-operator" )
var (
clientIDPath = defaultEnv ( "CLIENT_ID_FILE" , "" )
clientSecretPath = defaultEnv ( "CLIENT_SECRET_FILE" , "" )
hostname = defaultEnv ( "OPERATOR_HOSTNAME" , "tailscale-operator" )
kubeSecret = defaultEnv ( "OPERATOR_SECRET" , "" )
operatorTags = defaultEnv ( "OPERATOR_INITIAL_TAGS" , "tag:k8s-operator" )
)
startlog := zlog . Named ( "startup" )
if clientIDPath == "" || clientSecretPath == "" {
startlog . Fatalf ( "CLIENT_ID_FILE and CLIENT_SECRET_FILE must be set" )
}
@ -100,12 +114,6 @@ func main() {
tsClient := tailscale . NewClient ( "-" , nil )
tsClient . HTTPClient = credentials . Client ( context . Background ( ) )
if shouldRunAuthProxy {
hostinfo . SetApp ( "k8s-operator-proxy" )
} else {
hostinfo . SetApp ( "k8s-operator" )
}
s := & tsnet . Server {
Hostname : hostname ,
Logf : zlog . Named ( "tailscaled" ) . Debugf ,
@ -176,7 +184,13 @@ waitOnline:
}
time . Sleep ( time . Second )
}
return s , tsClient
}
// startReconcilers starts the controller-runtime manager and registers the
// ServiceReconciler.
func startReconcilers ( zlog * zap . SugaredLogger , tsNamespace string , restConfig * rest . Config , tsClient * tailscale . Client , image , priorityClassName , tags string ) {
startlog := zlog . Named ( "startReconcilers" )
// For secrets and statefulsets, we only get permission to touch the objects
// in the controller's own namespace. This cannot be expressed by
// .Watches(...) below, instead you have to add a per-type field selector to
@ -186,7 +200,6 @@ waitOnline:
nsFilter := cache . ByObject {
Field : client . InNamespace ( tsNamespace ) . AsSelector ( ) ,
}
restConfig := config . GetConfigOrDie ( )
mgr , err := manager . New ( restConfig , manager . Options {
Cache : cache . Options {
ByObject : map [ client . Object ] cache . ByObject {
@ -199,16 +212,6 @@ waitOnline:
startlog . Fatalf ( "could not create manager: %v" , err )
}
sr := & ServiceReconciler {
Client : mgr . GetClient ( ) ,
tsClient : tsClient ,
defaultTags : strings . Split ( tags , "," ) ,
operatorNamespace : tsNamespace ,
proxyImage : image ,
proxyPriorityClassName : priorityClassName ,
logger : zlog . Named ( "service-reconciler" ) ,
}
reconcileFilter := handler . EnqueueRequestsFromMapFunc ( func ( _ context . Context , o client . Object ) [ ] reconcile . Request {
ls := o . GetLabels ( )
if ls [ LabelManaged ] != "true" {
@ -231,33 +234,23 @@ waitOnline:
For ( & corev1 . Service { } ) .
Watches ( & appsv1 . StatefulSet { } , reconcileFilter ) .
Watches ( & corev1 . Secret { } , reconcileFilter ) .
Complete ( sr )
Complete ( & ServiceReconciler {
ssr : & tailscaleSTSReconciler {
Client : mgr . GetClient ( ) ,
tsClient : tsClient ,
defaultTags : strings . Split ( tags , "," ) ,
operatorNamespace : tsNamespace ,
proxyImage : image ,
proxyPriorityClassName : priorityClassName ,
} ,
Client : mgr . GetClient ( ) ,
logger : zlog . Named ( "service-reconciler" ) ,
} )
if err != nil {
startlog . Fatalf ( "could not create controller: %v" , err )
}
startlog . Infof ( "Startup complete, operator running, version: %s" , version . Long ( ) )
if shouldRunAuthProxy {
cfg , err := restConfig . TransportConfig ( )
if err != nil {
startlog . Fatalf ( "could not get rest.TransportConfig(): %v" , err )
}
// Kubernetes uses SPDY for exec and port-forward, however SPDY is
// incompatible with HTTP/2; so disable HTTP/2 in the proxy.
tr := http . DefaultTransport . ( * http . Transport ) . Clone ( )
tr . TLSClientConfig , err = transport . TLSConfigFor ( cfg )
if err != nil {
startlog . Fatalf ( "could not get transport.TLSConfigFor(): %v" , err )
}
tr . TLSNextProto = make ( map [ string ] func ( authority string , c * tls . Conn ) http . RoundTripper )
rt , err := transport . HTTPWrappersForConfig ( cfg , tr )
if err != nil {
startlog . Fatalf ( "could not get rest.TransportConfig(): %v" , err )
}
go runAuthProxy ( s , rt , zlog . Named ( "auth-proxy" ) . Infof )
}
if err := mgr . Start ( signals . SetupSignalHandler ( ) ) ; err != nil {
startlog . Fatalf ( "could not start manager: %v" , err )
}
@ -276,20 +269,15 @@ const (
AnnotationHostname = "tailscale.com/hostname"
)
// ServiceReconciler is a simple ControllerManagedBy example implementation.
type ServiceReconciler struct {
client . Client
tsClient tsClient
defaultTags [ ] string
operatorNamespace string
proxyImage string
proxyPriorityClassName string
ssr * tailscaleSTSReconciler
logger * zap . SugaredLogger
}
type tsClient interface {
CreateKey ( ctx context . Context , caps tailscale . KeyCapabilities ) ( string , * tailscale . Key , error )
DeleteDevice ( ctx context . Context , id string ) error
DeleteDevice ( ctx context . Context , nodeStableID string ) error
}
func childResourceLabels ( parent * corev1 . Service ) map [ string ] string {
@ -340,55 +328,13 @@ func (a *ServiceReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare
return nil
}
ml := childResourceLabels ( svc )
// Need to delete the StatefulSet first, and delete it with foreground
// cascading deletion. That way, the pod that's writing to the Secret will
// stop running before we start looking at the Secret's contents, and
// assuming k8s ordering semantics don't mess with us, that should avoid
// tailscale device deletion races where we fail to notice a device that
// should be removed.
sts , err := getSingleObject [ appsv1 . StatefulSet ] ( ctx , a . Client , a . operatorNamespace , ml )
if err != nil {
return fmt . Errorf ( "getting statefulset: %w" , err )
}
if sts != nil {
if ! sts . GetDeletionTimestamp ( ) . IsZero ( ) {
// Deletion in progress, check again later. We'll get another
// notification when the deletion is complete.
logger . Debugf ( "waiting for statefulset %s/%s deletion" , sts . GetNamespace ( ) , sts . GetName ( ) )
return nil
}
err := a . DeleteAllOf ( ctx , & appsv1 . StatefulSet { } , client . InNamespace ( a . operatorNamespace ) , client . MatchingLabels ( ml ) , client . PropagationPolicy ( metav1 . DeletePropagationForeground ) )
if err != nil {
return fmt . Errorf ( "deleting statefulset: %w" , err )
}
logger . Debugf ( "started deletion of statefulset %s/%s" , sts . GetNamespace ( ) , sts . GetName ( ) )
if done , err := a . ssr . Cleanup ( ctx , logger , childResourceLabels ( svc ) ) ; err != nil {
return fmt . Errorf ( "failed to cleanup: %w" , err )
} else if ! done {
logger . Debugf ( "cleanup not done yet, waiting for next reconcile" )
return nil
}
id , _ , err := a . getDeviceInfo ( ctx , svc )
if err != nil {
return fmt . Errorf ( "getting device info: %w" , err )
}
if id != "" {
// TODO: handle case where the device is already deleted, but the secret
// is still around.
if err := a . tsClient . DeleteDevice ( ctx , id ) ; err != nil {
return fmt . Errorf ( "deleting device: %w" , err )
}
}
types := [ ] client . Object {
& corev1 . Service { } ,
& corev1 . Secret { } ,
}
for _ , typ := range types {
if err := a . DeleteAllOf ( ctx , typ , client . InNamespace ( a . operatorNamespace ) , client . MatchingLabels ( ml ) ) ; err != nil {
return err
}
}
svc . Finalizers = append ( svc . Finalizers [ : ix ] , svc . Finalizers [ ix + 1 : ] ... )
if err := a . Update ( ctx , svc ) ; err != nil {
return fmt . Errorf ( "failed to remove finalizer: %w" , err )
@ -424,24 +370,23 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
return fmt . Errorf ( "failed to add finalizer: %w" , err )
}
}
// Do full reconcile.
hsvc , err := a . reconcileHeadlessService ( ctx , logger , svc )
if err != nil {
return fmt . Errorf ( "failed to reconcile headless service: %w" , err )
}
tags := a . defaultTags
crl := childResourceLabels ( svc )
var tags [ ] string
if tstr , ok := svc . Annotations [ AnnotationTags ] ; ok {
tags = strings . Split ( tstr , "," )
}
secretName , err := a . createOrGetSecret ( ctx , logger , svc , hsvc , tags )
if err != nil {
return fmt . Errorf ( "failed to create or get API key secret: %w" , err )
sts := & tailscaleSTSConfig {
ParentResourceName : svc . Name ,
ParentResourceUID : string ( svc . UID ) ,
TargetIP : svc . Spec . ClusterIP ,
Hostname : hostname ,
Tags : tags ,
ChildResourceLabels : crl ,
}
_ , err = a . reconcileSTS ( ctx , logger , svc , hsvc , secretName , hostname )
if err != nil {
return fmt . Errorf ( "failed to reconcile statefulset: %w" , err )
if err := a . ssr . Provision ( ctx , logger , sts ) ; err != nil {
return fmt . Errorf ( "failed to provision : %w", err )
}
if ! a . hasLoadBalancerClass ( svc ) {
@ -449,7 +394,7 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
return nil
}
_ , tsHost , err := a . get DeviceInfo( ctx , sv c)
_ , tsHost , err := a . ssr . DeviceInfo( ctx , crl )
if err != nil {
return fmt . Errorf ( "failed to get device ID: %w" , err )
}
@ -497,17 +442,111 @@ func (a *ServiceReconciler) hasAnnotation(svc *corev1.Service) bool {
svc . Annotations [ AnnotationExpose ] == "true"
}
func ( a * ServiceReconciler ) reconcileHeadlessService ( ctx context . Context , logger * zap . SugaredLogger , svc * corev1 . Service ) ( * corev1 . Service , error ) {
type tailscaleSTSConfig struct {
ParentResourceName string
ParentResourceUID string
ChildResourceLabels map [ string ] string
TargetIP string
Hostname string
Tags [ ] string // if empty, use defaultTags
}
type tailscaleSTSReconciler struct {
client . Client
tsClient tsClient
defaultTags [ ] string
operatorNamespace string
proxyImage string
proxyPriorityClassName string
}
// Provision ensures that the StatefulSet for the given service is running and
// up to date.
func ( a * tailscaleSTSReconciler ) Provision ( ctx context . Context , logger * zap . SugaredLogger , sts * tailscaleSTSConfig ) error {
// Do full reconcile.
hsvc , err := a . reconcileHeadlessService ( ctx , logger , sts )
if err != nil {
return fmt . Errorf ( "failed to reconcile headless service: %w" , err )
}
secretName , err := a . createOrGetSecret ( ctx , logger , sts , hsvc )
if err != nil {
return fmt . Errorf ( "failed to create or get API key secret: %w" , err )
}
_ , err = a . reconcileSTS ( ctx , logger , sts , hsvc , secretName )
if err != nil {
return fmt . Errorf ( "failed to reconcile statefulset: %w" , err )
}
return nil
}
// Cleanup removes all resources associated that were created by Provision with
// the given labels. It returns true when all resources have been removed,
// otherwise it returns false and the caller should retry later.
func ( a * tailscaleSTSReconciler ) Cleanup ( ctx context . Context , logger * zap . SugaredLogger , labels map [ string ] string ) ( done bool , _ error ) {
// Need to delete the StatefulSet first, and delete it with foreground
// cascading deletion. That way, the pod that's writing to the Secret will
// stop running before we start looking at the Secret's contents, and
// assuming k8s ordering semantics don't mess with us, that should avoid
// tailscale device deletion races where we fail to notice a device that
// should be removed.
sts , err := getSingleObject [ appsv1 . StatefulSet ] ( ctx , a . Client , a . operatorNamespace , labels )
if err != nil {
return false , fmt . Errorf ( "getting statefulset: %w" , err )
}
if sts != nil {
if ! sts . GetDeletionTimestamp ( ) . IsZero ( ) {
// Deletion in progress, check again later. We'll get another
// notification when the deletion is complete.
logger . Debugf ( "waiting for statefulset %s/%s deletion" , sts . GetNamespace ( ) , sts . GetName ( ) )
return false , nil
}
err := a . DeleteAllOf ( ctx , & appsv1 . StatefulSet { } , client . InNamespace ( a . operatorNamespace ) , client . MatchingLabels ( labels ) , client . PropagationPolicy ( metav1 . DeletePropagationForeground ) )
if err != nil {
return false , fmt . Errorf ( "deleting statefulset: %w" , err )
}
logger . Debugf ( "started deletion of statefulset %s/%s" , sts . GetNamespace ( ) , sts . GetName ( ) )
return false , nil
}
id , _ , err := a . DeviceInfo ( ctx , labels )
if err != nil {
return false , fmt . Errorf ( "getting device info: %w" , err )
}
if id != "" {
// TODO: handle case where the device is already deleted, but the secret
// is still around.
if err := a . tsClient . DeleteDevice ( ctx , string ( id ) ) ; err != nil {
return false , fmt . Errorf ( "deleting device: %w" , err )
}
}
types := [ ] client . Object {
& corev1 . Service { } ,
& corev1 . Secret { } ,
}
for _ , typ := range types {
if err := a . DeleteAllOf ( ctx , typ , client . InNamespace ( a . operatorNamespace ) , client . MatchingLabels ( labels ) ) ; err != nil {
return false , err
}
}
return true , nil
}
func ( a * tailscaleSTSReconciler ) reconcileHeadlessService ( ctx context . Context , logger * zap . SugaredLogger , sts * tailscaleSTSConfig ) ( * corev1 . Service , error ) {
hsvc := & corev1 . Service {
ObjectMeta : metav1 . ObjectMeta {
GenerateName : "ts-" + svc . Name + "-" ,
GenerateName : "ts-" + s ts. ParentResource Name + "-" ,
Namespace : a . operatorNamespace ,
Labels : childResourceLabels ( svc ) ,
Labels : sts . ChildResourceLabels ,
} ,
Spec : corev1 . ServiceSpec {
ClusterIP : "None" ,
Selector : map [ string ] string {
"app" : string ( svc . UID ) ,
"app" : sts . ParentResourceUID ,
} ,
} ,
}
@ -515,7 +554,7 @@ func (a *ServiceReconciler) reconcileHeadlessService(ctx context.Context, logger
return createOrUpdate ( ctx , a . Client , a . operatorNamespace , hsvc , func ( svc * corev1 . Service ) { svc . Spec = hsvc . Spec } )
}
func ( a * Service Reconciler) createOrGetSecret ( ctx context . Context , logger * zap . SugaredLogger , s vc , hsvc * corev1 . Service , tags [ ] string ) ( string , error ) {
func ( a * tailscaleSTS Reconciler) createOrGetSecret ( ctx context . Context , logger * zap . SugaredLogger , s tsC * tailscaleSTSConfig , hsvc * corev1 . Service ) ( string , error ) {
secret := & corev1 . Secret {
ObjectMeta : metav1 . ObjectMeta {
// Hardcode a -0 suffix so that in future, if we support
@ -523,7 +562,7 @@ func (a *ServiceReconciler) createOrGetSecret(ctx context.Context, logger *zap.S
// those.
Name : hsvc . Name + "-0" ,
Namespace : a . operatorNamespace ,
Labels : childResourceLabels ( svc ) ,
Labels : stsC . ChildResourceLabels ,
} ,
}
if err := a . Get ( ctx , client . ObjectKeyFromObject ( secret ) , secret ) ; err == nil {
@ -536,7 +575,7 @@ func (a *ServiceReconciler) createOrGetSecret(ctx context.Context, logger *zap.S
// Secret doesn't exist yet, create one. Initially it contains
// only the Tailscale authkey, but once Tailscale starts it'll
// also store the daemon state.
sts , err := getSingleObject [ appsv1 . StatefulSet ] ( ctx , a . Client , a . operatorNamespace , childResourceLabels ( svc ) )
sts , err := getSingleObject [ appsv1 . StatefulSet ] ( ctx , a . Client , a . operatorNamespace , stsC . ChildResourceLabels )
if err != nil {
return "" , err
}
@ -549,6 +588,10 @@ func (a *ServiceReconciler) createOrGetSecret(ctx context.Context, logger *zap.S
// Create API Key secret which is going to be used by the statefulset
// to authenticate with Tailscale.
logger . Debugf ( "creating authkey for new tailscale proxy" )
tags := stsC . Tags
if len ( tags ) == 0 {
tags = a . defaultTags
}
authKey , err := a . newAuthKey ( ctx , tags )
if err != nil {
return "" , err
@ -563,15 +606,17 @@ func (a *ServiceReconciler) createOrGetSecret(ctx context.Context, logger *zap.S
return secret . Name , nil
}
func ( a * ServiceReconciler ) getDeviceInfo ( ctx context . Context , svc * corev1 . Service ) ( id , hostname string , err error ) {
sec , err := getSingleObject [ corev1 . Secret ] ( ctx , a . Client , a . operatorNamespace , childResourceLabels ( svc ) )
// DeviceInfo returns the device ID and hostname for the Tailscale device
// associated with the given labels.
func ( a * tailscaleSTSReconciler ) DeviceInfo ( ctx context . Context , childLabels map [ string ] string ) ( id tailcfg . StableNodeID , hostname string , err error ) {
sec , err := getSingleObject [ corev1 . Secret ] ( ctx , a . Client , a . operatorNamespace , childLabels )
if err != nil {
return "" , "" , err
}
if sec == nil {
return "" , "" , nil
}
id = string ( sec . Data [ "device_id" ] )
id = tailcfg . StableNodeID ( sec . Data [ "device_id" ] )
if id == "" {
return "" , "" , nil
}
@ -584,7 +629,7 @@ func (a *ServiceReconciler) getDeviceInfo(ctx context.Context, svc *corev1.Servi
return id , hostname , nil
}
func ( a * Service Reconciler) newAuthKey ( ctx context . Context , tags [ ] string ) ( string , error ) {
func ( a * tailscaleSTS Reconciler) newAuthKey ( ctx context . Context , tags [ ] string ) ( string , error ) {
caps := tailscale . KeyCapabilities {
Devices : tailscale . KeyDeviceCapabilities {
Create : tailscale . KeyDeviceCreateCapabilities {
@ -605,7 +650,7 @@ func (a *ServiceReconciler) newAuthKey(ctx context.Context, tags []string) (stri
//go:embed manifests/proxy.yaml
var proxyYaml [ ] byte
func ( a * Service Reconciler) reconcileSTS ( ctx context . Context , logger * zap . SugaredLogger , parentSvc , headlessSvc * corev1 . Service , authKeySecret , hostname string ) ( * appsv1 . StatefulSet , error ) {
func ( a * tailscaleSTS Reconciler) reconcileSTS ( ctx context . Context , logger * zap . SugaredLogger , sts * tailscaleSTSConfig , headlessSvc * corev1 . Service , authKeySecret string ) ( * appsv1 . StatefulSet , error ) {
var ss appsv1 . StatefulSet
if err := yaml . Unmarshal ( proxyYaml , & ss ) ; err != nil {
return nil , fmt . Errorf ( "failed to unmarshal proxy spec: %w" , err )
@ -615,7 +660,7 @@ func (a *ServiceReconciler) reconcileSTS(ctx context.Context, logger *zap.Sugare
container . Env = append ( container . Env ,
corev1 . EnvVar {
Name : "TS_DEST_IP" ,
Value : parentSvc. Spec . Cluster IP,
Value : sts. Target IP,
} ,
corev1 . EnvVar {
Name : "TS_KUBE_SECRET" ,
@ -623,21 +668,21 @@ func (a *ServiceReconciler) reconcileSTS(ctx context.Context, logger *zap.Sugare
} ,
corev1 . EnvVar {
Name : "TS_HOSTNAME" ,
Value : h ostname,
Value : sts. H ostname,
} )
ss . ObjectMeta = metav1 . ObjectMeta {
Name : headlessSvc . Name ,
Namespace : a . operatorNamespace ,
Labels : childResourceLabels ( parentSvc ) ,
Labels : sts . ChildResourceLabels ,
}
ss . Spec . ServiceName = headlessSvc . Name
ss . Spec . Selector = & metav1 . LabelSelector {
MatchLabels : map [ string ] string {
"app" : string ( parentSvc . UID ) ,
"app" : sts . ParentResourceUID ,
} ,
}
ss . Spec . Template . ObjectMeta . Labels = map [ string ] string {
"app" : string ( parentSvc . UID ) ,
"app" : sts . ParentResourceUID ,
}
ss . Spec . Template . Spec . PriorityClassName = a . proxyPriorityClassName
logger . Debugf ( "reconciling statefulset %s/%s" , ss . GetNamespace ( ) , ss . GetName ( ) )