From 90b4358113d86b4fb06e89d4ae91ef8bcb6f6264 Mon Sep 17 00:00:00 2001 From: Irbe Krumina Date: Fri, 19 Dec 2025 15:59:26 +0000 Subject: [PATCH] cmd/k8s-operator,ipn/ipnlocal: allow opting out of ACME order replace extension (#18252) In dynamically changing environments where ACME account keys and certs are stored separately, it can happen that the account key would get deleted (and recreated) between issuances. If that is the case, we currently fail renewals and the only way to recover is for users to delete certs. This adds a config knob to allow opting out of the replaces extension and utilizes it in the Kubernetes operator where there are known user workflows that could end up with this edge case. Updates #18251 Signed-off-by: Irbe Krumina --- cmd/k8s-operator/proxygroup_specs.go | 16 ++++++++++++++++ cmd/k8s-operator/sts.go | 8 ++++++++ cmd/k8s-operator/testutils_test.go | 2 ++ ipn/ipnlocal/cert.go | 5 ++++- 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/cmd/k8s-operator/proxygroup_specs.go b/cmd/k8s-operator/proxygroup_specs.go index 34db86db2..930b7049d 100644 --- a/cmd/k8s-operator/proxygroup_specs.go +++ b/cmd/k8s-operator/proxygroup_specs.go @@ -182,6 +182,14 @@ func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode string Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", Value: "/etc/tsconfig/$(POD_NAME)", }, + { + // This ensures that cert renewals can succeed if ACME account + // keys have changed since issuance. We cannot guarantee or + // validate that the account key has not changed, see + // https://github.com/tailscale/tailscale/issues/18251 + Name: "TS_DEBUG_ACME_FORCE_RENEWAL", + Value: "true", + }, } if port != nil { @@ -347,6 +355,14 @@ func kubeAPIServerStatefulSet(pg *tsapi.ProxyGroup, namespace, image string, por Name: "$(POD_NAME)-config", }.String(), }, + { + // This ensures that cert renewals can succeed if ACME account + // keys have changed since issuance. We cannot guarantee or + // validate that the account key has not changed, see + // https://github.com/tailscale/tailscale/issues/18251 + Name: "TS_DEBUG_ACME_FORCE_RENEWAL", + Value: "true", + }, } if port != nil { diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index 62f91bf92..2b6d1290e 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -671,6 +671,14 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", Value: "/etc/tsconfig/$(POD_NAME)", }, + corev1.EnvVar{ + // This ensures that cert renewals can succeed if ACME account + // keys have changed since issuance. We cannot guarantee or + // validate that the account key has not changed, see + // https://github.com/tailscale/tailscale/issues/18251 + Name: "TS_DEBUG_ACME_FORCE_RENEWAL", + Value: "true", + }, ) if sts.ForwardClusterTrafficViaL7IngressProxy { diff --git a/cmd/k8s-operator/testutils_test.go b/cmd/k8s-operator/testutils_test.go index 9eb06394c..b0e2cfd73 100644 --- a/cmd/k8s-operator/testutils_test.go +++ b/cmd/k8s-operator/testutils_test.go @@ -92,6 +92,7 @@ func expectedSTS(t *testing.T, cl client.Client, opts configOpts) *appsv1.Statef {Name: "POD_UID", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{APIVersion: "", FieldPath: "metadata.uid"}, ResourceFieldRef: nil, ConfigMapKeyRef: nil, SecretKeyRef: nil}}, {Name: "TS_KUBE_SECRET", Value: "$(POD_NAME)"}, {Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", Value: "/etc/tsconfig/$(POD_NAME)"}, + {Name: "TS_DEBUG_ACME_FORCE_RENEWAL", Value: "true"}, }, SecurityContext: &corev1.SecurityContext{ Privileged: ptr.To(true), @@ -287,6 +288,7 @@ func expectedSTSUserspace(t *testing.T, cl client.Client, opts configOpts) *apps {Name: "POD_UID", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{APIVersion: "", FieldPath: "metadata.uid"}, ResourceFieldRef: nil, ConfigMapKeyRef: nil, SecretKeyRef: nil}}, {Name: "TS_KUBE_SECRET", Value: "$(POD_NAME)"}, {Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", Value: "/etc/tsconfig/$(POD_NAME)"}, + {Name: "TS_DEBUG_ACME_FORCE_RENEWAL", Value: "true"}, {Name: "TS_SERVE_CONFIG", Value: "/etc/tailscaled/$(POD_NAME)/serve-config"}, {Name: "TS_INTERNAL_APP", Value: opts.app}, }, diff --git a/ipn/ipnlocal/cert.go b/ipn/ipnlocal/cert.go index a78fa5247..8804fcb5c 100644 --- a/ipn/ipnlocal/cert.go +++ b/ipn/ipnlocal/cert.go @@ -551,8 +551,11 @@ var getCertPEM = func(ctx context.Context, b *LocalBackend, cs certStore, logf l // If we have a previous cert, include it in the order. Assuming we're // within the ARI renewal window this should exclude us from LE rate // limits. + // Note that this order extension will fail renewals if the ACME account key has changed + // since the last issuance, see + // https://github.com/tailscale/tailscale/issues/18251 var opts []acme.OrderOption - if previous != nil { + if previous != nil && !envknob.Bool("TS_DEBUG_ACME_FORCE_RENEWAL") { prevCrt, err := previous.parseCertificate() if err == nil { opts = append(opts, acme.WithOrderReplacesCert(prevCrt))