From 4adbd14ab5cc7a7b347ebeaac64cb93895b70b76 Mon Sep 17 00:00:00 2001 From: Irbe Krumina Date: Tue, 6 Jan 2026 16:43:48 +0100 Subject: [PATCH] cmd/k8s-operator,ipn/ipnlocal: allow opting out of ACME order replace extension (#18252) (#18343) In dynamically changing environments where ACME account keys and certs are stored separately, it can happen that the account key would get deleted (and recreated) between issuances. If that is the case, we currently fail renewals and the only way to recover is for users to delete certs. This adds a config knob to allow opting out of the replaces extension and utilizes it in the Kubernetes operator where there are known user workflows that could end up with this edge case. Updates #18251 (cherry picked from commit 90b4358113d86b4fb06e89d4ae91ef8bcb6f6264) Signed-off-by: Irbe Krumina --- cmd/k8s-operator/proxygroup_specs.go | 16 ++++++++++++++++ cmd/k8s-operator/sts.go | 8 ++++++++ cmd/k8s-operator/testutils_test.go | 2 ++ ipn/ipnlocal/cert.go | 5 ++++- 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/cmd/k8s-operator/proxygroup_specs.go b/cmd/k8s-operator/proxygroup_specs.go index 34db86db2..930b7049d 100644 --- a/cmd/k8s-operator/proxygroup_specs.go +++ b/cmd/k8s-operator/proxygroup_specs.go @@ -182,6 +182,14 @@ func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode string Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", Value: "/etc/tsconfig/$(POD_NAME)", }, + { + // This ensures that cert renewals can succeed if ACME account + // keys have changed since issuance. We cannot guarantee or + // validate that the account key has not changed, see + // https://github.com/tailscale/tailscale/issues/18251 + Name: "TS_DEBUG_ACME_FORCE_RENEWAL", + Value: "true", + }, } if port != nil { @@ -347,6 +355,14 @@ func kubeAPIServerStatefulSet(pg *tsapi.ProxyGroup, namespace, image string, por Name: "$(POD_NAME)-config", }.String(), }, + { + // This ensures that cert renewals can succeed if ACME account + // keys have changed since issuance. We cannot guarantee or + // validate that the account key has not changed, see + // https://github.com/tailscale/tailscale/issues/18251 + Name: "TS_DEBUG_ACME_FORCE_RENEWAL", + Value: "true", + }, } if port != nil { diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index c52ffce85..d1cfda381 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -670,6 +670,14 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", Value: "/etc/tsconfig/$(POD_NAME)", }, + corev1.EnvVar{ + // This ensures that cert renewals can succeed if ACME account + // keys have changed since issuance. We cannot guarantee or + // validate that the account key has not changed, see + // https://github.com/tailscale/tailscale/issues/18251 + Name: "TS_DEBUG_ACME_FORCE_RENEWAL", + Value: "true", + }, ) if sts.ForwardClusterTrafficViaL7IngressProxy { diff --git a/cmd/k8s-operator/testutils_test.go b/cmd/k8s-operator/testutils_test.go index b4c468c8e..a9e79cee5 100644 --- a/cmd/k8s-operator/testutils_test.go +++ b/cmd/k8s-operator/testutils_test.go @@ -91,6 +91,7 @@ func expectedSTS(t *testing.T, cl client.Client, opts configOpts) *appsv1.Statef {Name: "POD_UID", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{APIVersion: "", FieldPath: "metadata.uid"}, ResourceFieldRef: nil, ConfigMapKeyRef: nil, SecretKeyRef: nil}}, {Name: "TS_KUBE_SECRET", Value: "$(POD_NAME)"}, {Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", Value: "/etc/tsconfig/$(POD_NAME)"}, + {Name: "TS_DEBUG_ACME_FORCE_RENEWAL", Value: "true"}, }, SecurityContext: &corev1.SecurityContext{ Privileged: ptr.To(true), @@ -280,6 +281,7 @@ func expectedSTSUserspace(t *testing.T, cl client.Client, opts configOpts) *apps {Name: "POD_UID", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{APIVersion: "", FieldPath: "metadata.uid"}, ResourceFieldRef: nil, ConfigMapKeyRef: nil, SecretKeyRef: nil}}, {Name: "TS_KUBE_SECRET", Value: "$(POD_NAME)"}, {Name: "TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR", Value: "/etc/tsconfig/$(POD_NAME)"}, + {Name: "TS_DEBUG_ACME_FORCE_RENEWAL", Value: "true"}, {Name: "TS_SERVE_CONFIG", Value: "/etc/tailscaled/$(POD_NAME)/serve-config"}, {Name: "TS_INTERNAL_APP", Value: opts.app}, }, diff --git a/ipn/ipnlocal/cert.go b/ipn/ipnlocal/cert.go index a78fa5247..8804fcb5c 100644 --- a/ipn/ipnlocal/cert.go +++ b/ipn/ipnlocal/cert.go @@ -551,8 +551,11 @@ var getCertPEM = func(ctx context.Context, b *LocalBackend, cs certStore, logf l // If we have a previous cert, include it in the order. Assuming we're // within the ARI renewal window this should exclude us from LE rate // limits. + // Note that this order extension will fail renewals if the ACME account key has changed + // since the last issuance, see + // https://github.com/tailscale/tailscale/issues/18251 var opts []acme.OrderOption - if previous != nil { + if previous != nil && !envknob.Bool("TS_DEBUG_ACME_FORCE_RENEWAL") { prevCrt, err := previous.parseCertificate() if err == nil { opts = append(opts, acme.WithOrderReplacesCert(prevCrt))