From bbe194c80d531c5f88010d4e31e17c060a8125e7 Mon Sep 17 00:00:00 2001 From: Irbe Krumina Date: Fri, 19 Apr 2024 16:49:46 +0100 Subject: [PATCH] cmd/k8s-operator: correctly determine cluster domain (#11512) Kubernetes cluster domain defaults to 'cluster.local', but can also be customized. We need to determine cluster domain to set up in-cluster forwarding to our egress proxies. This was previously hardcoded to 'cluster.local', so the egress proxies were not usable in clusters with custom domains. This PR ensures that we attempt to determine the cluster domain by parsing /etc/resolv.conf. In case the cluster domain cannot be determined from /etc/resolv.conf, we fall back to 'cluster.local'. Updates tailscale/tailscale#10399,tailscale/tailscale#11445 Signed-off-by: Irbe Krumina --- cmd/k8s-operator/operator.go | 1 + cmd/k8s-operator/operator_test.go | 80 +++++++++++++++++++++++++++++++ cmd/k8s-operator/svc.go | 62 ++++++++++++++++++++++-- 3 files changed, 139 insertions(+), 4 deletions(-) diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index 6993b20fb..cf969372c 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -264,6 +264,7 @@ func runReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace string logger: zlog.Named("service-reconciler"), isDefaultLoadBalancer: isDefaultLoadBalancer, recorder: eventRecorder, + tsNamespace: tsNamespace, }) if err != nil { startlog.Fatalf("could not create service reconciler: %v", err) diff --git a/cmd/k8s-operator/operator_test.go b/cmd/k8s-operator/operator_test.go index 7188ce65b..6dcb609d9 100644 --- a/cmd/k8s-operator/operator_test.go +++ b/cmd/k8s-operator/operator_test.go @@ -20,7 +20,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/reconcile" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" + "tailscale.com/net/dns/resolvconffile" "tailscale.com/types/ptr" + "tailscale.com/util/dnsname" "tailscale.com/util/mak" 
) @@ -1352,3 +1354,81 @@ func Test_serviceHandlerForIngress(t *testing.T) { t.Errorf("unexpected reconcile request for a Service that does not belong to any Ingress: %#+v\n", gotReqs) } } + +func Test_clusterDomainFromResolverConf(t *testing.T) { + zl, err := zap.NewDevelopment() + if err != nil { + t.Fatal(err) + } + tests := []struct { + name string + conf *resolvconffile.Config + namespace string + want string + }{ + { + name: "success- custom domain", + conf: &resolvconffile.Config{ + SearchDomains: []dnsname.FQDN{toFQDN(t, "foo.svc.department.org.io"), toFQDN(t, "svc.department.org.io"), toFQDN(t, "department.org.io")}, + }, + namespace: "foo", + want: "department.org.io", + }, + { + name: "success- default domain", + conf: &resolvconffile.Config{ + SearchDomains: []dnsname.FQDN{toFQDN(t, "foo.svc.cluster.local."), toFQDN(t, "svc.cluster.local."), toFQDN(t, "cluster.local.")}, + }, + namespace: "foo", + want: "cluster.local", + }, + { + name: "only two search domains found", + conf: &resolvconffile.Config{ + SearchDomains: []dnsname.FQDN{toFQDN(t, "svc.department.org.io"), toFQDN(t, "department.org.io")}, + }, + namespace: "foo", + want: "cluster.local", + }, + { + name: "first search domain does not match the expected structure", + conf: &resolvconffile.Config{ + SearchDomains: []dnsname.FQDN{toFQDN(t, "foo.bar.department.org.io"), toFQDN(t, "svc.department.org.io"), toFQDN(t, "some.other.fqdn")}, + }, + namespace: "foo", + want: "cluster.local", + }, + { + name: "second search domain does not match the expected structure", + conf: &resolvconffile.Config{ + SearchDomains: []dnsname.FQDN{toFQDN(t, "foo.svc.department.org.io"), toFQDN(t, "foo.department.org.io"), toFQDN(t, "some.other.fqdn")}, + }, + namespace: "foo", + want: "cluster.local", + }, + { + name: "third search domain does not match the expected structure", + conf: &resolvconffile.Config{ + SearchDomains: []dnsname.FQDN{toFQDN(t, "foo.svc.department.org.io"), toFQDN(t, "svc.department.org.io"), 
toFQDN(t, "some.other.fqdn")}, + }, + namespace: "foo", + want: "cluster.local", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := clusterDomainFromResolverConf(tt.conf, tt.namespace, zl.Sugar()); got != tt.want { + t.Errorf("clusterDomainFromResolverConf() = %v, want %v", got, tt.want) + } + }) + } +} + +func toFQDN(t *testing.T, s string) dnsname.FQDN { + t.Helper() + fqdn, err := dnsname.ToFQDN(s) + if err != nil { + t.Fatalf("error coverting %q to dnsname.FQDN: %v", s, err) + } + return fqdn +} diff --git a/cmd/k8s-operator/svc.go b/cmd/k8s-operator/svc.go index 8820a3554..ab09e5f0d 100644 --- a/cmd/k8s-operator/svc.go +++ b/cmd/k8s-operator/svc.go @@ -22,10 +22,16 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" tsoperator "tailscale.com/k8s-operator" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" + "tailscale.com/net/dns/resolvconffile" "tailscale.com/util/clientmetric" "tailscale.com/util/set" ) +const ( + resolvConfPath = "/etc/resolv.conf" + defaultClusterDomain = "cluster.local" +) + type ServiceReconciler struct { client.Client ssr *tailscaleSTSReconciler @@ -42,6 +48,8 @@ type ServiceReconciler struct { managedEgressProxies set.Slice[types.UID] recorder record.EventRecorder + + tsNamespace string } var ( @@ -225,10 +233,8 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga } if sts.TailnetTargetIP != "" || sts.TailnetTargetFQDN != "" { - // TODO (irbekrm): cluster.local is the default DNS name, but - // can be changed by users. Make this configurable or figure out - // how to discover the DNS name from within operator - headlessSvcName := hsvc.Name + "." + hsvc.Namespace + ".svc.cluster.local" + clusterDomain := retrieveClusterDomain(a.tsNamespace, logger) + headlessSvcName := hsvc.Name + "." + hsvc.Namespace + ".svc." 
+ clusterDomain if svc.Spec.ExternalName != headlessSvcName || svc.Spec.Type != corev1.ServiceTypeExternalName { svc.Spec.ExternalName = headlessSvcName svc.Spec.Selector = nil @@ -344,3 +350,51 @@ func proxyClassIsReady(ctx context.Context, name string, cl client.Client) (bool } return tsoperator.ProxyClassIsReady(proxyClass), nil } + +// retrieveClusterDomain determines and retrieves the cluster domain (e.g. +// 'cluster.local') in which this Pod is running by parsing search domains in +// /etc/resolv.conf. If an error is encountered at any point during the process, +// defaults cluster domain to 'cluster.local'. +func retrieveClusterDomain(namespace string, logger *zap.SugaredLogger) string { + logger.Infof("attempting to retrieve cluster domain..") + conf, err := resolvconffile.ParseFile(resolvConfPath) + if err != nil { + // Vast majority of clusters use the cluster.local domain, so it + // is probably better to fall back to that than error out. + logger.Infof("[unexpected] error parsing /etc/resolv.conf to determine cluster domain, defaulting to 'cluster.local'.") + return defaultClusterDomain + } + return clusterDomainFromResolverConf(conf, namespace, logger) +} + +// clusterDomainFromResolverConf attempts to retrieve cluster domain from the provided resolver config. +// It expects the first three search domains in the resolver config to be [<namespace>.svc.<cluster-domain>, svc.<cluster-domain>, <cluster-domain>, ...]. +// If the first three domains match the expected structure, it returns the cluster domain without a trailing dot. +// If the domains don't match the expected structure or an error is encountered, it defaults to 'cluster.local' domain. 
+func clusterDomainFromResolverConf(conf *resolvconffile.Config, namespace string, logger *zap.SugaredLogger) string {
+	if len(conf.SearchDomains) < 3 {
+		logger.Infof("[unexpected] resolver config contains only %d search domains, at least three expected.\nDefaulting cluster domain to 'cluster.local'.", len(conf.SearchDomains))
+		return defaultClusterDomain
+	}
+	first := conf.SearchDomains[0] // expected shape: <namespace>.svc.<cluster-domain>.
+	if !strings.HasPrefix(string(first), namespace+".svc.") {
+		logger.Infof("[unexpected] first search domain in resolver config is %s; expected %s.\nDefaulting cluster domain to 'cluster.local'.", first, namespace+".svc.")
+		return defaultClusterDomain
+	}
+	second := conf.SearchDomains[1] // expected shape: svc.<cluster-domain>.
+	if !strings.HasPrefix(string(second), "svc.") {
+		logger.Infof("[unexpected] second search domain in resolver config is %s; expected 'svc.'.\nDefaulting cluster domain to 'cluster.local'.", second)
+		return defaultClusterDomain
+	}
+	// Strip the leading "svc." label and the trailing dot: the cluster domain
+	// was previously hardcoded to 'cluster.local' without a trailing dot, so
+	// the returned value keeps that shape for backwards compatibility.
+	probablyClusterDomain := strings.TrimPrefix(second.WithoutTrailingDot(), "svc.")
+	third := conf.SearchDomains[2] // expected shape: <cluster-domain>. (compared case-insensitively)
+	if !strings.EqualFold(third.WithoutTrailingDot(), probablyClusterDomain) {
+		logger.Infof("[unexpected] expected resolver config to contain search domains <namespace>.svc.<cluster-domain>, svc.<cluster-domain>, <cluster-domain>; got %s %s %s\n. Defaulting cluster domain to 'cluster.local'.", first, second, third)
+		return defaultClusterDomain
+	}
+	logger.Infof("Cluster domain %q extracted from resolver config", probablyClusterDomain)
+	return probablyClusterDomain
+}