diff --git a/cmd/k8s-operator/e2e/acl.hujson b/cmd/k8s-operator/e2e/acl.hujson new file mode 100644 index 000000000..1a7b61767 --- /dev/null +++ b/cmd/k8s-operator/e2e/acl.hujson @@ -0,0 +1,33 @@ +// To run the e2e tests against a tailnet, ensure its access controls are a +// superset of the following: +{ + "tagOwners": { + "tag:k8s-operator": [], + "tag:k8s": ["tag:k8s-operator"], + "tag:k8s-recorder": ["tag:k8s-operator"], + }, + "autoApprovers": { + // Could be relaxed if we coordinated with the cluster config, but this + // wide subnet maximises compatibility for most clusters. + "routes": { + "10.0.0.0/8": ["tag:k8s"], + }, + "services": { + "tag:k8s": ["tag:k8s"], + }, + }, + "grants": [ + { + "src": ["tag:k8s"], + "dst": ["tag:k8s", "tag:k8s-operator"], + "ip": ["tcp:80", "tcp:443"], + "app": { + "tailscale.com/cap/kubernetes": [{ + "impersonate": { + "groups": ["ts:e2e-test-proxy"], + }, + }], + }, + }, + ], +} \ No newline at end of file diff --git a/cmd/k8s-operator/e2e/ingress_test.go b/cmd/k8s-operator/e2e/ingress_test.go index 373dd2c7d..23f0711ec 100644 --- a/cmd/k8s-operator/e2e/ingress_test.go +++ b/cmd/k8s-operator/e2e/ingress_test.go @@ -10,6 +10,7 @@ import ( "testing" "time" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" @@ -17,45 +18,63 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/config" kube "tailscale.com/k8s-operator" "tailscale.com/tstest" + "tailscale.com/types/ptr" + "tailscale.com/util/httpm" ) // See [TestMain] for test requirements. 
func TestIngress(t *testing.T) { - if tsClient == nil { - t.Skip("TestIngress requires credentials for a tailscale client") + if apiClient == nil { + t.Skip("TestIngress requires TS_API_CLIENT_SECRET set") } - ctx := context.Background() cfg := config.GetConfigOrDie() cl, err := client.New(cfg, client.Options{}) if err != nil { t.Fatal(err) } // Apply nginx - createAndCleanup(t, ctx, cl, &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nginx", - Namespace: "default", - Labels: map[string]string{ - "app.kubernetes.io/name": "nginx", + createAndCleanup(t, cl, + &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nginx", + Namespace: "default", + Labels: map[string]string{ + "app.kubernetes.io/name": "nginx", + }, }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "nginx", - Image: "nginx", + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](1), + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app.kubernetes.io/name": "nginx", + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app.kubernetes.io/name": "nginx", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "nginx", + Image: "nginx", + }, + }, + }, }, }, - }, - }) + }) // Apply service to expose it as ingress svc := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: "test-ingress", Namespace: "default", Annotations: map[string]string{ - "tailscale.com/expose": "true", + "tailscale.com/expose": "true", + "tailscale.com/proxy-class": "prod", }, }, Spec: corev1.ServiceSpec{ @@ -71,10 +90,10 @@ func TestIngress(t *testing.T) { }, }, } - createAndCleanup(t, ctx, cl, svc) + createAndCleanup(t, cl, svc) // TODO: instead of timing out only when test times out, cancel context after 60s or so. 
- if err := wait.PollUntilContextCancel(ctx, time.Millisecond*100, true, func(ctx context.Context) (done bool, err error) { + if err := wait.PollUntilContextCancel(t.Context(), time.Millisecond*100, true, func(ctx context.Context) (done bool, err error) { maybeReadySvc := &corev1.Service{ObjectMeta: objectMeta("default", "test-ingress")} if err := get(ctx, cl, maybeReadySvc); err != nil { return false, err @@ -89,17 +108,20 @@ func TestIngress(t *testing.T) { } var resp *http.Response - if err := tstest.WaitFor(time.Second*60, func() error { + if err := tstest.WaitFor(time.Minute, func() error { // TODO(tomhjp): Get the tailnet DNS name from the associated secret instead. // If we are not the first tailnet node with the requested name, we'll get // a -N suffix. - resp, err = tsClient.HTTPClient.Get(fmt.Sprintf("http://%s-%s:80", svc.Namespace, svc.Name)) + req, err := http.NewRequest(httpm.GET, fmt.Sprintf("http://%s-%s:80", svc.Namespace, svc.Name), nil) if err != nil { return err } - return nil + ctx, cancel := context.WithTimeout(t.Context(), time.Second) + defer cancel() + resp, err = tailnetClient.HTTPClient().Do(req.WithContext(ctx)) + return err }); err != nil { - t.Fatalf("error trying to reach service: %v", err) + t.Fatalf("error trying to reach Service: %v", err) } if resp.StatusCode != http.StatusOK { diff --git a/cmd/k8s-operator/e2e/main_test.go b/cmd/k8s-operator/e2e/main_test.go index 5a1364e09..fb5e5c859 100644 --- a/cmd/k8s-operator/e2e/main_test.go +++ b/cmd/k8s-operator/e2e/main_test.go @@ -6,167 +6,89 @@ package e2e import ( "context" "errors" - "fmt" "log" "os" - "slices" "strings" "testing" + "time" - "github.com/go-logr/zapr" - "github.com/tailscale/hujson" - "go.uber.org/zap/zapcore" "golang.org/x/oauth2/clientcredentials" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" - logf "sigs.k8s.io/controller-runtime/pkg/log" - kzap 
"sigs.k8s.io/controller-runtime/pkg/log/zap" "tailscale.com/internal/client/tailscale" + "tailscale.com/ipn/store/mem" + "tailscale.com/tsnet" ) -const ( - e2eManagedComment = "// This is managed by the k8s-operator e2e tests" -) - +// This test suite is currently not run in CI. +// It requires some setup not handled by this code: +// - Kubernetes cluster with local kubeconfig for it (direct connection, no API server proxy) +// - Tailscale operator installed with --set apiServerProxyConfig.mode="true" +// - ACLs from acl.hujson +// - OAuth client secret in TS_API_CLIENT_SECRET env, with at least auth_keys write scope and tag:k8s tag var ( - tsClient *tailscale.Client - testGrants = map[string]string{ - "test-proxy": `{ - "src": ["tag:e2e-test-proxy"], - "dst": ["tag:k8s-operator"], - "app": { - "tailscale.com/cap/kubernetes": [{ - "impersonate": { - "groups": ["ts:e2e-test-proxy"], - }, - }], - }, - }`, - } + apiClient *tailscale.Client // For API calls to control. + tailnetClient *tsnet.Server // For testing real tailnet traffic. ) -// This test suite is currently not run in CI. -// It requires some setup not handled by this code: -// - Kubernetes cluster with tailscale operator installed -// - Current kubeconfig context set to connect to that cluster (directly, no operator proxy) -// - Operator installed with --set apiServerProxyConfig.mode="true" -// - ACLs that define tag:e2e-test-proxy tag. 
TODO(tomhjp): Can maybe replace this prereq onwards with an API key -// - OAuth client ID and secret in TS_API_CLIENT_ID and TS_API_CLIENT_SECRET env -// - OAuth client must have auth_keys and policy_file write for tag:e2e-test-proxy tag func TestMain(m *testing.M) { code, err := runTests(m) if err != nil { - log.Fatal(err) + log.Printf("Error: %v", err) + os.Exit(1) } os.Exit(code) } func runTests(m *testing.M) (int, error) { - zlog := kzap.NewRaw([]kzap.Opts{kzap.UseDevMode(true), kzap.Level(zapcore.DebugLevel)}...).Sugar() - logf.SetLogger(zapr.NewLogger(zlog.Desugar())) + secret := os.Getenv("TS_API_CLIENT_SECRET") + if secret != "" { + secretParts := strings.Split(secret, "-") + if len(secretParts) != 4 { + return 0, errors.New("TS_API_CLIENT_SECRET is not valid") + } + ctx := context.Background() + credentials := clientcredentials.Config{ + ClientID: secretParts[2], + ClientSecret: secret, + TokenURL: "https://login.tailscale.com/api/v2/oauth/token", + Scopes: []string{"auth_keys"}, + } + apiClient = tailscale.NewClient("-", nil) + apiClient.HTTPClient = credentials.Client(ctx) + + caps := tailscale.KeyCapabilities{ + Devices: tailscale.KeyDeviceCapabilities{ + Create: tailscale.KeyDeviceCreateCapabilities{ + Reusable: false, + Preauthorized: true, + Ephemeral: true, + Tags: []string{"tag:k8s"}, + }, + }, + } - if clientID := os.Getenv("TS_API_CLIENT_ID"); clientID != "" { - cleanup, err := setupClientAndACLs() + authKey, authKeyMeta, err := apiClient.CreateKeyWithExpiry(ctx, caps, 10*time.Minute) if err != nil { return 0, err } - defer func() { - err = errors.Join(err, cleanup()) - }() - } - - return m.Run(), nil -} - -func setupClientAndACLs() (cleanup func() error, _ error) { - ctx := context.Background() - credentials := clientcredentials.Config{ - ClientID: os.Getenv("TS_API_CLIENT_ID"), - ClientSecret: os.Getenv("TS_API_CLIENT_SECRET"), - TokenURL: "https://login.tailscale.com/api/v2/oauth/token", - Scopes: []string{"auth_keys", "policy_file"}, - } - 
tsClient = tailscale.NewClient("-", nil) - tsClient.HTTPClient = credentials.Client(ctx) - - if err := patchACLs(ctx, tsClient, func(acls *hujson.Value) { - for test, grant := range testGrants { - deleteTestGrants(test, acls) - addTestGrant(test, grant, acls) - } - }); err != nil { - return nil, err - } - - return func() error { - return patchACLs(ctx, tsClient, func(acls *hujson.Value) { - for test := range testGrants { - deleteTestGrants(test, acls) - } - }) - }, nil -} - -func patchACLs(ctx context.Context, tsClient *tailscale.Client, patchFn func(*hujson.Value)) error { - acls, err := tsClient.ACLHuJSON(ctx) - if err != nil { - return err - } - hj, err := hujson.Parse([]byte(acls.ACL)) - if err != nil { - return err - } - - patchFn(&hj) - - hj.Format() - acls.ACL = hj.String() - if _, err := tsClient.SetACLHuJSON(ctx, *acls, true); err != nil { - return err - } - - return nil -} + defer apiClient.DeleteKey(context.Background(), authKeyMeta.ID) -func addTestGrant(test, grant string, acls *hujson.Value) error { - v, err := hujson.Parse([]byte(grant)) - if err != nil { - return err - } - - // Add the managed comment to the first line of the grant object contents. 
- v.Value.(*hujson.Object).Members[0].Name.BeforeExtra = hujson.Extra(fmt.Sprintf("%s: %s\n", e2eManagedComment, test)) - - if err := acls.Patch([]byte(fmt.Sprintf(`[{"op": "add", "path": "/grants/-", "value": %s}]`, v.String()))); err != nil { - return err - } - - return nil -} - -func deleteTestGrants(test string, acls *hujson.Value) error { - grants := acls.Find("/grants") - - var patches []string - for i, g := range grants.Value.(*hujson.Array).Elements { - members := g.Value.(*hujson.Object).Members - if len(members) == 0 { - continue + tailnetClient = &tsnet.Server{ + Hostname: "test-proxy", + Ephemeral: true, + Store: &mem.Store{}, + AuthKey: authKey, } - comment := strings.TrimSpace(string(members[0].Name.BeforeExtra)) - if name, found := strings.CutPrefix(comment, e2eManagedComment+": "); found && name == test { - patches = append(patches, fmt.Sprintf(`{"op": "remove", "path": "/grants/%d"}`, i)) + _, err = tailnetClient.Up(ctx) + if err != nil { + return 0, err } + defer tailnetClient.Close() } - // Remove in reverse order so we don't affect the found indices as we mutate. 
- slices.Reverse(patches) - - if err := acls.Patch([]byte(fmt.Sprintf("[%s]", strings.Join(patches, ",")))); err != nil { - return err - } - - return nil + return m.Run(), nil } func objectMeta(namespace, name string) metav1.ObjectMeta { @@ -176,13 +98,25 @@ func objectMeta(namespace, name string) metav1.ObjectMeta { } } -func createAndCleanup(t *testing.T, ctx context.Context, cl client.Client, obj client.Object) { +func createAndCleanup(t *testing.T, cl client.Client, obj client.Object) { t.Helper() - if err := cl.Create(ctx, obj); err != nil { - t.Fatal(err) + + // Try to create the object first + err := cl.Create(t.Context(), obj) + if err != nil { + if apierrors.IsAlreadyExists(err) { + if updateErr := cl.Update(t.Context(), obj); updateErr != nil { + t.Fatal(updateErr) + } + } else { + t.Fatal(err) + } } + t.Cleanup(func() { - if err := cl.Delete(ctx, obj); err != nil { + // Use context.Background() for cleanup, as t.Context() is cancelled + // just before cleanup functions are called. + if err := cl.Delete(context.Background(), obj); err != nil { t.Errorf("error cleaning up %s %s/%s: %s", obj.GetObjectKind().GroupVersionKind(), obj.GetNamespace(), obj.GetName(), err) } }) diff --git a/cmd/k8s-operator/e2e/proxy_test.go b/cmd/k8s-operator/e2e/proxy_test.go index eac983e88..b3010f97e 100644 --- a/cmd/k8s-operator/e2e/proxy_test.go +++ b/cmd/k8s-operator/e2e/proxy_test.go @@ -4,10 +4,8 @@ package e2e import ( - "context" "encoding/json" "fmt" - "strings" "testing" "time" @@ -17,18 +15,16 @@ import ( "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" - "tailscale.com/client/tailscale" - "tailscale.com/tsnet" + "tailscale.com/ipn" "tailscale.com/tstest" ) // See [TestMain] for test requirements. 
func TestProxy(t *testing.T) { - if tsClient == nil { - t.Skip("TestProxy requires credentials for a tailscale client") + if apiClient == nil { + t.Skip("TestProxy requires TS_API_CLIENT_SECRET set") } - ctx := context.Background() cfg := config.GetConfigOrDie() cl, err := client.New(cfg, client.Options{}) if err != nil { @@ -36,7 +32,7 @@ func TestProxy(t *testing.T) { } // Create role and role binding to allow a group we'll impersonate to do stuff. - createAndCleanup(t, ctx, cl, &rbacv1.Role{ + createAndCleanup(t, cl, &rbacv1.Role{ ObjectMeta: objectMeta("tailscale", "read-secrets"), Rules: []rbacv1.PolicyRule{{ APIGroups: []string{""}, @@ -44,7 +40,7 @@ func TestProxy(t *testing.T) { Resources: []string{"secrets"}, }}, }) - createAndCleanup(t, ctx, cl, &rbacv1.RoleBinding{ + createAndCleanup(t, cl, &rbacv1.RoleBinding{ ObjectMeta: objectMeta("tailscale", "read-secrets"), Subjects: []rbacv1.Subject{{ Kind: "Group", @@ -60,16 +56,14 @@ func TestProxy(t *testing.T) { operatorSecret := corev1.Secret{ ObjectMeta: objectMeta("tailscale", "operator"), } - if err := get(ctx, cl, &operatorSecret); err != nil { + if err := get(t.Context(), cl, &operatorSecret); err != nil { t.Fatal(err) } - // Connect to tailnet with test-specific tag so we can use the - // [testGrants] ACLs when connecting to the API server proxy - ts := tsnetServerWithTag(t, ctx, "tag:e2e-test-proxy") + // Join tailnet as a client of the API server proxy. proxyCfg := &rest.Config{ Host: fmt.Sprintf("https://%s:443", hostNameFromOperatorSecret(t, operatorSecret)), - Dial: ts.Dial, + Dial: tailnetClient.Dial, } proxyCl, err := client.New(proxyCfg, client.Options{}) if err != nil { @@ -82,8 +76,8 @@ func TestProxy(t *testing.T) { } // Wait for up to a minute the first time we use the proxy, to give it time // to provision the TLS certs.
- if err := tstest.WaitFor(time.Second*60, func() error { - return get(ctx, proxyCl, &allowedSecret) + if err := tstest.WaitFor(time.Minute, func() error { + return get(t.Context(), proxyCl, &allowedSecret) }); err != nil { t.Fatal(err) } @@ -92,65 +86,25 @@ func TestProxy(t *testing.T) { forbiddenSecret := corev1.Secret{ ObjectMeta: objectMeta("default", "operator"), } - if err := get(ctx, proxyCl, &forbiddenSecret); err == nil || !apierrors.IsForbidden(err) { + if err := get(t.Context(), proxyCl, &forbiddenSecret); err == nil || !apierrors.IsForbidden(err) { t.Fatalf("expected forbidden error fetching secret from default namespace: %s", err) } } -func tsnetServerWithTag(t *testing.T, ctx context.Context, tag string) *tsnet.Server { - caps := tailscale.KeyCapabilities{ - Devices: tailscale.KeyDeviceCapabilities{ - Create: tailscale.KeyDeviceCreateCapabilities{ - Reusable: false, - Preauthorized: true, - Ephemeral: true, - Tags: []string{tag}, - }, - }, - } - - authKey, authKeyMeta, err := tsClient.CreateKey(ctx, caps) - if err != nil { - t.Fatal(err) +func hostNameFromOperatorSecret(t *testing.T, s corev1.Secret) string { + t.Helper() + prefsBytes, ok := s.Data[string(s.Data["_current-profile"])] + if !ok { + t.Fatalf("no state in operator Secret data: %#v", s.Data) } - t.Cleanup(func() { - if err := tsClient.DeleteKey(ctx, authKeyMeta.ID); err != nil { - t.Errorf("error deleting auth key: %s", err) - } - }) - ts := &tsnet.Server{ - Hostname: "test-proxy", - Ephemeral: true, - Dir: t.TempDir(), - AuthKey: authKey, - } - _, err = ts.Up(ctx) - if err != nil { + prefs := ipn.Prefs{} + if err := json.Unmarshal(prefsBytes, &prefs); err != nil { t.Fatal(err) } - t.Cleanup(func() { - if err := ts.Close(); err != nil { - t.Errorf("error shutting down tsnet.Server: %s", err) - } - }) - - return ts -} -func hostNameFromOperatorSecret(t *testing.T, s corev1.Secret) string { - profiles := map[string]any{} - if err := json.Unmarshal(s.Data["_profiles"], &profiles); err != nil
{ - t.Fatal(err) + if prefs.Persist == nil { + t.Fatalf("no hostname in operator Secret data: %#v", s.Data) } - key, ok := strings.CutPrefix(string(s.Data["_current-profile"]), "profile-") - if !ok { - t.Fatal(string(s.Data["_current-profile"])) - } - profile, ok := profiles[key] - if !ok { - t.Fatal(profiles) - } - - return ((profile.(map[string]any))["Name"]).(string) + return prefs.Persist.UserProfile.LoginName }