cmd/k8s-operator: simplify scope of e2e tests (#17076)

Removes the ACL edits from the e2e tests, to simplify the tests and to
separate the actual test logic from the environment setup logic as much
as possible. Also aims to fit in with requirements that most devs
working on the operator will generally have fulfilled anyway; in
particular, using tags that match our documentation.

Updates tailscale/corp#32085

Change-Id: I7659246e39ec0b7bcc4ec0a00c6310f25fe6fac2

Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
tomhjp/poc-peer-relay-proxygroup
Tom Proctor 3 months ago committed by GitHub
parent 2d9d869d3d
commit 1ec3d20d10
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,33 @@
// To run the e2e tests against a tailnet, ensure its access controls are a
// superset of the following:
{
// Tags used by the operator and the devices it manages. tag:k8s and
// tag:k8s-recorder list tag:k8s-operator as their owner; tag:k8s-operator
// itself lists no owners here.
"tagOwners": {
"tag:k8s-operator": [],
"tag:k8s": ["tag:k8s-operator"],
"tag:k8s-recorder": ["tag:k8s-operator"],
},
// Auto-approval rules for routes and services advertised by tag:k8s devices.
"autoApprovers": {
// Could be relaxed if we coordinated with the cluster config, but this
// wide subnet maximises compatibility for most clusters.
"routes": {
"10.0.0.0/8": ["tag:k8s"],
},
// Services tagged tag:k8s are approved when advertised by tag:k8s devices.
"services": {
"tag:k8s": ["tag:k8s"],
},
},
"grants": [
{
// The e2e tests' own tailnet client joins with tag:k8s, so tag:k8s is
// the source for both the plain HTTP(S) traffic and the Kubernetes
// capability below.
"src": ["tag:k8s"],
"dst": ["tag:k8s", "tag:k8s-operator"],
"ip": ["tcp:80", "tcp:443"],
"app": {
// Kubernetes app capability naming the group to impersonate.
// NOTE(review): presumably this must match the Group subject that the
// e2e proxy test binds its Role to — confirm against the test.
"tailscale.com/cap/kubernetes": [{
"impersonate": {
"groups": ["ts:e2e-test-proxy"],
},
}],
},
},
],
}

@ -10,6 +10,7 @@ import (
"testing" "testing"
"time" "time"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
@ -17,22 +18,24 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/config" "sigs.k8s.io/controller-runtime/pkg/client/config"
kube "tailscale.com/k8s-operator" kube "tailscale.com/k8s-operator"
"tailscale.com/tstest" "tailscale.com/tstest"
"tailscale.com/types/ptr"
"tailscale.com/util/httpm"
) )
// See [TestMain] for test requirements. // See [TestMain] for test requirements.
func TestIngress(t *testing.T) { func TestIngress(t *testing.T) {
if tsClient == nil { if apiClient == nil {
t.Skip("TestIngress requires credentials for a tailscale client") t.Skip("TestIngress requires TS_API_CLIENT_SECRET set")
} }
ctx := context.Background()
cfg := config.GetConfigOrDie() cfg := config.GetConfigOrDie()
cl, err := client.New(cfg, client.Options{}) cl, err := client.New(cfg, client.Options{})
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
// Apply nginx // Apply nginx
createAndCleanup(t, ctx, cl, &corev1.Pod{ createAndCleanup(t, cl,
&appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: "nginx", Name: "nginx",
Namespace: "default", Namespace: "default",
@ -40,6 +43,19 @@ func TestIngress(t *testing.T) {
"app.kubernetes.io/name": "nginx", "app.kubernetes.io/name": "nginx",
}, },
}, },
Spec: appsv1.DeploymentSpec{
Replicas: ptr.To[int32](1),
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app.kubernetes.io/name": "nginx",
},
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"app.kubernetes.io/name": "nginx",
},
},
Spec: corev1.PodSpec{ Spec: corev1.PodSpec{
Containers: []corev1.Container{ Containers: []corev1.Container{
{ {
@ -48,6 +64,8 @@ func TestIngress(t *testing.T) {
}, },
}, },
}, },
},
},
}) })
// Apply service to expose it as ingress // Apply service to expose it as ingress
svc := &corev1.Service{ svc := &corev1.Service{
@ -56,6 +74,7 @@ func TestIngress(t *testing.T) {
Namespace: "default", Namespace: "default",
Annotations: map[string]string{ Annotations: map[string]string{
"tailscale.com/expose": "true", "tailscale.com/expose": "true",
"tailscale.com/proxy-class": "prod",
}, },
}, },
Spec: corev1.ServiceSpec{ Spec: corev1.ServiceSpec{
@ -71,10 +90,10 @@ func TestIngress(t *testing.T) {
}, },
}, },
} }
createAndCleanup(t, ctx, cl, svc) createAndCleanup(t, cl, svc)
// TODO: instead of timing out only when test times out, cancel context after 60s or so. // TODO: instead of timing out only when test times out, cancel context after 60s or so.
if err := wait.PollUntilContextCancel(ctx, time.Millisecond*100, true, func(ctx context.Context) (done bool, err error) { if err := wait.PollUntilContextCancel(t.Context(), time.Millisecond*100, true, func(ctx context.Context) (done bool, err error) {
maybeReadySvc := &corev1.Service{ObjectMeta: objectMeta("default", "test-ingress")} maybeReadySvc := &corev1.Service{ObjectMeta: objectMeta("default", "test-ingress")}
if err := get(ctx, cl, maybeReadySvc); err != nil { if err := get(ctx, cl, maybeReadySvc); err != nil {
return false, err return false, err
@ -89,17 +108,20 @@ func TestIngress(t *testing.T) {
} }
var resp *http.Response var resp *http.Response
if err := tstest.WaitFor(time.Second*60, func() error { if err := tstest.WaitFor(time.Minute, func() error {
// TODO(tomhjp): Get the tailnet DNS name from the associated secret instead. // TODO(tomhjp): Get the tailnet DNS name from the associated secret instead.
// If we are not the first tailnet node with the requested name, we'll get // If we are not the first tailnet node with the requested name, we'll get
// a -N suffix. // a -N suffix.
resp, err = tsClient.HTTPClient.Get(fmt.Sprintf("http://%s-%s:80", svc.Namespace, svc.Name)) req, err := http.NewRequest(httpm.GET, fmt.Sprintf("http://%s-%s:80", svc.Namespace, svc.Name), nil)
if err != nil { if err != nil {
return err return err
} }
return nil ctx, cancel := context.WithTimeout(t.Context(), time.Second)
defer cancel()
resp, err = tailnetClient.HTTPClient().Do(req.WithContext(ctx))
return err
}); err != nil { }); err != nil {
t.Fatalf("error trying to reach service: %v", err) t.Fatalf("error trying to reach Service: %v", err)
} }
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {

@ -6,167 +6,89 @@ package e2e
import ( import (
"context" "context"
"errors" "errors"
"fmt"
"log" "log"
"os" "os"
"slices"
"strings" "strings"
"testing" "testing"
"time"
"github.com/go-logr/zapr"
"github.com/tailscale/hujson"
"go.uber.org/zap/zapcore"
"golang.org/x/oauth2/clientcredentials" "golang.org/x/oauth2/clientcredentials"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"
kzap "sigs.k8s.io/controller-runtime/pkg/log/zap"
"tailscale.com/internal/client/tailscale" "tailscale.com/internal/client/tailscale"
"tailscale.com/ipn/store/mem"
"tailscale.com/tsnet"
) )
const ( // This test suite is currently not run in CI.
e2eManagedComment = "// This is managed by the k8s-operator e2e tests" // It requires some setup not handled by this code:
) // - Kubernetes cluster with local kubeconfig for it (direct connection, no API server proxy)
// - Tailscale operator installed with --set apiServerProxyConfig.mode="true"
// - ACLs from acl.hujson
// - OAuth client secret in TS_API_CLIENT_SECRET env, with at least auth_keys write scope and tag:k8s tag
var ( var (
tsClient *tailscale.Client apiClient *tailscale.Client // For API calls to control.
testGrants = map[string]string{ tailnetClient *tsnet.Server // For testing real tailnet traffic.
"test-proxy": `{
"src": ["tag:e2e-test-proxy"],
"dst": ["tag:k8s-operator"],
"app": {
"tailscale.com/cap/kubernetes": [{
"impersonate": {
"groups": ["ts:e2e-test-proxy"],
},
}],
},
}`,
}
) )
// This test suite is currently not run in CI.
// It requires some setup not handled by this code:
// - Kubernetes cluster with tailscale operator installed
// - Current kubeconfig context set to connect to that cluster (directly, no operator proxy)
// - Operator installed with --set apiServerProxyConfig.mode="true"
// - ACLs that define tag:e2e-test-proxy tag. TODO(tomhjp): Can maybe replace this prereq onwards with an API key
// - OAuth client ID and secret in TS_API_CLIENT_ID and TS_API_CLIENT_SECRET env
// - OAuth client must have auth_keys and policy_file write for tag:e2e-test-proxy tag
func TestMain(m *testing.M) { func TestMain(m *testing.M) {
code, err := runTests(m) code, err := runTests(m)
if err != nil { if err != nil {
log.Fatal(err) log.Printf("Error: %v", err)
os.Exit(1)
} }
os.Exit(code) os.Exit(code)
} }
func runTests(m *testing.M) (int, error) { func runTests(m *testing.M) (int, error) {
zlog := kzap.NewRaw([]kzap.Opts{kzap.UseDevMode(true), kzap.Level(zapcore.DebugLevel)}...).Sugar() secret := os.Getenv("TS_API_CLIENT_SECRET")
logf.SetLogger(zapr.NewLogger(zlog.Desugar())) if secret != "" {
secretParts := strings.Split(secret, "-")
if clientID := os.Getenv("TS_API_CLIENT_ID"); clientID != "" { if len(secretParts) != 4 {
cleanup, err := setupClientAndACLs() return 0, errors.New("TS_API_CLIENT_SECRET is not valid")
if err != nil {
return 0, err
}
defer func() {
err = errors.Join(err, cleanup())
}()
} }
return m.Run(), nil
}
func setupClientAndACLs() (cleanup func() error, _ error) {
ctx := context.Background() ctx := context.Background()
credentials := clientcredentials.Config{ credentials := clientcredentials.Config{
ClientID: os.Getenv("TS_API_CLIENT_ID"), ClientID: secretParts[2],
ClientSecret: os.Getenv("TS_API_CLIENT_SECRET"), ClientSecret: secret,
TokenURL: "https://login.tailscale.com/api/v2/oauth/token", TokenURL: "https://login.tailscale.com/api/v2/oauth/token",
Scopes: []string{"auth_keys", "policy_file"}, Scopes: []string{"auth_keys"},
} }
tsClient = tailscale.NewClient("-", nil) apiClient = tailscale.NewClient("-", nil)
tsClient.HTTPClient = credentials.Client(ctx) apiClient.HTTPClient = credentials.Client(ctx)
if err := patchACLs(ctx, tsClient, func(acls *hujson.Value) { caps := tailscale.KeyCapabilities{
for test, grant := range testGrants { Devices: tailscale.KeyDeviceCapabilities{
deleteTestGrants(test, acls) Create: tailscale.KeyDeviceCreateCapabilities{
addTestGrant(test, grant, acls) Reusable: false,
} Preauthorized: true,
}); err != nil { Ephemeral: true,
return nil, err Tags: []string{"tag:k8s"},
} },
},
return func() error {
return patchACLs(ctx, tsClient, func(acls *hujson.Value) {
for test := range testGrants {
deleteTestGrants(test, acls)
}
})
}, nil
} }
func patchACLs(ctx context.Context, tsClient *tailscale.Client, patchFn func(*hujson.Value)) error { authKey, authKeyMeta, err := apiClient.CreateKeyWithExpiry(ctx, caps, 10*time.Minute)
acls, err := tsClient.ACLHuJSON(ctx)
if err != nil { if err != nil {
return err return 0, err
}
hj, err := hujson.Parse([]byte(acls.ACL))
if err != nil {
return err
}
patchFn(&hj)
hj.Format()
acls.ACL = hj.String()
if _, err := tsClient.SetACLHuJSON(ctx, *acls, true); err != nil {
return err
} }
defer apiClient.DeleteKey(context.Background(), authKeyMeta.ID)
return nil tailnetClient = &tsnet.Server{
Hostname: "test-proxy",
Ephemeral: true,
Store: &mem.Store{},
AuthKey: authKey,
} }
_, err = tailnetClient.Up(ctx)
func addTestGrant(test, grant string, acls *hujson.Value) error {
v, err := hujson.Parse([]byte(grant))
if err != nil { if err != nil {
return err return 0, err
}
// Add the managed comment to the first line of the grant object contents.
v.Value.(*hujson.Object).Members[0].Name.BeforeExtra = hujson.Extra(fmt.Sprintf("%s: %s\n", e2eManagedComment, test))
if err := acls.Patch([]byte(fmt.Sprintf(`[{"op": "add", "path": "/grants/-", "value": %s}]`, v.String()))); err != nil {
return err
}
return nil
}
func deleteTestGrants(test string, acls *hujson.Value) error {
grants := acls.Find("/grants")
var patches []string
for i, g := range grants.Value.(*hujson.Array).Elements {
members := g.Value.(*hujson.Object).Members
if len(members) == 0 {
continue
}
comment := strings.TrimSpace(string(members[0].Name.BeforeExtra))
if name, found := strings.CutPrefix(comment, e2eManagedComment+": "); found && name == test {
patches = append(patches, fmt.Sprintf(`{"op": "remove", "path": "/grants/%d"}`, i))
}
} }
defer tailnetClient.Close()
// Remove in reverse order so we don't affect the found indices as we mutate.
slices.Reverse(patches)
if err := acls.Patch([]byte(fmt.Sprintf("[%s]", strings.Join(patches, ",")))); err != nil {
return err
} }
return nil return m.Run(), nil
} }
func objectMeta(namespace, name string) metav1.ObjectMeta { func objectMeta(namespace, name string) metav1.ObjectMeta {
@ -176,13 +98,25 @@ func objectMeta(namespace, name string) metav1.ObjectMeta {
} }
} }
func createAndCleanup(t *testing.T, ctx context.Context, cl client.Client, obj client.Object) { func createAndCleanup(t *testing.T, cl client.Client, obj client.Object) {
t.Helper() t.Helper()
if err := cl.Create(ctx, obj); err != nil {
// Try to create the object first
err := cl.Create(t.Context(), obj)
if err != nil {
if apierrors.IsAlreadyExists(err) {
if updateErr := cl.Update(t.Context(), obj); updateErr != nil {
t.Fatal(updateErr)
}
} else {
t.Fatal(err) t.Fatal(err)
} }
}
t.Cleanup(func() { t.Cleanup(func() {
if err := cl.Delete(ctx, obj); err != nil { // Use context.Background() for cleanup, as t.Context() is cancelled
// just before cleanup functions are called.
if err := cl.Delete(context.Background(), obj); err != nil {
t.Errorf("error cleaning up %s %s/%s: %s", obj.GetObjectKind().GroupVersionKind(), obj.GetNamespace(), obj.GetName(), err) t.Errorf("error cleaning up %s %s/%s: %s", obj.GetObjectKind().GroupVersionKind(), obj.GetNamespace(), obj.GetName(), err)
} }
}) })

@ -4,10 +4,8 @@
package e2e package e2e
import ( import (
"context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"strings"
"testing" "testing"
"time" "time"
@ -17,18 +15,16 @@ import (
"k8s.io/client-go/rest" "k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config" "sigs.k8s.io/controller-runtime/pkg/client/config"
"tailscale.com/client/tailscale" "tailscale.com/ipn"
"tailscale.com/tsnet"
"tailscale.com/tstest" "tailscale.com/tstest"
) )
// See [TestMain] for test requirements. // See [TestMain] for test requirements.
func TestProxy(t *testing.T) { func TestProxy(t *testing.T) {
if tsClient == nil { if apiClient == nil {
t.Skip("TestProxy requires credentials for a tailscale client") t.Skip("TestIngress requires TS_API_CLIENT_SECRET set")
} }
ctx := context.Background()
cfg := config.GetConfigOrDie() cfg := config.GetConfigOrDie()
cl, err := client.New(cfg, client.Options{}) cl, err := client.New(cfg, client.Options{})
if err != nil { if err != nil {
@ -36,7 +32,7 @@ func TestProxy(t *testing.T) {
} }
// Create role and role binding to allow a group we'll impersonate to do stuff. // Create role and role binding to allow a group we'll impersonate to do stuff.
createAndCleanup(t, ctx, cl, &rbacv1.Role{ createAndCleanup(t, cl, &rbacv1.Role{
ObjectMeta: objectMeta("tailscale", "read-secrets"), ObjectMeta: objectMeta("tailscale", "read-secrets"),
Rules: []rbacv1.PolicyRule{{ Rules: []rbacv1.PolicyRule{{
APIGroups: []string{""}, APIGroups: []string{""},
@ -44,7 +40,7 @@ func TestProxy(t *testing.T) {
Resources: []string{"secrets"}, Resources: []string{"secrets"},
}}, }},
}) })
createAndCleanup(t, ctx, cl, &rbacv1.RoleBinding{ createAndCleanup(t, cl, &rbacv1.RoleBinding{
ObjectMeta: objectMeta("tailscale", "read-secrets"), ObjectMeta: objectMeta("tailscale", "read-secrets"),
Subjects: []rbacv1.Subject{{ Subjects: []rbacv1.Subject{{
Kind: "Group", Kind: "Group",
@ -60,16 +56,14 @@ func TestProxy(t *testing.T) {
operatorSecret := corev1.Secret{ operatorSecret := corev1.Secret{
ObjectMeta: objectMeta("tailscale", "operator"), ObjectMeta: objectMeta("tailscale", "operator"),
} }
if err := get(ctx, cl, &operatorSecret); err != nil { if err := get(t.Context(), cl, &operatorSecret); err != nil {
t.Fatal(err) t.Fatal(err)
} }
// Connect to tailnet with test-specific tag so we can use the // Join tailnet as a client of the API server proxy.
// [testGrants] ACLs when connecting to the API server proxy
ts := tsnetServerWithTag(t, ctx, "tag:e2e-test-proxy")
proxyCfg := &rest.Config{ proxyCfg := &rest.Config{
Host: fmt.Sprintf("https://%s:443", hostNameFromOperatorSecret(t, operatorSecret)), Host: fmt.Sprintf("https://%s:443", hostNameFromOperatorSecret(t, operatorSecret)),
Dial: ts.Dial, Dial: tailnetClient.Dial,
} }
proxyCl, err := client.New(proxyCfg, client.Options{}) proxyCl, err := client.New(proxyCfg, client.Options{})
if err != nil { if err != nil {
@ -82,8 +76,8 @@ func TestProxy(t *testing.T) {
} }
// Wait for up to a minute the first time we use the proxy, to give it time // Wait for up to a minute the first time we use the proxy, to give it time
// to provision the TLS certs. // to provision the TLS certs.
if err := tstest.WaitFor(time.Second*60, func() error { if err := tstest.WaitFor(time.Minute, func() error {
return get(ctx, proxyCl, &allowedSecret) return get(t.Context(), proxyCl, &allowedSecret)
}); err != nil { }); err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -92,65 +86,25 @@ func TestProxy(t *testing.T) {
forbiddenSecret := corev1.Secret{ forbiddenSecret := corev1.Secret{
ObjectMeta: objectMeta("default", "operator"), ObjectMeta: objectMeta("default", "operator"),
} }
if err := get(ctx, proxyCl, &forbiddenSecret); err == nil || !apierrors.IsForbidden(err) { if err := get(t.Context(), proxyCl, &forbiddenSecret); err == nil || !apierrors.IsForbidden(err) {
t.Fatalf("expected forbidden error fetching secret from default namespace: %s", err) t.Fatalf("expected forbidden error fetching secret from default namespace: %s", err)
} }
} }
func tsnetServerWithTag(t *testing.T, ctx context.Context, tag string) *tsnet.Server { func hostNameFromOperatorSecret(t *testing.T, s corev1.Secret) string {
caps := tailscale.KeyCapabilities{ t.Helper()
Devices: tailscale.KeyDeviceCapabilities{ prefsBytes, ok := s.Data[string(s.Data["_current-profile"])]
Create: tailscale.KeyDeviceCreateCapabilities{ if !ok {
Reusable: false, t.Fatalf("no state in operator Secret data: %#v", s.Data)
Preauthorized: true,
Ephemeral: true,
Tags: []string{tag},
},
},
} }
authKey, authKeyMeta, err := tsClient.CreateKey(ctx, caps) prefs := ipn.Prefs{}
if err != nil { if err := json.Unmarshal(prefsBytes, &prefs); err != nil {
t.Fatal(err) t.Fatal(err)
} }
t.Cleanup(func() {
if err := tsClient.DeleteKey(ctx, authKeyMeta.ID); err != nil {
t.Errorf("error deleting auth key: %s", err)
}
})
ts := &tsnet.Server{ if prefs.Persist == nil {
Hostname: "test-proxy", t.Fatalf("no hostname in operator Secret data: %#v", s.Data)
Ephemeral: true,
Dir: t.TempDir(),
AuthKey: authKey,
}
_, err = ts.Up(ctx)
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
if err := ts.Close(); err != nil {
t.Errorf("error shutting down tsnet.Server: %s", err)
} }
}) return prefs.Persist.UserProfile.LoginName
return ts
}
func hostNameFromOperatorSecret(t *testing.T, s corev1.Secret) string {
profiles := map[string]any{}
if err := json.Unmarshal(s.Data["_profiles"], &profiles); err != nil {
t.Fatal(err)
}
key, ok := strings.CutPrefix(string(s.Data["_current-profile"]), "profile-")
if !ok {
t.Fatal(string(s.Data["_current-profile"]))
}
profile, ok := profiles[key]
if !ok {
t.Fatal(profiles)
}
return ((profile.(map[string]any))["Name"]).(string)
} }

Loading…
Cancel
Save