cmd/k8s-operator: simplify scope of e2e tests (#17076)

Removes ACL edits from e2e tests in favour of trying to simplify the
tests and separate the actual test logic from the environment setup
logic as much as possible. Also aims to fit in with the requirements
that will generally be filled anyway for most devs working on the
operator; in particular using tags that fit in with our documentation.

Updates tailscale/corp#32085

Change-Id: I7659246e39ec0b7bcc4ec0a00c6310f25fe6fac2

Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
tomhjp/poc-peer-relay-proxygroup
Tom Proctor 3 months ago committed by GitHub
parent 2d9d869d3d
commit 1ec3d20d10
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,33 @@
// To run the e2e tests against a tailnet, ensure its access controls are a
// superset of the following:
{
"tagOwners": {
"tag:k8s-operator": [],
"tag:k8s": ["tag:k8s-operator"],
"tag:k8s-recorder": ["tag:k8s-operator"],
},
"autoApprovers": {
// Could be relaxed if we coordinated with the cluster config, but this
// wide subnet maximises compatibility for most clusters.
"routes": {
"10.0.0.0/8": ["tag:k8s"],
},
"services": {
"tag:k8s": ["tag:k8s"],
},
},
"grants": [
{
"src": ["tag:k8s"],
"dst": ["tag:k8s", "tag:k8s-operator"],
"ip": ["tcp:80", "tcp:443"],
"app": {
"tailscale.com/cap/kubernetes": [{
"impersonate": {
"groups": ["ts:e2e-test-proxy"],
},
}],
},
},
],
}

@ -10,6 +10,7 @@ import (
"testing"
"time"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
@ -17,22 +18,24 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/config"
kube "tailscale.com/k8s-operator"
"tailscale.com/tstest"
"tailscale.com/types/ptr"
"tailscale.com/util/httpm"
)
// See [TestMain] for test requirements.
func TestIngress(t *testing.T) {
if tsClient == nil {
t.Skip("TestIngress requires credentials for a tailscale client")
if apiClient == nil {
t.Skip("TestIngress requires TS_API_CLIENT_SECRET set")
}
ctx := context.Background()
cfg := config.GetConfigOrDie()
cl, err := client.New(cfg, client.Options{})
if err != nil {
t.Fatal(err)
}
// Apply nginx
createAndCleanup(t, ctx, cl, &corev1.Pod{
createAndCleanup(t, cl,
&appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: "nginx",
Namespace: "default",
@ -40,6 +43,19 @@ func TestIngress(t *testing.T) {
"app.kubernetes.io/name": "nginx",
},
},
Spec: appsv1.DeploymentSpec{
Replicas: ptr.To[int32](1),
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app.kubernetes.io/name": "nginx",
},
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"app.kubernetes.io/name": "nginx",
},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
@ -48,6 +64,8 @@ func TestIngress(t *testing.T) {
},
},
},
},
},
})
// Apply service to expose it as ingress
svc := &corev1.Service{
@ -56,6 +74,7 @@ func TestIngress(t *testing.T) {
Namespace: "default",
Annotations: map[string]string{
"tailscale.com/expose": "true",
"tailscale.com/proxy-class": "prod",
},
},
Spec: corev1.ServiceSpec{
@ -71,10 +90,10 @@ func TestIngress(t *testing.T) {
},
},
}
createAndCleanup(t, ctx, cl, svc)
createAndCleanup(t, cl, svc)
// TODO: instead of timing out only when test times out, cancel context after 60s or so.
if err := wait.PollUntilContextCancel(ctx, time.Millisecond*100, true, func(ctx context.Context) (done bool, err error) {
if err := wait.PollUntilContextCancel(t.Context(), time.Millisecond*100, true, func(ctx context.Context) (done bool, err error) {
maybeReadySvc := &corev1.Service{ObjectMeta: objectMeta("default", "test-ingress")}
if err := get(ctx, cl, maybeReadySvc); err != nil {
return false, err
@ -89,17 +108,20 @@ func TestIngress(t *testing.T) {
}
var resp *http.Response
if err := tstest.WaitFor(time.Second*60, func() error {
if err := tstest.WaitFor(time.Minute, func() error {
// TODO(tomhjp): Get the tailnet DNS name from the associated secret instead.
// If we are not the first tailnet node with the requested name, we'll get
// a -N suffix.
resp, err = tsClient.HTTPClient.Get(fmt.Sprintf("http://%s-%s:80", svc.Namespace, svc.Name))
req, err := http.NewRequest(httpm.GET, fmt.Sprintf("http://%s-%s:80", svc.Namespace, svc.Name), nil)
if err != nil {
return err
}
return nil
ctx, cancel := context.WithTimeout(t.Context(), time.Second)
defer cancel()
resp, err = tailnetClient.HTTPClient().Do(req.WithContext(ctx))
return err
}); err != nil {
t.Fatalf("error trying to reach service: %v", err)
t.Fatalf("error trying to reach Service: %v", err)
}
if resp.StatusCode != http.StatusOK {

@ -6,167 +6,89 @@ package e2e
import (
"context"
"errors"
"fmt"
"log"
"os"
"slices"
"strings"
"testing"
"time"
"github.com/go-logr/zapr"
"github.com/tailscale/hujson"
"go.uber.org/zap/zapcore"
"golang.org/x/oauth2/clientcredentials"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"
kzap "sigs.k8s.io/controller-runtime/pkg/log/zap"
"tailscale.com/internal/client/tailscale"
"tailscale.com/ipn/store/mem"
"tailscale.com/tsnet"
)
const (
e2eManagedComment = "// This is managed by the k8s-operator e2e tests"
)
// This test suite is currently not run in CI.
// It requires some setup not handled by this code:
// - Kubernetes cluster with local kubeconfig for it (direct connection, no API server proxy)
// - Tailscale operator installed with --set apiServerProxyConfig.mode="true"
// - ACLs from acl.hujson
// - OAuth client secret in TS_API_CLIENT_SECRET env, with at least auth_keys write scope and tag:k8s tag
var (
tsClient *tailscale.Client
testGrants = map[string]string{
"test-proxy": `{
"src": ["tag:e2e-test-proxy"],
"dst": ["tag:k8s-operator"],
"app": {
"tailscale.com/cap/kubernetes": [{
"impersonate": {
"groups": ["ts:e2e-test-proxy"],
},
}],
},
}`,
}
apiClient *tailscale.Client // For API calls to control.
tailnetClient *tsnet.Server // For testing real tailnet traffic.
)
// This test suite is currently not run in CI.
// It requires some setup not handled by this code:
// - Kubernetes cluster with tailscale operator installed
// - Current kubeconfig context set to connect to that cluster (directly, no operator proxy)
// - Operator installed with --set apiServerProxyConfig.mode="true"
// - ACLs that define tag:e2e-test-proxy tag. TODO(tomhjp): Can maybe replace this prereq onwards with an API key
// - OAuth client ID and secret in TS_API_CLIENT_ID and TS_API_CLIENT_SECRET env
// - OAuth client must have auth_keys and policy_file write for tag:e2e-test-proxy tag
func TestMain(m *testing.M) {
code, err := runTests(m)
if err != nil {
log.Fatal(err)
log.Printf("Error: %v", err)
os.Exit(1)
}
os.Exit(code)
}
func runTests(m *testing.M) (int, error) {
zlog := kzap.NewRaw([]kzap.Opts{kzap.UseDevMode(true), kzap.Level(zapcore.DebugLevel)}...).Sugar()
logf.SetLogger(zapr.NewLogger(zlog.Desugar()))
if clientID := os.Getenv("TS_API_CLIENT_ID"); clientID != "" {
cleanup, err := setupClientAndACLs()
if err != nil {
return 0, err
}
defer func() {
err = errors.Join(err, cleanup())
}()
secret := os.Getenv("TS_API_CLIENT_SECRET")
if secret != "" {
secretParts := strings.Split(secret, "-")
if len(secretParts) != 4 {
return 0, errors.New("TS_API_CLIENT_SECRET is not valid")
}
return m.Run(), nil
}
func setupClientAndACLs() (cleanup func() error, _ error) {
ctx := context.Background()
credentials := clientcredentials.Config{
ClientID: os.Getenv("TS_API_CLIENT_ID"),
ClientSecret: os.Getenv("TS_API_CLIENT_SECRET"),
ClientID: secretParts[2],
ClientSecret: secret,
TokenURL: "https://login.tailscale.com/api/v2/oauth/token",
Scopes: []string{"auth_keys", "policy_file"},
}
tsClient = tailscale.NewClient("-", nil)
tsClient.HTTPClient = credentials.Client(ctx)
if err := patchACLs(ctx, tsClient, func(acls *hujson.Value) {
for test, grant := range testGrants {
deleteTestGrants(test, acls)
addTestGrant(test, grant, acls)
}
}); err != nil {
return nil, err
}
return func() error {
return patchACLs(ctx, tsClient, func(acls *hujson.Value) {
for test := range testGrants {
deleteTestGrants(test, acls)
}
})
}, nil
Scopes: []string{"auth_keys"},
}
apiClient = tailscale.NewClient("-", nil)
apiClient.HTTPClient = credentials.Client(ctx)
caps := tailscale.KeyCapabilities{
Devices: tailscale.KeyDeviceCapabilities{
Create: tailscale.KeyDeviceCreateCapabilities{
Reusable: false,
Preauthorized: true,
Ephemeral: true,
Tags: []string{"tag:k8s"},
},
},
}
func patchACLs(ctx context.Context, tsClient *tailscale.Client, patchFn func(*hujson.Value)) error {
acls, err := tsClient.ACLHuJSON(ctx)
authKey, authKeyMeta, err := apiClient.CreateKeyWithExpiry(ctx, caps, 10*time.Minute)
if err != nil {
return err
}
hj, err := hujson.Parse([]byte(acls.ACL))
if err != nil {
return err
}
patchFn(&hj)
hj.Format()
acls.ACL = hj.String()
if _, err := tsClient.SetACLHuJSON(ctx, *acls, true); err != nil {
return err
return 0, err
}
defer apiClient.DeleteKey(context.Background(), authKeyMeta.ID)
return nil
tailnetClient = &tsnet.Server{
Hostname: "test-proxy",
Ephemeral: true,
Store: &mem.Store{},
AuthKey: authKey,
}
func addTestGrant(test, grant string, acls *hujson.Value) error {
v, err := hujson.Parse([]byte(grant))
_, err = tailnetClient.Up(ctx)
if err != nil {
return err
}
// Add the managed comment to the first line of the grant object contents.
v.Value.(*hujson.Object).Members[0].Name.BeforeExtra = hujson.Extra(fmt.Sprintf("%s: %s\n", e2eManagedComment, test))
if err := acls.Patch([]byte(fmt.Sprintf(`[{"op": "add", "path": "/grants/-", "value": %s}]`, v.String()))); err != nil {
return err
}
return nil
}
func deleteTestGrants(test string, acls *hujson.Value) error {
grants := acls.Find("/grants")
var patches []string
for i, g := range grants.Value.(*hujson.Array).Elements {
members := g.Value.(*hujson.Object).Members
if len(members) == 0 {
continue
}
comment := strings.TrimSpace(string(members[0].Name.BeforeExtra))
if name, found := strings.CutPrefix(comment, e2eManagedComment+": "); found && name == test {
patches = append(patches, fmt.Sprintf(`{"op": "remove", "path": "/grants/%d"}`, i))
}
return 0, err
}
// Remove in reverse order so we don't affect the found indices as we mutate.
slices.Reverse(patches)
if err := acls.Patch([]byte(fmt.Sprintf("[%s]", strings.Join(patches, ",")))); err != nil {
return err
defer tailnetClient.Close()
}
return nil
return m.Run(), nil
}
func objectMeta(namespace, name string) metav1.ObjectMeta {
@ -176,13 +98,25 @@ func objectMeta(namespace, name string) metav1.ObjectMeta {
}
}
func createAndCleanup(t *testing.T, ctx context.Context, cl client.Client, obj client.Object) {
func createAndCleanup(t *testing.T, cl client.Client, obj client.Object) {
t.Helper()
if err := cl.Create(ctx, obj); err != nil {
// Try to create the object first
err := cl.Create(t.Context(), obj)
if err != nil {
if apierrors.IsAlreadyExists(err) {
if updateErr := cl.Update(t.Context(), obj); updateErr != nil {
t.Fatal(updateErr)
}
} else {
t.Fatal(err)
}
}
t.Cleanup(func() {
if err := cl.Delete(ctx, obj); err != nil {
// Use context.Background() for cleanup, as t.Context() is cancelled
// just before cleanup functions are called.
if err := cl.Delete(context.Background(), obj); err != nil {
t.Errorf("error cleaning up %s %s/%s: %s", obj.GetObjectKind().GroupVersionKind(), obj.GetNamespace(), obj.GetName(), err)
}
})

@ -4,10 +4,8 @@
package e2e
import (
"context"
"encoding/json"
"fmt"
"strings"
"testing"
"time"
@ -17,18 +15,16 @@ import (
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"tailscale.com/client/tailscale"
"tailscale.com/tsnet"
"tailscale.com/ipn"
"tailscale.com/tstest"
)
// See [TestMain] for test requirements.
func TestProxy(t *testing.T) {
if tsClient == nil {
t.Skip("TestProxy requires credentials for a tailscale client")
if apiClient == nil {
t.Skip("TestIngress requires TS_API_CLIENT_SECRET set")
}
ctx := context.Background()
cfg := config.GetConfigOrDie()
cl, err := client.New(cfg, client.Options{})
if err != nil {
@ -36,7 +32,7 @@ func TestProxy(t *testing.T) {
}
// Create role and role binding to allow a group we'll impersonate to do stuff.
createAndCleanup(t, ctx, cl, &rbacv1.Role{
createAndCleanup(t, cl, &rbacv1.Role{
ObjectMeta: objectMeta("tailscale", "read-secrets"),
Rules: []rbacv1.PolicyRule{{
APIGroups: []string{""},
@ -44,7 +40,7 @@ func TestProxy(t *testing.T) {
Resources: []string{"secrets"},
}},
})
createAndCleanup(t, ctx, cl, &rbacv1.RoleBinding{
createAndCleanup(t, cl, &rbacv1.RoleBinding{
ObjectMeta: objectMeta("tailscale", "read-secrets"),
Subjects: []rbacv1.Subject{{
Kind: "Group",
@ -60,16 +56,14 @@ func TestProxy(t *testing.T) {
operatorSecret := corev1.Secret{
ObjectMeta: objectMeta("tailscale", "operator"),
}
if err := get(ctx, cl, &operatorSecret); err != nil {
if err := get(t.Context(), cl, &operatorSecret); err != nil {
t.Fatal(err)
}
// Connect to tailnet with test-specific tag so we can use the
// [testGrants] ACLs when connecting to the API server proxy
ts := tsnetServerWithTag(t, ctx, "tag:e2e-test-proxy")
// Join tailnet as a client of the API server proxy.
proxyCfg := &rest.Config{
Host: fmt.Sprintf("https://%s:443", hostNameFromOperatorSecret(t, operatorSecret)),
Dial: ts.Dial,
Dial: tailnetClient.Dial,
}
proxyCl, err := client.New(proxyCfg, client.Options{})
if err != nil {
@ -82,8 +76,8 @@ func TestProxy(t *testing.T) {
}
// Wait for up to a minute the first time we use the proxy, to give it time
// to provision the TLS certs.
if err := tstest.WaitFor(time.Second*60, func() error {
return get(ctx, proxyCl, &allowedSecret)
if err := tstest.WaitFor(time.Minute, func() error {
return get(t.Context(), proxyCl, &allowedSecret)
}); err != nil {
t.Fatal(err)
}
@ -92,65 +86,25 @@ func TestProxy(t *testing.T) {
forbiddenSecret := corev1.Secret{
ObjectMeta: objectMeta("default", "operator"),
}
if err := get(ctx, proxyCl, &forbiddenSecret); err == nil || !apierrors.IsForbidden(err) {
if err := get(t.Context(), proxyCl, &forbiddenSecret); err == nil || !apierrors.IsForbidden(err) {
t.Fatalf("expected forbidden error fetching secret from default namespace: %s", err)
}
}
func tsnetServerWithTag(t *testing.T, ctx context.Context, tag string) *tsnet.Server {
caps := tailscale.KeyCapabilities{
Devices: tailscale.KeyDeviceCapabilities{
Create: tailscale.KeyDeviceCreateCapabilities{
Reusable: false,
Preauthorized: true,
Ephemeral: true,
Tags: []string{tag},
},
},
func hostNameFromOperatorSecret(t *testing.T, s corev1.Secret) string {
t.Helper()
prefsBytes, ok := s.Data[string(s.Data["_current-profile"])]
if !ok {
t.Fatalf("no state in operator Secret data: %#v", s.Data)
}
authKey, authKeyMeta, err := tsClient.CreateKey(ctx, caps)
if err != nil {
prefs := ipn.Prefs{}
if err := json.Unmarshal(prefsBytes, &prefs); err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
if err := tsClient.DeleteKey(ctx, authKeyMeta.ID); err != nil {
t.Errorf("error deleting auth key: %s", err)
}
})
ts := &tsnet.Server{
Hostname: "test-proxy",
Ephemeral: true,
Dir: t.TempDir(),
AuthKey: authKey,
}
_, err = ts.Up(ctx)
if err != nil {
t.Fatal(err)
}
t.Cleanup(func() {
if err := ts.Close(); err != nil {
t.Errorf("error shutting down tsnet.Server: %s", err)
if prefs.Persist == nil {
t.Fatalf("no hostname in operator Secret data: %#v", s.Data)
}
})
return ts
}
func hostNameFromOperatorSecret(t *testing.T, s corev1.Secret) string {
profiles := map[string]any{}
if err := json.Unmarshal(s.Data["_profiles"], &profiles); err != nil {
t.Fatal(err)
}
key, ok := strings.CutPrefix(string(s.Data["_current-profile"]), "profile-")
if !ok {
t.Fatal(string(s.Data["_current-profile"]))
}
profile, ok := profiles[key]
if !ok {
t.Fatal(profiles)
}
return ((profile.(map[string]any))["Name"]).(string)
return prefs.Persist.UserProfile.LoginName
}

Loading…
Cancel
Save