health, net/tlsdial: add healthcheck for self-signed cert

When we make a connection to a server, we previously would verify with
the system roots, and then fall back to verifying with our baked-in
Let's Encrypt root if the system root cert verification failed.

We now explicitly check for, and log a health error on, self-signed
certificates. Additionally, we now always verify against our baked-in
Let's Encrypt root certificate and log an error if that isn't
successful. We don't consider this a health failure, since if we ever
change our server certificate issuer in the future older non-updated
versions of Tailscale will no longer be healthy despite being able to
connect.

Updates #3198

Change-Id: I00be5ceb8afee544ee795e3c7a2815476abc4abf
Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
pull/7158/head
Andrew Dunham 2 years ago
parent 7393ce5e4f
commit 2755f3843c

@ -34,6 +34,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
tailscale.com/derp/derphttp from tailscale.com/cmd/derper
tailscale.com/disco from tailscale.com/derp
tailscale.com/envknob from tailscale.com/derp+
tailscale.com/health from tailscale.com/net/tlsdial
tailscale.com/hostinfo from tailscale.com/net/interfaces+
tailscale.com/ipn from tailscale.com/client/tailscale
tailscale.com/ipn/ipnstate from tailscale.com/client/tailscale+
@ -81,6 +82,8 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
tailscale.com/util/httpm from tailscale.com/client/tailscale
tailscale.com/util/lineread from tailscale.com/hostinfo+
tailscale.com/util/mak from tailscale.com/syncs
tailscale.com/util/multierr from tailscale.com/health
tailscale.com/util/set from tailscale.com/health
tailscale.com/util/singleflight from tailscale.com/net/dnscache
tailscale.com/util/vizerror from tailscale.com/tsweb
W 💣 tailscale.com/util/winutil from tailscale.com/hostinfo+

@ -57,6 +57,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/derp/derphttp from tailscale.com/net/netcheck
tailscale.com/disco from tailscale.com/derp
tailscale.com/envknob from tailscale.com/cmd/tailscale/cli+
tailscale.com/health from tailscale.com/net/tlsdial
tailscale.com/health/healthmsg from tailscale.com/cmd/tailscale/cli
tailscale.com/hostinfo from tailscale.com/net/interfaces+
tailscale.com/ipn from tailscale.com/cmd/tailscale/cli+
@ -111,9 +112,10 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/util/httpm from tailscale.com/client/tailscale
tailscale.com/util/lineread from tailscale.com/net/interfaces+
tailscale.com/util/mak from tailscale.com/net/netcheck+
tailscale.com/util/multierr from tailscale.com/control/controlhttp
tailscale.com/util/multierr from tailscale.com/control/controlhttp+
tailscale.com/util/must from tailscale.com/cmd/tailscale/cli
tailscale.com/util/quarantine from tailscale.com/cmd/tailscale/cli
tailscale.com/util/set from tailscale.com/health
tailscale.com/util/singleflight from tailscale.com/net/dnscache
💣 tailscale.com/util/winutil from tailscale.com/hostinfo+
tailscale.com/version from tailscale.com/cmd/tailscale/cli+

@ -48,6 +48,7 @@ var (
controlHealth []string
lastLoginErr error
localLogConfigErr error
tlsConnectionErrors = map[string]error{} // map[ServerName]error
)
// Subsystem is the name of a subsystem whose health can be monitored.
@ -209,6 +210,18 @@ func SetLocalLogConfigHealth(err error) {
localLogConfigErr = err
}
// SetTLSConnectionError sets the error state for connections to a specific
// host. Setting the error to nil will clear any previously-set error.
func SetTLSConnectionError(host string, err error) {
mu.Lock()
defer mu.Unlock()
if err == nil {
delete(tlsConnectionErrors, host)
} else {
tlsConnectionErrors[host] = err
}
}
func RegisterDebugHandler(typ string, h http.Handler) {
mu.Lock()
defer mu.Unlock()
@ -476,6 +489,9 @@ func overallErrorLocked() error {
if err := envknob.ApplyDiskConfigError(); err != nil {
errs = append(errs, err)
}
for serverName, err := range tlsConnectionErrors {
errs = append(errs, fmt.Errorf("TLS connection error for %q: %w", serverName, err))
}
if e := fakeErrForTesting(); len(errs) == 0 && e != "" {
return errors.New(e)
}

@ -11,9 +11,11 @@
package tlsdial
import (
"bytes"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"log"
"os"
"sync"
@ -21,6 +23,7 @@ import (
"time"
"tailscale.com/envknob"
"tailscale.com/health"
)
var counterFallbackOK int32 // atomic
@ -33,6 +36,11 @@ var sslKeyLogFile = os.Getenv("SSLKEYLOGFILE")
var debug = envknob.RegisterBool("TS_DEBUG_TLS_DIAL")
// tlsdialWarningPrinted tracks whether we've printed a warning about a given
// hostname already, to avoid log spam for users with custom DERP servers,
// Headscale, etc.
var tlsdialWarningPrinted sync.Map // map[string]bool
// Config returns a tls.Config for connecting to a server.
// If base is non-nil, it's cloned as the base config before
// being configured and returned.
@ -66,6 +74,16 @@ func Config(host string, base *tls.Config) *tls.Config {
// (with the baked-in fallback root) in the VerifyConnection hook.
conf.InsecureSkipVerify = true
conf.VerifyConnection = func(cs tls.ConnectionState) error {
// Perform some health checks on this certificate before we do
// any verification.
if certIsSelfSigned(cs.PeerCertificates[0]) {
// Self-signed certs are never valid.
health.SetTLSConnectionError(cs.ServerName, fmt.Errorf("certificate is self-signed"))
} else {
// Ensure we clear any error state for this ServerName.
health.SetTLSConnectionError(cs.ServerName, nil)
}
// First try doing x509 verification with the system's
// root CA pool.
opts := x509.VerifyOptions{
@ -79,18 +97,27 @@ func Config(host string, base *tls.Config) *tls.Config {
if debug() {
log.Printf("tlsdial(sys %q): %v", host, errSys)
}
if errSys == nil {
return nil
}
// If that failed, because the system's CA roots are old
// or broken, fall back to trying LetsEncrypt at least.
// Always verify with our baked-in Let's Encrypt certificate,
// so we can log an informational message. This is useful for
// detecting SSL MiTM.
opts.Roots = bakedInRoots()
_, err := cs.PeerCertificates[0].Verify(opts)
_, bakedErr := cs.PeerCertificates[0].Verify(opts)
if debug() {
log.Printf("tlsdial(bake %q): %v", host, err)
log.Printf("tlsdial(bake %q): %v", host, bakedErr)
} else if bakedErr != nil {
if _, loaded := tlsdialWarningPrinted.LoadOrStore(host, true); !loaded {
if errSys == nil {
log.Printf("tlsdial: warning: server cert for %q is not a Let's Encrypt cert", host)
} else {
log.Printf("tlsdial: error: server cert for %q failed to verify and is not a Let's Encrypt cert", host)
}
}
}
if err == nil {
if errSys == nil {
return nil
} else if bakedErr == nil {
atomic.AddInt32(&counterFallbackOK, 1)
return nil
}
@ -99,6 +126,12 @@ func Config(host string, base *tls.Config) *tls.Config {
return conf
}
func certIsSelfSigned(cert *x509.Certificate) bool {
// A certificate is determined to be self-signed if the certificate's
// subject is the same as its issuer.
return bytes.Equal(cert.RawSubject, cert.RawIssuer)
}
// SetConfigExpectedCert modifies c to expect and verify that the server returns
// a certificate for the provided certDNSName.
//

Loading…
Cancel
Save