// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package logpolicy manages the creation or reuse of logtail loggers, // caching collection instance state on disk for use on future runs of // programs on the same machine. package logpolicy import ( "bufio" "bytes" "context" "crypto/tls" "encoding/json" "errors" "fmt" "io" "log" "net" "net/http" "net/url" "os" "os/exec" "path/filepath" "runtime" "strings" "sync" "time" "golang.org/x/term" "tailscale.com/atomicfile" "tailscale.com/envknob" "tailscale.com/log/filelogger" "tailscale.com/logtail" "tailscale.com/logtail/filch" "tailscale.com/net/dnscache" "tailscale.com/net/dnsfallback" "tailscale.com/net/netknob" "tailscale.com/net/netns" "tailscale.com/net/tlsdial" "tailscale.com/net/tshttpproxy" "tailscale.com/paths" "tailscale.com/safesocket" "tailscale.com/smallzstd" "tailscale.com/types/logger" "tailscale.com/util/clientmetric" "tailscale.com/util/racebuild" "tailscale.com/util/winutil" "tailscale.com/version" "tailscale.com/version/distro" ) var getLogTargetOnce struct { sync.Once v string // URL of logs server, or empty for default } func getLogTarget() string { getLogTargetOnce.Do(func() { if val, ok := os.LookupEnv("TS_LOG_TARGET"); ok { getLogTargetOnce.v = val } else { if runtime.GOOS == "windows" { getLogTargetOnce.v = winutil.GetRegString("LogTarget", "") } } }) return getLogTargetOnce.v } // LogHost returns the hostname only (without port) of the configured // logtail server, or the default. func LogHost() string { if v := getLogTarget(); v != "" { return v } return logtail.DefaultHost } // Config represents an instance of logs in a collection. type Config struct { Collection string PrivateID logtail.PrivateID PublicID logtail.PublicID } // Policy is a logger and its public ID. type Policy struct { // Logtail is the logger. Logtail *logtail.Logger // PublicID is the logger's instance identifier. PublicID logtail.PublicID } // NewConfig creates a Config with collection and a newly generated PrivateID. func NewConfig(collection string) *Config { id, err := logtail.NewPrivateID() if err != nil { panic("logtail.NewPrivateID should never fail") } return &Config{ Collection: collection, PrivateID: id, PublicID: id.Public(), } } // Validate verifies that the Config matches the collection, // and that the PrivateID and PublicID pair are sensible. func (c *Config) Validate(collection string) error { switch { case c == nil: return errors.New("config is nil") case c.Collection != collection: return fmt.Errorf("config collection %q does not match %q", c.Collection, collection) case c.PrivateID.IsZero(): return errors.New("config has zero PrivateID") case c.PrivateID.Public() != c.PublicID: return errors.New("config PrivateID does not match PublicID") } return nil } // ToBytes returns the JSON representation of c. func (c *Config) ToBytes() []byte { data, err := json.MarshalIndent(c, "", "\t") if err != nil { log.Fatalf("logpolicy.Config marshal: %v", err) } return data } // Save writes the JSON representation of c to stateFile. func (c *Config) Save(stateFile string) error { c.PublicID = c.PrivateID.Public() if err := os.MkdirAll(filepath.Dir(stateFile), 0750); err != nil { return err } data := c.ToBytes() if err := atomicfile.WriteFile(stateFile, data, 0600); err != nil { return err } return nil } // ConfigFromFile reads a Config from a JSON file. func ConfigFromFile(statefile string) (*Config, error) { b, err := os.ReadFile(statefile) if err != nil { return nil, err } return ConfigFromBytes(b) } // ConfigFromBytes parses a Config from its JSON encoding. func ConfigFromBytes(jsonEnc []byte) (*Config, error) { c := &Config{} if err := json.Unmarshal(jsonEnc, c); err != nil { return nil, err } return c, nil } // stderrWriter is an io.Writer that always writes to the latest // os.Stderr, even if os.Stderr changes during the lifetime of the // stderrWriter value. type stderrWriter struct{} func (stderrWriter) Write(buf []byte) (int, error) { return os.Stderr.Write(buf) } type logWriter struct { logger *log.Logger } func (l logWriter) Write(buf []byte) (int, error) { l.logger.Printf("%s", buf) return len(buf), nil } // logsDir returns the directory to use for log configuration and // buffer storage. func logsDir(logf logger.Logf) string { if d := os.Getenv("TS_LOGS_DIR"); d != "" { fi, err := os.Stat(d) if err == nil && fi.IsDir() { return d } } switch runtime.GOOS { case "windows": if version.CmdName() == "tailscaled" { // In the common case, when tailscaled is run as the Local System (as a service), // we want to use %ProgramData% (C:\ProgramData\Tailscale), aside the // system state config with the machine key, etc. But if that directory's // not accessible, then it's probably because the user is running tailscaled // as a regular user (perhaps in userspace-networking/SOCK5 mode) and we should // just use the %LocalAppData% instead. In a user context, %LocalAppData% isn't // subject to random deletions from Windows system updates. dir := filepath.Join(os.Getenv("ProgramData"), "Tailscale") if winProgramDataAccessible(dir) { logf("logpolicy: using dir %v", dir) return dir } } dir := filepath.Join(os.Getenv("LocalAppData"), "Tailscale") logf("logpolicy: using LocalAppData dir %v", dir) return dir case "linux": // STATE_DIRECTORY is set by systemd 240+ but we support older // systems-d. For example, Ubuntu 18.04 (Bionic Beaver) is 237. systemdStateDir := os.Getenv("STATE_DIRECTORY") if systemdStateDir != "" { logf("logpolicy: using $STATE_DIRECTORY, %q", systemdStateDir) return systemdStateDir } } // Default to e.g. /var/lib/tailscale or /var/db/tailscale on Unix. if d := paths.DefaultTailscaledStateFile(); d != "" { d = filepath.Dir(d) // directory of e.g. "/var/lib/tailscale/tailscaled.state" if err := os.MkdirAll(d, 0700); err == nil { logf("logpolicy: using system state directory %q", d) return d } } cacheDir, err := os.UserCacheDir() if err == nil { d := filepath.Join(cacheDir, "Tailscale") logf("logpolicy: using UserCacheDir, %q", d) return d } // Use the current working directory, unless we're being run by a // service manager that sets it to /. wd, err := os.Getwd() if err == nil && wd != "/" { logf("logpolicy: using current directory, %q", wd) return wd } // No idea where to put stuff. Try to create a temp dir. It'll // mean we might lose some logs and rotate through log IDs, but // it's something. tmp, err := os.MkdirTemp("", "tailscaled-log-*") if err != nil { panic("no safe place found to store log state") } logf("logpolicy: using temp directory, %q", tmp) return tmp } // runningUnderSystemd reports whether we're running under systemd. func runningUnderSystemd() bool { if runtime.GOOS == "linux" && os.Getppid() == 1 { slurp, _ := os.ReadFile("/proc/1/stat") return bytes.HasPrefix(slurp, []byte("1 (systemd) ")) } return false } func redirectStderrToLogPanics() bool { return runningUnderSystemd() || envknob.Bool("TS_PLEASE_PANIC") } // winProgramDataAccessible reports whether the directory (assumed to // be a Windows %ProgramData% directory) is accessible to the current // process. It's created if needed. func winProgramDataAccessible(dir string) bool { if err := os.MkdirAll(dir, 0700); err != nil { // TODO: windows ACLs return false } // The C:\ProgramData\Tailscale directory should be locked down // by with ACLs to only be readable by the local system so a // regular user shouldn't be able to do this operation: if _, err := os.ReadDir(dir); err != nil { return false } return true } // tryFixLogStateLocation is a temporary fixup for // https://github.com/tailscale/tailscale/issues/247 . We accidentally // wrote logging state files to /, and then later to $CACHE_DIRECTORY // (which is incorrect because the log ID is not reconstructible if // deleted - it's state, not cache data). // // If log state for cmdname exists in / or $CACHE_DIRECTORY, and no // log state for that command exists in dir, then the log state is // moved from wherever it does exist, into dir. Leftover logs state // in / and $CACHE_DIRECTORY is deleted. func tryFixLogStateLocation(dir, cmdname string) { switch runtime.GOOS { case "linux", "freebsd", "openbsd": // These are the OSes where we might have written stuff into // root. Others use different logic to find the logs storage // dir. default: return } if cmdname == "" { log.Printf("[unexpected] no cmdname given to tryFixLogStateLocation, please file a bug at https://github.com/tailscale/tailscale") return } if dir == "/" { // Trying to store things in / still. That's a bug, but don't // abort hard. log.Printf("[unexpected] storing logging config in /, please file a bug at https://github.com/tailscale/tailscale") return } if os.Getuid() != 0 { // Only root could have written log configs to weird places. return } // We stored logs in 2 incorrect places: either /, or CACHE_DIR // (aka /var/cache/tailscale). We want to move files into the // provided dir, preferring those in CACHE_DIR over those in / if // both exist. If files already exist in dir, don't // overwrite. Finally, once we've maybe moved files around, we // want to delete leftovers in / and CACHE_DIR, to clean up after // our past selves. files := []string{ fmt.Sprintf("%s.log.conf", cmdname), fmt.Sprintf("%s.log1.txt", cmdname), fmt.Sprintf("%s.log2.txt", cmdname), } // checks if any of the files above exist in d. checkExists := func(d string) (bool, error) { for _, file := range files { p := filepath.Join(d, file) _, err := os.Stat(p) if os.IsNotExist(err) { continue } else if err != nil { return false, fmt.Errorf("stat %q: %w", p, err) } return true, nil } return false, nil } // move files from d into dir, if they exist. moveFiles := func(d string) error { for _, file := range files { src := filepath.Join(d, file) _, err := os.Stat(src) if os.IsNotExist(err) { continue } else if err != nil { return fmt.Errorf("stat %q: %v", src, err) } dst := filepath.Join(dir, file) bs, err := exec.Command("mv", src, dst).CombinedOutput() if err != nil { return fmt.Errorf("mv %q %q: %v (%s)", src, dst, err, bs) } } return nil } existsInRoot, err := checkExists("/") if err != nil { log.Printf("checking for configs in /: %v", err) return } existsInCache := false cacheDir := os.Getenv("CACHE_DIRECTORY") if cacheDir != "" { existsInCache, err = checkExists("/var/cache/tailscale") if err != nil { log.Printf("checking for configs in %s: %v", cacheDir, err) } } existsInDest, err := checkExists(dir) if err != nil { log.Printf("checking for configs in %s: %v", dir, err) return } switch { case !existsInRoot && !existsInCache: // No leftover files, nothing to do. return case existsInDest: // Already have "canonical" configs, just delete any remnants // (below). case existsInCache: // CACHE_DIRECTORY takes precedence over /, move files from // there. if err := moveFiles(cacheDir); err != nil { log.Print(err) return } case existsInRoot: // Files from root is better than nothing. if err := moveFiles("/"); err != nil { log.Print(err) return } } // If moving succeeded, or we didn't need to move files, try to // delete any leftover files, but it's okay if we can't delete // them for some reason. dirs := []string{} if existsInCache { dirs = append(dirs, cacheDir) } if existsInRoot { dirs = append(dirs, "/") } for _, d := range dirs { for _, file := range files { p := filepath.Join(d, file) _, err := os.Stat(p) if os.IsNotExist(err) { continue } else if err != nil { log.Printf("stat %q: %v", p, err) return } if err := os.Remove(p); err != nil { log.Printf("rm %q: %v", p, err) } } } } // New returns a new log policy (a logger and its instance ID) for a // given collection name. func New(collection string) *Policy { return NewWithConfigPath(collection, "", "") } // NewWithConfigPath is identical to New, // but uses the specified directory and command name. // If either is empty, it derives them automatically. func NewWithConfigPath(collection, dir, cmdName string) *Policy { var lflags int if term.IsTerminal(2) || runtime.GOOS == "windows" { lflags = 0 } else { lflags = log.LstdFlags } if envknob.Bool("TS_DEBUG_LOG_TIME") { lflags = log.LstdFlags | log.Lmicroseconds } if runningUnderSystemd() { // If journalctl is going to prepend its own timestamp // anyway, no need to add one. lflags = 0 } console := log.New(stderrWriter{}, "", lflags) var earlyErrBuf bytes.Buffer earlyLogf := func(format string, a ...any) { fmt.Fprintf(&earlyErrBuf, format, a...) earlyErrBuf.WriteByte('\n') } if dir == "" { dir = logsDir(earlyLogf) } if cmdName == "" { cmdName = version.CmdName() } tryFixLogStateLocation(dir, cmdName) cfgPath := filepath.Join(dir, fmt.Sprintf("%s.log.conf", cmdName)) if runtime.GOOS == "windows" { switch cmdName { case "tailscaled": // Tailscale 1.14 and before stored state under %LocalAppData% // (usually "C:\WINDOWS\system32\config\systemprofile\AppData\Local" // when tailscaled.exe is running as a non-user system service). // However it is frequently cleared for almost any reason: Windows // updates, System Restore, even various System Cleaner utilities. // // The Windows service previously ran as tailscale-ipn.exe, so // machines which ran very old versions might still have their // log conf named %LocalAppData%\tailscale-ipn.log.conf // // Machines which started using Tailscale more recently will have // %LocalAppData%\tailscaled.log.conf // // Attempt to migrate the log conf to C:\ProgramData\Tailscale oldDir := filepath.Join(os.Getenv("LocalAppData"), "Tailscale") oldPath := filepath.Join(oldDir, "tailscaled.log.conf") if fi, err := os.Stat(oldPath); err != nil || !fi.Mode().IsRegular() { // *Only* if tailscaled.log.conf does not exist, // check for tailscale-ipn.log.conf oldPathOldCmd := filepath.Join(oldDir, "tailscale-ipn.log.conf") if fi, err := os.Stat(oldPathOldCmd); err == nil && fi.Mode().IsRegular() { oldPath = oldPathOldCmd } } cfgPath = paths.TryConfigFileMigration(earlyLogf, oldPath, cfgPath) case "tailscale-ipn": for _, oldBase := range []string{"wg64.log.conf", "wg32.log.conf"} { oldConf := filepath.Join(dir, oldBase) if fi, err := os.Stat(oldConf); err == nil && fi.Mode().IsRegular() { cfgPath = paths.TryConfigFileMigration(earlyLogf, oldConf, cfgPath) break } } } } newc, err := ConfigFromFile(cfgPath) if err != nil { earlyLogf("logpolicy.ConfigFromFile %v: %v", cfgPath, err) } if err := newc.Validate(collection); err != nil { earlyLogf("logpolicy.Config.Validate for %v: %v", cfgPath, err) newc = NewConfig(collection) if err := newc.Save(cfgPath); err != nil { earlyLogf("logpolicy.Config.Save for %v: %v", cfgPath, err) } } conf := logtail.Config{ Collection: newc.Collection, PrivateID: newc.PrivateID, Stderr: logWriter{console}, NewZstdEncoder: func() logtail.Encoder { w, err := smallzstd.NewEncoder(nil) if err != nil { panic(err) } return w }, HTTPC: &http.Client{Transport: NewLogtailTransport(logtail.DefaultHost)}, } if collection == logtail.CollectionNode { conf.MetricsDelta = clientmetric.EncodeLogTailMetricsDelta conf.IncludeProcID = true conf.IncludeProcSequence = true } if envknob.NoLogsNoSupport() { log.Println("You have disabled logging. Tailscale will not be able to provide support.") conf.HTTPC = &http.Client{Transport: noopPretendSuccessTransport{}} } else if val := getLogTarget(); val != "" { log.Println("You have enabled a non-default log target. Doing without being told to by Tailscale staff or your network administrator will make getting support difficult.") conf.BaseURL = val u, _ := url.Parse(val) conf.HTTPC = &http.Client{Transport: NewLogtailTransport(u.Host)} } filchOptions := filch.Options{ ReplaceStderr: redirectStderrToLogPanics(), } filchPrefix := filepath.Join(dir, cmdName) // NAS disks cannot hibernate if we're writing logs to them all the time. // https://github.com/tailscale/tailscale/issues/3551 if runtime.GOOS == "linux" && (distro.Get() == distro.Synology || distro.Get() == distro.QNAP) { tmpfsLogs := "/tmp/tailscale-logs" if err := os.MkdirAll(tmpfsLogs, 0755); err == nil { filchPrefix = filepath.Join(tmpfsLogs, cmdName) filchOptions.MaxFileSize = 1 << 20 } else { // not a fatal error, we can leave the log files on the spinning disk log.Printf("Unable to create /tmp directory for log storage: %v\n", err) } } filchBuf, filchErr := filch.New(filchPrefix, filchOptions) if filchBuf != nil { conf.Buffer = filchBuf if filchBuf.OrigStderr != nil { conf.Stderr = filchBuf.OrigStderr } } lw := logtail.NewLogger(conf, log.Printf) var logOutput io.Writer = lw if runtime.GOOS == "windows" && conf.Collection == logtail.CollectionNode { logID := newc.PublicID.String() exe, _ := os.Executable() if strings.EqualFold(filepath.Base(exe), "tailscaled.exe") { diskLogf := filelogger.New("tailscale-service", logID, lw.Logf) logOutput = logger.FuncWriter(diskLogf) } } log.SetFlags(0) // other logflags are set on console, not here log.SetOutput(logOutput) log.Printf("Program starting: v%v, Go %v: %#v", version.Long, goVersion(), os.Args) log.Printf("LogID: %v", newc.PublicID) if filchErr != nil { log.Printf("filch failed: %v", filchErr) } if earlyErrBuf.Len() != 0 { log.Printf("%s", earlyErrBuf.Bytes()) } return &Policy{ Logtail: lw, PublicID: newc.PublicID, } } // dialLog is used by NewLogtailTransport to log the happy path of its // own dialing. // // By default it goes nowhere and is only enabled when // tailscaled's in verbose mode. // // log.Printf isn't used so its own logs don't loop back into logtail // in the happy path, thus generating more logs. var dialLog = log.New(io.Discard, "logtail: ", log.LstdFlags|log.Lmsgprefix) // SetVerbosityLevel controls the verbosity level that should be // written to stderr. 0 is the default (not verbose). Levels 1 or higher // are increasingly verbose. // // It should not be changed concurrently with log writes. func (p *Policy) SetVerbosityLevel(level int) { p.Logtail.SetVerbosityLevel(level) if level > 0 { dialLog.SetOutput(os.Stderr) } } // Close immediately shuts down the logger. func (p *Policy) Close() { ctx, cancel := context.WithCancel(context.Background()) cancel() p.Shutdown(ctx) } // Shutdown gracefully shuts down the logger, finishing any current // log upload if it can be done before ctx is canceled. func (p *Policy) Shutdown(ctx context.Context) error { if p.Logtail != nil { log.Printf("flushing log.") return p.Logtail.Shutdown(ctx) } return nil } // NewLogtailTransport returns an HTTP Transport particularly suited to uploading // logs to the given host name. This includes: // - If DNS lookup fails, consult the bootstrap DNS list of Tailscale hostnames. // - If TLS connection fails, try again using LetsEncrypt's built-in root certificate, // for the benefit of older OS platforms which might not include it. func NewLogtailTransport(host string) *http.Transport { // Start with a copy of http.DefaultTransport and tweak it a bit. tr := http.DefaultTransport.(*http.Transport).Clone() tr.Proxy = tshttpproxy.ProxyFromEnvironment tshttpproxy.SetTransportGetProxyConnectHeader(tr) // We do our own zstd compression on uploads, and responses never contain any payload, // so don't send "Accept-Encoding: gzip" to save a few bytes on the wire, since there // will never be any body to decompress: tr.DisableCompression = true // Log whenever we dial: tr.DialContext = func(ctx context.Context, netw, addr string) (net.Conn, error) { nd := netns.FromDialer(log.Printf, &net.Dialer{ Timeout: 30 * time.Second, KeepAlive: netknob.PlatformTCPKeepAlive(), }) t0 := time.Now() c, err := nd.DialContext(ctx, netw, addr) d := time.Since(t0).Round(time.Millisecond) if err == nil { dialLog.Printf("dialed %q in %v", addr, d) return c, nil } if version.IsWindowsGUI() && strings.HasPrefix(netw, "tcp") { if c, err := safesocket.Connect(safesocket.DefaultConnectionStrategy("")); err == nil { fmt.Fprintf(c, "CONNECT %s HTTP/1.0\r\n\r\n", addr) br := bufio.NewReader(c) res, err := http.ReadResponse(br, nil) if err == nil && res.StatusCode != 200 { err = errors.New(res.Status) } if err != nil { log.Printf("logtail: CONNECT response error from tailscaled: %v", err) c.Close() } else { dialLog.Printf("connected via tailscaled") return c, nil } } } // If we failed to dial, try again with bootstrap DNS. log.Printf("logtail: dial %q failed: %v (in %v), trying bootstrap...", addr, err, d) dnsCache := &dnscache.Resolver{ Forward: dnscache.Get().Forward, // use default cache's forwarder UseLastGood: true, LookupIPFallback: dnsfallback.Lookup, } dialer := dnscache.Dialer(nd.DialContext, dnsCache) c, err = dialer(ctx, netw, addr) if err == nil { log.Printf("logtail: bootstrap dial succeeded") } return c, err } // We're contacting exactly 1 hostname, so the default's 100 // max idle conns is very high for our needs. Even 2 is // probably double what we need: tr.MaxIdleConns = 2 // Provide knob to force HTTP/1 for log uploads. // TODO(bradfitz): remove this debug knob once we've decided // to upload via HTTP/1 or HTTP/2 (probably HTTP/1). Or we might just enforce // it server-side. if envknob.Bool("TS_DEBUG_FORCE_H1_LOGS") { tr.TLSClientConfig = nil // DefaultTransport's was already initialized w/ h2 tr.ForceAttemptHTTP2 = false tr.TLSNextProto = map[string]func(authority string, c *tls.Conn) http.RoundTripper{} } tr.TLSClientConfig = tlsdial.Config(host, tr.TLSClientConfig) return tr } func goVersion() string { v := strings.TrimPrefix(runtime.Version(), "go") if racebuild.On { return v + "-race" } return v } type noopPretendSuccessTransport struct{} func (noopPretendSuccessTransport) RoundTrip(req *http.Request) (*http.Response, error) { io.ReadAll(req.Body) req.Body.Close() return &http.Response{ StatusCode: 200, Status: "200 OK", }, nil }