@ -17,7 +17,6 @@ import (
"sync"
"sync"
"time"
"time"
"golang.org/x/sync/errgroup"
"tailscale.com/logpolicy"
"tailscale.com/logpolicy"
"tailscale.com/logtail"
"tailscale.com/logtail"
"tailscale.com/net/connstats"
"tailscale.com/net/connstats"
@ -25,6 +24,7 @@ import (
"tailscale.com/smallzstd"
"tailscale.com/smallzstd"
"tailscale.com/tailcfg"
"tailscale.com/tailcfg"
"tailscale.com/types/netlogtype"
"tailscale.com/types/netlogtype"
"tailscale.com/util/multierr"
"tailscale.com/wgengine/router"
"tailscale.com/wgengine/router"
)
)
@ -32,8 +32,7 @@ import (
const pollPeriod = 5 * time . Second
const pollPeriod = 5 * time . Second
// Device is an abstraction over a tunnel device or a magic socket.
// Device is an abstraction over a tunnel device or a magic socket.
// *tstun.Wrapper implements this interface.
// Both *tstun.Wrapper and *magicsock.Conn implement this interface.
// *magicsock.Conn implements this interface.
type Device interface {
type Device interface {
// SetStatistics registers the connection statistics tracker with the
// device. Startup calls it with the tracker to use, and Shutdown calls
// it with nil once logging stops.
SetStatistics ( * connstats . Statistics )
SetStatistics ( * connstats . Statistics )
}
}
@ -47,15 +46,15 @@ func (noopDevice) SetStatistics(*connstats.Statistics) {}
// Exit node traffic is not logged for privacy reasons.
// Exit node traffic is not logged for privacy reasons.
// The zero value is ready for use.
// The zero value is ready for use.
type Logger struct {
type Logger struct {
mu sync . Mutex
mu sync . Mutex // protects all fields below
// logger is the logtail stream created by Startup. Startup refuses to
// run while it is non-nil, and Shutdown resets it to nil.
logger * logtail . Logger
logger * logtail . Logger
// stats is the connection tracker that Startup creates and registers
// into both tun and sock.
stats * connstats . Statistics
// tun and sock are the devices handed to Startup; a nil device is
// replaced by noopDevice before being stored.
tun Device
sock Device
// addrs and prefixes are read under mu and passed to recordStatistics
// each time statistics are recorded.
addrs map [ netip . Addr ] bool
addrs map [ netip . Addr ] bool
prefixes map [ netip . Prefix ] bool
prefixes map [ netip . Prefix ] bool
// group runs the polling goroutine started by Startup and cancel stops
// it; Shutdown calls cancel and then waits on group.
group errgroup . Group
cancel context . CancelFunc
}
}
// Running reports whether the logger is running.
// Running reports whether the logger is running.
@ -97,18 +96,13 @@ func (nl *Logger) Startup(nodeID tailcfg.StableNodeID, nodeLogID, domainLogID lo
if nl . logger != nil {
if nl . logger != nil {
return fmt . Errorf ( "network logger already running for %v" , nl . logger . PrivateID ( ) . Public ( ) )
return fmt . Errorf ( "network logger already running for %v" , nl . logger . PrivateID ( ) . Public ( ) )
}
}
if tun == nil {
tun = noopDevice { }
}
if sock == nil {
sock = noopDevice { }
}
// Startup a log stream to Tailscale's logging service.
httpc := & http . Client { Transport : logpolicy . NewLogtailTransport ( logtail . DefaultHost ) }
httpc := & http . Client { Transport : logpolicy . NewLogtailTransport ( logtail . DefaultHost ) }
if testClient != nil {
if testClient != nil {
httpc = testClient
httpc = testClient
}
}
logger : = logtail . NewLogger ( logtail . Config {
nl. logger = logtail . NewLogger ( logtail . Config {
Collection : "tailtraffic.log.tailscale.io" ,
Collection : "tailtraffic.log.tailscale.io" ,
PrivateID : nodeLogID ,
PrivateID : nodeLogID ,
CopyPrivateID : domainLogID ,
CopyPrivateID : domainLogID ,
@ -127,47 +121,34 @@ func (nl *Logger) Startup(nodeID tailcfg.StableNodeID, nodeLogID, domainLogID lo
IncludeProcID : true ,
IncludeProcID : true ,
IncludeProcSequence : true ,
IncludeProcSequence : true ,
} , log . Printf )
} , log . Printf )
nl . logger = logger
stats := new ( connstats . Statistics )
ctx , cancel := context . WithCancel ( context . Background ( ) )
nl . cancel = cancel
nl . group . Go ( func ( ) error {
tun . SetStatistics ( stats )
defer tun . SetStatistics ( nil )
sock . SetStatistics ( stats )
// Startup a data structure to track per-connection statistics.
defer sock . SetStatistics ( nil )
// There is a maximum size for individual log messages that logtail
// can upload to the Tailscale log service, so stay below this limit.
const maxLogSize = 256 << 10
const maxConns = ( maxLogSize - netlogtype . MaxMessageJSONSize ) / netlogtype . MaxConnectionCountsJSONSize
nl . stats = connstats . NewStatistics ( pollPeriod , maxConns , func ( start , end time . Time , virtual , physical map [ netlogtype . Connection ] netlogtype . Counts ) {
nl . mu . Lock ( )
addrs := nl . addrs
prefixes := nl . prefixes
nl . mu . Unlock ( )
recordStatistics ( nl . logger , nodeID , start , end , virtual , physical , addrs , prefixes )
} )
start := time . Now ( )
// Register the connection tracker into the TUN device.
ticker := time . NewTicker ( pollPeriod )
if tun == nil {
for {
tun = noopDevice { }
var end time . Time
}
select {
nl . tun = tun
case <- ctx . Done ( ) :
nl . tun . SetStatistics ( nl . stats )
end = time . Now ( )
case end = <- ticker . C :
}
// NOTE: connstats and sockStats will always be slightly out-of-sync.
// Register the connection tracker into magicsock.
// It is impossible to have an atomic snapshot of statistics
if sock == nil {
// at both layers without a global mutex that spans all layers.
sock = noopDevice { }
connstats , sockStats := stats . Extract ( )
}
if len ( connstats ) + len ( sockStats ) > 0 {
nl . sock = sock
nl . mu . Lock ( )
nl . sock . SetStatistics ( nl . stats )
addrs := nl . addrs
prefixes := nl . prefixes
nl . mu . Unlock ( )
recordStatistics ( logger , nodeID , start , end , connstats , sockStats , addrs , prefixes )
}
if ctx . Err ( ) != nil {
break
}
start = end . Add ( time . Nanosecond )
}
return nil
} )
return nil
return nil
}
}
@ -222,21 +203,8 @@ func recordStatistics(logger *logtail.Logger, nodeID tailcfg.StableNodeID, start
}
}
if len ( m . VirtualTraffic ) + len ( m . SubnetTraffic ) + len ( m . ExitTraffic ) + len ( m . PhysicalTraffic ) > 0 {
if len ( m . VirtualTraffic ) + len ( m . SubnetTraffic ) + len ( m . ExitTraffic ) + len ( m . PhysicalTraffic ) > 0 {
// TODO(joetsai): Place a hard limit on the size of a network log message.
// The log server rejects any payloads above a certain size, so logging
// a message that large would cause logtail to be stuck forever trying
// and failing to upload the same excessively large payload.
//
// We should figure out the behavior for handling this. We could split
// the message apart so that there are multiple chunks with the same window,
// We could also consider reducing the granularity of the data
// by dropping port numbers.
const maxSize = 256 << 10
if b , err := json . Marshal ( m ) ; err != nil {
if b , err := json . Marshal ( m ) ; err != nil {
logger . Logf ( "json.Marshal error: %v" , err )
logger . Logf ( "json.Marshal error: %v" , err )
} else if len ( b ) > maxSize {
logger . Logf ( "JSON body too large: %dB (virtual:%d subnet:%d exit:%d physical:%d)" ,
len ( b ) , len ( m . VirtualTraffic ) , len ( m . SubnetTraffic ) , len ( m . ExitTraffic ) , len ( m . PhysicalTraffic ) )
} else {
} else {
logger . Logf ( "%s" , b )
logger . Logf ( "%s" , b )
}
}
@ -285,15 +253,23 @@ func (nl *Logger) Shutdown(ctx context.Context) error {
if nl . logger == nil {
if nl . logger == nil {
return nil
return nil
}
}
nl . cancel ( )
// Shutdown in reverse order of Startup.
// Do not hold lock while shutting down since this may flush one last time.
nl . mu . Unlock ( )
nl . mu . Unlock ( )
nl . group . Wait ( ) // do not hold lock while waiting
nl . sock . SetStatistics ( nil )
nl . tun . SetStatistics ( nil )
err1 := nl . stats . Shutdown ( ctx )
err2 := nl . logger . Shutdown ( ctx )
nl . mu . Lock ( )
nl . mu . Lock ( )
err := nl . logger . Shutdown ( ctx )
// Purge state.
nl . logger = nil
nl . logger = nil
nl . stats = nil
nl . tun = nil
nl . sock = nil
nl . addrs = nil
nl . addrs = nil
nl . prefixes = nil
nl . prefixes = nil
nl . cancel = nil
return err
return multi err. New ( err1 , err2 )
}
}