From 10d0ce8dde2df4a24b78adfe67916c9624f9be3c Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Mon, 12 Aug 2024 21:39:28 -0700 Subject: [PATCH] tstest/natlab: get tailscaled logs from gokrazy via syslog Using https://github.com/gokrazy/gokrazy/pull/275 This is much lower latency than logcatcher, which is higher latency and chunkier. And this is better than getting it via 'tailscale debug daemon-logs', which misses early interesting logs. Updates #13038 Change-Id: I499ec254c003a9494c0e9910f9c650c8ac44ef33 Signed-off-by: Brad Fitzpatrick --- gokrazy/go.mod | 2 +- .../github.com/gokrazy/gokrazy/go.mod | 2 +- .../github.com/gokrazy/gokrazy/go.sum | 2 + tstest/integration/nat/nat_test.go | 44 +++---------------- tstest/natlab/vnet/conf.go | 28 +++++++++--- tstest/natlab/vnet/vnet.go | 30 ++++++++++--- 6 files changed, 53 insertions(+), 55 deletions(-) diff --git a/gokrazy/go.mod b/gokrazy/go.mod index fbabc7895..8c898d3ca 100644 --- a/gokrazy/go.mod +++ b/gokrazy/go.mod @@ -18,6 +18,6 @@ require ( golang.org/x/sys v0.20.0 // indirect ) -replace github.com/gokrazy/gokrazy => github.com/tailscale/gokrazy v0.0.0-20240602215456-7b9b6bbf726a +replace github.com/gokrazy/gokrazy => github.com/tailscale/gokrazy v0.0.0-20240812224643-6b21ddf64678 replace github.com/gokrazy/tools => github.com/tailscale/gokrazy-tools v0.0.0-20240730192548-9f81add3a91e diff --git a/gokrazy/tsapp/builddir/github.com/gokrazy/gokrazy/go.mod b/gokrazy/tsapp/builddir/github.com/gokrazy/gokrazy/go.mod index 5430f30d1..33656efee 100644 --- a/gokrazy/tsapp/builddir/github.com/gokrazy/gokrazy/go.mod +++ b/gokrazy/tsapp/builddir/github.com/gokrazy/gokrazy/go.mod @@ -12,4 +12,4 @@ require ( golang.org/x/sys v0.20.0 // indirect ) -replace github.com/gokrazy/gokrazy => github.com/tailscale/gokrazy v0.0.0-20240802144848-676865a4e84f +replace github.com/gokrazy/gokrazy => github.com/tailscale/gokrazy v0.0.0-20240812224643-6b21ddf64678 diff --git a/gokrazy/tsapp/builddir/github.com/gokrazy/gokrazy/go.sum b/gokrazy/tsapp/builddir/github.com/gokrazy/gokrazy/go.sum index 995bdfb36..479eb1cef 100644 --- a/gokrazy/tsapp/builddir/github.com/gokrazy/gokrazy/go.sum +++ b/gokrazy/tsapp/builddir/github.com/gokrazy/gokrazy/go.sum @@ -16,6 +16,8 @@ github.com/tailscale/gokrazy v0.0.0-20240602215456-7b9b6bbf726a h1:7dnA8x14JihQm github.com/tailscale/gokrazy v0.0.0-20240602215456-7b9b6bbf726a/go.mod h1:NHROeDlzn0icUl3f+tEYvGGpcyBDMsr3AvKLHOWRe5M= github.com/tailscale/gokrazy v0.0.0-20240802144848-676865a4e84f h1:ZSAGWpgs+6dK2oIz5OR+HUul3oJbnhFn8YNgcZ3d9SQ= github.com/tailscale/gokrazy v0.0.0-20240802144848-676865a4e84f/go.mod h1:+/WWMckeuQt+DG6690A6H8IgC+HpBFq2fmwRKcSbxdk= +github.com/tailscale/gokrazy v0.0.0-20240812224643-6b21ddf64678 h1:2B8/FbIRqmVgRUulQ4iu1EojniufComYe5Yj4BtIn1c= +github.com/tailscale/gokrazy v0.0.0-20240812224643-6b21ddf64678/go.mod h1:+/WWMckeuQt+DG6690A6H8IgC+HpBFq2fmwRKcSbxdk= golang.org/x/sys v0.0.0-20201005065044-765f4ea38db3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= diff --git a/tstest/integration/nat/nat_test.go b/tstest/integration/nat/nat_test.go index 788a47615..4aa68d55b 100644 --- a/tstest/integration/nat/nat_test.go +++ b/tstest/integration/nat/nat_test.go @@ -7,12 +7,10 @@ import ( "bytes" "cmp" "context" - "encoding/json" "errors" "flag" "fmt" "io" - "log" "net" "net/http" "net/netip" @@ -193,6 +191,10 @@ func (nt *natTest) runTest(node1, node2 addNodeFunc) pingRoute { if nodes[0] == nil || nodes[1] == nil { t.Skip("skipping test; not applicable combination") } + if *logTailscaled { + nodes[0].SetVerboseSyslog(true) + nodes[1].SetVerboseSyslog(true) + } var err error nt.vnet, err = vnet.New(&c) @@ -247,7 +249,7 @@ func (nt *natTest) runTest(node1, node2 addNodeFunc) pingRoute { "-m", "384M", "-nodefaults", "-no-user-config", "-nographic", "-kernel", nt.kernel, - "-append", "console=hvc0 root=PARTUUID=60c24cc1-f3f9-427a-8199-dd02023b0001/PARTNROFF=1 ro init=/gokrazy/init panic=10 oops=panic pci=off nousb tsc=unstable clocksource=hpet tailscale-tta=1"+envStr, + "-append", "console=hvc0 root=PARTUUID=60c24cc1-f3f9-427a-8199-dd02023b0001/PARTNROFF=1 ro init=/gokrazy/init panic=10 oops=panic pci=off nousb tsc=unstable clocksource=hpet gokrazy.remote_syslog.target=52.52.0.9:995 tailscale-tta=1"+envStr, "-drive", "id=blk0,file="+disk+",format=qcow2", "-device", "virtio-blk-device,drive=blk0", "-netdev", "stream,id=net0,addr.type=unix,addr.path="+sockAddr, @@ -281,13 +283,6 @@ func (nt *natTest) runTest(node1, node2 addNodeFunc) pingRoute { for i, c := range clients { i, c := i, c eg.Go(func() error { - if *logTailscaled { - wg.Add(1) - go func() { - defer wg.Done() - streamDaemonLogs(ctx, t, c, fmt.Sprintf("node%d:", i)) - }() - } st, err := c.Status(ctx) if err != nil { return fmt.Errorf("node%d status: %w", i, err) @@ -361,35 +356,6 @@ const ( routeNil pingRoute = "nil" // *ipnstate.PingResult is nil ) -func streamDaemonLogs(ctx context.Context, t testing.TB, c *vnet.NodeAgentClient, nodeID string) { - ctx, cancel := context.WithCancel(ctx) - defer cancel() - r, err := c.TailDaemonLogs(ctx) - if err != nil { - t.Errorf("tailDaemonLogs: %v", err) - return - } - logger := log.New(os.Stderr, nodeID+" ", log.Lmsgprefix) - dec := json.NewDecoder(r) - for { - // /{"logtail":{"client_time":"2024-08-08T17:42:31.95095956Z","proc_id":2024742977,"proc_seq":232},"text":"magicsock: derp-1 connected; connGen=1\n"} - var logEntry struct { - LogTail struct { - ClientTime time.Time `json:"client_time"` - } - Text string `json:"text"` - } - if err := dec.Decode(&logEntry); err != nil { - if err == io.EOF || errors.Is(err, context.Canceled) { - return - } - t.Errorf("log entry: %v", err) - return - } - logger.Printf("%s %s", logEntry.LogTail.ClientTime.Format("2006/01/02 15:04:05"), logEntry.Text) - } -} - func ping(ctx context.Context, c *vnet.NodeAgentClient, target netip.Addr) (*ipnstate.PingResult, error) { n := 0 var res *ipnstate.PingResult diff --git a/tstest/natlab/vnet/conf.go b/tstest/natlab/vnet/conf.go index c5859787e..d0d3fadca 100644 --- a/tstest/natlab/vnet/conf.go +++ b/tstest/natlab/vnet/conf.go @@ -73,9 +73,12 @@ func (c *Config) AddNode(opts ...any) *Node { case TailscaledEnv: n.env = append(n.env, o) case NodeOption: - if o == HostFirewall { + switch o { + case HostFirewall: n.hostFW = true - } else { + case VerboseSyslog: + n.verboseSyslog = true + default: if n.err == nil { n.err = fmt.Errorf("unknown NodeOption %q", o) } @@ -93,7 +96,8 @@ func (c *Config) AddNode(opts ...any) *Node { type NodeOption string const ( - HostFirewall NodeOption = "HostFirewall" + HostFirewall NodeOption = "HostFirewall" + VerboseSyslog NodeOption = "VerboseSyslog" ) // TailscaledEnv is а option that can be passed to Config.AddNode @@ -148,8 +152,9 @@ type Node struct { err error n *node // nil until NewServer called - env []TailscaledEnv - hostFW bool + env []TailscaledEnv + hostFW bool + verboseSyslog bool // TODO(bradfitz): this is halfway converted to supporting multiple NICs // but not done. We need a MAC-per-Network. @@ -171,6 +176,14 @@ func (n *Node) HostFirewall() bool { return n.hostFW } +func (n *Node) VerboseSyslog() bool { + return n.verboseSyslog +} + +func (n *Node) SetVerboseSyslog(v bool) { + n.verboseSyslog = v +} + // Network returns the first network this node is connected to, // or nil if none. func (n *Node) Network() *Network { @@ -277,8 +290,9 @@ func (s *Server) initFromConfig(c *Config) error { return conf.err } n := &node{ - mac: conf.mac, - net: netOfConf[conf.Network()], + mac: conf.mac, + net: netOfConf[conf.Network()], + verboseSyslog: conf.VerboseSyslog(), } n.interfaceID = must.Get(s.pcapWriter.AddInterface(pcapgo.NgInterface{ Name: fmt.Sprintf("node%d", i+1), diff --git a/tstest/natlab/vnet/vnet.go b/tstest/natlab/vnet/vnet.go index 1f3da988d..38355e51f 100644 --- a/tstest/natlab/vnet/vnet.go +++ b/tstest/natlab/vnet/vnet.go @@ -397,10 +397,10 @@ func (n *network) serveLogCatcherConn(clientRemoteIP netip.Addr, c net.Conn) { if node != nil { node.logMu.Lock() defer node.logMu.Unlock() + node.logCatcherWrites++ for _, lg := range logs { tStr := lg.Logtail.Client_Time.Round(time.Millisecond).Format(time.RFC3339Nano) fmt.Fprintf(&node.logBuf, "[%v] %s\n", tStr, lg.Text) - log.Printf("LOG %v [%v] %s\n", clientRemoteIP, tStr, lg.Text) } } }) @@ -416,6 +416,7 @@ var ( fakeDERP1IP = netip.AddrFrom4([4]byte{33, 4, 0, 1}) // 3340=DERP; 1=derp 1 fakeDERP2IP = netip.AddrFrom4([4]byte{33, 4, 0, 2}) // 3340=DERP; 1=derp 1 fakeLogCatcherIP = netip.AddrFrom4([4]byte{52, 52, 0, 4}) + fakeSyslogIP = netip.AddrFrom4([4]byte{52, 52, 0, 9}) ) type EthernetPacket struct { @@ -501,16 +502,18 @@ func (n *network) MACOfIP(ip netip.Addr) (_ MAC, ok bool) { } type node struct { - mac MAC - interfaceID int - net *network - lanIP netip.Addr // must be in net.lanIP prefix + unique in net + mac MAC + interfaceID int + net *network + lanIP netip.Addr // must be in net.lanIP prefix + unique in net + verboseSyslog bool // logMu guards logBuf. // TODO(bradfitz): conditionally write these out to separate files at the end? // Currently they only hold logcatcher logs. - logMu sync.Mutex - logBuf bytes.Buffer + logMu sync.Mutex + logBuf bytes.Buffer + logCatcherWrites int } type derpServer struct { @@ -1007,6 +1010,19 @@ func (n *network) HandleEthernetIPv4PacketForRouter(ep EthernetPacket) { return } + if isUDP && dstIP == fakeSyslogIP { + node, ok := n.nodesByIP[srcIP] + if !ok { + return + } + if node.verboseSyslog { + // TODO(bradfitz): parse this and capture it, structured, into + // node's log buffer. + log.Printf("syslog from %v: %s", srcIP, udp.Payload) + } + return + } + if !toForward && isNATPMP(packet) { n.handleNATPMPRequest(UDPPacket{ Src: netip.AddrPortFrom(srcIP, uint16(udp.SrcPort)),