// Copyright (c) Tailscale Inc & AUTHORS // SPDX-License-Identifier: BSD-3-Clause // Create two wgengine instances and pass data through them, measuring // throughput, latency, and packet loss. package main import ( "bufio" "io" "log" "net" "net/http" "net/http/pprof" "net/netip" "os" "strconv" "sync" "time" "tailscale.com/types/logger" ) const PayloadSize = 1000 const ICMPMinSize = 24 var Addr1 = netip.MustParsePrefix("100.64.1.1/32") var Addr2 = netip.MustParsePrefix("100.64.1.2/32") func main() { var logf logger.Logf = log.Printf log.SetFlags(0) debugMux := newDebugMux() go runDebugServer(debugMux, "0.0.0.0:8999") mode, err := strconv.Atoi(os.Args[1]) if err != nil { log.Fatalf("%q: %v", os.Args[1], err) } traf := NewTrafficGen(nil) // Sample test results below are using GOMAXPROCS=2 (for some // tests, including wireguard-go, higher GOMAXPROCS goes slower) // on apenwarr's old Linux box: // Intel(R) Core(TM) i7-4785T CPU @ 2.20GHz // My 2019 Mac Mini is about 20% faster on most tests. switch mode { // tx=8786325 rx=8786326 (0 = 0.00% loss) (70768.7 Mbits/sec) case 1: setupTrivialNoAllocTest(logf, traf) // tx=6476293 rx=6476293 (0 = 0.00% loss) (52249.7 Mbits/sec) case 2: setupTrivialTest(logf, traf) // tx=1957974 rx=1958379 (0 = 0.00% loss) (15939.8 Mbits/sec) case 11: setupBlockingChannelTest(logf, traf) // tx=728621 rx=701825 (26620 = 3.65% loss) (5525.2 Mbits/sec) // (much faster on macOS??) case 12: setupNonblockingChannelTest(logf, traf) // tx=1024260 rx=941098 (83334 = 8.14% loss) (7516.6 Mbits/sec) // (much faster on macOS??) case 13: setupDoubleChannelTest(logf, traf) // tx=265468 rx=263189 (2279 = 0.86% loss) (2162.0 Mbits/sec) case 21: setupUDPTest(logf, traf) // tx=1493580 rx=1493580 (0 = 0.00% loss) (12210.4 Mbits/sec) case 31: setupBatchTCPTest(logf, traf) // tx=134236 rx=133166 (1070 = 0.80% loss) (1088.9 Mbits/sec) case 101: setupWGTest(nil, logf, traf, Addr1, Addr2) default: log.Fatalf("provide a valid test number (0..n)") } logf("initialized ok.") traf.Start(Addr1.Addr(), Addr2.Addr(), PayloadSize+ICMPMinSize, 0) var cur, prev Snapshot var pps int64 i := 0 for { i += 1 time.Sleep(10 * time.Millisecond) if (i % 100) == 0 { prev = cur cur = traf.Snap() d := cur.Sub(prev) if prev.WhenNsec == 0 { logf("tx=%-6d rx=%-6d", d.TxPackets, d.RxPackets) } else { logf("%v @%7d pkt/s", d, pps) } } pps = traf.Adjust() } } func newDebugMux() *http.ServeMux { mux := http.NewServeMux() mux.HandleFunc("/debug/pprof/", pprof.Index) mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) mux.HandleFunc("/debug/pprof/profile", pprof.Profile) mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) mux.HandleFunc("/debug/pprof/trace", pprof.Trace) return mux } func runDebugServer(mux *http.ServeMux, addr string) { srv := &http.Server{ Addr: addr, Handler: mux, } if err := srv.ListenAndServe(); err != nil { log.Fatal(err) } } // The absolute minimal test of the traffic generator: have it fill // a packet buffer, then absorb it again. Zero packet loss. func setupTrivialNoAllocTest(logf logger.Logf, traf *TrafficGen) { go func() { b := make([]byte, 1600) for { n := traf.Generate(b, 16) if n == 0 { break } traf.GotPacket(b[0:n+16], 16) } }() } // Almost the same, but this time allocate a fresh buffer each time // through the loop. Still zero packet loss. Runs about 2/3 as fast for me. func setupTrivialTest(logf logger.Logf, traf *TrafficGen) { go func() { for { b := make([]byte, 1600) n := traf.Generate(b, 16) if n == 0 { break } traf.GotPacket(b[0:n+16], 16) } }() } // Pass packets through a blocking channel between sender and receiver. // Still zero packet loss since the sender stops when the channel is full. // Max speed depends on channel length (I'm not sure why). func setupBlockingChannelTest(logf logger.Logf, traf *TrafficGen) { ch := make(chan []byte, 1000) go func() { // transmitter for { b := make([]byte, 1600) n := traf.Generate(b, 16) if n == 0 { close(ch) break } ch <- b[0 : n+16] } }() go func() { // receiver for b := range ch { traf.GotPacket(b, 16) } }() } // Same as setupBlockingChannelTest, but now we drop packets whenever the // channel is full. Max speed is about the same as the above test, but // now with nonzero packet loss. func setupNonblockingChannelTest(logf logger.Logf, traf *TrafficGen) { ch := make(chan []byte, 1000) go func() { // transmitter for { b := make([]byte, 1600) n := traf.Generate(b, 16) if n == 0 { close(ch) break } select { case ch <- b[0 : n+16]: default: } } }() go func() { // receiver for b := range ch { traf.GotPacket(b, 16) } }() } // Same as above, but at an intermediate blocking channel and goroutine // to make things a little more like wireguard-go. Roughly 20% slower than // the single-channel version. func setupDoubleChannelTest(logf logger.Logf, traf *TrafficGen) { ch := make(chan []byte, 1000) ch2 := make(chan []byte, 1000) go func() { // transmitter for { b := make([]byte, 1600) n := traf.Generate(b, 16) if n == 0 { close(ch) break } select { case ch <- b[0 : n+16]: default: } } }() go func() { // intermediary for b := range ch { ch2 <- b } close(ch2) }() go func() { // receiver for b := range ch2 { traf.GotPacket(b, 16) } }() } // Instead of a channel, pass packets through a UDP socket. func setupUDPTest(logf logger.Logf, traf *TrafficGen) { la, err := net.ResolveUDPAddr("udp", ":0") if err != nil { log.Fatalf("resolve: %v", err) } s1, err := net.ListenUDP("udp", la) if err != nil { log.Fatalf("listen1: %v", err) } s2, err := net.ListenUDP("udp", la) if err != nil { log.Fatalf("listen2: %v", err) } a2 := s2.LocalAddr() // On macOS (but not Linux), you can't transmit to 0.0.0.0:port, // which is what returns from .LocalAddr() above. We have to // force it to localhost instead. a2.(*net.UDPAddr).IP = net.ParseIP("127.0.0.1") s1.SetWriteBuffer(1024 * 1024) s2.SetReadBuffer(1024 * 1024) go func() { // transmitter b := make([]byte, 1600) for { n := traf.Generate(b, 16) if n == 0 { break } s1.WriteTo(b[16:n+16], a2) } }() go func() { // receiver b := make([]byte, 1600) for traf.Running() { // Use ReadFrom instead of Read, to be more like // how wireguard-go does it, even though we're not // going to actually look at the address. n, _, err := s2.ReadFrom(b) if err != nil { log.Fatalf("s2.Read: %v", err) } traf.GotPacket(b[:n], 0) } }() } // Instead of a channel, pass packets through a TCP socket. // TCP is a single stream, so we can amortize one syscall across // multiple packets. 10x amortization seems to make it go ~10x faster, // as expected, getting us close to the speed of the channel tests above. // There's also zero packet loss. func setupBatchTCPTest(logf logger.Logf, traf *TrafficGen) { sl, err := net.Listen("tcp", ":0") if err != nil { log.Fatalf("listen: %v", err) } var slCloseOnce sync.Once slClose := func() { slCloseOnce.Do(func() { sl.Close() }) } s1, err := net.Dial("tcp", sl.Addr().String()) if err != nil { log.Fatalf("dial: %v", err) } s2, err := sl.Accept() if err != nil { log.Fatalf("accept: %v", err) } s1.(*net.TCPConn).SetWriteBuffer(1024 * 1024) s2.(*net.TCPConn).SetReadBuffer(1024 * 1024) ch := make(chan int) go func() { // transmitter defer slClose() defer s1.Close() bs1 := bufio.NewWriterSize(s1, 1024*1024) b := make([]byte, 1600) i := 0 for { i += 1 n := traf.Generate(b, 16) if n == 0 { break } if i == 1 { ch <- n } bs1.Write(b[16 : n+16]) // TODO: this is a pretty half-baked batching // function, which we'd never want to employ in // a real-life program. // // In real life, we'd probably want to flush // immediately when there are no more packets to // generate, and queue up only if we fall behind. // // In our case however, we just want to see the // technical benefits of batching 10 syscalls // into 1, so a fixed ratio makes more sense. if (i % 10) == 0 { bs1.Flush() } } }() go func() { // receiver defer slClose() defer s2.Close() bs2 := bufio.NewReaderSize(s2, 1024*1024) // Find out the packet size (we happen to know they're // all the same size) packetSize := <-ch b := make([]byte, packetSize) for traf.Running() { // TODO: can't use ReadFrom() here, which is // unfair compared to UDP. (ReadFrom for UDP // apparently allocates memory per packet, which // this test does not.) n, err := io.ReadFull(bs2, b) if err != nil { log.Fatalf("s2.Read: %v", err) } traf.GotPacket(b[:n], 0) } }() }