From 5336362e6408aae4acb1f7b44e8ae83193263e30 Mon Sep 17 00:00:00 2001 From: Anton Tolchanov Date: Wed, 27 Mar 2024 15:13:34 +0000 Subject: [PATCH] prober: export probe class and metrics from bandwidth prober - Wrap each prober function into a probe class that allows associating metric labels and custom metrics with a given probe; - Make sure all existing probe classes set a `class` metric label; - Move bandwidth probe size from being a metric label to a separate gauge metric; this will make it possible to use it to calculate average used bandwidth using a PromQL query; - Also export transfer time for the bandwidth prober (more accurate than the total probe time, since it excludes connection establishment time). Updates tailscale/corp#17912 Signed-off-by: Anton Tolchanov --- prober/derp.go | 106 ++++++++++++++++++++++++------------- prober/derp_test.go | 18 +++---- prober/dns.go | 8 ++- prober/dns_example_test.go | 14 ++--- prober/dns_test.go | 4 +- prober/http.go | 13 +++-- prober/prober.go | 87 +++++++++++++++++++++++------- prober/prober_test.go | 46 ++++++++-------- prober/tcp.go | 9 ++-- prober/tls.go | 26 +++++---- 10 files changed, 215 insertions(+), 116 deletions(-) diff --git a/prober/derp.go b/prober/derp.go index ad8cf7066..4108745ff 100644 --- a/prober/derp.go +++ b/prober/derp.go @@ -10,9 +10,9 @@ import ( crand "crypto/rand" "encoding/json" "errors" + "expvar" "fmt" "log" - "maps" "net" "net/http" "strconv" @@ -20,6 +20,7 @@ import ( "sync" "time" + "github.com/prometheus/client_golang/prometheus" "tailscale.com/derp" "tailscale.com/derp/derphttp" "tailscale.com/net/stun" @@ -42,11 +43,14 @@ type derpProber struct { bwInterval time.Duration bwProbeSize int64 - // Probe functions that can be overridden for testing. - tlsProbeFn func(string) ProbeFunc - udpProbeFn func(string, int) ProbeFunc - meshProbeFn func(string, string) ProbeFunc - bwProbeFn func(string, string, int64) ProbeFunc + // Probe class for fetching & updating the DERP map. + ProbeMap ProbeClass + + // Probe classes for probing individual derpers. + tlsProbeFn func(string) ProbeClass + udpProbeFn func(string, int) ProbeClass + meshProbeFn func(string, string) ProbeClass + bwProbeFn func(string, string, int64) ProbeClass sync.Mutex lastDERPMap *tailcfg.DERPMap @@ -100,6 +104,10 @@ func DERP(p *Prober, derpMapURL string, opts ...DERPOpt) (*derpProber, error) { nodes: make(map[string]*tailcfg.DERPNode), probes: make(map[string]*Probe), } + d.ProbeMap = ProbeClass{ + Probe: d.probeMapFn, + Class: "derp_map", + } for _, o := range opts { o(d) } @@ -109,10 +117,10 @@ func DERP(p *Prober, derpMapURL string, opts ...DERPOpt) (*derpProber, error) { return d, nil } -// ProbeMap fetches the DERPMap and creates/destroys probes for each +// probeMapFn fetches the DERPMap and creates/destroys probes for each // DERP server as necessary. It should get regularly executed as a // probe function itself. -func (d *derpProber) ProbeMap(ctx context.Context) error { +func (d *derpProber) probeMapFn(ctx context.Context) error { if err := d.updateMap(ctx); err != nil { return err } @@ -123,7 +131,7 @@ func (d *derpProber) ProbeMap(ctx context.Context) error { for _, region := range d.lastDERPMap.Regions { for _, server := range region.Nodes { - labels := map[string]string{ + labels := Labels{ "region": region.RegionCode, "region_id": strconv.Itoa(region.RegionID), "hostname": server.HostName, @@ -169,18 +177,11 @@ func (d *derpProber) ProbeMap(ctx context.Context) error { } if d.bwInterval > 0 && d.bwProbeSize > 0 { - bwLabels := maps.Clone(labels) - bwLabels["probe_size_bytes"] = fmt.Sprintf("%d", d.bwProbeSize) - if server.Name == to.Name { - bwLabels["derp_path"] = "single" - } else { - bwLabels["derp_path"] = "mesh" - } n := fmt.Sprintf("derp/%s/%s/%s/bw", region.RegionCode, server.Name, to.Name) wantProbes[n] = true if d.probes[n] == nil { log.Printf("adding DERP bandwidth probe for %s->%s (%s) %v bytes every %v", server.Name, to.Name, region.RegionName, d.bwProbeSize, d.bwInterval) - d.probes[n] = d.p.Run(n, d.bwInterval, bwLabels, d.bwProbeFn(server.Name, to.Name, d.bwProbeSize)) + d.probes[n] = d.p.Run(n, d.bwInterval, labels, d.bwProbeFn(server.Name, to.Name, d.bwProbeSize)) } } } @@ -198,32 +199,55 @@ func (d *derpProber) ProbeMap(ctx context.Context) error { return nil } -// probeMesh returs a probe func that sends a test packet through a pair of DERP +// probeMesh returs a probe class that sends a test packet through a pair of DERP // servers (or just one server, if 'from' and 'to' are the same). 'from' and 'to' // are expected to be names (DERPNode.Name) of two DERP servers in the same region. -func (d *derpProber) probeMesh(from, to string) ProbeFunc { - return func(ctx context.Context) error { - fromN, toN, err := d.getNodePair(from, to) - if err != nil { - return err - } +func (d *derpProber) probeMesh(from, to string) ProbeClass { + derpPath := "mesh" + if from == to { + derpPath = "single" + } + return ProbeClass{ + Probe: func(ctx context.Context) error { + fromN, toN, err := d.getNodePair(from, to) + if err != nil { + return err + } - dm := d.lastDERPMap - return derpProbeNodePair(ctx, dm, fromN, toN) + dm := d.lastDERPMap + return derpProbeNodePair(ctx, dm, fromN, toN) + }, + Class: "derp_mesh", + Labels: Labels{"derp_path": derpPath}, } } -// probeBandwidth returs a probe func that sends a payload of a given size +// probeBandwidth returs a probe class that sends a payload of a given size // through a pair of DERP servers (or just one server, if 'from' and 'to' are // the same). 'from' and 'to' are expected to be names (DERPNode.Name) of two // DERP servers in the same region. -func (d *derpProber) probeBandwidth(from, to string, size int64) ProbeFunc { - return func(ctx context.Context) error { - fromN, toN, err := d.getNodePair(from, to) - if err != nil { - return err - } - return derpProbeBandwidth(ctx, d.lastDERPMap, fromN, toN, size) +func (d *derpProber) probeBandwidth(from, to string, size int64) ProbeClass { + derpPath := "mesh" + if from == to { + derpPath = "single" + } + var transferTime expvar.Float + return ProbeClass{ + Probe: func(ctx context.Context) error { + fromN, toN, err := d.getNodePair(from, to) + if err != nil { + return err + } + return derpProbeBandwidth(ctx, d.lastDERPMap, fromN, toN, size, &transferTime) + }, + Class: "derp_bw", + Labels: Labels{"derp_path": derpPath}, + Metrics: func(l prometheus.Labels) []prometheus.Metric { + return []prometheus.Metric{ + prometheus.MustNewConstMetric(prometheus.NewDesc("derp_bw_probe_size_bytes", "Payload size of the bandwidth prober", nil, l), prometheus.GaugeValue, float64(size)), + prometheus.MustNewConstMetric(prometheus.NewDesc("derp_bw_transfer_time_seconds_total", "Time it took to transfer data", nil, l), prometheus.CounterValue, transferTime.Value()), + } + }, } } @@ -289,9 +313,12 @@ func (d *derpProber) updateMap(ctx context.Context) error { return nil } -func (d *derpProber) ProbeUDP(ipaddr string, port int) ProbeFunc { - return func(ctx context.Context) error { - return derpProbeUDP(ctx, ipaddr, port) +func (d *derpProber) ProbeUDP(ipaddr string, port int) ProbeClass { + return ProbeClass{ + Probe: func(ctx context.Context) error { + return derpProbeUDP(ctx, ipaddr, port) + }, + Class: "derp_udp", } } @@ -347,7 +374,7 @@ func derpProbeUDP(ctx context.Context, ipStr string, port int) error { // derpProbeBandwidth sends a payload of a given size between two local // DERP clients connected to two DERP servers. -func derpProbeBandwidth(ctx context.Context, dm *tailcfg.DERPMap, from, to *tailcfg.DERPNode, size int64) (err error) { +func derpProbeBandwidth(ctx context.Context, dm *tailcfg.DERPMap, from, to *tailcfg.DERPNode, size int64, transferTime *expvar.Float) (err error) { // This probe uses clients with isProber=false to avoid spamming the derper logs with every packet // sent by the bandwidth probe. fromc, err := newConn(ctx, dm, from, false) @@ -368,6 +395,9 @@ func derpProbeBandwidth(ctx context.Context, dm *tailcfg.DERPMap, from, to *tail time.Sleep(100 * time.Millisecond) // pretty arbitrary } + start := time.Now() + defer func() { transferTime.Add(time.Since(start).Seconds()) }() + if err := runDerpProbeNodePair(ctx, from, to, fromc, toc, size); err != nil { // Record pubkeys on failed probes to aid investigation. return fmt.Errorf("%s -> %s: %w", diff --git a/prober/derp_test.go b/prober/derp_test.go index 5674f9653..138852b91 100644 --- a/prober/derp_test.go +++ b/prober/derp_test.go @@ -60,16 +60,16 @@ func TestDerpProber(t *testing.T) { p: p, derpMapURL: srv.URL, tlsInterval: time.Second, - tlsProbeFn: func(_ string) ProbeFunc { return func(context.Context) error { return nil } }, + tlsProbeFn: func(_ string) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) }, udpInterval: time.Second, - udpProbeFn: func(_ string, _ int) ProbeFunc { return func(context.Context) error { return nil } }, + udpProbeFn: func(_ string, _ int) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) }, meshInterval: time.Second, - meshProbeFn: func(_, _ string) ProbeFunc { return func(context.Context) error { return nil } }, + meshProbeFn: func(_, _ string) ProbeClass { return FuncProbe(func(context.Context) error { return nil }) }, nodes: make(map[string]*tailcfg.DERPNode), probes: make(map[string]*Probe), } - if err := dp.ProbeMap(context.Background()); err != nil { - t.Errorf("unexpected ProbeMap() error: %s", err) + if err := dp.probeMapFn(context.Background()); err != nil { + t.Errorf("unexpected probeMapFn() error: %s", err) } if len(dp.nodes) != 2 || dp.nodes["n1"] == nil || dp.nodes["n2"] == nil { t.Errorf("unexpected nodes: %+v", dp.nodes) @@ -89,8 +89,8 @@ func TestDerpProber(t *testing.T) { IPv4: "1.1.1.1", IPv6: "::1", }) - if err := dp.ProbeMap(context.Background()); err != nil { - t.Errorf("unexpected ProbeMap() error: %s", err) + if err := dp.probeMapFn(context.Background()); err != nil { + t.Errorf("unexpected probeMapFn() error: %s", err) } if len(dp.nodes) != 3 { t.Errorf("unexpected nodes: %+v", dp.nodes) @@ -102,8 +102,8 @@ func TestDerpProber(t *testing.T) { // Remove 2 nodes and check that probes have been destroyed. dm.Regions[0].Nodes = dm.Regions[0].Nodes[:1] - if err := dp.ProbeMap(context.Background()); err != nil { - t.Errorf("unexpected ProbeMap() error: %s", err) + if err := dp.probeMapFn(context.Background()); err != nil { + t.Errorf("unexpected probeMapFn() error: %s", err) } if len(dp.nodes) != 1 { t.Errorf("unexpected nodes: %+v", dp.nodes) diff --git a/prober/dns.go b/prober/dns.go index 4302994fe..77e22ea3f 100644 --- a/prober/dns.go +++ b/prober/dns.go @@ -35,8 +35,12 @@ type ForEachAddrOpts struct { // every time a new IP is discovered. The Probes returned will be closed if an // IP address is no longer in the DNS record for the given hostname. This can // be used to healthcheck every IP address that a hostname resolves to. -func ForEachAddr(host string, makeProbes func(netip.Addr) []*Probe, opts ForEachAddrOpts) ProbeFunc { - return makeForEachAddr(host, makeProbes, opts).run +func ForEachAddr(host string, makeProbes func(netip.Addr) []*Probe, opts ForEachAddrOpts) ProbeClass { + feap := makeForEachAddr(host, makeProbes, opts) + return ProbeClass{ + Probe: feap.run, + Class: "dns_each_addr", + } } func makeForEachAddr(host string, makeProbes func(netip.Addr) []*Probe, opts ForEachAddrOpts) *forEachAddrProbe { diff --git a/prober/dns_example_test.go b/prober/dns_example_test.go index 4bb7471a2..a8326fd72 100644 --- a/prober/dns_example_test.go +++ b/prober/dns_example_test.go @@ -89,11 +89,13 @@ func ExampleForEachAddr() { <-sigCh } -func probeLogWrapper(logf logger.Logf, pf prober.ProbeFunc) prober.ProbeFunc { - return func(ctx context.Context) error { - logf("starting probe") - err := pf(ctx) - logf("probe finished with %v", err) - return err +func probeLogWrapper(logf logger.Logf, pc prober.ProbeClass) prober.ProbeClass { + return prober.ProbeClass{ + Probe: func(ctx context.Context) error { + logf("starting probe") + err := pc.Probe(ctx) + logf("probe finished with %v", err) + return err + }, } } diff --git a/prober/dns_test.go b/prober/dns_test.go index b7c432d11..1b6c31b55 100644 --- a/prober/dns_test.go +++ b/prober/dns_test.go @@ -57,9 +57,9 @@ func TestForEachAddr(t *testing.T) { registered = append(registered, addr) // Return a probe that does nothing; we don't care about what this does. - probe := p.Run(fmt.Sprintf("website/%s", addr), probeInterval, nil, func(_ context.Context) error { + probe := p.Run(fmt.Sprintf("website/%s", addr), probeInterval, nil, FuncProbe(func(_ context.Context) error { return nil - }) + })) return []*Probe{probe} } diff --git a/prober/http.go b/prober/http.go index 5c3355e46..e4b0b26fd 100644 --- a/prober/http.go +++ b/prober/http.go @@ -13,14 +13,17 @@ import ( const maxHTTPBody = 4 << 20 // MiB -// HTTP returns a Probe that healthchecks an HTTP URL. +// HTTP returns a ProbeClass that healthchecks an HTTP URL. // -// The ProbeFunc sends a GET request for url, expects an HTTP 200 +// The probe function sends a GET request for url, expects an HTTP 200 // response, and verifies that want is present in the response // body. -func HTTP(url, wantText string) ProbeFunc { - return func(ctx context.Context) error { - return probeHTTP(ctx, url, []byte(wantText)) +func HTTP(url, wantText string) ProbeClass { + return ProbeClass{ + Probe: func(ctx context.Context) error { + return probeHTTP(ctx, url, []byte(wantText)) + }, + Class: "http", } } diff --git a/prober/prober.go b/prober/prober.go index 35e8b12a3..36afb1a39 100644 --- a/prober/prober.go +++ b/prober/prober.go @@ -12,6 +12,7 @@ import ( "fmt" "hash/fnv" "log" + "maps" "math/rand" "sync" "time" @@ -19,10 +20,33 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -// ProbeFunc is a function that probes something and reports whether -// the probe succeeded. The provided context's deadline must be obeyed -// for correct probe scheduling. -type ProbeFunc func(context.Context) error +// ProbeClass defines a probe of a specific type: a probing function that will +// be regularly ran, and metric labels that will be added automatically to all +// probes using this class. +type ProbeClass struct { + // Probe is a function that probes something and reports whether the Probe + // succeeded. The provided context's deadline must be obeyed for correct + // Probe scheduling. + Probe func(context.Context) error + + // Class defines a user-facing name of the probe class that will be used + // in the `class` metric label. + Class string + + // Labels defines a set of metric labels that will be added to all metrics + // exposed by this probe class. + Labels Labels + + // Metrics allows a probe class to export custom Metrics. Can be nil. + Metrics func(prometheus.Labels) []prometheus.Metric +} + +// FuncProbe wraps a simple probe function in a ProbeClass. +func FuncProbe(fn func(context.Context) error) ProbeClass { + return ProbeClass{ + Probe: fn, + } +} // a Prober manages a set of probes and keeps track of their results. type Prober struct { @@ -61,17 +85,23 @@ func newForTest(now func() time.Time, newTicker func(time.Duration) ticker) *Pro return p } -// Run executes fun every interval, and exports probe results under probeName. +// Run executes probe class function every interval, and exports probe results under probeName. // // Registering a probe under an already-registered name panics. -func (p *Prober) Run(name string, interval time.Duration, labels map[string]string, fun ProbeFunc) *Probe { +func (p *Prober) Run(name string, interval time.Duration, labels Labels, pc ProbeClass) *Probe { p.mu.Lock() defer p.mu.Unlock() if _, ok := p.probes[name]; ok { panic(fmt.Sprintf("probe named %q already registered", name)) } - l := prometheus.Labels{"name": name} + l := prometheus.Labels{ + "name": name, + "class": pc.Class, + } + for k, v := range pc.Labels { + l[k] = v + } for k, v := range labels { l[k] = v } @@ -84,10 +114,11 @@ func (p *Prober) Run(name string, interval time.Duration, labels map[string]stri stopped: make(chan struct{}), name: name, - doProbe: fun, + probeClass: pc, interval: interval, initialDelay: initialDelay(name, interval), metrics: prometheus.NewRegistry(), + metricLabels: l, mInterval: prometheus.NewDesc("interval_secs", "Probe interval in seconds", nil, l), mStartTime: prometheus.NewDesc("start_secs", "Latest probe start time (seconds since epoch)", nil, l), mEndTime: prometheus.NewDesc("end_secs", "Latest probe end time (seconds since epoch)", nil, l), @@ -177,7 +208,7 @@ type Probe struct { stopped chan struct{} // closed when shutdown is complete name string - doProbe ProbeFunc + probeClass ProbeClass interval time.Duration initialDelay time.Duration tick ticker @@ -185,14 +216,15 @@ type Probe struct { // metrics is a Prometheus metrics registry for metrics exported by this probe. // Using a separate registry allows cleanly removing metrics exported by this // probe when it gets unregistered. - metrics *prometheus.Registry - mInterval *prometheus.Desc - mStartTime *prometheus.Desc - mEndTime *prometheus.Desc - mLatency *prometheus.Desc - mResult *prometheus.Desc - mAttempts *prometheus.CounterVec - mSeconds *prometheus.CounterVec + metrics *prometheus.Registry + metricLabels prometheus.Labels + mInterval *prometheus.Desc + mStartTime *prometheus.Desc + mEndTime *prometheus.Desc + mLatency *prometheus.Desc + mResult *prometheus.Desc + mAttempts *prometheus.CounterVec + mSeconds *prometheus.CounterVec mu sync.Mutex start time.Time // last time doProbe started @@ -268,7 +300,7 @@ func (p *Probe) run() { ctx, cancel := context.WithTimeout(p.ctx, timeout) defer cancel() - err := p.doProbe(ctx) + err := p.probeClass.Probe(ctx) p.recordEnd(start, err) if err != nil { log.Printf("probe %s: %v", p.name, err) @@ -349,6 +381,11 @@ func (p *Probe) Describe(ch chan<- *prometheus.Desc) { ch <- p.mLatency p.mAttempts.Describe(ch) p.mSeconds.Describe(ch) + if p.probeClass.Metrics != nil { + for _, m := range p.probeClass.Metrics(p.metricLabels) { + ch <- m.Desc() + } + } } // Collect implements prometheus.Collector. @@ -373,6 +410,11 @@ func (p *Probe) Collect(ch chan<- prometheus.Metric) { } p.mAttempts.Collect(ch) p.mSeconds.Collect(ch) + if p.probeClass.Metrics != nil { + for _, m := range p.probeClass.Metrics(p.metricLabels) { + ch <- m + } + } } // ticker wraps a time.Ticker in a way that can be faked for tests. @@ -401,3 +443,12 @@ func initialDelay(seed string, interval time.Duration) time.Duration { r := rand.New(rand.NewSource(int64(h.Sum64()))).Float64() return time.Duration(float64(interval) * r) } + +// Labels is a set of metric labels used by a prober. +type Labels map[string]string + +func (l Labels) With(k, v string) Labels { + new := maps.Clone(l) + new[k] = v + return new +} diff --git a/prober/prober_test.go b/prober/prober_test.go index 0eedf01cc..a97003a63 100644 --- a/prober/prober_test.go +++ b/prober/prober_test.go @@ -51,10 +51,10 @@ func TestProberTiming(t *testing.T) { } } - p.Run("test-probe", probeInterval, nil, func(context.Context) error { + p.Run("test-probe", probeInterval, nil, FuncProbe(func(context.Context) error { invoked <- struct{}{} return nil - }) + })) waitActiveProbes(t, p, clk, 1) @@ -93,10 +93,10 @@ func TestProberTimingSpread(t *testing.T) { } } - probe := p.Run("test-spread-probe", probeInterval, nil, func(context.Context) error { + probe := p.Run("test-spread-probe", probeInterval, nil, FuncProbe(func(context.Context) error { invoked <- struct{}{} return nil - }) + })) waitActiveProbes(t, p, clk, 1) @@ -156,12 +156,12 @@ func TestProberRun(t *testing.T) { var probes []*Probe for i := 0; i < startingProbes; i++ { - probes = append(probes, p.Run(fmt.Sprintf("probe%d", i), probeInterval, nil, func(context.Context) error { + probes = append(probes, p.Run(fmt.Sprintf("probe%d", i), probeInterval, nil, FuncProbe(func(context.Context) error { mu.Lock() defer mu.Unlock() cnt++ return nil - })) + }))) } checkCnt := func(want int) { @@ -207,13 +207,13 @@ func TestPrometheus(t *testing.T) { p := newForTest(clk.Now, clk.NewTicker).WithMetricNamespace("probe") var succeed atomic.Bool - p.Run("testprobe", probeInterval, map[string]string{"label": "value"}, func(context.Context) error { + p.Run("testprobe", probeInterval, map[string]string{"label": "value"}, FuncProbe(func(context.Context) error { clk.Advance(aFewMillis) if succeed.Load() { return nil } return errors.New("failing, as instructed by test") - }) + })) waitActiveProbes(t, p, clk, 1) @@ -221,16 +221,16 @@ func TestPrometheus(t *testing.T) { want := fmt.Sprintf(` # HELP probe_interval_secs Probe interval in seconds # TYPE probe_interval_secs gauge -probe_interval_secs{label="value",name="testprobe"} %f +probe_interval_secs{class="",label="value",name="testprobe"} %f # HELP probe_start_secs Latest probe start time (seconds since epoch) # TYPE probe_start_secs gauge -probe_start_secs{label="value",name="testprobe"} %d +probe_start_secs{class="",label="value",name="testprobe"} %d # HELP probe_end_secs Latest probe end time (seconds since epoch) # TYPE probe_end_secs gauge -probe_end_secs{label="value",name="testprobe"} %d +probe_end_secs{class="",label="value",name="testprobe"} %d # HELP probe_result Latest probe result (1 = success, 0 = failure) # TYPE probe_result gauge -probe_result{label="value",name="testprobe"} 0 +probe_result{class="",label="value",name="testprobe"} 0 `, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix()) return testutil.GatherAndCompare(p.metrics, strings.NewReader(want), "probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_result") @@ -248,19 +248,19 @@ probe_result{label="value",name="testprobe"} 0 want := fmt.Sprintf(` # HELP probe_interval_secs Probe interval in seconds # TYPE probe_interval_secs gauge -probe_interval_secs{label="value",name="testprobe"} %f +probe_interval_secs{class="",label="value",name="testprobe"} %f # HELP probe_start_secs Latest probe start time (seconds since epoch) # TYPE probe_start_secs gauge -probe_start_secs{label="value",name="testprobe"} %d +probe_start_secs{class="",label="value",name="testprobe"} %d # HELP probe_end_secs Latest probe end time (seconds since epoch) # TYPE probe_end_secs gauge -probe_end_secs{label="value",name="testprobe"} %d +probe_end_secs{class="",label="value",name="testprobe"} %d # HELP probe_latency_millis Latest probe latency (ms) # TYPE probe_latency_millis gauge -probe_latency_millis{label="value",name="testprobe"} %d +probe_latency_millis{class="",label="value",name="testprobe"} %d # HELP probe_result Latest probe result (1 = success, 0 = failure) # TYPE probe_result gauge -probe_result{label="value",name="testprobe"} 1 +probe_result{class="",label="value",name="testprobe"} 1 `, probeInterval.Seconds(), start.Unix(), end.Unix(), aFewMillis.Milliseconds()) return testutil.GatherAndCompare(p.metrics, strings.NewReader(want), "probe_interval_secs", "probe_start_secs", "probe_end_secs", "probe_latency_millis", "probe_result") @@ -274,14 +274,14 @@ func TestOnceMode(t *testing.T) { clk := newFakeTime() p := newForTest(clk.Now, clk.NewTicker).WithOnce(true) - p.Run("probe1", probeInterval, nil, func(context.Context) error { return nil }) - p.Run("probe2", probeInterval, nil, func(context.Context) error { return fmt.Errorf("error2") }) - p.Run("probe3", probeInterval, nil, func(context.Context) error { - p.Run("probe4", probeInterval, nil, func(context.Context) error { + p.Run("probe1", probeInterval, nil, FuncProbe(func(context.Context) error { return nil })) + p.Run("probe2", probeInterval, nil, FuncProbe(func(context.Context) error { return fmt.Errorf("error2") })) + p.Run("probe3", probeInterval, nil, FuncProbe(func(context.Context) error { + p.Run("probe4", probeInterval, nil, FuncProbe(func(context.Context) error { return fmt.Errorf("error4") - }) + })) return nil - }) + })) p.Wait() wantCount := 4 diff --git a/prober/tcp.go b/prober/tcp.go index 9f35a2697..22d054616 100644 --- a/prober/tcp.go +++ b/prober/tcp.go @@ -12,9 +12,12 @@ import ( // TCP returns a Probe that healthchecks a TCP endpoint. // // The ProbeFunc reports whether it can successfully connect to addr. -func TCP(addr string) ProbeFunc { - return func(ctx context.Context) error { - return probeTCP(ctx, addr) +func TCP(addr string) ProbeClass { + return ProbeClass{ + Probe: func(ctx context.Context) error { + return probeTCP(ctx, addr) + }, + Class: "tcp", } } diff --git a/prober/tls.go b/prober/tls.go index db25c9fd5..787df05c2 100644 --- a/prober/tls.go +++ b/prober/tls.go @@ -27,22 +27,28 @@ const expiresSoon = 7 * 24 * time.Hour // 7 days from now // The ProbeFunc connects to a hostPort (host:port string), does a TLS // handshake, verifies that the hostname matches the presented certificate, // checks certificate validity time and OCSP revocation status. -func TLS(hostPort string) ProbeFunc { - return func(ctx context.Context) error { - certDomain, _, err := net.SplitHostPort(hostPort) - if err != nil { - return err - } - return probeTLS(ctx, certDomain, hostPort) +func TLS(hostPort string) ProbeClass { + return ProbeClass{ + Probe: func(ctx context.Context) error { + certDomain, _, err := net.SplitHostPort(hostPort) + if err != nil { + return err + } + return probeTLS(ctx, certDomain, hostPort) + }, + Class: "tls", } } // TLSWithIP is like TLS, but dials the provided dialAddr instead // of using DNS resolution. The certDomain is the expected name in // the cert (and the SNI name to send). -func TLSWithIP(certDomain string, dialAddr netip.AddrPort) ProbeFunc { - return func(ctx context.Context) error { - return probeTLS(ctx, certDomain, dialAddr.String()) +func TLSWithIP(certDomain string, dialAddr netip.AddrPort) ProbeClass { + return ProbeClass{ + Probe: func(ctx context.Context) error { + return probeTLS(ctx, certDomain, dialAddr.String()) + }, + Class: "tls", } }