prober: only record latency for successful probes

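Failed probes no longer report a latency: it is reset to zero and left
out of the expvar and Prometheus output. This will make it easier to
track probe latency on a dashboard.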

Updates https://github.com/tailscale/corp/issues/9916

Signed-off-by: Anton Tolchanov <anton@tailscale.com>
pull/7654/head
Anton Tolchanov 1 year ago committed by Anton Tolchanov
parent d92047cc30
commit 7083246409

@ -161,11 +161,12 @@ type Probe struct {
tick ticker tick ticker
labels map[string]string labels map[string]string
mu sync.Mutex mu sync.Mutex
start time.Time // last time doProbe started start time.Time // last time doProbe started
end time.Time // last time doProbe returned end time.Time // last time doProbe returned
result bool // whether the last doProbe call succeeded latency time.Duration // last successful probe latency
lastErr error succeeded bool // whether the last doProbe call succeeded
lastErr error
} }
// Close shuts down the Probe and unregisters it from its Prober. // Close shuts down the Probe and unregisters it from its Prober.
@ -254,8 +255,13 @@ func (p *Probe) recordEnd(start time.Time, err error) {
p.mu.Lock() p.mu.Lock()
defer p.mu.Unlock() defer p.mu.Unlock()
p.end = end p.end = end
p.result = err == nil p.succeeded = err == nil
p.lastErr = err p.lastErr = err
if p.succeeded {
p.latency = end.Sub(p.start)
} else {
p.latency = 0
}
} }
type varExporter struct { type varExporter struct {
@ -289,13 +295,13 @@ func (v varExporter) probeInfo() map[string]ProbeInfo {
Labels: probe.labels, Labels: probe.labels,
Start: probe.start, Start: probe.start,
End: probe.end, End: probe.end,
Result: probe.result, Result: probe.succeeded,
} }
if probe.lastErr != nil { if probe.lastErr != nil {
inf.Error = probe.lastErr.Error() inf.Error = probe.lastErr.Error()
} }
if probe.end.After(probe.start) { if probe.latency > 0 {
inf.Latency = probe.end.Sub(probe.start).String() inf.Latency = probe.latency.String()
} }
out[probe.name] = inf out[probe.name] = inf
probe.mu.Unlock() probe.mu.Unlock()
@ -358,9 +364,10 @@ func (v varExporter) WritePrometheus(w io.Writer, prefix string) {
} }
if !probe.end.IsZero() { if !probe.end.IsZero() {
fmt.Fprintf(w, "%s_end_secs{%s} %d\n", prefix, labels, probe.end.Unix()) fmt.Fprintf(w, "%s_end_secs{%s} %d\n", prefix, labels, probe.end.Unix())
// Start is always present if end is. if probe.latency > 0 {
fmt.Fprintf(w, "%s_latency_millis{%s} %d\n", prefix, labels, probe.end.Sub(probe.start).Milliseconds()) fmt.Fprintf(w, "%s_latency_millis{%s} %d\n", prefix, labels, probe.latency.Milliseconds())
if probe.result { }
if probe.succeeded {
fmt.Fprintf(w, "%s_result{%s} 1\n", prefix, labels) fmt.Fprintf(w, "%s_result{%s} 1\n", prefix, labels)
} else { } else {
fmt.Fprintf(w, "%s_result{%s} 0\n", prefix, labels) fmt.Fprintf(w, "%s_result{%s} 0\n", prefix, labels)

@ -237,12 +237,11 @@ func TestExpvar(t *testing.T) {
} }
check("probe", ProbeInfo{ check("probe", ProbeInfo{
Labels: map[string]string{"label": "value"}, Labels: map[string]string{"label": "value"},
Start: epoch, Start: epoch,
End: epoch.Add(aFewMillis), End: epoch.Add(aFewMillis),
Latency: aFewMillis.String(), Result: false,
Result: false, Error: "failing, as instructed by test",
Error: "failing, as instructed by test",
}) })
succeed.Store(true) succeed.Store(true)
@ -280,9 +279,8 @@ func TestPrometheus(t *testing.T) {
probe_interval_secs{name="testprobe",label="value"} %f probe_interval_secs{name="testprobe",label="value"} %f
probe_start_secs{name="testprobe",label="value"} %d probe_start_secs{name="testprobe",label="value"} %d
probe_end_secs{name="testprobe",label="value"} %d probe_end_secs{name="testprobe",label="value"} %d
probe_latency_millis{name="testprobe",label="value"} %d
probe_result{name="testprobe",label="value"} 0 probe_result{name="testprobe",label="value"} 0
`, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix(), aFewMillis.Milliseconds())) `, probeInterval.Seconds(), epoch.Unix(), epoch.Add(aFewMillis).Unix()))
if diff := cmp.Diff(strings.TrimSpace(b.String()), want); diff != "" { if diff := cmp.Diff(strings.TrimSpace(b.String()), want); diff != "" {
return fmt.Errorf("wrong probe stats (-got+want):\n%s", diff) return fmt.Errorf("wrong probe stats (-got+want):\n%s", diff)
} }

Loading…
Cancel
Save