From cf1e6c6e5518a66b44eec66e4108a1bd16a5f6c4 Mon Sep 17 00:00:00 2001 From: Jordan Whited Date: Wed, 5 Jun 2024 06:36:48 -0700 Subject: [PATCH] cmd/stunstamp: fix remote write retry (#12348) Evaluation of remote write errors was using errors.Is() where it should have been using errors.As(). Updates tailscale/corp#20344 Signed-off-by: Jordan Whited --- cmd/stunstamp/stunstamp.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/cmd/stunstamp/stunstamp.go b/cmd/stunstamp/stunstamp.go index 5c25fc898..6a018585b 100644 --- a/cmd/stunstamp/stunstamp.go +++ b/cmd/stunstamp/stunstamp.go @@ -509,20 +509,24 @@ func (r *remoteWriteClient) write(ctx context.Context, ts []prompb.TimeSeries) e func remoteWriteTimeSeries(client *remoteWriteClient, tsCh chan []prompb.TimeSeries) { bo := backoff.NewBackoff("remote-write", log.Printf, time.Second*30) + // writeErr may contribute to bo's backoff schedule across tsCh read ops, + // i.e. if an unrecoverable error occurs for client.write(ctx, A), that + // should be accounted against bo prior to attempting to + // client.write(ctx, B). + var writeErr error for ts := range tsCh { for { + bo.BackOff(context.Background(), writeErr) reqCtx, cancel := context.WithTimeout(context.Background(), time.Second*30) - err := client.write(reqCtx, ts) + writeErr = client.write(reqCtx, ts) cancel() - // we could parse the Retry-After header, but use a simple exp - // backoff for now - bo.BackOff(context.Background(), err) - if err == nil { - break - } var re recoverableErr - if !errors.Is(err, &re) { - log.Printf("unrecoverable remote write error: %v", err) + recoverable := errors.As(writeErr, &re) + if writeErr != nil { + log.Printf("remote write error(recoverable=%v): %v", recoverable, writeErr) + } + if !recoverable { + // a nil err is not recoverable break } }