@@ -13,19 +13,19 @@ import (
 	"fmt"
 	"io"
 	"log"
+	mrand "math/rand"
 	"net/http"
 	"os"
 	"strconv"
-	"strings"
 	"sync"
 	"sync/atomic"
 	"time"
 
 	"tailscale.com/envknob"
-	"tailscale.com/logtail/backoff"
 	"tailscale.com/net/interfaces"
 	"tailscale.com/net/netmon"
 	"tailscale.com/net/sockstats"
+	"tailscale.com/tstime"
 	tslogger "tailscale.com/types/logger"
 	"tailscale.com/types/logid"
 	"tailscale.com/util/set"
@@ -128,9 +128,6 @@ func NewLogger(cfg Config, logf tslogger.Logf) *Logger {
 		cfg.FlushDelayFn = func() time.Duration { return 0 }
 	}
-	stdLogf := func(f string, a ...any) {
-		fmt.Fprintf(cfg.Stderr, strings.TrimSuffix(f, "\n")+"\n", a...)
-	}
 	var urlSuffix string
 	if !cfg.CopyPrivateID.IsZero() {
 		urlSuffix = "?copyId=" + cfg.CopyPrivateID.String()
@@ -148,7 +145,6 @@ func NewLogger(cfg Config, logf tslogger.Logf) *Logger {
 		sentinel:       make(chan int32, 16),
 		flushDelayFn:   cfg.FlushDelayFn,
 		timeNow:        cfg.TimeNow,
-		bo:             backoff.NewBackoff("logtail", stdLogf, 30*time.Second),
 		metricsDelta:   cfg.MetricsDelta,
 		sockstatsLabel: sockstats.LabelLogtailLogger,
@@ -186,7 +182,6 @@ type Logger struct {
 	flushPending atomic.Bool
 	sentinel     chan int32
 	timeNow      func() time.Time
-	bo           *backoff.Backoff
 	zstdEncoder  Encoder
 	uploadCancel func()
 	explainedRaw bool
@@ -373,23 +368,38 @@ func (l *Logger) uploading(ctx context.Context) {
 			}
 		}
 
-		for len(body) > 0 {
-			select {
-			case <-ctx.Done():
-				return
-			default:
-			}
-			uploaded, err := l.upload(ctx, body, origlen)
+		var lastError string
+		var numFailures int
+		var firstFailure time.Time
+		for len(body) > 0 && ctx.Err() == nil {
+			retryAfter, err := l.upload(ctx, body, origlen)
 			if err != nil {
+				numFailures++
+				firstFailure = time.Now()
+
 				if !l.internetUp() {
 					fmt.Fprintf(l.stderr, "logtail: internet down; waiting\n")
 					l.awaitInternetUp(ctx)
 					continue
 				}
-				fmt.Fprintf(l.stderr, "logtail: upload: %v\n", err)
-			}
-			l.bo.BackOff(ctx, err)
-			if uploaded {
+
+				// Only print the same message once.
+				if currError := err.Error(); lastError != currError {
+					fmt.Fprintf(l.stderr, "logtail: upload: %v\n", err)
+					lastError = currError
+				}
+
+				// Sleep for the specified retryAfter period,
+				// otherwise default to some random value.
+				if retryAfter <= 0 {
+					retryAfter = time.Duration(30+mrand.Intn(30)) * time.Second
+				}
+				tstime.Sleep(ctx, retryAfter)
+			} else {
+				// Only print a success message after recovery.
+				if numFailures > 0 {
+					fmt.Fprintf(l.stderr, "logtail: upload succeeded after %d failures and %s\n", numFailures, time.Since(firstFailure).Round(time.Second))
+				}
 				break
 			}
 		}
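Aside (not part of the patch): the hunk above replaces the backoff.Backoff-driven wait with a delay taken from the server's Retry-After value when one is returned, and otherwise a randomized 30-60 second fallback, slept through the context-aware tstime.Sleep. A minimal standalone sketch of that delay selection, using an invented helper name retryDelay:

package main

import (
	"fmt"
	mrand "math/rand"
	"time"
)

// retryDelay restates the inline logic from uploading(): honor a positive
// server-supplied Retry-After duration, otherwise pick a random wait in
// [30s, 60s) so retries from many clients do not synchronize.
func retryDelay(retryAfter time.Duration) time.Duration {
	if retryAfter > 0 {
		return retryAfter
	}
	return time.Duration(30+mrand.Intn(30)) * time.Second
}

func main() {
	fmt.Println(retryDelay(2 * time.Minute))      // 2m0s, server-directed
	fmt.Println(retryDelay(0) >= 30*time.Second)  // true, randomized fallback
	fmt.Println(retryDelay(0) < 60*time.Second)   // true
}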
@@ -433,7 +443,7 @@ func (l *Logger) awaitInternetUp(ctx context.Context) {
 // upload uploads body to the log server.
 // origlen indicates the pre-compression body length.
 // origlen of -1 indicates that the body is not compressed.
-func (l *Logger) upload(ctx context.Context, body []byte, origlen int) (uploaded bool, err error) {
+func (l *Logger) upload(ctx context.Context, body []byte, origlen int) (retryAfter time.Duration, err error) {
 	const maxUploadTime = 45 * time.Second
 	ctx = sockstats.WithSockStats(ctx, l.sockstatsLabel, l.Logf)
 	ctx, cancel := context.WithTimeout(ctx, maxUploadTime)
@@ -460,17 +470,16 @@ func (l *Logger) upload(ctx context.Context, body []byte, origlen int) (uploaded
 	l.httpDoCalls.Add(1)
 	resp, err := l.httpc.Do(req)
 	if err != nil {
-		return false, fmt.Errorf("log upload of %d bytes %s failed: %v", len(body), compressedNote, err)
+		return 0, fmt.Errorf("log upload of %d bytes %s failed: %v", len(body), compressedNote, err)
 	}
 	defer resp.Body.Close()
-	if resp.StatusCode != 200 {
-		uploaded = resp.StatusCode == 400 // the server saved the logs anyway
-		b, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
-		return uploaded, fmt.Errorf("log upload of %d bytes %s failed %d: %q", len(body), compressedNote, resp.StatusCode, b)
+	if resp.StatusCode != http.StatusOK {
+		n, _ := strconv.Atoi(resp.Header.Get("Retry-After"))
+		b, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<10))
+		return time.Duration(n) * time.Second, fmt.Errorf("log upload of %d bytes %s failed %d: %s", len(body), compressedNote, resp.StatusCode, bytes.TrimSpace(b))
 	}
-	return true, nil
+	return 0, nil
 }
 
 // Flush uploads all logs to the server. It blocks until complete or there is an
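Aside (not part of the patch): the rewritten non-OK branch above reads Retry-After as a whole number of seconds via strconv.Atoi, so an absent or non-numeric header (including the HTTP-date form) yields a zero duration, and the caller in uploading() substitutes its randomized default. A small sketch of that behavior, with the helper name parseRetryAfter invented here:

package main

import (
	"fmt"
	"net/http"
	"strconv"
	"time"
)

// parseRetryAfter mirrors the header handling added in upload():
// strconv.Atoi on a missing or malformed Retry-After returns 0 (its error
// is ignored), so the resulting duration is 0 and the retry loop falls
// back to its own default delay.
func parseRetryAfter(h http.Header) time.Duration {
	n, _ := strconv.Atoi(h.Get("Retry-After"))
	return time.Duration(n) * time.Second
}

func main() {
	h := http.Header{}
	fmt.Println(parseRetryAfter(h)) // 0s: header absent
	h.Set("Retry-After", "120")
	fmt.Println(parseRetryAfter(h)) // 2m0s
	h.Set("Retry-After", "Wed, 21 Oct 2015 07:28:00 GMT")
	fmt.Println(parseRetryAfter(h)) // 0s: HTTP-date form is not handled
}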