control/controlclient: add Auto.updateRoutine

Instead of having updates replace the map polls, create
a third goroutine which is solely responsible for making
sure that control is aware of the latest client state.

This also makes it so that the streaming map polls are only
broken when there are auth changes or when the client is paused.

Updates tailscale/corp#5761

Signed-off-by: Maisem Ali <maisem@tailscale.com>
pull/8841/head
Maisem Ali 1 year ago committed by Maisem Ali
parent 7a5263e6d0
commit d16946854f

@ -9,6 +9,7 @@ import (
"fmt" "fmt"
"net/http" "net/http"
"sync" "sync"
"sync/atomic"
"time" "time"
"tailscale.com/health" "tailscale.com/health"
@ -46,6 +47,91 @@ func (g *LoginGoal) sendLogoutError(err error) {
var _ Client = (*Auto)(nil) var _ Client = (*Auto)(nil)
// waitUnpause blocks until the client is no longer paused, then returns.
// The only error it can return is when the client is being shut down
// (c.quit is closed).
func (c *Auto) waitUnpause(routineLogName string) error {
	c.mu.Lock()
	paused := c.paused
	var unpaused <-chan struct{}
	if paused {
		// Grab the wakeup channel while still holding the lock so we
		// can't miss an unpause that happens between Unlock and select.
		unpaused = c.unpausedChanLocked()
	}
	c.mu.Unlock()

	if !paused {
		return nil
	}

	c.logf("%s: awaiting unpause", routineLogName)
	select {
	case <-unpaused:
		c.logf("%s: unpaused", routineLogName)
		return nil
	case <-c.quit:
		return errors.New("quit")
	}
}
// updateRoutine is responsible for informing the server of worthy changes to
// our local state. It runs in its own goroutine.
func (c *Auto) updateRoutine() {
	defer close(c.updateDone) // lets Shutdown wait for this goroutine to exit
	bo := backoff.NewBackoff("updateRoutine", c.logf, 30*time.Second)
	for {
		if err := c.waitUnpause("updateRoutine"); err != nil {
			// waitUnpause only fails when c.quit is closed.
			c.logf("updateRoutine: exiting")
			return
		}
		c.mu.Lock()
		gen := c.lastUpdateGen
		ctx := c.mapCtx
		// An update is worth sending only if some local change has been
		// recorded (gen > 0), the server hasn't yet been told about that
		// generation, and we're logged in so a request can succeed.
		needUpdate := gen > 0 && gen != c.lastUpdateGenInformed && c.loggedIn
		c.mu.Unlock()

		if needUpdate {
			// Non-blocking quit check before issuing a network request.
			select {
			case <-c.quit:
				c.logf("updateRoutine: exiting")
				return
			default:
			}
		} else {
			// Nothing to do, wait for a signal.
			select {
			case <-c.quit:
				c.logf("updateRoutine: exiting")
				return
			case <-c.updateCh:
				continue
			}
		}

		t0 := c.clock.Now()
		err := c.direct.SendUpdate(ctx)
		d := time.Since(t0).Round(time.Millisecond)
		if err != nil {
			if ctx.Err() == nil {
				// Only log failures that weren't caused by our own
				// context being canceled (e.g. by restartMap/Shutdown).
				c.direct.logf("lite map update error after %v: %v", d, err)
			}
			bo.BackOff(ctx, err)
			continue
		}
		bo.BackOff(ctx, nil)
		c.direct.logf("[v1] successful lite map update in %v", d)

		c.mu.Lock()
		// Record the gen we actually sent. lastUpdateGen may have advanced
		// while the request was in flight; if so, the next loop iteration
		// sees gen != lastUpdateGenInformed and sends again.
		c.lastUpdateGenInformed = gen
		c.mu.Unlock()
	}
}
// updateGen is a monotonically increasing number that represents a particular
// update to the local state.
type updateGen int64

// atomicGen is an atomic int64 generator. It is used to generate monotonically
// increasing numbers for updateGen.
var atomicGen atomic.Int64

// nextUpdateGen returns a fresh, never-before-used generation number by
// atomically advancing the package-wide counter.
func nextUpdateGen() updateGen {
	gen := atomicGen.Add(1)
	return updateGen(gen)
}
// Auto connects to a tailcontrol server for a node. // Auto connects to a tailcontrol server for a node.
// It's a concrete implementation of the Client interface. // It's a concrete implementation of the Client interface.
type Auto struct { type Auto struct {
@ -54,6 +140,7 @@ type Auto struct {
logf logger.Logf logf logger.Logf
expiry *time.Time expiry *time.Time
closed bool closed bool
updateCh chan struct{} // readable when we should inform the server of a change
newMapCh chan struct{} // readable when we must restart a map request newMapCh chan struct{} // readable when we must restart a map request
statusFunc func(Status) // called to update Client status; always non-nil statusFunc func(Status) // called to update Client status; always non-nil
@ -61,25 +148,29 @@ type Auto struct {
mu sync.Mutex // mutex guards the following fields mu sync.Mutex // mutex guards the following fields
// lastUpdateGen is the gen of last update we had an update worth sending to
// the server.
lastUpdateGen updateGen
// lastUpdateGenInformed is the value of lastUpdateAt that we've successfully
// informed the server of.
lastUpdateGenInformed updateGen
paused bool // whether we should stop making HTTP requests paused bool // whether we should stop making HTTP requests
unpauseWaiters []chan struct{} unpauseWaiters []chan struct{}
loggedIn bool // true if currently logged in loggedIn bool // true if currently logged in
loginGoal *LoginGoal // non-nil if some login activity is desired loginGoal *LoginGoal // non-nil if some login activity is desired
synced bool // true if our netmap is up-to-date synced bool // true if our netmap is up-to-date
inPollNetMap bool // true if currently running a PollNetMap
inLiteMapUpdate bool // true if a lite (non-streaming) map request is outstanding
liteMapUpdateCancel context.CancelFunc // cancels a lite map update, may be nil
liteMapUpdateCancels int // how many times we've canceled a lite map update
inSendStatus int // number of sendStatus calls currently in progress inSendStatus int // number of sendStatus calls currently in progress
state State state State
authCtx context.Context // context used for auth requests authCtx context.Context // context used for auth requests
mapCtx context.Context // context used for netmap requests mapCtx context.Context // context used for netmap and update requests
authCancel func() // cancel the auth context authCancel func() // cancel authCtx
mapCancel func() // cancel the netmap context mapCancel func() // cancel mapCtx
quit chan struct{} // when closed, goroutines should all exit quit chan struct{} // when closed, goroutines should all exit
authDone chan struct{} // when closed, auth goroutine is done authDone chan struct{} // when closed, authRoutine is done
mapDone chan struct{} // when closed, map goroutine is done mapDone chan struct{} // when closed, mapRoutine is done
updateDone chan struct{} // when closed, updateRoutine is done
} }
// New creates and starts a new Auto. // New creates and starts a new Auto.
@ -116,10 +207,12 @@ func NewNoStart(opts Options) (_ *Auto, err error) {
direct: direct, direct: direct,
clock: opts.Clock, clock: opts.Clock,
logf: opts.Logf, logf: opts.Logf,
updateCh: make(chan struct{}, 1),
newMapCh: make(chan struct{}, 1), newMapCh: make(chan struct{}, 1),
quit: make(chan struct{}), quit: make(chan struct{}),
authDone: make(chan struct{}), authDone: make(chan struct{}),
mapDone: make(chan struct{}), mapDone: make(chan struct{}),
updateDone: make(chan struct{}),
statusFunc: opts.Status, statusFunc: opts.Status,
} }
c.authCtx, c.authCancel = context.WithCancel(context.Background()) c.authCtx, c.authCancel = context.WithCancel(context.Background())
@ -162,85 +255,34 @@ func (c *Auto) SetPaused(paused bool) {
func (c *Auto) Start() { func (c *Auto) Start() {
go c.authRoutine() go c.authRoutine()
go c.mapRoutine() go c.mapRoutine()
go c.updateRoutine()
} }
// sendNewMapRequest either sends a new OmitPeers, non-streaming map request // updateControl sends a new OmitPeers, non-streaming map request (to just send
// (to just send Hostinfo/Netinfo/Endpoints info, while keeping an existing // Hostinfo/Netinfo/Endpoints info, while keeping an existing streaming response
// streaming response open), or start a new streaming one if necessary. // open).
// //
// It should be called whenever there's something new to tell the server. // It should be called whenever there's something new to tell the server.
func (c *Auto) sendNewMapRequest() { func (c *Auto) updateControl() {
gen := nextUpdateGen()
c.mu.Lock() c.mu.Lock()
if gen < c.lastUpdateGen {
// If we're not already streaming a netmap, then tear down everything // This update is out of date.
// and start a new stream (which starts by sending a new map request)
if !c.inPollNetMap || !c.loggedIn {
c.mu.Unlock() c.mu.Unlock()
c.cancelMapSafely()
return return
} }
c.lastUpdateGen = gen
// If we are already in process of doing a LiteMapUpdate, cancel it and
// try a new one. If this is the 10th time we have done this
// cancelation, tear down everything and start again.
const maxLiteMapUpdateAttempts = 10
if c.inLiteMapUpdate {
// Always cancel the in-flight lite map update, regardless of
// whether we cancel the streaming map request or not.
c.liteMapUpdateCancel()
c.inLiteMapUpdate = false
if c.liteMapUpdateCancels >= maxLiteMapUpdateAttempts {
// Not making progress
c.mu.Unlock() c.mu.Unlock()
c.cancelMapSafely()
return
}
// Increment our cancel counter and continue below to start a select {
// new lite update. case c.updateCh <- struct{}{}:
c.liteMapUpdateCancels++ default:
}
// Otherwise, send a lite update that doesn't keep a
// long-running stream response.
defer c.mu.Unlock()
c.inLiteMapUpdate = true
ctx, cancel := context.WithTimeout(c.mapCtx, 10*time.Second)
c.liteMapUpdateCancel = cancel
go func() {
defer cancel()
t0 := c.clock.Now()
err := c.direct.SendLiteMapUpdate(ctx)
d := time.Since(t0).Round(time.Millisecond)
c.mu.Lock()
c.inLiteMapUpdate = false
c.liteMapUpdateCancel = nil
if err == nil {
c.liteMapUpdateCancels = 0
}
c.mu.Unlock()
if err == nil {
c.logf("[v1] successful lite map update in %v", d)
return
}
if ctx.Err() == nil {
c.logf("lite map update after %v: %v", d, err)
}
if !errors.Is(ctx.Err(), context.Canceled) {
// Fall back to restarting the long-polling map
// request (the old heavy way) if the lite update
// failed for reasons other than the context being
// canceled.
c.cancelMapSafely()
} }
}()
} }
func (c *Auto) cancelAuth() { func (c *Auto) cancelAuth() {
c.mu.Lock() c.mu.Lock()
defer c.mu.Unlock()
if c.authCancel != nil { if c.authCancel != nil {
c.authCancel() c.authCancel()
} }
@ -248,9 +290,9 @@ func (c *Auto) cancelAuth() {
c.authCtx, c.authCancel = context.WithCancel(context.Background()) c.authCtx, c.authCancel = context.WithCancel(context.Background())
c.authCtx = sockstats.WithSockStats(c.authCtx, sockstats.LabelControlClientAuto, c.logf) c.authCtx = sockstats.WithSockStats(c.authCtx, sockstats.LabelControlClientAuto, c.logf)
} }
c.mu.Unlock()
} }
// cancelMapLocked is like cancelMap, but assumes the caller holds c.mu.
func (c *Auto) cancelMapLocked() { func (c *Auto) cancelMapLocked() {
if c.mapCancel != nil { if c.mapCancel != nil {
c.mapCancel() c.mapCancel()
@ -258,56 +300,33 @@ func (c *Auto) cancelMapLocked() {
if !c.closed { if !c.closed {
c.mapCtx, c.mapCancel = context.WithCancel(context.Background()) c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
c.mapCtx = sockstats.WithSockStats(c.mapCtx, sockstats.LabelControlClientAuto, c.logf) c.mapCtx = sockstats.WithSockStats(c.mapCtx, sockstats.LabelControlClientAuto, c.logf)
} }
} }
func (c *Auto) cancelMapUnsafely() { // cancelMap cancels the existing mapPoll and liteUpdates.
func (c *Auto) cancelMap() {
c.mu.Lock() c.mu.Lock()
defer c.mu.Unlock()
c.cancelMapLocked() c.cancelMapLocked()
c.mu.Unlock()
} }
func (c *Auto) cancelMapSafely() { // restartMap cancels the existing mapPoll and liteUpdates, and then starts a
c.mu.Lock() // new one.
defer c.mu.Unlock() func (c *Auto) restartMap() {
c.cancelMap()
// Always reset our lite map cancels counter if we're canceling c.logf("[v1] restartMap: synced=%v", c.synced)
// everything, since we're about to restart with a new map update; this
// allows future calls to sendNewMapRequest to retry sending lite
// updates.
c.liteMapUpdateCancels = 0
c.logf("[v1] cancelMapSafely: synced=%v", c.synced)
if c.inPollNetMap {
// received at least one netmap since the last
// interruption. That means the server has already
// fully processed our last request, which might
// include UpdateEndpoints(). Interrupt it and try
// again.
c.cancelMapLocked()
} else {
// !synced means we either haven't done a netmap
// request yet, or it hasn't answered yet. So the
// server is in an undefined state. If we send
// another netmap request too soon, it might race
// with the last one, and if we're very unlucky,
// the new request will be applied before the old one,
// and the wrong endpoints will get registered. We
// have to tell the client to abort politely, only
// after it receives a response to its existing netmap
// request.
select { select {
case c.newMapCh <- struct{}{}: case c.newMapCh <- struct{}{}:
c.logf("[v1] cancelMapSafely: wrote to channel") c.logf("[v1] restartMap: wrote to channel")
default: default:
// if channel write failed, then there was already // if channel write failed, then there was already
// an outstanding newMapCh request. One is enough, // an outstanding newMapCh request. One is enough,
// since it'll always use the latest endpoints. // since it'll always use the latest endpoints.
c.logf("[v1] cancelMapSafely: channel was full") c.logf("[v1] restartMap: channel was full")
}
} }
c.updateControl()
} }
func (c *Auto) authRoutine() { func (c *Auto) authRoutine() {
@ -428,7 +447,7 @@ func (c *Auto) authRoutine() {
c.mu.Unlock() c.mu.Unlock()
c.sendStatus("authRoutine-success", nil, "", nil) c.sendStatus("authRoutine-success", nil, "", nil)
c.cancelMapSafely() c.restartMap()
bo.BackOff(ctx, nil) bo.BackOff(ctx, nil)
} }
} }
@ -458,25 +477,19 @@ func (c *Auto) unpausedChanLocked() <-chan struct{} {
return unpaused return unpaused
} }
// mapRoutine is responsible for keeping a read-only streaming connection to the
// control server, and keeping the netmap up to date.
func (c *Auto) mapRoutine() { func (c *Auto) mapRoutine() {
defer close(c.mapDone) defer close(c.mapDone)
bo := backoff.NewBackoff("mapRoutine", c.logf, 30*time.Second) bo := backoff.NewBackoff("mapRoutine", c.logf, 30*time.Second)
for { for {
c.mu.Lock() if err := c.waitUnpause("mapRoutine"); err != nil {
if c.paused { c.logf("mapRoutine: exiting")
unpaused := c.unpausedChanLocked()
c.mu.Unlock()
c.logf("mapRoutine: awaiting unpause")
select {
case <-unpaused:
c.logf("mapRoutine: unpaused")
case <-c.quit:
c.logf("mapRoutine: quit")
return return
} }
continue
} c.mu.Lock()
c.logf("[v1] mapRoutine: %s", c.state) c.logf("[v1] mapRoutine: %s", c.state)
loggedIn := c.loggedIn loggedIn := c.loggedIn
ctx := c.mapCtx ctx := c.mapCtx
@ -513,43 +526,21 @@ func (c *Auto) mapRoutine() {
c.logf("[v1] mapRoutine: new map needed while idle.") c.logf("[v1] mapRoutine: new map needed while idle.")
} }
} else { } else {
// Be sure this is false when we're not inside
// PollNetMap, so that cancelMapSafely() can notify
// us correctly.
c.mu.Lock()
c.inPollNetMap = false
c.mu.Unlock()
health.SetInPollNetMap(false) health.SetInPollNetMap(false)
err := c.direct.PollNetMap(ctx, func(nm *netmap.NetworkMap) { err := c.direct.PollNetMap(ctx, func(nm *netmap.NetworkMap) {
health.SetInPollNetMap(true) health.SetInPollNetMap(true)
c.mu.Lock()
select {
case <-c.newMapCh:
c.logf("[v1] mapRoutine: new map request during PollNetMap. canceling.")
c.cancelMapLocked()
// Don't emit this netmap; we're
// about to request a fresh one.
c.mu.Unlock()
return
default:
}
c.mu.Lock()
c.synced = true c.synced = true
c.inPollNetMap = true
if c.loggedIn { if c.loggedIn {
c.state = StateSynchronized c.state = StateSynchronized
} }
exp := nm.Expiry c.expiry = ptr.To(nm.Expiry)
c.expiry = &exp
stillAuthed := c.loggedIn stillAuthed := c.loggedIn
state := c.state c.logf("[v1] mapRoutine: netmap received: %s", c.state)
c.mu.Unlock() c.mu.Unlock()
c.logf("[v1] mapRoutine: netmap received: %s", state)
if stillAuthed { if stillAuthed {
c.sendStatus("mapRoutine-got-netmap", nil, "", nm) c.sendStatus("mapRoutine-got-netmap", nil, "", nm)
} }
@ -560,7 +551,6 @@ func (c *Auto) mapRoutine() {
health.SetInPollNetMap(false) health.SetInPollNetMap(false)
c.mu.Lock() c.mu.Lock()
c.synced = false c.synced = false
c.inPollNetMap = false
if c.state == StateSynchronized { if c.state == StateSynchronized {
c.state = StateAuthenticated c.state = StateAuthenticated
} }
@ -602,7 +592,7 @@ func (c *Auto) SetHostinfo(hi *tailcfg.Hostinfo) {
} }
// Send new Hostinfo to server // Send new Hostinfo to server
c.sendNewMapRequest() c.updateControl()
} }
func (c *Auto) SetNetInfo(ni *tailcfg.NetInfo) { func (c *Auto) SetNetInfo(ni *tailcfg.NetInfo) {
@ -614,12 +604,17 @@ func (c *Auto) SetNetInfo(ni *tailcfg.NetInfo) {
} }
// Send new NetInfo to server // Send new NetInfo to server
c.sendNewMapRequest() c.updateControl()
} }
// SetTKAHead updates the TKA head hash that map-request infrastructure sends. // SetTKAHead updates the TKA head hash that map-request infrastructure sends.
func (c *Auto) SetTKAHead(headHash string) { func (c *Auto) SetTKAHead(headHash string) {
c.direct.SetTKAHead(headHash) if !c.direct.SetTKAHead(headHash) {
return
}
// Send new TKAHead to server
c.updateControl()
} }
func (c *Auto) sendStatus(who string, err error, url string, nm *netmap.NetworkMap) { func (c *Auto) sendStatus(who string, err error, url string, nm *netmap.NetworkMap) {
@ -728,7 +723,7 @@ func (c *Auto) SetExpirySooner(ctx context.Context, expiry time.Time) error {
func (c *Auto) UpdateEndpoints(endpoints []tailcfg.Endpoint) { func (c *Auto) UpdateEndpoints(endpoints []tailcfg.Endpoint) {
changed := c.direct.SetEndpoints(endpoints) changed := c.direct.SetEndpoints(endpoints)
if changed { if changed {
c.sendNewMapRequest() c.updateControl()
} }
} }
@ -750,8 +745,9 @@ func (c *Auto) Shutdown() {
close(c.quit) close(c.quit)
c.cancelAuth() c.cancelAuth()
<-c.authDone <-c.authDone
c.cancelMapUnsafely() c.cancelMap()
<-c.mapDone <-c.mapDone
<-c.updateDone
if direct != nil { if direct != nil {
direct.Close() direct.Close()
} }

@ -51,6 +51,7 @@ import (
"tailscale.com/types/netmap" "tailscale.com/types/netmap"
"tailscale.com/types/opt" "tailscale.com/types/opt"
"tailscale.com/types/persist" "tailscale.com/types/persist"
"tailscale.com/types/ptr"
"tailscale.com/types/tkatype" "tailscale.com/types/tkatype"
"tailscale.com/util/clientmetric" "tailscale.com/util/clientmetric"
"tailscale.com/util/multierr" "tailscale.com/util/multierr"
@ -259,10 +260,8 @@ func NewDirect(opts Options) (*Direct, error) {
if opts.Hostinfo == nil { if opts.Hostinfo == nil {
c.SetHostinfo(hostinfo.New()) c.SetHostinfo(hostinfo.New())
} else { } else {
ni := opts.Hostinfo.NetInfo
opts.Hostinfo.NetInfo = nil
c.SetHostinfo(opts.Hostinfo) c.SetHostinfo(opts.Hostinfo)
if ni != nil { if ni := opts.Hostinfo.NetInfo; ni != nil {
c.SetNetInfo(ni) c.SetNetInfo(ni)
} }
} }
@ -294,6 +293,8 @@ func (c *Direct) SetHostinfo(hi *tailcfg.Hostinfo) bool {
if hi == nil { if hi == nil {
panic("nil Hostinfo") panic("nil Hostinfo")
} }
hi = ptr.To(*hi)
hi.NetInfo = nil
c.mu.Lock() c.mu.Lock()
defer c.mu.Unlock() defer c.mu.Unlock()
@ -771,13 +772,13 @@ func (c *Direct) SetEndpoints(endpoints []tailcfg.Endpoint) (changed bool) {
// It always returns a non-nil error describing the reason for the failure // It always returns a non-nil error describing the reason for the failure
// or why the request ended. // or why the request ended.
func (c *Direct) PollNetMap(ctx context.Context, cb func(*netmap.NetworkMap)) error { func (c *Direct) PollNetMap(ctx context.Context, cb func(*netmap.NetworkMap)) error {
return c.sendMapRequest(ctx, -1, false, cb) return c.sendMapRequest(ctx, true, cb)
} }
// FetchNetMap fetches the netmap once. // FetchNetMapForTest fetches the netmap once.
func (c *Direct) FetchNetMap(ctx context.Context) (*netmap.NetworkMap, error) { func (c *Direct) FetchNetMapForTest(ctx context.Context) (*netmap.NetworkMap, error) {
var ret *netmap.NetworkMap var ret *netmap.NetworkMap
err := c.sendMapRequest(ctx, 1, false, func(nm *netmap.NetworkMap) { err := c.sendMapRequest(ctx, false, func(nm *netmap.NetworkMap) {
ret = nm ret = nm
}) })
if err == nil && ret == nil { if err == nil && ret == nil {
@ -786,11 +787,11 @@ func (c *Direct) FetchNetMap(ctx context.Context) (*netmap.NetworkMap, error) {
return ret, err return ret, err
} }
// SendLiteMapUpdate makes a /map request to update the server of our latest state, // SendUpdate makes a /map request to update the server of our latest state, but
// but does not fetch anything. It returns an error if the server did not return a // does not fetch anything. It returns an error if the server did not return a
// successful 200 OK response. // successful 200 OK response.
func (c *Direct) SendLiteMapUpdate(ctx context.Context) error { func (c *Direct) SendUpdate(ctx context.Context) error {
return c.sendMapRequest(ctx, 1, false, nil) return c.sendMapRequest(ctx, false, nil)
} }
// If we go more than pollTimeout without hearing from the server, // If we go more than pollTimeout without hearing from the server,
@ -798,17 +799,21 @@ func (c *Direct) SendLiteMapUpdate(ctx context.Context) error {
// every minute. // every minute.
const pollTimeout = 120 * time.Second const pollTimeout = 120 * time.Second
// sendMapRequest makes a /map request to download the network map, calling cb with // sendMapRequest makes a /map request to download the network map, calling cb
// each new netmap. If maxPolls is -1, it will poll forever and only returns if // with each new netmap. If isStreaming, it will poll forever and only returns
// the context expires or the server returns an error/closes the connection and as // if the context expires or the server returns an error/closes the connection
// such always returns a non-nil error. // and as such always returns a non-nil error.
// //
// If cb is nil, OmitPeers will be set to true. // If cb is nil, OmitPeers will be set to true.
func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool, cb func(*netmap.NetworkMap)) error { func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, cb func(*netmap.NetworkMap)) error {
if isStreaming && cb == nil {
panic("cb must be non-nil if isStreaming is true")
}
metricMapRequests.Add(1) metricMapRequests.Add(1)
metricMapRequestsActive.Add(1) metricMapRequestsActive.Add(1)
defer metricMapRequestsActive.Add(-1) defer metricMapRequestsActive.Add(-1)
if maxPolls == -1 { if isStreaming {
metricMapRequestsPoll.Add(1) metricMapRequestsPoll.Add(1)
} else { } else {
metricMapRequestsLite.Add(1) metricMapRequestsLite.Add(1)
@ -844,8 +849,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool
return errors.New("hostinfo: BackendLogID missing") return errors.New("hostinfo: BackendLogID missing")
} }
allowStream := maxPolls != 1 c.logf("[v1] PollNetMap: stream=%v ep=%v", isStreaming, epStrs)
c.logf("[v1] PollNetMap: stream=%v ep=%v", allowStream, epStrs)
vlogf := logger.Discard vlogf := logger.Discard
if DevKnob.DumpNetMaps() { if DevKnob.DumpNetMaps() {
@ -861,23 +865,11 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool
DiscoKey: c.discoPubKey, DiscoKey: c.discoPubKey,
Endpoints: epStrs, Endpoints: epStrs,
EndpointTypes: epTypes, EndpointTypes: epTypes,
Stream: allowStream, Stream: isStreaming,
Hostinfo: hi, Hostinfo: hi,
DebugFlags: c.debugFlags, DebugFlags: c.debugFlags,
OmitPeers: cb == nil, OmitPeers: cb == nil,
TKAHead: c.tkaHead, TKAHead: c.tkaHead,
// Previously we'd set ReadOnly to true if we didn't have any endpoints
// yet as we expected to learn them in a half second and restart the full
// streaming map poll, however as we are trying to reduce the number of
// times we restart the full streaming map poll we now just set ReadOnly
// false when we're doing a full streaming map poll.
//
// TODO(maisem/bradfitz): really ReadOnly should be set to true if for
// all streams and we should only do writes via lite map updates.
// However that requires an audit and a bunch of testing to make sure we
// don't break anything.
ReadOnly: readOnly && !allowStream,
} }
var extraDebugFlags []string var extraDebugFlags []string
if hi != nil && c.netMon != nil && !c.skipIPForwardingCheck && if hi != nil && c.netMon != nil && !c.skipIPForwardingCheck &&
@ -994,7 +986,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool
// the same format before just closing the connection. // the same format before just closing the connection.
// We can use this same read loop either way. // We can use this same read loop either way.
var msg []byte var msg []byte
for i := 0; i < maxPolls || maxPolls < 0; i++ { for i := 0; i == 0 || isStreaming; i++ {
vlogf("netmap: starting size read after %v (poll %v)", time.Since(t0).Round(time.Millisecond), i) vlogf("netmap: starting size read after %v (poll %v)", time.Since(t0).Round(time.Millisecond), i)
var siz [4]byte var siz [4]byte
if _, err := io.ReadFull(res.Body, siz[:]); err != nil { if _, err := io.ReadFull(res.Body, siz[:]); err != nil {
@ -1018,7 +1010,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool
metricMapResponseMessages.Add(1) metricMapResponseMessages.Add(1)
if allowStream { if isStreaming {
health.GotStreamedMapResponse() health.GotStreamedMapResponse()
} }

@ -42,7 +42,10 @@ func TestNewDirect(t *testing.T) {
t.Errorf("c.serverURL got %v want %v", c.serverURL, opts.ServerURL) t.Errorf("c.serverURL got %v want %v", c.serverURL, opts.ServerURL)
} }
if !hi.Equal(c.hostinfo) { // hi is stored without its NetInfo field.
hiWithoutNi := *hi
hiWithoutNi.NetInfo = nil
if !hiWithoutNi.Equal(c.hostinfo) {
t.Errorf("c.hostinfo got %v want %v", c.hostinfo, hi) t.Errorf("c.hostinfo got %v want %v", c.hostinfo, hi)
} }

@ -105,7 +105,8 @@ type CapabilityVersion int
// - 65: 2023-07-12: Client understands DERPMap.HomeParams + incremental DERPMap updates with params // - 65: 2023-07-12: Client understands DERPMap.HomeParams + incremental DERPMap updates with params
// - 66: 2023-07-23: UserProfile.Groups added (available via WhoIs) // - 66: 2023-07-23: UserProfile.Groups added (available via WhoIs)
// - 67: 2023-07-25: Client understands PeerCapMap // - 67: 2023-07-25: Client understands PeerCapMap
const CurrentCapabilityVersion CapabilityVersion = 67 // - 68: 2023-08-09: Client has dedicated updateRoutine; MapRequest.Stream true means ignore Hostinfo+Endpoints
const CurrentCapabilityVersion CapabilityVersion = 68
type StableID string type StableID string
@ -1082,7 +1083,20 @@ type MapRequest struct {
NodeKey key.NodePublic NodeKey key.NodePublic
DiscoKey key.DiscoPublic DiscoKey key.DiscoPublic
IncludeIPv6 bool `json:",omitempty"` // include IPv6 endpoints in returned Node Endpoints (for Version 4 clients) IncludeIPv6 bool `json:",omitempty"` // include IPv6 endpoints in returned Node Endpoints (for Version 4 clients)
Stream bool // if true, multiple MapResponse objects are returned
// Stream is whether the client wants to receive multiple MapResponses over
// the same HTTP connection.
//
// If false, the server will send a single MapResponse and then close the
// connection.
//
// If true and Version >= 68, the server should treat this as a read-only
// request and ignore any Hostinfo or other fields that might be set.
Stream bool
// Hostinfo is the client's current Hostinfo. Although it is always included
// in the request, the server may choose to ignore it when Stream is true
// and Version >= 68.
Hostinfo *Hostinfo Hostinfo *Hostinfo
// MapSessionHandle, if non-empty, is a request to reattach to a previous // MapSessionHandle, if non-empty, is a request to reattach to a previous
@ -1105,6 +1119,7 @@ type MapRequest struct {
MapSessionSeq int64 `json:",omitempty"` MapSessionSeq int64 `json:",omitempty"`
// Endpoints are the client's magicsock UDP ip:port endpoints (IPv4 or IPv6). // Endpoints are the client's magicsock UDP ip:port endpoints (IPv4 or IPv6).
// These can be ignored if Stream is true and Version >= 68.
Endpoints []string Endpoints []string
// EndpointTypes are the types of the corresponding endpoints in Endpoints. // EndpointTypes are the types of the corresponding endpoints in Endpoints.
EndpointTypes []EndpointType `json:",omitempty"` EndpointTypes []EndpointType `json:",omitempty"`
@ -1114,13 +1129,12 @@ type MapRequest struct {
// It is encoded as tka.AUMHash.MarshalText. // It is encoded as tka.AUMHash.MarshalText.
TKAHead string `json:",omitempty"` TKAHead string `json:",omitempty"`
// ReadOnly is whether the client just wants to fetch the // ReadOnly was set when client just wanted to fetch the MapResponse,
// MapResponse, without updating their Endpoints. The // without updating their Endpoints. The intended use was for clients to
// Endpoints field will be ignored and LastSeen will not be // discover the DERP map at start-up before their first real endpoint
// updated and peers will not be notified of changes. // update.
// //
// The intended use is for clients to discover the DERP map at // Deprecated: always false as of Version 68.
// start-up before their first real endpoint update.
ReadOnly bool `json:",omitempty"` ReadOnly bool `json:",omitempty"`
// OmitPeers is whether the client is okay with the Peers list being omitted // OmitPeers is whether the client is okay with the Peers list being omitted

Loading…
Cancel
Save