From eccc1677333ca1b3f7f0c29c7606c22710d5f34e Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Wed, 18 Nov 2020 12:32:38 -0800 Subject: [PATCH] wgengine/monitor: fix memory corruption in Windows implementation I used the Windows APIs wrong previously, but it had worked just enough. Updates #921 Signed-off-by: Brad Fitzpatrick --- wgengine/monitor/monitor_windows.go | 174 ++++++++++++++++++---------- 1 file changed, 116 insertions(+), 58 deletions(-) diff --git a/wgengine/monitor/monitor_windows.go b/wgengine/monitor/monitor_windows.go index 254577910..c1b361d89 100644 --- a/wgengine/monitor/monitor_windows.go +++ b/wgengine/monitor/monitor_windows.go @@ -7,6 +7,8 @@ package monitor import ( "context" "errors" + "fmt" + "runtime" "sync" "syscall" "time" @@ -24,9 +26,15 @@ const ( ) var ( - iphlpapi = syscall.NewLazyDLL("iphlpapi.dll") - notifyAddrChangeProc = iphlpapi.NewProc("NotifyAddrChange") - notifyRouteChangeProc = iphlpapi.NewProc("NotifyRouteChange") + iphlpapi = syscall.NewLazyDLL("iphlpapi.dll") + notifyAddrChangeProc = iphlpapi.NewProc("NotifyAddrChange") + notifyRouteChangeProc = iphlpapi.NewProc("NotifyRouteChange") + cancelIPChangeNotifyProc = iphlpapi.NewProc("CancelIPChangeNotify") +) + +const ( + _STATUS_PENDING = 0x00000103 // 259 + _STATUS_WAIT_0 = 0 ) type unspecifiedMessage struct{} @@ -43,27 +51,33 @@ type messageOrError struct { } type winMon struct { - ctx context.Context - cancel context.CancelFunc - messagec chan messageOrError - logf logger.Logf - pollTicker *time.Ticker - lastState *interfaces.State + ctx context.Context + cancel context.CancelFunc + messagec chan messageOrError + logf logger.Logf + pollTicker *time.Ticker + lastState *interfaces.State + closeHandle windows.Handle // signaled upon close mu sync.Mutex - event windows.Handle lastNetChange time.Time inFastPoll bool // recent net change event made us go into fast polling mode (to detect proxy changes) } func newOSMon(logf logger.Logf) (osMon, error) { + closeHandle, err := windows.CreateEvent(nil, 1 /* manual reset */, 0 /* unsignaled */, nil /* no name */) + if err != nil { + return nil, fmt.Errorf("CreateEvent: %w", err) + } + ctx, cancel := context.WithCancel(context.Background()) m := &winMon{ - logf: logf, - ctx: ctx, - cancel: cancel, - messagec: make(chan messageOrError, 1), - pollTicker: time.NewTicker(pollIntervalSlow), + logf: logf, + ctx: ctx, + cancel: cancel, + messagec: make(chan messageOrError, 1), + pollTicker: time.NewTicker(pollIntervalSlow), + closeHandle: closeHandle, } go m.awaitIPAndRouteChanges() return m, nil @@ -72,14 +86,7 @@ func newOSMon(logf logger.Logf) (osMon, error) { func (m *winMon) Close() error { m.cancel() m.pollTicker.Stop() - - m.mu.Lock() - defer m.mu.Unlock() - if h := m.event; h != 0 { - // Wake up any reader blocked in Receive. - windows.SetEvent(h) - } - + windows.SetEvent(m.closeHandle) // wakes up any reader blocked in Receive return nil } @@ -136,52 +143,80 @@ func (m *winMon) getIPOrRouteChangeMessage() (message, error) { return nil, errClosed } - var o windows.Overlapped - h, err := windows.CreateEvent(nil, 1 /* true*/, 0 /* unsignaled */, nil /* no name */) - if err != nil { - m.logf("CreateEvent: %v", err) - return nil, err - } - defer windows.CloseHandle(h) - - m.mu.Lock() - m.event = h - m.mu.Unlock() - - o.HEvent = h - - err = notifyAddrChange(&h, &o) + // TODO(bradfitz): locking ourselves to an OS thread here + // likely isn't necessary, but also can't really hurt. + // We'll be blocked in windows.WaitForMultipleObjects below + // anyway, so might as well stay on this thread during the + // notify calls and cancel funcs. + // Given the past memory corruption from misuse of these APIs, + // and my continued lack of understanding of Windows APIs, + // I'll be paranoid. But perhaps we can remove this once + // we understand more. + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + addrHandle, oaddr, cancel, err := notifyAddrChange() if err != nil { m.logf("notifyAddrChange: %v", err) return nil, err } - err = notifyRouteChange(&h, &o) + defer cancel() + + routeHandle, oroute, cancel, err := notifyRouteChange() if err != nil { m.logf("notifyRouteChange: %v", err) return nil, err } + defer cancel() t0 := time.Now() - _, err = windows.WaitForSingleObject(o.HEvent, windows.INFINITE) - if m.ctx.Err() != nil { + eventNum, err := windows.WaitForMultipleObjects([]windows.Handle{ + m.closeHandle, // eventNum 0 + addrHandle, // eventNum 1 + routeHandle, // eventNum 2 + }, false, windows.INFINITE) + if m.ctx.Err() != nil || (err == nil && eventNum == 0) { return nil, errClosed } if err != nil { - m.logf("waitForSingleObject: %v", err) + m.logf("waitForMultipleObjects: %v", err) return nil, err } + d := time.Since(t0) - m.logf("got windows change event after %v", d) + var eventStr string + + // notifyAddrChange and notifyRouteChange both seem to return the same + // handle value. Determine which fired by looking at the "Internal" (sic) + // field of the Ovelapped instead. + // TODO(bradfitz): maybe clean this up; see TODO in callNotifyProc. + if (eventNum == 1 || eventNum == 2) && addrHandle == routeHandle { + if oaddr.Internal == _STATUS_WAIT_0 && oroute.Internal == _STATUS_PENDING { + eventStr = "addr-o" // "-o" overlapped suffix to distinguish from "addr" below + } else if oroute.Internal == _STATUS_WAIT_0 && oaddr.Internal == _STATUS_PENDING { + eventStr = "route-o" + } else { + eventStr = fmt.Sprintf("[unexpected] addr.internal=%d; route.internal=%d", oaddr.Internal, oroute.Internal) + } + } else { + switch eventNum { + case 1: + eventStr = "addr" + case 2: + eventStr = "route" + default: + eventStr = fmt.Sprintf("%d [unexpected]", eventNum) + } + } + m.logf("got windows change event after %v: evt=%s", d, eventStr) m.mu.Lock() { m.lastNetChange = time.Now() - m.event = 0 // Something changed, so assume Windows is about to // discover its new proxy settings from WPAD, which // seems to take a bit. Poll heavily for awhile. - m.logf("starting quick poll, waiting for WPAD change") m.inFastPoll = true m.pollTicker.Reset(pollIntervalFast) } @@ -190,23 +225,46 @@ func (m *winMon) getIPOrRouteChangeMessage() (message, error) { return unspecifiedMessage{}, nil } -func notifyAddrChange(h *windows.Handle, o *windows.Overlapped) error { - return callNotifyProc(notifyAddrChangeProc, h, o) +func notifyAddrChange() (h windows.Handle, o *windows.Overlapped, cancel func(), err error) { + return callNotifyProc(notifyAddrChangeProc) } -func notifyRouteChange(h *windows.Handle, o *windows.Overlapped) error { - return callNotifyProc(notifyRouteChangeProc, h, o) +func notifyRouteChange() (h windows.Handle, o *windows.Overlapped, cancel func(), err error) { + return callNotifyProc(notifyRouteChangeProc) } -func callNotifyProc(p *syscall.LazyProc, h *windows.Handle, o *windows.Overlapped) error { - r1, _, e1 := p.Call(uintptr(unsafe.Pointer(h)), uintptr(unsafe.Pointer(o))) - expect := uintptr(0) - if h != nil || o != nil { - const ERROR_IO_PENDING = 997 - expect = ERROR_IO_PENDING +func callNotifyProc(p *syscall.LazyProc) (h windows.Handle, o *windows.Overlapped, cancel func(), err error) { + o = new(windows.Overlapped) + + // TODO(bradfitz): understand why this if-false code doesn't + // work, even though the docs online suggest we should pass an + // event in the overlapped.Hevent field. + // The docs at + // https://docs.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-overlapped + // says that o.HEvent can be zero, though, which seems to work. + // Note that the returned windows.Handle returns the same value for both + // notifyAddrChange and notifyRouteChange, which is why our caller needs + // to look at the returned Overlapped's Internal field to see which case + // fired. That's also worth understanding more. + // See crawshaw's comment at https://github.com/tailscale/tailscale/pull/944#discussion_r526469186 + // too. + if false { + evt, err := windows.CreateEvent(nil, 0, 0, nil) + if err != nil { + return 0, nil, nil, err + } + o.HEvent = evt } - if r1 == expect { - return nil + + r1, _, e1 := syscall.Syscall(p.Addr(), 2, uintptr(unsafe.Pointer(&h)), uintptr(unsafe.Pointer(o)), 0) + + // We expect ERROR_IO_PENDING. + if syscall.Errno(r1) != windows.ERROR_IO_PENDING { + return 0, nil, nil, e1 + } + + cancel = func() { + cancelIPChangeNotifyProc.Call(uintptr(unsafe.Pointer(o))) } - return e1 + return h, o, cancel, nil }