From e21c21ec3b5eb1d411da540b25ea1102a0c0629a Mon Sep 17 00:00:00 2001 From: Brian DeHamer Date: Tue, 28 Jul 2015 19:29:20 +0000 Subject: [PATCH] Account for latency in container removal Under certain conditions when watchtower is monitoring a Docker Swarm cluster there would be cases where an updated container could not be started because the old hadn't yet been removed (name conflicts, mapped port conflicts, etc). We suspect that this has something to do with the async nature of swarm and even though we've asked the swarm master to remove a container it may not be completely removed from the associated node. The fix is to do some polling after the remove container call to ensure that the container is truly gone before proceeding. --- container/client.go | 12 +++++++++++- container/client_test.go | 6 ++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/container/client.go b/container/client.go index a9722e3..0c69316 100644 --- a/container/client.go +++ b/container/client.go @@ -2,6 +2,7 @@ package container import ( "crypto/tls" + "fmt" "time" log "github.com/Sirupsen/logrus" @@ -88,7 +89,16 @@ func (client DockerClient) StopContainer(c Container, timeout time.Duration) err log.Debugf("Removing container %s", c.ID()) - return client.api.RemoveContainer(c.ID(), true, false) + if err := client.api.RemoveContainer(c.ID(), true, false); err != nil { + return err + } + + // Wait for container to be removed. In this case an error is a good thing + if err := client.waitForStop(c, timeout); err == nil { + return fmt.Errorf("Container %s (%s) could not be removed", c.Name(), c.ID()) + } + + return nil } func (client DockerClient) StartContainer(c Container) error { diff --git a/container/client_test.go b/container/client_test.go index f2fdf0f..b8fc75a 100644 --- a/container/client_test.go +++ b/container/client_test.go @@ -106,8 +106,9 @@ func TestStopContainer_DefaultSuccess(t *testing.T) { api := mockclient.NewMockClient() api.On("KillContainer", "abc123", "SIGTERM").Return(nil) - api.On("InspectContainer", "abc123").Return(ci, nil) + api.On("InspectContainer", "abc123").Return(ci, nil).Once() api.On("RemoveContainer", "abc123", true, false).Return(nil) + api.On("InspectContainer", "abc123").Return(&dockerclient.ContainerInfo{}, errors.New("Not Found")) client := DockerClient{api: api} err := client.StopContainer(c, time.Second) @@ -134,8 +135,9 @@ func TestStopContainer_CustomSignalSuccess(t *testing.T) { api := mockclient.NewMockClient() api.On("KillContainer", "abc123", "SIGUSR1").Return(nil) - api.On("InspectContainer", "abc123").Return(ci, nil) + api.On("InspectContainer", "abc123").Return(ci, nil).Once() api.On("RemoveContainer", "abc123", true, false).Return(nil) + api.On("InspectContainer", "abc123").Return(&dockerclient.ContainerInfo{}, errors.New("Not Found")) client := DockerClient{api: api} err := client.StopContainer(c, time.Second)