From 1d1c630f7a79c59c0383a12649dc336389886d6e Mon Sep 17 00:00:00 2001 From: Simon Aronsson Date: Mon, 25 Nov 2019 21:11:12 +0100 Subject: [PATCH 1/5] feat: add timeout override for pre-update lifecycle hook --- docs/lifecycle-hooks.md | 13 ++++++++++ go.sum | 2 ++ internal/actions/update.go | 3 +-- internal/flags/flags.go | 6 +++-- pkg/container/client.go | 50 +++++++++++++++++++++++++++++--------- pkg/container/container.go | 22 ++++++++++++++++- pkg/container/metadata.go | 17 +++++++------ 7 files changed, 89 insertions(+), 24 deletions(-) diff --git a/docs/lifecycle-hooks.md b/docs/lifecycle-hooks.md index 071726c..f8bc640 100644 --- a/docs/lifecycle-hooks.md +++ b/docs/lifecycle-hooks.md @@ -46,6 +46,19 @@ docker run -d \ --label=com.centurylinklabs.watchtower.lifecycle.post-check="/send-heartbeat.sh" \ ``` +### Timeouts +The timeout for all lifecycle commands is 60 seconds. After that, a timeout will +occur, forcing Watchtower to continue the update loop. + +#### Pre-update timeouts + +For the `pre-update` lifecycle command, it is possible to override this timeout to +allow the script to finish before forcefully killing it. This is done by adding the +label `com.centurylinklabs.watchtower.lifecycle.pre-update-timeout` followed by +the timeout expressed in minutes. + +If the label value is explicitly set to `0`, the timeout will be disabled. + ### Execution failure The failure of a command to execute, identified by an exit code different than diff --git a/go.sum b/go.sum index 230df62..9531de6 100644 --- a/go.sum +++ b/go.sum @@ -61,6 +61,7 @@ github.com/docker/distribution v2.7.1+incompatible h1:a5mlkVzth6W5A4fOsS3D2EO5BU github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/docker v0.0.0-20190404075923-dbe4a30928d4 h1:34LfsqlE2kEvmGP9qbRoPvOWkmluYGzmlvWVTzwvT0A= github.com/docker/docker v0.0.0-20190404075923-dbe4a30928d4/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v1.13.1 h1:IkZjBSIc8hBjLpqeAbeE5mca5mNgeatLHBy3GO78BWo= github.com/docker/docker-credential-helpers v0.6.1 h1:Dq4iIfcM7cNtddhLVWe9h4QDjsi4OER3Z8voPu/I52g= github.com/docker/docker-credential-helpers v0.6.1/go.mod h1:WRaJzqw3CTB9bk10avuGsjVBZsD05qeibJ1/TYlvc0Y= github.com/docker/go v1.5.1-1 h1:hr4w35acWBPhGBXlzPoHpmZ/ygPjnmFVxGxxGnMyP7k= @@ -284,6 +285,7 @@ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190522155817-f3200d17e092 h1:4QSRKanuywn15aTZvI/mIDEgPQpswuFndXpOj3rKEco= golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20191116160921-f9c825593386 h1:ktbWvQrW08Txdxno1PiDpSxPXG6ndGsfnJjRRtkM0LQ= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= diff --git a/internal/actions/update.go b/internal/actions/update.go index 874e705..31ceafe 100644 --- a/internal/actions/update.go +++ b/internal/actions/update.go @@ -75,7 +75,6 @@ func stopStaleContainer(container container.Container, client container.Client, } if params.LifecycleHooks { lifecycle.ExecutePreUpdateCommand(client, container) - } if err := client.StopContainer(container, params.Timeout); err != nil { @@ -140,4 +139,4 @@ func checkDependencies(containers []container.Container) { } } } -} +} \ No newline at end of file diff --git a/internal/flags/flags.go b/internal/flags/flags.go index a60d18f..039b574 100644 --- a/internal/flags/flags.go +++ b/internal/flags/flags.go @@ -30,12 +30,14 @@ func RegisterSystemFlags(rootCmd *cobra.Command) { viper.GetInt("WATCHTOWER_POLL_INTERVAL"), "poll interval (in seconds)") - flags.StringP("schedule", + flags.StringP( + "schedule", "s", viper.GetString("WATCHTOWER_SCHEDULE"), "the cron expression which defines when to update") - flags.DurationP("stop-timeout", + flags.DurationP( + "stop-timeout", "t", viper.GetDuration("WATCHTOWER_TIMEOUT"), "timeout before a container is forcefully stopped") diff --git a/pkg/container/client.go b/pkg/container/client.go index 607b84c..a74bfba 100644 --- a/pkg/container/client.go +++ b/pkg/container/client.go @@ -29,9 +29,8 @@ type Client interface { StartContainer(Container) (string, error) RenameContainer(Container, string) error IsContainerStale(Container) (bool, error) - ExecuteCommand(containerID string, command string) error + ExecuteCommand(containerID string, command string, timeout int) error RemoveImageByID(string) error - } // NewClient returns a new Client instance which can be used to interact with @@ -301,7 +300,7 @@ func (client dockerClient) RemoveImageByID(id string) error { return err } -func (client dockerClient) ExecuteCommand(containerID string, command string) error { +func (client dockerClient) ExecuteCommand(containerID string, command string, timeout int) error { bg := context.Background() // Create the exec @@ -331,7 +330,7 @@ func (client dockerClient) ExecuteCommand(containerID string, command string) er return err } - var execOutput string + var output string if attachErr == nil { defer response.Close() var writer bytes.Buffer @@ -339,26 +338,56 @@ func (client dockerClient) ExecuteCommand(containerID string, command string) er if err != nil { log.Error(err) } else if written > 0 { - execOutput = strings.TrimSpace(writer.String()) + output = strings.TrimSpace(writer.String()) } } // Inspect the exec to get the exit code and print a message if the // exit code is not success. - execInspect, err := client.api.ContainerExecInspect(bg, exec.ID) + err = client.waitForExecOrTimeout(bg, exec.ID, output, timeout) if err != nil { return err } - if execInspect.ExitCode > 0 { - log.Errorf("Command exited with code %v.", execInspect.ExitCode) - log.Error(execOutput) + return nil +} + +func (client dockerClient) waitForExecOrTimeout(bg context.Context, ID string, execOutput string, timeout int) error { + var ctx context.Context + var cancel context.CancelFunc + + if timeout > 0 { + ctx, cancel = context.WithTimeout(bg, time.Duration(timeout)*time.Minute) + defer cancel() } else { + ctx = bg + } + + for { + execInspect, err := client.api.ContainerExecInspect(ctx, ID) + + log.WithFields(log.Fields{ + "exit-code": execInspect.ExitCode, + "exec-id": execInspect.ExecID, + "running": execInspect.Running, + }).Debug("Awaiting timeout or completion") + + if err != nil { + return err + } + if execInspect.Running == true { + time.Sleep(1 * time.Second) + continue + } if len(execOutput) > 0 { log.Infof("Command output:\n%v", execOutput) } + if execInspect.ExitCode > 0 { + log.Errorf("Command exited with code %v.", execInspect.ExitCode) + log.Error(execOutput) + } + break } - return nil } @@ -377,7 +406,6 @@ func (client dockerClient) waitForStopOrTimeout(c Container, waitTime time.Durat return nil } } - time.Sleep(1 * time.Second) } } diff --git a/pkg/container/container.go b/pkg/container/container.go index f88ff91..32e5a31 100644 --- a/pkg/container/container.go +++ b/pkg/container/container.go @@ -2,10 +2,11 @@ package container import ( "fmt" - "github.com/containrrr/watchtower/internal/util" "strconv" "strings" + "github.com/containrrr/watchtower/internal/util" + "github.com/docker/docker/api/types" dockercontainer "github.com/docker/docker/api/types/container" ) @@ -118,6 +119,25 @@ func (c Container) IsWatchtower() bool { return ContainsWatchtowerLabel(c.containerInfo.Config.Labels) } +// PreUpdateTimeout checks whether a container has a specific timeout set +// for how long the pre-update command is allowed to run. This value is expressed +// either as an integer, in minutes, or as "off" which will allow the command/script +// to run indefinitely. Users should be cautious with the off option, as that +// could result in watchtower waiting forever. +func (c Container) PreUpdateTimeout() int { + var minutes int + var err error + + val := c.getLabelValueOrEmpty(preUpdateTimeoutLabel) + + minutes, err = strconv.Atoi(val) + if err != nil || val == "" { + return 1 + } + + return minutes +} + // StopSignal returns the custom stop signal (if any) that is encoded in the // container's metadata. If the container has not specified a custom stop // signal, the empty string "" is returned. diff --git a/pkg/container/metadata.go b/pkg/container/metadata.go index 0e04350..fe5a055 100644 --- a/pkg/container/metadata.go +++ b/pkg/container/metadata.go @@ -1,14 +1,15 @@ package container const ( - watchtowerLabel = "com.centurylinklabs.watchtower" - signalLabel = "com.centurylinklabs.watchtower.stop-signal" - enableLabel = "com.centurylinklabs.watchtower.enable" - zodiacLabel = "com.centurylinklabs.zodiac.original-image" - preCheckLabel = "com.centurylinklabs.watchtower.lifecycle.pre-check" - postCheckLabel = "com.centurylinklabs.watchtower.lifecycle.post-check" - preUpdateLabel = "com.centurylinklabs.watchtower.lifecycle.pre-update" - postUpdateLabel = "com.centurylinklabs.watchtower.lifecycle.post-update" + watchtowerLabel = "com.centurylinklabs.watchtower" + signalLabel = "com.centurylinklabs.watchtower.stop-signal" + enableLabel = "com.centurylinklabs.watchtower.enable" + zodiacLabel = "com.centurylinklabs.zodiac.original-image" + preCheckLabel = "com.centurylinklabs.watchtower.lifecycle.pre-check" + postCheckLabel = "com.centurylinklabs.watchtower.lifecycle.post-check" + preUpdateLabel = "com.centurylinklabs.watchtower.lifecycle.pre-update" + postUpdateLabel = "com.centurylinklabs.watchtower.lifecycle.post-update" + preUpdateTimeoutLabel = "com.centurylinklabs.watchtower.lifecycle.pre-update-timeout" ) // GetLifecyclePreCheckCommand returns the pre-check command set in the container metadata or an empty string From 1d3ffc728daa522d42cb09a0d9ce3ae3e4367a66 Mon Sep 17 00:00:00 2001 From: Simon Aronsson Date: Mon, 25 Nov 2019 21:19:38 +0100 Subject: [PATCH 2/5] fix: update mock client for tests --- internal/actions/actions_suite_test.go | 59 ++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/internal/actions/actions_suite_test.go b/internal/actions/actions_suite_test.go index 2c9b0c8..b633b6a 100644 --- a/internal/actions/actions_suite_test.go +++ b/internal/actions/actions_suite_test.go @@ -132,3 +132,62 @@ var _ = Describe("the actions package", func() { }) }) +func createMockContainer(id string, name string, image string, created time.Time) container.Container { + content := types.ContainerJSON{ + ContainerJSONBase: &types.ContainerJSONBase{ + ID: id, + Image: image, + Name: name, + Created: created.String(), + }, + } + return *container.NewContainer(&content, nil) +} + +type mockClient struct { + TestData *TestData + api cli.CommonAPIClient + pullImages bool + removeVolumes bool +} + +type TestData struct { + TriedToRemoveImage bool + NameOfContainerToKeep string + Containers []container.Container +} + +func (client mockClient) ListContainers(f t.Filter) ([]container.Container, error) { + return client.TestData.Containers, nil +} + +func (client mockClient) StopContainer(c container.Container, d time.Duration) error { + if c.Name() == client.TestData.NameOfContainerToKeep { + return errors.New("tried to stop the instance we want to keep") + } + return nil +} +func (client mockClient) StartContainer(c container.Container) (string, error) { + panic("Not implemented") +} + +func (client mockClient) RenameContainer(c container.Container, s string) error { + panic("Not implemented") +} + +func (client mockClient) RemoveImage(c container.Container) error { + client.TestData.TriedToRemoveImage = true + return nil +} + +func (client mockClient) GetContainer(containerID string) (container.Container, error) { + return container.Container{}, nil +} + +func (client mockClient) ExecuteCommand(containerID string, command string, timeout int) error { + return nil +} + +func (client mockClient) IsContainerStale(c container.Container) (bool, error) { + panic("Not implemented") +} From fac26dfc721e51cce7a49e6e32e3bd07ba91d07a Mon Sep 17 00:00:00 2001 From: Simon Aronsson Date: Fri, 27 Dec 2019 12:05:56 +0100 Subject: [PATCH 3/5] fix: improve logging --- pkg/notifications/notifier.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/notifications/notifier.go b/pkg/notifications/notifier.go index 2f25824..20e8337 100644 --- a/pkg/notifications/notifier.go +++ b/pkg/notifications/notifier.go @@ -27,8 +27,10 @@ func NewNotifier(c *cobra.Command) *Notifier { acceptedLogLevels := slackrus.LevelThreshold(logLevel) // Parse types and create notifiers. - types, _ := f.GetStringSlice("notifications") - + types, err := f.GetStringSlice("notifications") + if err != nil { + log.WithField("could not read notifications argument", log.Fields{ "Error": err }).Fatal() + } for _, t := range types { var tn ty.Notifier switch t { From c1a0da9a9d5eb6587c3a5b0020b6c7113e18fade Mon Sep 17 00:00:00 2001 From: Simon Aronsson Date: Fri, 3 Jan 2020 18:51:32 +0100 Subject: [PATCH 4/5] feature/367 fix: skip container if pre-update command fails --- internal/actions/update.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/internal/actions/update.go b/internal/actions/update.go index 31ceafe..1694c59 100644 --- a/internal/actions/update.go +++ b/internal/actions/update.go @@ -74,9 +74,13 @@ func stopStaleContainer(container container.Container, client container.Client, return } if params.LifecycleHooks { - lifecycle.ExecutePreUpdateCommand(client, container) + if err := lifecycle.ExecutePreUpdateCommand(client, container); err != nil { + log.Error(err) + log.Info("Skipping container as the pre-update command failed") + return + } } - + if err := client.StopContainer(container, params.Timeout); err != nil { log.Error(err) } @@ -139,4 +143,4 @@ func checkDependencies(containers []container.Container) { } } } -} \ No newline at end of file +} From 98c60d744143b9dc245ddfdc14270b22cb739b73 Mon Sep 17 00:00:00 2001 From: Simon Aronsson Date: Sat, 28 Mar 2020 19:48:04 +0100 Subject: [PATCH 5/5] fix some errors and clean up --- internal/actions/actions_suite_test.go | 51 +------------------------- internal/actions/mocks/client.go | 2 +- pkg/container/container.go | 4 +- pkg/lifecycle/lifecycle.go | 16 ++++---- 4 files changed, 12 insertions(+), 61 deletions(-) diff --git a/internal/actions/actions_suite_test.go b/internal/actions/actions_suite_test.go index b633b6a..5bfbcdd 100644 --- a/internal/actions/actions_suite_test.go +++ b/internal/actions/actions_suite_test.go @@ -9,6 +9,7 @@ import ( "github.com/containrrr/watchtower/pkg/container/mocks" cli "github.com/docker/docker/client" + "github.com/docker/docker/api/types" . "github.com/containrrr/watchtower/internal/actions/mocks" . "github.com/onsi/ginkgo" @@ -142,52 +143,4 @@ func createMockContainer(id string, name string, image string, created time.Time }, } return *container.NewContainer(&content, nil) -} - -type mockClient struct { - TestData *TestData - api cli.CommonAPIClient - pullImages bool - removeVolumes bool -} - -type TestData struct { - TriedToRemoveImage bool - NameOfContainerToKeep string - Containers []container.Container -} - -func (client mockClient) ListContainers(f t.Filter) ([]container.Container, error) { - return client.TestData.Containers, nil -} - -func (client mockClient) StopContainer(c container.Container, d time.Duration) error { - if c.Name() == client.TestData.NameOfContainerToKeep { - return errors.New("tried to stop the instance we want to keep") - } - return nil -} -func (client mockClient) StartContainer(c container.Container) (string, error) { - panic("Not implemented") -} - -func (client mockClient) RenameContainer(c container.Container, s string) error { - panic("Not implemented") -} - -func (client mockClient) RemoveImage(c container.Container) error { - client.TestData.TriedToRemoveImage = true - return nil -} - -func (client mockClient) GetContainer(containerID string) (container.Container, error) { - return container.Container{}, nil -} - -func (client mockClient) ExecuteCommand(containerID string, command string, timeout int) error { - return nil -} - -func (client mockClient) IsContainerStale(c container.Container) (bool, error) { - panic("Not implemented") -} +} \ No newline at end of file diff --git a/internal/actions/mocks/client.go b/internal/actions/mocks/client.go index dad2506..2145484 100644 --- a/internal/actions/mocks/client.go +++ b/internal/actions/mocks/client.go @@ -73,7 +73,7 @@ func (client MockClient) GetContainer(containerID string) (container.Container, } // ExecuteCommand is a mock method -func (client MockClient) ExecuteCommand(containerID string, command string) error { +func (client MockClient) ExecuteCommand(containerID string, command string, timeout int) error { return nil } diff --git a/pkg/container/container.go b/pkg/container/container.go index 32e5a31..fb495fe 100644 --- a/pkg/container/container.go +++ b/pkg/container/container.go @@ -121,8 +121,8 @@ func (c Container) IsWatchtower() bool { // PreUpdateTimeout checks whether a container has a specific timeout set // for how long the pre-update command is allowed to run. This value is expressed -// either as an integer, in minutes, or as "off" which will allow the command/script -// to run indefinitely. Users should be cautious with the off option, as that +// either as an integer, in minutes, or as 0 which will allow the command/script +// to run indefinitely. Users should be cautious with the 0 option, as that // could result in watchtower waiting forever. func (c Container) PreUpdateTimeout() int { var minutes int diff --git a/pkg/lifecycle/lifecycle.go b/pkg/lifecycle/lifecycle.go index 9823f9d..9311355 100644 --- a/pkg/lifecycle/lifecycle.go +++ b/pkg/lifecycle/lifecycle.go @@ -37,7 +37,7 @@ func ExecutePreCheckCommand(client container.Client, container container.Contain } log.Info("Executing pre-check command.") - if err := client.ExecuteCommand(container.ID(), command); err != nil { + if err := client.ExecuteCommand(container.ID(), command, 1); err != nil { log.Error(err) } } @@ -51,24 +51,22 @@ func ExecutePostCheckCommand(client container.Client, container container.Contai } log.Info("Executing post-check command.") - if err := client.ExecuteCommand(container.ID(), command); err != nil { + if err := client.ExecuteCommand(container.ID(), command, 1); err != nil { log.Error(err) } } // ExecutePreUpdateCommand tries to run the pre-update lifecycle hook for a single container. -func ExecutePreUpdateCommand(client container.Client, container container.Container) { - +func ExecutePreUpdateCommand(client container.Client, container container.Container) error { + timeout := container.PreUpdateTimeout() command := container.GetLifecyclePreUpdateCommand() if len(command) == 0 { log.Debug("No pre-update command supplied. Skipping") - return + return nil } log.Info("Executing pre-update command.") - if err := client.ExecuteCommand(container.ID(), command); err != nil { - log.Error(err) - } + return client.ExecuteCommand(container.ID(), command, timeout) } // ExecutePostUpdateCommand tries to run the post-update lifecycle hook for a single container. @@ -86,7 +84,7 @@ func ExecutePostUpdateCommand(client container.Client, newContainerID string) { } log.Info("Executing post-update command.") - if err := client.ExecuteCommand(newContainerID, command); err != nil { + if err := client.ExecuteCommand(newContainerID, command, 1); err != nil { log.Error(err) } }