Prometheus support (#450)

Co-authored-by: nils måsén <nils@piksel.se>
Co-authored-by: MihailITPlace <ya.halo-halo@yandex.ru>
Co-authored-by: Sebastiaan Tammer <sebastiaantammer@gmail.com>
pull/757/head
Simon Aronsson 4 years ago committed by GitHub
parent 35490c853d
commit d7d5b25882
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,12 +1,16 @@
package cmd package cmd
import ( import (
metrics2 "github.com/containrrr/watchtower/pkg/metrics"
"os" "os"
"os/signal" "os/signal"
"strconv" "strconv"
"syscall" "syscall"
"time" "time"
"github.com/containrrr/watchtower/pkg/api/metrics"
"github.com/containrrr/watchtower/pkg/api/update"
"github.com/containrrr/watchtower/internal/actions" "github.com/containrrr/watchtower/internal/actions"
"github.com/containrrr/watchtower/internal/flags" "github.com/containrrr/watchtower/internal/flags"
"github.com/containrrr/watchtower/pkg/api" "github.com/containrrr/watchtower/pkg/api"
@ -144,7 +148,10 @@ func PreRun(cmd *cobra.Command, args []string) {
func Run(c *cobra.Command, names []string) { func Run(c *cobra.Command, names []string) {
filter := filters.BuildFilter(names, enableLabel, scope) filter := filters.BuildFilter(names, enableLabel, scope)
runOnce, _ := c.PersistentFlags().GetBool("run-once") runOnce, _ := c.PersistentFlags().GetBool("run-once")
httpAPI, _ := c.PersistentFlags().GetBool("http-api") enableUpdateAPI, _ := c.PersistentFlags().GetBool("http-api-update")
enableMetricsAPI, _ := c.PersistentFlags().GetBool("http-api-metrics")
apiToken, _ := c.PersistentFlags().GetString("http-api-token")
if runOnce { if runOnce {
if noStartupMessage, _ := c.PersistentFlags().GetBool("no-startup-message"); !noStartupMessage { if noStartupMessage, _ := c.PersistentFlags().GetBool("no-startup-message"); !noStartupMessage {
@ -160,17 +167,20 @@ func Run(c *cobra.Command, names []string) {
log.Fatal(err) log.Fatal(err)
} }
if httpAPI { httpAPI := api.New(apiToken)
apiToken, _ := c.PersistentFlags().GetString("http-api-token")
if err := api.SetupHTTPUpdates(apiToken, func() { runUpdatesWithNotifications(filter) }); err != nil { if enableUpdateAPI {
log.Fatal(err) updateHandler := update.New(func() { runUpdatesWithNotifications(filter) })
os.Exit(1) httpAPI.RegisterFunc(updateHandler.Path, updateHandler.Handle)
} }
api.WaitForHTTPUpdates() if enableMetricsAPI {
metricsHandler := metrics.New()
httpAPI.RegisterHandler(metricsHandler.Path, metricsHandler.Handle)
} }
httpAPI.Start(enableUpdateAPI)
if err := runUpgradesOnSchedule(c, filter); err != nil { if err := runUpgradesOnSchedule(c, filter); err != nil {
log.Error(err) log.Error(err)
} }
@ -189,8 +199,11 @@ func runUpgradesOnSchedule(c *cobra.Command, filter t.Filter) error {
select { select {
case v := <-tryLockSem: case v := <-tryLockSem:
defer func() { tryLockSem <- v }() defer func() { tryLockSem <- v }()
runUpdatesWithNotifications(filter) metric := runUpdatesWithNotifications(filter)
metrics2.RegisterScan(metric)
default: default:
// Update was skipped
metrics2.RegisterScan(nil)
log.Debug("Skipped another update already running.") log.Debug("Skipped another update already running.")
} }
@ -222,7 +235,8 @@ func runUpgradesOnSchedule(c *cobra.Command, filter t.Filter) error {
return nil return nil
} }
func runUpdatesWithNotifications(filter t.Filter) { func runUpdatesWithNotifications(filter t.Filter) *metrics2.Metric {
notifier.StartNotification() notifier.StartNotification()
updateParams := t.UpdateParams{ updateParams := t.UpdateParams{
Filter: filter, Filter: filter,
@ -233,9 +247,10 @@ func runUpdatesWithNotifications(filter t.Filter) {
LifecycleHooks: lifecycleHooks, LifecycleHooks: lifecycleHooks,
RollingRestart: rollingRestart, RollingRestart: rollingRestart,
} }
err := actions.Update(client, updateParams) metrics, err := actions.Update(client, updateParams)
if err != nil { if err != nil {
log.Println(err) log.Println(err)
} }
notifier.SendNotification() notifier.SendNotification()
return metrics
} }

@ -0,0 +1,43 @@
version: '3.7'
services:
watchtower:
container_name: watchtower
build:
context: ./
dockerfile: dockerfiles/Dockerfile.dev-self-contained
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
ports:
- 8080:8080
command: --interval 10 --http-api-metrics --http-api-token demotoken --debug prometheus grafana parent child
prometheus:
container_name: prometheus
image: prom/prometheus
volumes:
- ./prometheus/:/etc/prometheus/
- prometheus:/prometheus/
ports:
- 9090:9090
grafana:
container_name: grafana
image: grafana/grafana
ports:
- 3000:3000
environment:
GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-simple-json-datasource
volumes:
- grafana:/var/lib/grafana
- ./grafana:/etc/grafana/provisioning
parent:
image: nginx
container_name: parent
child:
image: nginx:alpine
labels:
com.centurylinklabs.watchtower.depends-on: parent
container_name: child
volumes:
prometheus: {}
grafana: {}

@ -164,7 +164,7 @@ Environment Variable: WATCHTOWER_LABEL_ENABLE
## Without updating containers ## Without updating containers
Will only monitor for new images, send notifications and invoke the [pre-check/post-check hooks](https://containrrr.dev/watchtower/lifecycle-hooks/), but will **not** update the containers. Will only monitor for new images, send notifications and invoke the [pre-check/post-check hooks](https://containrrr.dev/watchtower/lifecycle-hooks/), but will **not** update the containers.
> ### ⚠️ Please note > **⚠️ Please note**
> >
> Due to Docker API limitations the latest image will still be pulled from the registry. > Due to Docker API limitations the latest image will still be pulled from the registry.
@ -238,9 +238,7 @@ Sets an authentication token to HTTP API requests.
Environment Variable: WATCHTOWER_HTTP_API_TOKEN Environment Variable: WATCHTOWER_HTTP_API_TOKEN
Type: String Type: String
Default: - Default: -
``` ```## Filter by scope
## Filter by scope
Update containers that have a `com.centurylinklabs.watchtower.scope` label set with the same value as the given argument. This enables [running multiple instances](https://containrrr.github.io/watchtower/running-multiple-instances). Update containers that have a `com.centurylinklabs.watchtower.scope` label set with the same value as the given argument. This enables [running multiple instances](https://containrrr.github.io/watchtower/running-multiple-instances).
``` ```
@ -250,6 +248,16 @@ Environment Variable: WATCHTOWER_SCOPE
Default: - Default: -
``` ```
## HTTP API Metrics
Enables a metrics endpoint, exposing Prometheus metrics via HTTP. See [Metrics](metrics.md) for details.
```
Argument: --http-api-metrics
Environment Variable: WATCHTOWER_HTTP_API_METRICS
Type: Boolean
Default: false
```
## Scheduling ## Scheduling
[Cron expression](https://pkg.go.dev/github.com/robfig/cron@v1.2.0?tab=doc#hdr-CRON_Expression_Format) in 6 fields (rather than the traditional 5) which defines when and how often to check for new images. Either `--interval` or the schedule expression [Cron expression](https://pkg.go.dev/github.com/robfig/cron@v1.2.0?tab=doc#hdr-CRON_Expression_Format) in 6 fields (rather than the traditional 5) which defines when and how often to check for new images. Either `--interval` or the schedule expression
can be defined, but not both. An example: `--schedule "0 0 4 * * *"` can be defined, but not both. An example: `--schedule "0 0 4 * * *"`

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

@ -0,0 +1,26 @@
> **⚠️ Experimental feature**
>
> This feature was added in v1.0.4 and is still considered experimental.
> If you notice any strange behavior, please raise a ticket in the repository issues.
Metrics can be used to track how Watchtower behaves over time.
To use this feature, you have to set an [API token](arguments.md#http-api-token), [enable the metrics API](arguments.md#http-api-metrics),
and create a port mapping for port `8080` on your container.
## Available Metrics
| Name | Type | Description |
| ------------------------------- | ------- | --------------------------------------------------------------------------- |
| `watchtower_containers_scanned` | Gauge | Number of containers scanned for changes by watchtower during the last scan |
| `watchtower_containers_updated` | Gauge | Number of containers updated by watchtower during the last scan |
| `watchtower_containers_failed` | Gauge | Number of containers where update failed during the last scan |
| `watchtower_scans_total` | Counter | Number of scans since watchtower started |
| `watchtower_scans_skipped` | Counter | Number of skipped scans since watchtower started |
## Demo
The repository contains a demo with Prometheus and Grafana, available through `docker-compose.yml`. This demo
is preconfigured with a dashboard, which will look something like this:
![grafana metrics](assets/grafana-dashboard.png)

@ -1,4 +1,3 @@
# Notifications # Notifications
Watchtower can send notifications when containers are updated. Notifications are sent via hooks in the logging system, [logrus](http://github.com/sirupsen/logrus). Watchtower can send notifications when containers are updated. Notifications are sent via hooks in the logging system, [logrus](http://github.com/sirupsen/logrus).
@ -56,19 +55,19 @@ docker run -d \
The previous example assumes, that you already have an SMTP server up and running you can connect to. If you don't or you want to bring up watchtower with your own simple SMTP relay the following `docker-compose.yml` might be a good start for you. The previous example assumes, that you already have an SMTP server up and running you can connect to. If you don't or you want to bring up watchtower with your own simple SMTP relay the following `docker-compose.yml` might be a good start for you.
The following example assumes, that your domain is called `your-domain.com` and that you are going to use a certificate valid for `smtp.your-domain.com`. This hostname has to be used as `WATCHTOWER_NOTIFICATION_EMAIL_SERVER` otherwise the TLS connection is going to fail with `Failed to send notification email` or `connect: connection refused`. We also have to add a network for this setup in order to add an alias to it. If you also want to enable DKIM or other features on the SMTP server, you will find more information at [freinet/postfix-relay](https://hub.docker.com/r/freinet/postfix-relay). The following example assumes, that your domain is called `your-domain.com` and that you are going to use a certificate valid for `smtp.your-domain.com`. This hostname has to be used as `WATCHTOWER_NOTIFICATION_EMAIL_SERVER` otherwise the TLS connection is going to fail with `Failed to send notification email` or `connect: connection refused`. We also have to add a network for this setup in order to add an alias to it. If you also want to enable DKIM or other features on the SMTP server, you will find more information at [freinet/postfix-relay](https://hub.docker.com/r/freinet/postfix-relay).
Example including an SMTP relay: Example including an SMTP relay:
```yaml ```yaml
--- ---
version: "3.8" version: '3.8'
services: services:
watchtower: watchtower:
image: containrrr/watchtower:latest image: containrrr/watchtower:latest
container_name: watchtower container_name: watchtower
environment: environment:
WATCHTOWER_MONITOR_ONLY: "true" WATCHTOWER_MONITOR_ONLY: 'true'
WATCHTOWER_NOTIFICATIONS: email WATCHTOWER_NOTIFICATIONS: email
WATCHTOWER_NOTIFICATION_EMAIL_FROM: from-address@your-domain.com WATCHTOWER_NOTIFICATION_EMAIL_FROM: from-address@your-domain.com
WATCHTOWER_NOTIFICATION_EMAIL_TO: to-address@your-domain.com WATCHTOWER_NOTIFICATION_EMAIL_TO: to-address@your-domain.com
@ -90,9 +89,9 @@ services:
- 25 - 25
environment: environment:
MAILNAME: somename.your-domain.com MAILNAME: somename.your-domain.com
TLS_KEY: "/etc/ssl/domains/your-domain.com/your-domain.com.key" TLS_KEY: '/etc/ssl/domains/your-domain.com/your-domain.com.key'
TLS_CRT: "/etc/ssl/domains/your-domain.com/your-domain.com.crt" TLS_CRT: '/etc/ssl/domains/your-domain.com/your-domain.com.crt'
TLS_CA: "/etc/ssl/domains/your-domain.com/intermediate.crt" TLS_CA: '/etc/ssl/domains/your-domain.com/intermediate.crt'
volumes: volumes:
- /etc/ssl/domains/your-domain.com/:/etc/ssl/domains/your-domain.com/:ro - /etc/ssl/domains/your-domain.com/:/etc/ssl/domains/your-domain.com/:ro
networks: networks:

@ -46,6 +46,7 @@ require (
github.com/opencontainers/image-spec v1.0.1 // indirect github.com/opencontainers/image-spec v1.0.1 // indirect
github.com/opencontainers/runc v0.1.1 // indirect github.com/opencontainers/runc v0.1.1 // indirect
github.com/pkg/errors v0.8.1 // indirect github.com/pkg/errors v0.8.1 // indirect
github.com/prometheus/client_golang v0.9.3
github.com/robfig/cron v0.0.0-20180505203441-b41be1df6967 github.com/robfig/cron v0.0.0-20180505203441-b41be1df6967
github.com/sirupsen/logrus v1.4.1 github.com/sirupsen/logrus v1.4.1
github.com/spf13/cobra v0.0.3 github.com/spf13/cobra v0.0.3

@ -0,0 +1,293 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 1,
"links": [],
"panels": [
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 1,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "7.3.6",
"targets": [
{
"expr": "watchtower_scans_total",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Total Scans",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "watchtower_containers_scanned{instance=\"watchtower:8080\", job=\"watchtower\"}"
},
"properties": [
{
"id": "displayName",
"value": "Scanned"
}
]
},
{
"matcher": {
"id": "byName",
"options": "watchtower_containers_failed{instance=\"watchtower:8080\", job=\"watchtower\"}"
},
"properties": [
{
"id": "displayName",
"value": "Failed"
}
]
},
{
"matcher": {
"id": "byName",
"options": "watchtower_containers_updated{instance=\"watchtower:8080\", job=\"watchtower\"}"
},
"properties": [
{
"id": "displayName",
"value": "Updated"
}
]
}
]
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 6,
"x": 1,
"y": 0
},
"hiddenSeries": false,
"id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.6",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "watchtower_containers_scanned",
"interval": "",
"legendFormat": "",
"refId": "A"
},
{
"expr": "watchtower_containers_failed",
"interval": "",
"legendFormat": "",
"refId": "B"
},
{
"expr": "watchtower_containers_updated",
"interval": "",
"legendFormat": "",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Container Updates",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 1,
"x": 0,
"y": 4
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "7.3.6",
"targets": [
{
"expr": "watchtower_scans_skipped",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Skipped Scans",
"type": "stat"
}
],
"refresh": false,
"schemaVersion": 26,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Watchtower",
"uid": "d7bdoT-Gz",
"version": 1
}

@ -0,0 +1,11 @@
apiVersion: 1
providers:
- name: 'Prometheus'
orgId: 1
folder: ''
type: file
disableDeletion: false
editable: true
options:
path: /etc/grafana/provisioning/dashboards

@ -0,0 +1,8 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true

@ -1,10 +1,11 @@
package actions_test package actions_test
import ( import (
"github.com/containrrr/watchtower/internal/actions"
"testing" "testing"
"time" "time"
"github.com/containrrr/watchtower/internal/actions"
"github.com/containrrr/watchtower/pkg/container" "github.com/containrrr/watchtower/pkg/container"
"github.com/containrrr/watchtower/pkg/container/mocks" "github.com/containrrr/watchtower/pkg/container/mocks"

@ -5,6 +5,7 @@ import (
"github.com/containrrr/watchtower/internal/util" "github.com/containrrr/watchtower/internal/util"
"github.com/containrrr/watchtower/pkg/container" "github.com/containrrr/watchtower/pkg/container"
"github.com/containrrr/watchtower/pkg/lifecycle" "github.com/containrrr/watchtower/pkg/lifecycle"
metrics2 "github.com/containrrr/watchtower/pkg/metrics"
"github.com/containrrr/watchtower/pkg/sorter" "github.com/containrrr/watchtower/pkg/sorter"
"github.com/containrrr/watchtower/pkg/types" "github.com/containrrr/watchtower/pkg/types"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
@ -14,8 +15,10 @@ import (
// used to start those containers have been updated. If a change is detected in // used to start those containers have been updated. If a change is detected in
// any of the images, the associated containers are stopped and restarted with // any of the images, the associated containers are stopped and restarted with
// the new image. // the new image.
func Update(client container.Client, params types.UpdateParams) error { func Update(client container.Client, params types.UpdateParams) (*metrics2.Metric, error) {
log.Debug("Checking containers for updated images") log.Debug("Checking containers for updated images")
metric := &metrics2.Metric{}
staleCount := 0
if params.LifecycleHooks { if params.LifecycleHooks {
lifecycle.ExecutePreChecks(client, params) lifecycle.ExecutePreChecks(client, params)
@ -23,9 +26,11 @@ func Update(client container.Client, params types.UpdateParams) error {
containers, err := client.ListContainers(params.Filter) containers, err := client.ListContainers(params.Filter)
if err != nil { if err != nil {
return err return nil, err
} }
staleCheckFailed := 0
for i, targetContainer := range containers { for i, targetContainer := range containers {
stale, err := client.IsContainerStale(targetContainer) stale, err := client.IsContainerStale(targetContainer)
if stale && !params.NoRestart && !params.MonitorOnly && !targetContainer.IsMonitorOnly() && !targetContainer.HasImageInfo() { if stale && !params.NoRestart && !params.MonitorOnly && !targetContainer.IsMonitorOnly() && !targetContainer.HasImageInfo() {
@ -34,13 +39,20 @@ func Update(client container.Client, params types.UpdateParams) error {
if err != nil { if err != nil {
log.Infof("Unable to update container %q: %v. Proceeding to next.", containers[i].Name(), err) log.Infof("Unable to update container %q: %v. Proceeding to next.", containers[i].Name(), err)
stale = false stale = false
staleCheckFailed++
metric.Failed++
} }
containers[i].Stale = stale containers[i].Stale = stale
if stale {
staleCount++
}
} }
containers, err = sorter.SortByDependencies(containers) containers, err = sorter.SortByDependencies(containers)
metric.Scanned = len(containers)
if err != nil { if err != nil {
return err return nil, err
} }
checkDependencies(containers) checkDependencies(containers)
@ -55,24 +67,32 @@ func Update(client container.Client, params types.UpdateParams) error {
} }
if params.RollingRestart { if params.RollingRestart {
performRollingRestart(containersToUpdate, client, params) metric.Failed += performRollingRestart(containersToUpdate, client, params)
} else { } else {
stopContainersInReversedOrder(containersToUpdate, client, params) metric.Failed += stopContainersInReversedOrder(containersToUpdate, client, params)
restartContainersInSortedOrder(containersToUpdate, client, params) metric.Failed += restartContainersInSortedOrder(containersToUpdate, client, params)
} }
metric.Updated = staleCount - (metric.Failed - staleCheckFailed)
if params.LifecycleHooks { if params.LifecycleHooks {
lifecycle.ExecutePostChecks(client, params) lifecycle.ExecutePostChecks(client, params)
} }
return nil return metric, nil
} }
func performRollingRestart(containers []container.Container, client container.Client, params types.UpdateParams) { func performRollingRestart(containers []container.Container, client container.Client, params types.UpdateParams) int {
cleanupImageIDs := make(map[string]bool) cleanupImageIDs := make(map[string]bool)
failed := 0
for i := len(containers) - 1; i >= 0; i-- { for i := len(containers) - 1; i >= 0; i-- {
if containers[i].Stale { if containers[i].Stale {
stopStaleContainer(containers[i], client, params) if err := stopStaleContainer(containers[i], client, params); err != nil {
restartStaleContainer(containers[i], client, params) failed++
}
if err := restartStaleContainer(containers[i], client, params); err != nil {
failed++
}
cleanupImageIDs[containers[i].ImageID()] = true cleanupImageIDs[containers[i].ImageID()] = true
} }
} }
@ -80,50 +100,63 @@ func performRollingRestart(containers []container.Container, client container.Cl
if params.Cleanup { if params.Cleanup {
cleanupImages(client, cleanupImageIDs) cleanupImages(client, cleanupImageIDs)
} }
return failed
} }
func stopContainersInReversedOrder(containers []container.Container, client container.Client, params types.UpdateParams) { func stopContainersInReversedOrder(containers []container.Container, client container.Client, params types.UpdateParams) int {
failed := 0
for i := len(containers) - 1; i >= 0; i-- { for i := len(containers) - 1; i >= 0; i-- {
stopStaleContainer(containers[i], client, params) if err := stopStaleContainer(containers[i], client, params); err != nil {
failed++
}
} }
return failed
} }
func stopStaleContainer(container container.Container, client container.Client, params types.UpdateParams) { func stopStaleContainer(container container.Container, client container.Client, params types.UpdateParams) error {
if container.IsWatchtower() { if container.IsWatchtower() {
log.Debugf("This is the watchtower container %s", container.Name()) log.Debugf("This is the watchtower container %s", container.Name())
return return nil
} }
if !container.Stale { if !container.Stale {
return return nil
} }
if params.LifecycleHooks { if params.LifecycleHooks {
if err := lifecycle.ExecutePreUpdateCommand(client, container); err != nil { if err := lifecycle.ExecutePreUpdateCommand(client, container); err != nil {
log.Error(err) log.Error(err)
log.Info("Skipping container as the pre-update command failed") log.Info("Skipping container as the pre-update command failed")
return return err
} }
} }
if err := client.StopContainer(container, params.Timeout); err != nil { if err := client.StopContainer(container, params.Timeout); err != nil {
log.Error(err) log.Error(err)
return err
} }
return nil
} }
func restartContainersInSortedOrder(containers []container.Container, client container.Client, params types.UpdateParams) { func restartContainersInSortedOrder(containers []container.Container, client container.Client, params types.UpdateParams) int {
imageIDs := make(map[string]bool) imageIDs := make(map[string]bool)
for _, staleContainer := range containers { failed := 0
if !staleContainer.Stale {
for _, c := range containers {
if !c.Stale {
continue continue
} }
restartStaleContainer(staleContainer, client, params) if err := restartStaleContainer(c, client, params); err != nil {
imageIDs[staleContainer.ImageID()] = true failed++
}
imageIDs[c.ImageID()] = true
} }
if params.Cleanup { if params.Cleanup {
cleanupImages(client, imageIDs) cleanupImages(client, imageIDs)
} }
return failed
} }
func cleanupImages(client container.Client, imageIDs map[string]bool) { func cleanupImages(client container.Client, imageIDs map[string]bool) {
@ -134,7 +167,7 @@ func cleanupImages(client container.Client, imageIDs map[string]bool) {
} }
} }
func restartStaleContainer(container container.Container, client container.Client, params types.UpdateParams) { func restartStaleContainer(container container.Container, client container.Client, params types.UpdateParams) error {
// Since we can't shutdown a watchtower container immediately, we need to // Since we can't shutdown a watchtower container immediately, we need to
// start the new one while the old one is still running. This prevents us // start the new one while the old one is still running. This prevents us
// from re-using the same container name so we first rename the current // from re-using the same container name so we first rename the current
@ -142,17 +175,19 @@ func restartStaleContainer(container container.Container, client container.Clien
if container.IsWatchtower() { if container.IsWatchtower() {
if err := client.RenameContainer(container, util.RandName()); err != nil { if err := client.RenameContainer(container, util.RandName()); err != nil {
log.Error(err) log.Error(err)
return return nil
} }
} }
if !params.NoRestart { if !params.NoRestart {
if newContainerID, err := client.StartContainer(container); err != nil { if newContainerID, err := client.StartContainer(container); err != nil {
log.Error(err) log.Error(err)
return err
} else if container.Stale && params.LifecycleHooks { } else if container.Stale && params.LifecycleHooks {
lifecycle.ExecutePostUpdateCommand(client, newContainerID) lifecycle.ExecutePostUpdateCommand(client, newContainerID)
} }
} }
return nil
} }
func checkDependencies(containers []container.Container) { func checkDependencies(containers []container.Container) {

@ -59,7 +59,7 @@ var _ = Describe("the update action", func() {
When("there are multiple containers using the same image", func() { When("there are multiple containers using the same image", func() {
It("should only try to remove the image once", func() { It("should only try to remove the image once", func() {
err := actions.Update(client, types.UpdateParams{Cleanup: true}) _, err := actions.Update(client, types.UpdateParams{Cleanup: true})
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1)) Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1))
}) })
@ -75,7 +75,7 @@ var _ = Describe("the update action", func() {
time.Now(), time.Now(),
), ),
) )
err := actions.Update(client, types.UpdateParams{Cleanup: true}) _, err := actions.Update(client, types.UpdateParams{Cleanup: true})
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
Expect(client.TestData.TriedToRemoveImageCount).To(Equal(2)) Expect(client.TestData.TriedToRemoveImageCount).To(Equal(2))
}) })
@ -83,7 +83,7 @@ var _ = Describe("the update action", func() {
When("performing a rolling restart update", func() { When("performing a rolling restart update", func() {
It("should try to remove the image once", func() { It("should try to remove the image once", func() {
err := actions.Update(client, types.UpdateParams{Cleanup: true, RollingRestart: true}) _, err := actions.Update(client, types.UpdateParams{Cleanup: true, RollingRestart: true})
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1)) Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1))
}) })
@ -121,7 +121,7 @@ var _ = Describe("the update action", func() {
}) })
It("should not update those containers", func() { It("should not update those containers", func() {
err := actions.Update(client, types.UpdateParams{Cleanup: true}) _, err := actions.Update(client, types.UpdateParams{Cleanup: true})
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1)) Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1))
}) })
@ -151,7 +151,7 @@ var _ = Describe("the update action", func() {
}) })
It("should not update any containers", func() { It("should not update any containers", func() {
err := actions.Update(client, types.UpdateParams{MonitorOnly: true}) _, err := actions.Update(client, types.UpdateParams{MonitorOnly: true})
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
Expect(client.TestData.TriedToRemoveImageCount).To(Equal(0)) Expect(client.TestData.TriedToRemoveImageCount).To(Equal(0))
}) })

@ -130,10 +130,15 @@ func RegisterSystemFlags(rootCmd *cobra.Command) {
"Restart containers one at a time") "Restart containers one at a time")
flags.BoolP( flags.BoolP(
"http-api", "http-api-update",
"", "",
viper.GetBool("WATCHTOWER_HTTP_API"), viper.GetBool("WATCHTOWER_HTTP_API_UPDATE"),
"Runs Watchtower in HTTP API mode, so that image updates must to be triggered by a request") "Runs Watchtower in HTTP API mode, so that image updates must to be triggered by a request")
flags.BoolP(
"http-api-metrics",
"",
viper.GetBool("WATCHTOWER_HTTP_API_METRICS"),
"Runs Watchtower with the Prometheus metrics API enabled")
flags.StringP( flags.StringP(
"http-api-token", "http-api-token",

@ -28,5 +28,6 @@ nav:
- 'Stop signals': 'stop-signals.md' - 'Stop signals': 'stop-signals.md'
- 'Lifecycle hooks': 'lifecycle-hooks.md' - 'Lifecycle hooks': 'lifecycle-hooks.md'
- 'Running multiple instances': 'running-multiple-instances.md' - 'Running multiple instances': 'running-multiple-instances.md'
- 'Metrics': 'metrics.md'
plugins: plugins:
- search - search

@ -1,63 +1,76 @@
package api package api
import ( import (
"errors" "fmt"
"io"
"net/http"
"os"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"net/http"
) )
var ( const tokenMissingMsg = "api token is empty or has not been set. exiting"
lock chan bool
)
func init() { // API is the http server responsible for serving the HTTP API endpoints
lock = make(chan bool, 1) type API struct {
lock <- true Token string
hasHandlers bool
} }
// SetupHTTPUpdates configures the endpoint needed for triggering updates via http // New is a factory function creating a new API instance
func SetupHTTPUpdates(apiToken string, updateFunction func()) error { func New(token string) *API {
if apiToken == "" { return &API{
return errors.New("api token is empty or has not been set. not starting api") Token: token,
hasHandlers: false,
} }
}
log.Println("Watchtower HTTP API started.") // RequireToken is wrapper around http.HandleFunc that checks token validity
func (api *API) RequireToken(fn http.HandlerFunc) http.HandlerFunc {
http.HandleFunc("/v1/update", func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
log.Info("Updates triggered by HTTP API request.") if r.Header.Get("Authorization") != fmt.Sprintf("Bearer %s", api.Token) {
log.Errorf("Invalid token \"%s\"", r.Header.Get("Authorization"))
_, err := io.Copy(os.Stdout, r.Body) log.Debugf("Expected token to be \"%s\"", api.Token)
if err != nil {
log.Println(err)
return return
} }
log.Println("Valid token found.")
fn(w, r)
}
}
if r.Header.Get("Token") != apiToken { // RegisterFunc is a wrapper around http.HandleFunc that also sets the flag used to determine whether to launch the API
log.Println("Invalid token. Not updating.") func (api *API) RegisterFunc(path string, fn http.HandlerFunc) {
return api.hasHandlers = true
} http.HandleFunc(path, api.RequireToken(fn))
}
log.Println("Valid token found. Attempting to update.") // RegisterHandler is a wrapper around http.Handler that also sets the flag used to determine whether to launch the API
func (api *API) RegisterHandler(path string, handler http.Handler) {
api.hasHandlers = true
http.Handle(path, api.RequireToken(handler.ServeHTTP))
}
select { // Start the API and serve over HTTP. Requires an API Token to be set.
case chanValue := <-lock: func (api *API) Start(block bool) error {
defer func() { lock <- chanValue }()
updateFunction()
default:
log.Debug("Skipped. Another update already running.")
}
}) if !api.hasHandlers {
log.Debug("Watchtower HTTP API skipped.")
return nil
}
if api.Token == "" {
log.Fatal(tokenMissingMsg)
}
log.Info("Watchtower HTTP API started.")
if block {
runHTTPServer()
} else {
go func() {
runHTTPServer()
}()
}
return nil return nil
} }
// WaitForHTTPUpdates starts the http server and listens for requests. func runHTTPServer() {
func WaitForHTTPUpdates() error { log.Info("Serving HTTP")
log.Fatal(http.ListenAndServe(":8080", nil)) log.Fatal(http.ListenAndServe(":8080", nil))
os.Exit(0)
return nil
} }

@ -0,0 +1,27 @@
package metrics
import (
"github.com/containrrr/watchtower/pkg/metrics"
"net/http"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// Handler is an HTTP handle for serving metric data. It groups the URL path,
// the handler function serving that path, and the metrics instance obtained
// when the handler was created.
type Handler struct {
// Path is the URL path the handler is meant to be registered under
Path string
// Handle is the handler function that writes the metric data to the response
Handle http.HandlerFunc
// Metrics is the metrics instance associated with this handler
Metrics *metrics.Metrics
}
// New is a factory function creating a new metrics Handler.
//
// The returned Handler carries the instance returned by metrics.Default,
// serves requests through the ServeHTTP method of promhttp.Handler, and
// advertises "/v1/metrics" as its path.
func New() *Handler {
	h := new(Handler)

	// Fetch the metrics instance before building the HTTP handler, keeping
	// the same call order as before.
	h.Metrics = metrics.Default()
	h.Handle = promhttp.Handler().ServeHTTP
	h.Path = "/v1/metrics"

	return h
}

@ -0,0 +1,77 @@
package metrics_test
import (
"fmt"
"github.com/containrrr/watchtower/pkg/metrics"
"io/ioutil"
"net/http"
"testing"
"github.com/containrrr/watchtower/pkg/api"
metricsAPI "github.com/containrrr/watchtower/pkg/api/metrics"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
const Token = "123123123"
// TestContainer is the `go test` entry point for this package: it registers
// Fail as the failure handler and runs the specs under the suite name
// "Metrics Suite".
func TestContainer(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Metrics Suite")
}
// runTestServer registers the metrics handler directly on the default mux
// under m.Path and starts serving on ":8080" in a background goroutine.
// The error returned by http.ListenAndServe is discarded.
func runTestServer(m *metricsAPI.Handler) {
	http.Handle(m.Path, m.Handle)
	go http.ListenAndServe(":8080", nil)
}
// getWithToken performs a GET request against url using client c, with the
// suite's Token sent as a bearer token in the Authorization header.
//
// It returns the response from c.Do. If the request cannot be constructed
// (for example because url is malformed) the construction error is returned
// and the response is nil, rather than dereferencing a nil request.
func getWithToken(c http.Client, url string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}

	req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", Token))
	return c.Do(req)
}
// Spec for the metrics endpoint: registers the metrics handler on a
// token-protected API, starts it without blocking, and checks the text
// returned by /v1/metrics after scans have been registered.
var _ = Describe("the metrics", func() {
	httpAPI := api.New(Token)
	m := metricsAPI.New()

	httpAPI.RegisterHandler(m.Path, m.Handle)
	httpAPI.Start(false)

	// fetchMetrics requests the metrics endpoint with the suite token and
	// returns the response body as a string. The body is always closed, and
	// a failed request or read fails the spec instead of being ignored.
	fetchMetrics := func(c http.Client) string {
		res, err := getWithToken(c, "http://localhost:8080/v1/metrics")
		Expect(err).NotTo(HaveOccurred())
		defer res.Body.Close()

		contents, err := ioutil.ReadAll(res.Body)
		Expect(err).NotTo(HaveOccurred())

		fmt.Printf("%s\n", string(contents))
		return string(contents)
	}

	// We should likely split this into multiple tests, but as prometheus requires a restart of the binary
	// to reset the metrics and gauges, we'll just do it all at once.
	It("should serve metrics", func() {
		metric := &metrics.Metric{
			Scanned: 4,
			Updated: 3,
			Failed:  1,
		}
		metrics.RegisterScan(metric)

		c := http.Client{}

		contents := fetchMetrics(c)
		Expect(contents).To(ContainSubstring("watchtower_containers_updated 3"))
		Expect(contents).To(ContainSubstring("watchtower_containers_failed 1"))
		Expect(contents).To(ContainSubstring("watchtower_containers_scanned 4"))
		Expect(contents).To(ContainSubstring("watchtower_scans_total 1"))
		Expect(contents).To(ContainSubstring("watchtower_scans_skipped 0"))

		// A nil metric is how a skipped scan is reported.
		for i := 0; i < 3; i++ {
			metrics.RegisterScan(nil)
		}

		contents = fetchMetrics(c)
		Expect(contents).To(ContainSubstring("watchtower_scans_total 4"))
		Expect(contents).To(ContainSubstring("watchtower_scans_skipped 3"))
	})
})

@ -0,0 +1,50 @@
package update
import (
"io"
"net/http"
"os"
log "github.com/sirupsen/logrus"
)
// lock is a package-level channel used as an update guard: New allocates it
// with a single slot and fills that slot, and Handle only runs the update
// function while it holds the value taken from the channel.
var lock chan bool
// New is a factory function creating a new update Handler.
//
// It replaces the package-level lock with a fresh one-slot channel that
// already holds its token, then returns a Handler that calls updateFn and
// is served under "/v1/update".
func New(updateFn func()) *Handler {
	token := make(chan bool, 1)
	token <- true
	lock = token

	h := new(Handler)
	h.fn = updateFn
	h.Path = "/v1/update"
	return h
}
// Handler is an API handler used for triggering container update scans
type Handler struct {
// fn is the update function that Handle invokes when it obtains the lock
fn func()
// Path is the URL path of the endpoint; New sets it to "/v1/update"
Path string
}
// Handle is the http handler function that triggers an update scan.
//
// The request body is first copied to standard output; if that copy fails
// the error is logged and the request is dropped. The update function then
// runs only if the lock token can be taken without waiting; otherwise the
// request is skipped because another update is already in progress.
func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
	log.Info("Updates triggered by HTTP API request.")

	if _, err := io.Copy(os.Stdout, r.Body); err != nil {
		log.Println(err)
		return
	}

	select {
	case token := <-lock:
		// Put the token back once this handler returns so later requests
		// can run an update again.
		defer func() { lock <- token }()
		h.fn()
	default:
		log.Debug("Skipped. Another update already running.")
	}
}

@ -0,0 +1,91 @@
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// metrics is the package-wide Metrics instance. It stays nil until the first
// call to Default, which creates it; later calls return the same instance.
var metrics *Metrics
// Metric is the data points of a single scan: the number of containers that
// were scanned, the number that were updated, and the number for which the
// update failed.
type Metric struct {
	Scanned, Updated, Failed int
}
// Metrics is the handler processing all individual scan metrics
type Metrics struct {
	// channel queues reported scans until HandleUpdate processes them
	channel chan *Metric

	// gauges describing the most recently processed scan
	scanned, updated, failed prometheus.Gauge

	// counters accumulated over all processed scans
	total, skipped prometheus.Counter
}
// Register queues the metrics of an executed scan by sending metric on the
// handler's channel. A nil metric is accepted; HandleUpdate treats it as a
// skipped scan.
func (m *Metrics) Register(metric *Metric) {
	m.channel <- metric
}
// Default returns the package-wide metrics handler, creating it on first use.
//
// Creation registers the three per-scan gauges and the two scan counters
// through promauto, allocates the metric queue with room for 10 entries, and
// starts a goroutine running HandleUpdate on that queue.
//
// NOTE(review): the nil check is not synchronised — confirm the first call
// cannot happen from several goroutines at once.
func Default() *Metrics {
	if metrics == nil {
		gauge := func(name, help string) prometheus.Gauge {
			return promauto.NewGauge(prometheus.GaugeOpts{Name: name, Help: help})
		}
		counter := func(name, help string) prometheus.Counter {
			return promauto.NewCounter(prometheus.CounterOpts{Name: name, Help: help})
		}

		metrics = &Metrics{
			scanned: gauge(
				"watchtower_containers_scanned",
				"Number of containers scanned for changes by watchtower during the last scan",
			),
			updated: gauge(
				"watchtower_containers_updated",
				"Number of containers updated by watchtower during the last scan",
			),
			failed: gauge(
				"watchtower_containers_failed",
				"Number of containers where update failed during the last scan",
			),
			total: counter(
				"watchtower_scans_total",
				"Number of scans since the watchtower started",
			),
			skipped: counter(
				"watchtower_scans_skipped",
				"Number of skipped scans since watchtower started",
			),
			channel: make(chan *Metric, 10),
		}

		go metrics.HandleUpdate(metrics.channel)
	}

	return metrics
}
// RegisterScan enqueues metric on the handler returned by Default, which
// creates that handler on first use.
func RegisterScan(metric *Metric) {
	Default().Register(metric)
}
// HandleUpdate drains channel until it is closed, applying each entry to the
// Prometheus collectors.
//
// Every entry increments the total scan counter. A nil entry additionally
// increments the skipped counter and resets the three gauges to zero; a
// non-nil entry sets the gauges to its Scanned, Updated and Failed values.
func (m *Metrics) HandleUpdate(channel <-chan *Metric) {
	for change := range channel {
		// Skipped or not, each dequeued entry counts as one scan.
		m.total.Inc()

		var scanned, updated, failed float64
		if change == nil {
			// Update was skipped and rescheduled; the gauges fall back to zero.
			m.skipped.Inc()
		} else {
			scanned = float64(change.Scanned)
			updated = float64(change.Updated)
			failed = float64(change.Failed)
		}

		m.scanned.Set(scanned)
		m.updated.Set(updated)
		m.failed.Set(failed)
	}
}

@ -87,5 +87,5 @@ func (n *gotifyTypeNotifier) GetURL() string {
func (n *gotifyTypeNotifier) StartNotification() {} func (n *gotifyTypeNotifier) StartNotification() {}
func (n *gotifyTypeNotifier) SendNotification() {} func (n *gotifyTypeNotifier) SendNotification() {}
func (n *gotifyTypeNotifier) Close() {} func (n *gotifyTypeNotifier) Close() {}
func (n *gotifyTypeNotifier) Levels() []log.Level { return nil } func (n *gotifyTypeNotifier) Levels() []log.Level { return nil }

@ -63,6 +63,6 @@ func (n *msTeamsTypeNotifier) GetURL() string {
func (n *msTeamsTypeNotifier) StartNotification() {} func (n *msTeamsTypeNotifier) StartNotification() {}
func (n *msTeamsTypeNotifier) SendNotification() {} func (n *msTeamsTypeNotifier) SendNotification() {}
func (n *msTeamsTypeNotifier) Close() {} func (n *msTeamsTypeNotifier) Close() {}
func (n *msTeamsTypeNotifier) Levels() []log.Level { return nil } func (n *msTeamsTypeNotifier) Levels() []log.Level { return nil }
func (n *msTeamsTypeNotifier) Fire(entry *log.Entry) error { return nil } func (n *msTeamsTypeNotifier) Fire(entry *log.Entry) error { return nil }

@ -0,0 +1,9 @@
scrape_configs:
- job_name: watchtower
scrape_interval: 5s
metrics_path: /v1/metrics
bearer_token: demotoken
static_configs:
- targets:
- 'watchtower:8080'
Loading…
Cancel
Save