Prometheus support (#450)

Co-authored-by: nils måsén <nils@piksel.se>
Co-authored-by: MihailITPlace <ya.halo-halo@yandex.ru>
Co-authored-by: Sebastiaan Tammer <sebastiaantammer@gmail.com>
pull/757/head
Simon Aronsson 3 years ago committed by GitHub
parent 35490c853d
commit d7d5b25882
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,12 +1,16 @@
package cmd package cmd
import ( import (
metrics2 "github.com/containrrr/watchtower/pkg/metrics"
"os" "os"
"os/signal" "os/signal"
"strconv" "strconv"
"syscall" "syscall"
"time" "time"
"github.com/containrrr/watchtower/pkg/api/metrics"
"github.com/containrrr/watchtower/pkg/api/update"
"github.com/containrrr/watchtower/internal/actions" "github.com/containrrr/watchtower/internal/actions"
"github.com/containrrr/watchtower/internal/flags" "github.com/containrrr/watchtower/internal/flags"
"github.com/containrrr/watchtower/pkg/api" "github.com/containrrr/watchtower/pkg/api"
@ -144,7 +148,10 @@ func PreRun(cmd *cobra.Command, args []string) {
func Run(c *cobra.Command, names []string) { func Run(c *cobra.Command, names []string) {
filter := filters.BuildFilter(names, enableLabel, scope) filter := filters.BuildFilter(names, enableLabel, scope)
runOnce, _ := c.PersistentFlags().GetBool("run-once") runOnce, _ := c.PersistentFlags().GetBool("run-once")
httpAPI, _ := c.PersistentFlags().GetBool("http-api") enableUpdateAPI, _ := c.PersistentFlags().GetBool("http-api-update")
enableMetricsAPI, _ := c.PersistentFlags().GetBool("http-api-metrics")
apiToken, _ := c.PersistentFlags().GetString("http-api-token")
if runOnce { if runOnce {
if noStartupMessage, _ := c.PersistentFlags().GetBool("no-startup-message"); !noStartupMessage { if noStartupMessage, _ := c.PersistentFlags().GetBool("no-startup-message"); !noStartupMessage {
@ -160,17 +167,20 @@ func Run(c *cobra.Command, names []string) {
log.Fatal(err) log.Fatal(err)
} }
if httpAPI { httpAPI := api.New(apiToken)
apiToken, _ := c.PersistentFlags().GetString("http-api-token")
if err := api.SetupHTTPUpdates(apiToken, func() { runUpdatesWithNotifications(filter) }); err != nil { if enableUpdateAPI {
log.Fatal(err) updateHandler := update.New(func() { runUpdatesWithNotifications(filter) })
os.Exit(1) httpAPI.RegisterFunc(updateHandler.Path, updateHandler.Handle)
} }
api.WaitForHTTPUpdates() if enableMetricsAPI {
metricsHandler := metrics.New()
httpAPI.RegisterHandler(metricsHandler.Path, metricsHandler.Handle)
} }
httpAPI.Start(enableUpdateAPI)
if err := runUpgradesOnSchedule(c, filter); err != nil { if err := runUpgradesOnSchedule(c, filter); err != nil {
log.Error(err) log.Error(err)
} }
@ -189,8 +199,11 @@ func runUpgradesOnSchedule(c *cobra.Command, filter t.Filter) error {
select { select {
case v := <-tryLockSem: case v := <-tryLockSem:
defer func() { tryLockSem <- v }() defer func() { tryLockSem <- v }()
runUpdatesWithNotifications(filter) metric := runUpdatesWithNotifications(filter)
metrics2.RegisterScan(metric)
default: default:
// Update was skipped
metrics2.RegisterScan(nil)
log.Debug("Skipped another update already running.") log.Debug("Skipped another update already running.")
} }
@ -222,7 +235,8 @@ func runUpgradesOnSchedule(c *cobra.Command, filter t.Filter) error {
return nil return nil
} }
func runUpdatesWithNotifications(filter t.Filter) { func runUpdatesWithNotifications(filter t.Filter) *metrics2.Metric {
notifier.StartNotification() notifier.StartNotification()
updateParams := t.UpdateParams{ updateParams := t.UpdateParams{
Filter: filter, Filter: filter,
@ -233,9 +247,10 @@ func runUpdatesWithNotifications(filter t.Filter) {
LifecycleHooks: lifecycleHooks, LifecycleHooks: lifecycleHooks,
RollingRestart: rollingRestart, RollingRestart: rollingRestart,
} }
err := actions.Update(client, updateParams) metrics, err := actions.Update(client, updateParams)
if err != nil { if err != nil {
log.Println(err) log.Println(err)
} }
notifier.SendNotification() notifier.SendNotification()
return metrics
} }

@ -0,0 +1,43 @@
version: '3.7'
services:
watchtower:
container_name: watchtower
build:
context: ./
dockerfile: dockerfiles/Dockerfile.dev-self-contained
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
ports:
- 8080:8080
command: --interval 10 --http-api-metrics --http-api-token demotoken --debug prometheus grafana parent child
prometheus:
container_name: prometheus
image: prom/prometheus
volumes:
- ./prometheus/:/etc/prometheus/
- prometheus:/prometheus/
ports:
- 9090:9090
grafana:
container_name: grafana
image: grafana/grafana
ports:
- 3000:3000
environment:
GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-simple-json-datasource
volumes:
- grafana:/var/lib/grafana
- ./grafana:/etc/grafana/provisioning
parent:
image: nginx
container_name: parent
child:
image: nginx:alpine
labels:
com.centurylinklabs.watchtower.depends-on: parent
container_name: child
volumes:
prometheus: {}
grafana: {}

@ -164,7 +164,7 @@ Environment Variable: WATCHTOWER_LABEL_ENABLE
## Without updating containers ## Without updating containers
Will only monitor for new images, send notifications and invoke the [pre-check/post-check hooks](https://containrrr.dev/watchtower/lifecycle-hooks/), but will **not** update the containers. Will only monitor for new images, send notifications and invoke the [pre-check/post-check hooks](https://containrrr.dev/watchtower/lifecycle-hooks/), but will **not** update the containers.
> ### ⚠️ Please note > **⚠️ Please note**
> >
> Due to Docker API limitations the latest image will still be pulled from the registry. > Due to Docker API limitations the latest image will still be pulled from the registry.
@ -238,9 +238,7 @@ Sets an authentication token to HTTP API requests.
Environment Variable: WATCHTOWER_HTTP_API_TOKEN Environment Variable: WATCHTOWER_HTTP_API_TOKEN
Type: String Type: String
Default: - Default: -
``` ```## Filter by scope
## Filter by scope
Update containers that have a `com.centurylinklabs.watchtower.scope` label set with the same value as the given argument. This enables [running multiple instances](https://containrrr.github.io/watchtower/running-multiple-instances). Update containers that have a `com.centurylinklabs.watchtower.scope` label set with the same value as the given argument. This enables [running multiple instances](https://containrrr.github.io/watchtower/running-multiple-instances).
``` ```
@ -250,6 +248,16 @@ Environment Variable: WATCHTOWER_SCOPE
Default: - Default: -
``` ```
## HTTP API Metrics
Enables a metrics endpoint, exposing prometheus metrics via HTTP. See [Metrics](metrics.md) for details.
```
Argument: --http-api-metrics
Environment Variable: WATCHTOWER_HTTP_API_METRICS
Type: Boolean
Default: false
```
## Scheduling ## Scheduling
[Cron expression](https://pkg.go.dev/github.com/robfig/cron@v1.2.0?tab=doc#hdr-CRON_Expression_Format) in 6 fields (rather than the traditional 5) which defines when and how often to check for new images. Either `--interval` or the schedule expression [Cron expression](https://pkg.go.dev/github.com/robfig/cron@v1.2.0?tab=doc#hdr-CRON_Expression_Format) in 6 fields (rather than the traditional 5) which defines when and how often to check for new images. Either `--interval` or the schedule expression
can be defined, but not both. An example: `--schedule "0 0 4 * * *"` can be defined, but not both. An example: `--schedule "0 0 4 * * *"`

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

@ -0,0 +1,26 @@
> **⚠️ Experimental feature**
>
> This feature was added in v1.0.4 and is still considered experimental.
> If you notice any strange behavior, please raise a ticket in the repository issues.
Metrics can be used to track how Watchtower behaves over time.
To use this feature, you have to set an [API token](arguments.md#http-api-token), [enable the metrics API](arguments.md#http-api-metrics),
and create a port mapping for port `8080` on your container.
## Available Metrics
| Name | Type | Description |
| ------------------------------- | ------- | --------------------------------------------------------------------------- |
| `watchtower_containers_scanned` | Gauge | Number of containers scanned for changes by watchtower during the last scan |
| `watchtower_containers_updated` | Gauge | Number of containers updated by watchtower during the last scan |
| `watchtower_containers_failed` | Gauge | Number of containers where update failed during the last scan |
| `watchtower_scans_total` | Counter | Number of scans since watchtower started |
| `watchtower_scans_skipped` | Counter | Number of skipped scans since watchtower started |
## Demo
The repository contains a demo with prometheus and grafana, available through `docker-compose.yml`. This demo
is preconfigured with a dashboard, which will look something like this:
![grafana metrics](assets/grafana-dashboard.png)

@ -1,4 +1,3 @@
# Notifications # Notifications
Watchtower can send notifications when containers are updated. Notifications are sent via hooks in the logging system, [logrus](http://github.com/sirupsen/logrus). Watchtower can send notifications when containers are updated. Notifications are sent via hooks in the logging system, [logrus](http://github.com/sirupsen/logrus).
@ -62,13 +61,13 @@ Example including an SMTP relay:
```yaml ```yaml
--- ---
version: "3.8" version: '3.8'
services: services:
watchtower: watchtower:
image: containrrr/watchtower:latest image: containrrr/watchtower:latest
container_name: watchtower container_name: watchtower
environment: environment:
WATCHTOWER_MONITOR_ONLY: "true" WATCHTOWER_MONITOR_ONLY: 'true'
WATCHTOWER_NOTIFICATIONS: email WATCHTOWER_NOTIFICATIONS: email
WATCHTOWER_NOTIFICATION_EMAIL_FROM: from-address@your-domain.com WATCHTOWER_NOTIFICATION_EMAIL_FROM: from-address@your-domain.com
WATCHTOWER_NOTIFICATION_EMAIL_TO: to-address@your-domain.com WATCHTOWER_NOTIFICATION_EMAIL_TO: to-address@your-domain.com
@ -90,9 +89,9 @@ services:
- 25 - 25
environment: environment:
MAILNAME: somename.your-domain.com MAILNAME: somename.your-domain.com
TLS_KEY: "/etc/ssl/domains/your-domain.com/your-domain.com.key" TLS_KEY: '/etc/ssl/domains/your-domain.com/your-domain.com.key'
TLS_CRT: "/etc/ssl/domains/your-domain.com/your-domain.com.crt" TLS_CRT: '/etc/ssl/domains/your-domain.com/your-domain.com.crt'
TLS_CA: "/etc/ssl/domains/your-domain.com/intermediate.crt" TLS_CA: '/etc/ssl/domains/your-domain.com/intermediate.crt'
volumes: volumes:
- /etc/ssl/domains/your-domain.com/:/etc/ssl/domains/your-domain.com/:ro - /etc/ssl/domains/your-domain.com/:/etc/ssl/domains/your-domain.com/:ro
networks: networks:

@ -46,6 +46,7 @@ require (
github.com/opencontainers/image-spec v1.0.1 // indirect github.com/opencontainers/image-spec v1.0.1 // indirect
github.com/opencontainers/runc v0.1.1 // indirect github.com/opencontainers/runc v0.1.1 // indirect
github.com/pkg/errors v0.8.1 // indirect github.com/pkg/errors v0.8.1 // indirect
github.com/prometheus/client_golang v0.9.3
github.com/robfig/cron v0.0.0-20180505203441-b41be1df6967 github.com/robfig/cron v0.0.0-20180505203441-b41be1df6967
github.com/sirupsen/logrus v1.4.1 github.com/sirupsen/logrus v1.4.1
github.com/spf13/cobra v0.0.3 github.com/spf13/cobra v0.0.3

@ -0,0 +1,293 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 1,
"links": [],
"panels": [
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 1,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "7.3.6",
"targets": [
{
"expr": "watchtower_scans_total",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Total Scans",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "watchtower_containers_scanned{instance=\"watchtower:8080\", job=\"watchtower\"}"
},
"properties": [
{
"id": "displayName",
"value": "Scanned"
}
]
},
{
"matcher": {
"id": "byName",
"options": "watchtower_containers_failed{instance=\"watchtower:8080\", job=\"watchtower\"}"
},
"properties": [
{
"id": "displayName",
"value": "Failed"
}
]
},
{
"matcher": {
"id": "byName",
"options": "watchtower_containers_updated{instance=\"watchtower:8080\", job=\"watchtower\"}"
},
"properties": [
{
"id": "displayName",
"value": "Updated"
}
]
}
]
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 6,
"x": 1,
"y": 0
},
"hiddenSeries": false,
"id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.6",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "watchtower_containers_scanned",
"interval": "",
"legendFormat": "",
"refId": "A"
},
{
"expr": "watchtower_containers_failed",
"interval": "",
"legendFormat": "",
"refId": "B"
},
{
"expr": "watchtower_containers_updated",
"interval": "",
"legendFormat": "",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Container Updates",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 1,
"x": 0,
"y": 4
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "7.3.6",
"targets": [
{
"expr": "watchtower_scans_skipped",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Skipped Scans",
"type": "stat"
}
],
"refresh": false,
"schemaVersion": 26,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Watchtower",
"uid": "d7bdoT-Gz",
"version": 1
}

@ -0,0 +1,11 @@
apiVersion: 1
providers:
- name: 'Prometheus'
orgId: 1
folder: ''
type: file
disableDeletion: false
editable: true
options:
path: /etc/grafana/provisioning/dashboards

@ -0,0 +1,8 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true

@ -1,10 +1,11 @@
package actions_test package actions_test
import ( import (
"github.com/containrrr/watchtower/internal/actions"
"testing" "testing"
"time" "time"
"github.com/containrrr/watchtower/internal/actions"
"github.com/containrrr/watchtower/pkg/container" "github.com/containrrr/watchtower/pkg/container"
"github.com/containrrr/watchtower/pkg/container/mocks" "github.com/containrrr/watchtower/pkg/container/mocks"

@ -5,6 +5,7 @@ import (
"github.com/containrrr/watchtower/internal/util" "github.com/containrrr/watchtower/internal/util"
"github.com/containrrr/watchtower/pkg/container" "github.com/containrrr/watchtower/pkg/container"
"github.com/containrrr/watchtower/pkg/lifecycle" "github.com/containrrr/watchtower/pkg/lifecycle"
metrics2 "github.com/containrrr/watchtower/pkg/metrics"
"github.com/containrrr/watchtower/pkg/sorter" "github.com/containrrr/watchtower/pkg/sorter"
"github.com/containrrr/watchtower/pkg/types" "github.com/containrrr/watchtower/pkg/types"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
@ -14,8 +15,10 @@ import (
// used to start those containers have been updated. If a change is detected in // used to start those containers have been updated. If a change is detected in
// any of the images, the associated containers are stopped and restarted with // any of the images, the associated containers are stopped and restarted with
// the new image. // the new image.
func Update(client container.Client, params types.UpdateParams) error { func Update(client container.Client, params types.UpdateParams) (*metrics2.Metric, error) {
log.Debug("Checking containers for updated images") log.Debug("Checking containers for updated images")
metric := &metrics2.Metric{}
staleCount := 0
if params.LifecycleHooks { if params.LifecycleHooks {
lifecycle.ExecutePreChecks(client, params) lifecycle.ExecutePreChecks(client, params)
@ -23,9 +26,11 @@ func Update(client container.Client, params types.UpdateParams) error {
containers, err := client.ListContainers(params.Filter) containers, err := client.ListContainers(params.Filter)
if err != nil { if err != nil {
return err return nil, err
} }
staleCheckFailed := 0
for i, targetContainer := range containers { for i, targetContainer := range containers {
stale, err := client.IsContainerStale(targetContainer) stale, err := client.IsContainerStale(targetContainer)
if stale && !params.NoRestart && !params.MonitorOnly && !targetContainer.IsMonitorOnly() && !targetContainer.HasImageInfo() { if stale && !params.NoRestart && !params.MonitorOnly && !targetContainer.IsMonitorOnly() && !targetContainer.HasImageInfo() {
@ -34,13 +39,20 @@ func Update(client container.Client, params types.UpdateParams) error {
if err != nil { if err != nil {
log.Infof("Unable to update container %q: %v. Proceeding to next.", containers[i].Name(), err) log.Infof("Unable to update container %q: %v. Proceeding to next.", containers[i].Name(), err)
stale = false stale = false
staleCheckFailed++
metric.Failed++
} }
containers[i].Stale = stale containers[i].Stale = stale
if stale {
staleCount++
}
} }
containers, err = sorter.SortByDependencies(containers) containers, err = sorter.SortByDependencies(containers)
metric.Scanned = len(containers)
if err != nil { if err != nil {
return err return nil, err
} }
checkDependencies(containers) checkDependencies(containers)
@ -55,24 +67,32 @@ func Update(client container.Client, params types.UpdateParams) error {
} }
if params.RollingRestart { if params.RollingRestart {
performRollingRestart(containersToUpdate, client, params) metric.Failed += performRollingRestart(containersToUpdate, client, params)
} else { } else {
stopContainersInReversedOrder(containersToUpdate, client, params) metric.Failed += stopContainersInReversedOrder(containersToUpdate, client, params)
restartContainersInSortedOrder(containersToUpdate, client, params) metric.Failed += restartContainersInSortedOrder(containersToUpdate, client, params)
} }
metric.Updated = staleCount - (metric.Failed - staleCheckFailed)
if params.LifecycleHooks { if params.LifecycleHooks {
lifecycle.ExecutePostChecks(client, params) lifecycle.ExecutePostChecks(client, params)
} }
return nil return metric, nil
} }
func performRollingRestart(containers []container.Container, client container.Client, params types.UpdateParams) { func performRollingRestart(containers []container.Container, client container.Client, params types.UpdateParams) int {
cleanupImageIDs := make(map[string]bool) cleanupImageIDs := make(map[string]bool)
failed := 0
for i := len(containers) - 1; i >= 0; i-- { for i := len(containers) - 1; i >= 0; i-- {
if containers[i].Stale { if containers[i].Stale {
stopStaleContainer(containers[i], client, params) if err := stopStaleContainer(containers[i], client, params); err != nil {
restartStaleContainer(containers[i], client, params) failed++
}
if err := restartStaleContainer(containers[i], client, params); err != nil {
failed++
}
cleanupImageIDs[containers[i].ImageID()] = true cleanupImageIDs[containers[i].ImageID()] = true
} }
} }
@ -80,50 +100,63 @@ func performRollingRestart(containers []container.Container, client container.Cl
if params.Cleanup { if params.Cleanup {
cleanupImages(client, cleanupImageIDs) cleanupImages(client, cleanupImageIDs)
} }
return failed
} }
func stopContainersInReversedOrder(containers []container.Container, client container.Client, params types.UpdateParams) { func stopContainersInReversedOrder(containers []container.Container, client container.Client, params types.UpdateParams) int {
failed := 0
for i := len(containers) - 1; i >= 0; i-- { for i := len(containers) - 1; i >= 0; i-- {
stopStaleContainer(containers[i], client, params) if err := stopStaleContainer(containers[i], client, params); err != nil {
failed++
}
} }
return failed
} }
func stopStaleContainer(container container.Container, client container.Client, params types.UpdateParams) { func stopStaleContainer(container container.Container, client container.Client, params types.UpdateParams) error {
if container.IsWatchtower() { if container.IsWatchtower() {
log.Debugf("This is the watchtower container %s", container.Name()) log.Debugf("This is the watchtower container %s", container.Name())
return return nil
} }
if !container.Stale { if !container.Stale {
return return nil
} }
if params.LifecycleHooks { if params.LifecycleHooks {
if err := lifecycle.ExecutePreUpdateCommand(client, container); err != nil { if err := lifecycle.ExecutePreUpdateCommand(client, container); err != nil {
log.Error(err) log.Error(err)
log.Info("Skipping container as the pre-update command failed") log.Info("Skipping container as the pre-update command failed")
return return err
} }
} }
if err := client.StopContainer(container, params.Timeout); err != nil { if err := client.StopContainer(container, params.Timeout); err != nil {
log.Error(err) log.Error(err)
return err
} }
return nil
} }
func restartContainersInSortedOrder(containers []container.Container, client container.Client, params types.UpdateParams) { func restartContainersInSortedOrder(containers []container.Container, client container.Client, params types.UpdateParams) int {
imageIDs := make(map[string]bool) imageIDs := make(map[string]bool)
for _, staleContainer := range containers { failed := 0
if !staleContainer.Stale {
for _, c := range containers {
if !c.Stale {
continue continue
} }
restartStaleContainer(staleContainer, client, params) if err := restartStaleContainer(c, client, params); err != nil {
imageIDs[staleContainer.ImageID()] = true failed++
}
imageIDs[c.ImageID()] = true
} }
if params.Cleanup { if params.Cleanup {
cleanupImages(client, imageIDs) cleanupImages(client, imageIDs)
} }
return failed
} }
func cleanupImages(client container.Client, imageIDs map[string]bool) { func cleanupImages(client container.Client, imageIDs map[string]bool) {
@ -134,7 +167,7 @@ func cleanupImages(client container.Client, imageIDs map[string]bool) {
} }
} }
func restartStaleContainer(container container.Container, client container.Client, params types.UpdateParams) { func restartStaleContainer(container container.Container, client container.Client, params types.UpdateParams) error {
// Since we can't shutdown a watchtower container immediately, we need to // Since we can't shutdown a watchtower container immediately, we need to
// start the new one while the old one is still running. This prevents us // start the new one while the old one is still running. This prevents us
// from re-using the same container name so we first rename the current // from re-using the same container name so we first rename the current
@ -142,17 +175,19 @@ func restartStaleContainer(container container.Container, client container.Clien
if container.IsWatchtower() { if container.IsWatchtower() {
if err := client.RenameContainer(container, util.RandName()); err != nil { if err := client.RenameContainer(container, util.RandName()); err != nil {
log.Error(err) log.Error(err)
return return nil
} }
} }
if !params.NoRestart { if !params.NoRestart {
if newContainerID, err := client.StartContainer(container); err != nil { if newContainerID, err := client.StartContainer(container); err != nil {
log.Error(err) log.Error(err)
return err
} else if container.Stale && params.LifecycleHooks { } else if container.Stale && params.LifecycleHooks {
lifecycle.ExecutePostUpdateCommand(client, newContainerID) lifecycle.ExecutePostUpdateCommand(client, newContainerID)
} }
} }
return nil
} }
func checkDependencies(containers []container.Container) { func checkDependencies(containers []container.Container) {

@ -59,7 +59,7 @@ var _ = Describe("the update action", func() {
When("there are multiple containers using the same image", func() { When("there are multiple containers using the same image", func() {
It("should only try to remove the image once", func() { It("should only try to remove the image once", func() {
err := actions.Update(client, types.UpdateParams{Cleanup: true}) _, err := actions.Update(client, types.UpdateParams{Cleanup: true})
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1)) Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1))
}) })
@ -75,7 +75,7 @@ var _ = Describe("the update action", func() {
time.Now(), time.Now(),
), ),
) )
err := actions.Update(client, types.UpdateParams{Cleanup: true}) _, err := actions.Update(client, types.UpdateParams{Cleanup: true})
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
Expect(client.TestData.TriedToRemoveImageCount).To(Equal(2)) Expect(client.TestData.TriedToRemoveImageCount).To(Equal(2))
}) })
@ -83,7 +83,7 @@ var _ = Describe("the update action", func() {
When("performing a rolling restart update", func() { When("performing a rolling restart update", func() {
It("should try to remove the image once", func() { It("should try to remove the image once", func() {
err := actions.Update(client, types.UpdateParams{Cleanup: true, RollingRestart: true}) _, err := actions.Update(client, types.UpdateParams{Cleanup: true, RollingRestart: true})
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1)) Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1))
}) })
@ -121,7 +121,7 @@ var _ = Describe("the update action", func() {
}) })
It("should not update those containers", func() { It("should not update those containers", func() {
err := actions.Update(client, types.UpdateParams{Cleanup: true}) _, err := actions.Update(client, types.UpdateParams{Cleanup: true})
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1)) Expect(client.TestData.TriedToRemoveImageCount).To(Equal(1))
}) })
@ -151,7 +151,7 @@ var _ = Describe("the update action", func() {
}) })
It("should not update any containers", func() { It("should not update any containers", func() {
err := actions.Update(client, types.UpdateParams{MonitorOnly: true}) _, err := actions.Update(client, types.UpdateParams{MonitorOnly: true})
Expect(err).NotTo(HaveOccurred()) Expect(err).NotTo(HaveOccurred())
Expect(client.TestData.TriedToRemoveImageCount).To(Equal(0)) Expect(client.TestData.TriedToRemoveImageCount).To(Equal(0))
}) })

@ -130,10 +130,15 @@ func RegisterSystemFlags(rootCmd *cobra.Command) {
"Restart containers one at a time") "Restart containers one at a time")
flags.BoolP( flags.BoolP(
"http-api", "http-api-update",
"", "",
viper.GetBool("WATCHTOWER_HTTP_API"), viper.GetBool("WATCHTOWER_HTTP_API_UPDATE"),
"Runs Watchtower in HTTP API mode, so that image updates must to be triggered by a request") "Runs Watchtower in HTTP API mode, so that image updates must to be triggered by a request")
flags.BoolP(
"http-api-metrics",
"",
viper.GetBool("WATCHTOWER_HTTP_API_METRICS"),
"Runs Watchtower with the Prometheus metrics API enabled")
flags.StringP( flags.StringP(
"http-api-token", "http-api-token",

@ -28,5 +28,6 @@ nav:
- 'Stop signals': 'stop-signals.md' - 'Stop signals': 'stop-signals.md'
- 'Lifecycle hooks': 'lifecycle-hooks.md' - 'Lifecycle hooks': 'lifecycle-hooks.md'
- 'Running multiple instances': 'running-multiple-instances.md' - 'Running multiple instances': 'running-multiple-instances.md'
- 'Metrics': 'metrics.md'
plugins: plugins:
- search - search

@ -1,63 +1,76 @@
package api package api
import ( import (
"errors" "fmt"
"io"
"net/http"
"os"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"net/http"
) )
var ( const tokenMissingMsg = "api token is empty or has not been set. exiting"
lock chan bool
)
func init() { // API is the http server responsible for serving the HTTP API endpoints
lock = make(chan bool, 1) type API struct {
lock <- true Token string
hasHandlers bool
} }
// SetupHTTPUpdates configures the endpoint needed for triggering updates via http // New is a factory function creating a new API instance
func SetupHTTPUpdates(apiToken string, updateFunction func()) error { func New(token string) *API {
if apiToken == "" { return &API{
return errors.New("api token is empty or has not been set. not starting api") Token: token,
hasHandlers: false,
}
} }
log.Println("Watchtower HTTP API started.") // RequireToken is wrapper around http.HandleFunc that checks token validity
func (api *API) RequireToken(fn http.HandlerFunc) http.HandlerFunc {
http.HandleFunc("/v1/update", func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
log.Info("Updates triggered by HTTP API request.") if r.Header.Get("Authorization") != fmt.Sprintf("Bearer %s", api.Token) {
log.Errorf("Invalid token \"%s\"", r.Header.Get("Authorization"))
_, err := io.Copy(os.Stdout, r.Body) log.Debugf("Expected token to be \"%s\"", api.Token)
if err != nil {
log.Println(err)
return return
} }
log.Println("Valid token found.")
fn(w, r)
}
}
if r.Header.Get("Token") != apiToken { // RegisterFunc is a wrapper around http.HandleFunc that also sets the flag used to determine whether to launch the API
log.Println("Invalid token. Not updating.") func (api *API) RegisterFunc(path string, fn http.HandlerFunc) {
return api.hasHandlers = true
http.HandleFunc(path, api.RequireToken(fn))
}
// RegisterHandler is a wrapper around http.Handler that also sets the flag used to determine whether to launch the API
func (api *API) RegisterHandler(path string, handler http.Handler) {
api.hasHandlers = true
http.Handle(path, api.RequireToken(handler.ServeHTTP))
} }
log.Println("Valid token found. Attempting to update.") // Start the API and serve over HTTP. Requires an API Token to be set.
func (api *API) Start(block bool) error {
select { if !api.hasHandlers {
case chanValue := <-lock: log.Debug("Watchtower HTTP API skipped.")
defer func() { lock <- chanValue }() return nil
updateFunction()
default:
log.Debug("Skipped. Another update already running.")
} }
}) if api.Token == "" {
log.Fatal(tokenMissingMsg)
}
log.Info("Watchtower HTTP API started.")
if block {
runHTTPServer()
} else {
go func() {
runHTTPServer()
}()
}
return nil return nil
} }
// WaitForHTTPUpdates starts the http server and listens for requests. func runHTTPServer() {
func WaitForHTTPUpdates() error { log.Info("Serving HTTP")
log.Fatal(http.ListenAndServe(":8080", nil)) log.Fatal(http.ListenAndServe(":8080", nil))
os.Exit(0)
return nil
} }

@ -0,0 +1,27 @@
package metrics
import (
"github.com/containrrr/watchtower/pkg/metrics"
"net/http"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// Handler is an HTTP handle for serving metric data.
// It bundles the route, the handler function and the metrics instance
// so callers can register it with an HTTP API.
type Handler struct {
	// Path is the URL path the handler is meant to be registered under.
	Path string
	// Handle is the function that serves requests for Path.
	Handle http.HandlerFunc
	// Metrics is the metrics instance associated with this handler.
	Metrics *metrics.Metrics
}
// New is a factory function creating a new metrics Handler.
// The handler is bound to the "/v1/metrics" path, serves requests through the
// promhttp handler and carries the shared metrics instance from metrics.Default.
func New() *Handler {
	// Obtain the shared metrics instance first, as the original did.
	shared := metrics.Default()

	h := &Handler{
		Path:    "/v1/metrics",
		Metrics: shared,
	}
	h.Handle = promhttp.Handler().ServeHTTP
	return h
}

@ -0,0 +1,77 @@
package metrics_test
import (
"fmt"
"github.com/containrrr/watchtower/pkg/metrics"
"io/ioutil"
"net/http"
"testing"
"github.com/containrrr/watchtower/pkg/api"
metricsAPI "github.com/containrrr/watchtower/pkg/api/metrics"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
// Token is the API token used by this suite: the API under test is created
// with it and test requests send it as a bearer token.
const Token = "123123123"
// TestContainer is the `go test` entry point that runs the "Metrics Suite" specs.
func TestContainer(t *testing.T) {
	// Route assertion failures to the spec runner's Fail handler.
	RegisterFailHandler(Fail)
	RunSpecs(t, "Metrics Suite")
}
// runTestServer registers the metrics handler on the default serve mux and
// starts serving on :8080 in a background goroutine. The result of
// ListenAndServe is discarded.
func runTestServer(m *metricsAPI.Handler) {
	http.Handle(m.Path, m.Handle)
	go http.ListenAndServe(":8080", nil)
}
// getWithToken issues a GET request for url using client c, authenticating
// with the suite's Token as a bearer token in the Authorization header.
//
// It returns the response from c.Do, or an error if the request could not be
// built (for example because url is malformed) or the round trip failed.
func getWithToken(c http.Client, url string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		// Previously this error was dropped, which led to a nil-pointer
		// panic on req.Header below instead of a reportable failure.
		return nil, fmt.Errorf("building request for %q: %w", url, err)
	}
	req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", Token))
	return c.Do(req)
}
// Spec for the metrics endpoint: registers the metrics handler on a
// token-protected API, starts it without blocking, and checks the served
// output after scans have been registered.
var _ = Describe("the metrics", func() {
	httpAPI := api.New(Token)
	m := metricsAPI.New()
	httpAPI.RegisterHandler(m.Path, m.Handle)
	httpAPI.Start(false)

	// scrape fetches the metrics endpoint with the suite token and returns the
	// response body as a string. It fails the spec on request or read errors
	// and always closes the response body so the connection is not leaked.
	scrape := func(c http.Client) string {
		res, err := getWithToken(c, "http://localhost:8080/v1/metrics")
		Expect(err).NotTo(HaveOccurred())
		defer res.Body.Close()

		contents, err := ioutil.ReadAll(res.Body)
		Expect(err).NotTo(HaveOccurred())

		fmt.Printf("%s\n", string(contents))
		return string(contents)
	}

	// We should likely split this into multiple tests, but as prometheus requires a restart of the binary
	// to reset the metrics and gauges, we'll just do it all at once.
	It("should serve metrics", func() {
		metric := &metrics.Metric{
			Scanned: 4,
			Updated: 3,
			Failed:  1,
		}
		metrics.RegisterScan(metric)

		c := http.Client{}

		// NOTE(review): registered scans are applied asynchronously by the
		// metrics package, so these assertions rely on that processing having
		// finished before the scrape is answered.
		contents := scrape(c)
		Expect(contents).To(ContainSubstring("watchtower_containers_updated 3"))
		Expect(contents).To(ContainSubstring("watchtower_containers_failed 1"))
		Expect(contents).To(ContainSubstring("watchtower_containers_scanned 4"))
		Expect(contents).To(ContainSubstring("watchtower_scans_total 1"))
		Expect(contents).To(ContainSubstring("watchtower_scans_skipped 0"))

		// A nil metric registers a skipped scan.
		for i := 0; i < 3; i++ {
			metrics.RegisterScan(nil)
		}

		contents = scrape(c)
		Expect(contents).To(ContainSubstring("watchtower_scans_total 4"))
		Expect(contents).To(ContainSubstring("watchtower_scans_skipped 3"))
	})
})

@ -0,0 +1,50 @@
package update
import (
"io"
"net/http"
"os"
log "github.com/sirupsen/logrus"
)
var (
	// lock is a one-slot channel used as a try-lock for API-triggered updates:
	// New fills it with a single value, and Handle takes that value for the
	// duration of an update, skipping the update when the slot is empty.
	lock chan bool
)
// New is a factory function creating a new update Handler.
// updateFn is the function invoked when an update is triggered through the
// handler; the handler is bound to the "/v1/update" path.
// Each call also re-creates the package-level lock with its slot filled.
func New(updateFn func()) *Handler {
	handler := &Handler{
		Path: "/v1/update",
		fn:   updateFn,
	}

	// One buffered value means "no update is currently running".
	lock = make(chan bool, 1)
	lock <- true

	return handler
}
// Handler is an API handler used for triggering container update scans
type Handler struct {
	// fn is the update function invoked by Handle.
	fn func()
	// Path is the URL path the handler is meant to be registered under.
	Path string
}
// Handle is the HTTP handler function that triggers an update.
// It copies the request body to stdout, then runs the update function if the
// package-level lock can be taken; if another update holds the lock, the
// request is skipped. Nothing is written to w explicitly.
func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
	log.Info("Updates triggered by HTTP API request.")

	if _, copyErr := io.Copy(os.Stdout, r.Body); copyErr != nil {
		log.Println(copyErr)
		return
	}

	select {
	case held := <-lock:
		// Hand the value back once the update function returns.
		defer func() { lock <- held }()
		h.fn()
	default:
		log.Debug("Skipped. Another update already running.")
	}
}

@ -0,0 +1,91 @@
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// metrics is the package-wide Metrics instance created on first use by Default.
var metrics *Metrics
// Metric is the data points of a single scan
type Metric struct {
	// Scanned is the value published to the watchtower_containers_scanned gauge.
	Scanned int
	// Updated is the value published to the watchtower_containers_updated gauge.
	Updated int
	// Failed is the value published to the watchtower_containers_failed gauge.
	Failed int
}
// Metrics is the handler processing all individual scan metrics
type Metrics struct {
	// channel queues scan results (nil for a skipped scan) until HandleUpdate consumes them.
	channel chan *Metric
	// scanned, updated and failed hold the values of the most recently processed scan.
	scanned prometheus.Gauge
	updated prometheus.Gauge
	failed prometheus.Gauge
	// total is incremented for every processed entry, skipped or not.
	total prometheus.Counter
	// skipped is incremented for every processed nil entry.
	skipped prometheus.Counter
}
// Register enqueues the result of an executed scan on the internal channel.
// A nil metric is accepted and is treated as a skipped scan by HandleUpdate.
// The send blocks while the channel cannot accept another entry.
func (m *Metrics) Register(metric *Metric) {
	m.channel <- metric
}
// Default returns the package-wide Metrics instance, creating it on first use.
// Creation builds the gauges and counters through promauto, allocates the
// buffered metric channel and starts HandleUpdate on it in a goroutine.
// NOTE(review): the nil check is not synchronized; confirm first use cannot
// happen from several goroutines at once.
func Default() *Metrics {
	if metrics == nil {
		newGauge := func(name, help string) prometheus.Gauge {
			return promauto.NewGauge(prometheus.GaugeOpts{Name: name, Help: help})
		}
		newCounter := func(name, help string) prometheus.Counter {
			return promauto.NewCounter(prometheus.CounterOpts{Name: name, Help: help})
		}

		metrics = &Metrics{
			scanned: newGauge(
				"watchtower_containers_scanned",
				"Number of containers scanned for changes by watchtower during the last scan",
			),
			updated: newGauge(
				"watchtower_containers_updated",
				"Number of containers updated by watchtower during the last scan",
			),
			failed: newGauge(
				"watchtower_containers_failed",
				"Number of containers where update failed during the last scan",
			),
			total: newCounter(
				"watchtower_scans_total",
				"Number of scans since the watchtower started",
			),
			skipped: newCounter(
				"watchtower_scans_skipped",
				"Number of skipped scans since watchtower started",
			),
			channel: make(chan *Metric, 10),
		}

		// Consume queued scan results in the background.
		go metrics.HandleUpdate(metrics.channel)
	}

	return metrics
}
// RegisterScan enqueues metric on the package-wide Metrics instance,
// creating that instance first if it does not exist yet.
func RegisterScan(metric *Metric) {
	Default().Register(metric)
}
// HandleUpdate dequeues entries from channel until it is closed and applies
// each one to the gauges and counters. Every entry increments the total scan
// counter; a nil entry additionally increments the skipped counter and resets
// the per-scan gauges to zero, while a non-nil entry sets them to its values.
func (m *Metrics) HandleUpdate(channel <-chan *Metric) {
	for change := range channel {
		m.total.Inc()

		// The gauges default to zero, which is what a skipped scan reports.
		var scanned, updated, failed float64
		if change == nil {
			// Update was skipped and rescheduled
			m.skipped.Inc()
		} else {
			scanned = float64(change.Scanned)
			updated = float64(change.Updated)
			failed = float64(change.Failed)
		}

		m.scanned.Set(scanned)
		m.updated.Set(updated)
		m.failed.Set(failed)
	}
}

@ -0,0 +1,9 @@
# Demo Prometheus scrape configuration for watchtower's metrics endpoint.
scrape_configs:
  - job_name: watchtower
    scrape_interval: 5s
    metrics_path: /v1/metrics
    # Sent as "Authorization: Bearer <token>"; must match the API token
    # watchtower is configured with.
    bearer_token: demotoken
    static_configs:
      - targets:
          - 'watchtower:8080'
Loading…
Cancel
Save