Monitor your cronjobs activity. Use the exported prometheus metrics to get alerts when your cronjob fails, takes longer than expected or does not start when it should.
# Register the sk8l chart repository under the local name "sk8l".
helm repo add sk8l https://sk8l.io/charts
# Refresh the locally cached chart index.
helm repo update
# List the charts that match "sk8l".
helm search repo sk8l
# Install the sk8l/sk8l chart, or upgrade the release if it already exists.
# Replace [RELEASE_NAME] and [NAMESPACE] with your own values; the same
# namespace is passed to both --set flags.
helm upgrade --install [RELEASE_NAME] sk8l/sk8l \
--set namespace=[NAMESPACE] \
--set serviceAccount.metadata.namespace=[NAMESPACE]
sk8l exposes different Prometheus metrics that are listed below.
Name | Description |
---|---|
sk8l_[NAMESPACE]_registered_cronjobs_total | Total registered cronjobs |
sk8l_[NAMESPACE]_completed_cronjobs_total | Total completed cronjobs |
sk8l_[NAMESPACE]_failing_cronjobs_total | Total cronjobs failures |
sk8l_[NAMESPACE]_running_cronjobs_total | Amount of current running cronjobs |
sk8l_[NAMESPACE]_[CRONJOB_NAME]_completion_total | Total completions of a cronjobs |
sk8l_[NAMESPACE]_[CRONJOB_NAME]_duration_seconds | Current duration of a running cronjob |
sk8l_[NAMESPACE]_[CRONJOB_NAME]_failure_total | Total failures of a cronjob |
# Prometheus configuration that discovers pods through the Kubernetes API
# and scrapes the ones annotated with an sk8l scrape port, over HTTPS.
global:
  scrape_interval: 10s

scrape_configs:
  - job_name: sk8l-api-pod
    scheme: https
    tls_config:
      ca_file: '/etc/sk8l-certs/ca-cert.pem'
      # cert_file/key_file are the certificate and key Prometheus presents
      # to the target. The private key field is `key_file` (not `key`).
      # NOTE(review): the paths are named "server-*" although they are used
      # as a client certificate here; confirm this is intended.
      cert_file: '/etc/sk8l-certs/server-cert.pem'
      key_file: '/etc/sk8l-certs/server-key.pem'
      min_version: 'TLS12'
      insecure_skip_verify: false
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      # Keep only pods whose sk8l scrape-port annotation is set.
      # NOTE(review): relabel regexes are fully anchored, so this matches
      # exactly four digits; ports such as 443 or 50051 would be dropped.
      # Confirm that this restriction is intended.
      - source_labels:
          - __meta_kubernetes_pod_annotation_sk8l_io_api_scrape_port
        action: keep
        regex: '(\d{4})'
      # Rewrite the scrape address to <pod host>:<annotated port>, discarding
      # any port already present in __address__. The two source labels are
      # joined with ";" before matching, hence the ";" in the regex.
      - source_labels:
          - __address__
          - __meta_kubernetes_pod_annotation_sk8l_io_api_scrape_port
        action: replace
        regex: '([^:]+)(?::\d+)?;(\d+)'
        replacement: '$1:$2'
        target_label: __address__