summary | refs | log | tree | commit | diff | stats
path: root/roles/os_zabbix
diff options
context:
space:
mode:
author: Joel Diaz <jdiaz@redhat.com> 2015-12-16 15:59:26 -0500
committer: Joel Diaz <jdiaz@redhat.com> 2015-12-16 15:59:26 -0500
commit: c607f1ba93be5e9f16723074ff97ffd27b025f8c (patch)
tree: 57d78b69cd526a57ad0258ba468868561626f384 /roles/os_zabbix
parent: 4dfe16e0e567a633cedd8ee56ffaed5110ca1629 (diff)
parent: f826925c8217d5c9f150ef03ca8deb718c37c157 (diff)
download: openshift-c607f1ba93be5e9f16723074ff97ffd27b025f8c.tar.gz
openshift-c607f1ba93be5e9f16723074ff97ffd27b025f8c.tar.bz2
openshift-c607f1ba93be5e9f16723074ff97ffd27b025f8c.tar.xz
openshift-c607f1ba93be5e9f16723074ff97ffd27b025f8c.zip
Merge pull request #1078 from joelddiaz/master
sync master -> prod
Diffstat (limited to 'roles/os_zabbix')
-rw-r--r-- roles/os_zabbix/vars/template_docker.yml 10
-rw-r--r-- roles/os_zabbix/vars/template_openshift_master.yml 8
-rw-r--r-- roles/os_zabbix/vars/template_zagg_server.yml 16
3 files changed, 31 insertions, 3 deletions
diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml
index bfabf50c5..91a2c400e 100644
--- a/roles/os_zabbix/vars/template_docker.yml
+++ b/roles/os_zabbix/vars/template_docker.yml
@@ -12,6 +12,11 @@ g_template_docker:
- Docker Daemon
value_type: int
+ - key: docker.container.dns.resolution
+ applications:
+ - Docker Daemon
+ value_type: int
+
- key: docker.storage.is_loopback
applications:
- Docker Storage
@@ -62,6 +67,11 @@ g_template_docker:
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc'
priority: high
+ - name: 'docker.container.dns.resolution failed on {HOST.NAME}'
+ expression: '{Template Docker:docker.container.dns.resolution.max(#3)}>0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_dns.asciidoc'
+ priority: high
+
- name: 'Docker storage is using LOOPBACK on {HOST.NAME}'
expression: '{Template Docker:docker.storage.is_loopback.last()}<>0'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc'
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 514d6fd24..a0ba8d104 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -269,6 +269,14 @@ g_template_openshift_master:
- 'Openshift Master process not running on {HOST.NAME}'
priority: avg
+ - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}'
+ expression: '{Template Openshift Master:create_app.sum(1h)}>3'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
+ description: The application create loop has failed 4 or more times in the last hour
+ priority: avg
+
- name: 'Openshift Master API health check is failing on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml
index 0e8e53bb7..db5665993 100644
--- a/roles/os_zabbix/vars/template_zagg_server.yml
+++ b/roles/os_zabbix/vars/template_zagg_server.yml
@@ -7,7 +7,12 @@ g_template_zagg_server:
- Zagg Server
value_type: int
- - key: zagg.server.processor.errors
+ - key: zagg.server.metrics.errors
+ applications:
+ - Zagg Server
+ value_type: int
+
+ - key: zagg.server.heartbeat.errors
applications:
- Zagg Server
value_type: int
@@ -18,8 +23,13 @@ g_template_zagg_server:
value_type: int
ztriggers:
- - name: 'Error sending metrics on {HOST.NAME}'
- expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0'
+ - name: 'Error processing metrics on {HOST.NAME}'
+ expression: '{Template Zagg Server:zagg.server.metrics.errors.min(#3)}>0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+ priority: average
+
+ - name: 'Error processing heartbeats on {HOST.NAME}'
+ expression: '{Template Zagg Server:zagg.server.heartbeat.errors.min(#3)}>0'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
priority: average