summaryrefslogtreecommitdiffstats
path: root/roles
diff options
context:
space:
mode:
Diffstat (limited to 'roles')
-rw-r--r--roles/os_zabbix/tasks/main.yml13
-rw-r--r--roles/os_zabbix/vars/template_openshift_master.yml8
-rw-r--r--roles/os_zabbix/vars/template_zagg_server.yml36
3 files changed, 57 insertions, 0 deletions
diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml
index d0b307a3d..7552086d4 100644
--- a/roles/os_zabbix/tasks/main.yml
+++ b/roles/os_zabbix/tasks/main.yml
@@ -37,6 +37,9 @@
- include_vars: template_aws.yml
tags:
- aws
+- include_vars: template_zagg_server.yml
+ tags:
+ - zagg_server
- name: Include Template Heartbeat
include: ../../lib_zabbix/tasks/create_template.yml
@@ -137,3 +140,13 @@
password: "{{ ozb_password }}"
tags:
- aws
+
+- name: Include Template Zagg Server
+ include: ../../lib_zabbix/tasks/create_template.yml
+ vars:
+ template: "{{ g_template_zagg_server }}"
+ server: "{{ ozb_server }}"
+ user: "{{ ozb_user }}"
+ password: "{{ ozb_password }}"
+ tags:
+ - zagg_server
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 514d6fd24..a0ba8d104 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -269,6 +269,14 @@ g_template_openshift_master:
- 'Openshift Master process not running on {HOST.NAME}'
priority: avg
+ - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}'
+ expression: '{Template Openshift Master:create_app.sum(1h)}>3'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
+ description: The application create loop has failed 4 or more times in the last hour
+ priority: avg
+
- name: 'Openshift Master API health check is failing on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml
new file mode 100644
index 000000000..0e8e53bb7
--- /dev/null
+++ b/roles/os_zabbix/vars/template_zagg_server.yml
@@ -0,0 +1,36 @@
+---
+g_template_zagg_server:
+ name: Template Zagg Server
+ zitems:
+ - key: zagg.server.metrics.count
+ applications:
+ - Zagg Server
+ value_type: int
+
+ - key: zagg.server.processor.errors
+ applications:
+ - Zagg Server
+ value_type: int
+
+ - key: zagg.server.heartbeat.count
+ applications:
+ - Zagg Server
+ value_type: int
+
+ ztriggers:
+ - name: 'Error sending metrics on {HOST.NAME}'
+ expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+ priority: average
+
+ - name: 'Critically High number of metrics in Zagg queue {HOST.NAME}'
+ expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>10000'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+ priority: high
+
+ - name: 'High number of metrics in Zagg queue {HOST.NAME}'
+ expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>5000'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+ dependencies:
+ - 'Critically High number of metrics in Zagg queue {HOST.NAME}'
+ priority: average