diff options
Diffstat (limited to 'roles/os_zabbix')
-rw-r--r-- | roles/os_zabbix/tasks/main.yml | 9 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 82 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_os_linux.yml | 21 | ||||
-rw-r--r-- | roles/os_zabbix/vars/template_performance_copilot.yml | 14 |
4 files changed, 126 insertions, 0 deletions
diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml index a503b24d7..82bf78b57 100644 --- a/roles/os_zabbix/tasks/main.yml +++ b/roles/os_zabbix/tasks/main.yml @@ -15,6 +15,7 @@ - include_vars: template_ops_tools.yml - include_vars: template_app_zabbix_server.yml - include_vars: template_app_zabbix_agent.yml +- include_vars: template_performance_copilot.yml - name: Include Template Heartbeat include: ../../lib_zabbix/tasks/create_template.yml @@ -79,3 +80,11 @@ server: "{{ ozb_server }}" user: "{{ ozb_user }}" password: "{{ ozb_password }}" + +- name: Include Template Performance Copilot + include: ../../lib_zabbix/tasks/create_template.yml + vars: + template: "{{ g_template_performance_copilot }}" + server: "{{ ozb_server }}" + user: "{{ ozb_user }}" + password: "{{ ozb_password }}" diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 1de4fefbb..6defc4989 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -31,6 +31,78 @@ g_template_openshift_master: applications: - Openshift Master + - key: openshift.master.etcd.create.success + description: Show number of successful create actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.create.fail + description: Show number of failed create actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.delete.success + description: Show number of successful delete actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.delete.fail + description: Show number of failed delete actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.get.success + description: Show number of successful get actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.get.fail + description: Show number of failed get actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.set.success + description: Show number of successful set actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.set.fail + description: Show number of failed set actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.update.success + description: Show number of successful update actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.update.fail + description: Show number of failed update actions + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.watchers + description: Show number of etcd watchers + type: int + applications: + - Openshift Etcd + + - key: openshift.master.etcd.ping + description: etcd ping + type: int + applications: + - Openshift Etcd + ztriggers: - name: 'Application creation has failed on {HOST.NAME}' expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' @@ -56,3 +128,13 @@ g_template_openshift_master: expression: '{Template Openshift Master:openshift.project.counter.last()}=0' url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' priority: info + + - name: 'Low number of etcd watchers on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' + priority: avg + + - name: 'Etcd ping failed on {HOST.NAME}' + expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' + priority: high diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index aeeec4b8d..fbc20cd63 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -194,6 +194,11 @@ g_template_os_linux: lifetime: 1 description: "Dynamically register the filesystems" + - name: disc.disk + key: disc.disk + lifetime: 1 + description: "Dynamically register disks on a node" + zitemprototypes: - discoveryrule_key: disc.filesys name: "disc.filesys.full.{#OSO_FILESYS}" @@ -211,6 +216,22 @@ g_template_os_linux: applications: - Disk + - discoveryrule_key: disc.disk + name: "TPS (IOPS) for disk {#OSO_DISK}" + key: "disc.disk.tps[{#OSO_DISK}]" + value_type: int + description: "PCP disk.dev.totals metric measured over a period of time. This shows how many disk transactions per second the disk is using" + applications: + - Disk + + - discoveryrule_key: disc.disk + name: "Percent Utilized for disk {#OSO_DISK}" + key: "disc.disk.putil[{#OSO_DISK}]" + value_type: float + description: "PCP disk.dev.avactive metric measured over a period of time. This is the '%util' in the iostat command" + applications: + - Disk + ztriggerprototypes: - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}' expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85' diff --git a/roles/os_zabbix/vars/template_performance_copilot.yml b/roles/os_zabbix/vars/template_performance_copilot.yml new file mode 100644 index 000000000..b62fa0228 --- /dev/null +++ b/roles/os_zabbix/vars/template_performance_copilot.yml @@ -0,0 +1,14 @@ +--- +g_template_performance_copilot: + name: Template Performance Copilot + zitems: + - key: pcp.ping + applications: + - Performance Copilot + value_type: int + + ztriggers: + - name: 'pcp.ping failed on {HOST.NAME}' + expression: '{Template Performance Copilot:pcp.ping.max(#3)}<1' + url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_pcp_ping.asciidoc' + priority: average |