summaryrefslogtreecommitdiffstats
path: root/roles
diff options
context:
space:
mode:
Diffstat (limited to 'roles')
-rw-r--r--roles/lib_zabbix/library/zbx_httptest.py282
-rw-r--r--roles/lib_zabbix/library/zbx_usergroup.py64
-rw-r--r--roles/openshift_common/tasks/main.yml3
-rwxr-xr-xroles/openshift_facts/library/openshift_facts.py46
-rw-r--r--roles/openshift_master/templates/master.yaml.v1.j27
-rw-r--r--roles/os_zabbix/vars/template_openshift_master.yml82
-rw-r--r--roles/oso_host_monitoring/README.md50
-rw-r--r--roles/oso_host_monitoring/defaults/main.yml1
-rw-r--r--roles/oso_host_monitoring/handlers/main.yml12
-rw-r--r--roles/oso_host_monitoring/meta/main.yml8
-rw-r--r--roles/oso_host_monitoring/tasks/main.yml65
-rw-r--r--roles/oso_host_monitoring/templates/docker-registry.ops.cfg.j21
-rw-r--r--roles/oso_host_monitoring/templates/oso-f22-host-monitoring.service.j243
-rw-r--r--roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j262
-rw-r--r--roles/oso_host_monitoring/vars/main.yml1
15 files changed, 687 insertions, 40 deletions
diff --git a/roles/lib_zabbix/library/zbx_httptest.py b/roles/lib_zabbix/library/zbx_httptest.py
new file mode 100644
index 000000000..96733b3d1
--- /dev/null
+++ b/roles/lib_zabbix/library/zbx_httptest.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python
+'''
+ Ansible module for zabbix httpservice
+'''
+# vim: expandtab:tabstop=4:shiftwidth=4
+#
+# Zabbix httptest (web scenario) ansible module
+#
+#
+# Copyright 2015 Red Hat Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This is in place because each module looks similar to each other.
+# These need duplicate code as their behavior is very similar
+# but different for each zabbix class.
+# pylint: disable=duplicate-code
+
+# pylint: disable=import-error
+from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection
+
def exists(content, key='result'):
    '''Return True when content[key] is present and non-empty.

    content -- mapping returned by the zabbix API wrapper
    key     -- entry to inspect (defaults to 'result')

    A missing key and an empty/falsy value are both reported as False.
    '''
    # 'in' replaces dict.has_key(), which only exists on Python 2.
    return key in content and bool(content[key])
+
def get_authentication_method(auth):
    '''Translate an authentication name into its numeric code.

    auth -- string such as 'basic' or 'ntlm'; may be None or empty

    Returns 1 when 'basic' occurs in auth, 2 when 'ntlm' occurs in auth,
    and 0 otherwise (including when auth is None or empty).
    '''
    # None is the declared default of the 'authentication' option; a bare
    # substring test against None would raise TypeError.
    if not auth:
        return 0

    if 'basic' in auth:
        return 1

    if 'ntlm' in auth:
        return 2

    return 0
+
def get_verify_host(verify):
    '''Map a truthy/falsy verify flag onto the integers 1/0.

    verify -- any value; only its truthiness is used
    '''
    return 1 if verify else 0
+
def get_app_id(zapi, application):
    '''Look up the id of an application by name.

    zapi        -- API wrapper exposing get_content(class, method, params)
    application -- application name to search for; may be None

    Returns the 'applicationid' of the first match, or None when no name
    was given or nothing matched.
    '''
    # No application requested: do not query, and do not pick an arbitrary one.
    if not application:
        return None

    # Fetch applications by name
    content = zapi.get_content('application',
                               'get',
                               {'search': {'name': application},
                                'selectApplications': ['applicationid', 'name']})

    # An empty result list used to raise IndexError; treat it as "not found".
    matches = content.get('result')
    if matches:
        return matches[0]['applicationid']

    return None
+
def get_template_id(zapi, template_name):
    '''Look up the id of a template by its host name.

    zapi          -- API wrapper exposing get_content(class, method, params)
    template_name -- template name to search for

    Returns the 'templateid' of the first match, or None when the reply
    carries no (or an empty) 'result'.
    '''
    # Fetch templates by name
    content = zapi.get_content('template',
                               'get',
                               {'search': {'host': template_name},
                                'selectApplications': ['applicationid', 'name']})

    # An empty result list used to raise IndexError; treat it as "not found".
    matches = content.get('result')
    if matches:
        return matches[0]['templateid']

    return None
+
def get_host_id_by_name(zapi, host_name):
    '''Get host id by name.

    zapi      -- API wrapper exposing get_content(class, method, params)
    host_name -- exact host name to filter on

    Returns the 'hostid' of the first match, or None when no host matched
    or the reply has no 'result' entry.
    '''
    content = zapi.get_content('host',
                               'get',
                               {'filter': {'name': host_name}})

    # Return None rather than raising IndexError/KeyError so the caller's
    # "if not hostid" check can report the missing host.
    matches = content.get('result')
    if matches:
        return matches[0]['hostid']

    return None
+
def get_status(status):
    '''Determine the status code of the web scenario.

    status -- string; returns 1 when it contains 'disabled', otherwise 0
    '''
    return 1 if 'disabled' in status else 0
+
def find_step(idx, step_list):
    '''Find a step by its index.

    idx       -- step number to look for (compared as a string)
    step_list -- list of step dicts, each normally carrying a 'no' entry

    Returns the first step whose 'no' equals idx, or None when there is none.
    '''
    wanted = str(idx)
    for step in step_list:
        # .get(): a step without a 'no' entry simply never matches
        # (previously this raised KeyError).
        if str(step.get('no')) == wanted:
            return step

    return None
+
def steps_equal(zab_steps, user_steps):
    '''Compare steps returned from zabbix with steps passed by the user.

    zab_steps  -- list of step dicts as returned by zabbix
    user_steps -- list of step dicts supplied by the user

    Steps are paired by their 'no' entry, for numbers 1..len(user_steps).
    Only keys present in the user step are compared, as strings.  Returns
    True when every user-supplied value matches, False otherwise.
    '''

    if len(user_steps) != len(zab_steps):
        return False

    def _by_number(steps):
        '''Index steps by str(step['no']); the first occurrence wins.'''
        indexed = {}
        for step in steps:
            indexed.setdefault(str(step.get('no')), step)
        return indexed

    zab_by_no = _by_number(zab_steps)
    user_by_no = _by_number(user_steps)

    for idx in range(1, len(user_steps) + 1):
        user = user_by_no.get(str(idx))
        zab = zab_by_no.get(str(idx))

        # A step number missing on either side means the lists differ;
        # previously this crashed on None.
        if user is None or zab is None:
            return False

        for key, value in user.items():
            # A key zabbix did not return counts as a difference
            # (previously a KeyError).
            if key not in zab or str(value) != str(zab[key]):
                return False

    return True
+
+# The branches are needed for CRUD and error handling
+# pylint: disable=too-many-branches
def main():
    '''
    Ansible zabbix module for zbx_httptest (web scenarios).

    Behaviour per 'state':
      list    -- return the web scenarios matching 'name'
      absent  -- delete the matching web scenario, if any
      present -- create the web scenario, or update the properties that differ
    Any other state is reported as a failure.
    '''

    module = AnsibleModule(
        argument_spec=dict(
            zbx_server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'),
            zbx_user=dict(default=os.environ.get('ZABBIX_USER', None), type='str'),
            zbx_password=dict(default=os.environ.get('ZABBIX_PASSWORD', None), type='str'),
            zbx_debug=dict(default=False, type='bool'),
            # Was spelled 'require', so the option was never actually enforced.
            name=dict(default=None, required=True, type='str'),
            agent=dict(default=None, type='str'),
            template_name=dict(default=None, type='str'),
            host_name=dict(default=None, type='str'),
            interval=dict(default=60, type='int'),
            application=dict(default=None, type='str'),
            # NOTE(review): 'authentication' is accepted but never sent to
            # zabbix below -- confirm whether it should be added to params.
            authentication=dict(default=None, type='str'),
            http_user=dict(default=None, type='str'),
            http_password=dict(default=None, type='str'),
            state=dict(default='present', type='str'),
            status=dict(default='enabled', type='str'),
            # Default was the copy-pasted string 'present', which is not a
            # step list; None lets the "drop None params" logic omit it.
            steps=dict(default=None, type='list'),
            verify_host=dict(default=False, type='bool'),
            retries=dict(default=1, type='int'),
            headers=dict(default=None, type='dict'),
            query_type=dict(default='filter', choices=['filter', 'search'], type='str'),
        ),
        #supports_check_mode=True
        mutually_exclusive=[['template_name', 'host_name']],
    )

    zapi = ZabbixAPI(ZabbixConnection(module.params['zbx_server'],
                                      module.params['zbx_user'],
                                      module.params['zbx_password'],
                                      module.params['zbx_debug']))

    #Set the instance and the template for the rest of the calls
    zbx_class_name = 'httptest'
    state = module.params['state']
    hostid = None

    # If a template name was passed then accept the template,
    # otherwise fall back to a host lookup.
    try:
        if module.params['template_name']:
            hostid = get_template_id(zapi, module.params['template_name'])
        else:
            hostid = get_host_id_by_name(zapi, module.params['host_name'])
    except (IndexError, KeyError):
        # The lookup found nothing usable; report it below instead of crashing.
        hostid = None

    # Fail if neither a template nor a host was found matching the name
    if not hostid:
        # params always holds both keys, so pick whichever name was supplied.
        # The state value below is kept byte-for-byte (sic) for callers.
        module.exit_json(failed=True,
                         changed=False,
                         results='Error: Could not find template or host with name [%s].' %
                         (module.params['template_name'] or module.params['host_name']),
                         state="Unkown")

    content = zapi.get_content(zbx_class_name,
                               'get',
                               {module.params['query_type']: {'name': module.params['name']},
                                'selectSteps': 'extend',
                               })

    #******#
    # GET
    #******#
    if state == 'list':
        module.exit_json(changed=False, results=content['result'], state="list")

    #******#
    # DELETE
    #******#
    if state == 'absent':
        if not exists(content):
            module.exit_json(changed=False, state="absent")

        content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0]['httptestid']])
        module.exit_json(changed=True, results=content['result'], state="absent")

    # Create and Update
    if state == 'present':

        # Only resolve an application id when an application was requested.
        application = module.params['application']
        applicationid = get_app_id(zapi, application) if application else None

        params = {'name': module.params['name'],
                  'hostid': hostid,
                  'agent': module.params['agent'],
                  'retries': module.params['retries'],
                  'steps': module.params['steps'],
                  'applicationid': applicationid,
                  'delay': module.params['interval'],
                  'verify_host': get_verify_host(module.params['verify_host']),
                  'status': get_status(module.params['status']),
                  'headers': module.params['headers'],
                  'http_user': module.params['http_user'],
                  'http_password': module.params['http_password'],
                 }

        # Remove any None valued params.  A new dict is built because popping
        # while iterating keys() fails on Python 3.
        params = dict((key, value) for key, value in params.items() if value is not None)

        #******#
        # CREATE
        #******#
        if not exists(content):
            content = zapi.get_content(zbx_class_name, 'create', params)

            if 'error' in content:
                module.exit_json(failed=True, changed=True, results=content['error'], state="present")

            module.exit_json(changed=True, results=content['result'], state='present')


        ########
        # UPDATE
        ########
        differences = {}
        zab_results = content['result'][0]
        for key, value in params.items():

            if key == 'steps':
                if not steps_equal(zab_results[key], value):
                    differences[key] = value

            # zabbix may hand values back as strings, so compare both forms
            elif zab_results[key] != value and zab_results[key] != str(value):
                differences[key] = value

        # Nothing differs, so there is nothing to update
        if not differences:
            module.exit_json(changed=False, results=zab_results, state="present")

        # We have differences and need to update
        differences['httptestid'] = zab_results['httptestid']
        content = zapi.get_content(zbx_class_name, 'update', differences)

        if 'error' in content:
            module.exit_json(failed=True, changed=False, results=content['error'], state="present")

        module.exit_json(changed=True, results=content['result'], state="present")

    module.exit_json(failed=True,
                     changed=False,
                     results='Unknown state passed. %s' % state,
                     state="unknown")
+
+# pylint: disable=redefined-builtin, unused-wildcard-import, wildcard-import, locally-disabled
+# import module snippets. These are required
+from ansible.module_utils.basic import *
+
+main()
diff --git a/roles/lib_zabbix/library/zbx_usergroup.py b/roles/lib_zabbix/library/zbx_usergroup.py
index 297d8ef91..3fd44d80c 100644
--- a/roles/lib_zabbix/library/zbx_usergroup.py
+++ b/roles/lib_zabbix/library/zbx_usergroup.py
@@ -27,6 +27,10 @@ zabbix ansible module for usergroups
# but different for each zabbix class.
# pylint: disable=duplicate-code
+# Disabling too-many-branches as we need the error checking and the if-statements
+# to determine the proper state
+# pylint: disable=too-many-branches
+
# pylint: disable=import-error
from openshift_tools.monitoring.zbxapi import ZabbixAPI, ZabbixConnection
@@ -92,26 +96,24 @@ def get_user_status(status):
return 1
-#def get_userids(zapi, users):
-# ''' Get userids from user aliases
-# '''
-# if not users:
-# return None
-#
-# userids = []
-# for alias in users:
-# content = zapi.get_content('user', 'get', {'search': {'alias': alias}})
-# if content['result']:
-# userids.append(content['result'][0]['userid'])
-#
-# return userids
def get_userids(zapi, users):
    ''' Get userids from user aliases

        zapi  -- API wrapper exposing get_content(class, method, params)
        users -- list of user aliases; may be None or empty

        Returns None when no aliases were given, otherwise the list of
        'userid' values for the aliases that produced a match.  Aliases
        without a match are skipped.
    '''
    if not users:
        return None

    userids = []
    for alias in users:
        # NOTE(review): 'search' is used here and the first hit is taken --
        # confirm an exact 'filter' match is not what is wanted.
        content = zapi.get_content('user', 'get', {'search': {'alias': alias}})
        # .get(): a reply without 'result' (e.g. an error reply) is treated
        # as "no match" instead of raising KeyError.
        matches = content.get('result')
        if matches:
            userids.append(matches[0]['userid'])

    return userids
def main():
''' Ansible module for usergroup
'''
- ##def usergroup(self, name, rights=None, users=None, state='present', params=None):
-
module = AnsibleModule(
argument_spec=dict(
zbx_server=dict(default='https://localhost/zabbix/api_jsonrpc.php', type='str'),
@@ -123,7 +125,7 @@ def main():
status=dict(default='enabled', type='str'),
name=dict(default=None, type='str', required=True),
rights=dict(default=None, type='list'),
- #users=dict(default=None, type='list'),
+ users=dict(default=None, type='list'),
state=dict(default='present', type='str'),
),
#supports_check_mode=True
@@ -144,9 +146,15 @@ def main():
{'search': {'name': uname},
'selectUsers': 'userid',
})
+ #******#
+ # GET
+ #******#
if state == 'list':
module.exit_json(changed=False, results=content['result'], state="list")
+ #******#
+ # DELETE
+ #******#
if state == 'absent':
if not exists(content):
module.exit_json(changed=False, state="absent")
@@ -157,6 +165,7 @@ def main():
content = zapi.get_content(zbx_class_name, 'delete', [content['result'][0][idname]])
module.exit_json(changed=True, results=content['result'], state="absent")
+ # Create and Update
if state == 'present':
params = {'name': uname,
@@ -164,26 +173,37 @@ def main():
'users_status': get_user_status(module.params['status']),
'gui_access': get_gui_access(module.params['gui_access']),
'debug_mode': get_debug_mode(module.params['debug_mode']),
- #'userids': get_userids(zapi, module.params['users']),
+ 'userids': get_userids(zapi, module.params['users']),
}
+ # Remove any None valued params
_ = [params.pop(key, None) for key in params.keys() if params[key] == None]
+ #******#
+ # CREATE
+ #******#
if not exists(content):
# if we didn't find it, create it
content = zapi.get_content(zbx_class_name, 'create', params)
+
+ if content.has_key('error'):
+ module.exit_json(failed=True, changed=True, results=content['error'], state="present")
+
module.exit_json(changed=True, results=content['result'], state='present')
- # already exists, we need to update it
- # let's compare properties
+
+
+ ########
+ # UPDATE
+ ########
differences = {}
zab_results = content['result'][0]
for key, value in params.items():
if key == 'rights':
differences['rights'] = value
- #elif key == 'userids' and zab_results.has_key('users'):
- #if zab_results['users'] != value:
- #differences['userids'] = value
+ elif key == 'userids' and zab_results.has_key('users'):
+ if zab_results['users'] != value:
+ differences['userids'] = value
elif zab_results[key] != value and zab_results[key] != str(value):
differences[key] = value
diff --git a/roles/openshift_common/tasks/main.yml b/roles/openshift_common/tasks/main.yml
index 38d5a08e4..e9df4e364 100644
--- a/roles/openshift_common/tasks/main.yml
+++ b/roles/openshift_common/tasks/main.yml
@@ -18,6 +18,3 @@
deployment_type: "{{ openshift_deployment_type }}"
use_fluentd: "{{ openshift_use_fluentd | default(None) }}"
use_flannel: "{{ openshift_use_flannel | default(None) }}"
-
-- name: Set hostname
- hostname: name={{ openshift.common.hostname }}
diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py
index 51e3ef1c0..6006bfa9d 100755
--- a/roles/openshift_facts/library/openshift_facts.py
+++ b/roles/openshift_facts/library/openshift_facts.py
@@ -864,20 +864,38 @@ def apply_provider_facts(facts, provider_facts):
return facts
-def merge_facts(orig, new):
+def merge_facts(orig, new, additive_facts_to_overwrite):
""" Recursively merge facts dicts
Args:
orig (dict): existing facts
new (dict): facts to update
+
+ additive_facts_to_overwrite (list): additive facts to overwrite in jinja
+ '.' notation ex: ['master.named_certificates']
+
Returns:
dict: the merged facts
"""
+ additive_facts = ['named_certificates']
facts = dict()
for key, value in orig.iteritems():
if key in new:
if isinstance(value, dict) and isinstance(new[key], dict):
- facts[key] = merge_facts(value, new[key])
+ relevant_additive_facts = []
+ # Keep additive_facts_to_overwrite if key matches
+ for item in additive_facts_to_overwrite:
+ if '.' in item and item.startswith(key + '.'):
+ relevant_additive_facts.append(item)
+ facts[key] = merge_facts(value, new[key], relevant_additive_facts)
+ elif key in additive_facts and key not in [x.split('.')[-1] for x in additive_facts_to_overwrite]:
+ # Fact is additive so we'll combine orig and new.
+ if isinstance(value, list) and isinstance(new[key], list):
+ new_fact = []
+ for item in copy.deepcopy(value) + copy.copy(new[key]):
+ if item not in new_fact:
+ new_fact.append(item)
+ facts[key] = new_fact
else:
facts[key] = copy.copy(new[key])
else:
@@ -961,13 +979,15 @@ class OpenShiftFacts(object):
role (str): role for setting local facts
filename (str): local facts file to use
local_facts (dict): local facts to set
+ additive_facts_to_overwrite (list): additive facts to overwrite in jinja
+ '.' notation ex: ['master.named_certificates']
Raises:
OpenShiftFactsUnsupportedRoleError:
"""
known_roles = ['common', 'master', 'node', 'master_sdn', 'node_sdn', 'dns', 'etcd']
- def __init__(self, role, filename, local_facts):
+ def __init__(self, role, filename, local_facts, additive_facts_to_overwrite=False):
self.changed = False
self.filename = filename
if role not in self.known_roles:
@@ -976,25 +996,27 @@ class OpenShiftFacts(object):
)
self.role = role
self.system_facts = ansible_facts(module)
- self.facts = self.generate_facts(local_facts)
+ self.facts = self.generate_facts(local_facts, additive_facts_to_overwrite)
- def generate_facts(self, local_facts):
+ def generate_facts(self, local_facts, additive_facts_to_overwrite):
""" Generate facts
Args:
local_facts (dict): local_facts for overriding generated
defaults
+ additive_facts_to_overwrite (list): additive facts to overwrite in jinja
+ '.' notation ex: ['master.named_certificates']
Returns:
dict: The generated facts
"""
- local_facts = self.init_local_facts(local_facts)
+ local_facts = self.init_local_facts(local_facts, additive_facts_to_overwrite)
roles = local_facts.keys()
defaults = self.get_defaults(roles)
provider_facts = self.init_provider_facts()
facts = apply_provider_facts(defaults, provider_facts)
- facts = merge_facts(facts, local_facts)
+ facts = merge_facts(facts, local_facts, additive_facts_to_overwrite)
facts['current_config'] = get_current_config(facts)
facts = set_url_facts_if_unset(facts)
facts = set_project_cfg_facts_if_unset(facts)
@@ -1132,11 +1154,13 @@ class OpenShiftFacts(object):
)
return provider_facts
- def init_local_facts(self, facts=None):
+ def init_local_facts(self, facts=None, additive_facts_to_overwrite=False):
""" Initialize the provider facts
Args:
facts (dict): local facts to set
+ additive_facts_to_overwrite (list): additive facts to overwrite in jinja
+ '.' notation ex: ['master.named_certificates']
Returns:
dict: The result of merging the provided facts with existing
@@ -1154,7 +1178,7 @@ class OpenShiftFacts(object):
basestring):
facts_to_set[arg] = module.from_json(facts_to_set[arg])
- new_local_facts = merge_facts(local_facts, facts_to_set)
+ new_local_facts = merge_facts(local_facts, facts_to_set, additive_facts_to_overwrite)
for facts in new_local_facts.values():
keys_to_delete = []
for fact, value in facts.iteritems():
@@ -1184,6 +1208,7 @@ def main():
role=dict(default='common', required=False,
choices=OpenShiftFacts.known_roles),
local_facts=dict(default=None, type='dict', required=False),
+ additive_facts_to_overwrite=dict(default=[], type='list', required=False),
),
supports_check_mode=True,
add_file_common_args=True,
@@ -1191,9 +1216,10 @@ def main():
role = module.params['role']
local_facts = module.params['local_facts']
+ additive_facts_to_overwrite = module.params['additive_facts_to_overwrite']
fact_file = '/etc/ansible/facts.d/openshift.fact'
- openshift_facts = OpenShiftFacts(role, fact_file, local_facts)
+ openshift_facts = OpenShiftFacts(role, fact_file, local_facts, additive_facts_to_overwrite)
file_params = module.params.copy()
file_params['path'] = fact_file
diff --git a/roles/openshift_master/templates/master.yaml.v1.j2 b/roles/openshift_master/templates/master.yaml.v1.j2
index bb12a0a0f..2a37c06d9 100644
--- a/roles/openshift_master/templates/master.yaml.v1.j2
+++ b/roles/openshift_master/templates/master.yaml.v1.j2
@@ -27,9 +27,6 @@ corsAllowedOrigins:
{% for custom_origin in openshift.master.custom_cors_origins | default("") %}
- {{ custom_origin }}
{% endfor %}
-{% for name in (named_certificates | map(attribute='names')) | list | oo_flatten %}
- - {{ name }}
-{% endfor %}
{% if 'disabled_features' in openshift.master %}
disabledFeatures: {{ openshift.master.disabled_features | to_json }}
{% endif %}
@@ -144,9 +141,9 @@ servingInfo:
keyFile: master.server.key
maxRequestsInFlight: 500
requestTimeoutSeconds: 3600
-{% if named_certificates %}
+{% if openshift.master.named_certificates %}
namedCertificates:
-{% for named_certificate in named_certificates %}
+{% for named_certificate in openshift.master.named_certificates %}
- certFile: {{ named_certificate['certfile'] }}
keyFile: {{ named_certificate['keyfile'] }}
names:
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 174486e15..512adad4c 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -13,6 +13,12 @@ g_template_openshift_master:
applications:
- Openshift Master
+ - key: openshift.master.api.ping
+ description: "Verify that the Openshift API is up"
+ type: int
+ applications:
+ - Openshift Master
+
- key: openshift.master.api.healthz
description: "Checks the healthz check of the master's api: https://master_host/healthz"
type: int
@@ -44,6 +50,12 @@ g_template_openshift_master:
applications:
- Openshift Master
+ - key: openshift.master.node.count
+ description: Shows the total number of nodes found in the Openshift Cluster
+ type: int
+ applications:
+ - Openshift Master
+
- key: openshift.project.count
description: Shows number of projects on a cluster
type: int
@@ -122,6 +134,66 @@ g_template_openshift_master:
applications:
- Openshift Etcd
+ - key: openshift.master.metric.ping
+ description: "This check verifies that the https://master/metrics check is alive and communicating properly."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.list.5
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.list.9
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.list.99
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.5
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.9
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.99
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the pod operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.5
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 50% of the end to end scheduling operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.9
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 90% of the end to end scheduling operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
+ - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.99
+ description: "Value from https://master/metrics. This is the time, in miliseconds, that 99% of the end to end scheduling operations have taken to completed."
+ type: int
+ applications:
+ - Openshift Master Metrics
+
ztriggers:
- name: 'Application creation has failed on {HOST.NAME}'
expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
@@ -133,6 +205,16 @@ g_template_openshift_master:
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
priority: high
+ - name: 'Openshift Master API PING check is failing on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+ priority: high
+
+ - name: 'Openshift Master metric PING check is failing on {HOST.NAME}'
+ expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+ priority: avg
+
- name: 'Openshift Master process not running on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
diff --git a/roles/oso_host_monitoring/README.md b/roles/oso_host_monitoring/README.md
new file mode 100644
index 000000000..f1fa05adb
--- /dev/null
+++ b/roles/oso_host_monitoring/README.md
@@ -0,0 +1,50 @@
+oso_host_monitoring
+=========
+
+Applies local host monitoring container(s).
+
+Requirements
+------------
+
+None.
+
+Role Variables
+--------------
+
+- `osohm_zagg_web_url`: where to contact monitoring service
+- `osohm_host_monitoring`: name of host monitoring container
+- `osohm_zagg_client`: name of container with zabbix client
+- `osohm_docker_registry_url`: docker repository containing above containers
+- `osohm_default_zagg_server_user`: login info to zabbix server
+- `osohm_default_zagg_server_password`: password to zabbix server
+
+Dependencies
+------------
+
+None.
+
+Example Playbook
+----------------
+
+Example of applying the role with its required variables passed in:
+
+ - hosts: servers
+ roles:
+ - oso_host_monitoring
+ vars:
+ osohm_zagg_web_url: "https://..."
+ osohm_host_monitoring: "oso-rhel7-host-monitoring"
+ osohm_zagg_client: "oso-rhel7-zagg-client"
+ osohm_docker_registry_url: "docker-registry.example.com/mon/"
+ osohm_default_zagg_server_user: "zagg-client"
+ osohm_default_zagg_server_password: "secret"
+
+License
+-------
+
+ASL 2.0
+
+Author Information
+------------------
+
+OpenShift operations, Red Hat, Inc
diff --git a/roles/oso_host_monitoring/defaults/main.yml b/roles/oso_host_monitoring/defaults/main.yml
new file mode 100644
index 000000000..ed97d539c
--- /dev/null
+++ b/roles/oso_host_monitoring/defaults/main.yml
@@ -0,0 +1 @@
+---
diff --git a/roles/oso_host_monitoring/handlers/main.yml b/roles/oso_host_monitoring/handlers/main.yml
new file mode 100644
index 000000000..7863ad15b
--- /dev/null
+++ b/roles/oso_host_monitoring/handlers/main.yml
@@ -0,0 +1,12 @@
+---
+- name: "Restart the {{ osohm_host_monitoring }} service"
+ service:
+ name: "{{ osohm_host_monitoring }}"
+ state: restarted
+ enabled: yes
+
+- name: "Restart the {{ osohm_zagg_client }} service"
+ service:
+ name: "{{ osohm_zagg_client }}"
+ state: restarted
+ enabled: yes
diff --git a/roles/oso_host_monitoring/meta/main.yml b/roles/oso_host_monitoring/meta/main.yml
new file mode 100644
index 000000000..cce30c2db
--- /dev/null
+++ b/roles/oso_host_monitoring/meta/main.yml
@@ -0,0 +1,8 @@
+---
+galaxy_info:
+ author: OpenShift
+ description: apply monitoring container(s).
+ company: Red Hat, Inc
+ license: ASL 2.0
+ min_ansible_version: 1.2
+dependencies: []
diff --git a/roles/oso_host_monitoring/tasks/main.yml b/roles/oso_host_monitoring/tasks/main.yml
new file mode 100644
index 000000000..6ddfa3dcb
--- /dev/null
+++ b/roles/oso_host_monitoring/tasks/main.yml
@@ -0,0 +1,65 @@
+---
+- fail:
+ msg: "This playbook requires {{item}} to be set."
+ when: "{{ item }} is not defined or {{ item }} == ''"
+ with_items:
+ - osohm_zagg_web_url
+ - osohm_host_monitoring
+ - osohm_zagg_client
+ - osohm_docker_registry_url
+ - osohm_default_zagg_server_user
+ - osohm_default_zagg_server_password
+
+- name: create /etc/docker/ops
+ file:
+ path: /etc/docker/ops
+ state: directory
+ mode: 0770
+ group: root
+ owner: root
+
+- name: Copy dockercfg to /etc/docker/ops
+ template:
+ src: docker-registry.ops.cfg.j2
+ dest: /etc/docker/ops/.dockercfg
+ owner: root
+ group: root
+ mode: 0600
+
+- name: "Copy {{ osohm_host_monitoring }} systemd file"
+ template:
+ src: "{{ osohm_host_monitoring }}.service.j2"
+ dest: "/etc/systemd/system/{{ osohm_host_monitoring }}.service"
+ owner: root
+ group: root
+ mode: 0644
+ notify:
+ - "Restart the {{ osohm_host_monitoring }} service"
+ register: systemd_host_monitoring
+
+- name: "Copy {{ osohm_zagg_client }} systemd file"
+ template:
+ src: "{{ osohm_zagg_client }}.service.j2"
+ dest: "/etc/systemd/system/{{ osohm_zagg_client }}.service"
+ owner: root
+ group: root
+ mode: 0644
+ notify:
+ - "Restart the {{ osohm_zagg_client }} service"
+ register: zagg_systemd
+
+- name: reload systemd
+ command: /usr/bin/systemctl --system daemon-reload
+ when: systemd_host_monitoring | changed or zagg_systemd | changed
+
+- name: "Start the {{ osohm_host_monitoring }} service"
+ service:
+ name: "{{ osohm_host_monitoring }}"
+ state: started
+ enabled: yes
+
+- name: "Start the {{ osohm_zagg_client }} service"
+ service:
+ name: "{{ osohm_zagg_client }}"
+ state: started
+ enabled: yes
diff --git a/roles/oso_host_monitoring/templates/docker-registry.ops.cfg.j2 b/roles/oso_host_monitoring/templates/docker-registry.ops.cfg.j2
new file mode 100644
index 000000000..9e49da469
--- /dev/null
+++ b/roles/oso_host_monitoring/templates/docker-registry.ops.cfg.j2
@@ -0,0 +1 @@
+{"{{ osohm_docker_registry_ops_url }}":{"auth":"{{ osohm_docker_registry_ops_key }}","email":"{{ osohm_docker_registry_ops_email }}"}}
diff --git a/roles/oso_host_monitoring/templates/oso-f22-host-monitoring.service.j2 b/roles/oso_host_monitoring/templates/oso-f22-host-monitoring.service.j2
new file mode 100644
index 000000000..d18ad90fe
--- /dev/null
+++ b/roles/oso_host_monitoring/templates/oso-f22-host-monitoring.service.j2
@@ -0,0 +1,43 @@
+# This is a systemd file to run this docker container under systemd.
+# To make this work:
+# * pull the image (probably from ops docker registry)
+# * place this file in /etc/systemd/system without the .systemd extension
+# * run the commands:
+# systemctl daemon-reload
+# systemctl enable pcp-docker
+# systemctl start pcp-docker
+#
+#
+[Unit]
+Description=PCP Collector Contatainer
+Requires=docker.service
+After=docker.service
+
+
+[Service]
+Type=simple
+TimeoutStartSec=5m
+Environment=HOME=/etc/docker/ops
+#Slice=container-small.slice
+
+# systemd syntax '=-' ignore errors from return codes.
+ExecStartPre=-/usr/bin/docker kill "{{ osohm_host_monitoring }}"
+ExecStartPre=-/usr/bin/docker rm "{{ osohm_host_monitoring }}"
+ExecStartPre=-/usr/bin/docker pull "{{ osohm_docker_registry_url }}{{ osohm_host_monitoring }}"
+
+
+ExecStart=/usr/bin/docker run --rm --name="{{ osohm_host_monitoring }}" \
+ --privileged --net=host --pid=host --ipc=host \
+ -v /sys:/sys:ro -v /etc/localtime:/etc/localtime:ro \
+ -v /var/lib/docker:/var/lib/docker:ro -v /run:/run \
+ -v /var/log:/var/log \
+ {{ osohm_docker_registry_url }}{{ osohm_host_monitoring }}
+
+ExecReload=-/usr/bin/docker stop "{{ osohm_host_monitoring }}"
+ExecReload=-/usr/bin/docker rm "{{ osohm_host_monitoring }}"
+ExecStop=-/usr/bin/docker stop "{{ osohm_host_monitoring }}"
+Restart=always
+RestartSec=30
+
+[Install]
+WantedBy=default.target
diff --git a/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2
new file mode 100644
index 000000000..978e40b88
--- /dev/null
+++ b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2
@@ -0,0 +1,62 @@
+# This is a systemd file to run this docker container under systemd.
+# To make this work:
+# * pull the image (probably from ops docker registry)
+# * place this file in /etc/systemd/system without the .systemd extension
+# * run the commands:
+# systemctl daemon-reload
+# systemctl enable zagg-client-docker
+# systemctl start zagg-client-docker
+#
+#
+[Unit]
+Description=Zagg Client Contatainer
+Requires=docker.service
+After=docker.service
+
+
+[Service]
+Type=simple
+TimeoutStartSec=5m
+Environment=HOME=/etc/docker/ops
+#Slice=container-small.slice
+
+# systemd syntax '=-' ignore errors from return codes.
+ExecStartPre=-/usr/bin/docker kill "{{ osohm_zagg_client }}"
+ExecStartPre=-/usr/bin/docker rm "{{ osohm_zagg_client }}"
+ExecStartPre=-/usr/bin/docker pull "{{ osohm_docker_registry_url }}{{ osohm_zagg_client }}"
+
+
+ExecStart=/usr/bin/docker run --name {{ osohm_zagg_client }} \
+ --privileged \
+ --pid=host \
+ --net=host \
+ -e ZAGG_URL={{ osohm_zagg_web_url }} \
+ -e ZAGG_USER={{ osohm_default_zagg_server_user }} \
+ -e ZAGG_PASSWORD={{ osohm_default_zagg_server_password }} \
+ -e ZAGG_CLIENT_HOSTNAME={{ ec2_tag_Name }} \
+ -e ZAGG_SSL_VERIFY={{ osohm_zagg_verify_ssl }} \
+ -e OSO_CLUSTER_GROUP={{ cluster_group }} \
+ -e OSO_CLUSTER_ID={{ oo_clusterid }} \
+ -e OSO_HOST_TYPE={{ hostvars[inventory_hostname]['ec2_tag_host-type'] }} \
+ -e OSO_SUB_HOST_TYPE={{ hostvars[inventory_hostname]['ec2_tag_sub-host-type'] }} \
+ -v /etc/localtime:/etc/localtime \
+ -v /run/pcp:/run/pcp \
+ -v /var/run/docker.sock:/var/run/docker.sock \
+ -v /var/run/openvswitch/db.sock:/var/run/openvswitch/db.sock \
+{% if hostvars[inventory_hostname]['ec2_tag_host-type'] == 'master' %}
+ -v /etc/openshift/master/admin.kubeconfig:/etc/openshift/master/admin.kubeconfig \
+ -v /etc/openshift/master/master.etcd-client.crt:/etc/openshift/master/master.etcd-client.crt \
+ -v /etc/openshift/master/master.etcd-client.key:/etc/openshift/master/master.etcd-client.key \
+ -v /etc/openshift/master/master-config.yaml:/etc/openshift/master/master-config.yaml \
+{% endif %}
+ {{ osohm_docker_registry_url }}{{ osohm_zagg_client }}
+
+
+ExecReload=-/usr/bin/docker stop "{{ osohm_zagg_client }}"
+ExecReload=-/usr/bin/docker rm "{{ osohm_zagg_client }}"
+ExecStop=-/usr/bin/docker stop "{{ osohm_zagg_client }}"
+Restart=always
+RestartSec=30
+
+[Install]
+WantedBy=default.target
diff --git a/roles/oso_host_monitoring/vars/main.yml b/roles/oso_host_monitoring/vars/main.yml
new file mode 100644
index 000000000..ed97d539c
--- /dev/null
+++ b/roles/oso_host_monitoring/vars/main.yml
@@ -0,0 +1 @@
+---