summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Joel Diaz <jdiaz@redhat.com> 2015-12-16 15:59:26 -0500
committer Joel Diaz <jdiaz@redhat.com> 2015-12-16 15:59:26 -0500
commit c607f1ba93be5e9f16723074ff97ffd27b025f8c (patch)
tree 57d78b69cd526a57ad0258ba468868561626f384
parent 4dfe16e0e567a633cedd8ee56ffaed5110ca1629 (diff)
parent f826925c8217d5c9f150ef03ca8deb718c37c157 (diff)
download openshift-c607f1ba93be5e9f16723074ff97ffd27b025f8c.tar.gz
openshift-c607f1ba93be5e9f16723074ff97ffd27b025f8c.tar.bz2
openshift-c607f1ba93be5e9f16723074ff97ffd27b025f8c.tar.xz
openshift-c607f1ba93be5e9f16723074ff97ffd27b025f8c.zip
Merge pull request #1078 from joelddiaz/master
sync master -> prod
-rwxr-xr-x bin/ohi 68
-rw-r--r-- bin/openshift_ansible/awsutil.py 38
-rwxr-xr-x inventory/multi_inventory.py 18
-rw-r--r-- roles/openshift_master/tasks/main.yml 6
-rw-r--r-- roles/openshift_node/tasks/main.yml 6
-rw-r--r-- roles/os_zabbix/vars/template_docker.yml 10
-rw-r--r-- roles/os_zabbix/vars/template_openshift_master.yml 8
-rw-r--r-- roles/os_zabbix/vars/template_zagg_server.yml 16
-rw-r--r-- roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 2
9 files changed, 113 insertions, 59 deletions
diff --git a/bin/ohi b/bin/ohi
index d679edcfb..be9c53ec0 100755
--- a/bin/ohi
+++ b/bin/ohi
@@ -1,14 +1,16 @@
#!/usr/bin/env python
+'''
+Ohi = Openshift Host Inventory
+
+This script provides an easy way to look at your host inventory.
+
+This depends on multi_inventory being setup correctly.
+'''
# vim: expandtab:tabstop=4:shiftwidth=4
import argparse
-import traceback
import sys
import os
-import re
-import tempfile
-import time
-import subprocess
import ConfigParser
from openshift_ansible import awsutil
@@ -20,6 +22,9 @@ CONFIG_HOST_TYPE_ALIAS_SECTION = 'host_type_aliases'
class Ohi(object):
+ '''
+ Class for managing openshift host inventory
+ '''
def __init__(self):
self.host_type_aliases = {}
self.file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)))
@@ -35,6 +40,10 @@ class Ohi(object):
self.aws = awsutil.AwsUtil(self.host_type_aliases)
def run(self):
+ '''
+ Call into awsutil and retrieve the desired hosts and environments
+ '''
+
if self.args.list_host_types:
self.aws.print_host_types()
return 0
@@ -43,18 +52,24 @@ class Ohi(object):
if self.args.host_type is not None and \
self.args.env is not None:
# Both env and host-type specified
- hosts = self.aws.get_host_list(host_type=self.args.host_type, \
- envs=self.args.env)
+ hosts = self.aws.get_host_list(host_type=self.args.host_type,
+ envs=self.args.env,
+ version=self.args.openshift_version,
+ cached=self.args.cache_only)
if self.args.host_type is None and \
self.args.env is not None:
# Only env specified
- hosts = self.aws.get_host_list(envs=self.args.env)
+ hosts = self.aws.get_host_list(envs=self.args.env,
+ version=self.args.openshift_version,
+ cached=self.args.cache_only)
if self.args.host_type is not None and \
self.args.env is None:
# Only host-type specified
- hosts = self.aws.get_host_list(host_type=self.args.host_type)
+ hosts = self.aws.get_host_list(host_type=self.args.host_type,
+ version=self.args.openshift_version,
+ cached=self.args.cache_only)
if hosts is None:
# We weren't able to determine what they wanted to do
@@ -69,6 +84,9 @@ class Ohi(object):
return 0
def parse_config_file(self):
+ '''
+ Parse the config file for ohi
+ '''
if os.path.isfile(self.config_path):
config = ConfigParser.ConfigParser()
config.read(self.config_path)
@@ -85,23 +103,27 @@ class Ohi(object):
parser = argparse.ArgumentParser(description='OpenShift Host Inventory')
- parser.add_argument('--list-host-types', default=False, action='store_true',
- help='List all of the host types')
+ parser.add_argument('--list-host-types', default=False, action='store_true', help='List all of the host types')
- parser.add_argument('-e', '--env', action="store",
- help="Which environment to use")
+ parser.add_argument('-e', '--env', action="store", help="Which environment to use")
- parser.add_argument('-t', '--host-type', action="store",
- help="Which host type to use")
+ parser.add_argument('-t', '--host-type', action="store", help="Which host type to use")
- parser.add_argument('-l', '--user', action='store', default=None,
- help='username')
+ parser.add_argument('-l', '--user', action='store', default=None, help='username')
+ parser.add_argument('-c', '--cache-only', action='store_true', default=False,
+ help='Retrieve the host inventory by cache only. Default is false.')
- self.args = parser.parse_args()
+ parser.add_argument('-o', '--openshift-version', action='store', default='2',
+ help='Specify the openshift version. Default is 2')
-if __name__ == '__main__':
+ self.args = parser.parse_args()
+
+def main():
+ '''
+ Ohi will do its work here
+ '''
if len(sys.argv) == 1:
print "\nError: No options given. Use --help to see the available options\n"
sys.exit(0)
@@ -110,5 +132,9 @@ if __name__ == '__main__':
ohi = Ohi()
exitcode = ohi.run()
sys.exit(exitcode)
- except ArgumentError as e:
- print "\nError: %s\n" % e.message
+ except ArgumentError as err:
+ print "\nError: %s\n" % err.message
+
+if __name__ == '__main__':
+ main()
+
diff --git a/bin/openshift_ansible/awsutil.py b/bin/openshift_ansible/awsutil.py
index ba32b4dbd..1ea2f914c 100644
--- a/bin/openshift_ansible/awsutil.py
+++ b/bin/openshift_ansible/awsutil.py
@@ -46,14 +46,17 @@ class AwsUtil(object):
self.alias_lookup[value] = key
@staticmethod
- def get_inventory(args=None):
+ def get_inventory(args=None, cached=False):
"""Calls the inventory script and returns a dictionary containing the inventory."
Keyword arguments:
args -- optional arguments to pass to the inventory script
"""
minv = multi_inventory.MultiInventory(args)
- minv.run()
+ if cached:
+ minv.get_inventory_from_cache()
+ else:
+ minv.run()
return minv.result
def get_environments(self):
@@ -168,11 +171,12 @@ class AwsUtil(object):
host_type = self.resolve_host_type(host_type)
return "tag_env-host-type_%s-%s" % (env, host_type)
- def get_host_list(self, host_type=None, envs=None):
+ def get_host_list(self, host_type=None, envs=None, version=None, cached=False):
"""Get the list of hosts from the inventory using host-type and environment
"""
+ retval = set([])
envs = envs or []
- inv = self.get_inventory()
+ inv = self.get_inventory(cached=cached)
# We prefer to deal with a list of environments
if issubclass(type(envs), basestring):
@@ -183,29 +187,25 @@ class AwsUtil(object):
if host_type and envs:
# Both host type and environment were specified
- retval = []
for env in envs:
- env_host_type_tag = self.gen_env_host_type_tag(host_type, env)
- if env_host_type_tag in inv.keys():
- retval += inv[env_host_type_tag]
- return set(retval)
+ retval.update(inv.get('tag_environment_%s' % env, []))
+ retval.intersection_update(inv.get(self.gen_host_type_tag(host_type), []))
- if envs and not host_type:
+ elif envs and not host_type:
# Just environment was specified
- retval = []
for env in envs:
env_tag = AwsUtil.gen_env_tag(env)
if env_tag in inv.keys():
- retval += inv[env_tag]
- return set(retval)
+ retval.update(inv.get(env_tag, []))
- if host_type and not envs:
+ elif host_type and not envs:
# Just host-type was specified
- retval = []
host_type_tag = self.gen_host_type_tag(host_type)
if host_type_tag in inv.keys():
- retval = inv[host_type_tag]
- return set(retval)
+ retval.update(inv.get(host_type_tag, []))
+
+ # If version is specified then return only hosts in that version
+ if version:
+ retval.intersection_update(inv.get('oo_version_%s' % version, []))
- # We should never reach here!
- raise ArgumentError("Invalid combination of parameters")
+ return retval
diff --git a/inventory/multi_inventory.py b/inventory/multi_inventory.py
index 232f2402d..20fc48aa9 100755
--- a/inventory/multi_inventory.py
+++ b/inventory/multi_inventory.py
@@ -56,15 +56,6 @@ class MultiInventory(object):
else:
self.config_file = None # expect env vars
-
- def run(self):
- '''This method checks to see if the local
- cache is valid for the inventory.
-
- if the cache is valid; return cache
- else the credentials are loaded from multi_inventory.yaml or from the env
- and we attempt to get the inventory from the provider specified.
- '''
# load yaml
if self.config_file and os.path.isfile(self.config_file):
self.config = self.load_yaml_config()
@@ -91,6 +82,15 @@ class MultiInventory(object):
if self.config.has_key('cache_location'):
self.cache_path = self.config['cache_location']
+ def run(self):
+ '''This method checks to see if the local
+ cache is valid for the inventory.
+
+ if the cache is valid; return cache
+ else the credentials are loaded from multi_inventory.yaml or from the env
+ and we attempt to get the inventory from the provider specified.
+ '''
+
if self.args.get('refresh_cache', None):
self.get_inventory()
self.write_to_cache()
diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml
index 8995863ec..43647cc49 100644
--- a/roles/openshift_master/tasks/main.yml
+++ b/roles/openshift_master/tasks/main.yml
@@ -228,7 +228,7 @@
register: start_result
- set_fact:
- master_service_status_changed = start_result | changed
+ master_service_status_changed: start_result | changed
when: not openshift_master_ha | bool
- name: Start and enable master api
@@ -237,7 +237,7 @@
register: start_result
- set_fact:
- master_api_service_status_changed = start_result | changed
+ master_api_service_status_changed: start_result | changed
when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'
- name: Start and enable master controller
@@ -246,7 +246,7 @@
register: start_result
- set_fact:
- master_controllers_service_status_changed = start_result | changed
+ master_controllers_service_status_changed: start_result | changed
when: openshift_master_ha | bool and openshift.master.cluster_method == 'native'
- name: Install cluster packages
diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml
index eef7bec9a..38bffc2e5 100644
--- a/roles/openshift_node/tasks/main.yml
+++ b/roles/openshift_node/tasks/main.yml
@@ -85,11 +85,11 @@
docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries')
| oo_split() | union(['registry.access.redhat.com'])
| difference(['']) }}"
- when: openshift.common.deployment_type == 'enterprise'
+ when: openshift.common.deployment_type in ['enterprise', 'openshift-enterprise', 'atomic-enterprise']
- set_fact:
docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries')
| oo_split() | difference(['']) }}"
- when: openshift.common.deployment_type != 'enterprise'
+ when: openshift.common.deployment_type not in ['enterprise', 'openshift-enterprise', 'atomic-enterprise']
- name: Add personal registries
lineinfile:
@@ -131,4 +131,4 @@
register: start_result
- set_fact:
- node_service_status_changed = start_result | changed
+ node_service_status_changed: start_result | changed
diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml
index bfabf50c5..91a2c400e 100644
--- a/roles/os_zabbix/vars/template_docker.yml
+++ b/roles/os_zabbix/vars/template_docker.yml
@@ -12,6 +12,11 @@ g_template_docker:
- Docker Daemon
value_type: int
+ - key: docker.container.dns.resolution
+ applications:
+ - Docker Daemon
+ value_type: int
+
- key: docker.storage.is_loopback
applications:
- Docker Storage
@@ -62,6 +67,11 @@ g_template_docker:
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc'
priority: high
+ - name: 'docker.container.dns.resolution failed on {HOST.NAME}'
+ expression: '{Template Docker:docker.container.dns.resolution.max(#3)}>0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_dns.asciidoc'
+ priority: high
+
- name: 'Docker storage is using LOOPBACK on {HOST.NAME}'
expression: '{Template Docker:docker.storage.is_loopback.last()}<>0'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc'
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 514d6fd24..a0ba8d104 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -269,6 +269,14 @@ g_template_openshift_master:
- 'Openshift Master process not running on {HOST.NAME}'
priority: avg
+ - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}'
+ expression: '{Template Openshift Master:create_app.sum(1h)}>3'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+ dependencies:
+ - 'Openshift Master process not running on {HOST.NAME}'
+ description: The application create loop has failed 4 or more times in the last hour
+ priority: avg
+
- name: 'Openshift Master API health check is failing on {HOST.NAME}'
expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml
index 0e8e53bb7..db5665993 100644
--- a/roles/os_zabbix/vars/template_zagg_server.yml
+++ b/roles/os_zabbix/vars/template_zagg_server.yml
@@ -7,7 +7,12 @@ g_template_zagg_server:
- Zagg Server
value_type: int
- - key: zagg.server.processor.errors
+ - key: zagg.server.metrics.errors
+ applications:
+ - Zagg Server
+ value_type: int
+
+ - key: zagg.server.heartbeat.errors
applications:
- Zagg Server
value_type: int
@@ -18,8 +23,13 @@ g_template_zagg_server:
value_type: int
ztriggers:
- - name: 'Error sending metrics on {HOST.NAME}'
- expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0'
+ - name: 'Error processing metrics on {HOST.NAME}'
+ expression: '{Template Zagg Server:zagg.server.metrics.errors.min(#3)}>0'
+ url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+ priority: average
+
+ - name: 'Error processing heartbeats on {HOST.NAME}'
+ expression: '{Template Zagg Server:zagg.server.heartbeat.errors.min(#3)}>0'
url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
priority: average
diff --git a/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2 b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2
index 978e40b88..bcc8a5e03 100644
--- a/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2
+++ b/roles/oso_host_monitoring/templates/oso-rhel7-zagg-client.service.j2
@@ -42,7 +42,7 @@ ExecStart=/usr/bin/docker run --name {{ osohm_zagg_client }}
-v /etc/localtime:/etc/localtime \
-v /run/pcp:/run/pcp \
-v /var/run/docker.sock:/var/run/docker.sock \
- -v /var/run/openvswitch/db.sock:/var/run/openvswitch/db.sock \
+ -v /var/run/openvswitch:/var/run/openvswitch \
{% if hostvars[inventory_hostname]['ec2_tag_host-type'] == 'master' %}
-v /etc/openshift/master/admin.kubeconfig:/etc/openshift/master/admin.kubeconfig \
-v /etc/openshift/master/master.etcd-client.crt:/etc/openshift/master/master.etcd-client.crt \