From b17d3d74eb5a9e7640d94f98f6b27ce4891b3c26 Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan" <csa@suren.me>
Date: Sun, 6 Oct 2019 04:37:01 +0200
Subject: Integration of CentOS8 and ipecompute nodes

---
 docker.yml                            |  6 ++++++
 install.yml                           | 20 ++++++++++++++++++
 inventories/ipe.erb                   | 17 ++++++++++++++++
 rocm.yml                              |  7 +++++++
 roles/common/tasks/main.yml           | 13 +++++-------
 roles/common/tasks/main_dnf.yml       | 15 ++++++++++++++
 roles/common/tasks/main_yum.yml       | 24 ++++++++++++++++++++++
 roles/common/tasks/software.yml       | 17 ----------------
 roles/cuda/vars/centos-8.yml          |  4 ++++
 roles/cuda/vars/redhat-8.yml          |  4 ++++
 roles/docker/defaults/main.yml        |  8 ++++++++
 roles/docker/handlers/main.yml        |  4 ++++
 roles/docker/tasks/configure_apt.yml  | 15 ++++++++++++++
 roles/docker/tasks/configure_dnf.yml  | 18 +++++++++++++++++
 roles/docker/tasks/configure_yum.yml  | 38 +++++++++++++++++++++++++++++++++++
 roles/docker/tasks/install_docker.yml | 14 +++++++++++++
 roles/docker/tasks/install_podman.yml | 12 +++++++++++
 roles/docker/tasks/main.yml           | 24 ++++++++++++++++++++++
 roles/docker/vars/centos-7.yml        |  3 +++
 roles/docker/vars/centos-8.yml        | 11 ++++++++++
 roles/rocm/tasks/main.yml             | 20 ++++++++++++++++++
 roles/storage/defaults/main.yml       |  1 +
 roles/storage/tasks/ipecompute2.yml   | 17 ++++++++++++++++
 roles/storage/tasks/ipecompute4.yml   | 35 ++++++++++++++++++++++++++++++++
 roles/storage/tasks/main.yml          | 20 ++++++++++++------
 roles/storage/tasks/nfs.yml           | 12 +++++++++++
 26 files changed, 348 insertions(+), 31 deletions(-)
 create mode 100644 docker.yml
 create mode 100644 rocm.yml
 create mode 100644 roles/common/tasks/main_dnf.yml
 create mode 100644 roles/common/tasks/main_yum.yml
 create mode 100644 roles/cuda/vars/centos-8.yml
 create mode 100644 roles/cuda/vars/redhat-8.yml
 create mode 100644 roles/docker/defaults/main.yml
 create mode 100644 roles/docker/handlers/main.yml
 create mode 100644 roles/docker/tasks/configure_apt.yml
 create mode 100644 roles/docker/tasks/configure_dnf.yml
 create mode 100644 roles/docker/tasks/configure_yum.yml
 create mode 100644 roles/docker/tasks/install_docker.yml
 create mode 100644 roles/docker/tasks/install_podman.yml
 create mode 100644 roles/docker/tasks/main.yml
 create mode 100644 roles/docker/vars/centos-7.yml
 create mode 100644 roles/docker/vars/centos-8.yml
 create mode 100644 roles/rocm/tasks/main.yml
 create mode 100644 roles/storage/defaults/main.yml
 create mode 100644 roles/storage/tasks/ipecompute2.yml
 create mode 100644 roles/storage/tasks/ipecompute4.yml
 create mode 100644 roles/storage/tasks/nfs.yml

diff --git a/docker.yml b/docker.yml
new file mode 100644
index 0000000..ea91aed
--- /dev/null
+++ b/docker.yml
@@ -0,0 +1,6 @@
+---
+# Standalone playbook: applies the docker role to every inventory host as root.
+- name: Docker
+  hosts: all
+  remote_user: root
+  roles: [docker]
diff --git a/install.yml b/install.yml
index 278dac9..f1acdd8 100644
--- a/install.yml
+++ b/install.yml
@@ -15,9 +15,29 @@
   remote_user: root
   roles:
     - role: cuda
+
+# The ROCm play is disabled because the AMD driver is "surprisingly" crashing
+#- name: ROCM
+#  hosts: rocm
+#  remote_user: root
+#  roles:
+#    - role: rocm
+
+
+- name: Docker
+  hosts: docker  # the [docker] group of the inventory
+  remote_user: root
+  roles:
+    - role: docker
     
 - name: Desktop
   hosts: desktop
   remote_user: root
   roles:
     - role: desktop
+
+- name: Additional Local and Network Storage
+  hosts: infra  # the [infra] group of the inventory
+  remote_user: root
+  roles:
+    - role: storage
diff --git a/inventories/ipe.erb b/inventories/ipe.erb
index df62890..20edf72 100644
--- a/inventories/ipe.erb
+++ b/inventories/ipe.erb
@@ -1,6 +1,9 @@
 [ands]
 192.168.26.[140:149]
 
+[compute]
+192.168.26.[130:139]
+
 [camera]
 192.168.26.[80:89]
 
@@ -13,7 +16,21 @@ student
 [cuda]
 192.168.26.[80:84]
 192.168.26.[86:89]
+192.168.26.[131:133]
+
+[rocm]
+192.168.26.134  # ipepdvcompute4 according to the storage role
+
+[docker]
+192.168.26.[131:139]
 
 [ib]
 192.168.26.[60:69]
 192.168.26.[80:89]
+192.168.26.[130:139]
+192.168.26.[140:149]
+
+[infra]
+192.168.26.[80:89]
+192.168.26.[130:139]
+192.168.26.[140:149]
diff --git a/rocm.yml b/rocm.yml
new file mode 100644
index 0000000..c76c068
--- /dev/null
+++ b/rocm.yml
@@ -0,0 +1,7 @@
+---
+# Standalone playbook: applies the rocm role to every inventory host as root.
+- name: ROCM
+  hosts: all
+  remote_user: root
+  roles:
+    - role: rocm
diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml
index 286a027..9f3cf79 100644
--- a/roles/common/tasks/main.yml
+++ b/roles/common/tasks/main.yml
@@ -5,14 +5,11 @@
     - epel-release
   when: ansible_distribution == 'CentOS' or ansible_distribution == 'Red Hat Enterprise Linux'
 
-- name: Add our repository with updates and overrides
-  yum_repository: name="{{ item.name }}" description= "{{ item.description | default('Ands repository') }}" baseurl="{{ item.url }}" enabled="yes" gpgcheck="no" cost="{{ item.cost | default(1) }}"
-  with_items: "{{ ands_repositories | default([]) }}"
-
-# We always update on first install and if requested
-- name: Update CentOS
-  yum: name=* state=latest update_cache=yes
-  when: (result | changed) or (os_update | default(false))
+- include_tasks: main_yum.yml  # repository, update and requirements for yum hosts
+  when: ansible_pkg_mgr == 'yum' 
+  
+- include_tasks: main_dnf.yml  # repository, update and requirements for dnf hosts
+  when: ansible_pkg_mgr == 'dnf'
 
 - name: Install additional software
   include_tasks: software.yml
diff --git a/roles/common/tasks/main_dnf.yml b/roles/common/tasks/main_dnf.yml
new file mode 100644
index 0000000..0572132
--- /dev/null
+++ b/roles/common/tasks/main_dnf.yml
@@ -0,0 +1,29 @@
+---
+# Package setup for hosts whose package manager fact is dnf.
+- name: Add our repository with updates and overrides
+  yum_repository:
+    name: "{{ item.name }}"
+    description: "{{ item.description | default('Ands repository') }}"
+    baseurl: "{{ item.url }}"
+    enabled: true
+    gpgcheck: false
+    cost: "{{ item.cost | default(1) }}"
+  with_items: "{{ ands_repositories | default([]) }}"
+
+# We always update on first install and if requested.
+# NOTE(review): `result` is not registered in this file; presumably it comes
+# from a task in main.yml that runs before this include - confirm.
+- name: Update CentOS
+  dnf:
+    name: "*"
+    state: latest
+  when: (result is changed) or (os_update | default(false))
+
+- name: Install various ansible requirements
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - yum-plugin-versionlock
+    - python-rhsm-certificates
+#    - iptables-services
diff --git a/roles/common/tasks/main_yum.yml b/roles/common/tasks/main_yum.yml
new file mode 100644
index 0000000..2b320d5
--- /dev/null
+++ b/roles/common/tasks/main_yum.yml
@@ -0,0 +1,39 @@
+---
+# Package setup for hosts whose package manager fact is yum.
+- name: Add our repository with updates and overrides
+  yum_repository:
+    name: "{{ item.name }}"
+    description: "{{ item.description | default('Ands repository') }}"
+    baseurl: "{{ item.url }}"
+    enabled: true
+    gpgcheck: false
+    cost: "{{ item.cost | default(1) }}"
+  with_items: "{{ ands_repositories | default([]) }}"
+
+# We always update on first install and if requested.
+# NOTE(review): `result` is not registered in this file; presumably it comes
+# from a task in main.yml that runs before this include - confirm.
+- name: Update CentOS
+  yum:
+    name: "*"
+    state: latest
+    update_cache: true
+  when: (result is changed) or (os_update | default(false))
+
+- name: Install various ansible requirements
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - yum-plugin-versionlock
+    - libselinux-python
+    - libsemanage-python
+    - yamllint
+    - pyOpenSSL
+    - python-passlib
+    - python2-ruamel-yaml
+    - python2-jmespath
+    - python-ipaddress
+    - iptables-services
+    - PyYAML
+    - python-rhsm-certificates
diff --git a/roles/common/tasks/software.yml b/roles/common/tasks/software.yml
index c621ef3..3a1a5c1 100644
--- a/roles/common/tasks/software.yml
+++ b/roles/common/tasks/software.yml
@@ -1,19 +1,3 @@
-- name: Install various ansible requirements
-  package: name={{item}} state=present
-  with_items: 
-    - yum-plugin-versionlock
-    - libselinux-python
-    - libsemanage-python
-    - yamllint
-    - pyOpenSSL
-    - python-passlib
-    - python2-ruamel-yaml
-    - python2-jmespath
-    - python-ipaddress
-    - iptables-services
-    - PyYAML
-    - python-rhsm-certificates
-
 - name: Install various administrative tools
   package: name={{item}} state=present
   with_items: 
@@ -21,7 +5,6 @@
     - telnet
     - lsof
     - strace
-    - bzr
     - git
     - pciutils
 
diff --git a/roles/cuda/vars/centos-8.yml b/roles/cuda/vars/centos-8.yml
new file mode 100644
index 0000000..935e84d
--- /dev/null
+++ b/roles/cuda/vars/centos-8.yml
@@ -0,0 +1,4 @@
+---
+# NOTE(review): presumably read by the cuda role on CentOS 8 hosts - confirm.
+cuda_repo_subfolder: 'rhel8'
+# vim:ft=ansible:
diff --git a/roles/cuda/vars/redhat-8.yml b/roles/cuda/vars/redhat-8.yml
new file mode 100644
index 0000000..935e84d
--- /dev/null
+++ b/roles/cuda/vars/redhat-8.yml
@@ -0,0 +1,4 @@
+---
+# NOTE(review): presumably read by the cuda role on RHEL 8 hosts - confirm.
+cuda_repo_subfolder: 'rhel8'
+# vim:ft=ansible:
diff --git a/roles/docker/defaults/main.yml b/roles/docker/defaults/main.yml
new file mode 100644
index 0000000..a5bcb04
--- /dev/null
+++ b/roles/docker/defaults/main.yml
@@ -0,0 +1,8 @@
+---
+docker_repo_url: 'https://download.docker.com/linux'
+nvidia_docker_repo_url: 'https://nvidia.github.io'
+# Nvidia repositories to configure; each name is a path under the Nvidia URL.
+nvidia_repos:
+  - libnvidia-container
+  - nvidia-container-runtime
+  - nvidia-docker
diff --git a/roles/docker/handlers/main.yml b/roles/docker/handlers/main.yml
new file mode 100644
index 0000000..3eb0349
--- /dev/null
+++ b/roles/docker/handlers/main.yml
@@ -0,0 +1,8 @@
+---
+# Reloads systemd units and restarts the docker service when notified.
+- name: restart docker
+  become: true
+  systemd:
+    name: docker
+    daemon_reload: true
+    state: restarted
diff --git a/roles/docker/tasks/configure_apt.yml b/roles/docker/tasks/configure_apt.yml
new file mode 100644
index 0000000..3fd961a
--- /dev/null
+++ b/roles/docker/tasks/configure_apt.yml
@@ -0,0 +1,17 @@
+---
+# apt flavour of the Nvidia repository setup for the docker role.
+# NOTE(review): nvidia_docker_packaging_key_id is not defined in this role's
+# defaults/main.yml - confirm it is provided elsewhere before using apt hosts.
+- name: Trust packaging key for Nvidia repositories (apt)
+  apt_key:
+    state: present
+    id: "{{ nvidia_docker_packaging_key_id }}"
+    data: "{{ lookup('file', 'files/nvidia_docker_packaging_key.asc') }}"
+
+- name: Configure Nvidia repository (apt)
+  apt_repository:
+    state: present
+    filename: nvidia_docker
+    repo: "deb {{ nvidia_docker_repo_url }}/{{ nvidia_docker_repo_subfolder }} /"
+
+# vim:ft=ansible:
diff --git a/roles/docker/tasks/configure_dnf.yml b/roles/docker/tasks/configure_dnf.yml
new file mode 100644
index 0000000..73ecb30
--- /dev/null
+++ b/roles/docker/tasks/configure_dnf.yml
@@ -0,0 +1,19 @@
+---
+# Nvidia repository setup for dnf hosts; only hosts in the cuda group get it.
+- name: Import NVIDIA Docker repository gpg keys
+  when: "'cuda' in group_names"
+  rpm_key:
+    state: present
+    key: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey"
+  with_items: "{{ nvidia_repos }}"
+
+- name: Configure Nvidia repositories (yum)
+  when: "'cuda' in group_names"
+  yum_repository:
+    name: "{{ item }}"
+    description: "Official {{ item }} repository"
+    baseurl: "{{ nvidia_docker_repo_url }}/{{ item }}/{{ nvidia_docker_repo_subfolder }}/x86_64/"
+    gpgkey: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey"
+    gpgcheck: false
+    enabled: true
+  with_items: "{{ nvidia_repos }}"
diff --git a/roles/docker/tasks/configure_yum.yml b/roles/docker/tasks/configure_yum.yml
new file mode 100644
index 0000000..99a2743
--- /dev/null
+++ b/roles/docker/tasks/configure_yum.yml
@@ -0,0 +1,41 @@
+---
+# Docker CE and Nvidia repository setup for yum hosts.
+# NOTE(review): docker_rpm_key_path is not defined in this role's
+# defaults/main.yml - confirm it is provided elsewhere.
+- name: Upload packaging key for docker repositories
+  copy:
+    src: docker_packaging_key.asc
+    dest: "{{ docker_rpm_key_path }}"
+    mode: "0644"
+
+- name: Import Docker CE repository gpg key
+  rpm_key:
+    key: "{{ docker_repo_url }}/centos/gpg"
+    state: present
+
+- name: Import NVIDIA Docker repository gpg keys
+  rpm_key:
+    key: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey"
+    state: present
+  with_items: "{{ nvidia_repos }}"
+
+- name: Configure docker repositories (yum)
+  yum_repository:
+    name: "docker-ce-{{ item }}"
+    description: Official docker-ce repository
+    baseurl: "{{ docker_repo_url }}/{{ docker_repo_subfolder }}/x86_64/{{ item }}"
+    gpgkey: "{{ docker_repo_url }}/centos/gpg"
+    gpgcheck: true
+    enabled: true
+  with_items:
+    - stable
+
+- name: Configure Nvidia repositories (yum)
+  yum_repository:
+    name: "{{ item }}"
+    description: "Official {{ item }} repository"
+    baseurl: "{{ nvidia_docker_repo_url }}/{{ item }}/{{ nvidia_docker_repo_subfolder }}/x86_64/"
+    gpgkey: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey"
+    gpgcheck: false
+    enabled: true
+  with_items: "{{ nvidia_repos }}"
diff --git a/roles/docker/tasks/install_docker.yml b/roles/docker/tasks/install_docker.yml
new file mode 100644
index 0000000..9ae0cb9
--- /dev/null
+++ b/roles/docker/tasks/install_docker.yml
@@ -0,0 +1,22 @@
+---
+# Installs lvm2, nvidia-docker2 and docker-compose; docker is restarted after.
+- name: Install requirements
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - lvm2
+
+- name: Install nvidia docker
+  package:
+    name: nvidia-docker2
+    state: present
+  notify:
+    - restart docker
+
+- name: Install additional packages
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - docker-compose
diff --git a/roles/docker/tasks/install_podman.yml b/roles/docker/tasks/install_podman.yml
new file mode 100644
index 0000000..3498aa7
--- /dev/null
+++ b/roles/docker/tasks/install_podman.yml
@@ -0,0 +1,18 @@
+---
+# Installs podman, buildah and skopeo; cuda hosts also get the Nvidia runtime.
+- name: Install docker packages
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - podman
+    - buildah
+    - skopeo
+
+- name: Install NVIDIA packages
+  when: "'cuda' in group_names"
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - nvidia-container-runtime
diff --git a/roles/docker/tasks/main.yml b/roles/docker/tasks/main.yml
new file mode 100644
index 0000000..f13f99f
--- /dev/null
+++ b/roles/docker/tasks/main.yml
@@ -0,0 +1,24 @@
+---
+# Loads per-distribution variables, then dispatches on the package manager.
+- name: Gather OS specific variables
+  include_vars: "{{ item }}"
+  with_first_found:
+    - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version }}.yml"
+    - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version }}.yml"
+    - "{{ ansible_distribution | lower }}.yml"
+    - "{{ ansible_os_family | lower }}.yml"
+
+- include_tasks: configure_yum.yml
+  when: ansible_pkg_mgr == 'yum'
+
+- include_tasks: configure_dnf.yml
+  when: ansible_pkg_mgr == 'dnf'
+
+- include_tasks: configure_apt.yml
+  when: ansible_pkg_mgr == 'apt'
+
+- include_tasks: install_docker.yml
+  when: ansible_pkg_mgr in ['yum', 'apt']
+
+- include_tasks: install_podman.yml
+  when: ansible_pkg_mgr == 'dnf'
diff --git a/roles/docker/vars/centos-7.yml b/roles/docker/vars/centos-7.yml
new file mode 100644
index 0000000..e681468
--- /dev/null
+++ b/roles/docker/vars/centos-7.yml
@@ -0,0 +1,3 @@
+---
+nvidia_docker_repo_subfolder: 'centos7'  # path component of the Nvidia baseurl
+docker_repo_subfolder: 'centos/7'  # path component of the docker-ce baseurl
diff --git a/roles/docker/vars/centos-8.yml b/roles/docker/vars/centos-8.yml
new file mode 100644
index 0000000..d4d24fe
--- /dev/null
+++ b/roles/docker/vars/centos-8.yml
@@ -0,0 +1,11 @@
+---
+# While we have RHEL8 repo, in fact it references centos7 packages
+# https://nvidia.github.io/nvidia-docker/rhel8.0/nvidia-docker.repo
+# nvidia_docker_repo_subfolder: rhel8.0
+nvidia_docker_repo_subfolder: 'centos7'
+# docker_repo_subfolder: centos/8
+
+# Replaces the list from defaults/main.yml; nvidia-docker is left out here.
+nvidia_repos:
+  - libnvidia-container
+  - nvidia-container-runtime
diff --git a/roles/rocm/tasks/main.yml b/roles/rocm/tasks/main.yml
new file mode 100644
index 0000000..4ae1a87
--- /dev/null
+++ b/roles/rocm/tasks/main.yml
@@ -0,0 +1,24 @@
+---
+# Configures the two repositories below, then installs rocm-dkms from them.
+- name: Configure DarkSoft repositories (for packages mangling provides/requires to suit ROCm)
+  yum_repository:
+    name: ands_centos8
+    description: Various packages for CentOS8
+    baseurl: 'http://ufo.kit.edu/ands/repos/centos8/centos8/'
+    enabled: true
+    gpgcheck: false
+
+- name: Configure ROCm repositories (yum)
+  yum_repository:
+    name: rocm
+    description: AMD ROCm Drivers and Infrastructure
+    baseurl: 'http://repo.radeon.com/rocm/yum/rpm/'
+    enabled: true
+    gpgcheck: false
+
+- name: Install ROCm drivers and packages
+  package:
+    name: "{{ item }}"
+    state: present
+  with_items:
+    - rocm-dkms
diff --git a/roles/storage/defaults/main.yml b/roles/storage/defaults/main.yml
new file mode 100644
index 0000000..ca36e70
--- /dev/null
+++ b/roles/storage/defaults/main.yml
@@ -0,0 +1,3 @@
+---
+# Member disks of the /dev/md10 array built by tasks/ipecompute4.yml.
+compute4_ssds: [sda, sdb, sdc, sdd, sde, sdf, sdg, sdh]
diff --git a/roles/storage/tasks/ipecompute2.yml b/roles/storage/tasks/ipecompute2.yml
new file mode 100644
index 0000000..9b2cef8
--- /dev/null
+++ b/roles/storage/tasks/ipecompute2.yml
@@ -0,0 +1,30 @@
+---
+# Local fast storage: removes partitions 2-4 of /dev/sda, keeps a single
+# partition 1 named "fast", formats it as XFS and mounts it at /mnt/fast.
+- name: Delete partitions
+  parted:
+    device: /dev/sda
+    label: gpt
+    number: "{{ item }}"
+    state: absent
+  with_items: [2, 3, 4]
+
+- name: Create partition
+  parted:
+    device: /dev/sda
+    label: gpt
+    number: 1
+    name: fast
+    state: present
+
+- name: arrays | Creating Array(s) Filesystem
+  filesystem:
+    dev: /dev/sda1
+    fstype: xfs
+
+- name: arrays | Mounting Array(s)
+  mount:
+    name: /mnt/fast
+    src: /dev/sda1
+    fstype: xfs
+    state: mounted
diff --git a/roles/storage/tasks/ipecompute4.yml b/roles/storage/tasks/ipecompute4.yml
new file mode 100644
index 0000000..5b3a88f
--- /dev/null
+++ b/roles/storage/tasks/ipecompute4.yml
@@ -0,0 +1,46 @@
+---
+# Local fast storage: RAID0 array /dev/md10 over partition 1 of every disk in
+# compute4_ssds, formatted as XFS and mounted at /mnt/fast.
+#- name: Delete partitions
+#  parted: device="/dev/{{ item[0] }}" label="gpt" number="{{ item[1] }}" state="absent"
+#  with_nested:
+#    - "{{ compute4_ssds }}"
+#    - [ 2, 3, 4 ]
+
+# failed_when is false, so errors from parted are ignored for every disk.
+- name: Create partition
+  parted:
+    device: "/dev/{{ item }}"
+    label: gpt
+    number: 1
+    name: softraid
+    flags: [raid]
+    state: present
+  failed_when: false
+  with_items: "{{ compute4_ssds }}"
+
+- name: arrays | Checking Status Of Array(s)
+  shell: "cat /proc/mdstat | grep md10"
+  register: array_check
+  changed_when: false
+  failed_when: false
+  check_mode: false
+
+# The pattern is anchored (^...$) so each disk name is replaced exactly once;
+# an unanchored (.*) also matches the empty string at the end on Python 3.7+.
+- name: arrays | Creating Array(s)
+  shell: "yes | mdadm --create /dev/md10 --level=0 --raid-devices={{ compute4_ssds | count }} {{ compute4_ssds | map('regex_replace', '^(.*)$', '/dev/\\1') | join ('1 ') }}1"
+  register: array_created
+  when: array_check.rc != 0
+
+- name: arrays | Creating Array(s) Filesystem
+  filesystem:
+    dev: /dev/md10
+    fstype: xfs
+
+- name: arrays | Mounting Array(s)
+  mount:
+    name: /mnt/fast
+    src: /dev/md10
+    fstype: xfs
+    state: mounted
diff --git a/roles/storage/tasks/main.yml b/roles/storage/tasks/main.yml
index 871e785..014e396 100644
--- a/roles/storage/tasks/main.yml
+++ b/roles/storage/tasks/main.yml
@@ -1,9 +1,17 @@
 ---
-- name: Ensure NFS common is installed.
-  package: name=nfs-utils state=present
+- name: Ensure required software is installed.
+  package: name="{{ item }}" state=present
+  with_items: [ 'parted', 'mdadm', 'nfs-utils' ]  # partitioning, software RAID and NFS tools
 
-- name: Create mountable dir
-  file: path=/mnt/ands state=directory mode=755 owner=root group=root
+- debug: msg="{{ inventory_hostname }}"  # prints the name the host conditions in this file compare against
 
-- name: set mountpoints
-  mount: name=/mnt/ands src=192.168.26.140:/mnt/ands fstype=nfs4 opts=defaults,minorversion=1,_netdev,nofail,soft,nodiratime,noatime dump=0 passno=0 state=mounted
+- name: configure network fs
+  include_tasks: nfs.yml  # no condition: runs on every host of the play
+
+- name: configure ipepdvcompute2
+  include_tasks: ipecompute2.yml
+  when: inventory_hostname == '192.168.26.132'  # host is selected by its inventory address
+
+- name: configure ipepdvcompute4
+  include_tasks: ipecompute4.yml
+  when: inventory_hostname == '192.168.26.134'  # host is selected by its inventory address
diff --git a/roles/storage/tasks/nfs.yml b/roles/storage/tasks/nfs.yml
new file mode 100644
index 0000000..9dbd467
--- /dev/null
+++ b/roles/storage/tasks/nfs.yml
@@ -0,0 +1,31 @@
+---
+# Network filesystems: drops the old /mnt/ands NFS mount and mounts /mnt/pdv.
+# The /mnt/ands directory is not created here because the mount module removes
+# the mount point again when state is absent.
+- name: remove obsolete ands mountpoint
+  mount:
+    name: /mnt/ands
+    src: "192.168.26.140:/mnt/ands"
+    fstype: nfs4
+    opts: "defaults,minorversion=1,_netdev,nofail,soft,nodiratime,noatime"
+    dump: "0"
+    passno: "0"
+    state: absent
+
+- name: Create mountable dir
+  file:
+    path: /mnt/pdv
+    state: directory
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: set mountpoints
+  mount:
+    name: /mnt/pdv
+    src: "192.168.26.170:/pdv"
+    fstype: nfs
+    opts: "defaults,_netdev,nofail,soft,nodiratime,noatime"
+    dump: "0"
+    passno: "0"
+    state: mounted
-- 
cgit v1.2.3