diff options
72 files changed, 1430 insertions, 714 deletions
diff --git a/anslib/openshift-ansible b/anslib/openshift-ansible -Subproject 22d3a96deaf74b7aa9aa021a73ef39e2b4da337 +Subproject 974da273128f43a564967716a7386b59f883254 diff --git a/anslib/patches/openshift/networkmanager-ds-use-openshift-interface.patch b/anslib/patches/openshift/networkmanager-ds-use-openshift-interface.patch index 687be8a..0ffdc5e 100644 --- a/anslib/patches/openshift/networkmanager-ds-use-openshift-interface.patch +++ b/anslib/patches/openshift/networkmanager-ds-use-openshift-interface.patch @@ -12,10 +12,10 @@ index ea28064..df95ba3 100644 file: /etc/dnsmasq.d/node-dnsmasq.conf lines: diff --git a/roles/openshift_node/files/networkmanager/99-origin-dns.sh b/roles/openshift_node/files/networkmanager/99-origin-dns.sh -index acf3e2f..16129a2 100755 +index acf3e2f..8c597a6 100755 --- a/roles/openshift_node/files/networkmanager/99-origin-dns.sh +++ b/roles/openshift_node/files/networkmanager/99-origin-dns.sh -@@ -43,10 +43,25 @@ if [[ $2 =~ ^(up|dhcp4-change|dhcp6-change)$ ]]; then +@@ -43,10 +43,26 @@ if [[ $2 =~ ^(up|dhcp4-change|dhcp6-change)$ ]]; then ###################################################################### # couldn't find an existing method to determine if the interface owns the # default route @@ -40,7 +40,8 @@ index acf3e2f..16129a2 100755 + def_routes_int=$(for r in ${def_routes}; do /sbin/ip route get to ${r} | awk '{print $3}'; done) + interfaces="${def_route_int} ${def_routes_int}" + -+ if [[ "${interfaces}" =~ (^|[[:space:]])${DEVICE_IFACE}($|[[:space:]]) ]]; then ++ nsconf=$(grep -P ^nameserver /etc/resolv.conf | wc -l) ++ if [[ $nsconf -gt 0 && "${interfaces}" =~ (^|[[:space:]])${DEVICE_IFACE}($|[[:space:]]) ]]; then +# if [[ ${DEVICE_IFACE} == ${def_route_int} ]]; then if [ ! 
-f /etc/dnsmasq.d/origin-dns.conf ]; then cat << EOF > /etc/dnsmasq.d/origin-dns.conf diff --git a/docs/benchmarks/netpipe-hostnet-clusterip.txt b/docs/benchmarks/netpipe-hostnet-clusterip.txt new file mode 100644 index 0000000..452a59b --- /dev/null +++ b/docs/benchmarks/netpipe-hostnet-clusterip.txt @@ -0,0 +1,124 @@ + 1 0.326958 0.00002333 + 2 0.618103 0.00002469 + 3 0.938810 0.00002438 + 4 1.140540 0.00002676 + 6 1.810863 0.00002528 + 8 2.440223 0.00002501 + 12 4.014228 0.00002281 + 13 4.244945 0.00002336 + 16 5.265391 0.00002318 + 19 6.288176 0.00002305 + 21 6.873981 0.00002331 + 24 7.626945 0.00002401 + 27 8.464006 0.00002434 + 29 9.132165 0.00002423 + 32 10.071211 0.00002424 + 35 11.350914 0.00002352 + 45 14.462787 0.00002374 + 48 15.551991 0.00002355 + 51 16.617742 0.00002341 + 61 17.937454 0.00002595 + 64 19.940809 0.00002449 + 67 19.670239 0.00002599 + 93 27.923937 0.00002541 + 96 30.696302 0.00002386 + 99 31.378657 0.00002407 + 125 39.836959 0.00002394 + 128 39.851527 0.00002451 + 131 41.123237 0.00002430 + 189 59.994843 0.00002403 + 192 52.568072 0.00002787 + 195 60.825254 0.00002446 + 253 80.036908 0.00002412 + 256 78.572397 0.00002486 + 259 82.453495 0.00002397 + 381 121.586010 0.00002391 + 384 122.643994 0.00002389 + 387 119.587204 0.00002469 + 509 152.953007 0.00002539 + 512 156.751101 0.00002492 + 515 147.461225 0.00002665 + 765 223.078148 0.00002616 + 768 245.267636 0.00002389 + 771 252.201504 0.00002332 + 1021 291.274243 0.00002674 + 1024 288.122902 0.00002712 + 1027 314.918782 0.00002488 + 1533 455.244190 0.00002569 + 1536 440.315120 0.00002661 + 1539 444.559116 0.00002641 + 2045 408.429719 0.00003820 + 2048 408.361919 0.00003826 + 2051 403.367349 0.00003879 + 3069 598.249055 0.00003914 + 3072 608.830745 0.00003850 + 3075 605.542765 0.00003874 + 4093 691.850246 0.00004514 + 4096 712.818517 0.00004384 + 4099 698.066606 0.00004480 + 6141 907.399011 0.00005163 + 6144 909.222865 0.00005156 + 6147 920.949895 0.00005092 + 8189 1106.942777 0.00005644 + 
8192 1118.442648 0.00005588 + 8195 1138.471355 0.00005492 + 12285 1456.435686 0.00006435 + 12288 1473.988562 0.00006360 + 12291 1432.599036 0.00006546 + 16381 1672.451589 0.00007473 + 16384 1687.110914 0.00007409 + 16387 1594.364859 0.00007842 + 24573 1820.900468 0.00010296 + 24576 1927.109643 0.00009730 + 24579 1925.807752 0.00009737 + 32765 2039.948799 0.00012254 + 32768 2264.455285 0.00011040 + 32771 2053.911942 0.00012173 + 49149 2329.879339 0.00016094 + 49152 2251.567470 0.00016655 + 49155 2376.570618 0.00015780 + 65533 2087.316837 0.00023953 + 65536 2090.007791 0.00023923 + 65539 2240.546493 0.00022317 + 98301 2261.214485 0.00033167 + 98304 2236.528922 0.00033534 + 98307 2267.504025 0.00033077 + 131069 2506.301596 0.00039899 + 131072 2574.001159 0.00038850 + 131075 2499.398059 0.00040011 + 196605 2679.266208 0.00055985 + 196608 2577.617790 0.00058193 + 196611 2655.729790 0.00056483 + 262141 2866.098615 0.00069780 + 262144 2952.146697 0.00067747 + 262147 2921.582565 0.00068457 + 393213 3280.847971 0.00091439 + 393216 3145.640621 0.00095370 + 393219 3190.458883 0.00094031 + 524285 3293.829390 0.00121439 + 524288 3395.057727 0.00117818 + 524291 3213.113808 0.00124491 + 786429 3433.707485 0.00174738 + 786432 3564.531089 0.00168325 + 786435 3343.824065 0.00179436 + 1048573 3461.698116 0.00231100 + 1048576 3411.340450 0.00234512 + 1048579 3603.069459 0.00222034 + 1572861 3253.106873 0.00368877 + 1572864 3502.997228 0.00342564 + 1572867 3457.981793 0.00347024 + 2097149 3331.709227 0.00480233 + 2097152 3296.412690 0.00485376 + 2097155 3635.046705 0.00440160 + 3145725 3713.207547 0.00646341 + 3145728 3398.330126 0.00706229 + 3145731 3455.172928 0.00694611 + 4194301 3667.624405 0.00872499 + 4194304 3244.050612 0.00986421 + 4194307 3076.097496 0.01040280 + 6291453 3498.607536 0.01371974 + 6291456 3473.348924 0.01381952 + 6291459 3392.586555 0.01414850 + 8388605 3522.519503 0.01816881 + 8388608 3694.116745 0.01732484 + 8388611 3279.530110 0.01951500 diff --git 
a/docs/benchmarks/netpipe-hostnet-hostip.txt b/docs/benchmarks/netpipe-hostnet-hostip.txt new file mode 100644 index 0000000..494289d --- /dev/null +++ b/docs/benchmarks/netpipe-hostnet-hostip.txt @@ -0,0 +1,124 @@ + 1 0.518634 0.00001471 + 2 1.076815 0.00001417 + 3 1.516004 0.00001510 + 4 2.144302 0.00001423 + 6 3.214913 0.00001424 + 8 4.166316 0.00001465 + 12 6.289028 0.00001456 + 13 6.744819 0.00001470 + 16 8.533129 0.00001431 + 19 10.729789 0.00001351 + 21 11.221507 0.00001428 + 24 13.006231 0.00001408 + 27 14.385727 0.00001432 + 29 15.045281 0.00001471 + 32 15.624612 0.00001563 + 35 17.163339 0.00001556 + 45 23.883358 0.00001437 + 48 25.888550 0.00001415 + 51 26.756400 0.00001454 + 61 33.091611 0.00001406 + 64 34.624658 0.00001410 + 67 35.725842 0.00001431 + 93 49.945160 0.00001421 + 96 50.993414 0.00001436 + 99 52.297713 0.00001444 + 125 66.283502 0.00001439 + 128 68.269142 0.00001430 + 131 70.562884 0.00001416 + 189 97.002458 0.00001487 + 192 101.594727 0.00001442 + 195 102.730659 0.00001448 + 253 136.727875 0.00001412 + 256 126.018401 0.00001550 + 259 123.015865 0.00001606 + 381 187.900047 0.00001547 + 384 182.281966 0.00001607 + 387 187.505026 0.00001575 + 509 261.417114 0.00001486 + 512 243.113544 0.00001607 + 515 248.983291 0.00001578 + 765 411.716257 0.00001418 + 768 404.020586 0.00001450 + 771 393.655066 0.00001494 + 1021 523.139717 0.00001489 + 1024 528.470385 0.00001478 + 1027 500.212191 0.00001566 + 1533 709.342507 0.00001649 + 1536 745.302649 0.00001572 + 1539 748.867382 0.00001568 + 2045 701.949001 0.00002223 + 2048 641.125753 0.00002437 + 2051 725.006704 0.00002158 + 3069 1149.080142 0.00002038 + 3072 1117.869559 0.00002097 + 3075 1066.876626 0.00002199 + 4093 1234.821755 0.00002529 + 4096 1392.067164 0.00002245 + 4099 1364.273095 0.00002292 + 6141 1978.643297 0.00002368 + 6144 2001.046782 0.00002343 + 6147 1981.921823 0.00002366 + 8189 2528.235274 0.00002471 + 8192 2421.728225 0.00002581 + 8195 2545.005298 0.00002457 + 12285 3266.040928 
0.00002870 + 12288 3574.432125 0.00002623 + 12291 3525.409573 0.00002660 + 16381 4179.351534 0.00002990 + 16384 4412.705178 0.00002833 + 16387 4302.765466 0.00002906 + 24573 5694.202878 0.00003292 + 24576 5592.149917 0.00003353 + 24579 5454.255077 0.00003438 + 32765 5895.412790 0.00004240 + 32768 5999.816085 0.00004167 + 32771 6242.962316 0.00004005 + 49149 7676.810025 0.00004885 + 49152 8149.771111 0.00004601 + 49155 7758.390037 0.00004834 + 65533 6147.722626 0.00008133 + 65536 6261.159737 0.00007986 + 65539 6070.463017 0.00008237 + 98301 7584.744000 0.00009888 + 98304 7358.504685 0.00010192 + 98307 7510.619199 0.00009986 + 131069 8733.644100 0.00011450 + 131072 9127.358840 0.00010956 + 131075 8955.343297 0.00011167 + 196605 10044.567820 0.00014933 + 196608 10429.810847 0.00014382 + 196611 11000.068259 0.00013636 + 262141 11544.030028 0.00017325 + 262144 13201.924092 0.00015149 + 262147 13291.843558 0.00015047 + 393213 13995.239140 0.00021436 + 393216 13089.487805 0.00022919 + 393219 13791.682522 0.00021752 + 524285 13814.348542 0.00028955 + 524288 12914.849923 0.00030972 + 524291 12709.636315 0.00031472 + 786429 14904.555026 0.00040256 + 786432 18599.059647 0.00032260 + 786435 15589.196556 0.00038488 + 1048573 18949.720391 0.00042217 + 1048576 17809.447022 0.00044920 + 1048579 14751.871863 0.00054231 + 1572861 15167.107603 0.00079118 + 1572864 15495.174755 0.00077443 + 1572867 13418.656978 0.00089428 + 2097149 12213.237268 0.00131005 + 2097152 12667.175804 0.00126311 + 2097155 14579.639694 0.00109742 + 3145725 12379.481055 0.00193869 + 3145728 13692.207318 0.00175282 + 3145731 15210.394578 0.00157787 + 4194301 13674.480625 0.00234012 + 4194304 13844.661275 0.00231136 + 4194307 13219.895707 0.00242060 + 6291453 11450.993213 0.00419177 + 6291456 12668.370717 0.00378896 + 6291459 11095.094395 0.00432624 + 8388605 13020.052869 0.00491549 + 8388608 13845.907563 0.00462230 + 8388611 11884.989086 0.00538495 diff --git a/docs/benchmarks/netpipe-standard.txt 
b/docs/benchmarks/netpipe-standard.txt new file mode 100644 index 0000000..d6a0cc9 --- /dev/null +++ b/docs/benchmarks/netpipe-standard.txt @@ -0,0 +1,124 @@ + 1 0.271739 0.00002808 + 2 0.527352 0.00002893 + 3 0.864740 0.00002647 + 4 1.172232 0.00002603 + 6 1.830063 0.00002501 + 8 2.420999 0.00002521 + 12 3.820573 0.00002396 + 13 4.073067 0.00002435 + 16 5.173979 0.00002359 + 19 5.999408 0.00002416 + 21 6.502058 0.00002464 + 24 7.103063 0.00002578 + 27 8.566173 0.00002405 + 29 8.598646 0.00002573 + 32 10.166434 0.00002401 + 35 10.302855 0.00002592 + 45 14.213910 0.00002415 + 48 14.378267 0.00002547 + 51 14.637744 0.00002658 + 61 18.021413 0.00002582 + 64 18.878820 0.00002586 + 67 21.798199 0.00002345 + 93 30.280693 0.00002343 + 96 30.892160 0.00002371 + 99 28.912132 0.00002612 + 125 39.606498 0.00002408 + 128 40.560404 0.00002408 + 131 41.434631 0.00002412 + 189 61.836905 0.00002332 + 192 61.110074 0.00002397 + 195 62.601410 0.00002377 + 253 82.003349 0.00002354 + 256 78.382060 0.00002492 + 259 76.431690 0.00002585 + 381 126.142014 0.00002304 + 384 121.997385 0.00002401 + 387 122.661813 0.00002407 + 509 148.937476 0.00002607 + 512 155.770935 0.00002508 + 515 163.799277 0.00002399 + 765 229.839666 0.00002539 + 768 231.459755 0.00002531 + 771 239.810229 0.00002453 + 1021 302.868551 0.00002572 + 1024 298.703317 0.00002615 + 1027 311.172883 0.00002518 + 1533 444.020226 0.00002634 + 1536 447.831634 0.00002617 + 1539 451.182634 0.00002602 + 2045 401.368200 0.00003887 + 2048 380.786363 0.00004103 + 2051 394.082308 0.00003971 + 3069 607.290098 0.00003856 + 3072 599.903348 0.00003907 + 3075 567.715333 0.00004132 + 4093 714.630832 0.00004370 + 4096 674.709949 0.00004632 + 4099 688.044295 0.00004545 + 6141 907.731892 0.00005161 + 6144 911.516656 0.00005143 + 6147 911.682774 0.00005144 + 8189 972.290335 0.00006426 + 8192 1090.124017 0.00005733 + 8195 1058.496177 0.00005907 + 12285 1349.474209 0.00006945 + 12288 1368.770226 0.00006849 + 12291 1370.611598 0.00006842 + 16381 
1717.159412 0.00007278 + 16384 1625.251103 0.00007691 + 16387 1622.023570 0.00007708 + 24573 1889.056976 0.00009924 + 24576 1864.732089 0.00010055 + 24579 1877.212570 0.00009989 + 32765 2231.157775 0.00011204 + 32768 2152.925316 0.00011612 + 32771 2084.435045 0.00011995 + 49149 2283.518678 0.00016421 + 49152 2318.047630 0.00016177 + 49155 2335.055110 0.00016061 + 65533 2043.928666 0.00024462 + 65536 2014.455634 0.00024821 + 65539 2110.398618 0.00023693 + 98301 2183.428273 0.00034349 + 98304 2177.638569 0.00034441 + 98307 2169.611321 0.00034569 + 131069 2355.683170 0.00042450 + 131072 2390.702707 0.00041829 + 131075 2413.261164 0.00041439 + 196605 2282.562339 0.00065715 + 196608 2494.585589 0.00060130 + 196611 2406.210727 0.00062340 + 262141 2955.537329 0.00067669 + 262144 3020.178557 0.00066221 + 262147 3024.809433 0.00066121 + 393213 3010.209455 0.00099660 + 393216 3210.869736 0.00093433 + 393219 3005.822496 0.00099807 + 524285 3055.047980 0.00130930 + 524288 3319.176826 0.00120512 + 524291 3354.251597 0.00119252 + 786429 3411.484135 0.00175876 + 786432 3446.052653 0.00174112 + 786435 3262.586754 0.00183904 + 1048573 3323.591745 0.00240703 + 1048576 3399.406018 0.00235335 + 1048579 3472.808936 0.00230362 + 1572861 3406.392100 0.00352278 + 1572864 3306.084582 0.00362967 + 1572867 3370.341516 0.00356048 + 2097149 3361.769733 0.00475939 + 2097152 3280.636487 0.00487710 + 2097155 3191.766247 0.00501291 + 3145725 3162.564558 0.00758877 + 3145728 3355.820730 0.00715175 + 3145731 3327.611546 0.00721239 + 4194301 3386.779090 0.00944850 + 4194304 3387.249473 0.00944719 + 4194307 3441.420898 0.00929849 + 6291453 3268.329719 0.01468639 + 6291456 3201.892445 0.01499113 + 6291459 3244.450787 0.01479450 + 8388605 3271.733339 0.01956149 + 8388608 3182.658022 0.02010898 + 8388611 3253.521074 0.01967100 diff --git a/docs/consistency.txt b/docs/consistency.txt index 127d9a7..c648a9a 100644 --- a/docs/consistency.txt +++ b/docs/consistency.txt @@ -22,6 +22,11 @@ Storage Networking 
========== + - Check that correct upstream name servers are listed for both DNSMasq (host) and SkyDNS (pods). + If not, fix them and restart 'origin-node' and 'dnsmasq'. + * '/etc/dnsmasq.d/origin-upstream-dns.conf' + * '/etc/origin/node/resolv.conf' + - Check that both internal and external addresses are resolvable from all hosts. * I.e. we should be able to resolve 'google.com' * And we should be able to resolve 'heketi-storage.glusterfs.svc.cluster.local' diff --git a/docs/databases.txt b/docs/databases.txt new file mode 100644 index 0000000..254674e --- /dev/null +++ b/docs/databases.txt @@ -0,0 +1,118 @@ +- The storage for HA databases is problematic. There are several ways to organize storage. I list major + characteristics here (INNODB is generally faster, but takes about 20% more disk space. Initially it + is significantly faster and takes 5x disk space, but it normalizes...) + + Method Database Performance Clnt/Cache MySQL Gluster HA + HostMount MyISAM/INNODB 8 MB/s fast 250% - Nope. But otherwise least problems to run. + Gluster MyISAM (no logs) 1 MB/s unusable 150% 600-800% Perfect. But too slow (up to completely unusable if bin-logs are on). Slow MyISAM recovery! + Gluster/Block MyISAM (no logs) 5 MB/s slow, but OK 200% ~ 50% No problems on reboot, but requires manual work if node crashes to detach volume. + Galera INNODB 3.5 MB/s fast 3 x 200% - Should be perfect, but I am not sure about automatic recovery... + MySQL Slaves INNODB 6-8 exp. fast Available data is HA, but caching is not. We can easily turn the slave to master. + DRBD MyISAM (no logs) 4-6 exp. ? I expect it to be a faster option, but it does not fit the complete concept. + + + Gluster is way too slow for anything. If a node crashes, MyISAM tables may be left in a corrupted state. The recovery will take ages to complete. +The Gluster/Block is faster, but HA suffers. The volume is attached to the pod running on crashed node. 
It does not seem to be detached automatically until +the failed pod (in Unknown state) is killed with + oc -n adei delete --force --grace-period=0 pod mysql-1-m4wcq +Then, after some delay it is re-attached to the new running pod. Technically, we can run a kind of monitoring service which will detect such nodes +and restart. Still, this solution is limited to MyISAM with binary logging disabled. Unlike the simple Gluster solution, the clients may use the system +while caching is going on, but it is quite slow. The main trouble is MyISAM corruption; the recovery is slow. + + Galera is slower than Gluster/Block, but is fully available. The clients also have more servers to query data from. The cluster start-up is a bit +tricky and I am not sure that everything will work smoothly now. Some tuning may be necessary. Furthermore, it seems that if the cluster crashes, we +can recover from one of the nodes, but all the data will be destroyed on other members and they would pull the complete dataset. The synchronization +is faster than caching (~ 140 MB/s), but it will still take about 10 hours to synchronize 5 TB of KATRIN data. + +So, there is really no fully HA-capable solution at the moment. The most reasonable seems to be compromising on caching HA. + - MySQL with slaves. The asynchronous replication should be significantly faster than Galera. The passthrough to source databases will be working + (i.e. status displays), current data is available. And we can easily switch the master if necessary. + +The other reasonable options have some problems at the moment and can't be used. + - Galera. Is a fine solution, but would need some degree of initial maintenance to work stably. Furthermore, the caching is quite slow. And the + resync is a big issue. + - Gluster/Block would be a good solution if volume detachment is fixed. As it stands, we don't have HA without manual intervention. Furthermore, the + MyISAM recovery is quite slow. + - HostMount will be using our 3-node storage optimally. 
But if something crashes there is 1 week to recache the data. + +Gluster/Block +============= + The idea is pretty simple. A standard gluster file system is used to store 'block' files (just normal files). These files are used as block devices + with single-pod access policy. GFApi interface is used to access the data on Gluster (avoiding context switches) and is exposed over iSCSI to the clients. + + There are a couple of problems with configuration and run-time. + - The default Gluster containers will complain about rpcbind. We are using host networking in this case and the required ports (111) between container + and the host system conflict. We, however, are able just to use the host rpcbind. Consequently, the rpcbind should be removed from the Gluster container + and the requirements removed from gluster-blockd systemd service. It is still worth checking that the port is accessible from the container (but it + should). We additionally also need 'iscsi-initiator-utils' in the container. + + - Only a single pod should have access to the block device. Consequently, when the volume is attached to the client, other pods can't use it any more. + The problem starts if the node running the pod dies. It is not perfectly handled by OpenShift now. The volume remains attached to the pod in the 'Unknown' state + until it is manually killed. Only then, after another delay, it is detached and available for the replacement pod (which will struggle in ContainerCreating + phase until then). The pods in 'Unknown' state are not easy to kill. + oc delete --force --grace-period=0 pod/mysql-1-m4wcq + + - Heketi is buggy. + * If something goes wrong, it starts creating multitudes of Gluster volumes and finally crashes with a broken database. It is possible to remove the + volumes and recover the database from backup, but it is time consuming and unreliable for an HA solution. + * Particularly, this happens if we try to allocate more disk-space than available. 
The OpenShift configures the size of Gluster file system used + to back block devices. It is 100 GB by default. If we specify 500Gi in pvc, it will try to create 15 such devices (another maximum configured by + openshift) before crashing. + * Overall, I'd rather only use the manual provisioning. + + - Also without heketi it is still problematic (maybe it is better with official RH container running on GlusterFS 3.7), but I'd not check... We + can try again with GlusterFS 4.1. There are probably multiple problems, but + * GlusterFS may fail on one of the nodes (showing it up and running). If any of the block services have problems communicating with local gluster + daemon, most requests (info/list will still work, but slow) to gluster daemon will timeout. + +Galera +====== + - To bring a new cluster up, there are several steps. + * All members need to initialize standard standalone databases + * One node should perform initialization and other nodes join after it is completed. + * The nodes will delete their mysql folders and re-synchronize from the first node. + * Then, cluster will be up and all nodes in so called primary state. + + - The procedure is similar for crash recovery: + * If a node leaves the cluster, it may just come back and be re-synchronized from other + cluster members if there is a quorum. For this reason, it is necessary to keep at + least 3 nodes running. + * If all nodes crashed, then again one node should restart the cluster and others join + later. For older versions, it is necessary to run mysqld with '--wsrep-new-cluster'. + The newer versions try to automate it and will recover automatically if 'safe_to_bootstrap' = 1 + in 'grstate.dat' in mysql data folder. It should be set by Galera based on some heuristic, + but in fact I always had to set it manually. IMPORTANT, it should be set only on one of + the nodes. + + - Synchronization only works for INNODB tables. 
Furthermore, binary logging should be turned + on (yes, it is possible to turn it off and there are no complaints, but only the table names are + synchronized, no data is pushed between the nodes). + + - OpenShift uses 'StatefulSet' to perform such initialization. Particularly, it starts the first + node and waits until it is running before starting the next one. + * Now the nodes need to talk between each other. The 'headless' service is used for that. + Unlike a standard service, the DNS does not load balance service pods, but returns IPs of + all service members if an appropriate DNS request is sent (SRV). In Service spec we specify: + clusterIP: None - old version + For clients we still need a load-balancing service. So, we need to add a second service + to serve their needs. + * To decide if it should perform cluster initialization, the node tries to resolve members + of the service. If it is alone, it initializes the cluster. Otherwise, it tries to join the other + members already registered in the service. The problem is that by default, OpenShift only + will add a member when it is ready (Readiness check). Consequently, all nodes will try to + initialize. There are two methods to prevent it. One is working up to 3.7 and the other from 3.8 up + (but there is no harm in using both for now). + The new is to set in Service spec: + publishNotReadyAddresses: True + The old is to specify in Service metadata.annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: true + * Still, we should quickly check for peers until other pods had chance to start. + * Furthermore, there are some differences to the 'dc' definition. We need to specify 'serviceName' + in the StatefulSet spec. + serviceName: adei-ss + There are a few other minor differences. For instance, the 'selector' has a more flexible notation + and should include 'matchLabels' before specifying the 'pod' selector, etc. + + - To check current status of the cluster + SHOW STATUS LIKE 'wsrep_cluster_size'; +
\ No newline at end of file diff --git a/docs/managment.txt b/docs/managment.txt index 1eca8a8..9436c3c 100644 --- a/docs/managment.txt +++ b/docs/managment.txt @@ -96,17 +96,23 @@ Problems Storage / Recovery ======= + - We have some manually provisioned resources which need to be fixed. + * GlusterFS endpoints should be pointing to new nodes. + * If we use Gluster/Block storage, all 'pv' refer to iSCSI 'portals'. They also have to be updated to + new server names. I am not sure how this is handled for auto-provisioned resources. - Furthermore, it is necessary to add glusterfs nodes on a new storage nodes. It is not performed automatically by scale plays. The 'glusterfs' play should be executed with additional options specifying that we are just re-configuring nodes. We can check if all pods are serviced oc -n glusterfs get pods -o wide Both OpenShift and etcd clusters should be in proper state before running this play. Fixing and re-running should be not an issue. - + - More details: https://docs.openshift.com/container-platform/3.7/day_two_guide/host_level_tasks.html + + Heketi ------ - With heketi things are straighforward, we need to mark node broken. Then heketi will automatically move the @@ -160,7 +166,13 @@ KaaS Volumes Scaling ======= -We have currently serveral assumptions which will probably not hold true for larger clusters + - If we use container native routing, we need to add routes to new nodes on the Infiniband routes, + see docs: + https://docs.openshift.com/container-platform/3.7/install_config/configuring_native_container_routing.html#install-config-configuring-native-container-routing + Basically, the Infiniband switch should send packets destined to the network 11.11.<hostid>.0/24 to the corresponding node, i.e. 
192.168.13.<hostid> + +We also currently have several assumptions which will probably not hold true for larger clusters - Gluster To simplify matters we just reference servers in the storage group manually Arbiter may work for several groups and we should define several brick path in this case + diff --git a/docs/network.txt b/docs/network.txt index a164d36..bcd45e1 100644 --- a/docs/network.txt +++ b/docs/network.txt @@ -56,3 +56,26 @@ Hostnames The linux host name (uname -a) should match the hostnames assigned to openshift nodes. Otherwise, the certificate verification will fail. It seems minor issue as system continue functioning, but better to avoid. The check can be performed with etcd: etcdctl3 --key=/etc/etcd/peer.key --cacert=/etc/etcd/ca.crt --endpoints="192.168.213.1:2379,192.168.213.3:2379,192.168.213.4:2379" + +Performance +=========== + - Red Hat recommends using Native Container Routing for speeds above 1Gb/s. It creates a new bridge connected to fast fabric and docker is + configured to use it instead of docker0 bridge. The docker0 is routed through the Open vSwitch fabric and the new bridge should go directly. + Unfortunately, this is not working with Infiniband. IPoIB is not fully Ethernet compatible and is not working as slave in bridges. + * There are projects for full Ethernet compatibility (eipoib) providing Ethernet L2 interfaces. But it seems there is no really mature + solution ready for production. It also penalizes performance (about 2x). + * Mellanox cards work in both Ethernet and Infiniband modes. No problem to select the current mode with: + echo "eth|ib|auto" > /sys/bus/pci/devices/0000\:06\:00.0/mlx4_port1 + However, while the switch supports Ethernet, it requires an additional license basically for 50% of the original switch price (it is about + 4 kEUR for SX6018). License is called: UPGR-6036-GW. 
+ + - Measured performance + Standard: ~ 3.2 Gb/s + Standard (pods on the same node) ~ 20 - 30 Gb/s + hostNet (using cluster IP) ~ 3.6 Gb/s + hostNet (using host IP) ~ 12 - 15 Gb/s + + - So, I guess the optimal solution is really to introduce a second router for the cluster, but with an Ethernet interface. Then, we can + reconfigure the second Infiniband adapter for the Ethernet mode. The switch to native routing should also be possible on a running + cluster with a short downtime. As a temporary solution, we may use hostNetwork. + diff --git a/docs/troubleshooting.txt b/docs/troubleshooting.txt index b4ac8e7..ef3c206 100644 --- a/docs/troubleshooting.txt +++ b/docs/troubleshooting.txt @@ -60,6 +60,8 @@ Debugging oc logs <pod name> --tail=100 [-p] - dc/name or ds/name as well - Verify initialization steps (check if all volumes are mounted) oc describe <pod name> + - Security (SCC) problems are visible if the replication controller is queried + oc -n adei get rc/mysql-1 -o yaml - It worth looking the pod environment oc env po <pod name> --list - It worth connecting running container with 'rsh' session and see running processes, @@ -85,6 +87,7 @@ network * that nameserver is pointing to the host itself (but not localhost, this is important to allow running pods to use it) * that correct upstream nameservers are listed in '/etc/dnsmasq.d/origin-upstream-dns.conf' + * that correct upstream nameservers are listed in '/etc/origin/node/resolv.conf' * In some cases, it was necessary to restart dnsmasq (but it could be also for different reasons) If script misbehaves, it is possible to call it manually like that DEVICE_IFACE="eth1" ./99-origin-dns.sh eth1 up @@ -96,6 +99,7 @@ etcd (and general operability) may be needed to restart them manually. 
I have noticed it with * lvm2-lvmetad.socket (pvscan will complain on problems) * node-origin + * glusterd in container (just kill the misbehaving pod, it will be recreated) * etcd but BEWARE of too entusiastic restarting: - However, restarting etcd many times is BAD as it may trigger a severe problem with 'kube-service-catalog/apiserver'. The bug description is here @@ -181,6 +185,13 @@ pods (failed pods, rogue namespaces, etc...) docker ps -aq --no-trunc | xargs docker rm +Builds +====== + - After changing storage for the integrated docker registry, it may refuse builds with HTTP error 500. It is necessary + to run: + oadm policy reconcile-cluster-roles + + Storage ======= - Running a lot of pods may exhaust available storage. It worth checking if @@ -208,3 +219,41 @@ Storage gluster volume start <vol> * This may break services depending on provisioned 'pv' like 'openshift-ansible-service-broker/asb-etcd' + - If something goes wrong, heketi may end up creating a bunch of new volumes, corrupt the database, and crash + refusing to start. Here is the recovery procedure. + * Sometimes, it is still possible to start by setting the 'HEKETI_IGNORE_STALE_OPERATIONS' environment + variable on the container. + oc -n glusterfs env dc heketi-storage -e HEKETI_IGNORE_STALE_OPERATIONS=true + * Even if it works, it does not solve the main issue with corruption. It is necessary to start a + debugging pod for heketi (oc debug), export the corrupted database, fix it, and save it back. Having a + database backup could save a lot of hassle finding what is amiss. + heketi db export --dbfile heketi.db --jsonfile /tmp/q.json + oc cp glusterfs/heketi-storage-3-jqlwm-debug:/tmp/q.json q.json + cat q.json | python -m json.tool > q2.json + ...... Fixing ..... + oc cp q2.json glusterfs/heketi-storage-3-jqlwm-debug:/tmp/q2.json + heketi db import --dbfile heketi2.db --jsonfile /tmp/q2.json + cp heketi2.db /var/lib/heketi/heketi.db + * If a bunch of disks is created, there are still various left-overs. 
First, the Gluster volumes + have to be cleaned. The idea is to compare 'vol_' prefixed volumes in Heketi and Gluster. And + remove ones not present in heketi. There is a script in 'ands/scripts'. + * There are LVM volumes left from Gluster (or even allocated, but not associated with Gluster for + various failures, so this clean-up is worth making independently). On each node we can easily find + volumes created today + lvdisplay -o name,time -S 'time since "2018-03-16"' + or again we can compare lvm volumes which are used by Gluster bricks and which are not. The latter + ones should be cleaned up. Again there is a script. + +Performance +=========== + - To find if OpenShift restricts the usage of system resources, we can 'rsh' to the container and check + cgroup limits in sysfs + /sys/fs/cgroup/cpuset/cpuset.cpus + /sys/fs/cgroup/memory/memory.limit_in_bytes + + +Various +======= + - IPMI may cause problems as well. Particularly, the mounted CDrom may start complaining. Easiest is + just to remove it from the running system with + echo 1 > /sys/block/sdd/device/delete diff --git a/group_vars/OSEv3.yml b/group_vars/OSEv3.yml index 20bfece..46482f4 100644 --- a/group_vars/OSEv3.yml +++ b/group_vars/OSEv3.yml @@ -110,12 +110,14 @@ openshift_hosted_registry_storage_create_pvc: false ### Dynamic Storage openshift_storage_glusterfs_image: chsa/gluster-centos openshift_storage_glusterfs_version: "{{ glusterfs_version }}" -#Either 5 or 6 corresponds to latest +#Either 6 corresponds to latest #openshift_storage_glusterfs_heketi_version: 6 #Only latest #openshift_storage_glusterfs_block_version: latest -#openshift_storage_glusterfs_version: '3.12.5' # Latest 3.10.1 +#openshift_storage_glusterfs_storageclass: True +#openshift_storage_glusterfs_storageclass_default: False +#openshift_storage_glusterfs_version: '3.12.6' # Latest 3.10.1 #openshift_storage_glusterfs_is_native: True #openshift_storage_glusterfs_is_native: False @@ -129,6 +131,14 @@ 
openshift_storage_glusterfs_version: "{{ glusterfs_version }}" #openshift_storage_glusterfs_heketi_ssh_user: 'root' #openshift_storage_glusterfs_heketi_ssh_keyfile: "{{ omit }}" +# Block Storage +openshift_storage_glusterfs_block_storageclass: True +openshift_storage_glusterfs_block_storageclass_default: False +openshift_storage_glusterfs_block_host_vol_size: 512 +openshift_storage_glusterfs_block_host_vol_max: 8 + + + ### Modules & Configuration openshift_master_dynamic_provisioning_enabled: true #openshift_metrics_install_metrics: false diff --git a/group_vars/ands.yml b/group_vars/ands.yml index faacc40..bd2f066 100644 --- a/group_vars/ands.yml +++ b/group_vars/ands.yml @@ -1,3 +1,5 @@ +ands_router_id: 185 +ands_bridge: "andsbr0" ands_repo_url: http://ufo.kit.edu/ands/repos ands_repositories: - name: ands-updates diff --git a/group_vars/baremetal.yml b/group_vars/baremetal.yml index 640a00a..9584cf0 100644 --- a/group_vars/baremetal.yml +++ b/group_vars/baremetal.yml @@ -1,4 +1,7 @@ -glusterfs_transport: rdma +#rdma causes problems if we run many pods with gluster mounts +#glusterfs_transport: rdma +glusterfs_transport: tcp,rdma +ands_rdma_support: true # Size in GB to detect main Ands device ands_data_device_threshold: 8192 @@ -13,6 +16,9 @@ docker_storage_vg: "ands" #Heketi Volumes ands_registry_volume_size: "2T" +#Not working with Infiniband, we need fast ethernet for bridging +##ands_enable_cnr: true + ands_storage_interface: "ib0" ands_inner_interface: "ib1" #ands_public_interface: "eth0" diff --git a/group_vars/virtual.yml b/group_vars/virtual.yml index 7a61a55..80778e7 100644 --- a/group_vars/virtual.yml +++ b/group_vars/virtual.yml @@ -1,4 +1,5 @@ glusterfs_transport: tcp +ands_rdma_support: false ands_data_device: "/dev/sdb" ands_data_volume_size: "15G" @@ -28,6 +28,7 @@ Actions: ganesha - provide external nfs access to gluster volumes Maintenance + health - cluster health checks maintain - check and fix running configuration upgrade - upgrade to a new 
version (Dangerous) migrate <from> <to> - migrate GlusterFS bricks to a new server @@ -47,6 +48,7 @@ Actions: storage - reconfigures Gluster and OpenShift volumes projects - reconfigures OpenShift resources if necessary project <name> - reconfigures a single OpenShift namespace + apps <prj> [app] - only re-generates templates for the specific namespaces (or even only specific application) vpn - reconfigure VPN tunnels certs - re-generate OpenShift x509 certificates check - check current setup and report if any maintenace should be peformed diff --git a/playbooks/ands-network.yml b/playbooks/ands-network.yml index 1788781..211ddfe 100644 --- a/playbooks/ands-network.yml +++ b/playbooks/ands-network.yml @@ -14,6 +14,6 @@ - role: ofed - name: Network configuration - hosts: ands + hosts: baremetal roles: - { role: ands_network, action: nm_configure } diff --git a/playbooks/maintain.yml b/playbooks/maintain.yml index 03d6d9a..80061ec 100644 --- a/playbooks/maintain.yml +++ b/playbooks/maintain.yml @@ -4,14 +4,3 @@ - role: ands_facts - { role: ands_network, action: maintain } -- name: Check cert expirys - hosts: nodes:masters:etcd - become: yes - gather_facts: no - vars: - openshift_certificate_expiry_save_json_results: no - openshift_certificate_expiry_generate_html_report: yes - openshift_certificate_expiry_html_report_path: reports/certs.html - openshift_certificate_expiry_show_all: yes - roles: - - role: openshift_certificate_expiry diff --git a/playbooks/openshift-health.yml b/playbooks/openshift-health.yml new file mode 100644 index 0000000..e645d02 --- /dev/null +++ b/playbooks/openshift-health.yml @@ -0,0 +1,19 @@ +- name: Configure cluster hosts names + hosts: nodes:new_nodes + roles: + - { role: ands_facts } + + +- import_playbook: ../anslib/openshift-ansible/playbooks/openshift-checks/health.yml + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_save_json_results: no + 
openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_html_report_path: reports/certs.html + openshift_certificate_expiry_show_all: yes + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/openshift-setup-apps.yml b/playbooks/openshift-setup-apps.yml new file mode 100644 index 0000000..0719f5d --- /dev/null +++ b/playbooks/openshift-setup-apps.yml @@ -0,0 +1,14 @@ +- name: Analyze Ands configuration + hosts: nodes + roles: + - { role: ands_facts } + +- name: "Configure apps for {{ kaas_single_project }}" + hosts: masters + roles: + - { role: ands_kaas, subrole: apps } + vars: + kaas_openshift_volumes: "{{ ands_openshift_volumes }}" + kaas_projects: "{{ ands_openshift_projects.keys() }}" + kaas_single_project: "{{ ands_configure_project }}" + kaas_single_app: "{{ ands_configure_app | default(ands_none) }}" diff --git a/playbooks/openshift-setup-project.yml b/playbooks/openshift-setup-project.yml index a4666e3..f7d80e9 100644 --- a/playbooks/openshift-setup-project.yml +++ b/playbooks/openshift-setup-project.yml @@ -3,6 +3,16 @@ roles: - { role: ands_facts } +- name: Configure per-node {{ kaas_single_project }} project storage + hosts: ands_storage_servers + roles: + - { role: ands_kaas, subrole: storage } + vars: + kaas_projects: "{{ ands_openshift_projects.keys() }}" + kaas_openshift_volumes: "{{ ands_openshift_volumes }}" + kaas_single_project: "{{ ands_configure_project }}" + + - name: "Configure project {{ kaas_single_project }}" hosts: masters roles: @@ -11,3 +21,4 @@ kaas_openshift_volumes: "{{ ands_openshift_volumes }}" kaas_projects: "{{ ands_openshift_projects.keys() }}" kaas_single_project: "{{ ands_configure_project }}" + diff --git a/playbooks/openshift-setup-projects.yml b/playbooks/openshift-setup-projects.yml index 164f91c..00aabfd 100644 --- a/playbooks/openshift-setup-projects.yml +++ b/playbooks/openshift-setup-projects.yml @@ -9,8 +9,20 @@ - { role: ands_openshift, subrole: users } - { role: 
ands_openshift, subrole: security } - { role: ands_openshift, subrole: storage } - - { role: ands_kaas } + +- name: Configure per-node project storage + hosts: ands_storage_servers + roles: + - { role: ands_kaas, subrole: storage } vars: kaas_projects: "{{ ands_openshift_projects.keys() }}" kaas_openshift_volumes: "{{ ands_openshift_volumes }}" + +- name: Configure users & user projects + hosts: masters + roles: + - { role: ands_kaas } + vars: + kaas_projects: "{{ ands_openshift_projects.keys() }}" + kaas_openshift_volumes: "{{ ands_openshift_volumes }}" diff --git a/reports/certs.html b/reports/certs.html deleted file mode 100644 index 5f3b81f..0000000 --- a/reports/certs.html +++ /dev/null @@ -1,599 +0,0 @@ -<!DOCTYPE html> -<html> - <head> - <meta charset="UTF-8" /> - <title>OCP Certificate Expiry Report</title> - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" /> - <link href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:300,400,700" rel="stylesheet" /> - <style type="text/css"> - body { - font-family: 'Source Sans Pro', sans-serif; - margin-left: 50px; - margin-right: 50px; - margin-bottom: 20px; - padding-top: 70px; - } - table { - border-collapse: collapse; - margin-bottom: 20px; - } - table, th, td { - border: 1px solid black; - } - th, td { - padding: 5px; - } - .cert-kind { - margin-top: 5px; - margin-bottom: 5px; - } - footer { - font-size: small; - text-align: center; - } - tr.odd { - background-color: #f2f2f2; - } - </style> - </head> - <body> - <nav class="navbar navbar-default navbar-fixed-top"> - <div class="container-fluid"> - <div class="navbar-header"> - <a class="navbar-brand" href="#">OCP Certificate Expiry Report</a> - </div> - <div class="collapse navbar-collapse"> - <p class="navbar-text navbar-right"> - <button> - <a href="https://docs.openshift.com/container-platform/latest/install_config/redeploying_certificates.html" - target="_blank" - class="navbar-link"> - <i 
class="glyphicon glyphicon-book"></i> Redeploying Certificates - </a> - </button> - <button> - <a href="https://github.com/openshift/openshift-ansible/tree/master/roles/openshift_certificate_expiry" - target="_blank" - class="navbar-link"> - <i class="glyphicon glyphicon-book"></i> Expiry Role Documentation - </a> - </button> - </p> - </div> - </div> - </nav> - - <h1>192.168.226.1</h1> - - <p> - Checked 11 total certificates. Expired/Warning/OK: 0/0/11. Warning window: 30 days - </p> - <ul> - <li><b>Expirations checked at:</b> 2018-03-06 01:40:05.401238</li> - <li><b>Warn after date:</b> 2018-04-05 01:40:05.401238</li> - </ul> - - <table border="1" width="100%"> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">ocp_certs</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:172.30.0.1, DNS:ipeshift1, DNS:ipeshift1.ipe.kit.edu, DNS:kubernetes, DNS:kubernetes.default, DNS:kubernetes.default.svc, DNS:kubernetes.default.svc.cluster.local, DNS:openshift, DNS:openshift.default, DNS:openshift.default.svc, DNS:openshift.default.svc.cluster.local, DNS:openshift.suren.me, DNS:172.30.0.1, DNS:192.168.213.1, DNS:192.168.226.1, IP Address:172.30.0.1, IP Address:192.168.213.1, IP Address:192.168.226.1</td> - <td><code>int(3)/hex(0x3)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:23:22</td> - <td>/etc/origin/master/master.server.crt</td> - </tr> - - <tr class="even"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:openshift-signer@1519716200</td> - <td><code>int(1)/hex(0x1)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:23:21</td> - <td>/etc/origin/master/ca.crt</td> - </tr> - - <tr 
class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:192.168.213.1, DNS:ipeshift1, DNS:ipeshift1.ipe.kit.edu, DNS:192.168.213.1, DNS:192.168.226.1, IP Address:192.168.213.1, IP Address:192.168.226.1</td> - <td><code>int(6)/hex(0x6)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:27:37</td> - <td>/etc/origin/node/server.crt</td> - </tr> - - <tr class="even"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:openshift-signer@1519716200</td> - <td><code>int(1)/hex(0x1)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:23:21</td> - <td>/etc/origin/node/ca.crt</td> - </tr> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">etcd</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:ipeshift1, IP Address:192.168.213.1, DNS:ipeshift1</td> - <td><code>int(1)/hex(0x1)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:22:06</td> - <td>/etc/etcd/server.crt</td> - </tr> - - <tr class="even"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:ipeshift1, IP Address:192.168.213.1, DNS:ipeshift1</td> - <td><code>int(4)/hex(0x4)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:22:07</td> - <td>/etc/etcd/peer.crt</td> - </tr> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">kubeconfigs</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td 
style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">O:system:nodes, CN:system:node:ipeshift1</td> - <td><code>int(3)/hex(0x3)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:27:33</td> - <td>/etc/origin/node/system:node:ipeshift1.kubeconfig</td> - </tr> - - <tr class="even"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">O:system:cluster-admins, CN:system:admin</td> - <td><code>int(7)/hex(0x7)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:23:23</td> - <td>/etc/origin/master/admin.kubeconfig</td> - </tr> - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">O:system:masters, O:system:openshift-master, CN:system:openshift-master</td> - <td><code>int(16)/hex(0x10)</code></td> - <td>ok</td> - <td>3646</td> - <td>2028-02-28 04:41:57</td> - <td>/etc/origin/master/openshift-master.kubeconfig</td> - </tr> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">router</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:*.openshift.suren.me, DNS:*.openshift.suren.me, DNS:openshift.suren.me</td> - <td><code>int(9)/hex(0x9)</code></td> - <td>ok</td> - <td>723</td> - <td>2020-02-27 07:37:27</td> - <td>/api/v1/namespaces/default/secrets/router-certs</td> - </tr> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">registry</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td 
style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:172.30.28.159, DNS:__omit_place_holder__ae699a41d947a82fa4b0786b473af05686593d38, DNS:docker-registry-default.openshift.suren.me, DNS:docker-registry.default.svc, DNS:docker-registry.default.svc.cluster.local, DNS:172.30.28.159, IP Address:172.30.28.159</td> - <td><code>int(11)/hex(0xb)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 15:47:35</td> - <td>/api/v1/namespaces/default/secrets/registry-certificates</td> - </tr> - </table> - <hr /> - <h1>192.168.226.4</h1> - - <p> - Checked 11 total certificates. Expired/Warning/OK: 0/0/11. Warning window: 30 days - </p> - <ul> - <li><b>Expirations checked at:</b> 2018-03-06 01:40:05.358115</li> - <li><b>Warn after date:</b> 2018-04-05 01:40:05.358115</li> - </ul> - - <table border="1" width="100%"> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">ocp_certs</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:openshift-signer@1519716200</td> - <td><code>int(1)/hex(0x1)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:23:21</td> - <td>/etc/origin/master/ca.crt</td> - </tr> - - <tr class="even"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:openshift-signer@1519716200</td> - <td><code>int(1)/hex(0x1)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:23:21</td> - <td>/etc/origin/node/ca.crt</td> - </tr> - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:172.30.0.1, DNS:ipeshift4, DNS:ipeshift4.ipe.kit.edu, DNS:kubernetes, DNS:kubernetes.default, 
DNS:kubernetes.default.svc, DNS:kubernetes.default.svc.cluster.local, DNS:openshift, DNS:openshift.default, DNS:openshift.default.svc, DNS:openshift.default.svc.cluster.local, DNS:openshift.suren.me, DNS:172.30.0.1, DNS:192.168.213.4, DNS:192.168.226.4, IP Address:172.30.0.1, IP Address:192.168.213.4, IP Address:192.168.226.4</td> - <td><code>int(17)/hex(0x11)</code></td> - <td>ok</td> - <td>3646</td> - <td>2028-02-28 04:42:05</td> - <td>/etc/origin/master/master.server.crt</td> - </tr> - - <tr class="even"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:192.168.213.4, DNS:ipeshift4, DNS:ipeshift4.ipe.kit.edu, DNS:192.168.213.4, DNS:192.168.226.4, IP Address:192.168.213.4, IP Address:192.168.226.4</td> - <td><code>int(20)/hex(0x14)</code></td> - <td>ok</td> - <td>3646</td> - <td>2028-02-28 04:44:57</td> - <td>/etc/origin/node/server.crt</td> - </tr> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">etcd</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:ipeshift4, IP Address:192.168.213.4, DNS:ipeshift4</td> - <td><code>int(11)/hex(0xb)</code></td> - <td>ok</td> - <td>3646</td> - <td>2028-02-28 04:38:10</td> - <td>/etc/etcd/server.crt</td> - </tr> - - <tr class="even"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:ipeshift4, IP Address:192.168.213.4, DNS:ipeshift4</td> - <td><code>int(12)/hex(0xc)</code></td> - <td>ok</td> - <td>3646</td> - <td>2028-02-28 04:38:12</td> - <td>/etc/etcd/peer.crt</td> - </tr> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">kubeconfigs</h2></th> - </tr> - - <tr> - <th> </th> - <th 
style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">O:system:cluster-admins, CN:system:admin</td> - <td><code>int(7)/hex(0x7)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:23:23</td> - <td>/etc/origin/master/admin.kubeconfig</td> - </tr> - - <tr class="even"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">O:system:nodes, CN:system:node:ipeshift4</td> - <td><code>int(19)/hex(0x13)</code></td> - <td>ok</td> - <td>3646</td> - <td>2028-02-28 04:44:56</td> - <td>/etc/origin/node/system:node:ipeshift4.kubeconfig</td> - </tr> - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">O:system:masters, O:system:openshift-master, CN:system:openshift-master</td> - <td><code>int(18)/hex(0x12)</code></td> - <td>ok</td> - <td>3646</td> - <td>2028-02-28 04:42:06</td> - <td>/etc/origin/master/openshift-master.kubeconfig</td> - </tr> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">router</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:*.openshift.suren.me, DNS:*.openshift.suren.me, DNS:openshift.suren.me</td> - <td><code>int(9)/hex(0x9)</code></td> - <td>ok</td> - <td>723</td> - <td>2020-02-27 07:37:27</td> - <td>/api/v1/namespaces/default/secrets/router-certs</td> - </tr> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">registry</h2></th> - </tr> - - <tr> - <th> </th> - <th 
style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:172.30.28.159, DNS:__omit_place_holder__ae699a41d947a82fa4b0786b473af05686593d38, DNS:docker-registry-default.openshift.suren.me, DNS:docker-registry.default.svc, DNS:docker-registry.default.svc.cluster.local, DNS:172.30.28.159, IP Address:172.30.28.159</td> - <td><code>int(11)/hex(0xb)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 15:47:35</td> - <td>/api/v1/namespaces/default/secrets/registry-certificates</td> - </tr> - </table> - <hr /> - <h1>192.168.226.3</h1> - - <p> - Checked 5 total certificates. Expired/Warning/OK: 0/0/5. Warning window: 30 days - </p> - <ul> - <li><b>Expirations checked at:</b> 2018-03-06 01:40:05.358077</li> - <li><b>Warn after date:</b> 2018-04-05 01:40:05.358077</li> - </ul> - - <table border="1" width="100%"> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">ocp_certs</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:192.168.213.3, DNS:ipeshift3, DNS:ipeshift3.ipe.kit.edu, DNS:192.168.213.3, DNS:192.168.226.3, IP Address:192.168.213.3, IP Address:192.168.226.3</td> - <td><code>int(8)/hex(0x8)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:27:39</td> - <td>/etc/origin/node/server.crt</td> - </tr> - - <tr class="even"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:openshift-signer@1519716200</td> - <td><code>int(1)/hex(0x1)</code></td> - <td>ok</td> - 
<td>3643</td> - <td>2028-02-25 07:23:21</td> - <td>/etc/origin/node/ca.crt</td> - </tr> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">etcd</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:ipeshift3, IP Address:192.168.213.3, DNS:ipeshift3</td> - <td><code>int(3)/hex(0x3)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:22:06</td> - <td>/etc/etcd/server.crt</td> - </tr> - - <tr class="even"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">CN:ipeshift3, IP Address:192.168.213.3, DNS:ipeshift3</td> - <td><code>int(5)/hex(0x5)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:22:07</td> - <td>/etc/etcd/peer.crt</td> - </tr> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">kubeconfigs</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - - <tr class="odd"> - <td style="text-align:center"><i class="glyphicon glyphicon-ok"></i></td> - <td style="width:33%">O:system:nodes, CN:system:node:ipeshift3</td> - <td><code>int(5)/hex(0x5)</code></td> - <td>ok</td> - <td>3643</td> - <td>2028-02-25 07:27:35</td> - <td>/etc/origin/node/system:node:ipeshift3.kubeconfig</td> - </tr> - <tr> - <th colspan="7" style="text-align:center"><h2 class="cert-kind">router</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - <tr> - <th colspan="7" 
style="text-align:center"><h2 class="cert-kind">registry</h2></th> - </tr> - - <tr> - <th> </th> - <th style="width:33%">Certificate Common/Alt Name(s)</th> - <td>Serial</th> - <th>Health</th> - <th>Days Remaining</th> - <th>Expiration Date</th> - <th>Path</th> - </tr> - - </table> - <hr /> - - <footer> - <p> - Expiration report generated by - the <a href="https://github.com/openshift/openshift-ansible" - target="_blank">openshift-ansible</a> - <a href="https://github.com/openshift/openshift-ansible/tree/master/roles/openshift_certificate_expiry" - target="_blank">certificate expiry</a> role. - </p> - <p> - Status icons from bootstrap/glyphicon - </p> - </footer> - </body> -</html> diff --git a/roles/ands_backup/templates/backup.sh.j2 b/roles/ands_backup/templates/backup.sh.j2 index 74fff85..c362957 100755 --- a/roles/ands_backup/templates/backup.sh.j2 +++ b/roles/ands_backup/templates/backup.sh.j2 @@ -15,9 +15,13 @@ etcdctl3 () { ETCDCTL_API=3 /usr/bin/etcdctl --cert /etc/etcd/peer.crt --key /etc/etcd/peer.key --cacert /etc/etcd/ca.crt --endpoints "https://${hostname}:2379" ${@} } - check=$(df | awk '{ print $6 }' | grep -P "^${volume_path}$") -[ $? -ne 0 -o -z "$check" ] && { echo "The volume $volume_path is not mounted. Skipping..." ; exit 1 ; } +if [ $? -ne 0 -o -z "$check" ]; then + echo "Mounting $volume_path" + mount "$volume_path" + check=$(df | awk '{ print $6 }' | grep -P "^${volume_path}$") + [ $? -ne 0 -o -z "$check" ] && { echo "The volume $volume_path is not mounted. Skipping..." ; exit 1 ; } +fi [ -d "$backup_path" ] && { echo "Something wrong, path $backup_path already exists..." 
; exit 1 ; } @@ -31,7 +35,13 @@ etcdctl3 --endpoints="192.168.213.1:2379" snapshot save "$backup_path/etcd/snaps # heketi mkdir -p "$backup_path/heketi" || { echo "Can't create ${backup_path}/heketi" ; exit 1 ; } -heketi-cli -s http://heketi-storage.glusterfs.svc.cluster.local:8080 --user admin --secret "$(oc get secret heketi-storage-admin-secret -n glusterfs -o jsonpath='{.data.key}' | base64 -d)" topology info --json > "$backup_path/heketi/topology.json" +heketi-cli -s http://heketi-storage.glusterfs.svc.cluster.local:8080 --user admin --secret "$(oc get secret heketi-storage-admin-secret -n glusterfs -o jsonpath='{.data.key}' | base64 -d)" topology info > "$backup_path/heketi/heketi_topology.json" +heketi-cli -s http://heketi-storage.glusterfs.svc.cluster.local:8080 --user admin --secret "$(oc get secret heketi-storage-admin-secret -n glusterfs -o jsonpath='{.data.key}' | base64 -d)" db dump > "$backup_path/heketi/heketi_db.json" +lvs > "$backup_path/heketi/lvs.txt" 2>/dev/null +lvm fullreport --reportformat json > "$backup_path/heketi/lvm.json" 2>/dev/null +gluster --xml volume info > "$backup_path/heketi/gluster-info.xml" +gluster --xml volume status > "$backup_path/heketi/gluster-status.xml" +gluster volume status > "$backup_path/heketi/gluster.txt" {% endif %} diff --git a/roles/ands_facts/tasks/main.yml b/roles/ands_facts/tasks/main.yml index bd23e13..ce5dd23 100644 --- a/roles/ands_facts/tasks/main.yml +++ b/roles/ands_facts/tasks/main.yml @@ -1,4 +1,9 @@ --- +# We need all mount points ready +- name: "Run mount -a" + command: mount -a + changed_when: false + # Here we set 'openshift_hostname', 'openshift_ip' and other variables - name: "Configuring network facts" include_tasks: "network.yml" diff --git a/roles/ands_facts/tasks/network.yml b/roles/ands_facts/tasks/network.yml index 1acafc1..64ca15a 100644 --- a/roles/ands_facts/tasks/network.yml +++ b/roles/ands_facts/tasks/network.yml @@ -20,6 +20,7 @@ ands_openshift_public_hostname: "{{ 
ands_openshift_public_hostname | default(ands_openshift_default_hostname) }}" ands_storage_cidr: "{{ ands_storage_network | default(ands_openshift_network) | ipaddr(ands_host_id) }}" ands_storage_ip: "{{ ands_storage_network | default(ands_openshift_network) | ipaddr(ands_host_id) | ipaddr('address') }}" + ands_hostname_public: "ands_public{{ ands_host_id }}" ands_hostname_storage: "ands_storage{{ ands_host_id }}" ands_hostname_openshift: "ands_openshift{{ ands_host_id }}" ands_openshift_set_hostname: "{{ ands_openshift_set_hostname }}" diff --git a/roles/ands_facts/tasks/storage.yml b/roles/ands_facts/tasks/storage.yml index 888ad70..b902a81 100644 --- a/roles/ands_facts/tasks/storage.yml +++ b/roles/ands_facts/tasks/storage.yml @@ -5,7 +5,9 @@ ands_configure_heketi: "{{ ands_configure_heketi }}" - name: Detect Heketi - set_fact: ands_storage_domains="{{ ands_storage_domains | union([ands_heketi_domain]) }}" + set_fact: + ands_storage_domains: "{{ ands_storage_domains | union([ands_heketi_domain]) }}" + ands_block_volumes: "{{ ands_block_volumes }}" when: - ands_configure_heketi - ands_heketi_domain is defined diff --git a/roles/ands_kaas/defaults/main.yml b/roles/ands_kaas/defaults/main.yml index b2bfaf5..9a827ea 100644 --- a/roles/ands_kaas/defaults/main.yml +++ b/roles/ands_kaas/defaults/main.yml @@ -4,7 +4,9 @@ kaas_projects: "{{ ands_openshift_projects.keys() }}" kaas_template_root: "{{ ands_paths.provision }}/kaas/" kaas_glusterfs_endpoints: gfs +kaas_storage_domains: "{{ ands_storage_domains | default({}) | union(ands_local_storage_domains | default({})) }}" kaas_openshift_volumes: "{{ ands_openshift_volumes | default({}) }}" +kaas_block_volumes: "{{ ands_block_volumes | default({}) }}" kaas_openshift_files: "{{ ands_openshift_files | default([]) }}" kaas_openshift_uids: "{{ ands_openshift_uids | default({}) }}" @@ -17,3 +19,8 @@ kaas_default_file_owner: root kaas_default_file_group: root kaas_pod_history_limit: 1 + + +kaas_openshift_api_versions: + 
DeploymentConfig: 'v1' + StatefulSet: 'apps/v1beta1' diff --git a/roles/ands_kaas/tasks/do_apps.yml b/roles/ands_kaas/tasks/do_apps.yml new file mode 100644 index 0000000..6738b7f --- /dev/null +++ b/roles/ands_kaas/tasks/do_apps.yml @@ -0,0 +1,16 @@ +- name: "Process KaaS apps" + include_tasks: "template.yml" + run_once: true + with_items: "{{ kaas_project_apps }}" + loop_control: + loop_var: appname + when: + - app.provision | default(true) + - (ands_configure_app == ands_none) or (app.name == ands_configure_app) + vars: + app: "{{ kaas_project_config[appname] }}" + name: "{{ app.name | default((app.pods.keys() | list)[0]) }}" + instantiate: "{{ app.instantiate | default(false) }}" + load: "{{ app.load | default(false) }}" + pods: "{{ app.pods }}" + tmpl_name: "50-kaas-pods.yml.j2" diff --git a/roles/ands_kaas/tasks/do_project.yml b/roles/ands_kaas/tasks/do_project.yml index 5cafe25..f5b3276 100644 --- a/roles/ands_kaas/tasks/do_project.yml +++ b/roles/ands_kaas/tasks/do_project.yml @@ -10,11 +10,16 @@ loop_control: loop_var: osv vars: - query: "[*].volumes.{{osv.value.volume}}.mount" - mntpath: "{{ (ands_storage_domains | json_query(query)) }}" + vt_query: "[*].volumes.{{osv.value.volume}}.type" + voltype: "{{ (kaas_storage_domains | json_query(vt_query)) }}" + mp_query: "[*].volumes.{{osv.value.volume}}.mount" + mntpath: "{{ (kaas_storage_domains | json_query(mp_query)) }}" + rp_query: "[*].volumes.{{osv.value.volume}}.path" + realpath: "{{ (kaas_storage_domains | json_query(rp_query)) }}" osvpath: "{{ osv.value.path | default('') }}" prefix: "{{ ( osvpath[:1] == '/' ) | ternary('', '/' ~ kaas_project ~ '/') }}" path: "{{ mntpath[0] ~ prefix ~ osvpath }}" + hostpath: "{{ realpath[0] is defined | ternary((realpath[0] | default('')) ~ prefix ~ osvpath, '') }}" name: "{{osv.key}}" volume: "{{osv.value}}" when: ( mntpath | length ) > 0 @@ -35,8 +40,17 @@ loop_control: loop_var: file vars: + osv: "{{ kaas_project_volumes[file.osv] }}" + vt_query: 
"[*].volumes.{{osv.volume}}.type" + voltype: "{{ (kaas_storage_domains | json_query(vt_query)) }}" + mp_query: "[*].volumes.{{osv.volume}}.mount" + mntpath: "{{ (kaas_storage_domains | json_query(mp_query)) }}" + rp_query: "[*].volumes.{{osv.volume}}.path" + realpath: "{{ (kaas_storage_domains | json_query(rp_query)) }}" pvar: "kaas_{{ file.osv }}_path" path: "{{ hostvars[inventory_hostname][pvar] }}/{{ file.path }}" + hvar: "kaas_{{ file.osv }}_hostpath" + hostpath: "{{ hostvars[inventory_hostname][hvar] }}/{{ file.path }}" when: file.osv in kaas_project_volumes - name: Load OpenSSL keys @@ -60,3 +74,5 @@ when: - kaas_project_config.oc is undefined +- name: Install Applications + include_tasks: do_apps.yml diff --git a/roles/ands_kaas/tasks/do_storage.yml b/roles/ands_kaas/tasks/do_storage.yml new file mode 100644 index 0000000..ee118fd --- /dev/null +++ b/roles/ands_kaas/tasks/do_storage.yml @@ -0,0 +1,43 @@ +- name: Configure KaaS volumes + include_tasks: volume.yml + with_dict: "{{ kaas_project_volumes }}" + loop_control: + loop_var: osv + vars: + vt_query: "[*].volumes.{{osv.value.volume}}.type" + voltype: "{{ (kaas_storage_domains | json_query(vt_query)) }}" + mp_query: "[*].volumes.{{osv.value.volume}}.mount" + mntpath: "{{ (kaas_storage_domains | json_query(mp_query)) }}" + rp_query: "[*].volumes.{{osv.value.volume}}.path" + realpath: "{{ (kaas_storage_domains | json_query(rp_query)) }}" + osvpath: "{{ osv.value.path | default('') }}" + prefix: "{{ ( osvpath[:1] == '/' ) | ternary('', '/' ~ kaas_project ~ '/') }}" + path: "{{ mntpath[0] ~ prefix ~ osvpath }}" + hostpath: "{{ realpath[0] is defined | ternary((realpath[0] | default('')) ~ prefix ~ osvpath, '') }}" + name: "{{osv.key}}" + volume: "{{osv.value}}" + when: + - ( mntpath | length ) > 0 + - (osv.type | default("host")) in [ "host" ] + + +- name: Configure KaaS files + include_tasks: file.yml + with_items: "{{ kaas_project_config.files | default(kaas_openshift_files) | default([]) }}" + 
loop_control: + loop_var: file + vars: + osv: "{{ kaas_project_volumes[file.osv] }}" + vt_query: "[*].volumes.{{osv.volume}}.type" + voltype: "{{ (kaas_storage_domains | json_query(vt_query)) }}" + mp_query: "[*].volumes.{{osv.volume}}.mount" + mntpath: "{{ (kaas_storage_domains | json_query(mp_query)) }}" + rp_query: "[*].volumes.{{osv.volume}}.path" + realpath: "{{ (kaas_storage_domains | json_query(rp_query)) }}" + pvar: "kaas_{{ file.osv }}_path" + path: "{{ hostvars[inventory_hostname][pvar] }}/{{ file.path }}" + hvar: "kaas_{{ file.osv }}_hostpath" + hostpath: "{{ hostvars[inventory_hostname][hvar] }}/{{ file.path }}" + when: + - file.osv in kaas_project_volumes + - (osv.type | default("host")) in [ "host" ] diff --git a/roles/ands_kaas/tasks/file.yml b/roles/ands_kaas/tasks/file.yml index 488823b..393fe08 100644 --- a/roles/ands_kaas/tasks/file.yml +++ b/roles/ands_kaas/tasks/file.yml @@ -21,3 +21,12 @@ owner: "{{ owner }}" group: "{{ group }}" state: "{{ file.state | default('directory') }}" + + +- name: "Setting selinux context in {{ path }}" + sefcontext: target="{{ hostpath }}" setype="svirt_sandbox_file_t" state="present" reload="yes" + when: voltype[0] == "host" + +- name: "Apply selinux context in {{ path }}" + shell: restorecon "{{ hostpath }}" + when: voltype[0] == "host" diff --git a/roles/ands_kaas/tasks/main.yml b/roles/ands_kaas/tasks/main.yml index 85110cb..f1cff02 100644 --- a/roles/ands_kaas/tasks/main.yml +++ b/roles/ands_kaas/tasks/main.yml @@ -2,11 +2,11 @@ - name: Provision OpenShift resources & configurations # include_tasks: only_templates.yml include_tasks: project.yml - run_once: true -# delegate_to: "{{ groups.masters[0] }}" + run_once: "{{ do_subrole in [ 'project', 'apps' ] }}" with_items: "{{ (kaas_single_project is defined) | ternary([kaas_single_project], kaas_projects) }}" loop_control: loop_var: kaas_project vars: + do_subrole: "{{ subrole | default('project') }}" kaas_template_path: "{{ kaas_template_root }}/{{ kaas_project 
}}" kaas_project_path: "{{playbook_dir}}/projects/{{ kaas_project }}" diff --git a/roles/ands_kaas/tasks/project.yml b/roles/ands_kaas/tasks/project.yml index b8574cf..ecb2035 100644 --- a/roles/ands_kaas/tasks/project.yml +++ b/roles/ands_kaas/tasks/project.yml @@ -18,6 +18,33 @@ var_name: "var_{{kaas_project}}_config" when: hostvars[inventory_hostname][var_name] is not defined + +- name: Get information about block volumes + delegate_to: "{{ groups.masters[0] }}" + shell: gluster-block info {{ item.value.volume }}/{{ item.key }} | grep -oP '^GBID:\s*\K.*' + register: iqn_info + with_dict: "{{ kaas_block_volumes }}" + when: item.value.project == kaas_project + +- name: Get information about block volumes + delegate_to: "{{ groups.masters[0] }}" + shell: gluster-block info {{ item.value.volume }}/{{ item.key }} | grep -oP '^EXPORTED NODE.*:\s*\K.*' | tr ' ' '\n' + register: portal_info + with_dict: "{{ kaas_block_volumes }}" + when: item.value.project == kaas_project + + +- set_fact: + kaas_block_iqn: "{{ {} }}" + kaas_block_portals: "{{ {} }}" + +- set_fact: "kaas_block_iqn={{ kaas_block_iqn | combine({item.item.key: item.stdout}) }}" + with_items: "{{ iqn_info.results }}" + +- set_fact: "kaas_block_portals={{ kaas_block_portals | combine({item.item.key: item.stdout_lines}) }}" + with_items: "{{ portal_info.results }}" + + #- debug: msg="{{kaas_project_path}}" #- debug: # msg="{{kaas_project_config}}" @@ -25,11 +52,14 @@ # var_name: "var_{{kaas_project}}_config" # kaas_project_config: "{{hostvars[inventory_hostname][var_name]}}" -- include_tasks: do_project.yml +- include_tasks: "do_{{ do_subrole | default('project') }}.yml" vars: var_name: "var_{{kaas_project}}_config" kaas_project_config: "{{ hostvars[inventory_hostname][var_name] }}" kaas_project_volumes: "{{ kaas_project_config.volumes | default(kaas_project_config.extra_volumes | default({}) | combine(kaas_openshift_volumes)) }}" kaas_project_pods: "{{ kaas_project_config.pods | default({}) }}" + 
kaas_project_apps: "{{ kaas_project_config.apps | default([]) }}" kaas_project_gids: "{{ kaas_project_config.gids | default(kaas_openshift_gids) }}" kaas_project_uids: "{{ kaas_project_config.uids | default(kaas_openshift_uids) }}" + kaas_blockvol_info: "{{ block_info }}" +
\ No newline at end of file diff --git a/roles/ands_kaas/tasks/template.yml b/roles/ands_kaas/tasks/template.yml index 6c90b3d..418331a 100644 --- a/roles/ands_kaas/tasks/template.yml +++ b/roles/ands_kaas/tasks/template.yml @@ -1,6 +1,9 @@ - name: "Populate template {{ tmpl_name }}" - template: src="{{ item }}" dest="{{ kaas_template_path }}/{{ item | basename | regex_replace('\.j2','') }}" owner=root group=root mode="0644" + template: src="{{ item }}" dest="{{ kaas_template_path }}/{{ dest_name }}" owner=root group=root mode="0644" register: result + vars: + default_name: "{{ item | basename | regex_replace('\\.j2','') }}" + dest_name: "{{ (name is defined) | ternary ( (name | default('')) + '.yml', default_name ) }}" with_first_found: - paths: - "{{ role_path }}/templates/" @@ -10,8 +13,12 @@ - name: "Configure KaaS resources defined in {{ tmpl_name }}" include_role: name="openshift_resource" + when: instantiate == true vars: template: "{{ tmpl_name | basename | regex_replace('\\.j2','') }}" template_path: "{{ kaas_template_path }}" project: "{{ kaas_project }}" recreate: "{{ result | changed | ternary (true, false) }}" + +# alternatively load template +# TODO diff --git a/roles/ands_kaas/tasks/templates.yml b/roles/ands_kaas/tasks/templates.yml index 9fc378f..4417cf3 100644 --- a/roles/ands_kaas/tasks/templates.yml +++ b/roles/ands_kaas/tasks/templates.yml @@ -19,6 +19,11 @@ with_items: "{{ sorted_tmpl }}" vars: sorted_tmpl: "{{ (results.results[0] is defined) | ternary (results | json_query('results[*].stdout_lines') | sum(start=[]) | map('basename') | sort | unique, []) }}" + instantiate: true + load: false + pods: "{{ kaas_project_pods }}" loop_control: loop_var: tmpl_name + + diff --git a/roles/ands_kaas/tasks/volume.yml b/roles/ands_kaas/tasks/volume.yml index 783654a..2c695f2 100644 --- a/roles/ands_kaas/tasks/volume.yml +++ b/roles/ands_kaas/tasks/volume.yml @@ -2,6 +2,9 @@ - name: "Configure {{ name }} fact" set_fact: "kaas_{{ name }}_path={{ path }}" 
+- name: "Configure {{ name }} fact" + set_fact: "kaas_{{ name }}_hostpath={{ hostpath }}" + - name: "Ensure {{ path }} exists" file: path: "{{ path }}" @@ -41,5 +44,16 @@ - mkdir | changed - chmod | skipped +- name: "Setting SELinux context for non standard locations" + sefcontext: target="{{ hostpath }}" setype="svirt_sandbox_file_t" state="present" reload="yes" + when: + - mkdir | changed + - chmod | skipped + - voltype[0] == "host" - +- name: "Apply SELinux context for non standard locations" + shell: restorecon "{{ hostpath }}" + when: + - mkdir | changed + - chmod | skipped + - voltype[0] == "host" diff --git a/roles/ands_kaas/templates/00-block-volumes.yml.j2 b/roles/ands_kaas/templates/00-block-volumes.yml.j2 new file mode 100644 index 0000000..9982d61 --- /dev/null +++ b/roles/ands_kaas/templates/00-block-volumes.yml.j2 @@ -0,0 +1,48 @@ +apiVersion: v1 +kind: Template +metadata: + name: {{ kaas_project }}-block-volumes + annotations: + descriptions: "{{ kaas_project }} glusterfs block volumes" +objects: +{% for name, vol in kaas_block_volumes.iteritems() %} +{% set oc_name = vol.name | default(name) | regex_replace('_','-') %} +{% if oc_name | regex_search("^" + kaas_project) %} +{% set pvname = oc_name %} +{% else %} +{% set pvname = (kaas_project + "-" + oc_name) | regex_replace('_','-') %} +{% endif %} + - apiVersion: v1 + kind: PersistentVolume + metadata: + name: {{ pvname }} + spec: + persistentVolumeReclaimPolicy: Retain + accessModes: + - ReadWriteOnce + iscsi: + fsType: xfs + iqn: iqn.2016-12.org.gluster-block:{{ kaas_block_iqn[name] }} + iscsiInterface: default + lun: 0 + targetPortal: {{ kaas_block_portals[name][0] }} +{% if kaas_block_portals[name] | length > 1 %} + portals: {{ kaas_block_portals[name][1:] | to_json }} +{% endif %} + capacity: + storage: {{ vol.capacity | default(kaas_default_volume_capacity) }} + claimRef: + name: {{ oc_name }} + namespace: {{ kaas_project }} + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: 
{{ oc_name }} + spec: + volumeName: {{ pvname }} + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ vol.capacity | default(kaas_default_volume_capacity) }} +{% endfor %} diff --git a/roles/ands_kaas/templates/00-gfs-volumes.yml.j2 b/roles/ands_kaas/templates/00-gfs-volumes.yml.j2 index a69942d..54064e4 100644 --- a/roles/ands_kaas/templates/00-gfs-volumes.yml.j2 +++ b/roles/ands_kaas/templates/00-gfs-volumes.yml.j2 @@ -7,6 +7,10 @@ metadata: descriptions: "{{ kaas_project }} glusterfs volumes" objects: {% for name, vol in kaas_project_volumes.iteritems() %} +{% set voltypes = kaas_storage_domains | json_query("[*].volumes." + vol.volume + ".type") %} +{% set voltype = voltypes[0] | default('host') %} +{% set mntpaths = kaas_storage_domains | json_query("[*].volumes." + vol.volume + ".mount") %} +{% set mntpath = mntpaths[0] | default('') %} {% set oc_name = vol.name | default(name) | regex_replace('_','-') %} {% set cfgpath = vol.path | default("") %} {% set path = cfgpath if cfgpath[:1] == "/" else "/" + kaas_project + "/" + cfgpath %} @@ -21,9 +25,14 @@ objects: name: {{ pvname }} spec: persistentVolumeReclaimPolicy: Retain +{% if voltype == 'host' %} + hostPath: + path: "{{ mntpath }}{{ path }}" +{% else %} glusterfs: endpoints: {{ kaas_glusterfs_endpoints }} - path: "{{ vol.volume }}{{path}}" + path: "{{ vol.volume }}{{ path }}" +{% endif %} readOnly: {{ not (vol.write | default(false)) }} accessModes: - {{ vol.access | default(vol.write | default(false) | ternary('ReadWriteMany', 'ReadOnlyMany')) }} diff --git a/roles/ands_kaas/templates/50-kaas-pods.yml.j2 b/roles/ands_kaas/templates/50-kaas-pods.yml.j2 index ad1fc58..761004d 100644 --- a/roles/ands_kaas/templates/50-kaas-pods.yml.j2 +++ b/roles/ands_kaas/templates/50-kaas-pods.yml.j2 @@ -3,39 +3,65 @@ apiVersion: v1 kind: Template metadata: - name: {{ kaas_project }}-pods + name: {{ name | default(kaas_project) }}-pods annotations: - descriptions: {{ kaas_project_config.description | 
default(kaas_project ~ " auto-generated pod template") }} + descriptions: {{ kaas_project_config.description | default(name | default(kaas_project) ~ " auto-generated pod template") }} objects: -{% for name, pod in kaas_project_pods.iteritems() %} - {% set pubkey = "kaas_" ~ name ~ "_pubkey" %} - {% set privkey = "kaas_" ~ name ~ "_privkey" %} - {% set cakey = "kaas_" ~ name ~ "_ca" %} - {% if pod.variant is defined %} - {% set pod = pod[pod.variant] %} - {% endif %} - {% set sched = pod.sched | default({}) %} - {% set node_selector = (sched.selector is defined) | ternary(sched.selector, ands_default_node_selector | combine(sched.restrict | default({}))) %} - - {% if pod.service is defined %} +{% for name, pod in pods.iteritems() %} + {% set kind = pod.kind | default('DeploymentConfig') %} + {% if pod.enabled | default(true) %} + {% set pubkey = "kaas_" ~ name ~ "_pubkey" %} + {% set privkey = "kaas_" ~ name ~ "_privkey" %} + {% set cakey = "kaas_" ~ name ~ "_ca" %} + {% if pod.variant is defined %} + {% set pod = pod[pod.variant] %} + {% endif %} + {% set sched = pod.sched | default({}) %} + {% set node_selector = (sched.selector is defined) | ternary(sched.selector, ands_default_node_selector | combine(sched.restrict | default({}))) %} + {% if pod.service is defined %} + {% if kind == 'StatefulSet' and pod.service.ports is defined %} - apiVersion: v1 kind: Service metadata: - name: {{ pod.name | default(name) }} + name: {{ pod.name | default(name) }}-ss + annotations: {{ pod.service.annotations | default({}) | combine({"service.alpha.kubernetes.io/tolerate-unready-endpoints": "true" }) | to_json }} spec: + clusterIP: None + publishNotReadyAddresses: True selector: name: {{ pod.name | default(name) }} - {% if pod.service.ports is defined %} ports: {% for port in pod.service.ports %} - {% set portmap = (port | string).split('/') %} + {% set portmap = (port | string).split('/') %} - name: "{{ portmap[0] }}" port: {{ portmap[0] }} targetPort: {{ (portmap[1] is 
defined) | ternary(portmap[1], portmap[0]) }} {% endfor %} - {% endif %} - {% if (pod.service.ports is defined) and (pod.service.host is defined) %} - {% set first_port = (pod.service.ports[0] | string).split('/') %} + {% endif %} + - apiVersion: v1 + kind: Service + metadata: + name: {{ pod.name | default(name) }} + {% if pod.service.annotations is defined %} + annotations: {{ pod.service.annotations | to_json }} + {% endif %} + spec: + selector: + name: {{ pod.name | default(name) }} + {% if pod.service.ip is defined %} + clusterIP: {{ pod.service.ip }} + {% endif %} + {% if pod.service.ports is defined %} + ports: + {% for port in pod.service.ports %} + {% set portmap = (port | string).split('/') %} + - name: "{{ portmap[0] }}" + port: {{ portmap[0] }} + targetPort: {{ (portmap[1] is defined) | ternary(portmap[1], portmap[0]) }} + {% endfor %} + {% endif %} + {% if (pod.service.ports is defined) and (pod.service.host is defined) %} + {% set first_port = (pod.service.ports[0] | string).split('/') %} - apiVersion: v1 kind: Route metadata: @@ -47,27 +73,27 @@ objects: name: {{ pod.name | default(name) }} port: targetPort: {{ (first_port[1] is defined) | ternary(first_port[1], first_port[0]) }} - {% if (first_port[0] == "80") %} + {% if (first_port[0] == "80") %} tls: termination: edge insecureEdgeTerminationPolicy: Allow - {% if hostvars[inventory_hostname][pubkey] is defined %} + {% if hostvars[inventory_hostname][pubkey] is defined %} certificate: |- {{ hostvars[inventory_hostname][pubkey] | indent(10) }} - {% endif %} - {% if hostvars[inventory_hostname][privkey] is defined %} + {% endif %} + {% if hostvars[inventory_hostname][privkey] is defined %} key: |- {{ hostvars[inventory_hostname][privkey] | indent(10) }} - {% endif %} - {% if hostvars[inventory_hostname][cakey] is defined %} + {% endif %} + {% if hostvars[inventory_hostname][cakey] is defined %} caCertificate: |- {{ hostvars[inventory_hostname][cakey] | indent(10) }} + {% endif %} {% endif %} {% endif 
%} {% endif %} - {% endif %} - - apiVersion: v1 - kind: DeploymentConfig + - apiVersion: {{ kaas_openshift_api_versions[kind] | default('v1') }} + kind: {{ kind }} metadata: name: {{ pod.name | default(name) }} spec: @@ -75,13 +101,32 @@ objects: revisionHistoryLimit: 2 strategy: type: {{ (sched | default({})).strategy | default('Rolling') }} + updateStrategy: + {% if pod.update %} + type: {{ pod.update.strategy | default('OnDelete') }} + {% if pod.update.min_ready is defined %} + minReadySeconds: {{ pod.update.min_ready }} + {% endif %} + {% endif %} triggers: - type: ConfigChange + {% if kind == 'StatefulSet' %} + serviceName: {{ pod.name | default(name) }}-ss + selector: + matchLabels: + name: {{ pod.name | default(name) }} + {% else %} selector: name: {{ pod.name | default(name) }} + {% endif %} template: metadata: name: {{ pod.name | default(name) }} + {% if kind == 'StatefulSet' %} + annotations: {{ pod.annotations | default({}) | combine({"pod.alpha.kubernetes.io/initialized": "true"}) | to_json }} + {% elif pod.annotations is defined %} + annotations: {{ pod.annotations | to_json }} + {% endif %} labels: name: {{ pod.name | default(name) }} spec: @@ -89,16 +134,22 @@ objects: nodeSelector: {{ node_selector | to_json }} {% endif %} {% set mappings = (pod.images | json_query('[*].mappings') | length) %} - {% if mappings > 0 %} + {% set paths = (pod.images | json_query('[*].hostpath') | length) %} + {% if mappings > 0 or paths > 0 %} volumes: {% for img in pod.images %} {% set imgidx = loop.index %} - {% for vol in img.mappings %} + {% for vol in (img.mappings | default([])) %} {% set oc_name = vol.name | default(name) | regex_replace('_','-') %} - name: vol-{{imgidx}}-{{loop.index}} persistentVolumeClaim: claimName: {{ oc_name }} {% endfor %} + {% for vol in (img.hostpath | default([])) %} + - name: host-{{imgidx}}-{{loop.index}} + hostPath: + path: {{ vol.path }} + {% endfor %} {% endfor %} {% endif %} {% if (pod.groups is defined) or (pod.run_as is defined) 
%} @@ -121,21 +172,31 @@ objects: {% set imgidx = loop.index %} - name: {{ img.name | default(pod.name) | default(name) }} image: {{ img.image }} - imagePullPolicy: Always - ports: + imagePullPolicy: {{ img.pull | default('Always') }} + {% if (img.command is defined) %} + command: {{ img.command | to_json }} + {% endif %} {% if img.ports is defined %} + ports: {% for port in img.ports %} - containerPort: {{ port }} {% endfor %} - {% else %} + {% elif pod.service.ports is defined %} + ports: {% for port in pod.service.ports %} {% set portmap = (port | string).split('/') %} - containerPort: {{ (portmap[1] is defined) | ternary(portmap[1], portmap[0]) }} {% endfor %} {% endif %} + {% if kind == 'StatefulSet' %} + {% set extra_env = [ { "name": "POD_NAMESPACE", "value": "fieldref@metadata.namespace" }, { "name": "POD_REPLICAS", "value": sched.replicas } ] %} + {% set env = img.env | default([]) | union(extra_env) %} + {% elif img.env is defined %} + {% set env = img.env %} + {% endif %} {% if img.env is defined %} env: - {% for env_item in img.env %} + {% for env_item in env %} {% set env_name = env_item.name %} {% set env_val = env_item.value %} {% set env_parts = (env_val | string).split('@') %} @@ -152,18 +213,50 @@ objects: configMapKeyRef: name: {{ env_cm[0] }} key: {{ env_cm[1] }} + {% elif env_parts[0] == "fieldref" %} + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: {{ env_parts[1] }} {% else %} value: "{{ env_val }}" {% endif %} {% endfor %} {% endif %} - {% if img.mappings is defined %} + {% if img.mappings is defined or img.hostpath is defined %} volumeMounts: - {% for vol in img.mappings %} + {% for vol in (img.mappings | default([])) %} - name: vol-{{imgidx}}-{{loop.index}} subPath: {{ vol.path | default("") }} mountPath: {{ vol.mount }} {% endfor %} + {% for vol in (img.hostpath | default([])) %} + - name: host-{{imgidx}}-{{loop.index}} + mountPath: {{ vol.mount }} + {% endfor %} + {% endif %} + {% if img.resources is defined %} + resources: + {% if 
img.resources.request is defined %} + {% set res = img.resources.request %} + requests: + {% if res.cpu %} + cpu: {{ res.cpu }} + {% endif %} + {% if res.cpu %} + memory: {{ res.mem }} + {% endif %} + {% endif %} + {% if img.resources.limit is defined %} + {% set res = img.resources.limit %} + limits: + {% if res.cpu %} + cpu: {{ res.cpu }} + {% endif %} + {% if res.cpu %} + memory: {{ res.mem }} + {% endif %} + {% endif %} {% endif %} {% if img.probes is defined %} {% for probe in img.probes %} @@ -201,4 +294,5 @@ objects: {% endfor %} {% endif %} {% endfor %} + {% endif %} {% endfor %} diff --git a/roles/ands_storage/tasks/detect_device.yml b/roles/ands_storage/tasks/detect_device.yml index 3467371..f0245f3 100644 --- a/roles/ands_storage/tasks/detect_device.yml +++ b/roles/ands_storage/tasks/detect_device.yml @@ -4,9 +4,12 @@ # when: item.mount == ands_data_path - name: find large block devices +# no_log: true set_fact: ands_data_device="/dev/{{ item.key }}" # debug: msg="{{ item.key }} - {{ (item.value.sectors | int) * (item.value.sectorsize | int) / 1024 / 1024 / 1024 }} GB" with_dict: "{{ ansible_devices }}" + loop_control: + label: "{{ item.key }} of {{ (item.value.sectors | int) * (item.value.sectorsize | int) / 1024 / 1024 / 1024 }} GB" when: - not ands_data_device is defined - not item.value.partitions diff --git a/roles/ands_storage/tasks/hostmount.yml b/roles/ands_storage/tasks/hostmount.yml new file mode 100644 index 0000000..e4f301f --- /dev/null +++ b/roles/ands_storage/tasks/hostmount.yml @@ -0,0 +1,5 @@ +- file: path="{{ item.value.path }}" state=directory + with_dict: "{{ domain.volumes }}" + +- mount: src="{{ item.value.path }}" name="{{ item.value.mount }}" opts=bind fstype=none state=mounted + with_dict: "{{ domain.volumes }}" diff --git a/roles/ands_storage/tasks/main.yml b/roles/ands_storage/tasks/main.yml index 43d4692..8e9d44b 100644 --- a/roles/ands_storage/tasks/main.yml +++ b/roles/ands_storage/tasks/main.yml @@ -48,4 +48,9 @@ - name: 
Mount Ands Data Volume mount: name="{{ ands_data_path }}" src="/dev/{{ ands_data_vg }}/{{ ands_data_lv }}" fstype="{{ ands_data_fs }}" opts="defaults" state="mounted" -
\ No newline at end of file +- name: Provision Ands local storage domains + include_tasks: hostmount.yml + with_items: "{{ ands_local_storage_domains | default([]) }}" + when: domain.servers | intersect(group_names) | length > 0 + loop_control: + loop_var: domain diff --git a/roles/glusterfs/defaults/main.yml b/roles/glusterfs/defaults/main.yml index 700838d..d66ff5e 100644 --- a/roles/glusterfs/defaults/main.yml +++ b/roles/glusterfs/defaults/main.yml @@ -6,6 +6,7 @@ glusterfs_network: "{{ ands_storage_network }}" glusterfs_servers: "{{ ands_storage_servers }}" glusterfs_bricks_path: "{{ ands_data_path }}/glusterfs" glusterfs_domains: "{{ ands_storage_domains }}" +glusterfs_block_volumes: "{{ ands_block_volumes | default({}) }}" glusterfs_all_subroles: "{{ [ 'software', 'volumes' ] }}" glusterfs_subroles: "{{ ( subrole is defined ) | ternary( [ subrole ], glusterfs_all_subroles ) }}" diff --git a/roles/glusterfs/files/glusterblock-link.service b/roles/glusterfs/files/glusterblock-link.service new file mode 100644 index 0000000..9aecd40 --- /dev/null +++ b/roles/glusterfs/files/glusterblock-link.service @@ -0,0 +1,8 @@ +[Unit] +After=origin-node.service + +[Service] +ExecStart=/usr/bin/ln -sf /run/glusterd/gluster-blockd.socket /run/gluster-blockd.socket + +[Install] +WantedBy=multi-user.target diff --git a/roles/glusterfs/tasks/cfg/vols3.yml b/roles/glusterfs/tasks/cfg/vols3.yml index d8ed728..efd613c 100644 --- a/roles/glusterfs/tasks/cfg/vols3.yml +++ b/roles/glusterfs/tasks/cfg/vols3.yml @@ -7,7 +7,7 @@ cluster: "{{ domain_servers | join(',') }}" replicas: "{{ domain_servers | length }}" bricks: "{{ glusterfs_bricks_path }}/brick-{{ name }}" - transport: "{{ glusterfs_transport }}" + transport: "{{ transport }}" - name: "Start {{ name }} volume" diff --git a/roles/glusterfs/tasks/common.yml b/roles/glusterfs/tasks/common.yml index 67fb815..c94f86e 100644 --- a/roles/glusterfs/tasks/common.yml +++ b/roles/glusterfs/tasks/common.yml @@ -8,8 +8,13 @@ - 
glusterfs-cli - glusterfs-fuse - glusterfs-rdma - - heketi-client - libsemanage-python + +- name: Ensure GlusterFS is installed + yum: name={{item}} state=latest enablerepo="centos-gluster{{ glusterfs_version }}-test" + with_items: + - heketi-client + - gluster-block - name: Allow fuse in SELinux configuration seboolean: name="virt_sandbox_use_fusefs" state="yes" persistent="yes" diff --git a/roles/glusterfs/tasks/create_block.yml b/roles/glusterfs/tasks/create_block.yml new file mode 100644 index 0000000..5b30f02 --- /dev/null +++ b/roles/glusterfs/tasks/create_block.yml @@ -0,0 +1,18 @@ +- name: Check if the holding volume already exists + shell: "gluster volume info {{ block.value.volume }}" + changed_when: false + register: gv_results + +- name: Get list of existing block volumes + shell: "gluster-block list {{ block.value.volume }}" + changed_when: false + register: bv_results + +- name: Create block volume + shell: "gluster-block create {{ block.value.volume }}/{{ block.key }} ha {{ servers | length }} auth disable prealloc no {{ servers | join(',') }} {{ block.value.capacity }}" + when: block.key not in bv_results.stdout_lines + vars: + ha: "{{ block.value.ha | default(3) }}" + servers: "{{ domain_servers[0:(ha | int)] }}" + loop_control: + loop_var: volume diff --git a/roles/glusterfs/tasks/create_domain.yml b/roles/glusterfs/tasks/create_domain.yml index 76623f2..99f9959 100644 --- a/roles/glusterfs/tasks/create_domain.yml +++ b/roles/glusterfs/tasks/create_domain.yml @@ -14,3 +14,12 @@ domain_servers: "{{ groups[domain.servers] | map('extract', hostvars, 'ands_storage_hostname') | list }}" loop_control: loop_var: volume + +- name: Create block volumes + include_tasks: create_block.yml + when: block.value.volume in domain.volumes.keys() + with_dict: "{{ glusterfs_block_volumes }}" + vars: + domain_servers: "{{ groups[domain.servers] | map('extract', hostvars, 'ands_storage_hostname') | list }}" + loop_control: + loop_var: block diff --git 
a/roles/glusterfs/tasks/create_volume.yml b/roles/glusterfs/tasks/create_volume.yml index ca4f39a..a94b96f 100644 --- a/roles/glusterfs/tasks/create_volume.yml +++ b/roles/glusterfs/tasks/create_volume.yml @@ -2,3 +2,4 @@ - include_tasks: "{{ volume.value.type }}/vols{{((domain_servers | length) < 4) | ternary((domain_servers | length), 3) }}.yml" vars: name: "{{ volume.key }}" + transport: "{{ volume.value.transport | default(glusterfs_transport) }}" diff --git a/roles/glusterfs/tasks/data/vols2.yml b/roles/glusterfs/tasks/data/vols2.yml index d8ed728..efd613c 100644 --- a/roles/glusterfs/tasks/data/vols2.yml +++ b/roles/glusterfs/tasks/data/vols2.yml @@ -7,7 +7,7 @@ cluster: "{{ domain_servers | join(',') }}" replicas: "{{ domain_servers | length }}" bricks: "{{ glusterfs_bricks_path }}/brick-{{ name }}" - transport: "{{ glusterfs_transport }}" + transport: "{{ transport }}" - name: "Start {{ name }} volume" diff --git a/roles/glusterfs/tasks/data/vols3.yml b/roles/glusterfs/tasks/data/vols3.yml index 14c3763..f28a38c 100644 --- a/roles/glusterfs/tasks/data/vols3.yml +++ b/roles/glusterfs/tasks/data/vols3.yml @@ -8,7 +8,7 @@ replicas: 3 arbiters: 1 bricks: "{{ glusterfs_bricks_path }}/brick-{{ name }}" - transport: "{{ glusterfs_transport }}" + transport: "{{ transport }}" - name: "Start {{ name }} volume" diff --git a/roles/glusterfs/tasks/db/vols3.yml b/roles/glusterfs/tasks/db/vols3.yml index cbd238d..45cb0ce 100644 --- a/roles/glusterfs/tasks/db/vols3.yml +++ b/roles/glusterfs/tasks/db/vols3.yml @@ -8,7 +8,7 @@ disperses: "3" redundancies: "1" bricks: "{{ glusterfs_bricks_path }}/brick-{{ name }}" - transport: "{{ glusterfs_transport }}" + transport: "{{ transport }}" - name: "Start {{ name }} volume" diff --git a/roles/glusterfs/tasks/la/vols3.yml b/roles/glusterfs/tasks/la/vols3.yml index ada8f95..af1e889 100644 --- a/roles/glusterfs/tasks/la/vols3.yml +++ b/roles/glusterfs/tasks/la/vols3.yml @@ -6,7 +6,7 @@ host: "{{ ands_storage_hostname }}" cluster: "{{ 
domain_servers | join(',') }}" bricks: "{{ glusterfs_bricks_path }}/brick-{{ name }}" - transport: "{{ glusterfs_transport }}" + transport: "{{ transport }}" - name: "Start {{ name }} volume" gluster_volume: state="started" name="{{ name }}" diff --git a/roles/glusterfs/tasks/setup-openshift-server.yml b/roles/glusterfs/tasks/setup-openshift-server.yml index 20ebbf8..c4fcbcc 100644 --- a/roles/glusterfs/tasks/setup-openshift-server.yml +++ b/roles/glusterfs/tasks/setup-openshift-server.yml @@ -1,9 +1,19 @@ --- - name: Link control socket - file: src="/run/glusterd/glusterd.socket" dest="/run/glusterd.socket" state="link" + file: src="/run/glusterd/{{ item }}" dest="/run/{{ item }}" state="link" + with_items: + - glusterd.socket + - gluster-blockd.socket - name: Copy systemd unit to recreate link on re-start - copy: src="gluster-link.service" dest="/etc/systemd/system/gluster-link.service" owner="root" group="root" mode="0644" + copy: src="{{ item }}" dest="/etc/systemd/system/{{ item }}" owner="root" group="root" mode="0644" + with_items: + - gluster-link.service + - glusterblock-link.service - name: Enable systemd unit - systemd: enabled=true name=gluster-link daemon_reload=yes
\ No newline at end of file + systemd: enabled=true name={{ item }} daemon_reload=yes + with_items: + - gluster-link.service + - glusterblock-link.service + - rpcbind diff --git a/roles/ofed/files/rdma_limits.conf b/roles/ofed/files/rdma_limits.conf new file mode 100644 index 0000000..9a34ae4 --- /dev/null +++ b/roles/ofed/files/rdma_limits.conf @@ -0,0 +1,4 @@ +# configuration for rdma tuning +* soft memlock unlimited +* hard memlock unlimited +# rdma tuning end diff --git a/roles/ofed/tasks/main.yml b/roles/ofed/tasks/main.yml index bd85d43..df8392d 100644 --- a/roles/ofed/tasks/main.yml +++ b/roles/ofed/tasks/main.yml @@ -1,6 +1,12 @@ - name: install the 'Infiniband support' package group yum: name="@Infiniband Support" state=present - + + +# https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/networking_guide/sec-configuring_the_base_rdma_subsystem +- name: Allow users unrestricted page-locking + copy: src="rdma_limits.conf" dest="/etc/security/limits.d/50-rdma.conf" owner="root" group="root" mode="0644" + - name: start rdma service service: name="rdma" enabled=yes state=started +
\ No newline at end of file diff --git a/roles/openshift_resource/tasks/template.yml b/roles/openshift_resource/tasks/template.yml index 7e74de4..188599f 100644 --- a/roles/openshift_resource/tasks/template.yml +++ b/roles/openshift_resource/tasks/template.yml @@ -4,14 +4,16 @@ set_fact: resources="{{ tmpl | json_query(query) }}" vars: query: "objects[*].{kind: kind, name: metadata.name}" - + + - set_fact: resources="{{ [] }}" + when: resources == "" + - name: "{{ template }}: Lookup the specified resource in {{project}}" command: "oc get -n {{project}} {{item.kind}}/{{item.name}}" register: results failed_when: false changed_when: (results | failed) with_items: "{{ resources | default([]) }}" -# when: not (recreate|default(false)) - name: "{{ template }}: Detroy existing resources in {{project}}" command: "oc delete -n {{project}} {{resources[item|int].kind}}/{{resources[item|int].name}}" @@ -21,5 +23,8 @@ - name: "{{ template }}: Populate resources to {{project}}" shell: "oc process -n {{project}} -f '{{ template_path }}/{{template}}' {{ template_args | default('') }} | oc create -n {{project}} -f - {{ create_args | default('') }}" - when: (recreate|default(false)) or (results | changed) + when: + - (recreate|default(false)) or (results | changed) + - resources | length > 0 + run_once: true diff --git a/scripts/gluster.sh b/scripts/gluster.sh index 9efea45..07ca7f9 100755 --- a/scripts/gluster.sh +++ b/scripts/gluster.sh @@ -63,16 +63,104 @@ function migrate { heal $vol } + +function transport { + vol=$1 + transport=${2:-tcp,rdma} + echo "Changing $vol to transport $transport" + gluster volume stop "$vol" + gluster volume set "$vol" config.transport "$transport" + gluster volume start "$vol" +} + + + +function restart { + vol=$1 + + echo $vol + bricks=$(gluster volume info "$vol" | grep -P 'Number of Bricks' | awk '{ print $NF }' | tr -d '\r\n') + online=$(gluster volume status "$vol" detail | grep Online | grep Y | wc -l) + + if [ "$bricks" -ne "$online" ]; then 
+ echo "Restarting $vol ($online bricks of $bricks are/is online)" + gluster --mode=script volume stop "$vol" + gluster --mode=script volume start "$vol" + fi +} + +function delete_failed { + vol=$1 + + bricks=$(gluster volume info "$vol" | grep -P 'Number of Bricks' | awk '{ print $NF }' | tr -d '\r\n') + online=$(gluster volume status "$vol" detail | grep Online | grep Y | wc -l) + + if [ "$online" == "0" ]; then + echo "Deleting $vol ($online bricks of $bricks are/is online)" +# gluster --mode=script volume stop "$vol" + gluster --mode=script volume delete "$vol" + fi +} + + +function lvm_clean { + used_bricks=`gluster volume info | grep "/brick_" | sed -r -e 's/.*brick_(.*)\/brick/\1/'` + + for ip in $(seq 1 3); do + echo "Node $ip" + echo "========" + lvm_bricks=`node $ip lvscan | grep brick | sed -r -e 's/.*brick_([0-9a-z]*)[^0-9a-z].*/\1/'` + diff=$(echo $used_bricks $lvm_bricks | tr -d '\r' | tr ' ' '\n' | sort | uniq -u) + remove=$(echo "$diff $lvm_bricks" | tr -d '\r' | tr ' ' '\n' | sort | uniq -d) + + for id in $remove; do + echo "Removing ---------------------------------------------" + node $ip lvs -o name,time,size -S "'name =~ $id'" + echo "Removing ---------------------------------------------" + node $ip lvremove -y -S "'name =~ $id'" + done + done + + +} + +function lvm_remove_today { + for ip in $(seq 1 3); do + node $ip hostname +#lvdisplay -o name,time -S 'time since "2018-03-16"' + done +} -# -# heal $1 +function heketi_cmd { + heketi "$@" +} -if [ -z "$1" -a "$1" =~ ^all ]; then +function heketi_clean { + heketi_vols=`heketi topology info | grep "Name: vol_" | sed -r -e 's/.*(vol_[0-9a-z]+)\s*$/\1/'` + gluster_vols=`gluster volume info | grep "Name: vol_" | sed -r -e 's/.*(vol_[0-9a-z]+)\s*$/\1/'` + echo $heketi_vols + + diff=$(echo $gluster_vols $heketi_vols | tr -d '\r' | tr ' ' '\n' | sort | uniq -u) + remove=$(echo "$diff $gluster_vols" | tr -d '\r' | tr ' ' '\n' | sort | uniq -d) + + for vol in $remove; do + echo "Stopping and deleting 
volume $vol" + echo "---------------------------------" + gluster --mode=script volume stop "$vol" + gluster --mode=script volume delete "$vol" + done +} + +if [[ "$action" =~ ^heketi ]]; then + eval "$action" "$@" +elif [[ "$action" =~ ^lvm ]]; then + eval "$action" "$@" +elif [[ -z "$1" || "$1" =~ ^all ]]; then all=0 [ "$1" == "all_heketi" ] && all=1 - [ "$1" =~ ^all ] && shift + [[ "$1" =~ ^all ]] && shift vols=$(gluster volume info | grep -P '^Volume Name' | awk '{ print $NF }' | tr '\r\n' ' ') for vol in $vols; do diff --git a/scripts/hawakular.sh b/scripts/hawakular.sh new file mode 100755 index 0000000..73e3a87 --- /dev/null +++ b/scripts/hawakular.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +case $1 in + "stop") + oc -n openshift-infra scale --replicas 0 rc/hawkular-metrics + oc -n openshift-infra scale --replicas 0 rc/heapster + oc -n openshift-infra scale --replicas 0 dc/hawkular-cassandra + ;; + "start") + oc -n openshift-infra scale --replicas 0 dc/hawkular-cassandra + sleep 1 + oc -n openshift-infra scale --replicas 0 rc/heapster + sleep 1 + oc -n openshift-infra scale --replicas 0 rc/hawkular-metrics + ;; + *) + echo "Usage: $0 stop/start" +esac diff --git a/scripts/kube-ops-view.sh b/scripts/kube-ops-view.sh new file mode 100755 index 0000000..ca1389e --- /dev/null +++ b/scripts/kube-ops-view.sh @@ -0,0 +1,12 @@ +#! 
/bin/bash + +NS=mon + + +oc -n $NS new-project ocp-ops-view +oc -n $NS create sa kube-ops-view +oc -n $NS adm policy add-scc-to-user anyuid -z kube-ops-view +oc -n $NS adm policy add-cluster-role-to-user cluster-admin system:serviceaccount:mon:kube-ops-view +oc -n $NS apply -f https://raw.githubusercontent.com/raffaelespazzoli/kube-ops-view/ocp/deploy-openshift/kube-ops-view.yaml +oc -n $NS expose svc kube-ops-view +oc -n $NS get route | grep kube-ops-view | awk '{print $2}' diff --git a/scripts/opts.sh b/scripts/opts.sh index d484efc..2f76d8e 100644 --- a/scripts/opts.sh +++ b/scripts/opts.sh @@ -7,3 +7,15 @@ gpod=$(get_gluster_pod) function gluster { oc -n glusterfs rsh po/$gpod gluster "$@" } + + +function node { + ip=$1 + shift + + ssh -xq root@192.168.26.$ip "$@" +} + +function heketi { + node 1 heketi-cli -s http://heketi-storage.glusterfs.svc.cluster.local:8080 --user admin --secret "$(oc get secret heketi-storage-admin-secret -n glusterfs -o jsonpath='{.data.key}' | base64 -d)" "$@" +} @@ -55,6 +55,18 @@ case "$action" in [ -n "$project" ] || { usage 'project name should be specified...' ; exit 1; } apply playbooks/openshift-setup-project.yml --extra-vars "ands_configure_project=$project" "$@" || exit 1 ;; + apps) + [ -n "$1" ] || { usage 'project name should be specified...' 
; exit 1; } + + vars="ands_configure_project=$1" + shift + + if [[ -n "$1" && ${1:0:1} != "-" ]]; then + vars="$vars,ands_configure_app=$1" + shift + fi + apply playbooks/openshift-setup-apps.yml --extra-vars "$vars" "$@" || exit 1 + ;; openshift-masters) apply playbooks/openshift-add-masters.yml "$@" || exit 1 ;; @@ -99,6 +111,9 @@ case "$action" in upgrade) apply playbooks/openshift-upgrade.yml "$@" || exit 1 ;; + health) + apply playbooks/openshift-health.yml "$@" || exit + ;; maintain) apply playbooks/maintain.yml "$@" || exit ;; diff --git a/setup/configs/volumes.yml b/setup/configs/volumes.yml index 14aadfa..020c7d2 100644 --- a/setup/configs/volumes.yml +++ b/setup/configs/volumes.yml @@ -1,5 +1,6 @@ --- ands_paths: + hostraid: /mnt/hostraid provision: /mnt/provision openshift: /mnt/openshift temporary: /mnt/temporary @@ -24,12 +25,14 @@ ands_storage_domains: - servers: "ands_storage_servers" clients: [ "masters", "new_masters" ] volumes: -# provision: { type: "cfg", mount: "{{ ands_paths.provision }}" } openshift: { type: "cfg", mount: "{{ ands_paths.openshift }}", nfs_clients: "{{ ands_nfs_clients }}" } - databases: { type: "db", mount: "{{ ands_paths.databases }}" } + databases: { type: "db", mount: "{{ ands_paths.databases }}", access: "ReadOnlyMany" } temporary: { type: "tmp", mount: "{{ ands_paths.temporary }}", nfs_clients: "{{ ands_nfs_clients }}" } datastore: { type: "data", mount: "{{ ands_paths.datastore }}", nfs_clients: "{{ ands_nfs_clients }}" } katrin_data: { type: "data", mount: "{{ ands_paths.katrin_data }}", nfs_clients: "{{ ands_nfs_clients }}" } + - servers: "ands_storage_servers" + volumes: + block: { type: "db", transport: "{{ ands_rdma_support | ternary('rdma', 'tcp') }}" } # - servers: "ands_storage_servers" # clients: [ "nodes", "new_nodes" ] @@ -39,6 +42,10 @@ ands_storage_domains: # - ovirt: # - pdv: +ands_local_storage_domains: + - servers: [ "ands_storage_servers" ] + volumes: + hostraid: { type: "host", path: 
"/mnt/ands/hostmount", mount: "{{ ands_paths.hostraid }}" } # Per project list (to distribute in multiple namespaces later) # If not started with '/' will be prepended with project name @@ -48,7 +55,12 @@ ands_openshift_volumes: data: { volume: "datastore", path: "", write: true } db: { volume: "databases", path: "", write: true } tmp: { volume: "temporary", path: "", write: true } + host: { volume: "hostraid", path: "", write: true } # Global list, we only take things from the volume of project #ands_openshift_files: # - { osv: "log", path: "apache2-kaas", state: "directory", mode: "0777" } + + +#ands_block_volumes: +# adei-mysql: { volume: "block", capacity: "2Ti", ha: 2, project: "kaas" } diff --git a/setup/projects/adei/files/adei_init/mysql/adei.sql b/setup/projects/adei/files/adei_init/mysql/adei.sql index a17fcfe..5bd7e8f 100644 --- a/setup/projects/adei/files/adei_init/mysql/adei.sql +++ b/setup/projects/adei/files/adei_init/mysql/adei.sql @@ -1 +1,3 @@ GRANT ALL ON `adei_%`.* TO 'adei'@'%'; +UPDATE mysql.user SET Super_Priv='Y' WHERE user='adei' AND host='%'; +FLUSH PRIVILEGES; diff --git a/setup/projects/adei/files/adei_init/mysql/initdb.sh b/setup/projects/adei/files/adei_init/mysql/initdb.sh index f877520..2790c2d 100644 --- a/setup/projects/adei/files/adei_init/mysql/initdb.sh +++ b/setup/projects/adei/files/adei_init/mysql/initdb.sh @@ -11,6 +11,6 @@ done - cat adei.sql | awk "{ gsub(/@PWD@/, \"$PMA_PASSWORD\"); print }" | MYSQL_PWD="$MYSQL_ROOT_PASSWORD" mysql -u root -h $HOSTNAME - #cat pma.sql | awk "{ gsub(/@PWD@/, \"$PMA_PASSWORD\"); print }" | MYSQL_PWD="$MYSQL_ROOT_PASSWORD" mysql -u root -h $HOSTNAME + cat adei.sql | awk "{ gsub(/@PWD@/, \"$MYSQL_PMA_PASSWORD\"); print }" | MYSQL_PWD="$MYSQL_ROOT_PASSWORD" mysql -u root -h $HOSTNAME + #cat pma.sql | awk "{ gsub(/@PWD@/, \"$MYSQL_PMA_PASSWORD\"); print }" | MYSQL_PWD="$MYSQL_ROOT_PASSWORD" mysql -u root -h $HOSTNAME ) diff --git a/setup/projects/adei/templates/01-secret.yml.j2 
b/setup/projects/adei/templates/01-secret.yml.j2 index f310ec9..44d5914 100644 --- a/setup/projects/adei/templates/01-secret.yml.j2 +++ b/setup/projects/adei/templates/01-secret.yml.j2 @@ -12,10 +12,14 @@ objects: metadata: annotations: template.openshift.io/expose-adei_password: '{.data[''adei-password'']}' + template.openshift.io/expose-root_password: '{.data[''root-password'']}' + template.openshift.io/expose-service_password: '{.data[''service-password'']}' template.openshift.io/expose-pma_password: '{.data[''pma-password'']}' name: adei stringData: adei-password: "{{ kaas_project_config.adei_password }}" + root-password: "{{ kaas_project_config.adei_password }}" + service-password: "${SERVICE_PASSWORD}" pma-password: "${PMA_PASSWORD}" parameters: - description: Password for the PMA connection user. @@ -24,3 +28,9 @@ parameters: generate: expression name: PMA_PASSWORD required: true +- description: Password for the service users + displayName: Service Connection Password + from: '[a-zA-Z0-9]{16}' + generate: expression + name: SERVICE_PASSWORD + required: true diff --git a/setup/projects/adei/templates/60-adei.yml.j2 b/setup/projects/adei/templates/60-adei.yml.j2 index 22f4bb0..7eafd33 100644 --- a/setup/projects/adei/templates/60-adei.yml.j2 +++ b/setup/projects/adei/templates/60-adei.yml.j2 @@ -159,6 +159,29 @@ objects: {% endif %} env: {{ cfg.env | to_json }} volumeMounts: {{ cfg.mounts | to_json }} +{% if cfg.resources is defined %} + resources: +{% if cfg.resources.request is defined %} +{% set res = cfg.resources.request %} + requests: +{% if res.cpu %} + cpu: {{ res.cpu }} +{% endif %} +{% if res.cpu %} + memory: {{ res.mem }} +{% endif %} +{% endif %} +{% if cfg.resources.limit is defined %} +{% set res = cfg.resources.limit %} + limits: +{% if res.cpu %} + cpu: {{ res.cpu }} +{% endif %} +{% if res.cpu %} + memory: {{ res.mem }} +{% endif %} +{% endif %} +{% endif %} {% if (cfg.node is defined) %} livenessProbe: timeoutSeconds: 1 diff --git 
a/setup/projects/adei/vars/galera.yml b/setup/projects/adei/vars/galera.yml new file mode 100644 index 0000000..ea64daa --- /dev/null +++ b/setup/projects/adei/vars/galera.yml @@ -0,0 +1,66 @@ +galera_app: + name: galera + provision: true + instantiate: false + pods: + galera: + kind: StatefulSet + service: { ports: [ 3306 ] } + sched: { replicas: 3, strategy: "Recreate", restrict: { fat_storage: "1" } } + update: { strategy: RollingUpdate, min_ready: 30 } + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - { key: "hostid", operator: "In", values: [ "1", "2", "3" ] } + groups: [ "adei_db" ] + images: + - image: "chsa/mysql-galera:5.7" + command: [ "run-mysqld-galera" ] + ports: [ 3306, 4444, 4567, 4568 ] + env: + - { name: "MYSQL_ROOT_PASSWORD", value: "secret@adei/adei-password" } + - { name: "MYSQL_USER", value: "adei" } + - { name: "MYSQL_USER_PRIV_SUPER", value: "1" } + - { name: "MYSQL_PASSWORD", value: "secret@adei/adei-password" } + - { name: "MYSQL_DATABASE", value: "adei" } + - { name: "MYSQL_EXTRADB", value: "adei_%" } + - { name: "MYSQL_GALERA_USER", value: "xtrabackup_sst" } + - { name: "MYSQL_GALERA_PASSWORD", value: "secret@adei/service-password" } + mappings: + - { name: "adei_init", mount: "/var/lib/init" } + - { name: "adei_host", path: "galera", mount: "/var/lib/mysql/data" } + resources: { request: { cpu: 2000m, mem: 4Gi }, limit: { cpu: 6000m, mem: 32Gi } } +# probes: +# - { type: "liveness", port: 3306 } +# - { type: "readiness", command: [ /bin/sh, -i, -c, MYSQL_PWD="$MYSQL_PASSWORD" mysql -h 127.0.0.1 -u $MYSQL_USER -D $MYSQL_DATABASE, -e 'SELECT 1' ], delay: "15", timeout: "5" } + + + grecovery: + sched: { replicas: 0, strategy: "Recreate", restrict: { fat_storage: "1" } } + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - { key: "hostid", operator: "In", values: [ "1", "2", "3" ] } + groups: [ 
"adei_db" ] + images: + - image: "chsa/mysql-galera:5.7" + command: [ "run-mysqld-manager" ] + ports: [ 3306, 4444, 4567, 4568 ] + env: + - { name: "MYSQL_ROOT_PASSWORD", value: "secret@adei/adei-password" } + - { name: "MYSQL_USER", value: "adei" } + - { name: "MYSQL_USER_PRIV_SUPER", value: "1" } + - { name: "MYSQL_PASSWORD", value: "secret@adei/adei-password" } + - { name: "MYSQL_DATABASE", value: "adei" } + - { name: "MYSQL_EXTRADB", value: "adei_%" } + - { name: "MYSQL_GALERA_USER", value: "xtrabackup_sst" } + - { name: "MYSQL_GALERA_PASSWORD", value: "secret@adei/service-password" } + - { name: "POD_NAMESPACE", value: "fieldref@metadata.namespace" } + - { name: "MYSQL_GALERA_CLUSTER", value: "galera-ss" } + mappings: + - { name: "adei_init", mount: "/var/lib/init" } + - { name: "adei_host", path: "galera", mount: "/var/lib/mysql/data" } diff --git a/setup/projects/adei/vars/globals.yml b/setup/projects/adei/vars/globals.yml index 01fb495..86911aa 100644 --- a/setup/projects/adei/vars/globals.yml +++ b/setup/projects/adei/vars/globals.yml @@ -182,6 +182,7 @@ adei_frontends: cacher: name: "adei-${setup}-cacher" replicas: "${cache_replicas}" + resources: { request: { cpu: 1000m, mem: 1Gi } } cmd: [ "/openshift-entrypoint.sh", "/adei/src/scripts/system/cacher.sh" ] env: "{{ adei_pod_env | union(adei_cache_env) }}" vols: "{{ adei_pod_vols }}" diff --git a/setup/projects/adei/vars/pods.yml b/setup/projects/adei/vars/pods.yml index 3923c23..8857fcd 100644 --- a/setup/projects/adei/vars/pods.yml +++ b/setup/projects/adei/vars/pods.yml @@ -1,24 +1,27 @@ pods: mysql: service: { ports: [ 3306 ] } - sched: { replicas: 1, strategy: "Recreate", restrict: { fat_storage: "1" } } + sched: { replicas: 1, strategy: "Recreate", selector: { hostid: "3" } } groups: [ "adei_db" ] images: - - image: "centos/mysql-57-centos7" - env: + - image: "centos/mysql-57-centos7" + env: - { name: "MYSQL_USER", value: "adei" } - { name: "MYSQL_PASSWORD", value: "secret@adei/adei-password" } - - 
{ name: "MYSQL_ROOT_PASSWORD", value: "secret@adei/adei-password" } + - { name: "MYSQL_ROOT_PASSWORD", value: "secret@adei/root-password" } - { name: "MYSQL_DATABASE", value: "adei" } - - { name: "PMA_PASSWORD", value: "secret@adei/pma-password" } - mappings: + - { name: "MYSQL_PMA_PASSWORD", value: "secret@adei/pma-password" } + - { name: "MYSQL_MAX_CONNECTIONS", value: "500" } + mappings: - { name: "adei_init", mount: "/var/lib/init" } - - { name: "adei_db", path: "mysql", mount: "/var/lib/mysql/data" } - probes: + - { name: "adei_host", path: "mysql", mount: "/var/lib/mysql/data" } +# - { name: "adei_db", path: "mysql", mount: "/var/lib/mysql/data" } + resources: { request: { cpu: 2000m, mem: 4Gi }, limit: { cpu: 6000m, mem: 32Gi } } + probes: - { port: 3306 } # - { type: "liveness", port: 3306 } # - { type: "readiness", command: [/bin/sh, -i, -c, MYSQL_PWD="$MYSQL_PASSWORD" mysql -h 127.0.0.1 -u $MYSQL_USER -D $MYSQL_DATABASE, -e 'SELECT 1'] } - hooks: + hooks: - { type: "postStart", command: [ "/bin/bash", "/var/lib/init/mysql/initdb.sh" ] } phpmyadmin: @@ -35,6 +38,14 @@ pods: probes: - { port: 8080, path: '/' } + + +apps: + - "galera_app" + + + + #oc: # - template: "[0-3]*" # - template: "[4-6]*" diff --git a/setup/projects/adei/vars/volumes.yml b/setup/projects/adei/vars/volumes.yml index 768e27f..82f2e18 100644 --- a/setup/projects/adei/vars/volumes.yml +++ b/setup/projects/adei/vars/volumes.yml @@ -3,20 +3,23 @@ gids: adei_db: { id: 6002 } volumes: + adei_host: { volume: "hostraid", path: "/adei", write: true } # mysql adei_init: { volume: "openshift", path: "/adei/init"} # mysql - adei_etc: { volume: "openshift", path: "/adei/etc"} # mysql (maybe) - adei_src: { volume: "openshift", path: "/adei/src", write: true } # prod & debug (init creates setup links) - adei_cfg: { volume: "openshift", path: "/adei/cfg", write: true } # per-setup configs (ADEI/wiki modifies setup) - adei_sys: { volume: "openshift", path: "/adei/sys" } # per-setup cron-jon overrides - 
adei_tmp: { volume: "temporary", path: "/adei/tmp", write: true } # per-setup temporary files - adei_log: { volume: "temporary", path: "/adei/log", write: true } # per-replica (should be fine) temporary files - adei_db: { volume: "databases", path: "/adei", write: true } # mysql + adei_etc: { volume: "openshift", path: "/adei/etc"} # mysql (maybe) + adei_src: { volume: "openshift", path: "/adei/src", write: true } # prod & debug (init creates setup links) + adei_cfg: { volume: "openshift", path: "/adei/cfg", write: true } # per-setup configs (ADEI/wiki modifies setup) + adei_sys: { volume: "openshift", path: "/adei/sys" } # per-setup cron-jon overrides + adei_tmp: { volume: "temporary", path: "/adei/tmp", write: true } # per-setup temporary files + adei_log: { volume: "temporary", path: "/adei/log", write: true } # per-replica (should be fine) temporary files +# adei_db: { volume: "databases", path: "/adei", write: true } # mysql files: - - { osv: "adei_cfg", path: "/", state: "directory", group: "adei", mode: "02775" } - - { osv: "adei_src", path: "/", state: "directory", group: "adei", mode: "02775" } - - { osv: "adei_src", path: "/prod", state: "directory", group: "adei", mode: "02775" } - - { osv: "adei_src", path: "/dbg", state: "directory", group: "adei", mode: "02775" } - - { osv: "adei_log", path: "/", state: "directory", group: "adei", mode: "02775" } - - { osv: "adei_tmp", path: "/", state: "directory", group: "adei", mode: "02775" } - - { osv: "adei_db", path: "mysql", state: "directory", group: "adei_db", mode: "02775" } + - { osv: "adei_cfg", path: "/", state: "directory", group: "adei", mode: "02775" } + - { osv: "adei_src", path: "/", state: "directory", group: "adei", mode: "02775" } + - { osv: "adei_src", path: "/prod", state: "directory", group: "adei", mode: "02775" } + - { osv: "adei_src", path: "/dbg", state: "directory", group: "adei", mode: "02775" } + - { osv: "adei_log", path: "/", state: "directory", group: "adei", mode: "02775" } + - { 
osv: "adei_tmp", path: "/", state: "directory", group: "adei", mode: "02775" } + - { osv: "adei_host",path: "mysql", state: "directory", group: "adei_db", mode: "02775" } + - { osv: "adei_host",path: "galera", state: "directory", group: "adei_db", mode: "02775" } +# - { osv: "adei_db", path: "mysql", state: "directory", group: "adei_db", mode: "02775" } diff --git a/setup/projects/kaas/templates/40-kaas-manager.yml.j2 b/setup/projects/kaas/templates/40-kaas-manager.yml.j2 index b9cba4e..0e0f45e 100644 --- a/setup/projects/kaas/templates/40-kaas-manager.yml.j2 +++ b/setup/projects/kaas/templates/40-kaas-manager.yml.j2 @@ -13,7 +13,7 @@ objects: metadata: name: kaas-manager spec: - replicas: 1 + replicas: 0 revisionHistoryLimit: {{ kaas_pod_history_limit }} strategy: type: Rolling |