From 57b1f3905ba249259d2fe3b5b6fc12620920e12f Mon Sep 17 00:00:00 2001
From: Steven Fitzpatrick <steven.fitzpatrick@att.com>
Date: Fri, 10 Jul 2020 14:31:22 -0500
Subject: [PATCH] Elasticsearch - Cluster Wait Function Improvements

This change modifies the cluster wait function to
check the cluster health status explicitly.

Once a status of at least "yellow" has been reached,
the Elasticsearch cluster should be able to facilitate
the API calls required by the other jobs of this chart.

Change-Id: I2660422a8e8122186d648042f5422ca9a82d23c7
---
 .../templates/bin/_es-cluster-wait.sh.tpl     | 98 ++-----------------
 .../job-register-snapshot-repository.yaml     |  2 -
 elasticsearch/values.yaml                     |  5 +-
 3 files changed, 10 insertions(+), 95 deletions(-)

diff --git a/elasticsearch/templates/bin/_es-cluster-wait.sh.tpl b/elasticsearch/templates/bin/_es-cluster-wait.sh.tpl
index da4f6e16a..d4ae9ac11 100644
--- a/elasticsearch/templates/bin/_es-cluster-wait.sh.tpl
+++ b/elasticsearch/templates/bin/_es-cluster-wait.sh.tpl
@@ -13,96 +13,16 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */}}
 
-function check_master_nodes() {
-  numMasterNodes=0
-  expectedMasterNodes={{ .Values.pod.replicas.master | int64 }}
-  while [ "$numMasterNodes" -ne "$expectedMasterNodes" ]
-  do
-    currentMasterNodes=$(curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
-      "${ELASTICSEARCH_HOST}/_cat/nodes?format=json&pretty" | jq -r '.[] | select(.name|test("elasticsearch-master.")) | .name')
-    numMasterNodes=$(echo $currentMasterNodes | wc -w)
-    if [ "$numMasterNodes" -ne "$expectedMasterNodes" ]
-    then
-      if [ "$numMasterNodes" -eq 0 ]
-      then
-        echo "No Elasticsearch master nodes accounted for: 0/${expectedMasterNodes}"
-      else
-        echo "Not all Elasticsearch master nodes accounted for and ready: (${numMasterNodes} / ${expectedMasterNodes})"
-        echo "$currentMasterNodes"
-      fi
-      echo "Sleeping for 10 seconds before next check"
-      echo ""
-      sleep 10
-    fi
-  done
-  echo "All Elasticsearch master nodes accounted for and ready: (${numMasterNodes} / ${expectedMasterNodes})"
-  echo "$currentMasterNodes"
-  echo ""
-}
-
-function check_data_nodes() {
-  numDataNodes=0
-  expectedDataNodes={{ .Values.pod.replicas.data | int64 }}
-  while [ "$numDataNodes" -ne "$expectedDataNodes" ]
-  do
-    currentDataNodes=$(curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
-      "${ELASTICSEARCH_HOST}/_cat/nodes?format=json&pretty" | jq -r '.[] | select(.name|test("elasticsearch-data.")) | .name')
-    numDataNodes=$(echo $currentDataNodes | wc -w)
-    if [ "$numDataNodes" -ne "$expectedDataNodes" ]
-    then
-      if [ "$numDataNodes" -eq 0 ]
-      then
-        echo "No Elasticsearch data nodes accounted for: 0/${expectedDataNodes}"
-      else
-        echo "Not all Elasticsearch data nodes accounted for and ready: (${numDataNodes} / ${expectedDataNodes})"
-        echo "$currentDataNodes"
-      fi
-      echo "Sleeping for 10 seconds before next check"
-      echo ""
-      sleep 10
-    fi
-  done
-  echo "All Elasticsearch data nodes accounted for and ready: (${numDataNodes} / ${expectedDataNodes})"
-  echo "$currentDataNodes"
-  echo ""
-}
-
-function check_client_nodes() {
-  numClientNodes=0
-  expectedClientNodes={{ .Values.pod.replicas.client | int64 }}
-  while [ "$numClientNodes" -ne "$expectedClientNodes" ]
-  do
-    currentClientNodes=$(curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
-      "${ELASTICSEARCH_HOST}/_cat/nodes?format=json&pretty" | jq -r '.[] | select(.name|test("elasticsearch-client.")) | .name')
-    numClientNodes=$(echo $currentClientNodes | wc -w)
-    if [ "$numClientNodes" -ne "$expectedClientNodes" ]
-    then
-      if [ "$numClientNodes" -eq 0 ]
-      then
-        echo "No Elasticsearch client nodes accounted for: 0/${expectedClientNodes}"
-      else
-        echo "Not all Elasticsearch client nodes accounted for and ready: (${numClientNodes} / ${expectedClientNodes})"
-        echo "$currentClientNodes"
-      fi
-      echo "Sleeping for 10 seconds before next check"
-      echo ""
-      sleep 10
-    fi
-  done
-  echo "All Elasticsearch client nodes accounted for and ready: (${numClientNodes} / ${expectedClientNodes})"
-  echo "$currentClientNodes"
-  echo ""
-}
-
 function check_cluster_health() {
-  clusterHealth=$(curl -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
-    "${ELASTICSEARCH_HOST}/_cat/health?format=json&pretty")
-  echo "Elasticsearch cluster health is:"
-  echo "$clusterHealth"
+  STATUS=$(curl -sS -K- <<< "--user ${ELASTICSEARCH_USERNAME}:${ELASTICSEARCH_PASSWORD}" \
+    "${ELASTICSEARCH_HOST}/_cat/health?format=json&pretty" | jq -r '.[].status')
+  echo "Status: $STATUS"  # STATUS is intentionally global; -sS keeps curl quiet but still reports errors
 }
 
-sleep 10
-check_data_nodes
-check_client_nodes
-check_master_nodes
 check_cluster_health
+while [[ $STATUS != "yellow" && $STATUS != "green" ]]; do  # empty/unknown status (e.g. failed request) is NOT ready
+  echo "Waiting for cluster to become ready."
+  sleep 30
+  check_cluster_health
+done
+echo "Cluster is ready."
diff --git a/elasticsearch/templates/job-register-snapshot-repository.yaml b/elasticsearch/templates/job-register-snapshot-repository.yaml
index 18a9a303f..e2c24ed0a 100644
--- a/elasticsearch/templates/job-register-snapshot-repository.yaml
+++ b/elasticsearch/templates/job-register-snapshot-repository.yaml
@@ -28,7 +28,6 @@ metadata:
   annotations:
     {{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
 spec:
-  backoffLimit: {{ .Values.jobs.snapshot_repository.backoffLimit }}
   template:
     metadata:
       labels:
@@ -38,7 +37,6 @@ spec:
     spec:
 {{ dict "envAll" $envAll "application" "snapshot_repository" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 6 }}
       serviceAccountName: {{ $serviceAccountName }}
-      activeDeadlineSeconds: {{ .Values.jobs.snapshot_repository.activeDeadlineSeconds }}
       restartPolicy: OnFailure
       nodeSelector:
         {{ .Values.labels.job.node_selector_key }}: {{ .Values.labels.job.node_selector_value | quote }}
diff --git a/elasticsearch/values.yaml b/elasticsearch/values.yaml
index 2f4206c18..00684345e 100644
--- a/elasticsearch/values.yaml
+++ b/elasticsearch/values.yaml
@@ -420,10 +420,7 @@ jobs:
       failed: 1
   es_cluster_wait:
     backoffLimit: 6
-    activeDeadlineSeconds: 600
-  snapshot_repository:
-    backoffLimit: 6
-    activeDeadlineSeconds: 600
+    activeDeadlineSeconds: 1200
   verify_repositories:
     cron: "*/30 * * * *"
     history: