shipyard/shipyard_airflow/plugins/check_k8s_pod_status.py
Anthony Lin 7225cc76cb Add Operator to Check Node Status
Had a discussion with Scott and I agree that checking
node status (instead of pod readiness) should be the way
to determine if the cluster-join process is completed.

This P.S. adds the operator to address that in the short
run. The long term solution will be to add a promenade API
endpoint that checks cluster-join and Airflow will query
that endpoint instead.

This P.S. also updates some of the comments/wordings used
in the 'check_k8s_pod_status' operator.

Change-Id: I2e5390f1f64c7d8ce374b2537e6d14e65dffecd6
2017-12-07 12:21:35 -05:00

73 lines
2.6 KiB
Python

# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import time
from airflow.exceptions import AirflowException
from kubernetes import client, config
def check_pods_status(required_pods):
    """Report whether every required pod in the Kubernetes cluster is up.

    The pods of all namespaces are listed and matched against
    ``required_pods`` by substring, so an entry such as ``'ceph-'``
    covers every pod whose name contains ``ceph-``. A pod counts as up
    when its phase is 'Succeeded' or 'Running'.

    When a matching pod is not up yet, the function logs the offending
    pod, sleeps for 30 seconds and then returns ``False``, so callers
    can poll it in a plain loop without adding their own back-off.

    :param required_pods: List of pod name fragments (strings) that are
        of interest to us
    :returns: ``True`` if every pod matching an entry of
        ``required_pods`` is in 'Succeeded' or 'Running' state (this is
        also the result for an empty ``required_pods``), ``False`` if
        at least one matching pod is in any other state
    :raises AirflowException: If no pod in the cluster matches one or
        more entries of ``required_pods``

    Example::

        from check_k8s_pod_status import check_pods_status

        pods_ready = False
        required_pods = ['ceph-', 'calico-', 'coredns-', 'kubernetes-']

        # Time out can be set as required
        while not pods_ready:
            pods_ready = check_pods_status(required_pods)
    """
    # Note that we are using 'in_cluster_config'
    config.load_incluster_config()
    v1 = client.CoreV1Api()
    ret = v1.list_pod_for_all_namespaces(watch=False)

    # Collect, for every required entry, the pods whose name contains it.
    # The whole pod list has to be scanned before an entry can be declared
    # missing; deciding that while looking at a single pod would reject
    # every cluster that runs any pod other than the required ones.
    matches_by_entry = [
        (pod_name,
         [pod for pod in ret.items if pod_name in pod.metadata.name])
        for pod_name in required_pods
    ]

    # Raise an exception if a required pod does not exist in the
    # Kubernetes cluster. All missing entries are reported at once.
    missing = [pod_name for pod_name, pods in matches_by_entry if not pods]
    if missing:
        # Single pre-formatted message: passing the name as a second
        # positional argument would render the error as a tuple.
        raise AirflowException(
            "Unable to locate pod(s) %s" % ", ".join(missing))

    # Return Boolean 'False' if a required pod is not in 'Succeeded' or
    # 'Running' state
    for _, pods in matches_by_entry:
        for pod in pods:
            if pod.status.phase not in ('Succeeded', 'Running'):
                logging.info("Pods are not in ready state...")
                logging.info("%s is in %s state", pod.metadata.name,
                             pod.status.phase)
                logging.info("Wait for 30 seconds...")

                # Back off for 30 seconds before reporting the result so
                # that a polling caller does not hammer the API server
                time.sleep(30)
                return False

    # Return True when all required pods are in 'Succeeded' or
    # 'Running' state
    return True