#!/usr/bin/env bash

# Copyright 2018-2023 Open Networking Foundation (ONF) and the ONF Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# wait_for_pods.sh
# waits for all kubernetes pods to complete before exiting, optionally only
# pods in a specific namespace passed as first argument
# inspired by similar scripts in Kolla-Kubernetes and Openstack Helm
#
# Usage: wait_for_pods.sh [NAMESPACE|all]
#
# Environment:
#   PODS_TIMEOUT    maximum number of seconds to wait (default: 600)
#   CHECK_INTERVAL  argument passed to sleep between checks (default: 5)
#   KUBECTL_ARGS    extra arguments added to every kubectl call (default: none)
#
# Exit status:
#   0 when no pending pods, unready containers or active jobs remain,
#   1 when the timeout expires first. Because of `set -e`, a kubectl call
#   that fails aborts the script immediately with a non-zero status.

set -e -o pipefail
fail_wfp=0

# Set these to configure maximum timeout, and interval for checks
PODS_TIMEOUT=${PODS_TIMEOUT:-600}
CHECK_INTERVAL=${CHECK_INTERVAL:-5}
KUBECTL_ARGS=${KUBECTL_ARGS:-}

# use namespace if passed as first arg, or "all" for all namespaces
if [ -n "${1:-}" ]; then
  if [[ "$1" == "all" ]]; then
    KUBECTL_ARGS+=" --all-namespaces"
  else
    KUBECTL_ARGS+=" --namespace=$1"
  fi
fi
set -u

#######################################
# Count the lines in a newline-separated list.
# Arguments: $1 - list text as captured by $(...), possibly empty
# Outputs:   the number of lines on stdout; 0 for an empty string
#######################################
count_lines() {
  if [ -z "$1" ]; then
    # An empty capture must be special-cased: printing it would still emit
    # one newline and be counted as a single line.
    echo 0
  else
    # $(...) stripped the trailing newline, so add exactly one back.
    # The arithmetic expansion drops the padding some wc builds print.
    echo "$(($(printf '%s\n' "$1" | wc -l)))"
  fi
}

# calculate timeout time
START_TIME=$(date +%s)
END_TIME=$((START_TIME + PODS_TIMEOUT))

echo "wait_for_pods.sh - Waiting up to ${PODS_TIMEOUT} seconds for all Kubernetes pods to be ready"
echo "Number printed is number of jobs/pods/containers waiting to be ready"

prev_total_unready=0

while true; do
  NOW=$(date +%s)

  # handle timeout without completion
  if [ "$NOW" -gt "$END_TIME" ]; then
    # finish the line of progress counters before printing the message
    echo ""
    echo "Pods/Containers/Jobs not ready before timeout of ${PODS_TIMEOUT} seconds"
    fail_wfp=1
    break
  fi

  # get list of uncompleted items with jsonpath, then count the lines
  # ref: https://kubernetes.io/docs/reference/kubectl/jsonpath/
  # jsonpath is picky about string vs comparison quoting, so may need to
  # disable SC2026 for these lines. SC2086 allows for multiple args:
  # KUBECTL_ARGS is intentionally left unquoted so it splits into words.

  # pods whose phase is still "Pending"
  # shellcheck disable=SC2026,SC2086
  pending_pods=$(kubectl get pods ${KUBECTL_ARGS} -o=jsonpath='{range .items[?(@.status.phase=="Pending")]}{.metadata.name}{"\n"}{end}')
  pending_pod_count=$(count_lines "$pending_pods")

  # containers reporting ready==false inside pods whose phase is "Running"
  # shellcheck disable=SC2026,SC2086
  unready_containers=$(kubectl get pods ${KUBECTL_ARGS} -o=jsonpath='{range .items[?(@.status.phase=="Running")]}{range .status.containerStatuses[?(@.ready==false)]}{.name}: {.ready}{"\n"}{end}{end}')
  unready_container_count=$(count_lines "$unready_containers")

  # jobs that still have at least one active pod; comparing with >0 rather
  # than ==1 also catches jobs running several pods in parallel
  # shellcheck disable=SC2086
  active_jobs=$(kubectl get jobs ${KUBECTL_ARGS} -o=jsonpath='{range .items[?(@.status.active>0)]}{.metadata.name}{"\n"}{end}')
  active_job_count=$(count_lines "$active_jobs")

  total_unready=$((pending_pod_count + unready_container_count + active_job_count))

  # if everything is ready, print runtime and break
  if [ "$total_unready" -eq 0 ]; then
    runtime=$((NOW - START_TIME))
    echo ""
    echo "All pods ready in $runtime seconds"
    break
  fi

  # report the breakdown whenever the total changes
  if [ "$total_unready" -ne "$prev_total_unready" ]; then
    echo ""
    echo "Change in unready pods - Pending Pods: $pending_pod_count, Unready Containers: $unready_container_count, Active Jobs: $active_job_count"
  fi
  prev_total_unready=$total_unready

  # print number of unready pods every $CHECK_INTERVAL
  echo -n "$total_unready "
  sleep "$CHECK_INTERVAL"
done

exit "${fail_wfp}"