Skip to content

Commit 370c21a

Browse files
committed
Merge pull request #709 from helgi/initial_delay
fix(deploy): account for readiness initial delay when determining if a pod is ready
2 parents 0a541e6 + 70ea69d commit 370c21a

1 file changed

Lines changed: 25 additions & 6 deletions

File tree

rootfs/scheduler/__init__.py

Lines changed: 25 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -854,16 +854,35 @@ def _wait_until_pods_terminate(self, namespace, labels, current, desired):
854854

855855
logger.debug("{} pods in namespace {} are terminated".format(delta, namespace)) # noqa
856856

857-
def _get_pod_ready_status(self, namespace, labels, desired):
857+
def _get_pod_ready_status(self, namespace, controller, labels, desired):
858858
# If desired is 0 then there is no ready state to check on
859859
if desired == 0:
860860
return
861861

862-
# Ensure the minimum desired number of pods are available
863-
logger.debug("waiting for {} pods in {} namespace to be in services (120s timeout)".format(desired, namespace)) # noqa
864862
waited = 0
865863
timeout = 120 # 2 minutes
866-
timeout_padded = False # has timeout been increased or not
864+
# If there is initial delay on the readiness check then timeout needs to be higher
865+
# this is to account for kubernetes having readiness check report as failure until
866+
# the initial delay period is up
867+
delay = 0
868+
container_name = '{}-{}'.format(
869+
controller['metadata']['labels']['app'],
870+
controller['metadata']['labels']['type']
871+
)
872+
# get health info from spec
873+
for container in controller['spec']['template']['spec']['containers']:
874+
if container['name'] != container_name or 'readinessProbe' not in container:
875+
continue
876+
877+
delay = int(container['readinessProbe']['initialDelaySeconds'])
878+
logger.debug("adding {}s on to the original {}s timeout to account for the initial delay specified in the readiness probe for the RC".format(delay, timeout, controller['metadata']['name'])) # noqa
879+
timeout += delay
880+
881+
logger.debug("waiting for {} pods in {} namespace to be in services ({} timeout)".format(desired, namespace, timeout)) # noqa
882+
883+
# has timeout been increased or not within the loop
884+
timeout_padded = False
885+
# Ensure the minimum desired number of pods are available
867886
while True:
868887
# timed out, time to bail
869888
if waited > timeout:
@@ -873,7 +892,7 @@ def _get_pod_ready_status(self, namespace, labels, desired):
873892
count = 0 # ready pods
874893
pods = self._get_pods(namespace, labels=labels).json()
875894
for pod in pods['items']:
876-
# If pulling an image is taking long then increase the timout
895+
# If pulling an image is taking long then increase the timeout
877896
if (
878897
pod['status']['phase'] == 'Pending' and
879898
self._pod_pending_status(pod) == 'Pulling' and
@@ -954,7 +973,7 @@ def _scale_rc(self, namespace, name, desired):
954973
logger.debug("RC {} has a new resource version {}".format(name, js_template["metadata"]["resourceVersion"])) # noqa
955974

956975
# Double check enough pods are in the required state to service the application
957-
self._get_pod_ready_status(namespace, labels, desired)
976+
self._get_pod_ready_status(namespace, rc, labels, desired)
958977

959978
# if it was a scale down operation, wait until terminating pods are done
960979
if int(desired) < int(current):

0 commit comments

Comments
 (0)