Skip to content

Commit 70ea69d

Browse files
committed
fix(deploy): account for readiness initial delay when determining if a pod is ready
By default, Kubernetes reports a readiness probe as Failure when an initial delay is set on it, until that delay is over. Many users add an initial delay so that their application can warm up caches on pod startup. Partially addresses #706.
1 parent 0a541e6 commit 70ea69d

1 file changed

Lines changed: 25 additions & 6 deletions

File tree

rootfs/scheduler/__init__.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -854,16 +854,35 @@ def _wait_until_pods_terminate(self, namespace, labels, current, desired):
854854

855855
logger.debug("{} pods in namespace {} are terminated".format(delta, namespace)) # noqa
856856

857-
def _get_pod_ready_status(self, namespace, labels, desired):
857+
def _get_pod_ready_status(self, namespace, controller, labels, desired):
858858
# If desired is 0 then there is no ready state to check on
859859
if desired == 0:
860860
return
861861

862-
# Ensure the minimum desired number of pods are available
863-
logger.debug("waiting for {} pods in {} namespace to be in services (120s timeout)".format(desired, namespace)) # noqa
864862
waited = 0
865863
timeout = 120 # 2 minutes
866-
timeout_padded = False # has timeout been increased or not
864+
# If there is initial delay on the readiness check then timeout needs to be higher
865+
# this is to account for kubernetes having readiness check report as failure until
866+
# the initial delay period is up
867+
delay = 0
868+
container_name = '{}-{}'.format(
869+
controller['metadata']['labels']['app'],
870+
controller['metadata']['labels']['type']
871+
)
872+
# get health info from spec
873+
for container in controller['spec']['template']['spec']['containers']:
874+
if container['name'] != container_name or 'readinessProbe' not in container:
875+
continue
876+
877+
delay = int(container['readinessProbe']['initialDelaySeconds'])
878+
logger.debug("adding {}s on to the original {}s timeout to account for the initial delay specified in the readiness probe for the RC".format(delay, timeout, controller['metadata']['name'])) # noqa
879+
timeout += delay
880+
881+
logger.debug("waiting for {} pods in {} namespace to be in services ({} timeout)".format(desired, namespace, timeout)) # noqa
882+
883+
# has timeout been increased or not within the loop
884+
timeout_padded = False
885+
# Ensure the minimum desired number of pods are available
867886
while True:
868887
# timed out, time to bail
869888
if waited > timeout:
@@ -873,7 +892,7 @@ def _get_pod_ready_status(self, namespace, labels, desired):
873892
count = 0 # ready pods
874893
pods = self._get_pods(namespace, labels=labels).json()
875894
for pod in pods['items']:
876-
# If pulling an image is taking long then increase the timout
895+
# If pulling an image is taking long then increase the timeout
877896
if (
878897
pod['status']['phase'] == 'Pending' and
879898
self._pod_pending_status(pod) == 'Pulling' and
@@ -954,7 +973,7 @@ def _scale_rc(self, namespace, name, desired):
954973
logger.debug("RC {} has a new resource version {}".format(name, js_template["metadata"]["resourceVersion"])) # noqa
955974

956975
# Double check enough pods are in the required state to service the application
957-
self._get_pod_ready_status(namespace, labels, desired)
976+
self._get_pod_ready_status(namespace, rc, labels, desired)
958977

959978
# if it was a scale down operation, wait until terminating pods are done
960979
if int(desired) < int(current):

0 commit comments

Comments
 (0)