Skip to content

Commit 6e9b330

Browse files
committed
feat(health checks): implement new kubernetes 1.2 health check features
Added the following new settings:
* HEALTHCHECK_PERIOD_SECONDS - How often (in seconds) to perform the probe
* HEALTHCHECK_SUCCESS_THRESHOLD - How many consecutive probe successes are needed after a failure for the probe to be considered successful again
* HEALTHCHECK_FAILURE_THRESHOLD - How many times a probe must fail before it is considered to be down

These defaults are all copied from Kubernetes, but they may have to be adjusted because of our high default timeout; alternatively, we bring the default timeout down.

Closes #251
1 parent 0702285 commit 6e9b330

2 files changed

Lines changed: 46 additions & 6 deletions

File tree

rootfs/api/models/config.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,19 @@ def healthcheck(self):
3838
timeout = int(self.values.get('HEALTHCHECK_TIMEOUT', 50))
3939
delay = int(self.values.get('HEALTHCHECK_INITIAL_DELAY', 50))
4040
port = int(self.values.get('HEALTHCHECK_PORT', 5000))
41-
42-
return {'path': path, 'timeout': timeout, 'delay': delay, 'port': port}
41+
period_seconds = int(self.values.get('HEALTHCHECK_PERIOD_SECONDS', 10))
42+
success_threshold = int(self.values.get('HEALTHCHECK_SUCCESS_THRESHOLD', 1))
43+
failure_threshold = int(self.values.get('HEALTHCHECK_FAILURE_THRESHOLD', 3))
44+
45+
return {
46+
'path': path,
47+
'timeout': timeout,
48+
'delay': delay,
49+
'port': port,
50+
'period_seconds': period_seconds,
51+
'success_threshold': success_threshold,
52+
'failure_threshold': failure_threshold,
53+
}
4354

4455
def set_healthchecks(self):
4556
"""Defines default values for HTTP healthchecks"""
@@ -49,10 +60,25 @@ def set_healthchecks(self):
4960
# fetch set health values and any defaults
5061
# this approach allows new health items to be added without issues
5162
health = self.healthcheck()
63+
64+
# HTTP GET related
5265
self.values['HEALTHCHECK_URL'] = health['path']
66+
self.values['HEALTHCHECK_PORT'] = health['port']
67+
68+
# Number of seconds after which the probe times out.
69+
# More info: http://releases.k8s.io/HEAD/docs/user-guide/pod-states.md#container-probes
5370
self.values['HEALTHCHECK_TIMEOUT'] = health['timeout']
71+
# Number of seconds after the container has started before liveness probes are initiated.
72+
# More info: http://releases.k8s.io/HEAD/docs/user-guide/pod-states.md#container-probes
5473
self.values['HEALTHCHECK_INITIAL_DELAY'] = health['delay']
55-
self.values['HEALTHCHECK_PORT'] = health['port']
74+
# How often (in seconds) to perform the probe.
75+
self.values['HEALTHCHECK_PERIOD_SECONDS'] = health['period_seconds']
76+
# Minimum consecutive successes for the probe to be considered successful
77+
# after having failed.
78+
self.values['HEALTHCHECK_SUCCESS_THRESHOLD'] = health['success_threshold']
79+
# Minimum consecutive failures for the probe to be considered failed after
80+
# having succeeded.
81+
self.values['HEALTHCHECK_FAILURE_THRESHOLD'] = health['failure_threshold']
5682

5783
def save(self, **kwargs):
5884
"""merge the old config with the new"""

rootfs/scheduler/__init__.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -986,7 +986,15 @@ def _delete_rc(self, namespace, name):
986986

987987
return response
988988

989-
def _healthcheck(self, controller, routable=False, path='/', port=5000, delay=30, timeout=5): # noqa
989+
def _healthcheck(self, controller, routable=False, path='/', port=5000, delay=30, timeout=5,
990+
period_seconds=1, success_threshold=1, failure_threshold=3): # noqa
991+
"""
992+
Apply HTTP GET healthcheck to the application container
993+
994+
http://kubernetes.io/docs/user-guide/walkthrough/k8s201/#health-checking
995+
http://kubernetes.io/docs/user-guide/pod-states/#container-probes
996+
http://kubernetes.io/docs/user-guide/liveness/
997+
"""
990998
if not routable:
991999
return controller
9921000

@@ -1011,7 +1019,10 @@ def _healthcheck(self, controller, routable=False, path='/', port=5000, delay=30
10111019
# length of time to wait for a pod to initialize
10121020
# after pod startup, before applying health checking
10131021
'initialDelaySeconds': delay,
1014-
'timeoutSeconds': timeout
1022+
'timeoutSeconds': timeout,
1023+
'periodSeconds': period_seconds,
1024+
'successThreshold': success_threshold,
1025+
'failureThreshold': failure_threshold,
10151026
},
10161027
'readinessProbe': {
10171028
# an http probe
@@ -1022,7 +1033,10 @@ def _healthcheck(self, controller, routable=False, path='/', port=5000, delay=30
10221033
# length of time to wait for a pod to initialize
10231034
# after pod startup, before applying health checking
10241035
'initialDelaySeconds': delay,
1025-
'timeoutSeconds': timeout
1036+
'timeoutSeconds': timeout,
1037+
'periodSeconds': period_seconds,
1038+
'successThreshold': success_threshold,
1039+
'failureThreshold': failure_threshold,
10261040
},
10271041
}
10281042

0 commit comments

Comments
 (0)