|
| 1 | +from datetime import datetime |
1 | 2 | import json |
2 | 3 | import logging |
3 | 4 | import os |
|
63 | 64 | } |
64 | 65 | ], |
65 | 66 | "volumes":[ |
66 | | - { |
| 67 | + { |
67 | 68 | "name":"objectstorage-keyfile", |
68 | 69 | "secret":{ |
69 | 70 | "secretName":"objectstorage-keyfile" |
70 | 71 | } |
71 | | - } |
| 72 | + } |
72 | 73 | ], |
| 74 | + "terminationGracePeriodSeconds": "$terminationGracePeriodSeconds", |
73 | 75 | "restartPolicy": "Never" |
74 | 76 | } |
75 | 77 | } |
|
90 | 92 | "env": [] |
91 | 93 | } |
92 | 94 | ], |
| 95 | + "terminationGracePeriodSeconds": "$terminationGracePeriodSeconds", |
93 | 96 | "restartPolicy": "Never" |
94 | 97 | } |
95 | 98 | } |
|
126 | 129 | } |
127 | 130 | }, |
128 | 131 | "spec": { |
| 132 | + "terminationGracePeriodSeconds": "$terminationGracePeriodSeconds", |
129 | 133 | "containers": [ |
130 | 134 | { |
131 | 135 | "name": "$containername", |
|
181 | 185 | } |
182 | 186 | }, |
183 | 187 | "spec": { |
| 188 | + "terminationGracePeriodSeconds": "$terminationGracePeriodSeconds", |
184 | 189 | "containers": [ |
185 | 190 | { |
186 | 191 | "name": "$containername", |
|
217 | 222 | } |
218 | 223 | ], |
219 | 224 | "volumeMounts":[ |
220 | | - { |
| 225 | + { |
221 | 226 | "name":"objectstorage-keyfile", |
222 | 227 | "mountPath":"/var/run/secrets/deis/objectstore/creds", |
223 | 228 | "readOnly":true |
224 | | - } |
| 229 | + } |
225 | 230 | ] |
226 | 231 | } |
227 | 232 | ], |
@@ -508,7 +513,8 @@ def run(self, namespace, name, image, entrypoint, command, **kwargs): |
508 | 513 | 'version': self.apiversion, |
509 | 514 | 'image': imgurl, |
510 | 515 | 'image_pull_policy': settings.DOCKER_BUILDER_IMAGE_PULL_POLICY, |
511 | | - 'storagetype': os.getenv("APP_STORAGE") |
| 516 | + 'storagetype': os.getenv("APP_STORAGE"), |
| 517 | + 'terminationGracePeriodSeconds': settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS # noqa |
512 | 518 | } |
513 | 519 |
|
514 | 520 | if entrypoint == '/runner/init': |
@@ -796,13 +802,33 @@ def _get_rcs(self, namespace, **kwargs): |
796 | 802 | return response |
797 | 803 |
|
798 | 804 | def _wait_until_pods_terminate(self, namespace, labels, current, desired): |
799 | | - delta = current - desired |
| 805 | + """Wait until all the desired pods are terminated""" |
| 806 | + # http://kubernetes.io/docs/api-reference/v1/definitions/#_v1_podspec |
| 807 | + # https://github.com/kubernetes/kubernetes/blob/release-1.2/docs/devel/api-conventions.md#metadata |
| 808 | + # http://kubernetes.io/docs/user-guide/pods/#termination-of-pods |
800 | 809 |
|
801 | | - logger.debug("waiting for {} pods in {} namespace to be terminated (120s timeout)".format(delta, namespace)) # noqa |
802 | | - for waited in range(120): |
| 810 | + timeout = settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS |
| 811 | + delta = current - desired |
| 812 | + logger.debug("waiting for {} pods in {} namespace to be terminated ({}s timeout)".format(delta, namespace, timeout)) # noqa |
| 813 | + for waited in range(timeout): |
803 | 814 | pods = self._get_pods(namespace, labels=labels).json() |
804 | 815 | count = len(pods['items']) |
805 | 816 |
|
| 817 | + # see if any pods are past their terminationGracePeriodSeconds (as in stuck) |
| 818 | + # seems to be a problem in k8s around that: |
| 819 | + # https://github.com/kubernetes/kubernetes/search?q=terminating&type=Issues |
| 820 | + # these will be eventually GC'ed by k8s, ignoring them for now |
| 821 | + for pod in pods['items']: |
| 822 | + if 'deletionTimestamp' in pod['metadata']: |
| 823 | + deletion = datetime.strptime( |
| 824 | + pod['metadata']['deletionTimestamp'], |
| 825 | + settings.DEIS_DATETIME_FORMAT |
| 826 | + ) |
| 827 | + |
| 828 | + # past the graceful deletion period |
| 829 | + if deletion < datetime.utcnow(): |
| 830 | + count -= 1 |
| 831 | + |
806 | 832 | # stop when all pods are terminated as expected |
807 | 833 | if count == desired: |
808 | 834 | break |
@@ -909,6 +935,7 @@ def _create_rc(self, namespace, name, image, command, **kwargs): # noqa |
909 | 935 | "storagetype": storageType, |
910 | 936 | "mHost": os.getenv("DEIS_MINIO_SERVICE_HOST"), |
911 | 937 | "mPort": os.getenv("DEIS_MINIO_SERVICE_PORT"), |
| 938 | + "terminationGracePeriodSeconds": settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS # noqa |
912 | 939 | } |
913 | 940 |
|
914 | 941 | # Check if it is a slug builder image. |
@@ -1313,9 +1340,16 @@ def _pod_readiness_status(self, pod): |
1313 | 1340 | if not container['ready']: |
1314 | 1341 | if 'running' in container['state'].keys(): |
1315 | 1342 | return 'Starting' |
1316 | | - elif 'terminated' in container['state'].keys(): |
| 1343 | + elif ( |
| 1344 | + 'terminated' in container['state'].keys() or |
| 1345 | + 'deletionTimestamp' in pod['metadata'] |
| 1346 | + ): |
1317 | 1347 | return 'Terminating' |
1318 | 1348 | else: |
| 1349 | + # See if the pod is being terminated (deletionTimestamp is set in its metadata) |
| 1350 | + if 'deletionTimestamp' in pod['metadata']: |
| 1351 | + return 'Terminating' |
| 1352 | + |
1319 | 1353 | return 'Running' |
1320 | 1354 |
|
1321 | 1355 | # Seems like the most sensible default |
|
0 commit comments