Skip to content

Commit a303f25

Browse files
authored
fix(release): change release cleanup to only remove secrets related to Deployments that are no longer active (#1038)
Before the cleanup proceess and Release::delete were removing RCs and env secrets related to all but current releases, this can (and has) gotten all screwy in the logic due to how RC and Deployment cleanup ended up being merged instead of being previously separate. Now the process it so cleanup non-latest RCs (just as a safety thing), and only remove secrets that are not associated to a specific ReplicaSet
1 parent ad7fc55 commit a303f25

2 files changed

Lines changed: 16 additions & 64 deletions

File tree

rootfs/api/models/release.py

Lines changed: 16 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -234,22 +234,14 @@ def rollback(self, user, version=None):
234234
new_release.delete()
235235
raise DeisException(str(e)) from e
236236

237-
def delete(self, *args, **kwargs):
238-
"""Delete release DB record and any RCs from the affect release"""
239-
try:
240-
self._delete_release_in_scheduler(self.app.id, "v{}".format(self.version))
241-
except KubeHTTPException as e:
242-
# 404 means they were already cleaned up
243-
if e.response.status_code is not 404:
244-
# Another problem came up
245-
message = 'Could not to cleanup RCs for release {}'.format(self.version)
246-
self.app.log(message, level=logging.WARNING)
247-
logger.warning(message + ' - ' + str(e))
248-
finally:
249-
super(Release, self).delete(*args, **kwargs)
250-
251237
def cleanup_old(self): # noqa
252-
"""Cleanup all but the latest release from Kubernetes"""
238+
"""
239+
Cleanup any old resources from Kubernetes
240+
241+
This includes any RCs that are no longer considered the latest release (just a safety net)
242+
Secrets no longer tied to any ReplicaSet
243+
Stray pods no longer relevant to the latest release
244+
"""
253245
latest_version = 'v{}'.format(self.version)
254246
self.app.log(
255247
'Cleaning up RCs for releases older than {} (latest)'.format(latest_version),
@@ -276,24 +268,12 @@ def cleanup_old(self): # noqa
276268
level=logging.DEBUG
277269
)
278270

271+
# this is RC related
279272
for version in controller_removal:
280273
self._delete_release_in_scheduler(self.app.id, version)
281274

282-
# find stray env secrets to remove that may have been missed
283-
self.app.log('Cleaning up orphaned environment var secrets', level=logging.DEBUG)
284-
labels = {
285-
'heritage': 'deis',
286-
'app': self.app.id,
287-
'type': 'env',
288-
}
289-
secrets = self._scheduler.secret.get(self.app.id, labels=labels).json()
290-
for secret in secrets['items']:
291-
current_version = secret['metadata']['labels']['version']
292-
# skip the latest release
293-
if current_version == latest_version:
294-
continue
295-
296-
self._scheduler.secret.delete(self.app.id, secret['metadata']['name'])
275+
# handle Deployments specific cleanups
276+
self._cleanup_deployment_secrets_and_configs(self.app.id)
297277

298278
# Remove stray pods
299279
labels = {'heritage': 'deis'}
@@ -314,8 +294,6 @@ def cleanup_old(self): # noqa
314294
if e.response.status_code == 404:
315295
continue
316296

317-
self._cleanup_deployment_secrets_and_configs(self.app.id)
318-
319297
def _cleanup_deployment_secrets_and_configs(self, namespace):
320298
"""
321299
Clean up any environment secrets (and in the future ConfigMaps) that
@@ -356,7 +334,7 @@ def _delete_release_in_scheduler(self, namespace, version):
356334
"""
357335
Deletes a specific release in k8s based on ReplicationController
358336
359-
Scale RCs to 0 then delete RCs / Deployments and the version specific
337+
Scale RCs to 0 then delete RCs and the version specific
360338
secret that container the env var
361339
"""
362340
labels = {
@@ -366,18 +344,14 @@ def _delete_release_in_scheduler(self, namespace, version):
366344
}
367345

368346
# see if the app config has deploy timeout preference, otherwise use global
369-
deploy_timeout = self.config.values.get('DEIS_DEPLOY_TIMEOUT', settings.DEIS_DEPLOY_TIMEOUT) # noqa
347+
timeout = self.config.values.get('DEIS_DEPLOY_TIMEOUT', settings.DEIS_DEPLOY_TIMEOUT)
370348

371349
controllers = self._scheduler.rc.get(namespace, labels=labels).json()
372350
for controller in controllers['items']:
373-
self._scheduler.cleanup_release(namespace, controller, deploy_timeout)
374-
375-
# remove secret that contains env vars for the release
376-
try:
377-
secret_name = "{}-{}-env".format(namespace, version)
378-
self._scheduler.secret.delete(namespace, secret_name)
379-
except KubeHTTPException:
380-
pass
351+
# Deployment takes care of this in the API, RC does not
352+
# Have the RC scale down pods and delete itself
353+
self._scheduler._scale_rc(namespace, controller['metadata']['name'], 0, timeout)
354+
self._scheduler.rc.delete(namespace, controller['metadata']['name'])
381355

382356
def save(self, *args, **kwargs): # noqa
383357
if not self.summary:

rootfs/scheduler/__init__.py

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -186,28 +186,6 @@ def deploy(self, namespace, name, image, entrypoint, command, **kwargs): # noqa
186186
"Additional information:\n{}".format(version, namespace, app_type, str(e))
187187
) from e
188188

189-
def cleanup_release(self, namespace, controller, timeout):
190-
"""
191-
Cleans up resources related to an application deployment
192-
"""
193-
# Deployment takes care of this in the API, RC does not
194-
# Have the RC scale down pods and delete itself
195-
self._scale_rc(namespace, controller['metadata']['name'], 0, timeout)
196-
self.rc.delete(namespace, controller['metadata']['name'])
197-
198-
# Remove stray pods that the scale down will have missed (this can occassionally happen)
199-
pods = self.pod.get(namespace, labels=controller['metadata']['labels']).json()
200-
for pod in pods['items']:
201-
if self.pod.deleted(pod):
202-
continue
203-
204-
try:
205-
self.pod.delete(namespace, pod['metadata']['name'])
206-
except KubeHTTPException as e:
207-
# Sometimes k8s will manage to remove the pod from under us
208-
if e.response.status_code == 404:
209-
continue
210-
211189
def scale(self, namespace, name, image, entrypoint, command, **kwargs):
212190
"""Scale Deployment"""
213191
try:

0 commit comments

Comments
 (0)