Skip to content

Commit c853a34

Browse files
committed
fix(scheduler): terminate all pods from previous release on deploy
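Fixes #157 and partially reverts #95 as well. It seems the exception handling may never have worked, because _scale_app never returns an object: assigning its result to new_rc inside the scaling loop replaced new_rc with None, so the rollback code in the except block could not read new_rc["metadata"]["name"].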
1 parent 1879894 commit c853a34

1 file changed

Lines changed: 24 additions & 13 deletions

File tree

rootfs/scheduler/k8s.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -294,32 +294,44 @@ def deploy(self, name, image, command, **kwargs):
294294
app_name = kwargs.get('aname', {})
295295
name = name.replace('.', '-').replace('_', '-')
296296
app_type = name.split('-')[-1]
297+
298+
# Fetch old RC and create the new one for a release
297299
old_rc = self._get_old_rc(app_name, app_type)
298300
new_rc = self._create_rc(name, image, command, **kwargs)
299-
old_temp_rc = old_rc
301+
302+
# Get the desired number to scale to
300303
if old_rc:
301304
desired = int(old_rc["spec"]["replicas"])
302-
old_rc_name = old_rc["metadata"]["name"]
303305
else:
304306
desired = 1
305307

306-
new_rc_name = new_rc["metadata"]["name"]
307308
try:
308309
count = 1
309310
while desired >= count:
310-
new_rc = self._scale_app(new_rc_name, count, app_name)
311-
if old_temp_rc:
312-
old_temp_rc = self._scale_app(old_rc_name, desired-count, app_name)
311+
logger.debug('scaling release {} to {} out of final {}'.format(
312+
new_rc["metadata"]["name"], count, desired)
313+
)
314+
self._scale_app(new_rc["metadata"]["name"], count, app_name)
315+
if old_rc:
316+
logger.debug('scaling old release {} from {} to {}'.format(
317+
old_rc["metadata"]["name"], desired, (desired-count))
318+
)
319+
self._scale_app(old_rc["metadata"]["name"], (desired-count), app_name)
320+
313321
count += 1
314322
except Exception as e:
323+
logger.error('Could not scale {} to {}. Deleting and going back to old release'.format(
324+
new_rc["metadata"]["name"], desired)
325+
)
315326
self._scale_app(new_rc["metadata"]["name"], 0, app_name)
316327
self._delete_rc(new_rc["metadata"]["name"], app_name)
317328
if old_rc:
318329
self._scale_app(old_rc["metadata"]["name"], desired, app_name)
319330

320331
raise RuntimeError('{} (deploy): {}'.format(name, e))
332+
321333
if old_rc:
322-
self._delete_rc(app_name, old_rc_name)
334+
self._delete_rc(app_name, old_rc["metadata"]["name"])
323335

324336
def _get_events(self, namespace):
325337
url = self._api("/namespaces/{}/events", namespace)
@@ -363,9 +375,10 @@ def _get_schedule_status(self, name, num, namespace):
363375

364376
time.sleep(1)
365377

366-
def _scale_rc(self, rc, namespace):
367-
name = rc['metadata']['name']
368-
num = rc["spec"]["replicas"]
378+
def _scale_rc(self, name, namespace, num):
379+
rc = self._get_rc_(name, namespace)
380+
rc["spec"]["replicas"] = num
381+
369382
url = self._api("/namespaces/{}/replicationcontrollers/{}", namespace, name)
370383
resp = self.session.put(url, json=rc)
371384
if unhealthy(resp.status_code):
@@ -395,9 +408,7 @@ def _scale_rc(self, rc, namespace):
395408
time.sleep(1)
396409

397410
def _scale_app(self, name, num, namespace):
398-
js_template = self._get_rc_(name, namespace)
399-
js_template["spec"]["replicas"] = num
400-
self._scale_rc(js_template, namespace)
411+
self._scale_rc(name, namespace, num)
401412

402413
def scale(self, name, image, command, **kwargs):
403414
logger.debug('scale {}, img {}, params {}, cmd "{}"'.format(name, image, kwargs, command))

0 commit comments

Comments
 (0)