Skip to content

Commit 7b8e6d7

Browse files
author
Gabriel Monroy
committed
fix(controller): track create and start separately to catch early errors
1 parent c28e4ac commit 7b8e6d7

1 file changed

Lines changed: 16 additions & 3 deletions

File tree

controller/scheduler/fleet.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -256,14 +256,27 @@ def _do_ssh(cmd):
256256
rc, output = chan.recv_exit_status(), out.read()
257257
return rc, output
258258

259-
# wait for container to start
260-
for _ in range(1200):
259+
# wait for container to launch
260+
for _ in range(60):
261261
rc, _ = _do_ssh('docker inspect {name}'.format(**locals()))
262262
if rc == 0:
263263
break
264264
time.sleep(1)
265265
else:
266-
raise RuntimeError('container failed to start on host')
266+
raise RuntimeError('failed to create container')
267+
268+
# wait for container to start
269+
for _ in range(2):
270+
_rc, _output = _do_ssh('docker inspect {name}'.format(**locals()))
271+
if _rc != 0:
272+
raise RuntimeError('failed to inspect container')
273+
_container = json.loads(_output)
274+
started_at = _container[0]["State"]["StartedAt"]
275+
if not started_at.startswith('0001'):
276+
break
277+
time.sleep(1)
278+
else:
279+
raise RuntimeError('container failed to start')
267280

268281
# wait for container to complete
269282
for _ in range(1200):

0 commit comments

Comments
 (0)