Skip to content

Commit 2459c2c

Browse files
author
Gabriel Monroy
committed
Merge pull request #1978 from gabrtv/fix-deis-run
fix(controller): work around fleet state reporting on deis run
2 parents d2b9c39 + 1655fa5 commit 2459c2c

1 file changed

Lines changed: 46 additions & 24 deletions

File tree

controller/scheduler/coreos.py

Lines changed: 46 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -232,19 +232,19 @@ def _destroy_announcer(self, name):
232232
def _destroy_log(self, name):
233233
return self._delete_unit(name+'-log')
234234

235-
def run(self, name, image, command):
235+
def run(self, name, image, command): # noqa
236236
"""Run a one-off command"""
237237
self._create_container(name, image, command, copy.deepcopy(RUN_TEMPLATE))
238238

239-
# wait for the container to return something
240-
for _ in range(1200):
239+
# wait for the container to get scheduled
240+
for _ in range(30):
241241
states = self._get_state(name)
242242
if states and len(states.get('states', [])) == 1:
243243
state = states.get('states')[0]
244-
subState = state.get('systemdSubState')
245-
if subState == 'exited' or subState == 'failed' or subState == 'dead':
246-
break
244+
break
247245
time.sleep(1)
246+
else:
247+
raise RuntimeError('container did not report state')
248248
machineID = state.get('machineID')
249249

250250
# find the machine
@@ -268,28 +268,50 @@ def run(self, name, image, command):
268268
ssh = paramiko.SSHClient()
269269
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
270270
ssh.connect(primaryIP, username="core", pkey=pkey)
271-
272-
# get a pty so stdout/stderr look right
271+
# share a transport
273272
tran = ssh.get_transport()
274-
chan = tran.open_session()
275-
chan.get_pty()
276-
out = chan.makefile()
277-
278-
# exec the command to gather container output
279-
chan.exec_command('docker logs {name}'.format(**locals()))
280-
rc, output = chan.recv_exit_status(), out.read()
281-
if rc != 0:
273+
274+
def _do_ssh(cmd):
275+
chan = tran.open_session()
276+
# get a pty so stdout/stderr look right
277+
chan.get_pty()
278+
out = chan.makefile()
279+
chan.exec_command(cmd)
280+
rc, output = chan.recv_exit_status(), out.read()
281+
return rc, output
282+
283+
# wait for container to start
284+
for _ in range(1200):
285+
rc, _ = _do_ssh('docker inspect {name}'.format(**locals()))
286+
if rc == 0:
287+
break
288+
time.sleep(1)
289+
else:
290+
raise RuntimeError('container failed to start on host')
291+
292+
# wait for container to complete
293+
for _ in range(1200):
294+
_rc, _output = _do_ssh('docker inspect {name}'.format(**locals()))
295+
if _rc != 0:
296+
raise RuntimeError('failed to inspect container')
297+
_container = json.loads(_output)
298+
finished_at = _container[0]["State"]["FinishedAt"]
299+
if not finished_at.startswith('0001'):
300+
break
301+
time.sleep(1)
302+
else:
303+
raise RuntimeError('container timed out')
304+
305+
# gather container output
306+
_rc, output = _do_ssh('docker logs {name}'.format(**locals()))
307+
if _rc != 0:
282308
raise RuntimeError('could not attach to container')
283309

284-
# use another channel to inspect the container
285-
chan = tran.open_session()
286-
chan.get_pty()
287-
out = chan.makefile()
288-
chan.exec_command('docker inspect {name}'.format(**locals()))
289-
rc, inspect_output = chan.recv_exit_status(), out.read()
290-
if rc != 0:
310+
# determine container exit code
311+
_rc, _output = _do_ssh('docker inspect {name}'.format(**locals()))
312+
if _rc != 0:
291313
raise RuntimeError('could not determine exit code')
292-
container = json.loads(inspect_output)
314+
container = json.loads(_output)
293315
rc = container[0]["State"]["ExitCode"]
294316

295317
# cleanup

0 commit comments

Comments
 (0)