@@ -232,19 +232,19 @@ def _destroy_announcer(self, name):
def _destroy_log(self, name):
    """Delete the unit named ``name + '-log'``.

    The unit name is built by appending the ``-log`` suffix to ``name``
    and handed to ``self._delete_unit``; whatever that call returns is
    passed back to the caller unchanged.
    """
    return self._delete_unit(name + '-log')
234234
235- def run (self , name , image , command ):
235+ def run (self , name , image , command ): # noqa
236236 """Run a one-off command"""
237237 self ._create_container (name , image , command , copy .deepcopy (RUN_TEMPLATE ))
238238
239- # wait for the container to return something
240- for _ in range (1200 ):
239+ # wait for the container to get scheduled
240+ for _ in range (30 ):
241241 states = self ._get_state (name )
242242 if states and len (states .get ('states' , [])) == 1 :
243243 state = states .get ('states' )[0 ]
244- subState = state .get ('systemdSubState' )
245- if subState == 'exited' or subState == 'failed' or subState == 'dead' :
246- break
244+ break
247245 time .sleep (1 )
246+ else :
247+ raise RuntimeError ('container did not report state' )
248248 machineID = state .get ('machineID' )
249249
250250 # find the machine
@@ -268,28 +268,50 @@ def run(self, name, image, command):
268268 ssh = paramiko .SSHClient ()
269269 ssh .set_missing_host_key_policy (paramiko .AutoAddPolicy ())
270270 ssh .connect (primaryIP , username = "core" , pkey = pkey )
271-
272- # get a pty so stdout/stderr look right
271+ # share a transport
273272 tran = ssh .get_transport ()
274- chan = tran .open_session ()
275- chan .get_pty ()
276- out = chan .makefile ()
277-
278- # exec the command to gather container output
279- chan .exec_command ('docker logs {name}' .format (** locals ()))
280- rc , output = chan .recv_exit_status (), out .read ()
281- if rc != 0 :
273+
274+ def _do_ssh (cmd ):
275+ chan = tran .open_session ()
276+ # get a pty so stdout/stderr look right
277+ chan .get_pty ()
278+ out = chan .makefile ()
279+ chan .exec_command (cmd )
280+ rc , output = chan .recv_exit_status (), out .read ()
281+ return rc , output
282+
283+ # wait for container to start
284+ for _ in range (30 ):
285+ rc , _ = _do_ssh ('docker inspect {name}' .format (** locals ()))
286+ if rc == 0 :
287+ break
288+ time .sleep (1 )
289+ else :
290+ raise RuntimeError ('container failed to start on host' )
291+
292+ # wait for container to complete
293+ for _ in range (1200 ):
294+ _rc , _output = _do_ssh ('docker inspect {name}' .format (** locals ()))
295+ if _rc != 0 :
296+ raise RuntimeError ('failed to inspect container' )
297+ _container = json .loads (_output )
298+ finished_at = _container [0 ]["State" ]["FinishedAt" ]
299+ if not finished_at .startswith ('0001' ):
300+ break
301+ time .sleep (1 )
302+ else :
303+ raise RuntimeError ('container timed out' )
304+
305+ # gather container output
306+ _rc , output = _do_ssh ('docker logs {name}' .format (** locals ()))
307+ if _rc != 0 :
282308 raise RuntimeError ('could not attach to container' )
283309
284- # use another channel to inspect the container
285- chan = tran .open_session ()
286- chan .get_pty ()
287- out = chan .makefile ()
288- chan .exec_command ('docker inspect {name}' .format (** locals ()))
289- rc , inspect_output = chan .recv_exit_status (), out .read ()
290- if rc != 0 :
310+ # determine container exit code
311+ _rc , _output = _do_ssh ('docker inspect {name}' .format (** locals ()))
312+ if _rc != 0 :
291313 raise RuntimeError ('could not determine exit code' )
292- container = json .loads (inspect_output )
314+ container = json .loads (_output )
293315 rc = container [0 ]["State" ]["ExitCode" ]
294316
295317 # cleanup
0 commit comments