Skip to content

Commit 232427d

Browse files
author
Matthias Arnason
committed
fix(contrib/ec2): improve timeout handling
Output the remaining time while provisioning and while waiting for health checks, and explicitly state when the script gives up and why. Additionally, tear down the stack if it has failed.
1 parent 8e960ca commit 232427d

1 file changed

Lines changed: 28 additions & 7 deletions

File tree

contrib/ec2/provision-ec2-cluster.sh

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,43 +38,64 @@ fi
3838
# check that the CoreOS user-data file is valid
3939
$CONTRIB_DIR/util/check-user-data.sh
4040

41+
# Prepare a bailout function that tears down the stack, to prevent us from polluting the namespace
42+
bailout() {
43+
# Delete the CloudFormation stack named by $STACK_NAME using the AWS CLI.
# Callers in this script invoke this right before `exit 1` on the timeout and
# stack-creation-failure paths; the exit status of the delete is not checked.
aws cloudformation delete-stack --stack-name $STACK_NAME
44+
}
45+
4146
# create an EC2 cloudformation stack based on CoreOS's default template
4247
aws cloudformation create-stack \
4348
--template-body "$($THIS_DIR/gen-json.py)" \
4449
--stack-name $STACK_NAME \
4550
--parameters "$(<$THIS_DIR/cloudformation.json)"
4651

4752
# loop until the instances are created
48-
ATTEMPTS=45
53+
ATTEMPTS=60
4954
SLEEPTIME=10
5055
COUNTER=1
5156
INSTANCE_IDS=""
52-
until [ `wc -w <<< $INSTANCE_IDS` -eq $DEIS_NUM_INSTANCES -a "$STACK_STATUS" = "CREATE_COMPLETE" ]; do
53-
if [ $COUNTER -gt $ATTEMPTS ]; then echo "Timed out waiting for instances..." ; exit 1; fi # timeout after 7 1/2 minutes
54-
echo "Waiting for instances to be created... $STACK_STATUS"
55-
sleep $SLEEPTIME
57+
until [ $(wc -w <<< $INSTANCE_IDS) -eq $DEIS_NUM_INSTANCES -a "$STACK_STATUS" = "CREATE_COMPLETE" ]; do
58+
if [ $COUNTER -gt $ATTEMPTS ]; then
59+
echo "Provisioning instances failed (timeout, $(wc -w <<< $INSTANCE_IDS) of $DEIS_NUM_INSTANCES provisioned after 10m)"
60+
echo "Destroying stack $STACK_NAME"
61+
bailout
62+
exit 1
63+
fi
64+
5665
STACK_STATUS=$(aws --output text cloudformation describe-stacks --stack-name $STACK_NAME --query 'Stacks[].StackStatus')
5766
if [ $STACK_STATUS != "CREATE_IN_PROGRESS" -a $STACK_STATUS != "CREATE_COMPLETE" ] ; then
5867
echo "error creating stack: "
5968
aws --output text cloudformation describe-stack-events \
6069
--stack-name $STACK_NAME \
6170
--query 'StackEvents[?ResourceStatus==`CREATE_FAILED`].[LogicalResourceId,ResourceStatusReason]'
71+
bailout
6272
exit 1
6373
fi
74+
6475
INSTANCE_IDS=$(aws ec2 describe-instances \
6576
--filters Name=tag:aws:cloudformation:stack-name,Values=$STACK_NAME Name=instance-state-name,Values=running \
6677
--query 'Reservations[].Instances[].[ InstanceId ]' \
6778
--output text)
79+
80+
echo "Waiting for instances to be provisioned ($STACK_STATUS, $(expr 61 - $COUNTER)0s) ..."
81+
sleep $SLEEPTIME
82+
6883
let COUNTER=COUNTER+1
6984
done
7085

7186
# loop until the instances pass health checks
7287
COUNTER=1
7388
INSTANCE_STATUSES=""
7489
until [ `wc -w <<< $INSTANCE_STATUSES` -eq $DEIS_NUM_INSTANCES ]; do
75-
if [ $COUNTER -gt $ATTEMPTS ]; then echo "Timed out waiting for instances..." ; exit 1; fi # timeout after 7 1/2 minutes
90+
if [ $COUNTER -gt $ATTEMPTS ];
91+
then echo "Health checks not passed after 10m, giving up"
92+
echo "Destroying stack $STACK_NAME"
93+
bailout
94+
exit 1
95+
fi
96+
7697
if [ $COUNTER -ne 1 ]; then sleep $SLEEPTIME; fi
77-
echo "Waiting for instances to pass initial health checks..."
98+
echo "Waiting for instances to pass initial health checks ($(expr 61 - $COUNTER)0s) ..."
7899
INSTANCE_STATUSES=$(aws ec2 describe-instance-status \
79100
--filters Name=instance-status.reachability,Values=passed \
80101
--instance-ids $INSTANCE_IDS \

0 commit comments

Comments
 (0)