@@ -43,6 +43,22 @@ coreos:
4343 Type=oneshot
4444 ExecStart=/usr/bin/systemctl stop update-engine.service
4545 ExecStartPost=/usr/bin/systemctl mask update-engine.service
# graceful-deis-shutdown.service: unit whose stop action runs
# /opt/bin/graceful-shutdown.sh to clean up before the machine goes down.
46+ - name: graceful-deis-shutdown.service
47+ content: |
48+ [Unit]
49+ Description=Clean up
# Opt out of the implicit dependencies systemd normally adds to a service.
50+ DefaultDependencies=no
# Ordered after, and requiring, fleet, etcd, docker and the deis-store-* units
# named below; the stop script calls docker, etcdctl and fleetctl.
51+ After=fleet.service etcd.service docker.service docker.socket deis-store-admin.service deis-store-daemon.service deis-store-volume.service deis-store-monitor.service
52+ Requires=fleet.service etcd.service deis-store-admin.service deis-store-daemon.service deis-store-volume.service docker.service docker.socket deis-store-monitor.service
53+
54+ [Install]
# When enabled, the unit is pulled in by the shutdown, halt and reboot targets.
55+ WantedBy=shutdown.target halt.target reboot.target
56+
57+ [Service]
# Only a stop command is defined (no ExecStart); the script refuses to run
# unless it is given the --really argument.
58+ ExecStop=/opt/bin/graceful-shutdown.sh --really
59+ Type=oneshot
# 1200 seconds = 20 minutes.
60+ TimeoutSec=1200
# NOTE(review): presumably needed so the unit stays "active" after its empty
# start phase and ExecStop has something to stop -- confirm against systemd docs.
61+ RemainAfterExit=yes
4662 - name: install-deisctl.service
4763 command: start
4864 content: |
@@ -176,3 +192,61 @@ write_files:
176192 content: |
177193 [Coredump]
178194 Storage=none
195+ - path: /opt/bin/graceful-shutdown.sh
196+ permissions: '0755'
197+ content: |
#!/usr/bin/bash
#
# graceful-shutdown.sh -- remove this machine from the Deis store (Ceph) and
# from the etcd cluster before it goes down.
#
# Usage: graceful-shutdown.sh --really
#
# Exit status:
#   1      not invoked with --really
#   2      the deis-store-admin container is missing or not running
#   other  a later command failed (everything after the pre-flight checks
#          runs under `set -e -o pipefail`)

# Seconds to wait for ceph to report HEALTH_OK again after marking the OSD out.
# NOTE(review): the unit that invokes this script sets TimeoutSec=1200 as well,
# so systemd may kill the script before this limit is ever reached -- confirm.
readonly HEALTH_WAIT_LIMIT=1200
# etcd admin endpoint used to look up and delete cluster members.
readonly ETCD_MACHINES_URL=http://127.0.0.1:7001/v2/admin/machines

# Run a ceph subcommand inside the deis-store-admin container.
ceph_admin() {
  /usr/bin/docker exec deis-store-admin ceph "$@"
}

# Safety catch: this script is destructive, so demand an explicit opt-in.
if [[ "${1:-}" != '--really' ]]; then
  echo "command must be run as: $0 --really" >&2
  exit 1
fi

# procedure requires the store-admin
# Checked before `set -e` so that a failing `docker inspect` (any non-zero
# status, not just 1) is reported here instead of aborting without a message.
if ! admin_running=$(/usr/bin/docker inspect --format="{{ .State.Running }}" deis-store-admin) \
    || [[ "$admin_running" == "false" ]]; then
  echo "deis-store-admin container is required for graceful shutdown" >&2
  exit 2
fi

set -e -x -o pipefail

# Read once; used to find this machine in fleet and in the etcd member list.
machine_id=$(cat /etc/machine-id)

# determine osd id
current_status=$(ceph_admin health | awk '{print $1}')
# Word splitting is intentional: each listed key contributes one path
# component, which is compared against this machine's IP below.
# shellcheck disable=SC2207
osd_hosts=($(/usr/bin/etcdctl ls /deis/store/hosts/ | awk -F'/' '{print $5}'))

# Start from known-empty values so nothing leaks in from the environment.
public_ip=""
osd_id=""
if (( ${#osd_hosts[@]} > 0 )); then
  # The address fleet reports for this machine; looked up once, not per host.
  public_ip=$(fleetctl list-machines -fields="machine,ip" -full -no-legend \
    | grep -F -- "$machine_id" \
    | awk '{print $2}')
  for host in "${osd_hosts[@]}"; do
    if [[ "$host" == "$public_ip" ]]; then
      osd_id=$(/usr/bin/etcdctl get "/deis/store/osds/$public_ip")
      break
    fi
  done
fi

# if we own an osd and it's healthy, try to gracefully remove it
# (only when more than three store hosts are registered)
if [[ -n "$osd_id" && "$current_status" == *"HEALTH_OK"* ]] \
    && (( ${#osd_hosts[@]} > 3 )); then
  ceph_admin osd out "$osd_id"
  sleep 30

  # Poll until ceph reports HEALTH_OK again, giving up after the wait limit.
  waited=0
  until [[ $(ceph_admin health) == *"HEALTH_OK"* ]]; do
    if (( waited > HEALTH_WAIT_LIMIT )); then
      echo "ceph graceful removal timeout exceeded"
      break
    fi
    echo "waiting"
    sleep 5
    waited=$(( waited + 5 ))
  done

  # Stop the OSD daemon, then drop it from the CRUSH map, auth and OSD lists.
  /usr/bin/docker stop deis-store-daemon
  ceph_admin osd crush remove "osd.$osd_id"
  ceph_admin auth del "osd.$osd_id"
  ceph_admin osd rm "$osd_id"
  /usr/bin/etcdctl rm "/deis/store/osds/$public_ip"
  # A failure to remove the hosts key is tolerated; pause only when it worked.
  if /usr/bin/etcdctl rm "/deis/store/hosts/$public_ip"; then
    sleep 10
  fi

  # remove ceph mon
  /usr/bin/docker stop deis-store-monitor || true   # best effort
  ceph_admin mon remove "$(hostname -f)"  # FIXME (carried over from original)
  /usr/bin/docker stop deis-store-metadata || true  # best effort
fi

# remove from etcd cluster
node=$(/usr/bin/curl -L "$ETCD_MACHINES_URL/$machine_id")
# Quoted comparison: an unquoted test broke on empty or multi-word responses,
# silently skipping the removal. An empty response still means "do nothing".
if [[ -n "$node" && "$node" != 'null' ]]; then
  /usr/bin/curl -L -XDELETE "$ETCD_MACHINES_URL/$machine_id"
fi
252+ manage_etc_hosts: localhost
0 commit comments