Skip to content

Commit 10168c9

Browse files
author
Matthew
committed
feat(publisher): add http liveness health check
1 parent b5390fc commit 10168c9

7 files changed

Lines changed: 190 additions & 2 deletions

File tree

controller/api/models.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,6 +1159,34 @@ def _etcd_purge_cert(**kwargs):
11591159
pass
11601160

11611161

1162+
def _etcd_publish_config(**kwargs):
    """Signal handler that mirrors a saved Config into etcd.

    Everything under /deis/config/<app> is deleted first: `deis config:unset`
    removes a value from the newest Config but does not delete the older
    Config object, so stale keys have to be purged here. When the instance was
    just created, each of its values is then written to
    /deis/config/<app>/<lower-cased key>.
    """
    release_config = kwargs['instance']
    try:
        _etcd_client.delete('/deis/config/{}'.format(release_config.app),
                            prevExist=True, dir=True, recursive=True)
    except KeyError:
        # a KeyError from the delete is deliberately ignored
        # (presumably the directory was never published -- TODO confirm)
        pass
    if not kwargs['created']:
        return
    for name, value in release_config.values.iteritems():
        etcd_key = '/deis/config/{}/{}'.format(
            release_config.app,
            unicode(name).encode('utf-8').lower())
        _etcd_client.write(etcd_key, unicode(value).encode('utf-8'))
1179+
1180+
1181+
def _etcd_purge_config(**kwargs):
    """Signal handler that removes /deis/config/<app> (recursively) from etcd."""
    removed_config = kwargs['instance']
    try:
        _etcd_client.delete(
            '/deis/config/{}'.format(removed_config.app),
            prevExist=True,
            dir=True,
            recursive=True)
    except KeyError:
        # a KeyError from the delete is deliberately ignored
        pass
1188+
1189+
11621190
def _etcd_publish_domains(**kwargs):
11631191
domain = kwargs['instance']
11641192
if kwargs['created']:
@@ -1204,3 +1232,5 @@ def create_auth_token(sender, instance=None, created=False, **kwargs):
12041232
post_delete.connect(_etcd_purge_app, sender=App, dispatch_uid='api.models')
12051233
post_save.connect(_etcd_publish_cert, sender=Certificate, dispatch_uid='api.models')
12061234
post_delete.connect(_etcd_purge_cert, sender=Certificate, dispatch_uid='api.models')
1235+
post_save.connect(_etcd_publish_config, sender=Config, dispatch_uid='api.models')
1236+
post_delete.connect(_etcd_purge_config, sender=Config, dispatch_uid='api.models')

docs/managing_deis/backing_up_data.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ use in the ``export`` command should correspond to the IP of the host machine wh
212212
[a.save() for a in App.objects.all()]
213213
[d.save() for d in Domain.objects.all()]
214214
[c.save() for c in Certificate.objects.all()]
215+
[c.save() for c in Config.objects.all()]
215216
EOF
216217
$ exit
217218

publisher/server/publisher.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"log"
66
"net"
7+
"net/http"
78
"regexp"
89
"strconv"
910
"sync"
@@ -115,6 +116,24 @@ func (s *Server) publishContainer(container *docker.APIContainers, ttl time.Dura
115116
port := strconv.Itoa(int(p.PublicPort))
116117
hostAndPort := s.host + ":" + port
117118
if s.IsPublishableApp(containerName) && s.IsPortOpen(hostAndPort) {
119+
configKey := fmt.Sprintf("/deis/config/%s/", appName)
120+
// check if the user specified a healthcheck URL
121+
healthcheckURL := s.getEtcd(configKey + "healthcheck_url")
122+
delay, err := strconv.Atoi(s.getEtcd(configKey + "healthcheck_initial_delay"))
123+
if err != nil {
124+
log.Println(err)
125+
delay = 0
126+
}
127+
timeout, err := strconv.Atoi(s.getEtcd(configKey + "healthcheck_timeout"))
128+
if err != nil {
129+
log.Println(err)
130+
timeout = 1
131+
}
132+
if healthcheckURL != "" {
133+
if !s.HealthCheckOK("http://"+hostAndPort+healthcheckURL, delay, timeout) {
134+
continue
135+
}
136+
}
118137
s.setEtcd(keyPath, hostAndPort, uint64(ttl.Seconds()))
119138
safeMap.Lock()
120139
safeMap.data[container.ID] = appPath
@@ -169,6 +188,23 @@ func (s *Server) IsPortOpen(hostAndPort string) bool {
169188
return portOpen
170189
}
171190

191+
func (s *Server) HealthCheckOK(url string, delay, timeout int) bool {
192+
// sleep for the initial delay
193+
time.Sleep(time.Duration(delay) * time.Second)
194+
client := http.Client{
195+
Timeout: time.Duration(timeout) * time.Second,
196+
}
197+
resp, err := client.Get(url)
198+
if err != nil {
199+
log.Printf("an error occurred while performing a health check at %s (%v)\n", url, err)
200+
return false
201+
}
202+
if resp.StatusCode != http.StatusOK {
203+
log.Printf("healthcheck failed for %s (expected %d, got %d)\n", url, http.StatusOK, resp.StatusCode)
204+
}
205+
return resp.StatusCode == http.StatusOK
206+
}
207+
172208
// latestRunningVersion retrieves the highest version of the application published
173209
// to etcd. If no app has been published, returns 0.
174210
func latestRunningVersion(client *etcd.Client, appName string) int {
@@ -213,6 +249,21 @@ func max(n []int) int {
213249
return val
214250
}
215251

252+
// getEtcd retrieves the etcd key's value. Returns an empty string if the key was not found.
253+
func (s *Server) getEtcd(key string) string {
254+
if s.logLevel == "debug" {
255+
log.Println("get", key)
256+
}
257+
resp, err := s.EtcdClient.Get(key, false, false)
258+
if err != nil {
259+
return ""
260+
}
261+
if resp != nil && resp.Node != nil {
262+
return resp.Node.Value
263+
}
264+
return ""
265+
}
266+
216267
// setEtcd sets the corresponding etcd key with the value and ttl
217268
func (s *Server) setEtcd(key, value string, ttl uint64) {
218269
if _, err := s.EtcdClient.Set(key, value, ttl); err != nil {

publisher/server/publisher_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package server
22

33
import (
4+
"fmt"
45
"net"
6+
"net/http"
7+
"net/http/httptest"
58
"testing"
69
)
710

@@ -45,3 +48,25 @@ func TestIsPortOpen(t *testing.T) {
4548
t.Errorf("Port should be closed")
4649
}
4750
}
51+
52+
func TestHealthCheckOK(t *testing.T) {
53+
s := &Server{}
54+
55+
// good server
56+
ts1 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
57+
fmt.Fprintln(w, "Hello, client")
58+
}))
59+
defer ts1.Close()
60+
if !s.HealthCheckOK(ts1.URL, 0, 0) {
61+
t.Errorf("healthcheck should be OK")
62+
}
63+
64+
// bad server
65+
ts2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
66+
http.NotFound(w, r)
67+
}))
68+
defer ts2.Close()
69+
if s.HealthCheckOK(ts2.URL, 0, 0) {
70+
t.Errorf("healthcheck should be NOT OK")
71+
}
72+
}

tests/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ test-full: test-style
2323
godep go test -tags integration -v -run TestBuilds
2424
godep go test -tags integration -v -run TestConfig
2525
godep go test -tags integration -v -run TestDomains
26+
godep go test -tags integration -v -run TestHealthcheck
2627
godep go test -tags integration -v -run TestKeys
2728
godep go test -tags integration -v -run TestPerms
2829
godep go test -tags integration -v -run TestPs

tests/healthcheck_test.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// +build integration
2+
3+
package tests
4+
5+
import (
6+
"fmt"
7+
"net/http"
8+
"testing"
9+
10+
"github.com/deis/deis/tests/utils"
11+
)
12+
13+
// healthcheckGoodCmd is the deis CLI command string (with an {{.AppName}}
// placeholder) that sets the app's HEALTHCHECK_URL to "/".
var healthcheckGoodCmd = "config:set HEALTHCHECK_URL=/ --app={{.AppName}}"
16+
17+
func TestHealthcheck(t *testing.T) {
18+
client := utils.HTTPClient()
19+
cfg := healthcheckSetup(t)
20+
done := make(chan bool, 1)
21+
url := fmt.Sprintf("http://%s.%s", cfg.AppName, cfg.Domain)
22+
23+
utils.Execute(t, healthcheckGoodCmd, cfg, false, "/")
24+
go func() {
25+
// there should never be any downtime during these health check operations
26+
psScaleTest(t, cfg, psScaleCmd)
27+
cfg.ProcessNum = "1"
28+
psScaleTest(t, cfg, psScaleCmd)
29+
// kill healthcheck goroutine
30+
done <- true
31+
}()
32+
33+
// run health checks in parallel while performing operations
34+
fmt.Printf("starting health checks at %s\n", url)
35+
loop:
36+
for {
37+
select {
38+
case <-done:
39+
fmt.Println("done performing health checks")
40+
break loop
41+
default:
42+
doHealthCheck(t, client, url)
43+
}
44+
}
45+
utils.AppsDestroyTest(t, cfg)
46+
}
47+
48+
func healthcheckSetup(t *testing.T) *utils.DeisTestConfig {
49+
cfg := utils.GetGlobalConfig()
50+
cfg.AppName = "healthchecksample"
51+
utils.Execute(t, authLoginCmd, cfg, false, "")
52+
utils.Execute(t, gitCloneCmd, cfg, false, "")
53+
if err := utils.Chdir(cfg.ExampleApp); err != nil {
54+
t.Fatal(err)
55+
}
56+
utils.Execute(t, appsCreateCmd, cfg, false, "")
57+
utils.Execute(t, gitPushCmd, cfg, false, "")
58+
utils.CurlApp(t, *cfg)
59+
if err := utils.Chdir(".."); err != nil {
60+
t.Fatal(err)
61+
}
62+
return cfg
63+
}
64+
65+
// doHealthCheck issues a single GET against url with client and fails the
// test immediately if the request errors or the response is not 200 OK.
func doHealthCheck(t *testing.T, client *http.Client, url string) {
	resp, getErr := client.Get(url)
	if getErr != nil {
		t.Fatalf("could not retrieve response from %s: %v\n", url, getErr)
	}
	defer resp.Body.Close()
	if status := resp.StatusCode; status != http.StatusOK {
		t.Fatalf("app had some downtime while undergoing health checks (got %d response)", status)
	}
}

tests/utils/itutils.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,17 @@ func GetGlobalConfig() *DeisTestConfig {
123123
return &envCfg
124124
}
125125

126-
func doCurl(url string) ([]byte, error) {
126+
// HTTPClient returns a client for use with the integration tests. The
// client's transport skips TLS certificate verification so that endpoints
// with self-signed certificates can be reached.
func HTTPClient() *http.Client {
	return &http.Client{
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
		},
	}
}
134+
135+
func doCurl(url string) ([]byte, error) {
136+
client := HTTPClient()
132137
response, err := client.Get(url)
133138
if err != nil {
134139
return nil, err
@@ -300,6 +305,7 @@ func Execute(t *testing.T, cmd string, params interface{}, failFlag bool, expect
300305

301306
// AppsDestroyTest destroys a Deis app and checks that it was successful.
302307
func AppsDestroyTest(t *testing.T, params *DeisTestConfig) {
308+
fmt.Printf("destroying app %s...\n", params.ExampleApp)
303309
cmd := "apps:destroy --app={{.AppName}} --confirm={{.AppName}}"
304310
if err := Chdir(params.ExampleApp); err != nil {
305311
t.Fatal(err)

0 commit comments

Comments
 (0)