Skip to content

Commit 1b331a9

Browse files
committed
Merge pull request #4506 from iancoffey/upgrade_fixes
fix(deisctl): fixes for graceful upgrades.
2 parents 4d25e8b + 112da78 commit 1b331a9

3 files changed

Lines changed: 169 additions & 33 deletions

File tree

deisctl/client/client.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,27 +74,39 @@ func (c *Client) UpgradePrep(argv []string) error {
7474
usage := `Prepare platform for graceful upgrade.
7575
7676
Usage:
77-
deisctl upgrade-prep [options]
77+
deisctl upgrade-prep [--stateless]
78+
79+
Options:
80+
--stateless Use when the target platform is stateless
7881
`
79-
if _, err := docopt.Parse(usage, argv, true, "", false); err != nil {
82+
args, err := docopt.Parse(usage, argv, true, "", false)
83+
if err != nil {
8084
return err
8185
}
8286

83-
return cmd.UpgradePrep(c.Backend)
87+
stateless, _ := args["--stateless"].(bool)
88+
89+
return cmd.UpgradePrep(stateless, c.Backend)
8490
}
8591

8692
// UpgradeTakeover gracefully restarts a cluster prepared with upgrade-prep.
//
// argv is parsed with docopt against the usage text below; a parse error is
// returned to the caller unchanged. The --stateless flag is forwarded to
// cmd.UpgradeTakeover together with the client's Backend and configBackend.
8793
func (c *Client) UpgradeTakeover(argv []string) error {
8894
usage := `Complete the upgrade of a prepped cluster.
8995
9096
Usage:
91-
deisctl upgrade-takeover [options]
97+
deisctl upgrade-takeover [--stateless]
98+
99+
Options:
100+
--stateless Use when the target platform is stateless
92101
`
93-
if _, err := docopt.Parse(usage, argv, true, "", false); err != nil {
102+
args, err := docopt.Parse(usage, argv, true, "", false)
103+
if err != nil {
94104
return err
95105
}
96106

97-
return cmd.UpgradeTakeover(c.Backend, c.configBackend)
107+
// The comma-ok assertion leaves stateless false when the parsed value is absent or not a bool.
stateless, _ := args["--stateless"].(bool)
108+
109+
return cmd.UpgradeTakeover(stateless, c.Backend, c.configBackend)
98110
}
99111

100112
// RollingRestart attempts a rolling restart of an instance unit

deisctl/cmd/cmd_test.go

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,20 @@ func TestUpgradePrep(t *testing.T) {
306306
expected := []string{"database", "registry@*", "controller", "builder", "logger", "logspout", "store-volume",
307307
"store-gateway@*", "store-metadata", "store-daemon", "store-monitor"}
308308

309-
UpgradePrep(&b)
309+
UpgradePrep(false, &b)
310+
311+
if !reflect.DeepEqual(b.stoppedUnits, expected) {
312+
t.Error(fmt.Errorf("Expected %v, Got %v", expected, b.stoppedUnits))
313+
}
314+
}
315+
316+
func TestStatelessUpgradePrep(t *testing.T) {
317+
t.Parallel()
318+
319+
b := backendStub{}
320+
expected := []string{"database", "registry@*", "controller", "builder", "logger", "logspout"}
321+
322+
UpgradePrep(true, &b)
310323

311324
if !reflect.DeepEqual(b.stoppedUnits, expected) {
312325
t.Error(fmt.Errorf("Expected %v, Got %v", expected, b.stoppedUnits))
@@ -322,10 +335,33 @@ func TestUpgradeTakeover(t *testing.T) {
322335
expectedRestarted := []string{"router"}
323336
expectedStarted := []string{"publisher", "store-monitor", "store-daemon", "store-metadata",
324337
"store-gateway@*", "store-volume", "logger", "logspout", "database", "registry@*",
325-
"controller", "builder", "publisher", "router@*", "database", "registry@*",
326-
"controller", "builder", "publisher", "router@*"}
338+
"controller", "builder", "publisher", "database", "registry@*",
339+
"controller", "builder", "publisher"}
340+
341+
if err := doUpgradeTakeOver(false, &b, testMock); err != nil {
342+
t.Error(fmt.Errorf("Takeover failed: %v", err))
343+
}
344+
345+
if !reflect.DeepEqual(b.restartedUnits, expectedRestarted) {
346+
t.Error(fmt.Errorf("Expected %v, Got %v", expectedRestarted, b.restartedUnits))
347+
}
348+
if !reflect.DeepEqual(b.startedUnits, expectedStarted) {
349+
t.Error(fmt.Errorf("Expected %v, Got %v", expectedStarted, b.startedUnits))
350+
}
351+
}
352+
353+
func TestStatelessUpgradeTakeover(t *testing.T) {
354+
t.Parallel()
355+
testMock := mock.ConfigBackend{Expected: []*model.ConfigNode{{Key: "/deis/services/app1", Value: "foo", TTL: 10},
356+
{Key: "/deis/services/app2", Value: "8000", TTL: 10}}}
357+
358+
b := backendStub{}
359+
expectedRestarted := []string{"router"}
360+
expectedStarted := []string{"publisher", "logspout", "registry@*",
361+
"controller", "builder", "publisher", "router@*", "registry@*",
362+
"controller", "builder", "publisher"}
327363

328-
if err := doUpgradeTakeOver(&b, testMock); err != nil {
364+
if err := doUpgradeTakeOver(true, &b, testMock); err != nil {
329365
t.Error(fmt.Errorf("Takeover failed: %v", err))
330366
}
331367

deisctl/cmd/upgrade.go

Lines changed: 111 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package cmd
22

33
import (
4+
"bytes"
45
"fmt"
6+
"io"
57
"sync"
68

79
"github.com/deis/deis/deisctl/backend"
@@ -10,33 +12,35 @@ import (
1012
)
1113

1214
// UpgradePrep stops and uninstalls all components except router and publisher
13-
func UpgradePrep(b backend.Backend) error {
15+
func UpgradePrep(stateless bool, b backend.Backend) error {
1416
var wg sync.WaitGroup
1517

1618
b.Stop([]string{"database", "registry@*", "controller", "builder", "logger", "logspout"}, &wg, Stdout, Stderr)
1719
wg.Wait()
1820
b.Destroy([]string{"database", "registry@*", "controller", "builder", "logger", "logspout"}, &wg, Stdout, Stderr)
1921
wg.Wait()
2022

21-
b.Stop([]string{"store-volume", "store-gateway@*"}, &wg, Stdout, Stderr)
22-
wg.Wait()
23-
b.Destroy([]string{"store-volume", "store-gateway@*"}, &wg, Stdout, Stderr)
24-
wg.Wait()
23+
if !stateless {
24+
b.Stop([]string{"store-volume", "store-gateway@*"}, &wg, Stdout, Stderr)
25+
wg.Wait()
26+
b.Destroy([]string{"store-volume", "store-gateway@*"}, &wg, Stdout, Stderr)
27+
wg.Wait()
2528

26-
b.Stop([]string{"store-metadata"}, &wg, Stdout, Stderr)
27-
wg.Wait()
28-
b.Destroy([]string{"store-metadata"}, &wg, Stdout, Stderr)
29-
wg.Wait()
29+
b.Stop([]string{"store-metadata"}, &wg, Stdout, Stderr)
30+
wg.Wait()
31+
b.Destroy([]string{"store-metadata"}, &wg, Stdout, Stderr)
32+
wg.Wait()
3033

31-
b.Stop([]string{"store-daemon"}, &wg, Stdout, Stderr)
32-
wg.Wait()
33-
b.Destroy([]string{"store-daemon"}, &wg, Stdout, Stderr)
34-
wg.Wait()
34+
b.Stop([]string{"store-daemon"}, &wg, Stdout, Stderr)
35+
wg.Wait()
36+
b.Destroy([]string{"store-daemon"}, &wg, Stdout, Stderr)
37+
wg.Wait()
3538

36-
b.Stop([]string{"store-monitor"}, &wg, Stdout, Stderr)
37-
wg.Wait()
38-
b.Destroy([]string{"store-monitor"}, &wg, Stdout, Stderr)
39-
wg.Wait()
39+
b.Stop([]string{"store-monitor"}, &wg, Stdout, Stderr)
40+
wg.Wait()
41+
b.Destroy([]string{"store-monitor"}, &wg, Stdout, Stderr)
42+
wg.Wait()
43+
}
4044

4145
fmt.Fprintln(Stdout, "The platform has been stopped, but applications are still serving traffic as normal.")
4246
fmt.Fprintln(Stdout, "Your cluster is now ready for upgrade. Install a new deisctl version and run `deisctl upgrade-takeover`.")
@@ -65,16 +69,15 @@ func republishServices(ttl uint64, nodes []*model.ConfigNode, cb config.Backend)
6569
}
6670

6771
// UpgradeTakeover gracefully starts a platform stopped with UpgradePrep
68-
func UpgradeTakeover(b backend.Backend, cb config.Backend) error {
69-
70-
if err := doUpgradeTakeOver(b, cb); err != nil {
72+
func UpgradeTakeover(stateless bool, b backend.Backend, cb config.Backend) error {
73+
if err := doUpgradeTakeOver(stateless, b, cb); err != nil {
7174
return err
7275
}
7376

7477
return nil
7578
}
7679

77-
func doUpgradeTakeOver(b backend.Backend, cb config.Backend) error {
80+
func doUpgradeTakeOver(stateless bool, b backend.Backend, cb config.Backend) error {
7881
var wg sync.WaitGroup
7982

8083
nodes, err := listPublishedServices(cb)
@@ -98,10 +101,95 @@ func doUpgradeTakeOver(b backend.Backend, cb config.Backend) error {
98101
b.Start([]string{"publisher"}, &wg, Stdout, Stderr)
99102
wg.Wait()
100103

101-
installDefaultServices(b, false, &wg, Stdout, Stderr) // @fixme: hax?
104+
installUpgradeServices(b, stateless, &wg, Stdout, Stderr)
102105
wg.Wait()
103106

104-
startDefaultServices(b, false, &wg, Stdout, Stderr) // @fixme: hax?
107+
startUpgradeServices(b, stateless, &wg, Stdout, Stderr)
105108
wg.Wait()
106109
return nil
107110
}
111+
112+
func installUpgradeServices(b backend.Backend, stateless bool, wg *sync.WaitGroup, out, err io.Writer) {
113+
if !stateless {
114+
fmt.Fprintln(out, "Storage subsystem...")
115+
b.Create([]string{"store-daemon", "store-monitor", "store-metadata", "store-volume", "store-gateway@1"}, wg, out, err)
116+
wg.Wait()
117+
}
118+
119+
fmt.Fprintln(out, "Logging subsystem...")
120+
if stateless {
121+
b.Create([]string{"logspout"}, wg, out, err)
122+
} else {
123+
b.Create([]string{"logger", "logspout"}, wg, out, err)
124+
}
125+
wg.Wait()
126+
127+
fmt.Fprintln(out, "Control plane...")
128+
if stateless {
129+
b.Create([]string{"registry@1", "controller", "builder"}, wg, out, err)
130+
} else {
131+
b.Create([]string{"database", "registry@1", "controller", "builder"}, wg, out, err)
132+
}
133+
wg.Wait()
134+
135+
fmt.Fprintln(out, "Data plane...")
136+
b.Create([]string{"publisher"}, wg, out, err)
137+
wg.Wait()
138+
}
139+
140+
func startUpgradeServices(b backend.Backend, stateless bool, wg *sync.WaitGroup, out, err io.Writer) {
141+
142+
// Wait for groups to come up.
143+
// If we're running in stateless mode, we start only a subset of services.
144+
if !stateless {
145+
fmt.Fprintln(out, "Storage subsystem...")
146+
b.Start([]string{"store-monitor"}, wg, out, err)
147+
wg.Wait()
148+
b.Start([]string{"store-daemon"}, wg, out, err)
149+
wg.Wait()
150+
b.Start([]string{"store-metadata"}, wg, out, err)
151+
wg.Wait()
152+
153+
// we start gateway first to give metadata time to come up for volume
154+
b.Start([]string{"store-gateway@*"}, wg, out, err)
155+
wg.Wait()
156+
b.Start([]string{"store-volume"}, wg, out, err)
157+
wg.Wait()
158+
}
159+
160+
// start logging subsystem first to collect logs from other components
161+
fmt.Fprintln(out, "Logging subsystem...")
162+
if !stateless {
163+
b.Start([]string{"logger"}, wg, out, err)
164+
wg.Wait()
165+
}
166+
b.Start([]string{"logspout"}, wg, out, err)
167+
wg.Wait()
168+
169+
// Start these in parallel. This section can probably be removed now.
170+
var bgwg sync.WaitGroup
171+
var trash bytes.Buffer
172+
batch := []string{
173+
"database", "registry@*", "controller", "builder",
174+
"publisher",
175+
}
176+
if stateless {
177+
batch = []string{"registry@*", "controller", "builder", "publisher", "router@*"}
178+
}
179+
b.Start(batch, &bgwg, &trash, &trash)
180+
181+
fmt.Fprintln(Stdout, "Control plane...")
182+
batch = []string{"database", "registry@*", "controller"}
183+
if stateless {
184+
batch = []string{"registry@*", "controller"}
185+
}
186+
b.Start(batch, wg, out, err)
187+
wg.Wait()
188+
189+
b.Start([]string{"builder"}, wg, out, err)
190+
wg.Wait()
191+
192+
fmt.Fprintln(out, "Data plane...")
193+
b.Start([]string{"publisher"}, wg, out, err)
194+
wg.Wait()
195+
}

0 commit comments

Comments
 (0)