Skip to content

Commit 7e3c374

Browse files
committed
Merge pull request #3975 from kmala/k8s
feat(k8s) : add k8s scheduler support to deis
2 parents a54575d + 01c4059 commit 7e3c374

20 files changed

Lines changed: 1284 additions & 80 deletions

contrib/coreos/user-data.example

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,4 +316,42 @@ write_files:
316316
# removing the node from etcd
317317
NODE=$($ETCDCTL member list | grep `cat /etc/machine-id` | cut -d ':' -f 1)
318318
$ETCDCTL member remove $NODE
319+
- path: /opt/bin/wupiao
320+
permissions: '0755'
321+
content: |
322+
#!/bin/bash
323+
# [w]ait [u]ntil [p]ort [i]s [a]ctually [o]pen
324+
[ -n "$1" ] && \
325+
until curl -o /dev/null -sIf http://${1}; do \
326+
sleep 1 && echo .;
327+
done;
328+
exit $?
329+
- path: /opt/bin/download-k8s-binary
330+
permissions: '0755'
331+
content: |
332+
#!/bin/bash
333+
export K8S_VERSION="v1.0.1"
334+
mkdir -p /opt/bin
335+
FILE=$1
336+
if [ ! -f /opt/bin/$FILE ]; then
337+
curl -sSL -o /opt/bin/$FILE https://storage.googleapis.com/kubernetes-release/release/${K8S_VERSION}/bin/linux/amd64/$FILE
338+
chmod +x /opt/bin/$FILE
339+
else
340+
# we check the version of the binary
341+
INSTALLED_VERSION=$(/opt/bin/$FILE --version)
342+
MATCH=$(echo "${INSTALLED_VERSION}" | grep -c "${K8S_VERSION}")
343+
if [ $MATCH -eq 0 ]; then
344+
# the version is different
345+
curl -sSL -o /opt/bin/$FILE https://storage.googleapis.com/kubernetes-release/release/${K8S_VERSION}/bin/linux/amd64/$FILE
346+
chmod +x /opt/bin/$FILE
347+
fi
348+
fi
349+
- path: /opt/bin/scheduler-policy.json
350+
content: |
351+
{
352+
"kind": "Policy",
353+
"apiVersion": "v1",
354+
"predicates": [{"name": "PodFitsPorts"},{"name": "PodFitsResources"},{"name": "NoDiskConflict"},{"name": "MatchNodeSelector"},{"name": "HostName"}],
355+
"priorities": [{"name": "LeastRequestedPriority","weight": 1},{"name": "BalancedResourceAllocation","weight": 1},{"name": "ServiceSpreadingPriority","weight": 2},{"name": "EqualPriority","weight": 1}]
356+
}
319357
manage_etc_hosts: localhost

controller/api/models.py

Lines changed: 91 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,28 @@ def __str__(self):
179179
def url(self):
180180
return self.id + '.' + settings.DEIS_DOMAIN
181181

182+
def _get_job_id(self, container_type):
183+
app = self.id
184+
release = self.release_set.latest()
185+
version = "v{}".format(release.version)
186+
job_id = "{app}_{version}.{container_type}".format(**locals())
187+
return job_id
188+
189+
def _get_command(self, container_type):
190+
try:
191+
# if this is not procfile-based app, ensure they cannot break out
192+
# and run arbitrary commands on the host
193+
# FIXME: remove slugrunner's hardcoded entrypoint
194+
release = self.release_set.latest()
195+
if release.build.dockerfile or not release.build.sha:
196+
return "bash -c '{}'".format(release.build.procfile[container_type])
197+
else:
198+
return 'start {}'.format(container_type)
199+
# if the key is not present or if a parent attribute is None
200+
except (KeyError, TypeError, AttributeError):
201+
# handle special case for Dockerfile deployments
202+
return '' if container_type == 'cmd' else 'start {}'.format(container_type)
203+
182204
def log(self, message):
183205
"""Logs a message to the application's log file.
184206
@@ -242,6 +264,8 @@ def scale(self, user, structure): # noqa
242264
# iterate and scale by container type (web, worker, etc)
243265
changed = False
244266
to_add, to_remove = [], []
267+
scale_types = {}
268+
245269
# iterate on a copy of the container_type keys
246270
for container_type in requested_structure.keys():
247271
containers = list(self.container_set.filter(type=container_type).order_by('created'))
@@ -253,6 +277,7 @@ def scale(self, user, structure): # noqa
253277
if diff == 0:
254278
continue
255279
changed = True
280+
scale_types[container_type] = requested
256281
while diff < 0:
257282
c = containers.pop()
258283
to_remove.append(c)
@@ -267,11 +292,15 @@ def scale(self, user, structure): # noqa
267292
to_add.append(c)
268293
container_num += 1
269294
diff -= 1
295+
270296
if changed:
271-
if to_add:
272-
self._start_containers(to_add)
273-
if to_remove:
274-
self._destroy_containers(to_remove)
297+
if "scale" in dir(self._scheduler):
298+
self._scale_containers(scale_types, to_remove)
299+
else:
300+
if to_add:
301+
self._start_containers(to_add)
302+
if to_remove:
303+
self._destroy_containers(to_remove)
275304
# save new structure to the database
276305
vals = self.container_set.exclude(type='run').values(
277306
'type').annotate(Count('pk')).order_by()
@@ -281,6 +310,31 @@ def scale(self, user, structure): # noqa
281310
self.save()
282311
return changed
283312

313+
def _scale_containers(self, scale_types, to_remove):
314+
release = self.release_set.latest()
315+
for scale_type in scale_types:
316+
image = release.image
317+
version = "v{}".format(release.version)
318+
kwargs = {'memory': release.config.memory,
319+
'cpu': release.config.cpu,
320+
'tags': release.config.tags,
321+
'version': version,
322+
'aname': self.id,
323+
'num': scale_types[scale_type]}
324+
job_id = self._get_job_id(scale_type)
325+
command = self._get_command(scale_type)
326+
try:
327+
self._scheduler.scale(
328+
name=job_id,
329+
image=image,
330+
command=command,
331+
**kwargs)
332+
except Exception as e:
333+
err = '{} (scale): {}'.format(job_id, e)
334+
log_event(self, err, logging.ERROR)
335+
raise
336+
[c.delete() for c in to_remove]
337+
284338
def _start_containers(self, to_add):
285339
"""Creates and starts containers via the scheduler"""
286340
if not to_add:
@@ -334,21 +388,50 @@ def deploy(self, user, release):
334388
"""Deploy a new release to this application"""
335389
existing = self.container_set.exclude(type='run')
336390
new = []
391+
scale_types = set()
337392
for e in existing:
338393
n = e.clone(release)
339394
n.save()
340395
new.append(n)
396+
scale_types.add(e.type)
341397

342-
self._start_containers(new)
398+
if new and "deploy" in dir(self._scheduler):
399+
self._deploy_app(scale_types, release, existing)
400+
else:
401+
self._start_containers(new)
343402

344-
# destroy old containers
345-
if existing:
346-
self._destroy_containers(existing)
403+
# destroy old containers
404+
if existing:
405+
self._destroy_containers(existing)
347406

348407
# perform default scaling if necessary
349408
if self.structure == {} and release.build is not None:
350409
self._default_scale(user, release)
351410

411+
def _deploy_app(self, scale_types, release, existing):
412+
for scale_type in scale_types:
413+
image = release.image
414+
version = "v{}".format(release.version)
415+
kwargs = {'memory': release.config.memory,
416+
'cpu': release.config.cpu,
417+
'tags': release.config.tags,
418+
'aname': self.id,
419+
'num': 0,
420+
'version': version}
421+
job_id = self._get_job_id(scale_type)
422+
command = self._get_command(scale_type)
423+
try:
424+
self._scheduler.deploy(
425+
name=job_id,
426+
image=image,
427+
command=command,
428+
**kwargs)
429+
except Exception as e:
430+
err = '{} (deploy): {}'.format(job_id, e)
431+
log_event(self, err, logging.ERROR)
432+
raise
433+
[c.delete() for c in existing]
434+
352435
def _default_scale(self, user, release):
353436
"""Scale to default structure based on release type"""
354437
# if there is no SHA, assume a docker image is being promoted

0 commit comments

Comments
 (0)