Skip to content

Commit 1ca970b

Browse files
authored
fix(app): create image pull secrets outside of the async deploy loop (#1032)
The issue is that multiple process types can cause 409 (Conflict) errors when run in the async process. It's fine to run this ahead of any deploy, as this secret is not tied to the release version of an application. Moved image pull secret and registry config generation into the App model, as it makes more sense given where it gets used. This also reduces `django.settings` usage in the `scheduler`. Fixes #1031
1 parent 1db6146 commit 1ca970b

4 files changed

Lines changed: 157 additions & 143 deletions

File tree

rootfs/api/models/app.py

Lines changed: 97 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import backoff
2+
import base64
23
from collections import OrderedDict
34
from datetime import datetime
5+
from docker.auth import auth as docker_auth
46
import functools
7+
import json
58
import logging
69
import random
710
import re
@@ -398,6 +401,7 @@ def _scale_pods(self, scale_types):
398401
version = "v{}".format(release.version)
399402
image = release.image
400403
envs = self._build_env_vars(release.build.type, version, image, release.config.values)
404+
registry = release.config.registry
401405

402406
# see if the app config has deploy batch preference, otherwise use global
403407
batches = release.config.values.get('DEIS_DEPLOY_BATCHES', settings.DEIS_DEPLOY_BATCHES)
@@ -408,6 +412,9 @@ def _scale_pods(self, scale_types):
408412
# get application level pod termination grace period
409413
pod_termination_grace_period_seconds = release.config.values.get('KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS', settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS) # noqa
410414

415+
# create image pull secret if needed
416+
image_pull_secret_name = self.image_pull_secret(self.id, registry, image)
417+
411418
tasks = []
412419
for scale_type, replicas in scale_types.items():
413420
# only web / cmd are routable
@@ -427,7 +434,7 @@ def _scale_pods(self, scale_types):
427434
'cpu': release.config.cpu,
428435
'tags': release.config.tags,
429436
'envs': envs,
430-
'registry': release.config.registry,
437+
'registry': registry,
431438
'version': version,
432439
'replicas': replicas,
433440
'app_type': scale_type,
@@ -437,6 +444,7 @@ def _scale_pods(self, scale_types):
437444
'deploy_batches': batches,
438445
'deploy_timeout': deploy_timeout,
439446
'pod_termination_grace_period_seconds': pod_termination_grace_period_seconds,
447+
'image_pull_secret_name': image_pull_secret_name,
440448
}
441449

442450
# gather all proc types to be deployed
@@ -482,6 +490,12 @@ def deploy(self, release, force_deploy=False):
482490
self.structure = self._default_structure(release)
483491
self.save()
484492

493+
image = release.image
494+
registry = release.config.registry
495+
version = "v{}".format(release.version)
496+
envs = self._build_env_vars(release.build.type, version, image, release.config.values)
497+
tags = release.config.tags
498+
485499
# see if the app config has deploy batch preference, otherwise use global
486500
batches = release.config.values.get('DEIS_DEPLOY_BATCHES', settings.DEIS_DEPLOY_BATCHES)
487501

@@ -493,12 +507,11 @@ def deploy(self, release, force_deploy=False):
493507
# get application level pod termination grace period
494508
pod_termination_grace_period_seconds = release.config.values.get('KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS', settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS) # noqa
495509

510+
# create image pull secret if needed
511+
image_pull_secret_name = self.image_pull_secret(self.id, registry, image)
512+
496513
# deploy application to k8s. Also handles initial scaling
497514
deploys = {}
498-
image = release.image
499-
version = "v{}".format(release.version)
500-
envs = self._build_env_vars(release.build.type, version, image, release.config.values)
501-
tags = release.config.tags
502515

503516
for scale_type, replicas in self.structure.items():
504517
# only web / cmd are routable
@@ -518,7 +531,7 @@ def deploy(self, release, force_deploy=False):
518531
'cpu': release.config.cpu,
519532
'tags': tags,
520533
'envs': envs,
521-
'registry': release.config.registry,
534+
'registry': registry,
522535
'replicas': replicas,
523536
'version': version,
524537
'app_type': scale_type,
@@ -530,6 +543,7 @@ def deploy(self, release, force_deploy=False):
530543
'deployment_history_limit': deployment_history,
531544
'release_summary': release.summary,
532545
'pod_termination_grace_period_seconds': pod_termination_grace_period_seconds,
546+
'image_pull_secret_name': image_pull_secret_name,
533547
}
534548

535549
# Sort deploys so routable comes first
@@ -733,28 +747,34 @@ def pod_name(size=5, chars=string.ascii_lowercase + string.digits):
733747
if release.build is None:
734748
raise DeisException('No build associated with this release to run this command')
735749

750+
image = release.image
751+
registry = release.config.registry
752+
version = "v{}".format(release.version)
753+
envs = self._build_env_vars(release.build.type, version, image, release.config.values)
754+
736755
# see if the app config has deploy timeout preference, otherwise use global
737756
deploy_timeout = release.config.values.get('DEIS_DEPLOY_TIMEOUT', settings.DEIS_DEPLOY_TIMEOUT) # noqa
738757

739758
# get application level pod termination grace period
740759
pod_termination_grace_period_seconds = release.config.values.get('KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS', settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS) # noqa
741760

761+
# create image pull secret if needed
762+
image_pull_secret_name = self.image_pull_secret(self.id, registry, image)
763+
742764
name = self._get_job_id(scale_type) + '-' + pod_name()
743765
self.log("{} on {} runs '{}'".format(user.username, name, command))
744766

745-
image = release.image
746-
version = "v{}".format(release.version)
747-
envs = self._build_env_vars(release.build.type, version, image, release.config.values)
748767
kwargs = {
749768
'memory': release.config.memory,
750769
'cpu': release.config.cpu,
751770
'tags': release.config.tags,
752771
'envs': envs,
753-
'registry': release.config.registry,
772+
'registry': registry,
754773
'version': version,
755774
'build_type': release.build.type,
756775
'deploy_timeout': deploy_timeout,
757776
'pod_termination_grace_period_seconds': pod_termination_grace_period_seconds,
777+
'image_pull_secret_name': image_pull_secret_name,
758778
}
759779

760780
try:
@@ -962,3 +982,70 @@ def autoscale(self, proc_type, autoscale):
962982
else:
963983
# let the user know about any other errors
964984
raise ServiceUnavailable(str(e)) from e
985+
986+
def image_pull_secret(self, namespace, registry, image):
    """
    Make sure the registry credentials exist as a secret and return the secret name.

    The returned name is meant to be referenced as an imagePullSecret on an RC / Deployment
    http://kubernetes.io/docs/user-guide/images/#specifying-imagepullsecrets-on-a-pod

    Returns None when _get_private_registry_config reports that no pull secret applies,
    otherwise the secret name (the secret is only created / updated when the config
    asks for it).
    """
    docker_config, name, create = self._get_private_registry_config(image, registry)

    # no registry information at all: there is nothing to attach to the pod
    if create is None:
        return None

    # a name without anything to write: hand the name back untouched
    if not create:
        return name

    payload = {'.dockerconfigjson': docker_config}
    secret_type = 'kubernetes.io/dockerconfigjson'
    try:
        self._scheduler.secret.get(namespace, name)
    except KubeHTTPException:
        # NOTE(review): every HTTP failure of the lookup is treated as "secret missing",
        # not only a not-found response - confirm that is intended
        self._scheduler.secret.create(namespace, name, payload, secret_type=secret_type)
    else:
        # the secret is already there, refresh it with the current credentials
        self._scheduler.secret.update(namespace, name, payload, secret_type=secret_type)

    return name
1015+
def _get_private_registry_config(self, image, registry=None):
    """
    Work out the docker config and secret name needed to pull ``image``.

    Returns a ``(docker_config, name, create)`` tuple:

    - ``(json string, name, True)`` when credentials are available, either from
      ``registry`` or from the 'registry-secret' secret when REGISTRY_LOCATION is 'off-cluster'
    - ``(None, name, False)`` when REGISTRY_LOCATION is 'ecr' or 'gcr'
    - ``(None, None, None)`` in every other case
    """
    docker_hub = 'https://index.docker.io/v1/'
    secret_name = settings.REGISTRY_SECRET_PREFIX

    if registry:
        # prefer an explicit hostname, otherwise derive it from the image name
        hostname = registry.get('hostname', None) or docker_auth.split_repo_name(image)[0]
        if hostname == docker_auth.INDEX_NAME:
            hostname = docker_hub

        username, password = registry.get('username'), registry.get('password')
    else:
        location = settings.REGISTRY_LOCATION
        if location in ['ecr', 'gcr']:
            # only a name is produced for these locations, nothing to create
            return None, secret_name + '-' + location, False

        if location != 'off-cluster':
            return None, None, None

        stored = self._scheduler.secret.get('deis', 'registry-secret').json()['data']
        username = stored['username']
        password = stored['password']
        hostname = stored['hostname']
        if hostname == '':
            hostname = docker_hub

        secret_name = secret_name + '-' + location

    # the auth entry is the base64 of "username:password", wrapped in a JSON document
    credentials = '{}:{}'.format(username, password).encode('UTF-8')
    encoded = base64.b64encode(credentials).decode(encoding='UTF-8')
    docker_config = json.dumps({'auths': {hostname: {'auth': encoded}}})

    return docker_config, secret_name, True

rootfs/api/tests/test_app.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
44
Run the tests with "./manage.py test api"
55
"""
6+
import base64
7+
import json
68
import logging
79
from unittest import mock
810
import random
@@ -11,6 +13,7 @@
1113
from django.conf import settings
1214
from django.contrib.auth.models import User
1315
from django.core.cache import cache
16+
from django.test.utils import override_settings
1417
from rest_framework.authtoken.models import Token
1518

1619
from api.models import App
@@ -508,6 +511,60 @@ def test_app_service_metadata(self, mock_requests):
508511
self.assertIn('labels', svc['metadata'])
509512
self.assertIn('annotations', svc['metadata'])
510513

514+
def test_get_private_registry_config(self, mock_requests):
    # credentials handed in through the registry dict, without an explicit hostname
    registry = {'username': 'test', 'password': 'test'}
    expected_auth = base64.b64encode('test:test'.encode('UTF-8')).decode(encoding='UTF-8')

    # (image, key expected under 'auths' in the generated docker config)
    cases = [
        ('test/test', "https://index.docker.io/v1/"),
        ("quay.io/test/test", "quay.io"),
    ]
    for image, host in cases:
        docker_config, name, create = App()._get_private_registry_config(image, registry)
        auths = json.loads(docker_config).get('auths')
        self.assertEqual(auths, {host: {"auth": expected_auth}})
        self.assertEqual(name, "private-registry")
        self.assertEqual(create, True)
534+
535+
@override_settings(REGISTRY_LOCATION="ecr")
def test_get_private_registry_config_ecr(self, mock_requests):
    # empty registry info + ecr location: a secret name but no config and nothing to create
    result = App()._get_private_registry_config("test.com/test/test", {})
    docker_config, name, create = result

    self.assertEqual(docker_config, None)
    self.assertEqual(name, "private-registry-ecr")
    self.assertEqual(create, False)
543+
544+
@override_settings(REGISTRY_LOCATION="off-cluster")
def test_get_private_registry_config_off_cluster(self, mock_requests):
    # empty registry info + off-cluster location: a config is generated and flagged for creation
    expected_auth = base64.b64encode('test:test'.encode('UTF-8')).decode(encoding='UTF-8')

    result = App()._get_private_registry_config("test.com/test/test", {})
    docker_config, name, create = result

    auths = json.loads(docker_config).get('auths')
    self.assertEqual(auths, {"https://index.docker.io/v1/": {"auth": expected_auth}})
    self.assertEqual(name, "private-registry-off-cluster")
    self.assertEqual(create, True)
558+
559+
@override_settings(REGISTRY_LOCATION="ecra")
def test_get_private_registry_config_bad_registry_location(self, mock_requests):
    # empty registry info + unrecognised location: nothing at all is produced
    result = App()._get_private_registry_config("test.com/test/test", {})
    docker_config, name, create = result

    self.assertEqual(docker_config, None)
    self.assertEqual(name, None)
    self.assertEqual(create, None)
567+
511568

512569
FAKE_LOG_DATA = """
513570
2013-08-15 12:41:25 [33454] [INFO] Starting gunicorn 17.5

rootfs/scheduler/resources/pod.py

Lines changed: 3 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
import base64
21
from datetime import datetime, timedelta
3-
from docker.auth import auth as docker_auth
4-
import json
52
import operator
63
import os
74
import time
@@ -159,7 +156,9 @@ def manifest(self, namespace, name, image, **kwargs):
159156
container_name = namespace + '-' + app_type
160157
self._set_container(namespace, container_name, container, **kwargs)
161158
# add image to the mix
162-
self._set_image_secret(spec, namespace, **kwargs)
159+
if kwargs.get('image_pull_secret_name', None) is not None:
160+
# apply image pull secret to a Pod spec
161+
spec['imagePullSecrets'] = [{'name': kwargs.get('image_pull_secret_name')}]
163162

164163
spec['containers'] = [container]
165164

@@ -317,72 +316,6 @@ def _default_dockerapp_readiness_probe(self, port, delay=5, timeout=5, period_se
317316
}
318317
return readinessprobe
319318

320-
def _get_private_registry_config(self, registry, image):
321-
secret_name = settings.REGISTRY_SECRET_PREFIX
322-
if registry:
323-
# try to get the hostname information
324-
hostname = registry.get('hostname', None)
325-
if not hostname:
326-
hostname, _ = docker_auth.split_repo_name(image)
327-
if hostname == docker_auth.INDEX_NAME:
328-
hostname = "https://index.docker.io/v1/"
329-
username = registry.get('username')
330-
password = registry.get('password')
331-
elif settings.REGISTRY_LOCATION == 'off-cluster':
332-
secret = self.secret.get('deis', 'registry-secret').json()
333-
username = secret['data']['username']
334-
password = secret['data']['password']
335-
hostname = secret['data']['hostname']
336-
if hostname == '':
337-
hostname = "https://index.docker.io/v1/"
338-
secret_name = secret_name+"-"+settings.REGISTRY_LOCATION
339-
elif settings.REGISTRY_LOCATION in ['ecr', 'gcr']:
340-
return None, secret_name+"-"+settings.REGISTRY_LOCATION, False
341-
else:
342-
return None, None, None
343-
344-
# create / update private registry secret
345-
auth = bytes('{}:{}'.format(username, password), 'UTF-8')
346-
# value has to be a base64 encoded JSON
347-
docker_config = json.dumps({
348-
"auths": {
349-
hostname: {
350-
"auth": base64.b64encode(auth).decode(encoding='UTF-8')
351-
}
352-
}
353-
})
354-
return docker_config, secret_name, True
355-
356-
def _set_image_secret(self, data, namespace, **kwargs):
357-
"""
358-
Take registry information and set as an imagePullSecret for an RC / Deployment
359-
http://kubernetes.io/docs/user-guide/images/#specifying-imagepullsecrets-on-a-pod
360-
"""
361-
docker_config, secret_name, secret_create = self._get_private_registry_config(kwargs.get('registry', {}), kwargs.get('image')) # noqa
362-
if secret_create is None:
363-
return
364-
elif secret_create:
365-
secret_data = {'.dockerconfigjson': docker_config}
366-
try:
367-
self.secret.get(namespace, secret_name)
368-
except KubeHTTPException:
369-
self.secret.create(
370-
namespace,
371-
secret_name,
372-
secret_data,
373-
secret_type='kubernetes.io/dockerconfigjson'
374-
)
375-
else:
376-
self.secret.update(
377-
namespace,
378-
secret_name,
379-
secret_data,
380-
secret_type='kubernetes.io/dockerconfigjson'
381-
)
382-
383-
# apply image pull secret to a Pod spec
384-
data['imagePullSecrets'] = [{'name': secret_name}]
385-
386319
def delete(self, namespace, name):
387320
# get timeout info from pod
388321
pod = self.pod.get(namespace, name).json()

0 commit comments

Comments
 (0)