Skip to content

Commit 86cb2e9

Browse files
committed
chore(controller): use the kubectl api to override pods restart
1 parent d5379b7 commit 86cb2e9

6 files changed

Lines changed: 45 additions & 105 deletions

File tree

charts/controller/templates/controller-clusterrole.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ rules:
4848
verbs: ["get", "list", "delete", "update"]
4949
- apiGroups: ["extensions", "apps"]
5050
resources: ["deployments"]
51-
verbs: ["get", "list", "create", "update", "delete"]
51+
verbs: ["get", "list", "create", "update", "patch", "delete"]
5252
- apiGroups: ["extensions", "apps"]
5353
resources: ["deployments/scale", "replicasets/scale"]
5454
verbs: ["get", "update"]

rootfs/api/models/app.py

Lines changed: 11 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -356,84 +356,27 @@ def delete(self, *args, **kwargs):
356356

357357
def restart(self, **kwargs): # noqa
358358
"""
359-
Restart found pods by deleting them (RC / Deployment will recreate).
360-
Wait until they are all drained away and RC / Deployment has gotten to a good state
359+
Restart deployments with the kubectl rollout api
361360
"""
362-
try:
363-
# Resolve single pod name if short form (cmd-1269180282-1nyfz) is passed
364-
if 'name' in kwargs and kwargs['name'].count('-') == 2:
365-
kwargs['name'] = '{}-{}'.format(kwargs['id'], kwargs['name'])
366-
367-
# Iterate over RCs / RSs to get total desired count if not a single item
368-
desired = 1
369-
if 'name' not in kwargs:
370-
desired = 0
371-
labels = self._scheduler_filter(**kwargs)
372-
# fetch RS (which represent Deployments)
373-
controllers = self._scheduler.rs.get(kwargs['id'], labels=labels).json()['items']
374-
if not controllers:
375-
controllers = []
376-
for controller in controllers:
377-
desired += controller['spec']['replicas']
378-
except KubeException:
379-
# Nothing was found
380-
return []
381-
361+
deployments = []
362+
if 'type' in kwargs and kwargs['type'] in self.structure:
363+
deployments.append(self._get_job_id(kwargs['type']))
364+
else:
365+
for scale_type, _ in self.structure.items():
366+
deployments.append(self._get_job_id(scale_type))
382367
try:
383368
tasks = [
384369
functools.partial(
385-
self._scheduler.pod.delete,
370+
self._scheduler.deployment.restart,
386371
self.id,
387-
pod['name']
388-
) for pod in self.list_pods(**kwargs)
372+
deployment
373+
) for deployment in deployments
389374
]
390-
391375
apply_tasks(tasks)
392376
except Exception as e:
393-
err = "warning, some pods failed to stop:\n{}".format(str(e))
377+
err = "warning, some pods failed to restart:\n{}".format(str(e))
394378
self.log(err, logging.WARNING)
395379

396-
# Wait for pods to start
397-
try:
398-
timeout = 300 # 5 minutes
399-
elapsed = 0
400-
while True:
401-
# timed out
402-
if elapsed >= timeout:
403-
raise DryccException('timeout - 5 minutes have passed and pods are not up')
404-
405-
# restarting a single pod behaves differently, fetch the *newest* pod
406-
# and hope it is the right one. Comes back sorted
407-
if 'name' in kwargs:
408-
del kwargs['name']
409-
pods = self.list_pods(**kwargs)
410-
# Add in the latest name
411-
if len(pods) == 0:
412-
# if pod is not even scheduled wait for it and pass dummy kwargs
413-
# to indicate restart of a single pod
414-
kwargs['name'] = "dummy"
415-
continue
416-
kwargs['name'] = pods[0]['name']
417-
pods = pods[0]
418-
419-
actual = 0
420-
for pod in self.list_pods(**kwargs):
421-
if pod['state'] == 'up':
422-
actual += 1
423-
424-
if desired == actual:
425-
break
426-
427-
elapsed += 5
428-
time.sleep(5)
429-
except Exception as e:
430-
err = "warning, some pods failed to start:\n{}".format(str(e))
431-
self.log(err, logging.WARNING)
432-
433-
# Return the new pods
434-
pods = self.list_pods(**kwargs)
435-
return pods
436-
437380
def _clean_app_logs(self):
438381
"""Delete application logs stored by the logger component"""
439382
try:

rootfs/api/tests/test_pods.py

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -779,39 +779,20 @@ def test_restart_pods(self, mock_requests):
779779

780780
# restart all pods
781781
response = self.client.post('/v2/apps/{}/pods/restart'.format(app_id))
782-
self.assertEqual(response.status_code, 200, response.data)
783-
# Compare restarted pods to all pods
784-
self.assertEqual(len(response.data), 12)
782+
self.assertEqual(response.status_code, 204, response.data)
785783

786784
# restart only the workers
787785
response = self.client.post('/v2/apps/{}/pods/worker/restart'.format(app_id))
788-
self.assertEqual(response.status_code, 200, response.data)
789-
# Compare restarted pods to only worker pods
790-
self.assertEqual(len(response.data), 8)
786+
self.assertEqual(response.status_code, 204, response.data)
791787

792788
# restart only the web
793789
response = self.client.post('/v2/apps/{}/pods/web/restart'.format(app_id))
794-
self.assertEqual(response.status_code, 200, response.data)
795-
# Compare restarted pods to only worker pods
796-
self.assertEqual(len(response.data), 4)
790+
self.assertEqual(response.status_code, 204, response.data)
797791

798792
# restart only one of the web pods
799793
pods = application.list_pods(type='web')
800794
self.assertEqual(len(pods), 4)
801795

802-
pod = pods.pop()
803-
response = self.client.post('/v2/apps/{}/pods/web/{}/restart'.format(app_id, pod['name']))
804-
self.assertEqual(response.status_code, 200, response.data)
805-
self.assertEqual(len(response.data), 1)
806-
self.assertEqual(response.data[0]['type'], 'web')
807-
808-
# restart only one web port but using the short name of web-asdfg
809-
name = 'web-' + pod['name'].split('-').pop()
810-
response = self.client.post('/v2/apps/{}/pods/web/{}/restart'.format(app_id, name))
811-
self.assertEqual(response.status_code, 200, response.data)
812-
self.assertEqual(len(response.data), 1)
813-
self.assertEqual(response.data[0]['type'], 'web')
814-
815796
def test_list_pods_failure(self, mock_requests):
816797
"""
817798
Listing all available pods exceptions

rootfs/api/urls.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,6 @@
4242
re_path(
4343
r"^apps/(?P<id>{})/pods/(?P<type>[-_\w.]+)/restart/?$".format(settings.APP_URL_REGEX),
4444
views.PodViewSet.as_view({'post': 'restart'})),
45-
re_path(
46-
r"^apps/(?P<id>{})/pods/(?P<type>[-_\w]+)/(?P<name>[-_\w]+)/restart/?$".format(
47-
settings.APP_URL_REGEX),
48-
views.PodViewSet.as_view({'post': 'restart'})),
4945
# list pods
5046
re_path(
5147
r"^apps/(?P<id>{})/pods/(?P<type>[-_\w]+)/(?P<name>[-_\w]+)/?$".format(

rootfs/api/views.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -343,16 +343,8 @@ def list(self, *args, **kwargs):
343343
return Response(pagination, status=status.HTTP_200_OK)
344344

345345
def restart(self, *args, **kwargs):
346-
if "name" in kwargs: # a single pod uses sync
347-
pods = self.get_app().restart(**kwargs)
348-
else: # multi pod uses async
349-
restart_app.delay(self.get_app(), **kwargs)
350-
pods = self.get_app().list_pods(**kwargs)
351-
data = self.get_serializer(pods, many=True).data
352-
# fake out pagination for now
353-
# pagination = {'results': data, 'count': len(data)}
354-
pagination = data
355-
return Response(pagination, status=status.HTTP_200_OK)
346+
restart_app.delay(self.get_app(), **kwargs)
347+
return Response(status=status.HTTP_204_NO_CONTENT)
356348

357349

358350
class AppSettingsViewSet(AppResourceViewSet):

rootfs/scheduler/resources/deployment.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,34 @@ def scale(self, namespace, name, image, entrypoint, command, **kwargs):
174174
kwargs['previous_replicas'] = current
175175
self.wait_until_ready(namespace, name, **kwargs)
176176

177+
def restart(self, namespace, name):
178+
url = self.api(
179+
"/namespaces/{}/deployments/{}?fieldManager=kubectl-rollout&pretty=true",
180+
namespace, name
181+
)
182+
restartedAt = "%sZ" % (timedelta(seconds=3) + datetime.utcnow()).isoformat("T")
183+
response = self.http_patch(
184+
url,
185+
data=json.dumps({
186+
"spec": {
187+
"template": {
188+
"metadata": {
189+
"annotations": {
190+
"kubectl.kubernetes.io/restartedAt": restartedAt,
191+
}
192+
}
193+
}
194+
}
195+
}),
196+
headers={"Content-Type": "application/merge-patch+json"},
197+
)
198+
if self.unhealthy(response.status_code):
199+
raise KubeHTTPException(
200+
response,
201+
'restart Deployment "{}" in Namespace "{}"', name, namespace
202+
)
203+
return response
204+
177205
def in_progress(self, namespace, name, timeout, batches, replicas, tags):
178206
"""
179207
Determine if a Deployment has a deploy in progress

0 commit comments

Comments
 (0)