Skip to content

Commit 8403b9f

Browse files
committed
ref(exceptions/logging): unify logging and make sure exceptions go into logs
Add a custom exception handler to deal with uncaught exceptions so they get properly logged to the stdout logging, and also return a 500 with a JSON body like everything else does. A lot of scheduling logging has moved up from debug to info (the new default value) and now DEBUG=True (in the django config file) will cause debug information to outputting tbgrep was added, usage would be k logs -f <controller pod> | tbgrep
1 parent 56e77ab commit 8403b9f

13 files changed

Lines changed: 133 additions & 125 deletions

File tree

rootfs/api/exceptions.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,38 @@
1-
class HealthcheckException(Exception):
1+
import logging
2+
from rest_framework.compat import set_rollback
3+
from rest_framework.exceptions import APIException, status
4+
from rest_framework.response import Response
5+
from rest_framework.views import exception_handler
6+
7+
8+
class HealthcheckException(APIException):
29
"""Exception class used for when the application's health check fails"""
310
pass
11+
12+
13+
class DeisException(APIException):
14+
status_code = 400
15+
16+
17+
class AlreadyExists(APIException):
18+
status_code = 409
19+
20+
21+
class ServiceUnavailable(APIException):
22+
status_code = 503
23+
default_detail = 'Service temporarily unavailable, try again later.'
24+
25+
26+
def custom_exception_handler(exc, context):
27+
# Call REST framework's default exception handler first,
28+
# to get the standard error response.
29+
response = exception_handler(exc, context)
30+
31+
# No response means DRF couldn't handle it
32+
# Output a generic 500 in a JSON format
33+
if response is None:
34+
logging.exception('Uncaught Exception', exc_info=exc)
35+
set_rollback()
36+
return Response({'detail': 'Server Error'}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
37+
38+
return response

rootfs/api/models/__init__.py

Lines changed: 8 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -14,34 +14,16 @@
1414
from django.db.models.signals import post_delete, post_save
1515
from django.dispatch import receiver
1616

17-
from rest_framework.exceptions import ValidationError, APIException
17+
from rest_framework.exceptions import ValidationError
1818
from rest_framework.authtoken.models import Token
1919

20+
from api.exceptions import DeisException, AlreadyExists, ServiceUnavailable # noqa
2021
from api.utils import dict_merge
2122
from scheduler import KubeException
2223

2324
logger = logging.getLogger(__name__)
2425

2526

26-
class DeisException(APIException):
27-
status_code = 400
28-
29-
30-
class AlreadyExists(APIException):
31-
status_code = 409
32-
33-
34-
class ServiceUnavailable(APIException):
35-
status_code = 503
36-
default_detail = 'Service temporarily unavailable, try again later.'
37-
38-
39-
def log_event(app, msg, level=logging.INFO):
40-
# controller needs to know which app this log comes from
41-
logger.log(level, "{}: {}".format(app.id, msg))
42-
app.log(msg, level)
43-
44-
4527
def validate_label(value):
4628
"""
4729
Check that the value follows the kubernetes name constraints
@@ -72,7 +54,7 @@ def _fetch_service_config(self, app):
7254
# Get the service from k8s to attach the domain correctly
7355
svc = self._scheduler._get_service(app, app).json()
7456
except KubeException as e:
75-
raise ServiceUnavailable(str(e)) from e
57+
raise ServiceUnavailable('Could not fetch Kubernetes Service {}'.format(app)) from e
7658

7759
# Get minimum structure going if it is missing on the service
7860
if 'metadata' not in svc or 'annotations' not in svc['metadata']:
@@ -109,7 +91,7 @@ def _save_service_config(self, app, component, data):
10991
try:
11092
self._scheduler._update_service(app, app, svc)
11193
except KubeException as e:
112-
raise ServiceUnavailable(str(e)) from e
94+
raise ServiceUnavailable('Could not update Kubernetes Service {}'.format(app)) from e
11395

11496

11597
class UuidAuditedModel(AuditedModel):
@@ -144,35 +126,31 @@ def _log_build_created(**kwargs):
144126
if kwargs.get('created'):
145127
build = kwargs['instance']
146128
# log only to the controller; this event will be logged in the release summary
147-
logger.info("{}: build {} created".format(build.app, build))
129+
build.app.log("build {} created".format(build))
148130

149131

150132
def _log_release_created(**kwargs):
151133
if kwargs.get('created'):
152134
release = kwargs['instance']
153-
# log only to the controller; this event will be logged in the release summary
154-
logger.info("{}: release {} created".format(release.app, release))
155135
# append release lifecycle logs to the app
156136
release.app.log(release.summary)
157137

158138

159139
def _log_config_updated(**kwargs):
160140
config = kwargs['instance']
161141
# log only to the controller; this event will be logged in the release summary
162-
logger.info("{}: config {} updated".format(config.app, config))
142+
config.app.log("config {} updated".format(config))
163143

164144

165145
def _log_domain_added(**kwargs):
166146
if kwargs.get('created'):
167147
domain = kwargs['instance']
168-
msg = "domain {} added".format(domain)
169-
log_event(domain.app, msg)
148+
domain.app.log("domain {} added".format(domain))
170149

171150

172151
def _log_domain_removed(**kwargs):
173152
domain = kwargs['instance']
174-
msg = "domain {} removed".format(domain)
175-
log_event(domain.app, msg)
153+
domain.app.log("domain {} removed".format(domain))
176154

177155

178156
def _log_cert_added(**kwargs):

rootfs/api/models/app.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
from jsonfield import JSONField
1616

1717
from deis import __version__ as deis_version
18-
from api.models import UuidAuditedModel, log_event, AlreadyExists, \
19-
DeisException, ServiceUnavailable
18+
from api.models import UuidAuditedModel, AlreadyExists, DeisException, ServiceUnavailable
2019

2120
from api.utils import generate_app_name
2221
from api.models.release import Release
@@ -86,7 +85,7 @@ def save(self, *args, **kwargs):
8685
if self._scheduler._get_namespace(self.id).status_code == 200:
8786
# Namespace already exists
8887
err = "{} already exists as a namespace in this kuberenetes setup".format(self.id) # noqa
89-
log_event(self, err, logging.INFO)
88+
self.log(err, logging.INFO)
9089
raise AlreadyExists(err)
9190
except KubeHTTPException:
9291
pass
@@ -174,7 +173,7 @@ def create(self, *args, **kwargs):
174173
try:
175174
self._scheduler.create(self.id)
176175
except KubeException as e:
177-
raise ServiceUnavailable(str(e)) from e
176+
raise ServiceUnavailable('Kubernetes resources could not be created') from e
178177

179178
# Attach the platform specific application sub domain to the k8s service
180179
# Only attach it on first release in case a customer has remove the app domain
@@ -183,11 +182,12 @@ def create(self, *args, **kwargs):
183182

184183
def delete(self, *args, **kwargs):
185184
"""Delete this application including all containers"""
185+
self.log("deleting environment")
186186
try:
187187
# attempt to remove application from kubernetes
188188
self._scheduler.destroy(self.id)
189189
except KubeException as e:
190-
raise ServiceUnavailable(str(e)) from e
190+
raise ServiceUnavailable('Could not delete Kubernetes Namespace {}'.format(self.id)) from e # noqa
191191

192192
self._clean_app_logs()
193193
return super(App, self).delete(*args, **kwargs)
@@ -226,7 +226,7 @@ def restart(self, **kwargs): # noqa
226226
self._scheduler._delete_pod(self.id, pod['name'])
227227
except Exception as e:
228228
err = "warning, some pods failed to stop:\n{}".format(str(e))
229-
log_event(self, err, logging.WARNING)
229+
self.log(err, logging.WARNING)
230230

231231
# Wait for pods to start
232232
try:
@@ -258,7 +258,7 @@ def restart(self, **kwargs): # noqa
258258
time.sleep(5)
259259
except Exception as e:
260260
err = "warning, some pods failed to start:\n{}".format(str(e))
261-
log_event(self, err, logging.WARNING)
261+
self.log(err, logging.WARNING)
262262

263263
# Return the new pods
264264
pods = self.list_pods(**kwargs)
@@ -274,7 +274,7 @@ def _clean_app_logs(self):
274274
# Ignore errors deleting application logs. An error here should not interfere with
275275
# the overall success of deleting an application, but we should log it.
276276
err = 'Error deleting existing application logs: {}'.format(e)
277-
log_event(self, err, logging.WARNING)
277+
self.log(err, logging.WARNING)
278278

279279
def scale(self, user, structure): # noqa
280280
"""Scale containers up or down to match requested structure."""
@@ -317,7 +317,7 @@ def scale(self, user, structure): # noqa
317317

318318
msg = '{} scaled pods '.format(user.username) + ' '.join(
319319
"{}={}".format(k, v) for k, v in list(structure.items()))
320-
log_event(self, msg)
320+
self.log(msg)
321321

322322
return True
323323

@@ -358,7 +358,7 @@ def _scale_pods(self, scale_types):
358358
)
359359
except Exception as e:
360360
err = '{} (scale): {}'.format(self._get_job_id(scale_type), e)
361-
log_event(self, err, logging.ERROR)
361+
self.log(err, logging.ERROR)
362362
raise ServiceUnavailable(e) from e
363363

364364
def deploy(self, release):
@@ -423,7 +423,7 @@ def deploy(self, release):
423423

424424
except Exception as e:
425425
err = '{} (app::deploy): {}'.format(self._get_job_id(scale_type), e)
426-
log_event(self, err, logging.ERROR)
426+
self.log(err, logging.ERROR)
427427
raise ServiceUnavailable(err) from e
428428

429429
# cleanup old releases from kubernetes
@@ -577,7 +577,7 @@ def pod_name(size=5, chars=string.ascii_lowercase + string.digits):
577577
entrypoint, command = self._get_command_run(command)
578578

579579
name = self._get_job_id('run') + '-' + pod_name()
580-
log_event(self, "{} on {} runs '{}'".format(user.username, name, command))
580+
self.log("{} on {} runs '{}'".format(user.username, name, command))
581581

582582
kwargs = {
583583
'memory': release.config.memory,
@@ -602,8 +602,7 @@ def pod_name(size=5, chars=string.ascii_lowercase + string.digits):
602602
return exit_code, output
603603
except Exception as e:
604604
err = '{} (run): {}'.format(name, e)
605-
log_event(self, err, logging.ERROR)
606-
raise ServiceUnavailable(str(e)) from e
605+
raise ServiceUnavailable(err) from e
607606

608607
def list_pods(self, *args, **kwargs):
609608
"""Used to list basic information about pods running for a given application"""
@@ -653,7 +652,7 @@ def list_pods(self, *args, **kwargs):
653652
pass
654653
except Exception as e:
655654
err = '(list pods): {}'.format(e)
656-
log_event(self, err, logging.ERROR)
655+
self.log(err, logging.ERROR)
657656
raise ServiceUnavailable(err) from e
658657

659658
def _scheduler_filter(self, **kwargs):

rootfs/api/models/certificate.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,9 @@ def attach_in_kubernetes(self, domain):
188188
try:
189189
self._scheduler._update_secret(app, name, data)
190190
except KubeException as e:
191-
raise ServiceUnavailable(str(e)) from e
191+
msg = 'There was a problem updating the certificate secret ' \
192+
'{} for {}'.format(name, app)
193+
raise ServiceUnavailable(msg) from e
192194

193195
# get config for the service
194196
config = self._load_service_config(app, 'router')

rootfs/api/models/key.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def validate_base64(value):
1313
try:
1414
base64.b64decode(value.split()[1])
1515
except Exception as e:
16-
raise ValidationError(str(e))
16+
raise ValidationError('Key contains invalid base64 chars') from e
1717

1818

1919
class Key(UuidAuditedModel):

rootfs/api/models/release.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from registry import publish_release, get_port as docker_get_port, RegistryException
77
from api.utils import dict_diff
8-
from api.models import UuidAuditedModel, log_event, DeisException
8+
from api.models import UuidAuditedModel, DeisException
99
from scheduler import KubeHTTPException
1010

1111
logger = logging.getLogger(__name__)
@@ -87,10 +87,10 @@ def new(self, user, config, build, summary=None, source_version='latest'):
8787
release.publish()
8888
except DeisException as e:
8989
# If we cannot publish this app, just log and carry on
90-
log_event(self.app, e)
90+
self.app.log(e)
9191
pass
9292
except RegistryException as e:
93-
log_event(self.app, e)
93+
self.app.log(e)
9494
raise DeisException(str(e)) from e
9595

9696
return release
@@ -113,15 +113,15 @@ def publish(self):
113113
# hostname tells Builder where to push images
114114
not registry.get('hostname', None)
115115
):
116-
log_event(self.app, '{} exists in the target registry. Using image for release {} of app {}'.format(self.build.image, self.version, self.app)) # noqa
116+
self.app.log('{} exists in the target registry. Using image for release {} of app {}'.format(self.build.image, self.version, self.app)) # noqa
117117
return
118118

119119
# return image if it is already in the registry, test host and then host + port
120120
if (
121121
self.build.image.startswith(settings.REGISTRY_HOST) or
122122
self.build.image.startswith(settings.REGISTRY_URL)
123123
):
124-
log_event(self.app, '{} exists in the target registry. Using image for release {} of app {}'.format(self.build.image, self.version, self.app)) # noqa
124+
self.app.log('{} exists in the target registry. Using image for release {} of app {}'.format(self.build.image, self.version, self.app)) # noqa
125125
return
126126

127127
# add tag if it was not provided
@@ -151,13 +151,13 @@ def get_port(self, routable=False):
151151

152152
if self.build.type == "buildpack":
153153
msg = "Using default port 5000 for build pack image {}".format(self.image)
154-
log_event(self.app, msg)
154+
self.app.log(msg)
155155
return 5000
156156

157157
# application has registry auth - $PORT is required
158158
if creds is not None:
159159
if envs.get('PORT', None) is None:
160-
log_event(self.app, 'Private registry detected but no $PORT defined. Defaulting to $PORT 5000', logging.WARNING) # noqa
160+
self.app.log('Private registry detected but no $PORT defined. Defaulting to $PORT 5000', logging.WARNING) # noqa
161161
return 5000
162162

163163
# User provided PORT
@@ -171,7 +171,7 @@ def get_port(self, routable=False):
171171
port = docker_get_port(self.image, deis_registry, creds)
172172
if port is None:
173173
msg = "Expose a port or make the app non routable by changing the process type"
174-
log_event(self.app, msg, logging.ERROR)
174+
self.app.log(msg, logging.ERROR)
175175
raise DeisException(msg)
176176

177177
return port
@@ -233,7 +233,7 @@ def rollback(self, user, version=None):
233233
except Exception as e:
234234
if 'new_release' in locals():
235235
new_release.delete()
236-
raise DeisException(str(e))
236+
raise DeisException(str(e)) from e
237237

238238
def delete(self, *args, **kwargs):
239239
"""Delete release DB record and any RCs from the affect release"""
@@ -244,16 +244,15 @@ def delete(self, *args, **kwargs):
244244
if e.response.status_code is not 404:
245245
# Another problem came up
246246
message = 'Could not to cleanup RCs for release {}'.format(self.version)
247-
log_event(self.app, message)
247+
self.app.log(message, level=logging.WARNING)
248248
logger.warning(message + ' - ' + str(e))
249249
finally:
250250
super(Release, self).delete(*args, **kwargs)
251251

252252
def cleanup_old(self):
253253
"""Cleanup all but the latest release from Kubernetes"""
254254
latest_version = 'v{}'.format(self.version)
255-
log_event(
256-
self.app,
255+
self.app.log(
257256
'Cleaning up RCS for releases older than {} (latest)'.format(latest_version),
258257
level=logging.DEBUG
259258
)
@@ -275,8 +274,7 @@ def cleanup_old(self):
275274
controller_removal.append(current_version)
276275

277276
if controller_removal:
278-
log_event(
279-
self.app,
277+
self.app.log(
280278
'Found the following versions to cleanup: {}'.format(', '.join(controller_removal)), # noqa
281279
level=logging.DEBUG
282280
)
@@ -285,7 +283,7 @@ def cleanup_old(self):
285283
self._delete_release_in_scheduler(self.app.id, version)
286284

287285
# find stray env secrets to remove that may have been missed
288-
log_event(self.app, 'Cleaning up orphaned environment var secrets', level=logging.DEBUG)
286+
self.app.log('Cleaning up orphaned environment var secrets', level=logging.DEBUG)
289287
labels = {
290288
'heritage': 'deis',
291289
'app': self.app.id,

rootfs/api/tests/test_key.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def test_ecdsa_key(self):
105105

106106
def test_bad_key(self):
107107
response = self._check_bad_key(BAD_KEY)
108-
self.assertEqual(response.data, {'public': ['Incorrect padding']})
108+
self.assertEqual(response.data, {'public': ['Key contains invalid base64 chars']})
109109

110110
def _check_duplicate_key(self, pubkey, pubkey2):
111111
"""

0 commit comments

Comments
 (0)