Skip to content

Commit a393479

Browse files
committed
feat(controller): add a multi-tenant proxy for prometheus api
1 parent 65631a0 commit a393479

10 files changed

Lines changed: 80 additions & 36 deletions

File tree

charts/controller/templates/_helpers.tpl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,15 +112,15 @@ env:
112112
valueFrom:
113113
fieldRef:
114114
fieldPath: metadata.namespace
115-
{{- if (.Values.prometheusUrl) }}
116-
- name: "DRYCC_PROMETHEUS_URL"
115+
{{- if (.Values.victoriametricsUrl) }}
116+
- name: "DRYCC_VICTORIAMETRICS_URL"
117117
valueFrom:
118118
secretKeyRef:
119119
name: controller-creds
120-
key: prometheus-url
121-
{{- else if .Values.prometheus.enabled }}
122-
- name: "DRYCC_PROMETHEUS_URL"
123-
value: "http://drycc-victoriametrics-vmselect.{{$.Release.Namespace}}.svc.{{$.Values.global.clusterDomain}}:8481/select/0/prometheus"
120+
key: victoriametrics-url
121+
{{- else if .Values.victoriametrics.enabled }}
122+
- name: "DRYCC_VICTORIAMETRICS_URL"
123+
value: "http://drycc-victoriametrics-vmselect.{{$.Release.Namespace}}.svc.{{$.Values.global.clusterDomain}}:8481"
124124
{{- end }}
125125
{{- if .Values.passport.enabled }}
126126
- name: "DRYCC_PASSPORT_URL"

charts/controller/templates/controller-secret-creds.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ data:
1414
{{- if (.Values.databaseReplicaUrl) }}
1515
database-replica-url: {{ .Values.databaseReplicaUrl | b64enc }}
1616
{{- end }}
17-
{{- if (.Values.prometheusUrl) }}
18-
prometheus-url: {{ .Values.prometheusUrl | b64enc }}
17+
{{- if (.Values.victoriametricsUrl) }}
18+
victoriametrics-url: {{ .Values.victoriametricsUrl | b64enc }}
1919
{{- end }}
2020
{{- if (.Values.passportUrl) }}
2121
passport-url: {{ .Values.passportUrl | b64enc }}

charts/controller/values.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ databaseReplicaUrl: ""
6363
passportUrl: ""
6464
passportKey: ""
6565
passportSecret: ""
66-
# prometheusUrl is will no longer use the built-in prometheus component
67-
prometheusUrl: ""
66+
# When victoriametricsUrl is set, the built-in victoriametrics component is no longer used
67+
victoriametricsUrl: ""
6868
# Workflow-manager Configuration Options
6969
workflowManagerUrl: ""
7070
workflowManagerAccessKey: ""
@@ -169,7 +169,7 @@ registry:
169169
passport:
170170
enabled: true
171171

172-
prometheus:
172+
victoriametrics:
173173
enabled: true
174174

175175
global:

rootfs/api/authentication.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,11 @@ def authenticate(self, request):
4545
if token_type is None or token is None:
4646
return None
4747
if token_type == 'bearer': # drycc oauth access token
48-
from api.backend import OauthCacheManager
49-
return OauthCacheManager().get_user(token), token
48+
try:
49+
from api.backend import OauthCacheManager
50+
return OauthCacheManager().get_user(token), token
51+
except exceptions.AuthenticationFailed:
52+
return None, None
5053
# drycc token
5154
user = cache.get(token, None)
5255
if not user:
@@ -62,11 +65,14 @@ def authenticate_credentials(self, key):
6265
if not token.owner.is_active:
6366
raise exceptions.AuthenticationFailed(gettext_lazy('User inactive or deleted.'))
6467
if token.expires():
65-
from api.backend import OauthCacheManager
66-
token.refresh_token()
67-
user = OauthCacheManager().get_user(token.oauth['access_token'])
68-
cache.set(key, user, timeout=token.oauth['expires_in'])
69-
return user, token.key
68+
try:
69+
from api.backend import OauthCacheManager
70+
user = OauthCacheManager().get_user(token.oauth['access_token'])
71+
cache.set(key, user, timeout=token.oauth['expires_in'])
72+
token.refresh_token()
73+
return user, token.key
74+
except exceptions.AuthenticationFailed:
75+
return None, None
7076
return (token.owner, token.key)
7177

7278
def authenticate_header(self, request):

rootfs/api/management/commands/measure_loadbalancers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def _measure_loadbalancers(self, app_map, timestamp):
4848
send_measurements.delay(loadbalancers)
4949

5050
def handle(self, *args, **options):
51-
if settings.WORKFLOW_MANAGER_URL and settings.DRYCC_PROMETHEUS_URL:
51+
if settings.WORKFLOW_MANAGER_URL and settings.DRYCC_VICTORIAMETRICS_URL:
5252
timestamp = int(time.time())
5353
task_id = uuid.uuid4().hex
5454
logger.info(f"pushing {task_id} limits to workflow_manager when {timezone.now()}")

rootfs/api/monitor.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ async def query_prom(url, params) -> list[tuple[dict[str, str], int]]:
5858
async def last_metrics(namespace) -> AsyncGenerator[Iterator, str]:
5959
if not settings.DRYCC_METRICS_CONFIG:
6060
return
61-
url = urljoin(settings.DRYCC_PROMETHEUS_URL, "/api/v1/query")
61+
url = urljoin(settings.DRYCC_VICTORIAMETRICS_URL, "/select/0/prometheus/api/v1/query")
6262
promql = query_last_metrics_promql_tpl % (
6363
'|'.join(settings.DRYCC_METRICS_CONFIG.keys()),
6464
namespace,
@@ -76,29 +76,29 @@ async def last_metrics(namespace) -> AsyncGenerator[Iterator, str]:
7676

7777
async def query_loadbalancer(namespaces: Iterator[str], start: int, stop: int
7878
) -> list[tuple[dict[str, str], int]]:
79-
url = urljoin(settings.DRYCC_PROMETHEUS_URL, "/api/v1/query")
79+
url = urljoin(settings.DRYCC_VICTORIAMETRICS_URL, "/select/0/prometheus/api/v1/query")
8080
promql = query_loadbalancer_promql_tpl % "|".join(namespaces)
8181
return await query_prom(url, {"query": promql, "start": start, "end": stop})
8282

8383

8484
async def query_network_receive_flow(namespaces: Iterator[str], start: int, stop: int
8585
) -> list[tuple[dict[str, str], int]]:
86-
url = urljoin(settings.DRYCC_PROMETHEUS_URL, "/api/v1/query")
86+
url = urljoin(settings.DRYCC_VICTORIAMETRICS_URL, "/select/0/prometheus/api/v1/query")
8787
promql = query_network_receive_flow_promql_tpl % ("|".join(namespaces), f"{stop-start}s")
8888
return await query_prom(url, {"query": promql, "start": start, "end": stop})
8989

9090

9191
async def query_network_transmit_flow(namespaces: Iterator[str], start: int, stop: int
9292
) -> list[tuple[dict[str, str], int]]:
93-
url = urljoin(settings.DRYCC_PROMETHEUS_URL, "/api/v1/query")
93+
url = urljoin(settings.DRYCC_VICTORIAMETRICS_URL, "/select/0/prometheus/api/v1/query")
9494
promql = query_network_transmit_flow_promql_tpl % ("|".join(namespaces), f"{stop-start}s")
9595
return await query_prom(url, {"query": promql, "start": start, "end": stop})
9696

9797

9898
async def query_cpu_usage(namespace: str, ptype: str, every: str,
9999
start: int, stop: int, step: int,
100100
) -> list[tuple[dict[str, str], int]]:
101-
url = urljoin(settings.DRYCC_PROMETHEUS_URL, "/api/v1/query_range")
101+
url = urljoin(settings.DRYCC_VICTORIAMETRICS_URL, "/select/0/prometheus/api/v1/query_range")
102102
pod_prefix = "%s-%s" % (namespace, ptype)
103103
promql = query_cpu_usage_promql_tpl % (pod_prefix, namespace, every)
104104
return await query_prom(url, {"query": promql, "start": start, "end": stop, "step": step})
@@ -107,7 +107,7 @@ async def query_cpu_usage(namespace: str, ptype: str, every: str,
107107
async def query_memory_usage(namespace: str, ptype: str, every: str,
108108
start: int, stop: int, step: int,
109109
) -> list[tuple[dict[str, str], int]]:
110-
url = urljoin(settings.DRYCC_PROMETHEUS_URL, "/api/v1/query_range")
110+
url = urljoin(settings.DRYCC_VICTORIAMETRICS_URL, "/select/0/prometheus/api/v1/query_range")
111111
pod_prefix = "%s-%s" % (namespace, ptype)
112112
promql = query_memory_usage_promql_tpl % (pod_prefix, namespace, every)
113113
return await query_prom(url, {"query": promql, "start": start, "end": stop, "step": step})
@@ -116,7 +116,7 @@ async def query_memory_usage(namespace: str, ptype: str, every: str,
116116
async def query_network_receive_usage(namespace: str, ptype: str, every: str,
117117
start: int, stop: int, step: int,
118118
) -> list[tuple[dict[str, str], int]]:
119-
url = urljoin(settings.DRYCC_PROMETHEUS_URL, "/api/v1/query_range")
119+
url = urljoin(settings.DRYCC_VICTORIAMETRICS_URL, "/select/0/prometheus/api/v1/query_range")
120120
pod_prefix = "%s-%s" % (namespace, ptype)
121121
promql = query_network_receive_usage_promql_tpl % (pod_prefix, namespace, every)
122122
return await query_prom(url, {"query": promql, "start": start, "end": stop, "step": step})
@@ -125,7 +125,7 @@ async def query_network_receive_usage(namespace: str, ptype: str, every: str,
125125
async def query_network_transmit_usage(namespace: str, ptype: str, every: str,
126126
start: int, stop: int, step: int,
127127
) -> list[tuple[dict[str, str], int]]:
128-
url = urljoin(settings.DRYCC_PROMETHEUS_URL, "/api/v1/query_range")
128+
url = urljoin(settings.DRYCC_VICTORIAMETRICS_URL, "/select/0/prometheus/api/v1/query_range")
129129
pod_prefix = "%s-%s" % (namespace, ptype)
130130
promql = query_network_transmit_usage_promql_tpl % (pod_prefix, namespace, every)
131131
return await query_prom(url, {"query": promql, "start": start, "end": stop, "step": step})

rootfs/api/settings/production.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,8 @@
269269

270270
K8S_API_VERIFY_TLS = os.environ.get('K8S_API_VERIFY_TLS', 'true').lower() == "true"
271271

272-
# drycc prometheus url
273-
DRYCC_PROMETHEUS_URL = os.environ.get('DRYCC_PROMETHEUS_URL', '')
272+
# drycc victoriametrics url
273+
DRYCC_VICTORIAMETRICS_URL = os.environ.get('DRYCC_VICTORIAMETRICS_URL', '')
274274

275275
# drycc metrics config file
276276
DRYCC_METRICS_CONFIG = {}

rootfs/api/urls.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -247,13 +247,11 @@
247247
re_path(
248248
r'^nodes/(?P<node>[a-zA-Z0-9-]+)/proxy/metrics(?:/(?P<metrics>[^/]+))?/?$',
249249
views.ProxyMetricsView.as_view()),
250+
# prometheus
251+
re_path(r'^prometheus/(?P<path>.+)/?$', views.PrometheusProxy.as_view()),
250252
# tokens
251253
re_path(r'^tokens/?$', views.TokenViewSet.as_view({'get': 'list'})),
252254
re_path(r"^tokens/(?P<pk>[-_\w]+)/?$", views.TokenViewSet.as_view({'delete': 'destroy'})),
253-
# social login is placed at the end of the URL match
254-
re_path(r'^login/(?P<backend>[^/]+){0}$'.format(extra), views.auth, name='begin'),
255-
re_path(r'^complete/(?P<backend>[^/]+){0}$'.format(extra), views.complete, name='complete'),
256-
re_path('', include('social_django.urls', namespace='social')),
257255
]
258256

259257
mutate_urlpatterns = [
@@ -263,8 +261,15 @@
263261
),
264262
]
265263

264+
# social login is placed at the end of the URL match
265+
social_urlpatterns = [
266+
re_path(r'^login/(?P<backend>[^/]+){0}$'.format(extra), views.auth, name='begin'),
267+
re_path(r'^complete/(?P<backend>[^/]+){0}$'.format(extra), views.complete, name='complete'),
268+
re_path('', include('social_django.urls', namespace='social')),
269+
]
270+
266271
# If there is a mutating admission mutate configuration, use mutate url
267272
if settings.MUTATE_KEY:
268273
urlpatterns = mutate_urlpatterns
269274
else:
270-
urlpatterns = app_urlpatterns
275+
urlpatterns = app_urlpatterns + social_urlpatterns

rootfs/api/views.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
import random
1111
import aiohttp
1212
import requests
13+
import warnings
1314

15+
from urllib.parse import urljoin
1416
from asgiref.sync import async_to_sync
1517
from django.db.models import Q
1618
from django.core.cache import cache
@@ -38,7 +40,8 @@
3840
from django.views.decorators.cache import never_cache
3941
from django.contrib.auth import REDIRECT_FIELD_NAME
4042
from django.views.decorators.csrf import csrf_exempt
41-
from django.http.response import FileResponse, StreamingHttpResponse
43+
from django.http.response import FileResponse, JsonResponse, StreamingHttpResponse
44+
from channels.db import database_sync_to_async
4245
from social_django.utils import psa
4346
from social_django.views import _do_login
4447
from social_core.utils import setting_name
@@ -1217,6 +1220,8 @@ def wrap(app_id, ptype, every, start, stop, step):
12171220
@method_decorator(cache_page(settings.DRYCC_METRICS_EXPIRY))
12181221
@method_decorator(vary_on_headers("Authorization"))
12191222
def status(self, request, **kwargs):
1223+
warnings.warn(
1224+
'this interface will be removed in the next version.', PendingDeprecationWarning)
12201225
app_id = self._get_app().id
12211226
data = serializers.MetricSerializer(data=self.request.query_params)
12221227
if not data.is_valid():
@@ -1243,6 +1248,8 @@ def status(self, request, **kwargs):
12431248
@method_decorator(cache_page(settings.DRYCC_METRICS_EXPIRY))
12441249
@method_decorator(vary_on_headers("Authorization"))
12451250
def metric(self, request, **kwargs):
1251+
warnings.warn(
1252+
'this interface will be removed in the next version.', PendingDeprecationWarning)
12461253
app_id = self._get_app().id
12471254
return StreamingHttpResponse(
12481255
streaming_content=monitor.last_metrics(app_id)
@@ -1316,3 +1323,29 @@ async def stream_response():
13161323
yield sample
13171324
content_type = f"text/plain; version={__version__}"
13181325
return StreamingHttpResponse(stream_response(), content_type=content_type)
1326+
1327+
1328+
@method_decorator(csrf_exempt, name='dispatch')
class PrometheusProxy(View):
    """Authenticated proxy in front of the VictoriaMetrics Prometheus API.

    The caller is authenticated with ``DryccAuthentication``. Superusers and
    staff are routed to ``/select/0/prometheus/<path>``; every other user is
    routed to ``/select/<user id>/prometheus/<path>``. The upstream JSON body
    and status code are relayed back to the caller.

    Responses produced by the proxy itself:
      * 403 - authentication did not yield a ``User``.
      * 400 - ``path`` contains ``.`` / ``..`` segments.
      * 502 - upstream connection failed or returned a non-JSON body.
      * 504 - upstream did not answer within ``timeout``.
    """
    # Upper bounds (seconds) applied to every upstream request.
    timeout = aiohttp.ClientTimeout(total=30, connect=10, sock_read=15)
    authentication = authentication.DryccAuthentication()

    async def proxy(self, request, path):
        """Forward the request's parameters to VictoriaMetrics.

        :param request: incoming Django request (GET or POST).
        :param path: URL remainder captured after ``prometheus/``.
        :returns: ``JsonResponse`` carrying the upstream body and status, or
            one of the proxy-generated error responses listed on the class.
        """
        # Local imports keep this block self-contained.
        import asyncio
        from urllib.parse import unquote

        auth = await database_sync_to_async(self.authentication.authenticate)(request)
        if not auth or len(auth) != 2 or not isinstance(auth[0], User):
            return JsonResponse({'error': 'access denied'}, status=403)
        # urljoin() collapses dot segments, so "../../0/prometheus/..." would
        # escape the per-user "/select/<id>/" prefix chosen below. Reject any
        # dot segment (also when percent-encoded) before building the URL.
        if any(segment in ('.', '..') for segment in unquote(path).split('/')):
            return JsonResponse({'error': 'invalid path'}, status=400)
        if auth[0].is_superuser or auth[0].is_staff:
            path = f"/select/0/prometheus/{path}"
        else:
            path = f"/select/{auth[0].id}/prometheus/{path}"
        url = urljoin(settings.DRYCC_VICTORIAMETRICS_URL, path)
        params = dict(request.GET) if request.method == "GET" else dict(request.POST)
        try:
            async with aiohttp.ClientSession() as session:
                # POST bodies are forwarded as query parameters of a GET.
                async with session.get(url, params=params, timeout=self.timeout) as response:
                    data, status = await response.json(), response.status
        except asyncio.TimeoutError:
            # Not an aiohttp.ClientError on Python < 3.11; without this
            # branch an upstream timeout surfaces as an unhandled 500.
            data, status = {'error': 'victoriametrics request timed out'}, 504
        except aiohttp.ClientError as e:
            data, status = {'error': f'victoriametrics connection failed: {str(e)}'}, 502
        except ValueError:
            # Body advertised as JSON but could not be decoded.
            data, status = {'error': 'victoriametrics returned an invalid response'}, 502
        # safe=False: relay whatever JSON value upstream produced, not only objects.
        return JsonResponse(data, status=status, safe=False)

    get = post = proxy

rootfs/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Drycc controller requirements
22
inotify==0.2.10
33
backoff==2.2.1
4-
django==4.2.21
4+
django==4.2.22
55
channels==4.2.2
66
aiohttp==3.11.16
77
django-cors-headers==4.7.0

0 commit comments

Comments
 (0)