Skip to content

Commit c8a33fb

Browse files
committed
feat(oauth): use oauth to unify service-to-service authentication.
1 parent 29092f1 commit c8a33fb

12 files changed

Lines changed: 373 additions & 84 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@
22
*.swp
33
*.swo
44
.DS_Store
5+
.sisyphus
6+
__pycache__

charts/grafana/templates/_helpers.tmpl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,6 @@ env:
2121
- name: DRYCC_VALKEY_URL
2222
value: "redis://:$(DRYCC_VALKEY_PASSWORD)@drycc-valkey:16379/2"
2323
{{- end }}
24-
- name: DRYCC_SERVICE_KEY
25-
valueFrom:
26-
secretKeyRef:
27-
name: controller-creds
28-
key: service-key
2924
- name: "DRYCC_CONTROLLER_URL"
3025
value: http://drycc-controller-api
3126
- name: "DRYCC_QUICKWIT_URL"
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
apiVersion: batch/v1
2+
kind: CronJob
3+
metadata:
4+
name: grafana-oauth2-token-refresher
5+
labels:
6+
app: grafana
7+
component: token-refresher
8+
heritage: drycc
9+
spec:
10+
# Run daily at 2 AM (avoid peak business hours)
11+
schedule: "0 2 * * *"
12+
timeZone: "Asia/Shanghai"
13+
14+
# Prevent concurrent executions (ensure a single instance)
15+
concurrencyPolicy: Forbid
16+
17+
# Keep the latest 3 successful and failed job records
18+
successfulJobsHistoryLimit: 3
19+
failedJobsHistoryLimit: 3
20+
21+
jobTemplate:
22+
spec:
23+
# Retry the job up to 3 times
24+
backoffLimit: 3
25+
26+
template:
27+
metadata:
28+
labels:
29+
app: grafana
30+
component: token-refresher
31+
spec:
32+
restartPolicy: OnFailure
33+
containers:
34+
- name: token-refresher
35+
image: {{ .Values.imageRegistry }}/{{ .Values.imageOrg }}/grafana:{{ .Values.imageTag }}
36+
imagePullPolicy: {{ .Values.imagePullPolicy }}
37+
command: ["/usr/bin/env", "python3", "/usr/share/grafana/oauth2/token.py"]
38+
{{- include "grafana.envs" . | indent 12 }}
39+
resources:
40+
requests:
41+
memory: "64Mi"
42+
cpu: "100m"
43+
limits:
44+
memory: "128Mi"
45+
cpu: "200m"

charts/grafana/templates/grafana-statefulset.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ spec:
5151
GF_LIVE_HA_ENGINE_ADDRESS=$(echo "${DRYCC_VALKEY_JSON}" |jq -r '.address')
5252
GF_LIVE_HA_ENGINE_PASSWORD=$(echo "${DRYCC_VALKEY_JSON}" |jq -r '.password')
5353
export GF_LIVE_HA_ENGINE_ADDRESS GF_LIVE_HA_ENGINE_PASSWORD
54+
DRYCC_PASSPORT_TOKEN=$(curl -s -X POST \
55+
-H "Content-Type: application/x-www-form-urlencoded" \
56+
-d "grant_type=client_credentials&client_id=$DRYCC_PASSPORT_KEY&client_secret=$DRYCC_PASSPORT_SECRET" \
57+
$DRYCC_PASSPORT_URL/o/token/ | jq -r .access_token)
58+
export DRYCC_PASSPORT_TOKEN
5459
grafana server --config /usr/share/grafana/grafana.ini --homepath /opt/drycc/grafana &
5560
GRAFANA_PID=$!
5661
echo "Waiting for Grafana to come up..."
@@ -89,6 +94,11 @@ spec:
8994
GF_LIVE_HA_ENGINE_ADDRESS=$(echo "${DRYCC_VALKEY_JSON}" |jq -r '.address')
9095
GF_LIVE_HA_ENGINE_PASSWORD=$(echo "${DRYCC_VALKEY_JSON}" |jq -r '.password')
9196
export GF_LIVE_HA_ENGINE_ADDRESS GF_LIVE_HA_ENGINE_PASSWORD
97+
DRYCC_PASSPORT_TOKEN=$(curl -s -X POST \
98+
-H "Content-Type: application/x-www-form-urlencoded" \
99+
-d "grant_type=client_credentials&client_id=$DRYCC_PASSPORT_KEY&client_secret=$DRYCC_PASSPORT_SECRET" \
100+
$DRYCC_PASSPORT_URL/o/token/ | jq -r .access_token)
101+
export DRYCC_PASSPORT_TOKEN
92102
exec grafana server --config /usr/share/grafana/grafana.ini --homepath /opt/drycc/grafana
93103
{{- end }}
94104
{{- with index .Values "resources" }}

rootfs/usr/share/grafana/oauth2/datasources/prometheus.json

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,12 @@
22
"name": "Prometheus on Drycc",
33
"type": "prometheus",
44
"uid": "prometheus_on_drycc",
5-
"url": "${controller_url}/v2/prometheus/${workspace}",
5+
"url": "http://localhost:4000/proxy/prometheus/${workspace}",
66
"access": "proxy",
77
"isDefault": true,
88
"basicAuth": false,
99
"jsonData": {
10-
"httpHeaderName1": "Authorization",
1110
"httpMethod": "POST",
1211
"timeInterval": "${time_interval}"
13-
},
14-
"secureJsonData": {
15-
"httpHeaderValue1": "Token ${token}"
1612
}
17-
}
13+
}

rootfs/usr/share/grafana/oauth2/datasources/quickwit.json

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,11 @@
22
"name": "Application Logs",
33
"type": "quickwit-quickwit-datasource",
44
"uid": "application_logs",
5-
"url": "${controller_url}/v2/quickwit/${workspace}",
5+
"url": "http://localhost:4000/proxy/quickwit/${workspace}",
66
"access": "proxy",
77
"basicAuth": false,
88
"jsonData": {
9-
"httpHeaderName1": "Authorization",
109
"index": "logs-*",
1110
"logMessageField": "log"
12-
},
13-
"secureJsonData": {
14-
"httpHeaderValue1": "Token ${token}"
1511
}
1612
}

rootfs/usr/share/grafana/oauth2/hook/grafana.py

Lines changed: 26 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
import logging
2-
import os
32
import time
43
import json
54
import httpx
5+
from pathlib import Path
66
from string import Template
77
from psycopg import AsyncConnection
8+
from ..settings import settings
89

910
logger = logging.getLogger(__name__)
1011

1112
DEFAULT_HEADERS = {"Content-Type": "application/json"}
12-
DRYCC_CONTROLLER_URL = os.environ.get('DRYCC_CONTROLLER_URL')
13-
DRYCC_GRAFANA_REFRESH = os.environ.get('DRYCC_GRAFANA_REFRESH', '60s')
14-
DRYCC_GRAFANA_DASHBOARD = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../")
13+
DRYCC_GRAFANA_DASHBOARD = Path(__file__).resolve().parent.parent
1514

1615
# Drycc Workspace role to Grafana role mapping
1716
DRYCC_WORKSPACE_ROLE_MAPPING = {"admin": "Editor", "member": "Editor", "viewer": "Viewer"}
@@ -107,15 +106,15 @@ async def sync_alerting(context: dict, token: dict, userinfo: dict):
107106
The alerts field only controls notification channels (handled in sync_default).
108107
"""
109108
workspace_orgs = context.get("workspace_orgs", {})
110-
alerting_path = os.path.join(os.path.dirname(__file__), "..", "alerting")
109+
alerting_path = Path(__file__).resolve().parent.parent / "alerting"
111110

112111
for ws_name, ws_info in workspace_orgs.items():
113112
org_id = ws_info["org_id"]
114113
ctx = {**context, "org_id": org_id}
115114

116115
async with httpx.AsyncClient() as client:
117-
for filename in os.listdir(alerting_path):
118-
with open(os.path.join(alerting_path, filename)) as f:
116+
for filepath in alerting_path.glob("*.json"):
117+
with filepath.open() as f:
119118
rule = json.load(f)
120119
# Use PUT for idempotent upsert (POST would create duplicates)
121120
resp = await client.put(
@@ -135,23 +134,20 @@ async def sync_alerting(context: dict, token: dict, userinfo: dict):
135134
async def sync_datasources(context: dict, token: dict, userinfo: dict):
136135
"""Create datasources for each workspace org with workspace-specific URLs."""
137136
workspace_orgs = context.get("workspace_orgs", {})
138-
datasources_path = os.path.join(os.path.dirname(__file__), "..", "datasources")
139-
drycc_token = context.get("drycc_token")
137+
datasources_path = Path(__file__).resolve().parent.parent / "datasources"
140138

141139
for ws_name, ws_info in workspace_orgs.items():
142140
org_id = ws_info["org_id"]
143141
ctx = {**context, "org_id": org_id}
144142
headers = _api_headers(ctx, userinfo)
145143

146144
async with httpx.AsyncClient() as client:
147-
for filename in os.listdir(datasources_path):
148-
with open(os.path.join(datasources_path, filename)) as f:
145+
for filepath in datasources_path.glob("*.json"):
146+
with filepath.open() as f:
149147
template = Template(f.read())
150148
datasource = json.loads(template.substitute(
151-
controller_url=DRYCC_CONTROLLER_URL,
152-
workspace=ws_name,
153-
time_interval=DRYCC_GRAFANA_REFRESH,
154-
token=drycc_token
149+
controller_url=settings.drycc_controller_url,
150+
time_interval=settings.drycc_grafana_refresh
155151
))
156152
resp = await client.get(
157153
_api_url(f"/api/datasources/name/{datasource['name']}"), headers=headers)
@@ -174,17 +170,17 @@ async def sync_datasources(context: dict, token: dict, userinfo: dict):
174170
async def sync_dashboards(context: dict, token: dict, userinfo: dict):
175171
"""Create dashboards for each workspace org."""
176172
workspace_orgs = context.get("workspace_orgs", {})
177-
dashboards_path = os.path.join(os.path.dirname(__file__), "..", "dashboards")
173+
dashboards_path = Path(__file__).resolve().parent.parent / "dashboards"
178174

179175
for ws_name, ws_info in workspace_orgs.items():
180176
org_id = ws_info["org_id"]
181177
ctx = {**context, "org_id": org_id}
182178

183179
async with httpx.AsyncClient() as client:
184-
for filename in os.listdir(dashboards_path):
185-
with open(os.path.join(dashboards_path, filename)) as f:
180+
for filepath in dashboards_path.glob("*.json"):
181+
with filepath.open() as f:
186182
dashboard = json.load(f)
187-
dashboard.update({"id": None, "refresh": DRYCC_GRAFANA_REFRESH})
183+
dashboard.update({"id": None, "refresh": settings.drycc_grafana_refresh})
188184
await client.post(
189185
_api_url("/api/dashboards/db"),
190186
headers=_api_headers(ctx, userinfo),
@@ -202,12 +198,12 @@ async def sync_dashboards(context: dict, token: dict, userinfo: dict):
202198
def _api_url(url_path, is_admin=False):
203199
if is_admin:
204200
return "http://{}:{}@localhost:{}{}".format(
205-
os.environ.get('GF_SECURITY_ADMIN_USER'),
206-
os.environ.get('GF_SECURITY_ADMIN_PASSWORD'),
207-
os.environ.get('GF_SERVER_HTTP_PORT', 3000),
208-
url_path,
201+
settings.gf_security_admin_user,
202+
settings.gf_security_admin_password,
203+
settings.gf_server_http_port,
204+
url_path
209205
)
210-
return "http://localhost:{}{}".format(os.environ.get('GF_SERVER_HTTP_PORT', 3000), url_path)
206+
return "http://localhost:{}{}".format(settings.gf_server_http_port, url_path)
211207

212208

213209
def _api_headers(context: dict, userinfo):
@@ -233,7 +229,7 @@ async def _get_workspaces(drycc_token: str) -> list:
233229
headers = {"Authorization": f"Token {drycc_token}"}
234230
async with httpx.AsyncClient() as client:
235231
resp = await client.get(
236-
f"{DRYCC_CONTROLLER_URL}/v2/workspaces", headers=headers)
232+
f"{settings.drycc_controller_url}/v2/workspaces", headers=headers)
237233
resp.raise_for_status()
238234
return resp.json().get("results", [])
239235

@@ -243,7 +239,7 @@ async def _get_workspace_members(workspace_name: str, drycc_token: str) -> list:
243239
headers = {"Authorization": f"Token {drycc_token}"}
244240
async with httpx.AsyncClient() as client:
245241
resp = await client.get(
246-
f"{DRYCC_CONTROLLER_URL}/v2/workspaces/{workspace_name}/members",
242+
f"{settings.drycc_controller_url}/v2/workspaces/{workspace_name}/members",
247243
headers=headers)
248244
resp.raise_for_status()
249245
return resp.json().get("results", [])
@@ -420,7 +416,7 @@ def _build_alertmanager_config(alert_addresses: str) -> str:
420416

421417
async def _upsert_alert_configuration(org_id: int, config: str):
422418
"""Insert or update alert configuration for an org using parameterized query."""
423-
async with await AsyncConnection.connect(os.environ.get("GF_DATABASE_URL")) as conn:
419+
async with await AsyncConnection.connect(settings.gf_database_url) as conn:
424420
async with conn.cursor() as cursor:
425421
await cursor.execute(
426422
"""
@@ -438,37 +434,6 @@ async def _upsert_alert_configuration(org_id: int, config: str):
438434

439435

440436
async def _get_or_create_drycc_token(username, token: dict):
441-
async def _check_or_create_drycc_token(drycc_token, token):
442-
async with httpx.AsyncClient() as client:
443-
created = False if drycc_token else True
444-
if drycc_token:
445-
headers = {"Authorization": f"Token {drycc_token}"}
446-
resp = await client.get(
447-
f"{DRYCC_CONTROLLER_URL}/v2/auth/whoami", headers=headers)
448-
if resp.status_code in [401, 403]:
449-
created = True
450-
if created:
451-
headers = {"Authorization": f"Bearer {token['access_token']}"}
452-
data = (await client.post(
453-
f"{DRYCC_CONTROLLER_URL}/v2/auth/token/?alias=grafana-datasource",
454-
headers=headers, json=token)).json()
455-
drycc_token = data["token"]
456-
return created, drycc_token
457-
458-
async with await AsyncConnection.connect(os.environ.get("GF_DATABASE_URL")) as conn:
459-
async with conn.cursor() as cursor:
460-
await cursor.execute(
461-
"SELECT o_auth_id_token FROM user_auth WHERE auth_module=%s AND auth_id=%s",
462-
("authproxy", username)
463-
)
464-
row = await cursor.fetchone()
465-
drycc_token = row[0] if row else None
466-
created, drycc_token = await _check_or_create_drycc_token(drycc_token, token)
467-
if created:
468-
async with conn.cursor() as cursor:
469-
await cursor.execute(
470-
"UPDATE user_auth SET o_auth_id_token=%s WHERE auth_module=%s AND auth_id=%s",
471-
(drycc_token, "authproxy", username)
472-
)
473-
await conn.commit()
474-
return created, drycc_token
437+
# Pass through the Passport access_token directly, no need for DRF token conversion
438+
drycc_token = token.get("access_token")
439+
return True, drycc_token

0 commit comments

Comments
 (0)