Skip to content

Commit 1948569

Browse files
authored
feat(blocklist): blocklist api (#190)
1 parent 7ec6627 commit 1948569

14 files changed

Lines changed: 689 additions & 11 deletions

File tree

charts/controller/templates/controller-clusterrole.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ rules:
5151
- apiGroups: ["extensions", "apps"]
5252
resources: ["deployments/scale", "replicasets/scale"]
5353
verbs: ["get", "update"]
54+
- apiGroups: ["apps"]
55+
resources: ["statefulsets"]
56+
verbs: ["get", "list", "patch"]
57+
- apiGroups: ["extensions", "apps"]
58+
resources: ["daemonsets"]
59+
verbs: ["get", "list", "patch"]
5460
- apiGroups: ["extensions", "autoscaling"]
5561
resources: ["horizontalpodautoscalers"]
5662
verbs: ["get", "list", "create", "update", "delete"]

rootfs/api/backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class DryccOIDC(OpenIdConnectAuth):
2121
USERINFO_URL = settings.SOCIAL_AUTH_DRYCC_USERINFO_URL
2222
JWKS_URI = settings.SOCIAL_AUTH_DRYCC_JWKS_URI
2323
OIDC_ENDPOINT = settings.SOCIAL_AUTH_DRYCC_OIDC_ENDPOINT
24-
DEFAULT_SCOPE = ['openid']
24+
DEFAULT_SCOPE = ['openid', 'profile', 'email']
2525
EXTRA_DATA = [
2626
('id', 'id'),
2727
('access_token', 'access_token'),
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Generated by Django 4.2.22 on 2025-08-22 02:48
2+
3+
import api.models.app
4+
import api.utils
5+
from django.db import migrations, models
6+
7+
8+
class Migration(migrations.Migration):
9+
10+
dependencies = [
11+
('api', '0024_limitplan_container_volume_mounts_and_more'),
12+
]
13+
14+
operations = [
15+
migrations.AlterField(
16+
model_name='app',
17+
name='id',
18+
field=models.SlugField(max_length=63, null=True, unique=True, validators=[api.models.app.validate_app_id]),
19+
),
20+
migrations.AlterField(
21+
model_name='gateway',
22+
name='name',
23+
field=models.CharField(db_index=True, max_length=63, validators=[api.utils.validate_label]),
24+
),
25+
]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 4.2.22 on 2025-08-22 02:56
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('api', '0025_alter_app_id_alter_gateway_name'),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name='app',
15+
name='suspended_state',
16+
field=models.JSONField(blank=True, default=dict),
17+
),
18+
]

rootfs/api/models/app.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ class App(UuidAuditedModel):
114114
validators=[validate_app_id])
115115
structure = models.JSONField(
116116
default=dict, blank=True, validators=[validate_app_structure])
117+
suspended_state = models.JSONField(default=dict, blank=True)
117118

118119
class Meta:
119120
verbose_name = 'Application'

rootfs/api/models/blocklist.py

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1+
import logging
2+
import functools
13
from django.db import models
24
from django.contrib.auth import get_user_model
5+
from scheduler import KubeHTTPException
6+
from api.exceptions import ServiceUnavailable
7+
from api.utils import apply_tasks
38
from .app import App
49
from .base import UuidAuditedModel
510

11+
612
User = get_user_model()
13+
logger = logging.getLogger(__name__)
714

815

916
class Blocklist(UuidAuditedModel):
@@ -41,3 +48,227 @@ def get_blocklist(cls, app: App):
4148
class Meta:
4249
ordering = ['-created']
4350
unique_together = (("id", "type"),)
51+
52+
def related_resource_deployments(self, app: App):
53+
"get resource deployments"
54+
try:
55+
deployments = self.scheduler.deployment.get(app.id, labels={}).json()['items'] # noqa
56+
if not deployments:
57+
deployments = []
58+
data = []
59+
for d in deployments:
60+
item = {
61+
'name': d['metadata']['name'],
62+
'replicas': d['spec'].get("replicas", 0),
63+
}
64+
data.append(item)
65+
data.sort(key=lambda x: x['name'])
66+
return data
67+
except KubeHTTPException:
68+
pass
69+
except Exception as e:
70+
err = f'(list resource deployments): {e}'
71+
logger.info(err)
72+
raise ServiceUnavailable(err) from e
73+
74+
def related_resource_statefulsets(self, app: App):
75+
"get resource statefulsets"
76+
try:
77+
statefulsets = self.scheduler.statefulset.get(app.id, labels={}).json()['items'] # noqa
78+
if not statefulsets:
79+
statefulsets = []
80+
data = []
81+
for s in statefulsets:
82+
item = {
83+
'name': s['metadata']['name'],
84+
'replicas': s['spec'].get("replicas", 0),
85+
}
86+
data.append(item)
87+
data.sort(key=lambda x: x['name'])
88+
return data
89+
except KubeHTTPException:
90+
pass
91+
except Exception as e:
92+
err = f'(list resource statefulsets): {e}'
93+
logger.info(err)
94+
raise ServiceUnavailable(err) from e
95+
96+
def related_resource_daemonsets(self, app: App):
97+
"get resource daemonsets"
98+
try:
99+
daemonsets = self.scheduler.daemonset.get(app.id, labels={}).json()['items'] # noqa
100+
if not daemonsets:
101+
daemonsets = []
102+
data = []
103+
for d in daemonsets:
104+
item = {
105+
'name': d['metadata']['name'],
106+
'affinity': d['spec']['template']['spec'].get('affinity', {}),
107+
}
108+
data.append(item)
109+
data.sort(key=lambda x: x['name'])
110+
return data
111+
except KubeHTTPException:
112+
pass
113+
except Exception as e:
114+
err = f'(list resource daemonsets): {e}'
115+
logger.info(err)
116+
raise ServiceUnavailable(err) from e
117+
118+
def suspended_state(self, app: App):
119+
"""
120+
Get deployments/statefulsets/daemonsets with labels app.kubernetes.io/managed-by=Helm
121+
Store in the suspended_state field of the app model
122+
Format:
123+
{"deployments": [{"name":"sample1", "replicas": 1}],
124+
"statefulsets": [{"name":"sample1", "replicas": 1}],
125+
"daemonsets": [{"name":"sample1", "affinity": {}}]}
126+
"""
127+
suspended_state = {}
128+
suspended_state['deployments'] = self.related_resource_deployments(app)
129+
suspended_state['statefulsets'] = self.related_resource_statefulsets(app)
130+
suspended_state['daemonsets'] = self.related_resource_daemonsets(app)
131+
return suspended_state
132+
133+
def scale_resource_deployments(self, app: App, deployment_name: str, replicas=0):
134+
"""scale deployments"""
135+
try:
136+
deployment = self.scheduler.deployment.get(app.id, deployment_name).json()
137+
self.scheduler.scales.update(app.id, deployment_name, replicas, deployment)
138+
except KubeHTTPException:
139+
pass
140+
except Exception as e:
141+
err = f'(scale resource deployments): {e}'
142+
logger.info(err)
143+
raise ServiceUnavailable(err) from e
144+
145+
def scale_resource_statefulsets(self, app: App, statefulset_name: str, replicas=0):
146+
"""scale statefulsets"""
147+
try:
148+
self.scheduler.statefulset.get(app.id, statefulset_name).json()
149+
manifest = {
150+
'spec': {
151+
'persistentVolumeClaimRetentionPolicy': {
152+
'whenScaled': 'Retain'
153+
},
154+
'replicas': replicas
155+
}
156+
}
157+
self.scheduler.statefulset.patch(app.id, statefulset_name, manifest)
158+
except KubeHTTPException:
159+
pass
160+
except Exception as e:
161+
err = f'(scale resource statefulsets): {e}'
162+
logger.info(err)
163+
raise ServiceUnavailable(err) from e
164+
165+
def scale_resource_daemonsets(self, app: App, daemonset_name: str, manifest: dict):
166+
"""set affinity"""
167+
try:
168+
self.scheduler.daemonset.get(app.id, daemonset_name).json()
169+
self.scheduler.daemonset.patch(app.id, daemonset_name, manifest)
170+
except KubeHTTPException:
171+
pass
172+
except Exception as e:
173+
err = f'(scale resource daemonsets): {e}'
174+
logger.info(err)
175+
raise ServiceUnavailable(err) from e
176+
177+
def scale_resources(self, app: App, suspended_state: dict, scale_type="block"):
178+
"scale resources tasks"
179+
tasks = []
180+
deployments = suspended_state.get('deployments', [])
181+
if not deployments:
182+
deployments = []
183+
for d in deployments:
184+
if scale_type == "unblock":
185+
replicas = d['replicas']
186+
else:
187+
replicas = 0
188+
tasks.append((
189+
functools.partial(
190+
self.scale_resource_deployments,
191+
app=app,
192+
deployment_name=d['name'],
193+
replicas=replicas
194+
),
195+
lambda future, name=d["name"]: app.log(
196+
f'{scale_type} scale deployment {name} callback: {future.result()}',
197+
)
198+
))
199+
200+
statefulsets = suspended_state.get('statefulsets', [])
201+
if not statefulsets:
202+
statefulsets = []
203+
for s in statefulsets:
204+
if scale_type == "unblock":
205+
replicas = s['replicas']
206+
else:
207+
replicas = 0
208+
tasks.append((
209+
functools.partial(
210+
self.scale_resource_statefulsets,
211+
app=app,
212+
statefulset_name=s['name'],
213+
replicas=replicas
214+
),
215+
lambda future, name=s["name"]: app.log(
216+
f'{scale_type} scale statefulset {name} callback: {future.result()}',
217+
)
218+
))
219+
220+
daemonsets = suspended_state.get('daemonsets', [])
221+
if not daemonsets:
222+
daemonsets = []
223+
for d in daemonsets:
224+
if scale_type == "unblock":
225+
manifest = {
226+
"spec": {
227+
"template": {
228+
"spec": {
229+
"affinity": d['affinity']
230+
}
231+
}
232+
}
233+
}
234+
else:
235+
manifest = {
236+
"spec": {
237+
"template": {
238+
"spec": {
239+
"affinity": {
240+
"nodeAffinity": {
241+
"requiredDuringSchedulingIgnoredDuringExecution": {
242+
"nodeSelectorTerms": [{
243+
"matchExpressions": [{
244+
"key": "kubernetes.io/hostname",
245+
"operator": "In",
246+
"values": [
247+
"nohostname"
248+
]
249+
}]
250+
}]
251+
}
252+
}
253+
}
254+
}
255+
}
256+
}
257+
}
258+
tasks.append((
259+
functools.partial(
260+
self.scale_resource_daemonsets,
261+
app=app,
262+
daemonset_name=d['name'],
263+
manifest=manifest
264+
),
265+
lambda future, name=d["name"]: app.log(
266+
f'{scale_type} scale daemonset {name} callback: {future.result()}',
267+
)
268+
))
269+
try:
270+
apply_tasks(tasks)
271+
except Exception as e:
272+
err = f'({scale_type} scale resources): {e}'
273+
logger.info(err)
274+
raise ServiceUnavailable(err) from e

rootfs/api/tasks.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,19 @@ def downstream_model_owner(app, old_owner, new_owner):
158158
raise e
159159
else:
160160
signals.request_finished.send(sender=task_id)
161+
162+
163+
@shared_task(
164+
autoretry_for=(ServiceUnavailable, ),
165+
retry_kwargs={'max_retries': None}
166+
)
167+
def scale_resources(blocklist, app, suspended_state, scale_type):
168+
task_id = uuid.uuid4().hex
169+
signals.request_started.send(sender=task_id)
170+
try:
171+
blocklist.scale_resources(app, suspended_state, scale_type)
172+
except Exception as e:
173+
signals.got_request_exception.send(sender=task_id)
174+
raise e
175+
else:
176+
signals.request_finished.send(sender=task_id)

rootfs/api/views.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636
from api import monitor, models, permissions, serializers, viewsets, authentication, __version__
3737
from api.tasks import scale_app, restart_app, mount_app, downstream_model_owner, \
38-
delete_pod
38+
delete_pod, scale_resources
3939
from api.exceptions import AlreadyExists, ServiceUnavailable, DryccException
4040

4141
from django.views.decorators.cache import never_cache
@@ -183,24 +183,35 @@ class WorkflowManagerViewset(GenericViewSet):
183183

184184
def block(self, request, **kwargs):
185185
try:
186-
blocklist, _ = models.blocklist.Blocklist.objects.get_or_create(
186+
blocklist, created = models.blocklist.Blocklist.objects.get_or_create(
187187
id=kwargs['id'],
188188
type=models.blocklist.Blocklist.get_type(kwargs["type"]),
189189
defaults={"remark": request.data.get("remark")}
190190
)
191191
for app in blocklist.related_apps:
192-
scale_app.delay(app, app.owner, {key: 0 for key in app.structure.keys()})
192+
if created:
193+
app.suspended_state = blocklist.suspended_state(app)
194+
app.save()
195+
# scale to 0
196+
scale_resources.delay(blocklist, app, app.suspended_state, "block")
193197
return HttpResponse(status=201)
194198
except ValueError as e:
195199
logger.info(e)
196200
raise DryccException("Unsupported block type: %s" % kwargs["type"])
197201

198202
def unblock(self, request, **kwargs):
199203
try:
200-
models.blocklist.Blocklist.objects.filter(
204+
blocklists = models.blocklist.Blocklist.objects.filter(
201205
id=kwargs['id'],
202206
type=models.blocklist.Blocklist.get_type(kwargs["type"])
203-
).delete()
207+
)
208+
for blocklist in blocklists:
209+
for app in blocklist.related_apps:
210+
# scale to up
211+
scale_resources.delay(blocklist, app, app.suspended_state, "unblock")
212+
app.suspended_state = {}
213+
app.save()
214+
blocklist.delete()
204215
return HttpResponse(status=204)
205216
except ValueError as e:
206217
logger.info(e)

rootfs/dev_requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,6 @@ flake8==7.2.0
88

99
# mock out python-requests, mostly k8s
1010
requests-mock==1.12.1
11+
12+
# tracebacks
13+
tblib==3.1.0

0 commit comments

Comments
 (0)