chore(controller): add overcommit cpu and ram support

duanhongyi · duanhongyi · commit 54b869bf2ad8 · 2020-09-21T15:23:35.000+08:00
diff --git a/rootfs/api/models/app.py b/rootfs/api/models/app.py
@@ -1,5 +1,6 @@
 import backoff
 import base64
+import math
 from collections import OrderedDict
 from datetime import datetime
 from docker import auth as docker_auth
@@ -11,7 +12,9 @@
 import requests
 import string
 import time
+from itertools import groupby
 from urllib.parse import urljoin
+from collections import defaultdict
 
 from django.conf import settings
 from django.db import models
@@ -1084,6 +1087,46 @@ def _get_private_registry_config(self, image, registry=None):
         })
         return docker_config, name, True
 
+    @staticmethod
+    def _get_cpu_allocation(size):
+        """Return the CPU request (in millicores) for CPU limit `size`: the limit
+        divided by settings.KUBERNETES_CPU_ALLOCATION_RATIO, rounded up.
+        Accepts millicore ("500m"), whole ("2") and fractional ("0.5") values.
+        """
+        parts = [''.join(g) for _, g in groupby(str(size), key=str.isalpha)]
+        num, unit = (parts + [''])[:2]
+        # Work in millicores so unit-less limits parse and keep sub-core precision.
+        milli = round(float(num) * (1 if unit == 'm' else 1000))
+        ratio = settings.KUBERNETES_CPU_ALLOCATION_RATIO
+        return "{}m".format(math.ceil(milli / ratio))
+
+    @staticmethod
+    def _get_ram_allocation(size):
+        """Return the memory request for memory limit `size`: its numeric part
+        divided by settings.KUBERNETES_RAM_ALLOCATION_RATIO and rounded up, with
+        the unit suffix kept. Handles unit-less ("0") and fractional sizes.
+        """
+        # Split on letters (not digits) so "0" and "1.5Gi" still parse.
+        parts = [''.join(g) for _, g in groupby(str(size), key=str.isalpha)]
+        num, unit = (parts + [''])[:2]
+        ratio = settings.KUBERNETES_RAM_ALLOCATION_RATIO
+        return "{num}{unit}".format(
+            num=math.ceil(float(num) / ratio), unit=unit)
+
+    def _get_default_resources(self):
+        """Build the default pod resources from settings, filling in a cpu or
+        memory request from the matching limit when only the limit is given.
+        """
+        resources = defaultdict(dict)
+        resources.update(json.loads(settings.KUBERNETES_POD_DEFAULT_RESOURCES))
+        allocators = (("cpu", self._get_cpu_allocation),
+                      ("memory", self._get_ram_allocation))
+        for name, allocate in allocators:
+            # Short-circuit keeps "requests" untouched when no limit is set.
+            if name in resources["limits"] and name not in resources["requests"]:
+                resources["requests"][name] = allocate(resources["limits"][name])
+        return resources
+
     def _gather_app_settings(self, release, app_settings, process_type, replicas, volumes=None):
         """
         Gathers all required information needed in one easy place for passing into
@@ -1093,7 +1136,11 @@ def _gather_app_settings(self, release, app_settings, process_type, replicas, vo
         """
         envs = self._build_env_vars(release)
         config = release.config
-
+        cpu, ram = {}, {}
+        for key, value in config.cpu.items():
+            cpu[key] = "%s/%s" % (self._get_cpu_allocation(value), value)
+        for key, value in config.memory.items():
+            ram[key] = "%s/%s" % (self._get_ram_allocation(value), value)
         # see if the app config has deploy batch preference, otherwise use global
         batches = int(config.values.get('DRYCC_DEPLOY_BATCHES', settings.DRYCC_DEPLOY_BATCHES))  # noqa
 
@@ -1108,9 +1155,6 @@ def _gather_app_settings(self, release, app_settings, process_type, replicas, vo
         pod_termination_grace_period_seconds = int(config.values.get(
             'KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS', settings.KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS))  # noqa
 
-        # get pod default resources
-        pod_default_resources = json.loads(settings.KUBERNETES_POD_DEFAULT_RESOURCES)
-
         # set the image pull policy that is associated with the application container
         image_pull_policy = config.values.get('IMAGE_PULL_POLICY', settings.IMAGE_PULL_POLICY)
 
@@ -1135,15 +1179,15 @@ def _gather_app_settings(self, release, app_settings, process_type, replicas, vo
         } for _ in volumes] if volumes else []
 
         return {
-            'memory': config.memory,
-            'cpu': config.cpu,
+            'memory': ram,
+            'cpu': cpu,
             'tags': config.tags,
             'envs': envs,
             'registry': config.registry,
             'replicas': replicas,
             'version': 'v{}'.format(release.version),
             'app_type': process_type,
-            'resources': pod_default_resources,
+            'resources': self._get_default_resources(),
             'build_type': release.build.type,
             'healthcheck': healthcheck,
             'lifecycle_post_start': config.lifecycle_post_start,
diff --git a/rootfs/api/serializers.py b/rootfs/api/serializers.py
@@ -24,14 +24,14 @@
 PROCTYPE_MATCH = re.compile(r'^(?P<type>[a-z0-9]+(\-[a-z0-9]+)*)$')
 PROCTYPE_MISMATCH_MSG = "Process types can only contain lowercase alphanumeric characters"
 MEMLIMIT_MATCH = re.compile(
-    r'^(?P<mem>(([0-9]+(MB|KB|GB|[BKMG])|0)(/([0-9]+(MB|KB|GB|[BKMG])))?))$', re.IGNORECASE)
+    r'^(?P<mem>([0-9]+(MB|KB|GB|[BKMG])|0))$', re.IGNORECASE)
 CPUSHARE_MATCH = re.compile(
-    r'^(?P<cpu>(([-+]?[0-9]*\.?[0-9]+[m]?)(/([-+]?[0-9]*\.?[0-9]+[m]?))?))$')
+    r'^(?P<cpu>([-+]?[0-9]*\.?[0-9]+[m]?))$')
 TAGVAL_MATCH = re.compile(r'^(?:[a-zA-Z\d][-\.\w]{0,61})?[a-zA-Z\d]$')
 CONFIGKEY_MATCH = re.compile(r'^[a-z_]+[a-z0-9_]*$', re.IGNORECASE)
 TERMINATION_GRACE_PERIOD_MATCH = re.compile(r'^[0-9]*$')
 VOLUME_SIZE_MATCH = re.compile(
-    r'^(?P<mem>(([0-9]+(MB|KB|GB|[BKMG])|0)(/([0-9]+(MB|KB|GB|[BKMG])))?))$', re.IGNORECASE)
+    r'^(?P<mem>([0-9]+(MB|KB|GB|[BKMG])|0))$', re.IGNORECASE)
 VOLUME_PATH = re.compile(r'^\/(\w+\/?)+$', re.IGNORECASE)
 
 PROBE_SCHEMA = {
@@ -201,7 +201,8 @@ class Meta:
         fields = ['owner', 'app', 'image', 'stack', 'sha', 'procfile',
                   'dockerfile', 'created', 'updated', 'uuid']
 
-    def validate_procfile(self, data):
+    @staticmethod
+    def validate_procfile(data):
         for key, value in data.items():
             if value is None or value == "":
                 raise serializers.ValidationError("Command can't be empty for process type")
@@ -233,7 +234,8 @@ class Meta:
         model = models.Config
         fields = '__all__'
 
-    def validate_values(self, data):
+    @staticmethod
+    def validate_values(data):
         for key, value in data.items():
             if value is None:  # use NoneType to unset an item
                 continue
@@ -278,7 +280,8 @@ def validate_values(self, data):
 
         return data
 
-    def validate_memory(self, data):
+    @staticmethod
+    def validate_memory(data):
         for key, value in data.items():
             if value is None:  # use NoneType to unset an item
                 continue
@@ -288,12 +291,13 @@ def validate_memory(self, data):
 
             if not re.match(MEMLIMIT_MATCH, str(value)):
                 raise serializers.ValidationError(
-                    "Memory limit format: <number><unit> or <number><unit>/<number><unit>, "
+                    "Memory limit format: <number><unit>, "
                     "where unit = B, K, M or G")
 
         return data
 
-    def validate_cpu(self, data):
+    @staticmethod
+    def validate_cpu(data):
         for key, value in data.items():
             if value is None:  # use NoneType to unset an item
                 continue
@@ -304,11 +308,12 @@ def validate_cpu(self, data):
             shares = re.match(CPUSHARE_MATCH, str(value))
             if not shares:
                 raise serializers.ValidationError(
-                    "CPU limit format: <value> or <value>/<value>, where value must be a numeric")
+                    "CPU limit format: <value>, where value must be a numeric")
 
         return data
 
-    def validate_termination_grace_period(self, data):
+    @staticmethod
+    def validate_termination_grace_period(data):
         for key, value in data.items():
             if value is None:  # use NoneType to unset an item
                 continue
@@ -323,7 +328,8 @@ def validate_termination_grace_period(self, data):
 
         return data
 
-    def validate_tags(self, data):
+    @staticmethod
+    def validate_tags(data):
         for key, value in data.items():
             if value is None:  # use NoneType to unset an item
                 continue
@@ -357,7 +363,8 @@ def validate_tags(self, data):
 
         return data
 
-    def validate_registry(self, data):
+    @staticmethod
+    def validate_registry(data):
         for key, value in data.items():
             if value is None:  # use NoneType to unset an item
                 continue
@@ -369,7 +376,8 @@ def validate_registry(self, data):
 
         return data
 
-    def validate_healthcheck(self, data):
+    @staticmethod
+    def validate_healthcheck(data):
         for procType, healthcheck in data.items():
             if healthcheck is None:
                 continue
@@ -432,7 +440,8 @@ class Meta:
         fields = ['owner', 'created', 'updated', 'app', 'domain']
         read_only_fields = ['uuid']
 
-    def validate_domain(self, value):
+    @staticmethod
+    def validate_domain(value):
         """
         Check that the hostname is valid
         """
@@ -496,13 +505,15 @@ class Meta:
         fields = ['owner', 'created', 'updated', 'app', 'procfile_type', 'path_pattern']
         read_only_fields = ['uuid']
 
-    def validate_procfile_type(self, value):
+    @staticmethod
+    def validate_procfile_type(value):  # reject values PROCTYPE_MATCH does not accept
         if not re.match(PROCTYPE_MATCH, value):
             raise serializers.ValidationError(PROCTYPE_MISMATCH_MSG)
 
         return value
 
-    def validate_path_pattern(self, value):
+    @staticmethod
+    def validate_path_pattern(value):
         for pattern in str(value).split(","):
             if not pattern.strip():
                 raise serializers.ValidationError(
@@ -561,7 +572,8 @@ class Meta:
         model = models.AppSettings
         fields = '__all__'
 
-    def validate_whitelist(self, data):
+    @staticmethod
+    def validate_whitelist(data):
         for address in data:
             try:
                 ipaddress.ip_address(address)
@@ -577,7 +589,8 @@ def validate_whitelist(self, data):
 
         return data
 
-    def validate_autoscale(self, data):
+    @staticmethod
+    def validate_autoscale(data):
         schema = {
             "$schema": "http://json-schema.org/schema#",
             "type": "object",
@@ -632,14 +645,16 @@ class Meta:
         model = models.Volume
         fields = '__all__'
 
-    def validate_size(self, data):
+    @staticmethod
+    def validate_size(data):  # VOLUME_SIZE_MATCH accepts a single <number><unit> only
         if not re.match(VOLUME_SIZE_MATCH, str(data)):
             raise serializers.ValidationError(
-                "Volume size limit format: <number><unit> or <number><unit>/<number><unit>, "
+                "Volume size limit format: <number><unit>, "
                 "where unit = B, K, M or G")
         return data
 
-    def validate_path(self, data):
+    @staticmethod
+    def validate_path(data):
         for key, value in data.items():
             if value is None:  # use NoneType to unset an item
                 continue
diff --git a/rootfs/api/settings/production.py b/rootfs/api/settings/production.py
@@ -335,23 +335,25 @@
 # How long k8s waits for a pod to finish work after a SIGTERM before sending SIGKILL
 KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS = int(os.environ.get('KUBERNETES_POD_TERMINATION_GRACE_PERIOD_SECONDS', 30))  # noqa
 
-# Default pod spec for application
+# Overcommit ratios: a cpu/memory request is its limit divided by the ratio, rounded up.
+KUBERNETES_CPU_ALLOCATION_RATIO = int(os.environ.get('KUBERNETES_CPU_ALLOCATION_RATIO', '10'))
+KUBERNETES_RAM_ALLOCATION_RATIO = int(os.environ.get('KUBERNETES_RAM_ALLOCATION_RATIO', '2'))
+# Default pod spec for application. Please do not set requests.cpu and requests.memory:
+# if set, they are used as-is instead of being computed from the limits and the ratios
+# above, until limits are set manually through `drycc limits:set`.
 KUBERNETES_POD_DEFAULT_RESOURCES = os.environ.get(
     'KUBERNETES_POD_DEFAULT_RESOURCES',
     json.dumps({
         "requests": {
-            "cpu": "200m",
-            "memory": "256Mi",
-            "ephemeral-storage": "1Gi"
+            "ephemeral-storage": "256Mi"
         },
         "limits": {
             "cpu": "500m",
             "memory": "512Mi",
-            "ephemeral-storage": "2Gi"
-        },
+            "ephemeral-storage": "1Gi"
+        }
     })
 )
-
 # Default quota spec for application namespace
 KUBERNETES_NAMESPACE_DEFAULT_QUOTA_SPEC = os.environ.get(
     'KUBERNETES_NAMESPACE_DEFAULT_QUOTA_SPEC',
diff --git a/rootfs/api/tests/test_config.py b/rootfs/api/tests/test_config.py
@@ -204,7 +204,7 @@ def test_response_data_types_converted(self, mock_requests):
         response = self.client.post(url, body)
         self.assertEqual(response.status_code, 400, response.data)
         self.assertIn(
-            'CPU limit format: <value> or <value>/<value>, where value must be a numeric',
+            'CPU limit format: <value>, where value must be a numeric',
             response.data['cpu'])
 
     def test_config_set_same_key(self, mock_requests):
diff --git a/rootfs/api/tests/test_limits.py b/rootfs/api/tests/test_limits.py