diff --git a/.gitignore b/.gitignore
index 1ee9ef0..4573d78 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 contrib/ci/tmp
-.vscode/
\ No newline at end of file
+.vscode/
+charts/database/Chart.lock
+charts/database/charts/
diff --git a/Dockerfile b/Dockerfile
index d8c2cb9..f0764de 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,12 +10,15 @@ ARG PYTHON_VERSION="3.13" \
 
 ENV HOME=/data \
     PG_MAJOR=18 \
-    PG_MINOR=1
-ENV PGDATA $HOME/$PG_MAJOR
+    PG_MINOR=1 \
+    S3_RCLONE_VERSION="1.71.1"
 
-RUN install-packages vim gcc \
+ENV PGDATA=$HOME/$PG_MAJOR
+
+RUN install-packages vim gcc pigz jq \
   && install-stack python $PYTHON_VERSION \
   && install-stack postgresql $PG_MAJOR.$PG_MINOR \
+  && install-stack rclone $S3_RCLONE_VERSION \
   && install-stack postgres_exporter $POSTGRES_EXPORTER_VERSION \
   && . init-stack \
   && set -eux; pip3 install --disable-pip-version-check --no-cache-dir psycopg[binary] patroni[kubernetes] 2>/dev/null; set +eux \
@@ -23,18 +26,18 @@ RUN install-packages vim gcc \
   && apt-get autoremove -y \
   && apt-get clean -y \
   && rm -rf \
-        /usr/share/doc \
-        /usr/share/man \
-        /usr/share/info \
-        /usr/share/locale \
-        /var/lib/apt/lists/* \
-        /var/log/* \
-        /var/cache/debconf/* \
-        /etc/systemd \
-        /lib/lsb \
-        /lib/udev \
-        /usr/lib/`echo $(uname -m)`-linux-gnu/gconv/IBM* \
-        /usr/lib/`echo $(uname -m)`-linux-gnu/gconv/EBC* \
+    /usr/share/doc \
+    /usr/share/man \
+    /usr/share/info \
+    /usr/share/locale \
+    /var/lib/apt/lists/* \
+    /var/log/* \
+    /var/cache/debconf/* \
+    /etc/systemd \
+    /lib/lsb \
+    /lib/udev \
+    /usr/lib/`echo $(uname -m)`-linux-gnu/gconv/IBM* \
+    /usr/lib/`echo $(uname -m)`-linux-gnu/gconv/EBC* \
   && mkdir -p /usr/share/man/man{1..8} \
   && mkdir -p $PGDATA \
   && groupadd postgres && useradd -g postgres postgres \
diff --git a/charts/database/templates/_helper.tpl b/charts/database/templates/_helper.tpl
index 38570fb..3d1638f 100644
--- a/charts/database/templates/_helper.tpl
+++ b/charts/database/templates/_helper.tpl
@@ -1,5 +1,51 @@
 {{- define "database.envs" }}
 env:
+- name: RETAIN_BACKUPS_AGE
+  value: "{{.Values.cronjob.retainBackupsAge}}"
+{{- if (.Values.storageEndpoint) }}
+- name: "DRYCC_STORAGE_BUCKET"
+  valueFrom:
+    secretKeyRef:
+      name: database-creds
+      key: storage-bucket
+- name: "DRYCC_STORAGE_ENDPOINT"
+  valueFrom:
+    secretKeyRef:
+      name: database-creds
+      key: storage-endpoint
+- name: "DRYCC_STORAGE_ACCESSKEY"
+  valueFrom:
+    secretKeyRef:
+      name: database-creds
+      key: storage-accesskey
+- name: "DRYCC_STORAGE_SECRETKEY"
+  valueFrom:
+    secretKeyRef:
+      name: database-creds
+      key: storage-secretkey
+- name: "DRYCC_STORAGE_PATH_STYLE"
+  valueFrom:
+    secretKeyRef:
+      name: database-creds
+      key: storage-path-style
+{{- else if .Values.storage.enabled }}
+- name: "DRYCC_STORAGE_BUCKET"
+  value: "database"
+- name: "DRYCC_STORAGE_ENDPOINT"
+  value: http://drycc-storage:9000
+- name: "DRYCC_STORAGE_ACCESSKEY"
+  valueFrom:
+    secretKeyRef:
+      name: storage-creds
+      key: accesskey
+- name: "DRYCC_STORAGE_SECRETKEY"
+  valueFrom:
+    secretKeyRef:
+      name: storage-creds
+      key: secretkey
+- name: "DRYCC_STORAGE_PATH_STYLE"
+  value: "true"
+{{- end }}
 {{- if eq .Values.debug "true" }}
 - name: PATRONI_LOG_LEVEL
   value: DEBUG
diff --git a/charts/database/templates/database-cronjob-backup.yaml b/charts/database/templates/database-cronjob-backup.yaml
new file mode 100644
index 0000000..ee4e042
--- /dev/null
+++ b/charts/database/templates/database-cronjob-backup.yaml
@@ -0,0 +1,29 @@
+{{- if .Values.cronjob.enabled }}
+apiVersion: {{ include "common.capabilities.cronjob.apiVersion" . }}
+kind: CronJob
+metadata:
+  name: drycc-database-cronjob-backup
+  labels:
+    heritage: drycc
+  annotations:
+    component.drycc.cc/version: {{ .Values.imageTag }}
+
+spec:
+  schedule: "{{ .Values.cronjob.scheduleCronJob }}"
+  failedJobsHistoryLimit: 1
+  successfulJobsHistoryLimit: 1
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          restartPolicy: OnFailure
+          containers:
+          - name: drycc-database-cronjob-backup
+            image: {{.Values.imageRegistry}}/{{.Values.imageOrg}}/database:{{.Values.imageTag}}
+            imagePullPolicy: {{.Values.imagePullPolicy}}
+            command:
+              - init-stack
+            args:
+              - /usr/share/scripts/backup.sh
+            {{- include "database.envs" . | indent 12 }}
+{{- end -}}
\ No newline at end of file
diff --git a/charts/database/templates/database-secret-creds.yaml b/charts/database/templates/database-secret-creds.yaml
index 1f5eddc..6b7f01d 100644
--- a/charts/database/templates/database-secret-creds.yaml
+++ b/charts/database/templates/database-secret-creds.yaml
@@ -12,3 +12,10 @@ data:
   replicator-password: {{ include "common.secrets.lookup" (dict "secret" "database-creds" "key" "replicator-password" "defaultValue" (.Values.replicatorPassword | default (randAlphaNum 32) | lower) "context" $) }}
   user: {{ include "common.secrets.lookup" (dict "secret" "database-creds" "key" "user" "defaultValue" (.Values.user | default (randAlpha 32) | lower) "context" $) }}
   password: {{ include "common.secrets.lookup" (dict "secret" "database-creds" "key" "password" "defaultValue" (.Values.password | default (randAlphaNum 32)) "context" $) }}
+  {{- if (.Values.storageEndpoint) }}
+  storage-bucket: {{ .Values.storageBucket | b64enc }}
+  storage-endpoint: {{ .Values.storageEndpoint | b64enc }}
+  storage-accesskey: {{ .Values.storageAccesskey | b64enc }}
+  storage-secretkey: {{ .Values.storageSecretkey | b64enc }}
+  storage-path-style: {{ .Values.storagePathStyle | toString | b64enc }}
+  {{- end }}
diff --git a/charts/database/values.yaml b/charts/database/values.yaml
index e407b0a..4377b35 100644
--- a/charts/database/values.yaml
+++ b/charts/database/values.yaml
@@ -20,7 +20,8 @@ diagnosticMode:
   args:
     - infinity
 
-resources: {}
+resources:
+  {}
   # limits:
   #   cpu: 200m
   #   memory: 50Mi
@@ -45,7 +46,24 @@ podAntiAffinityPreset:
   app: "drycc-database"
 
 debug: "false"
+# Patroni pg_ctl timeout
 timeout: "1200"
+
+# External object storage for backups. When storageEndpoint is set these values are used and the built-in storage component (storage.enabled) is ignored.
+storageBucket: "database"
+storageEndpoint: ""
+storageAccesskey: ""
+storageSecretkey: ""
+storagePathStyle: "true"
+
+storage:
+  enabled: true
+
+cronjob:
+  enabled: true
+  scheduleCronJob: "0 3 * * *"
+  retainBackupsAge: 30d
+
 # Service
 service:
   # Provide any additional service annotations
@@ -66,7 +84,7 @@ persistence:
   storageClass: ""
 # The username and password to be used by the on-cluster database.
 # If left empty they will be generated
-# The user name should be set to lowercase letters 
+# The user name should be set to lowercase letters
 superuser: "postgres"
 superuserPassword: ""
 replicator: "standby"
diff --git a/rootfs/entrypoint.sh b/rootfs/entrypoint.sh
index 4f005a8..8865438 100755
--- a/rootfs/entrypoint.sh
+++ b/rootfs/entrypoint.sh
@@ -1,5 +1,4 @@
 #!/bin/bash
-
 if [[ $UID -ge 10000 ]]; then
     GID=$(id -g)
     sed -e "s/^postgres:x:[^:]*:[^:]*:/postgres:x:$UID:$GID:/" /etc/passwd > /tmp/passwd
@@ -19,14 +18,14 @@ bootstrap:
       use_pg_rewind: true
       use_slots: true
   initdb:
-  - auth-host: md5
+  - auth-host: scram-sha-256
   - auth-local: trust
   - encoding: UTF8
   - locale: ${LANG}
   - data-checksums
   pg_hba:
-  - host all all 0.0.0.0/0 md5
-  - host replication ${DRYCC_DATABASE_REPLICATOR} ${PATRONI_KUBERNETES_POD_IP}/16 md5
+  - host all all 0.0.0.0/0 scram-sha-256
+  - host replication ${DRYCC_DATABASE_REPLICATOR} ${PATRONI_KUBERNETES_POD_IP}/16 scram-sha-256
   post_bootstrap: /usr/share/scripts/patroni/post_init.sh
 restapi:
   connect_address: '${PATRONI_KUBERNETES_POD_IP}:8008'
@@ -43,6 +42,7 @@ postgresql:
     max_prepared_transactions: 0
     max_locks_per_transaction: 64
     wal_log_hints: "on"
+    wal_level: logical
     track_commit_timestamp: "off"
     archive_mode: "on"
     archive_timeout: 300s
@@ -50,6 +50,7 @@ postgresql:
     log_min_duration_statement: 1000
     log_lock_waits: on
     log_statement: 'ddl'
+    jit: off
   connect_address: '${PATRONI_KUBERNETES_POD_IP}:5432'
   authentication:
     superuser:
diff --git a/rootfs/usr/share/scripts/backup.sh b/rootfs/usr/share/scripts/backup.sh
new file mode 100755
index 0000000..441c33e
--- /dev/null
+++ b/rootfs/usr/share/scripts/backup.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# Dump the cluster's global objects and every connectable, non-template
+# database to S3-compatible object storage via rclone, then prune dumps
+# older than RETAIN_BACKUPS_AGE.
+#
+# Environment read by this script:
+#   DRYCC_DATABASE_SUPERUSER_PASSWORD             password for all connections
+#   DRYCC_DATABASE_REPLICA_SERVICE_HOST           host the dumps are taken from
+#   DRYCC_DATABASE_REPLICA_SERVICE_PORT_POSTGRES  port (falls back to 5432)
+#   DRYCC_STORAGE_BUCKET                          bucket on the "storage" remote
+#   RETAIN_BACKUPS_AGE                            rclone age, falls back to 30d
+#
+# pipefail matters here: without it a failing pg_dump is masked by the exit
+# status of the rclone stage at the end of the pipeline.
+set -eo pipefail
+
+# Configure the rclone "storage" remote and make sure the bucket exists.
+/usr/share/scripts/create_bucket
+
+# libpq only sees PGPASSWORD/PGPORT/PGUSER when they are exported.
+export PGPASSWORD="${DRYCC_DATABASE_SUPERUSER_PASSWORD}"
+export PGPORT="${DRYCC_DATABASE_REPLICA_SERVICE_PORT_POSTGRES:-5432}"
+export PGUSER=postgres
+POSTGRES_USER="${PGUSER}"
+POSTGRES_HOST="${DRYCC_DATABASE_REPLICA_SERVICE_HOST}"
+RETAIN_BACKUPS_AGE="${RETAIN_BACKUPS_AGE:-30d}"
+
+# Every run writes into its own timestamped prefix inside the bucket.
+BACKUP_PATH="${DRYCC_STORAGE_BUCKET}/$(date +%Y%m%d%H%M)"
+FAILED=0
+
+# upload NAME: compress stdin and stream it to storage:${BACKUP_PATH}/NAME.
+upload() {
+  pigz -c -p 4 -6 \
+    | rclone rcat \
+      "storage:${BACKUP_PATH}/$1" \
+      --bwlimit 10M \
+      --transfers 4 \
+      --s3-chunk-size 64M \
+      --stats 5s \
+      --progress \
+      --retries 3
+}
+
+# discard NAME: best-effort removal of a partial upload left by a failed dump.
+discard() {
+  rclone deletefile "storage:${BACKUP_PATH}/$1" > /dev/null 2>&1 || true
+}
+
+echo "DB: ${POSTGRES_USER}@${POSTGRES_HOST}"
+echo "S3 Storage: storage:${BACKUP_PATH}"
+
+# Backup global objects
+if pg_dumpall -g -U "${POSTGRES_USER}" -h "${POSTGRES_HOST}" \
+  | upload "roles_globals.sql.gz"; then
+  echo "✅ PostgreSQL global objects backup completed!"
+else
+  echo "❌ PostgreSQL global objects backup failed!" >&2
+  discard "roles_globals.sql.gz"
+  FAILED=1
+fi
+
+# Fetch the list of databases, one name per line (-A/-t: no padding, no header).
+DB_LIST=$(psql -U "${POSTGRES_USER}" -h "${POSTGRES_HOST}" -At \
+  -c "SELECT datname FROM pg_database WHERE datistemplate = false AND datallowconn;")
+mapfile -t DATABASES <<< "${DB_LIST}"
+
+# Backup each database individually; keep going after a failure so one broken
+# database does not prevent the others from being dumped.
+for DB in "${DATABASES[@]}"; do
+  [ -n "${DB}" ] || continue
+  echo "Backing up ${DB} to storage:${BACKUP_PATH}/${DB}.sql.gz"
+  if pg_dump -U "${POSTGRES_USER}" -h "${POSTGRES_HOST}" -d "${DB}" \
+    | upload "${DB}.sql.gz"; then
+    echo "✅ PostgreSQL ${DB} database backup completed!"
+  else
+    echo "❌ PostgreSQL ${DB} database backup failed!" >&2
+    discard "${DB}.sql.gz"
+    FAILED=1
+  fi
+done
+
+# Never prune old backups after a failed run, and make the job report failure.
+if [ "${FAILED}" -ne 0 ]; then
+  echo "Backup process failed; existing backups were left untouched." >&2
+  exit 1
+fi
+
+echo "Backup process completed!"
+
+echo "delete storage before ${RETAIN_BACKUPS_AGE} ..."
+# First pass only logs what will be removed; pruning itself is best-effort.
+rclone delete "storage:${DRYCC_STORAGE_BUCKET}" \
+  --min-age "${RETAIN_BACKUPS_AGE}" \
+  --include "*.sql.gz" \
+  --dry-run \
+  -v || true
+
+rclone delete "storage:${DRYCC_STORAGE_BUCKET}" \
+  --min-age "${RETAIN_BACKUPS_AGE}" \
+  --include "*.sql.gz" \
+  || true
+echo "delete completed."
+
+echo "=== backup completed: $(date) ==="
diff --git a/rootfs/usr/share/scripts/create_bucket b/rootfs/usr/share/scripts/create_bucket
new file mode 100755
index 0000000..addadaa
--- /dev/null
+++ b/rootfs/usr/share/scripts/create_bucket
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Configure the rclone remote "storage" from the DRYCC_STORAGE_* variables and
+# make sure the bucket named by DRYCC_STORAGE_BUCKET exists.
+
+set -e
+
+mkdir -p ~/.config/rclone
+touch ~/.config/rclone/rclone.conf
+rclone config create storage s3 \
+  provider=Other \
+  access_key_id="${DRYCC_STORAGE_ACCESSKEY}" \
+  secret_access_key="${DRYCC_STORAGE_SECRETKEY}" \
+  endpoint="${DRYCC_STORAGE_ENDPOINT}" \
+  force_path_style="${DRYCC_STORAGE_PATH_STYLE:-true}" --no-output
+
+# Poll the endpoint for up to ~90s. The loop is bounded and falls through to
+# "rclone mkdir" afterwards, which then decides whether the script fails.
+for _ in 1 2 3 4 5 6 7 8 9 10; do
+  if rclone lsd storage: > /dev/null 2>&1; then
+    break
+  fi
+  echo "waiting for object storage to become ready..."
+  sleep 9s
+done
+
+rclone mkdir "storage:${DRYCC_STORAGE_BUCKET}"