# (pasted web-viewer header, kept as a comment: Bash · 7397 bytes · Raw · Blame · History)
1 #!/usr/bin/env bash
2 # SPDX-License-Identifier: AGPL-3.0-or-later
3 #
4 # One-time provisioner for the WAL archive buckets + key grants.
5 # Runs from the OPERATOR'S laptop (not the droplet). Idempotent
6 # enough to re-run after a partial failure.
7 #
8 # Why this script exists:
9 # - DO's `doctl` CLI does NOT manage Spaces buckets — bucket
10 # create/delete go through the S3-compatible API directly.
11 # - The original provision-do.sh created shithub-backups{,-dr}
12 # and shithub-docs but skipped the WAL buckets.
13 # - The existing scoped Spaces keys can't create buckets; only
14 # a FullAccess key can. So this script:
15 # 1. Mints a temporary FullAccess Spaces key via doctl.
16 # 2. SSHes to the app droplet (which already has rclone) to
#      PUT-create both buckets using that temp key. Creds are
#      passed via env vars on the SSH command line — not written
#      to disk, though briefly visible in the remote process
#      table while the command runs.
20 # 3. Deletes the temp FullAccess key (security hygiene —
21 # minimize the lifetime of a key that can do anything).
22 # 4. Extends the existing scoped RW key's grants to include
23 # readwrite on shithub-wal{,-dr}.
24 # 5. Verifies the droplet can write to both buckets through
25 # its production rclone config.
26 #
27 # Prereqs (one-time on the operator laptop):
28 # - doctl installed + authenticated (`doctl auth init`)
29 # - ssh access to root@$DEPLOY_HOST (the same key that the GH
30 # Actions deploy uses works fine)
31 #
32 # Usage:
33 # DEPLOY_HOST=shithub.sh \
34 # PRIMARY_REGION=sfo3 DR_REGION=ams3 \
35 # PROD_RW_KEY_NAME=shithub-prod-app-rw \
36 # ./deploy/cutover/provision-wal-buckets.sh
37
set -euo pipefail

# ── Configuration. DEPLOY_HOST is mandatory; everything else has a
# sensible default and can be overridden via the environment.
DEPLOY_HOST="${DEPLOY_HOST:?set DEPLOY_HOST (e.g. shithub.sh)}"
PRIMARY_REGION="${PRIMARY_REGION:-sfo3}"
DR_REGION="${DR_REGION:-ams3}"
PROD_RW_KEY_NAME="${PROD_RW_KEY_NAME:-shithub-prod-app-rw}"

WAL_BUCKET="${WAL_BUCKET:-shithub-wal}"
WAL_DR_BUCKET="${WAL_DR_BUCKET:-shithub-wal-dr}"

# ── Prereq checks: fail fast with an actionable message. jq is used
# below to parse doctl's JSON output, so it must be checked here too
# (the original script only checked doctl and would die cryptically
# at the first jq call).
if ! command -v doctl >/dev/null 2>&1; then
  echo "fatal: doctl not on PATH; brew install doctl / apt-get install doctl" >&2
  exit 2
fi
if ! command -v jq >/dev/null 2>&1; then
  echo "fatal: jq not on PATH; brew install jq / apt-get install jq" >&2
  exit 2
fi
if ! doctl account get >/dev/null 2>&1; then
  echo "fatal: doctl not authenticated; run 'doctl auth init'" >&2
  exit 2
fi
56
# ── Step 1: mint a temporary FullAccess Spaces key. The name is
# timestamped so a re-run after a partial failure never collides
# with a leftover key from an earlier attempt.
TMP_KEY_NAME="shithub-wal-bootstrap-$(date -u +%Y%m%dT%H%M%SZ)"
echo "minting temp FullAccess key: $TMP_KEY_NAME" >&2
TMP_KEY_JSON="$(doctl spaces keys create "$TMP_KEY_NAME" \
  --grants 'bucket=;permission=fullaccess' \
  --output json)"
TMP_ACCESS_KEY="$(printf '%s' "$TMP_KEY_JSON" | jq -r '.[0].access_key')"
TMP_SECRET_KEY="$(printf '%s' "$TMP_KEY_JSON" | jq -r '.[0].secret_key')"
# Validate BOTH halves of the credential: `jq -r` prints the literal
# string "null" for a missing field, and an unvalidated secret would
# otherwise sail through to the ssh step and fail far less clearly.
if [[ -z "$TMP_ACCESS_KEY" || "$TMP_ACCESS_KEY" == "null" \
   || -z "$TMP_SECRET_KEY" || "$TMP_SECRET_KEY" == "null" ]]; then
  echo "fatal: failed to extract access/secret key from doctl response: $TMP_KEY_JSON" >&2
  # The create may have succeeded even though parsing failed; the EXIT
  # trap is not installed yet, so tell the operator to check by hand.
  echo "NOTE: a key named $TMP_KEY_NAME may still exist; revoke it via the dashboard" >&2
  exit 2
fi
69
70 # Always clean up the temp key, even on partial failure.
71 cleanup_temp_key() {
72 local rc=$?
73 echo "deleting temp FullAccess key $TMP_ACCESS_KEY..." >&2
74 doctl spaces keys delete "$TMP_ACCESS_KEY" --force >&2 || \
75 echo "WARN: temp key delete failed; revoke manually via dashboard" >&2
76 exit "$rc"
77 }
78 trap cleanup_temp_key EXIT
79
# ── Step 2: PUT-create both buckets via rclone on the droplet.
# We use rclone's inline-config syntax (`:s3,...`) so no rclone.conf
# edit is needed and the temp creds never land on disk. The buckets
# are EMPTY after creation; pg_archive starts shipping segments on
# the next archive_timeout once Postgres is reconfigured.
#
# Quoting map for the ssh payload below:
#   - "$TMP_ACCESS_KEY" / "$TMP_SECRET_KEY" expand LOCALLY (outer
#     double quotes) into AKEY='...' SKEY='...' env assignments on the
#     remote command. This would break if a secret ever contained a
#     single quote — assumes DO secrets are alphanumeric/base64-ish;
#     TODO confirm.
#   - \$AKEY / \$SKEY are backslash-escaped, so they expand on the
#     REMOTE side inside rclone's :s3 connection string.
# NOTE(review): the env assignments ride on the remote command line,
# so they are briefly visible in the droplet's process table while
# rclone runs — likely acceptable for a key this short-lived, but
# worth confirming against the threat model.
ssh_create_bucket() {
  # $1 = bucket name, $2 = Spaces region (e.g. sfo3).
  # All progress output goes to stderr, matching the rest of the script.
  local bucket="$1" region="$2"
  echo "creating $bucket in $region..." >&2
  ssh -o BatchMode=yes "root@$DEPLOY_HOST" \
    "AKEY='$TMP_ACCESS_KEY' SKEY='$TMP_SECRET_KEY' \
     rclone mkdir \
       ':s3,provider=DigitalOcean,access_key_id='\$AKEY',secret_access_key='\$SKEY',endpoint=$region.digitaloceanspaces.com:$bucket' 2>&1"
}
# Primary WAL bucket in the DB's region; DR copy in the other region.
ssh_create_bucket "$WAL_BUCKET" "$PRIMARY_REGION"
ssh_create_bucket "$WAL_DR_BUCKET" "$DR_REGION"
95
# ── Step 3: temp key cleanup happens via the EXIT trap above.

# ── Step 4: extend the scoped prod RW key's grants.
echo "looking up existing prod RW key id..." >&2
# first(...) makes the lookup deterministic if two keys somehow share
# the name (plain select would concatenate several JSON objects and
# corrupt the downstream parsing); // empty keeps "no match" as "".
PROD_KEY_LINE="$(doctl spaces keys list --output json \
  | jq -r --arg n "$PROD_RW_KEY_NAME" 'first(.[] | select(.name == $n)) // empty')"
PROD_KEY_ID="$(printf '%s' "$PROD_KEY_LINE" | jq -r '.access_key')"
PROD_KEY_GRANTS="$(printf '%s' "$PROD_KEY_LINE" | jq -r '.grants')"
if [[ -z "$PROD_KEY_ID" || "$PROD_KEY_ID" == "null" ]]; then
  echo "fatal: no Spaces key named $PROD_RW_KEY_NAME" >&2
  exit 2
fi
echo "found $PROD_RW_KEY_NAME ($PROD_KEY_ID); current grants: $PROD_KEY_GRANTS" >&2

# Build the new grants string from the existing JSON, adding wal +
# wal-dr at readwrite if absent. doctl wants the comma-separated
# `bucket=NAME;permission=PERM,...` shape on update. Bucket names are
# passed via --arg rather than spliced into the jq program text, so a
# name containing quotes or jq syntax cannot corrupt the filter.
NEW_GRANTS="$(printf '%s' "$PROD_KEY_LINE" \
  | jq -r --arg wal "$WAL_BUCKET" --arg dr "$WAL_DR_BUCKET" '
      .grants
      | (if any(.bucket == $wal) then . else . + [{bucket: $wal, permission: "readwrite"}] end)
      | (if any(.bucket == $dr)  then . else . + [{bucket: $dr,  permission: "readwrite"}] end)
      | map("bucket=\(.bucket);permission=\(.permission)")
      | join(",")
    ')"
echo "new grants: $NEW_GRANTS" >&2
# --name must be re-sent or doctl clears it on update.
doctl spaces keys update "$PROD_KEY_ID" \
  --name "$PROD_RW_KEY_NAME" \
  --grants "$NEW_GRANTS" >&2
125
# ── Step 5: verify the droplet's existing rclone can now write to
# both buckets using its on-disk config (which references the just-
# updated scoped key). A failure here means either the key cache
# hasn't propagated (wait 30s, re-run) or the scoped key isn't the
# one in /etc/rclone-shithub.conf (check by hand).
#
# Quoting: $WAL_BUCKET / $WAL_DR_BUCKET expand LOCALLY into the remote
# script; \$(date ...) is escaped so each probe's timestamp comes from
# the droplet's clock. --s3-no-check-bucket skips rclone's pre-flight
# bucket check — presumably because the scoped key can't perform it;
# confirm against the key's grants if this ever changes.
#
# BUGFIX: `rclone delete` takes exactly one path argument; the old
# single invocation passed two and errored under the remote `set -e`.
# Each probe is now deleted with its own invocation.
echo "verifying droplet → both WAL buckets..." >&2
ssh -o BatchMode=yes "root@$DEPLOY_HOST" "
  set -e
  echo wal-write-probe-\$(date -u +%Y%m%dT%H%M%SZ) \
    | rclone --config /etc/rclone-shithub.conf \
        --s3-no-check-bucket \
        rcat spaces-prod:$WAL_BUCKET/.write-probe
  echo wal-write-probe-\$(date -u +%Y%m%dT%H%M%SZ) \
    | rclone --config /etc/rclone-shithub.conf \
        --s3-no-check-bucket \
        rcat spaces-dr:$WAL_DR_BUCKET/.write-probe
  rclone --config /etc/rclone-shithub.conf --s3-no-check-bucket \
    delete spaces-prod:$WAL_BUCKET/.write-probe
  rclone --config /etc/rclone-shithub.conf --s3-no-check-bucket \
    delete spaces-dr:$WAL_DR_BUCKET/.write-probe
  echo OK
"
146
147 cat <<DONE >&2
148
149 ==============================================================
150 WAL buckets provisioned.
151
152 $WAL_BUCKET ($PRIMARY_REGION)
153 $WAL_DR_BUCKET ($DR_REGION)
154
155 The prod RW Spaces key now has readwrite on both. Next step:
156
157 ssh root@$DEPLOY_HOST 'systemctl restart postgresql@16-main'
158
159 (only needed if the conf.d drop-in at
160 /etc/postgresql/16/main/conf.d/99_shithub_archive.conf is
161 already in place — see the WAL archive PR for that change).
162
163 Verify within ~60s:
164 ssh root@$DEPLOY_HOST 'sudo -u postgres psql -xc "SELECT * FROM pg_stat_archiver"'
165 ssh root@$DEPLOY_HOST 'rclone --config /etc/rclone-shithub.conf --s3-no-check-bucket lsf spaces-prod:$WAL_BUCKET/ --recursive | head'
166 ==============================================================
167 DONE