tenseleyflow/shithub / 9eff67e

Browse files

rclone: move shared config from /root/.config to /etc/rclone-shithub.conf

The previous path was inaccessible to the postgres user (Postgres
invokes archive_command as itself, and /root is mode 0700). A single
file at the new path now serves both the root-run scripts (backup,
sync, restore-drill, provisioner) and the postgres-run archive_command.
Authored by espadonne
SHA
9eff67eb77e9380c69ff81179ff3ddf16bea972f
Parents
4aa3385
Tree
fecf0a1

10 changed files

Status File + -
M deploy/cutover/provision-wal-buckets.sh 5 5
M deploy/docs-site/sync-to-spaces.sh 1 1
M deploy/postgres/archive_command.sh 1 1
M deploy/postgres/backup-daily.sh 1 1
M deploy/restore-drill/run.sh 2 2
M deploy/spaces/sync-cross-region.sh 2 2
M docs/internal/runbooks/backups.md 3 3
M docs/internal/runbooks/day-one.md 1 1
M docs/public/self-host/backup-restore.md 1 1
M docs/public/self-host/troubleshooting.md 1 1
deploy/cutover/provision-wal-buckets.shmodified
@@ -127,19 +127,19 @@ doctl spaces keys update "$PROD_KEY_ID" \
127127
 # both buckets using its on-disk config (which references the just-
128128
 # updated scoped key). A failure here means either the key cache
129129
 # hasn't propagated (wait 30s, re-run) or the scoped key isn't the
130
-# one in /root/.config/rclone/rclone.conf (check by hand).
130
+# one in /etc/rclone-shithub.conf (check by hand).
131131
 echo "verifying droplet → both WAL buckets..." >&2
132132
 ssh -o BatchMode=yes "root@$DEPLOY_HOST" "
133133
         set -e
134134
         echo wal-write-probe-\$(date -u +%Y%m%dT%H%M%SZ) \
135
-                | rclone --config /root/.config/rclone/rclone.conf \
135
+                | rclone --config /etc/rclone-shithub.conf \
136136
                         --s3-no-check-bucket \
137137
                         rcat spaces-prod:$WAL_BUCKET/.write-probe
138138
         echo wal-write-probe-\$(date -u +%Y%m%dT%H%M%SZ) \
139
-                | rclone --config /root/.config/rclone/rclone.conf \
139
+                | rclone --config /etc/rclone-shithub.conf \
140140
                         --s3-no-check-bucket \
141141
                         rcat spaces-dr:$WAL_DR_BUCKET/.write-probe
142
-        rclone --config /root/.config/rclone/rclone.conf --s3-no-check-bucket \
142
+        rclone --config /etc/rclone-shithub.conf --s3-no-check-bucket \
143143
                 delete spaces-prod:$WAL_BUCKET/.write-probe spaces-dr:$WAL_DR_BUCKET/.write-probe
144144
         echo OK
145145
 "
@@ -162,6 +162,6 @@ already in place — see the WAL archive PR for that change).
162162
 
163163
 Verify within ~60s:
164164
   ssh root@$DEPLOY_HOST 'sudo -u postgres psql -xc "SELECT * FROM pg_stat_archiver"'
165
-  ssh root@$DEPLOY_HOST 'rclone --config /root/.config/rclone/rclone.conf --s3-no-check-bucket lsf spaces-prod:$WAL_BUCKET/ --recursive | head'
165
+  ssh root@$DEPLOY_HOST 'rclone --config /etc/rclone-shithub.conf --s3-no-check-bucket lsf spaces-prod:$WAL_BUCKET/ --recursive | head'
166166
 ==============================================================
167167
 DONE
deploy/docs-site/sync-to-spaces.shmodified
@@ -30,7 +30,7 @@ if [[ ! -d build/docs ]]; then
3030
 fi
3131
 
3232
 echo "syncing to $BUCKET..."
33
-rclone --config /root/.config/rclone/rclone.conf \
33
+rclone --config /etc/rclone-shithub.conf \
3434
        sync --transfers 8 --checkers 16 \
3535
        build/docs "$BUCKET"
3636
 
deploy/postgres/archive_command.shmodified
@@ -22,7 +22,7 @@ BUCKET="${SHITHUB_WAL_BUCKET:-spaces-prod:shithub-wal}"
2222
 # --s3-no-check-bucket: scoped Spaces keys lack GetBucketLocation; the
2323
 # actual PUT works fine on a key with bucket-level readwrite. Matches
2424
 # the same flag in backup-daily.sh + sync-cross-region.sh.
25
-rclone --config /root/.config/rclone/rclone.conf \
25
+rclone --config /etc/rclone-shithub.conf \
2626
        --s3-no-check-bucket \
2727
        --quiet \
2828
        copyto "$SRC" "$BUCKET/$(date +%Y/%m/%d)/$NAME"
deploy/postgres/backup-daily.shmodified
@@ -32,7 +32,7 @@ pg_restore --list "$LOCAL_DIR/$NAME" >/dev/null
3232
 # --s3-no-check-bucket: skip the GetBucketLocation pre-check that
3333
 # requires a permission our scoped-RW Spaces key doesn't grant.
3434
 # The actual PUT works fine on a key with bucket-level readwrite.
35
-rclone --config /root/.config/rclone/rclone.conf --s3-no-check-bucket \
35
+rclone --config /etc/rclone-shithub.conf --s3-no-check-bucket \
3636
        copyto "$LOCAL_DIR/$NAME" "$BUCKET/daily/$(date -u +%Y/%m/%d)/$NAME"
3737
 
3838
 # Local retention: keep the last 7 dumps; bucket lifecycle handles
deploy/restore-drill/run.shmodified
@@ -70,7 +70,7 @@ say "restore drill start (work=$WORK port=$PGPORT pg=$PG_BIN)"
7070
 # 1. Resolve dump path. --s3-no-check-bucket: scoped Spaces keys lack
7171
 # GetBucketLocation; the actual GET works fine.
7272
 if [[ -z "$DUMP" ]]; then
73
-  LATEST="$(rclone --config /root/.config/rclone/rclone.conf --s3-no-check-bucket \
73
+  LATEST="$(rclone --config /etc/rclone-shithub.conf --s3-no-check-bucket \
7474
                    lsf "$BUCKET/daily/" --recursive --files-only \
7575
                 | sort | tail -n 1)"
7676
   if [[ -z "$LATEST" ]]; then
@@ -79,7 +79,7 @@ if [[ -z "$DUMP" ]]; then
7979
   fi
8080
   DUMP="$WORK/$(basename "$LATEST")"
8181
   say "fetching $LATEST"
82
-  rclone --config /root/.config/rclone/rclone.conf --s3-no-check-bucket \
82
+  rclone --config /etc/rclone-shithub.conf --s3-no-check-bucket \
8383
          copyto "$BUCKET/daily/$LATEST" "$DUMP"
8484
 fi
8585
 chown postgres:postgres "$DUMP"
deploy/spaces/sync-cross-region.shmodified
@@ -28,11 +28,11 @@ ts() { date -u +%Y-%m-%dT%H:%M:%SZ; }
2828
 {
2929
   echo "[$(ts)] sync start"
3030
 
31
-  rclone --config /root/.config/rclone/rclone.conf --s3-no-check-bucket \
31
+  rclone --config /etc/rclone-shithub.conf --s3-no-check-bucket \
3232
          copy --transfers 8 --checkers 16 --fast-list \
3333
          "$PRIMARY" "$DR"
3434
 
35
-  rclone --config /root/.config/rclone/rclone.conf --s3-no-check-bucket \
35
+  rclone --config /etc/rclone-shithub.conf --s3-no-check-bucket \
3636
          copy --transfers 8 --checkers 16 --fast-list \
3737
          "$WAL_PRIMARY" "$WAL_DR"
3838
 
docs/internal/runbooks/backups.mdmodified
@@ -27,7 +27,7 @@ ships zero WAL segments until the operator runs through this once:
2727
    Edit) to grant `readwrite` on `shithub-wal`. The `dr` key needs
2828
    `readwrite` on `shithub-wal-dr` so `sync-cross-region.sh` can push.
2929
 3. **Confirm the rclone config on the app droplet** has both keys
30
-   (`/root/.config/rclone/rclone.conf` — `spaces-prod` and
30
+   (`/etc/rclone-shithub.conf` — `spaces-prod` and
3131
    `spaces-dr` remotes).
3232
 4. **Re-run ansible** (or drop the conf.d file by hand at
3333
    `/etc/postgresql/16/main/conf.d/99_shithub_archive.conf`), then
@@ -39,7 +39,7 @@ ships zero WAL segments until the operator runs through this once:
3939
    # last_archived_wal: 000000010000000000000003 (or similar)
4040
    # last_archived_time: <recent timestamp>
4141
    # failed_count: 0
42
-   rclone --config /root/.config/rclone/rclone.conf --s3-no-check-bucket \
42
+   rclone --config /etc/rclone-shithub.conf --s3-no-check-bucket \
4343
           lsf spaces-prod:shithub-wal/ --recursive | head
4444
    ```
4545
 6. **If `failed_count > 0`** before any successful archive:
@@ -62,7 +62,7 @@ If you want to confirm by hand:
6262
 
6363
 ```sh
6464
 ssh db
65
-sudo -u postgres rclone --config /root/.config/rclone/rclone.conf \
65
+sudo -u postgres rclone --config /etc/rclone-shithub.conf \
6666
      lsf spaces-prod:shithub-backups/daily/$(date -u +%Y/%m/%d)/
6767
 ```
6868
 
docs/internal/runbooks/day-one.mdmodified
@@ -42,7 +42,7 @@ if you haven't.
4242
 1. **Backups.** The first daily logical backup should have run.
4343
    ```sh
4444
    ssh db
45
-   sudo -u postgres rclone --config /root/.config/rclone/rclone.conf \
45
+   sudo -u postgres rclone --config /etc/rclone-shithub.conf \
4646
         lsf spaces-prod:shithub-backups/daily/$(date -u +%Y/%m/%d)/
4747
    ```
4848
    Should list one `.dump` file. If empty, see
docs/public/self-host/backup-restore.mdmodified
@@ -28,7 +28,7 @@ By hand:
2828
 
2929
 ```sh
3030
 ssh db
31
-sudo -u postgres rclone --config /root/.config/rclone/rclone.conf \
31
+sudo -u postgres rclone --config /etc/rclone-shithub.conf \
3232
      lsf spaces-prod:shithub-backups/daily/$(date -u +%Y/%m/%d)/
3333
 ```
3434
 
docs/public/self-host/troubleshooting.mdmodified
@@ -107,7 +107,7 @@ will print the rclone error. Common causes:
107107
 Confirm by hand:
108108
 
109109
 ```sh
110
-sudo -u postgres rclone --config /root/.config/rclone/rclone.conf \
110
+sudo -u postgres rclone --config /etc/rclone-shithub.conf \
111111
      lsd spaces-prod:
112112
 ```
113113