tenseleyflow/shithub / fbd40ba

Browse files

audit: read-only droplet-drift checker (issue #38)

Compares md5 of every file the ansible roles install against the
live droplet over a single ssh round-trip. TEMPLATE rows (those
rendered from .j2 with inventory vars) are reported with stat
info but not auto-diffed.

Run after any PR that touches deploy/ansible/ to surface what
needs to be pushed manually until we resolve the broader ansible
ownership question (issue #38).
Authored by espadonne
SHA
fbd40bae8691e24b7635071a7cdf67d13fbc2ff1
Parents
e2c3943
Tree
3ea09a1

1 changed file

Status  File  +  -
A deploy/audit/check-droplet-drift.sh 123 0
deploy/audit/check-droplet-drift.sh (added)
@@ -0,0 +1,123 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: AGPL-3.0-or-later
#
# Read-only audit: compare files the ansible roles claim to manage
# against what's actually on shithub-prod. Reports drift; never
# writes. Run after every PR that touches deploy/ansible/ to confirm
# what (if anything) needs to be pushed to the droplet manually.
#
# This is a stopgap until we pick a long-term strategy (see issue #38).
#
# Usage:
#   deploy/audit/check-droplet-drift.sh
#
# Environment:
#   SHITHUB_HOST  ssh destination to audit (default: root@shithub.sh)
#
# Exits 0 if no drift, 1 if drift detected, 2 on infra failure.

# No -e: every command whose failure matters is checked explicitly below.
set -uo pipefail

HOST="${SHITHUB_HOST:-root@shithub.sh}"
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" || {
  echo "cannot resolve repo root from ${BASH_SOURCE[0]}" >&2
  exit 2
}

# Map each managed path to its source in the repo (when there's a
# direct copy: with no templating). For template: actions we only
# report existence plus mode/owner/size/mtime — comparing rendered
# output requires inventory variables we don't have locally.
#
# Format: <droplet path>::<source in repo>   (copied verbatim)
#         <droplet path>::TEMPLATE           (rendered from a .j2)
declare -a MANAGED=(
  "/usr/local/bin/shithub-backup-daily::deploy/postgres/backup-daily.sh"
  "/usr/local/bin/shithub-spaces-sync::deploy/spaces/sync-cross-region.sh"
  "/usr/local/bin/shithub-pg-archive::deploy/postgres/archive_command.sh"
  "/usr/local/bin/shithub-verify-wal-archive::deploy/postgres/verify-wal-archive.sh"
  "/usr/local/bin/shithub-aide-check::deploy/ansible/roles/base/files/shithub-aide-check.sh"
  "/usr/local/bin/shithub-ssh-authkeys::deploy/ansible/roles/shithubd/files/shithub-ssh-authkeys"
  "/var/lib/git/git-shell-commands/shithubd::deploy/ansible/roles/shithubd/files/git-shell-commands-shithubd"
  "/etc/rclone-shithub.conf::TEMPLATE"
  "/etc/alloy/config.alloy::TEMPLATE"
  "/etc/alloy/credentials.env::TEMPLATE"
  "/etc/systemd/system/alloy.service.d/shithub.conf::TEMPLATE"
  "/etc/postgresql/16/main/conf.d/99_shithub_archive.conf::TEMPLATE"
  "/etc/aide/aide.conf.d/99_shithub_exclude::deploy/ansible/roles/base/files/aide-shithub.conf"
  "/etc/cron.daily/aide::TEMPLATE"
  "/etc/caddy/Caddyfile::TEMPLATE"
  "/etc/fail2ban/jail.d/shithub.local::TEMPLATE"
  "/etc/fail2ban/filter.d/shithubd-auth.conf::TEMPLATE"
  "/etc/systemd/system/shithubd-web.service::TEMPLATE"
  "/etc/systemd/system/shithubd-worker.service::TEMPLATE"
  "/etc/shithub/web.env::TEMPLATE"
  "/etc/shithub/worker.env::TEMPLATE"
  "/etc/ssh/sshd_config::TEMPLATE"
)

# Colour the STATUS column only when stdout is a terminal, so piped or
# logged output stays free of escape sequences.
if [ -t 1 ]; then
  C_RED=$'\033[31m'
  C_GREEN=$'\033[32m'
  C_YELLOW=$'\033[33m'
  C_CYAN=$'\033[36m'
  C_RESET=$'\033[0m'
else
  C_RED="" C_GREEN="" C_YELLOW="" C_CYAN="" C_RESET=""
fi

# Print one report row.
# Arguments: $1 droplet path, $2 colour prefix (may be empty),
#            $3 status word, $4 detail text
report_row() {
  printf '%-60s  %s%-10s%s  %s\n' "$1" "$2" "$3" "$C_RESET" "$4"
}

# Print the MANAGED source (repo-relative path or the word TEMPLATE)
# registered for droplet path $1. Returns 1 if $1 is not managed.
source_for() {
  local entry
  for entry in "${MANAGED[@]}"; do
    if [ "${entry%%::*}" = "$1" ]; then
      printf '%s\n' "${entry##*::}"
      return 0
    fi
  done
  return 1
}

# Print the md5 hex digest of local file $1. Uses md5sum when present
# and falls back to BSD/macOS `md5 -q`. Non-zero when hashing fails or
# no tool is available.
md5_of() {
  if command -v md5sum >/dev/null 2>&1; then
    md5sum "$1" | cut -d' ' -f1
  elif command -v md5 >/dev/null 2>&1; then
    md5 -q "$1"
  else
    return 127
  fi
}

# Emit the script that runs on the droplet: one line per managed path,
# either "<path>|EXISTS|<md5>|<mode owner:group size mtime>" or
# "<path>|MISSING||". Doing it in one script avoids an ssh round-trip
# per file.
build_remote_script() {
  local entry paths=""
  for entry in "${MANAGED[@]}"; do
    paths+=" '${entry%%::*}'"
  done
  printf 'for path in%s; do\n' "$paths"
  # Quoted delimiter: the body below is sent verbatim and expanded
  # only by the remote shell.
  cat <<'REMOTE'
  if [ -f "$path" ]; then
    md5=$(md5sum "$path" 2>/dev/null | cut -d" " -f1)
    stat=$(stat -c "%a %U:%G %s %y" "$path" 2>/dev/null)
    printf "%s|EXISTS|%s|%s\n" "$path" "$md5" "$stat"
  else
    printf "%s|MISSING||\n" "$path"
  fi
done
REMOTE
}

# Run the audit and exit with the status documented in the file header.
main() {
  local remote_script remote_output line
  local dpath status remote_md5 remote_stat src repo_md5
  local drift_count=0 rows_seen=0

  if ! command -v md5sum >/dev/null 2>&1 && ! command -v md5 >/dev/null 2>&1; then
    echo "neither md5sum nor md5 is available locally" >&2
    exit 2
  fi

  remote_script=$(build_remote_script)

  # Capture stdout only. ssh's own diagnostics (host-key warnings,
  # banners on stderr) go straight to our stderr instead of being
  # parsed as result rows.
  if ! remote_output=$(ssh -o BatchMode=yes "$HOST" "$remote_script"); then
    echo "ssh to $HOST failed" >&2
    if [ -n "$remote_output" ]; then
      printf '%s\n' "$remote_output" >&2
    fi
    exit 2
  fi

  printf "%-60s  %-10s  %s\n" "PATH" "STATUS" "DETAIL"
  printf "%-60s  %-10s  %s\n" "----" "------" "------"

  while IFS= read -r line; do
    [ -n "$line" ] || continue
    # remote_stat is the last field, so it receives the rest of the
    # line, including the spaces inside the stat output.
    IFS='|' read -r dpath status remote_md5 remote_stat <<< "$line"

    case "$status" in
      EXISTS | MISSING) ;;
      *) status="" ;;
    esac
    # Anything that is not a well-formed row for a managed path (e.g.
    # a login banner printed on stdout) is noise, not drift.
    if [ -z "$status" ] || ! src=$(source_for "$dpath"); then
      printf 'ignoring unexpected output from %s: %s\n' "$HOST" "$line" >&2
      continue
    fi
    rows_seen=$((rows_seen + 1))

    if [ "$status" = "MISSING" ]; then
      report_row "$dpath" "$C_YELLOW" "MISSING" "(not on droplet)"
      drift_count=$((drift_count + 1))
      continue
    fi

    if [ "$src" = "TEMPLATE" ]; then
      report_row "$dpath" "$C_CYAN" "TEMPLATE" "$remote_stat  (template — manual check)"
      continue
    fi

    if [ ! -f "$REPO_ROOT/$src" ]; then
      report_row "$dpath" "$C_RED" "ERROR" "source missing: $src"
      drift_count=$((drift_count + 1))
      continue
    fi

    # A local hashing failure is an infra problem; reporting it as
    # DRIFT would be misleading.
    if ! repo_md5=$(md5_of "$REPO_ROOT/$src") || [ -z "$repo_md5" ]; then
      echo "cannot compute md5 of $REPO_ROOT/$src" >&2
      exit 2
    fi

    if [ "$repo_md5" = "$remote_md5" ]; then
      report_row "$dpath" "$C_GREEN" "OK" "$remote_stat"
    else
      report_row "$dpath" "$C_RED" "DRIFT" "repo=${repo_md5:0:8} droplet=${remote_md5:0:8}"
      drift_count=$((drift_count + 1))
    fi
  done <<< "$remote_output"

  # A truncated or empty report must not be mistaken for "no drift".
  if [ "$rows_seen" -ne "${#MANAGED[@]}" ]; then
    echo "expected ${#MANAGED[@]} result rows from $HOST, got $rows_seen" >&2
    exit 2
  fi

  echo ""
  if [ "$drift_count" -gt 0 ]; then
    echo "Drift detected: $drift_count file(s) need attention."
    echo "TEMPLATE rows are not auto-checked — eyeball the timestamps and confirm by hand."
    exit 1
  fi
  echo "No drift detected on copy: files. TEMPLATE rows still need manual review."
  exit 0
}

main "$@"