tenseleyflow/shithub / c569abe

Browse files

Strip URL userinfo (user:pass@host) from log values while preserving host+path

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
c569abe7adec7acbae2403bd7cfc6464fb5e8052
Parents
ce07729
Tree
a3ff602

2 changed files

StatusFile+-
M internal/infra/log/log.go 22 0
M internal/infra/log/log_test.go 24 0
internal/infra/log/log.gomodified
@@ -22,6 +22,7 @@ import (
2222
 	"context"
2323
 	"io"
2424
 	"log/slog"
25
+	"regexp"
2526
 	"strings"
2627
 )
2728
 
@@ -153,5 +154,26 @@ func redactValueIfSensitive(v string) string {
153154
 			return "***"
154155
 		}
155156
 	}
157
+	if u := stripURLUserinfo(v); u != v {
158
+		return u
159
+	}
156160
 	return v
157161
 }
162
+
163
// urlUserinfoRE matches the scheme and userinfo portion of a URL, i.e. the
// "scheme://userinfo@" prefix, so the userinfo can be replaced with "***"
// while the host and path stay readable in logs.
//
// Capture group 1 is the scheme: [a-z][a-z0-9+\-.]* (case-insensitive).
// The userinfo is accepted in one of two shapes:
//
//   - "user:pass", where either side may be empty. This covers the usual
//     scheme://user:pass@host form as well as password-only DSNs such as
//     redis://:secret@host.
//   - a lone "user" with no colon. Access tokens are frequently passed as
//     the username (https://TOKEN@host/...), so this must be masked too.
//     This branch also excludes '?' and '#', so an '@' that only appears in
//     a query string or fragment (https://host?email=a@b.com) is not
//     mistaken for userinfo.
//
// Neither shape may contain whitespace, '/' or '@'. A password holding a
// raw '@' is therefore only masked up to that first '@'; the pattern relies
// on such characters arriving percent-encoded.
var urlUserinfoRE = regexp.MustCompile(
	`(?i)\b([a-z][a-z0-9+\-.]*)://(?:[^\s/@:]*:[^\s/@]*|[^\s/?#@:]+)@`)

// stripURLUserinfo returns s with the userinfo of every URL it contains
// replaced by "***", e.g. "postgres://u:p@db:5432/x" becomes
// "postgres://***@db:5432/x". Strings without URL credentials are returned
// unchanged.
func stripURLUserinfo(s string) string {
	// Cheap pre-filter: a match needs both "://" and "@", so most log
	// values never reach the regex engine.
	if !strings.Contains(s, "://") || !strings.Contains(s, "@") {
		return s
	}
	return urlUserinfoRE.ReplaceAllString(s, "$1://***@")
}
internal/infra/log/log_test.gomodified
@@ -59,3 +59,27 @@ func TestNew_KeepsOrdinaryValues(t *testing.T) {
5959
 		t.Errorf("non-secret fields dropped: %s", out)
6060
 	}
6161
 }
62
+
63
+func TestNew_StripsURLCredentials(t *testing.T) {
64
+	t.Parallel()
65
+	var buf bytes.Buffer
66
+	logger := New(Options{Level: "info", Format: "json", Writer: &buf})
67
+
68
+	// Non-secret-keyed value containing user:pass@host — the per-value
69
+	// regex strips just the userinfo, keeping host + path readable.
70
+	logger.Info("db", "uri", "postgres://shithub:hunter2@127.0.0.1:5432/shithub?sslmode=disable")
71
+	// PAT-bearing URL routes through the value-marker scrub (shithub_pat_),
72
+	// which is more aggressive — the whole value collapses to ***.
73
+	logger.Info("git remote", "remote_uri", "https://alice:shithub_pat_abcdefghijklmnopqrstuvwxyz0123456789@host.example/owner/repo.git")
74
+
75
+	out := buf.String()
76
+	for _, leak := range []string{"hunter2", "shithub_pat_abc", "alice:shithub_pat_"} {
77
+		if strings.Contains(out, leak) {
78
+			t.Errorf("credential leaked: %q in %s", leak, out)
79
+		}
80
+	}
81
+	// Generic case keeps the host so logs stay useful.
82
+	if !strings.Contains(out, "127.0.0.1") {
83
+		t.Errorf("host stripped from generic URL: %s", out)
84
+	}
85
+}