tenseleyflow/shithub / 7825298

Browse files

S11: repo name validation + reserved list; loosen storage repo regex to GitHub-shape (≤100, [a-z0-9._-])

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
782529807c02854ad420910131c0a0dff13a4651
Parents
1687110
Tree
f39e63d

5 changed files

StatusFile+-
M internal/infra/storage/reposfs.go 19 10
M internal/infra/storage/reposfs_test.go 13 4
A internal/repos/errors.go 40 0
A internal/repos/validate.go 98 0
A internal/repos/validate_test.go 72 0
internal/infra/storage/reposfs.gomodified
@@ -46,19 +46,28 @@ func NewRepoFS(root string) (*RepoFS, error) {
4646
 // Root returns the absolute root path. Useful for logging and `storage check`.
4747
 func (r *RepoFS) Root() string { return r.root }
4848
 
49
-// nameRE is the whitelist for owner and repo names: lowercase ASCII
50
-// letters, digits, and hyphens; cannot start or end with a hyphen; length
51
-// 1..39 (matches GitHub's username constraint, sufficient for us).
52
-var nameRE = regexp.MustCompile(`^[a-z0-9](?:[a-z0-9-]{0,37}[a-z0-9])?$`)
49
+// ownerNameRE is the whitelist for owner names: lowercase ASCII letters,
50
+// digits, and hyphens; cannot start or end with a hyphen; length 1..39
51
+// (matches GitHub's username constraint).
52
+var ownerNameRE = regexp.MustCompile(`^[a-z0-9](?:[a-z0-9-]{0,37}[a-z0-9])?$`)
5353
 
54
-// validateName enforces the whitelist. Returns ErrInvalidPath wrapped
55
-// with a precise reason on failure.
54
+// repoNameRE is the whitelist for repository names: lowercase ASCII
55
+// letters, digits, hyphens, dots, and underscores. Can't start or end
56
+// with a separator. Length 1..100 (matches GitHub).
57
+var repoNameRE = regexp.MustCompile(`^[a-z0-9](?:[a-z0-9._-]{0,98}[a-z0-9_])?$`)
58
+
59
+// validateName enforces the per-kind whitelist. Returns ErrInvalidPath
60
+// wrapped with a precise reason on failure.
5661
 func validateName(kind, name string) error {
5762
 	if name == "" {
5863
 		return fmt.Errorf("%w: %s empty", ErrInvalidPath, kind)
5964
 	}
60
-	if len(name) > 39 {
61
-		return fmt.Errorf("%w: %s %q too long (max 39)", ErrInvalidPath, kind, name)
65
+	maxLen, re, alphabet := 39, ownerNameRE, "[a-z0-9-]"
66
+	if kind == "repo" {
67
+		maxLen, re, alphabet = 100, repoNameRE, "[a-z0-9._-]"
68
+	}
69
+	if len(name) > maxLen {
70
+		return fmt.Errorf("%w: %s %q too long (max %d)", ErrInvalidPath, kind, name, maxLen)
6271
 	}
6372
 	if name != strings.ToLower(name) {
6473
 		return fmt.Errorf("%w: %s %q must be lowercase", ErrInvalidPath, kind, name)
@@ -72,8 +81,8 @@ func validateName(kind, name string) error {
7281
 	if filepath.IsAbs(name) {
7382
 		return fmt.Errorf("%w: %s is absolute", ErrInvalidPath, kind)
7483
 	}
75
-	if !nameRE.MatchString(name) {
76
-		return fmt.Errorf("%w: %s %q fails whitelist [a-z0-9-]", ErrInvalidPath, kind, name)
84
+	if !re.MatchString(name) {
85
+		return fmt.Errorf("%w: %s %q fails whitelist %s", ErrInvalidPath, kind, name, alphabet)
7786
 	}
7887
 	return nil
7988
 }
internal/infra/storage/reposfs_test.gomodified
@@ -49,6 +49,16 @@ func TestRepoPath_HappyPath(t *testing.T) {
4949
 	}
5050
 }
5151
 
52
+func TestRepoPath_AcceptsRepoExtraChars(t *testing.T) {
53
+	t.Parallel()
54
+	r, _ := mustNewRepoFS(t)
55
+	for _, name := range []string{"name.with.dots", "name_under", "rust-by-example", "a1.b2_c3"} {
56
+		if _, err := r.RepoPath("alice", name); err != nil {
57
+			t.Errorf("RepoPath %q: %v", name, err)
58
+		}
59
+	}
60
+}
61
+
5262
 func TestRepoPath_ShortOwnerPaddedShard(t *testing.T) {
5363
 	t.Parallel()
5464
 	r, root := mustNewRepoFS(t)
@@ -106,11 +116,10 @@ func TestRepoPath_RejectsUnsafe(t *testing.T) {
106116
 		{"АliCe", "name", "owner non-ASCII (Cyrillic A)"},
107117
 		{"alice", "café", "repo non-ASCII"},
108118
 		{strings.Repeat("a", 40), "name", "owner too long"},
109
-		{"alice", strings.Repeat("b", 40), "repo too long"},
110
-		{"alice", "name.with.dots", "repo dots not allowed"},
111
-		{"alice", "name_under", "repo underscore not allowed"},
112
-		{"al!ice", "name", "owner punctuation"},
119
+		{"alice", strings.Repeat("b", 101), "repo too long"},
120
+		{"alice", "al!ice", "repo punctuation"},
113121
 		{"alice", "name@thing", "repo @"},
122
+		{"al!ice", "name", "owner punctuation"},
114123
 	}
115124
 
116125
 	for _, c := range cases {
internal/repos/errors.goadded
@@ -0,0 +1,40 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Package repos owns repository creation, validation, and identity. It
4
+// sits above sqlc-generated reposdb queries and orchestrates the
5
+// DB + filesystem dance that produces a usable bare repo. The git
6
+// plumbing helpers used to build the optional initial commit live in
7
+// internal/repos/git.
8
+package repos
9
+
10
+import "errors"
11
+
12
+// User-facing errors. Handlers map these to friendly UI strings; lower
13
+// layers wrap them with %w so handlers can errors.Is for routing.
14
+var (
15
+	// ErrInvalidName is returned when the requested repo name fails the
16
+	// shape whitelist (length, characters, edges, dot-dot, etc.).
17
+	ErrInvalidName = errors.New("repos: invalid name")
18
+
19
+	// ErrReservedName is returned when the name is on the reserved list.
20
+	// Distinguished from ErrInvalidName so the UI can say "reserved" vs
21
+	// "format" — they're different fixes for the user.
22
+	ErrReservedName = errors.New("repos: reserved name")
23
+
24
+	// ErrTaken is returned when an active repo (deleted_at IS NULL)
25
+	// already exists for this owner with the same name. Surfaces as
26
+	// either the explicit pre-check OR the unique-constraint error.
27
+	ErrTaken = errors.New("repos: name taken for owner")
28
+
29
+	// ErrNoVerifiedEmail is returned when the user has no verified
30
+	// primary email. We refuse rather than fabricate an author identity.
31
+	ErrNoVerifiedEmail = errors.New("repos: no verified primary email")
32
+
33
+	// ErrUnknownLicense / ErrUnknownGitignore — picker chose a key not
34
+	// in our curated set.
35
+	ErrUnknownLicense   = errors.New("repos: unknown license key")
36
+	ErrUnknownGitignore = errors.New("repos: unknown gitignore key")
37
+
38
+	// ErrDescriptionTooLong mirrors the DB CHECK on description (≤350).
39
+	ErrDescriptionTooLong = errors.New("repos: description too long")
40
+)
internal/repos/validate.goadded
@@ -0,0 +1,98 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package repos
4
+
5
+import (
6
+	"fmt"
7
+	"regexp"
8
+	"strings"
9
+	"unicode/utf8"
10
+)
11
+
12
+// MaxNameLen / MaxDescriptionLen mirror the DB CHECK constraints in
13
+// migration 0017. Validate before insert so we surface a friendly error
14
+// rather than a 500 from the constraint.
15
+const (
16
+	MaxNameLen        = 100
17
+	MaxDescriptionLen = 350
18
+)
19
+
20
+// nameRE matches the name shape we accept: lowercase letters, digits,
21
+// dots, hyphens, underscores. Edges can't be a separator. Length is
22
+// bounded separately to give a more specific error message.
23
+var nameRE = regexp.MustCompile(`^[a-z0-9](?:[a-z0-9._-]{0,98}[a-z0-9_])?$`)
24
+
25
+// reservedRepoNames is the set of names a repo may NOT take. Most are
26
+// special filenames git itself uses (or that would break our routing).
27
+// The list is kept short on purpose; it's all-lowercase because we
28
+// always lowercase the name before comparing.
29
+var reservedRepoNames = map[string]struct{}{
30
+	".git":         {},
31
+	".gitignore":   {},
32
+	".gitmodules":  {},
33
+	".gitattributes": {},
34
+	".well-known":  {},
35
+	".github":      {},
36
+	"head":         {},
37
+	"refs":         {},
38
+	"objects":      {},
39
+	"info":         {},
40
+	"hooks":        {},
41
+	"branches":     {},
42
+}
43
+
44
+// IsReservedRepoName reports whether name (case-insensitive) is on the
45
+// reserved list. Callers MUST also reject leading dots and the empty
46
+// string via ValidateName.
47
+func IsReservedRepoName(name string) bool {
48
+	_, ok := reservedRepoNames[strings.ToLower(name)]
49
+	return ok
50
+}
51
+
52
+// NormalizeName lowercases and trims. Repos are case-preserved in the
53
+// DB (citext does case-insensitive uniqueness), but disk paths are
54
+// lowercased. We lowercase up-front so the same string drives both.
55
+func NormalizeName(name string) string {
56
+	return strings.ToLower(strings.TrimSpace(name))
57
+}
58
+
59
+// ValidateName enforces the shape whitelist. Returns ErrInvalidName /
60
+// ErrReservedName wrapped with a precise reason on failure.
61
+//
62
+// Caller is expected to have lowercased the name first; uppercase input
63
+// is rejected as a defensive check in case a future caller forgets.
64
+func ValidateName(name string) error {
65
+	if name == "" {
66
+		return fmt.Errorf("%w: empty", ErrInvalidName)
67
+	}
68
+	if utf8.RuneCountInString(name) > MaxNameLen {
69
+		return fmt.Errorf("%w: too long (max %d)", ErrInvalidName, MaxNameLen)
70
+	}
71
+	if name != strings.ToLower(name) {
72
+		return fmt.Errorf("%w: must be lowercase", ErrInvalidName)
73
+	}
74
+	if strings.Contains(name, "..") {
75
+		return fmt.Errorf("%w: contains dot-dot", ErrInvalidName)
76
+	}
77
+	if strings.HasPrefix(name, ".") {
78
+		return fmt.Errorf("%w: starts with dot", ErrInvalidName)
79
+	}
80
+	if strings.HasSuffix(name, ".") || strings.HasSuffix(name, "-") {
81
+		return fmt.Errorf("%w: ends with separator", ErrInvalidName)
82
+	}
83
+	if !nameRE.MatchString(name) {
84
+		return fmt.Errorf("%w: must match [a-z0-9._-] with non-separator edges", ErrInvalidName)
85
+	}
86
+	if IsReservedRepoName(name) {
87
+		return fmt.Errorf("%w: %q", ErrReservedName, name)
88
+	}
89
+	return nil
90
+}
91
+
92
+// ValidateDescription enforces the DB CHECK length cap.
93
+func ValidateDescription(desc string) error {
94
+	if utf8.RuneCountInString(desc) > MaxDescriptionLen {
95
+		return fmt.Errorf("%w: max %d characters", ErrDescriptionTooLong, MaxDescriptionLen)
96
+	}
97
+	return nil
98
+}
internal/repos/validate_test.goadded
@@ -0,0 +1,72 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package repos_test
4
+
5
+import (
6
+	"errors"
7
+	"strings"
8
+	"testing"
9
+
10
+	"github.com/tenseleyFlow/shithub/internal/repos"
11
+)
12
+
13
+func TestValidateName_Accepts(t *testing.T) {
14
+	t.Parallel()
15
+	for _, name := range []string{
16
+		"a", "abc", "rust-by-example", "name.with.dots", "name_under",
17
+		"a1.b2-c3_d4", "letter9", strings.Repeat("a", 100),
18
+	} {
19
+		if err := repos.ValidateName(name); err != nil {
20
+			t.Errorf("ValidateName(%q): %v", name, err)
21
+		}
22
+	}
23
+}
24
+
25
+func TestValidateName_Rejects(t *testing.T) {
26
+	t.Parallel()
27
+	cases := []struct {
28
+		name string
29
+		want error
30
+		why  string
31
+	}{
32
+		{"", repos.ErrInvalidName, "empty"},
33
+		{strings.Repeat("a", 101), repos.ErrInvalidName, "too long"},
34
+		{"UPPER", repos.ErrInvalidName, "uppercase"},
35
+		{".dotfile", repos.ErrInvalidName, "leading dot"},
36
+		{"-dash", repos.ErrInvalidName, "leading dash"},
37
+		{"trailing-", repos.ErrInvalidName, "trailing dash"},
38
+		{"trailing.", repos.ErrInvalidName, "trailing dot"},
39
+		{"two..dots", repos.ErrInvalidName, "dot-dot"},
40
+		{"a@b", repos.ErrInvalidName, "punctuation"},
41
+		{"with space", repos.ErrInvalidName, "space"},
42
+		{"café", repos.ErrInvalidName, "non-ASCII"},
43
+		{".git", repos.ErrInvalidName, "leading dot beats reserved"},
44
+		{"head", repos.ErrReservedName, "reserved (head)"},
45
+		{"refs", repos.ErrReservedName, "reserved (refs)"},
46
+		{"objects", repos.ErrReservedName, "reserved (objects)"},
47
+		{"hooks", repos.ErrReservedName, "reserved (hooks)"},
48
+	}
49
+	for _, c := range cases {
50
+		err := repos.ValidateName(c.name)
51
+		if err == nil {
52
+			t.Errorf("%s: expected error", c.why)
53
+			continue
54
+		}
55
+		if !errors.Is(err, c.want) {
56
+			t.Errorf("%s: err = %v, want sentinel %v", c.why, err, c.want)
57
+		}
58
+	}
59
+}
60
+
61
+func TestValidateDescription(t *testing.T) {
62
+	t.Parallel()
63
+	if err := repos.ValidateDescription(""); err != nil {
64
+		t.Errorf("empty desc: %v", err)
65
+	}
66
+	if err := repos.ValidateDescription(strings.Repeat("a", 350)); err != nil {
67
+		t.Errorf("at-cap desc: %v", err)
68
+	}
69
+	if err := repos.ValidateDescription(strings.Repeat("a", 351)); err == nil {
70
+		t.Errorf("over-cap desc should error")
71
+	}
72
+}