tenseleyflow/shithub / 7d778dc

Browse files

S13: SSH_ORIGINAL_COMMAND strict parser (matched-quote alternation, owner/repo whitelist, traversal-safe)

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
7d778dc1733a222faeb0d382371155ce108c7b2c
Parents
b35662e
Tree
a36bf6c

2 changed files

StatusFile+-
A internal/git/protocol/ssh_command.go 117 0
A internal/git/protocol/ssh_command_test.go 103 0
internal/git/protocol/ssh_command.goadded
@@ -0,0 +1,117 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package protocol
4
+
5
+import (
6
+	"errors"
7
+	"fmt"
8
+	"regexp"
9
+	"strings"
10
+)
11
+
12
+// ParsedSSHCommand is the validated shape of `SSH_ORIGINAL_COMMAND`. The
13
+// dispatcher resolves Owner+Repo against the DB and feeds Service to
14
+// the same exec helper used by the HTTP transport.
15
+type ParsedSSHCommand struct {
16
+	Service Service // UploadPack or ReceivePack
17
+	Owner   string  // already lowercase, validated via the same shape rules as RepoFS
18
+	Repo    string  // already lowercase, validated via repo-name rules
19
+}
20
+
21
+// sshCmdRE captures the only acceptable input shape:
22
+//
23
+//	git-upload-pack 'owner/repo(.git)?'
24
+//	git-upload-pack owner/repo(.git)?
25
+//	git-receive-pack ...
26
+//
27
+// Single quotes around the path are optional but MUST MATCH — `a/b'`
28
+// (one trailing quote, no leading) is rejected. The path group has
29
+// two alternatives: a single-quoted captured form OR an unquoted form
30
+// that may contain neither quotes nor whitespace. Anything else —
31
+// extra args, embedded shell metacharacters, escaped quotes — is a
32
+// hard reject. We never invoke a shell so we don't need escapes.
33
+var sshCmdRE = regexp.MustCompile(`^(git-upload-pack|git-receive-pack)\s+(?:'([^']+)'|([^'\s]+))$`)
34
+
35
+// ErrUnknownSSHCommand is returned when SSH_ORIGINAL_COMMAND doesn't
36
+// match the strict whitelist. Callers should surface a friendly
37
+// "shithub does not allow shell access" message and exit non-zero.
38
+var ErrUnknownSSHCommand = errors.New("ssh: unsupported command")
39
+
40
+// ErrInvalidSSHPath is returned when the parsed path fails owner/repo
41
+// validation. This is the path-traversal defense.
42
+var ErrInvalidSSHPath = errors.New("ssh: invalid repo path")
43
+
44
+// ParseSSHCommand strict-parses SSH_ORIGINAL_COMMAND into Service +
45
+// Owner + Repo. Returns ErrUnknownSSHCommand for anything that isn't a
46
+// `git-{upload,receive}-pack <path>` invocation, and ErrInvalidSSHPath
47
+// when the path fails the storage whitelist.
48
+//
49
+// Caller is expected to have already trimmed surrounding whitespace if
50
+// any; we additionally enforce no leading/trailing whitespace ourselves.
51
+func ParseSSHCommand(cmd string) (ParsedSSHCommand, error) {
52
+	if cmd != strings.TrimSpace(cmd) {
53
+		return ParsedSSHCommand{}, ErrUnknownSSHCommand
54
+	}
55
+	m := sshCmdRE.FindStringSubmatch(cmd)
56
+	if m == nil {
57
+		return ParsedSSHCommand{}, ErrUnknownSSHCommand
58
+	}
59
+	// The path group has two alternatives — quoted (m[2]) and unquoted
60
+	// (m[3]). Whichever fired is the path we use.
61
+	svc, path := Service(m[1]), m[2]
62
+	if path == "" {
63
+		path = m[3]
64
+	}
65
+
66
+	// Strip a single trailing `.git` if present.
67
+	path = strings.TrimSuffix(path, ".git")
68
+
69
+	owner, repo, ok := strings.Cut(path, "/")
70
+	if !ok {
71
+		return ParsedSSHCommand{}, fmt.Errorf("%w: missing owner/repo split", ErrInvalidSSHPath)
72
+	}
73
+	if strings.Contains(repo, "/") {
74
+		return ParsedSSHCommand{}, fmt.Errorf("%w: extra path segments", ErrInvalidSSHPath)
75
+	}
76
+	owner = strings.ToLower(owner)
77
+	repo = strings.ToLower(repo)
78
+	if err := validateOwner(owner); err != nil {
79
+		return ParsedSSHCommand{}, fmt.Errorf("%w: %v", ErrInvalidSSHPath, err)
80
+	}
81
+	if err := validateRepo(repo); err != nil {
82
+		return ParsedSSHCommand{}, fmt.Errorf("%w: %v", ErrInvalidSSHPath, err)
83
+	}
84
+	return ParsedSSHCommand{Service: svc, Owner: owner, Repo: repo}, nil
85
+}
86
+
87
+// ownerRE / repoRE mirror storage's name validation. We don't import
88
+// the storage package directly to avoid a runtime dependency in the
89
+// SSH-shell hot path (every SSH connection runs through ParseSSHCommand
90
+// and the storage package pulls in extra init).
91
+var (
92
+	ownerRE = regexp.MustCompile(`^[a-z0-9](?:[a-z0-9-]{0,37}[a-z0-9])?$`)
93
+	repoRE  = regexp.MustCompile(`^[a-z0-9](?:[a-z0-9._-]{0,98}[a-z0-9_])?$`)
94
+)
95
+
96
+func validateOwner(s string) error {
97
+	if !ownerRE.MatchString(s) {
98
+		return fmt.Errorf("owner shape")
99
+	}
100
+	if strings.Contains(s, "..") {
101
+		return fmt.Errorf("dot-dot")
102
+	}
103
+	return nil
104
+}
105
+
106
+func validateRepo(s string) error {
107
+	if !repoRE.MatchString(s) {
108
+		return fmt.Errorf("repo shape")
109
+	}
110
+	if strings.Contains(s, "..") {
111
+		return fmt.Errorf("dot-dot")
112
+	}
113
+	if strings.HasPrefix(s, ".") {
114
+		return fmt.Errorf("starts with dot")
115
+	}
116
+	return nil
117
+}
internal/git/protocol/ssh_command_test.goadded
@@ -0,0 +1,103 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package protocol_test
4
+
5
+import (
6
+	"errors"
7
+	"testing"
8
+
9
+	"github.com/tenseleyFlow/shithub/internal/git/protocol"
10
+)
11
+
12
+func TestParseSSHCommand_Accepts(t *testing.T) {
13
+	t.Parallel()
14
+	cases := []struct {
15
+		in           string
16
+		wantService  protocol.Service
17
+		wantOwner    string
18
+		wantRepo     string
19
+	}{
20
+		{"git-upload-pack 'alice/foo'", protocol.UploadPack, "alice", "foo"},
21
+		{"git-upload-pack 'alice/foo.git'", protocol.UploadPack, "alice", "foo"},
22
+		{"git-receive-pack 'bob/bar'", protocol.ReceivePack, "bob", "bar"},
23
+		{"git-receive-pack alice/foo.git", protocol.ReceivePack, "alice", "foo"},
24
+		{"git-upload-pack 'alice/rust-by-example'", protocol.UploadPack, "alice", "rust-by-example"},
25
+		{"git-upload-pack 'alice/name.with.dots'", protocol.UploadPack, "alice", "name.with.dots"},
26
+	}
27
+	for _, c := range cases {
28
+		got, err := protocol.ParseSSHCommand(c.in)
29
+		if err != nil {
30
+			t.Errorf("%q: %v", c.in, err)
31
+			continue
32
+		}
33
+		if got.Service != c.wantService || got.Owner != c.wantOwner || got.Repo != c.wantRepo {
34
+			t.Errorf("%q: got %+v", c.in, got)
35
+		}
36
+	}
37
+}
38
+
39
+func TestParseSSHCommand_RejectsUnknown(t *testing.T) {
40
+	t.Parallel()
41
+	for _, in := range []string{
42
+		"",
43
+		" ",
44
+		"ls",
45
+		"bash",
46
+		"git-archive 'alice/foo'",
47
+		"git-upload-pack",                            // no path
48
+		" git-upload-pack 'a/b'",                     // leading space
49
+		"git-upload-pack 'a/b' ",                     // trailing space
50
+		"git-upload-pack 'a/b' && rm -rf /",          // command injection
51
+		`git-upload-pack 'a/b'; cat /etc/passwd`,
52
+		"GIT-UPLOAD-PACK 'a/b'",                       // case
53
+		"git-upload-pack a/b'",                        // mismatched quotes
54
+	} {
55
+		_, err := protocol.ParseSSHCommand(in)
56
+		if !errors.Is(err, protocol.ErrUnknownSSHCommand) {
57
+			t.Errorf("%q: err = %v, want ErrUnknownSSHCommand", in, err)
58
+		}
59
+	}
60
+}
61
+
62
+func TestParseSSHCommand_RejectsBadPath(t *testing.T) {
63
+	t.Parallel()
64
+	for _, in := range []string{
65
+		"git-upload-pack '../../etc/passwd'",
66
+		"git-upload-pack 'alice/../bob/foo'",
67
+		"git-upload-pack 'alice/foo/extra'",
68
+		"git-upload-pack '/alice/foo'",
69
+		"git-upload-pack 'alice/'",
70
+		"git-upload-pack '/foo'",
71
+		"git-upload-pack 'alice/.git-meta'",
72
+		"git-upload-pack 'alice/-leading'",
73
+		"git-upload-pack 'alice/trailing-'",
74
+		"git-upload-pack 'al!ce/foo'",
75
+		"git-upload-pack 'alice/foo bar'",
76
+	} {
77
+		_, err := protocol.ParseSSHCommand(in)
78
+		if !errors.Is(err, protocol.ErrInvalidSSHPath) {
79
+			t.Errorf("%q: err = %v, want ErrInvalidSSHPath", in, err)
80
+		}
81
+	}
82
+}
83
+
84
+func TestParseSSHCommand_StripsSingleDotGit(t *testing.T) {
85
+	t.Parallel()
86
+	got, err := protocol.ParseSSHCommand("git-upload-pack 'alice/foo.git'")
87
+	if err != nil {
88
+		t.Fatalf("err: %v", err)
89
+	}
90
+	if got.Repo != "foo" {
91
+		t.Errorf("Repo = %q, want %q", got.Repo, "foo")
92
+	}
93
+	// And nested .git.git would split to ".git" repo (which would then
94
+	// be rejected because leading dot — but the parser doesn't do extra
95
+	// stripping past the single suffix).
96
+	got2, err := protocol.ParseSSHCommand("git-upload-pack 'alice/foo.git.git'")
97
+	if err != nil {
98
+		t.Fatalf("err: %v", err)
99
+	}
100
+	if got2.Repo != "foo.git" {
101
+		t.Errorf("nested .git.git: Repo = %q, want %q", got2.Repo, "foo.git")
102
+	}
103
+}