tenseleyflow/shithub / 0996131

Browse files

C2: position-mapper compares original line text against new file

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
099613113c9424d355d21545ae9ed17562b6f145
Parents
1f1cde9
Tree
9d2d98f

2 changed files

StatusFile+-
M internal/pulls/review/position_map.go 70 55
M internal/pulls/review/review_test.go 66 0
internal/pulls/review/position_map.gomodified
@@ -13,20 +13,25 @@ import (
13
 )
13
 )
14
 
14
 
15
 // RemapAllForPR re-anchors every non-draft review comment on a PR
15
 // RemapAllForPR re-anchors every non-draft review comment on a PR
16
-// against the new (base, head) snapshot. For each comment:
16
+// against the new (base, head) snapshot. For each comment we do a
17
+// content-aware check:
17
 //
18
 //
18
-//	1. Re-walk the diff for its file at (newBase..newHead).
19
+//  1. Read the line text at (original_commit_sha, file_path,
19
-//	2. If the line at original_line still appears on the chosen side
20
+//     original_line). This is what the comment was written against.
20
-//	   (left = base, right = head), set current_position to its new
21
+//  2. Read the same line number in the new blob (newHeadOID for
21
-//	   position index. Otherwise NULL → outdated.
22
+//     side=right; newBaseOID for side=left).
23
+//  3. If the bytes are identical → keep current_position = original_line.
24
+//     If not → current_position = NULL (outdated). The comment still
25
+//     renders in the conversation timeline; the Files tab hides it
26
+//     until the user clicks "Show outdated."
22
 //
27
 //
23
-// This is the conservative v1 implementation. The spec calls out
28
+// This is the conservative v1 mapper. Lines that have been re-indented,
29
+// shifted by an insertion above, or merely had a comma added all
30
+// outdate — that's the right default. The spec calls out
24
 // `git blame --porcelain` as a richer mapper for rebase-heavy PRs;
31
 // `git blame --porcelain` as a richer mapper for rebase-heavy PRs;
25
-// add that when the simple line-presence check proves insufficient.
32
+// add that when the simple presence check proves too aggressive.
26
 //
33
 //
27
-// The function reads `pr_review_comments` then issues per-row
34
+// Idempotent: re-running converges on the same answer.
28
-// SetPRReviewCommentCurrentPosition updates. Idempotent: re-running
29
-// converges on the same answer.
30
 func RemapAllForPR(ctx context.Context, deps Deps, gitDir string, prID int64, newBaseOID, newHeadOID string) error {
35
 func RemapAllForPR(ctx context.Context, deps Deps, gitDir string, prID int64, newBaseOID, newHeadOID string) error {
31
 	if newBaseOID == "" || newHeadOID == "" {
36
 	if newBaseOID == "" || newHeadOID == "" {
32
 		return nil
37
 		return nil
@@ -40,47 +45,41 @@ func RemapAllForPR(ctx context.Context, deps Deps, gitDir string, prID int64, ne
40
 		return nil
45
 		return nil
41
 	}
46
 	}
42
 
47
 
43
-	// Build per-file caches keyed on (file_path, side). Each entry
48
+	// Per-(ref, path) blob cache so we read each blob at most once per
44
-	// stores the line-text → position map for that side of the new
49
+	// PR, regardless of how many comments anchor into it.
45
-	// diff. v1 reads file contents at the relevant snapshot OID and
50
+	type blobKey struct {
46
-	// computes positions on demand.
51
+		ref  string
47
-	type sideKey struct {
48
 		path string
52
 		path string
49
-		side string
50
 	}
53
 	}
51
-	cache := map[sideKey]map[int]int{} // line# (from original) → new position
54
+	cache := map[blobKey][]byte{}
52
-
55
+	loadBlob := func(ref, path string) []byte {
53
-	getMap := func(path, side string) (map[int]int, error) {
56
+		k := blobKey{ref, path}
54
-		k := sideKey{path, side}
57
+		if b, ok := cache[k]; ok {
55
-		if m, ok := cache[k]; ok {
58
+			return b
56
-			return m, nil
57
-		}
58
-		// For the v1 mapper we just compute position-by-line via the
59
-		// snapshot file content. side=right uses newHeadOID; side=left
60
-		// uses newBaseOID.
61
-		ref := newHeadOID
62
-		if side == "left" {
63
-			ref = newBaseOID
64
 		}
59
 		}
65
-		// Inline cap on file size — we don't bother mapping files >
60
+		// We don't bother mapping files > 1 MiB; those comments outdate.
66
-		// 1 MiB; those comments outdate.
67
 		const maxBytes = 1 << 20
61
 		const maxBytes = 1 << 20
68
 		blob, err := repogit.ReadBlobBytes(ctx, gitDir, ref, path, maxBytes)
62
 		blob, err := repogit.ReadBlobBytes(ctx, gitDir, ref, path, maxBytes)
69
 		if err != nil {
63
 		if err != nil {
70
-			cache[k] = nil // miss → outdate everything in this file
64
+			cache[k] = nil
71
-			return nil, nil
65
+			return nil
72
 		}
66
 		}
73
-		m := buildLineMap(blob)
67
+		cache[k] = blob
74
-		cache[k] = m
68
+		return blob
75
-		return m, nil
76
 	}
69
 	}
77
 
70
 
78
 	for _, c := range rows {
71
 	for _, c := range rows {
79
-		m, _ := getMap(c.FilePath, string(c.Side))
72
+		newRef := newHeadOID
73
+		if c.Side == pullsdb.PrReviewSideLeft {
74
+			newRef = newBaseOID
75
+		}
76
+		original := loadBlob(c.OriginalCommitSha, c.FilePath)
77
+		current := loadBlob(newRef, c.FilePath)
78
+
80
 		var newPos pgtype.Int4
79
 		var newPos pgtype.Int4
81
-		if m != nil {
80
+		if line, ok := lineAt(original, int(c.OriginalLine)); ok {
82
-			if pos, ok := m[int(c.OriginalLine)]; ok {
81
+			if cur, ok := lineAt(current, int(c.OriginalLine)); ok && bytesEqual(line, cur) {
83
-				newPos = pgtype.Int4{Int32: int32(pos), Valid: true}
82
+				newPos = pgtype.Int4{Int32: c.OriginalLine, Valid: true}
84
 			}
83
 			}
85
 		}
84
 		}
86
 		if err := q.SetPRReviewCommentCurrentPosition(ctx, deps.Pool, pullsdb.SetPRReviewCommentCurrentPositionParams{
85
 		if err := q.SetPRReviewCommentCurrentPosition(ctx, deps.Pool, pullsdb.SetPRReviewCommentCurrentPositionParams{
@@ -93,25 +92,41 @@ func RemapAllForPR(ctx context.Context, deps Deps, gitDir string, prID int64, ne
93
 	return nil
92
 	return nil
94
 }
93
 }
95
 
94
 
96
-// buildLineMap returns line-number → position-index for a blob. v1
95
+// lineAt returns the bytes of line N (1-indexed) in blob, excluding
97
-// just maps line N to position N — line numbers in the blob *are*
96
+// the trailing newline. Returns (nil, false) when the blob is nil/empty
98
-// the positions for the simple "line still exists" check. The map
97
+// or N is out of range.
99
-// will be replaced with a content-aware mapping when we add the
98
+func lineAt(blob []byte, n int) ([]byte, bool) {
100
-// blame-based variant.
99
+	if blob == nil || n < 1 {
101
-func buildLineMap(blob []byte) map[int]int {
100
+		return nil, false
102
-	out := map[int]int{}
101
+	}
103
-	line := 1
102
+	start, line := 0, 1
104
-	pos := 1
105
 	for i := 0; i < len(blob); i++ {
103
 	for i := 0; i < len(blob); i++ {
106
 		if blob[i] == '\n' {
104
 		if blob[i] == '\n' {
107
-			out[line] = pos
105
+			if line == n {
106
+				return blob[start:i], true
107
+			}
108
 			line++
108
 			line++
109
-			pos++
109
+			start = i + 1
110
 		}
110
 		}
111
 	}
111
 	}
112
-	// Trailing line without newline.
112
+	// Trailing line without terminator.
113
-	if len(blob) > 0 && blob[len(blob)-1] != '\n' {
113
+	if line == n && start < len(blob) {
114
-		out[line] = pos
114
+		return blob[start:], true
115
+	}
116
+	return nil, false
117
+}
118
+
119
+// bytesEqual is a tiny escape hatch so the test fixtures stay readable.
120
+// `bytes.Equal` would do, but pulling the whole package in for one call
121
+// adds noise to the position-map dependency graph.
122
+func bytesEqual(a, b []byte) bool {
123
+	if len(a) != len(b) {
124
+		return false
125
+	}
126
+	for i := range a {
127
+		if a[i] != b[i] {
128
+			return false
129
+		}
115
 	}
130
 	}
116
-	return out
131
+	return true
117
 }
132
 }
internal/pulls/review/review_test.gomodified
@@ -398,3 +398,69 @@ func TestDismiss_ClearsBlock(t *testing.T) {
398
 		t.Errorf("post-dismiss: state=%s, want clean", got.MergeableState)
398
 		t.Errorf("post-dismiss: state=%s, want clean", got.MergeableState)
399
 	}
399
 	}
400
 }
400
 }
401
+
402
+// TestPositionMap_ContentAware verifies the audit fix for the
403
+// position-mapper: a comment anchored at line N stays anchored only
404
+// when the line text at that position matches the original; otherwise
405
+// it goes outdated. Pre-fix, the mapper just checked "does line N
406
+// exist" (yes if file has ≥ N lines), which kept comments anchored
407
+// onto entirely unrelated content. (S00-S25 audit, finding C2.)
408
+func TestPositionMap_ContentAware(t *testing.T) {
409
+	f := setup(t)
410
+	ctx := context.Background()
411
+	commitOnBranch(t, f.gitDir, "trunk", "init", "README.md", "hi\n")
412
+	// Initial 3-line file.
413
+	commitOnBranch(t, f.gitDir, "feature", "add", "x.txt", "alpha\nbravo\ncharlie\n")
414
+	pr := f.openPR(t, "trunk", "feature")
415
+
416
+	// Comment anchored at line 2 ("bravo") on the original head.
417
+	c, err := review.AddComment(ctx, f.reviewDeps, review.CommentParams{
418
+		PRIssueID: pr.IssueID, AuthorUserID: f.reviewerID,
419
+		FilePath: "x.txt", Side: "right", OriginalCommitSHA: pr.HeadOid,
420
+		OriginalLine: 2, OriginalPosition: 2, CurrentPosition: 2,
421
+		Body: "comment on bravo",
422
+	})
423
+	if err != nil {
424
+		t.Fatalf("AddComment: %v", err)
425
+	}
426
+
427
+	// Push a new head where line 2 has been replaced with totally
428
+	// different content. The PR's head_oid is updated by Synchronize,
429
+	// which RemapAllForPR runs against. We push to feature directly.
430
+	commitOnBranch(t, f.gitDir, "feature", "rewrite line 2", "x.txt", "alpha\nDELTA\ncharlie\n")
431
+	if err := pulls.Synchronize(ctx, f.pullsDeps, f.gitDir, pr.IssueID); err != nil {
432
+		t.Fatalf("Synchronize: %v", err)
433
+	}
434
+
435
+	got, err := pullsdb.New().GetPRReviewComment(ctx, f.pool, c.ID)
436
+	if err != nil {
437
+		t.Fatalf("GetPRReviewComment: %v", err)
438
+	}
439
+	if got.CurrentPosition.Valid {
440
+		t.Errorf("expected current_position=NULL (outdated), got %d (line text changed)", got.CurrentPosition.Int32)
441
+	}
442
+
443
+	// Sanity counter-test: a comment on an unchanged line should stay
444
+	// anchored. Open a fresh PR for cleanliness.
445
+	commitOnBranch(t, f.gitDir, "trunk2", "init", "README.md", "hi\n")
446
+	commitOnBranch(t, f.gitDir, "feature2", "add", "y.txt", "alpha\nbravo\ncharlie\n")
447
+	pr2 := f.openPR(t, "trunk2", "feature2")
448
+	c2, err := review.AddComment(ctx, f.reviewDeps, review.CommentParams{
449
+		PRIssueID: pr2.IssueID, AuthorUserID: f.reviewerID,
450
+		FilePath: "y.txt", Side: "right", OriginalCommitSHA: pr2.HeadOid,
451
+		OriginalLine: 2, OriginalPosition: 2, CurrentPosition: 2,
452
+		Body: "comment that should survive",
453
+	})
454
+	if err != nil {
455
+		t.Fatalf("AddComment 2: %v", err)
456
+	}
457
+	// Push a change to a different line of the same file.
458
+	commitOnBranch(t, f.gitDir, "feature2", "tweak line 1", "y.txt", "ALPHA\nbravo\ncharlie\n")
459
+	if err := pulls.Synchronize(ctx, f.pullsDeps, f.gitDir, pr2.IssueID); err != nil {
460
+		t.Fatalf("Synchronize 2: %v", err)
461
+	}
462
+	got2, _ := pullsdb.New().GetPRReviewComment(ctx, f.pool, c2.ID)
463
+	if !got2.CurrentPosition.Valid || got2.CurrentPosition.Int32 != 2 {
464
+		t.Errorf("comment on unchanged line: current_position=%v (want valid=2)", got2.CurrentPosition)
465
+	}
466
+}