tenseleyflow/loader / 549ed74

Browse files

Normalize malformed patch hunks

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
549ed74647b4a48df5d599c6b1e332cbd3b8f03c
Parents
df98255
Tree
1731cc3

3 changed files

Status File + -
M src/loader/tools/fs_safety.py 77 4
M tests/test_expanded_tools.py 67 0
M tests/test_safeguard_services.py 25 0
src/loader/tools/fs_safety.py (modified)
@@ -62,6 +62,23 @@ class StructuredPatchHunk:
6262
         """Deserialize a patch hunk, expanding replacement-block variants."""
6363
 
6464
         if not isinstance(data.get("new_lines"), list):
65
+            raw_lines = [str(line) for line in data.get("lines", [])]
66
+            if _looks_like_raw_replacement_lines(raw_lines):
67
+                old_start = int(data.get("old_start", 0))
68
+                old_line_count = max(0, int(data.get("old_lines", 0)))
69
+                start_index = max(0, old_start - 1)
70
+                end_index = start_index + old_line_count
71
+                removed_lines = [
72
+                    f"-{line}" for line in original_lines[start_index:end_index]
73
+                ]
74
+                added_lines = [f"+{line}" for line in raw_lines]
75
+                return cls(
76
+                    old_start=old_start,
77
+                    old_lines=old_line_count,
78
+                    new_start=int(data.get("new_start", old_start)),
79
+                    new_lines=len(raw_lines),
80
+                    lines=[*removed_lines, *added_lines],
81
+                )
6582
             return cls.from_dict(data)
6683
 
6784
         old_start = int(data.get("old_start", 0))
@@ -100,10 +117,14 @@ def coerce_structured_patch_payload(
100117
         try:
101118
             value = json.loads(value)
102119
         except json.JSONDecodeError:
103
-            try:
104
-                value = ast.literal_eval(value)
105
-            except (SyntaxError, ValueError):
106
-                return []
120
+            repaired = _load_json_with_balanced_closers(value)
121
+            if repaired is not None:
122
+                value = repaired
123
+            else:
124
+                try:
125
+                    value = ast.literal_eval(value)
126
+                except (SyntaxError, ValueError):
127
+                    return []
107128
 
108129
     if isinstance(value, StructuredPatchHunk):
109130
         return [value]
@@ -115,6 +136,58 @@ def coerce_structured_patch_payload(
115136
     return [item for item in value if isinstance(item, (dict, StructuredPatchHunk))]
116137
 
117138
 
139
+def _load_json_with_balanced_closers(value: str) -> object | None:
140
+    suffix = _missing_json_closer_suffix(value)
141
+    if not suffix:
142
+        return None
143
+    try:
144
+        return json.loads(value + suffix)
145
+    except json.JSONDecodeError:
146
+        return None
147
+
148
+
149
+def _missing_json_closer_suffix(value: str) -> str:
150
+    stack: list[str] = []
151
+    in_string = False
152
+    escaped = False
153
+    pairs = {"[": "]", "{": "}"}
154
+    openers = set(pairs)
155
+    closers = {"]": "[", "}": "{"}
156
+
157
+    for char in value:
158
+        if in_string:
159
+            if escaped:
160
+                escaped = False
161
+            elif char == "\\":
162
+                escaped = True
163
+            elif char == '"':
164
+                in_string = False
165
+            continue
166
+
167
+        if char == '"':
168
+            in_string = True
169
+        elif char in openers:
170
+            stack.append(char)
171
+        elif char in closers:
172
+            if not stack or stack[-1] != closers[char]:
173
+                return ""
174
+            stack.pop()
175
+
176
+    if in_string:
177
+        return ""
178
+    return "".join(pairs[char] for char in reversed(stack))
179
+
180
+
181
+def _looks_like_raw_replacement_lines(lines: list[str]) -> bool:
182
+    if not lines:
183
+        return False
184
+    if any(line == "" for line in lines):
185
+        return True
186
+    if any(line[0] not in " +-" for line in lines):
187
+        return True
188
+    return not any(line[0] in "+-" for line in lines)
189
+
190
+
118191
 def resolve_workspace_path(
119192
     raw_path: str,
120193
     *,
tests/test_expanded_tools.py (modified)
@@ -65,6 +65,34 @@ async def test_patch_tool_accepts_json_encoded_structured_hunks(
6565
     assert target.read_text() == "alpha\nbeta from json string\ngamma\n"
6666
 
6767
 
68
+@pytest.mark.asyncio
69
+async def test_patch_tool_accepts_json_hunks_missing_outer_close(
70
+    temp_dir: Path,
71
+) -> None:
72
+    target = temp_dir / "sample.txt"
73
+    target.write_text("alpha\nbeta\ngamma\n")
74
+    tool = PatchTool(workspace_root=temp_dir)
75
+
76
+    hunk_payload = json.dumps(
77
+        [
78
+            {
79
+                "old_start": 2,
80
+                "old_lines": 1,
81
+                "new_start": 2,
82
+                "new_lines": 1,
83
+                "lines": ["-beta", "+beta from repaired json string"],
84
+            }
85
+        ]
86
+    )[:-1]
87
+    result = await tool.execute(
88
+        file_path=str(target),
89
+        hunks=hunk_payload,
90
+    )
91
+
92
+    assert result.is_error is False
93
+    assert target.read_text() == "alpha\nbeta from repaired json string\ngamma\n"
94
+
95
+
6896
 @pytest.mark.asyncio
6997
 async def test_patch_tool_accepts_python_literal_structured_hunks(
7098
     temp_dir: Path,
@@ -141,6 +169,45 @@ async def test_patch_tool_accepts_replacement_block_hunks(temp_dir: Path) -> Non
141169
     assert result.metadata["structured_patch"]
142170
 
143171
 
172
+@pytest.mark.asyncio
173
+async def test_patch_tool_accepts_raw_lines_replacement_hunks(
174
+    temp_dir: Path,
175
+) -> None:
176
+    target = temp_dir / "sample.html"
177
+    target.write_text("<h1>Title</h1>\n<p>Short.</p>\n<footer>Back</footer>\n")
178
+    tool = PatchTool(workspace_root=temp_dir)
179
+
180
+    result = await tool.execute(
181
+        file_path=str(target),
182
+        hunks=[
183
+            {
184
+                "old_start": 2,
185
+                "old_lines": 1,
186
+                "new_start": 2,
187
+                "new_lines": 4,
188
+                "lines": [
189
+                    "<p>Expanded body copy.</p>",
190
+                    "",
191
+                    "<ul>",
192
+                    "<li>Concrete detail</li>",
193
+                    "</ul>",
194
+                ],
195
+            }
196
+        ],
197
+    )
198
+
199
+    assert result.is_error is False
200
+    assert target.read_text() == (
201
+        "<h1>Title</h1>\n"
202
+        "<p>Expanded body copy.</p>\n"
203
+        "\n"
204
+        "<ul>\n"
205
+        "<li>Concrete detail</li>\n"
206
+        "</ul>\n"
207
+        "<footer>Back</footer>\n"
208
+    )
209
+
210
+
144211
 @pytest.mark.asyncio
145212
 async def test_patch_tool_accepts_unified_diff_string(temp_dir: Path) -> None:
146213
     target = temp_dir / "sample.txt"
tests/test_safeguard_services.py (modified)
@@ -378,6 +378,31 @@ def test_pre_action_validator_allows_json_encoded_patch_hunks() -> None:
378378
     assert result == ValidationResult(valid=True)
379379
 
380380
 
381
+def test_pre_action_validator_allows_json_patch_hunks_missing_outer_close() -> None:
382
+    validator = PreActionValidator()
383
+    hunk_payload = json.dumps(
384
+        [
385
+            {
386
+                "old_start": 1,
387
+                "old_lines": 1,
388
+                "new_start": 1,
389
+                "new_lines": 1,
390
+                "lines": ["-old", "+new"],
391
+            }
392
+        ]
393
+    )[:-1]
394
+
395
+    result = validator.validate(
396
+        "patch",
397
+        {
398
+            "file_path": "notes.txt",
399
+            "hunks": hunk_payload,
400
+        },
401
+    )
402
+
403
+    assert result == ValidationResult(valid=True)
404
+
405
+
381406
 def test_pre_action_validator_allows_python_literal_patch_hunks() -> None:
382407
     validator = PreActionValidator()
383408