Normalize malformed patch hunks
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA: 549ed74647b4a48df5d599c6b1e332cbd3b8f03c
- Parents: df98255
- Tree: 1731cc3
549ed74
549ed74647b4a48df5d599c6b1e332cbd3b8f03c
df98255
1731cc3

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/tools/fs_safety.py | 77 | 4 |
| M | tests/test_expanded_tools.py | 67 | 0 |
| M | tests/test_safeguard_services.py | 25 | 0 |
src/loader/tools/fs_safety.py (modified)
@@ -62,6 +62,23 @@ class StructuredPatchHunk: | ||
| 62 | 62 | """Deserialize a patch hunk, expanding replacement-block variants.""" |
| 63 | 63 | |
| 64 | 64 | if not isinstance(data.get("new_lines"), list): |
| 65 | + raw_lines = [str(line) for line in data.get("lines", [])] | |
| 66 | + if _looks_like_raw_replacement_lines(raw_lines): | |
| 67 | + old_start = int(data.get("old_start", 0)) | |
| 68 | + old_line_count = max(0, int(data.get("old_lines", 0))) | |
| 69 | + start_index = max(0, old_start - 1) | |
| 70 | + end_index = start_index + old_line_count | |
| 71 | + removed_lines = [ | |
| 72 | + f"-{line}" for line in original_lines[start_index:end_index] | |
| 73 | + ] | |
| 74 | + added_lines = [f"+{line}" for line in raw_lines] | |
| 75 | + return cls( | |
| 76 | + old_start=old_start, | |
| 77 | + old_lines=old_line_count, | |
| 78 | + new_start=int(data.get("new_start", old_start)), | |
| 79 | + new_lines=len(raw_lines), | |
| 80 | + lines=[*removed_lines, *added_lines], | |
| 81 | + ) | |
| 65 | 82 | return cls.from_dict(data) |
| 66 | 83 | |
| 67 | 84 | old_start = int(data.get("old_start", 0)) |
@@ -100,10 +117,14 @@ def coerce_structured_patch_payload( | ||
| 100 | 117 | try: |
| 101 | 118 | value = json.loads(value) |
| 102 | 119 | except json.JSONDecodeError: |
| 103 | - try: | |
| 104 | - value = ast.literal_eval(value) | |
| 105 | - except (SyntaxError, ValueError): | |
| 106 | - return [] | |
| 120 | + repaired = _load_json_with_balanced_closers(value) | |
| 121 | + if repaired is not None: | |
| 122 | + value = repaired | |
| 123 | + else: | |
| 124 | + try: | |
| 125 | + value = ast.literal_eval(value) | |
| 126 | + except (SyntaxError, ValueError): | |
| 127 | + return [] | |
| 107 | 128 | |
| 108 | 129 | if isinstance(value, StructuredPatchHunk): |
| 109 | 130 | return [value] |
@@ -115,6 +136,58 @@ def coerce_structured_patch_payload( | ||
| 115 | 136 | return [item for item in value if isinstance(item, (dict, StructuredPatchHunk))] |
| 116 | 137 | |
| 117 | 138 | |
| 139 | +def _load_json_with_balanced_closers(value: str) -> object | None: | |
| 140 | + suffix = _missing_json_closer_suffix(value) | |
| 141 | + if not suffix: | |
| 142 | + return None | |
| 143 | + try: | |
| 144 | + return json.loads(value + suffix) | |
| 145 | + except json.JSONDecodeError: | |
| 146 | + return None | |
| 147 | + | |
| 148 | + | |
| 149 | +def _missing_json_closer_suffix(value: str) -> str: | |
| 150 | + stack: list[str] = [] | |
| 151 | + in_string = False | |
| 152 | + escaped = False | |
| 153 | + pairs = {"[": "]", "{": "}"} | |
| 154 | + openers = set(pairs) | |
| 155 | + closers = {"]": "[", "}": "{"} | |
| 156 | + | |
| 157 | + for char in value: | |
| 158 | + if in_string: | |
| 159 | + if escaped: | |
| 160 | + escaped = False | |
| 161 | + elif char == "\\": | |
| 162 | + escaped = True | |
| 163 | + elif char == '"': | |
| 164 | + in_string = False | |
| 165 | + continue | |
| 166 | + | |
| 167 | + if char == '"': | |
| 168 | + in_string = True | |
| 169 | + elif char in openers: | |
| 170 | + stack.append(char) | |
| 171 | + elif char in closers: | |
| 172 | + if not stack or stack[-1] != closers[char]: | |
| 173 | + return "" | |
| 174 | + stack.pop() | |
| 175 | + | |
| 176 | + if in_string: | |
| 177 | + return "" | |
| 178 | + return "".join(pairs[char] for char in reversed(stack)) | |
| 179 | + | |
| 180 | + | |
| 181 | +def _looks_like_raw_replacement_lines(lines: list[str]) -> bool: | |
| 182 | + if not lines: | |
| 183 | + return False | |
| 184 | + if any(line == "" for line in lines): | |
| 185 | + return True | |
| 186 | + if any(line[0] not in " +-" for line in lines): | |
| 187 | + return True | |
| 188 | + return not any(line[0] in "+-" for line in lines) | |
| 189 | + | |
| 190 | + | |
| 118 | 191 | def resolve_workspace_path( |
| 119 | 192 | raw_path: str, |
| 120 | 193 | *, |
tests/test_expanded_tools.py (modified)
@@ -65,6 +65,34 @@ async def test_patch_tool_accepts_json_encoded_structured_hunks( | ||
| 65 | 65 | assert target.read_text() == "alpha\nbeta from json string\ngamma\n" |
| 66 | 66 | |
| 67 | 67 | |
| 68 | +@pytest.mark.asyncio | |
| 69 | +async def test_patch_tool_accepts_json_hunks_missing_outer_close( | |
| 70 | + temp_dir: Path, | |
| 71 | +) -> None: | |
| 72 | + target = temp_dir / "sample.txt" | |
| 73 | + target.write_text("alpha\nbeta\ngamma\n") | |
| 74 | + tool = PatchTool(workspace_root=temp_dir) | |
| 75 | + | |
| 76 | + hunk_payload = json.dumps( | |
| 77 | + [ | |
| 78 | + { | |
| 79 | + "old_start": 2, | |
| 80 | + "old_lines": 1, | |
| 81 | + "new_start": 2, | |
| 82 | + "new_lines": 1, | |
| 83 | + "lines": ["-beta", "+beta from repaired json string"], | |
| 84 | + } | |
| 85 | + ] | |
| 86 | + )[:-1] | |
| 87 | + result = await tool.execute( | |
| 88 | + file_path=str(target), | |
| 89 | + hunks=hunk_payload, | |
| 90 | + ) | |
| 91 | + | |
| 92 | + assert result.is_error is False | |
| 93 | + assert target.read_text() == "alpha\nbeta from repaired json string\ngamma\n" | |
| 94 | + | |
| 95 | + | |
| 68 | 96 | @pytest.mark.asyncio |
| 69 | 97 | async def test_patch_tool_accepts_python_literal_structured_hunks( |
| 70 | 98 | temp_dir: Path, |
@@ -141,6 +169,45 @@ async def test_patch_tool_accepts_replacement_block_hunks(temp_dir: Path) -> Non | ||
| 141 | 169 | assert result.metadata["structured_patch"] |
| 142 | 170 | |
| 143 | 171 | |
| 172 | +@pytest.mark.asyncio | |
| 173 | +async def test_patch_tool_accepts_raw_lines_replacement_hunks( | |
| 174 | + temp_dir: Path, | |
| 175 | +) -> None: | |
| 176 | + target = temp_dir / "sample.html" | |
| 177 | + target.write_text("<h1>Title</h1>\n<p>Short.</p>\n<footer>Back</footer>\n") | |
| 178 | + tool = PatchTool(workspace_root=temp_dir) | |
| 179 | + | |
| 180 | + result = await tool.execute( | |
| 181 | + file_path=str(target), | |
| 182 | + hunks=[ | |
| 183 | + { | |
| 184 | + "old_start": 2, | |
| 185 | + "old_lines": 1, | |
| 186 | + "new_start": 2, | |
| 187 | + "new_lines": 4, | |
| 188 | + "lines": [ | |
| 189 | + "<p>Expanded body copy.</p>", | |
| 190 | + "", | |
| 191 | + "<ul>", | |
| 192 | + "<li>Concrete detail</li>", | |
| 193 | + "</ul>", | |
| 194 | + ], | |
| 195 | + } | |
| 196 | + ], | |
| 197 | + ) | |
| 198 | + | |
| 199 | + assert result.is_error is False | |
| 200 | + assert target.read_text() == ( | |
| 201 | + "<h1>Title</h1>\n" | |
| 202 | + "<p>Expanded body copy.</p>\n" | |
| 203 | + "\n" | |
| 204 | + "<ul>\n" | |
| 205 | + "<li>Concrete detail</li>\n" | |
| 206 | + "</ul>\n" | |
| 207 | + "<footer>Back</footer>\n" | |
| 208 | + ) | |
| 209 | + | |
| 210 | + | |
| 144 | 211 | @pytest.mark.asyncio |
| 145 | 212 | async def test_patch_tool_accepts_unified_diff_string(temp_dir: Path) -> None: |
| 146 | 213 | target = temp_dir / "sample.txt" |
tests/test_safeguard_services.py (modified)
@@ -378,6 +378,31 @@ def test_pre_action_validator_allows_json_encoded_patch_hunks() -> None: | ||
| 378 | 378 | assert result == ValidationResult(valid=True) |
| 379 | 379 | |
| 380 | 380 | |
| 381 | +def test_pre_action_validator_allows_json_patch_hunks_missing_outer_close() -> None: | |
| 382 | + validator = PreActionValidator() | |
| 383 | + hunk_payload = json.dumps( | |
| 384 | + [ | |
| 385 | + { | |
| 386 | + "old_start": 1, | |
| 387 | + "old_lines": 1, | |
| 388 | + "new_start": 1, | |
| 389 | + "new_lines": 1, | |
| 390 | + "lines": ["-old", "+new"], | |
| 391 | + } | |
| 392 | + ] | |
| 393 | + )[:-1] | |
| 394 | + | |
| 395 | + result = validator.validate( | |
| 396 | + "patch", | |
| 397 | + { | |
| 398 | + "file_path": "notes.txt", | |
| 399 | + "hunks": hunk_payload, | |
| 400 | + }, | |
| 401 | + ) | |
| 402 | + | |
| 403 | + assert result == ValidationResult(valid=True) | |
| 404 | + | |
| 405 | + | |
| 381 | 406 | def test_pre_action_validator_allows_python_literal_patch_hunks() -> None: |
| 382 | 407 | validator = PreActionValidator() |
| 383 | 408 | |