@@ -193,9 +193,10 @@ class ActionTracker: |
| 193 | 193 | self._response_history: list[str] = [] |
| 194 | 194 | self._action_index = 0 |
| 195 | 195 | self._mutation_epoch = 0 |
| 196 | | - self._recent_reads: dict[str, tuple[int, int, int]] = {} |
| 197 | | - self._recent_searches: dict[str, tuple[int, int, int]] = {} |
| 198 | | - self._recent_bash_observations: dict[str, tuple[int, int, int]] = {} |
| 196 | + self._mutation_records: list[_MutationRecord] = [] |
| 197 | + self._recent_reads: dict[str, _ObservationRecord] = {} |
| 198 | + self._recent_searches: dict[str, _ObservationRecord] = {} |
| 199 | + self._recent_bash_observations: dict[str, _ObservationRecord] = {} |
| 199 | 200 | self._recent_path_contexts: list[str] = [] |
| 200 | 201 | |
| 201 | 202 | def reset(self) -> None: |
@@ -207,6 +208,7 @@ class ActionTracker: |
| 207 | 208 | self._response_history.clear() |
| 208 | 209 | self._action_index = 0 |
| 209 | 210 | self._mutation_epoch = 0 |
| 211 | + self._mutation_records.clear() |
| 210 | 212 | self._recent_reads.clear() |
| 211 | 213 | self._recent_searches.clear() |
| 212 | 214 | self._recent_bash_observations.clear() |
@@ -313,10 +315,11 @@ class ActionTracker: |
| 313 | 315 | ( |
| 314 | 316 | "Already read " |
| 315 | 317 | f"{str(arguments.get('file_path', '')).strip()} " |
| 316 | | - "recently without any intervening changes; " |
| 318 | + "recently without any relevant intervening changes; " |
| 317 | 319 | "reuse the earlier read result instead of rereading" |
| 318 | 320 | ), |
| 319 | 321 | repeat_threshold=self.READ_REPEAT_THRESHOLD, |
| 322 | + target_paths=self._read_target_paths(arguments), |
| 320 | 323 | ) |
| 321 | 324 | if duplicate: |
| 322 | 325 | return True, reason |
@@ -328,10 +331,12 @@ class ActionTracker: |
| 328 | 331 | self._recent_searches, |
| 329 | 332 | observation_key, |
| 330 | 333 | ( |
| 331 | | - "Already ran the same search recently without any intervening " |
| 332 | | - "changes; reuse the earlier search result instead of rerunning it" |
| 334 | + "Already ran the same search recently without any relevant " |
| 335 | + "intervening changes; reuse the earlier search result instead of " |
| 336 | + "rerunning it" |
| 333 | 337 | ), |
| 334 | 338 | repeat_threshold=self.SEARCH_REPEAT_THRESHOLD, |
| 339 | + target_paths=self._search_target_paths(arguments), |
| 335 | 340 | ) |
| 336 | 341 | if duplicate: |
| 337 | 342 | return True, reason |
@@ -344,9 +349,11 @@ class ActionTracker: |
| 344 | 349 | self._normalize_command(command), |
| 345 | 350 | ( |
| 346 | 351 | "Already ran the same read-only shell probe recently without any " |
| 347 | | - "intervening changes; reuse the earlier shell output instead of rerunning it" |
| 352 | + "relevant intervening changes; reuse the earlier shell output instead " |
| 353 | + "of rerunning it" |
| 348 | 354 | ), |
| 349 | 355 | repeat_threshold=self.BASH_OBSERVATION_REPEAT_THRESHOLD, |
| 356 | + target_paths=self._bash_observation_target_paths(command), |
| 350 | 357 | ) |
| 351 | 358 | if duplicate: |
| 352 | 359 | return True, reason |
@@ -368,7 +375,7 @@ class ActionTracker: |
| 368 | 375 | if file_path: |
| 369 | 376 | self.record_file_create(file_path, content) |
| 370 | 377 | self._record_path_context(file_path) |
| 371 | | - self._note_mutation() |
| 378 | + self._note_mutation(file_path) |
| 372 | 379 | |
| 373 | 380 | elif tool_name == "edit": |
| 374 | 381 | file_path = arguments.get("file_path", "") |
@@ -377,7 +384,7 @@ class ActionTracker: |
| 377 | 384 | if file_path: |
| 378 | 385 | self.record_edit(file_path, old_string, new_string) |
| 379 | 386 | self._record_path_context(file_path) |
| 380 | | - self._note_mutation() |
| 387 | + self._note_mutation(file_path) |
| 381 | 388 | |
| 382 | 389 | elif tool_name == "patch": |
| 383 | 390 | file_path = arguments.get("file_path", "") |
@@ -389,7 +396,7 @@ class ActionTracker: |
| 389 | 396 | elif isinstance(raw_patch, str) and raw_patch.strip(): |
| 390 | 397 | self.record_edit(file_path, raw_patch, "raw_patch") |
| 391 | 398 | self._record_path_context(file_path) |
| 392 | | - self._note_mutation() |
| 399 | + self._note_mutation(file_path) |
| 393 | 400 | |
| 394 | 401 | elif tool_name == "read": |
| 395 | 402 | read_key = self._make_read_key(arguments) |
@@ -397,6 +404,7 @@ class ActionTracker: |
| 397 | 404 | self._record_observation( |
| 398 | 405 | self._recent_reads, |
| 399 | 406 | read_key, |
| 407 | + target_paths=self._read_target_paths(arguments), |
| 400 | 408 | ) |
| 401 | 409 | file_path = str(arguments.get("file_path", "")).strip() |
| 402 | 410 | if file_path: |
@@ -408,6 +416,7 @@ class ActionTracker: |
| 408 | 416 | self._record_observation( |
| 409 | 417 | self._recent_searches, |
| 410 | 418 | observation_key, |
| 419 | + target_paths=self._search_target_paths(arguments), |
| 411 | 420 | ) |
| 412 | 421 | search_path = str(arguments.get("path", "")).strip() |
| 413 | 422 | if search_path: |
@@ -418,11 +427,12 @@ class ActionTracker: |
| 418 | 427 | if command: |
| 419 | 428 | self.record_command(command) |
| 420 | 429 | if self._is_mutating_bash(command): |
| 421 | | - self._note_mutation() |
| 430 | + self._note_mutation(paths=self._bash_mutation_target_paths(command)) |
| 422 | 431 | elif self._is_observational_bash(command): |
| 423 | 432 | self._record_observation( |
| 424 | 433 | self._recent_bash_observations, |
| 425 | 434 | self._normalize_command(command), |
| 435 | + target_paths=self._bash_observation_target_paths(command), |
| 426 | 436 | ) |
| 427 | 437 | |
| 428 | 438 | def detect_loop(self) -> tuple[bool, str]: |
@@ -502,54 +512,213 @@ class ActionTracker: |
| 502 | 512 | def _normalize_command(command: str) -> str: |
| 503 | 513 | return " ".join(command.split()) |
| 504 | 514 | |
| 505 | | - def _note_mutation(self) -> None: |
def _note_mutation(
    self,
    path_value: str | None = None,
    *,
    paths: tuple[str, ...] | list[str] | None = None,
) -> None:
    """Advance the mutation epoch and log which paths the mutation touched.

    Args:
        path_value: A single mutated path, if the caller has exactly one.
        paths: Additional mutated paths; they are recorded ahead of
            ``path_value``.

    Blank entries are ignored and the remaining paths are normalized and
    de-duplicated in first-seen order. A record with no paths is still
    appended; ``_has_relevant_mutation_since`` treats such a record as
    affecting every observation.
    """
    self._mutation_epoch += 1

    raw_paths = [*(paths or ())]
    if path_value:
        raw_paths.append(path_value)

    # A dict keeps insertion order, so it doubles as an ordered set here.
    unique_paths: dict[str, None] = {}
    for raw in raw_paths:
        text = str(raw)
        if text.strip():
            unique_paths.setdefault(self._normalize_path(text), None)

    record = _MutationRecord(
        epoch=self._mutation_epoch,
        paths=tuple(unique_paths),
    )
    self._mutation_records.append(record)

    # Bound the history so long sessions do not grow it without limit.
    if len(self._mutation_records) > self.MAX_SEQUENCE_LENGTH:
        del self._mutation_records[: -self.MAX_SEQUENCE_LENGTH]
| 507 | 537 | |
def _check_recent_observation(
    self,
    cache: dict[str, _ObservationRecord],
    key: str,
    reason: str,
    *,
    repeat_threshold: int,
    target_paths: tuple[str, ...],
) -> tuple[bool, str]:
    """Decide whether repeating an observation would be redundant.

    Args:
        cache: Observation cache to consult (reads, searches, or shell probes).
        key: Cache key identifying the observation.
        reason: Message returned when the observation is a duplicate.
        repeat_threshold: Repeat count at which a recent observation is
            reported as a duplicate.
        target_paths: Paths the new observation depends on; when empty, the
            paths stored with the cached record are used instead.

    Returns:
        ``(True, reason)`` for a duplicate, otherwise ``(False, "")``.
    """
    previous = cache.get(key)
    if previous is None:
        return False, ""

    # Only consult the mutation log when the epoch actually moved.
    if previous.mutation_epoch != self._mutation_epoch:
        watched_paths = target_paths or previous.target_paths
        if self._has_relevant_mutation_since(previous.mutation_epoch, watched_paths):
            return False, ""

    actions_since = self._action_index - previous.action_index
    if actions_since > self.OBSERVATION_REPEAT_WINDOW:
        return False, ""

    # A non-positive gap means the same action index is being checked again.
    is_duplicate = actions_since <= 0 or previous.repeat_count >= repeat_threshold
    if is_duplicate:
        return True, reason
    return False, ""
| 531 | 567 | |
def _record_observation(
    self,
    cache: dict[str, _ObservationRecord],
    key: str,
    *,
    target_paths: tuple[str, ...],
) -> None:
    """Store or refresh the cache entry for an observation just performed.

    Args:
        cache: Observation cache to update (reads, searches, or shell probes).
        key: Cache key identifying the observation.
        target_paths: Paths the observation depends on.

    The repeat counter restarts at 1 when the key is new, when a relevant
    mutation happened since the previous entry, or when the previous entry is
    older than ``OBSERVATION_REPEAT_WINDOW`` actions. Otherwise the counter
    is incremented and, if ``target_paths`` is empty, the previously stored
    paths are carried over.
    """
    previous = cache.get(key)

    # Defaults describe a fresh streak; they are upgraded below if the
    # previous entry is still valid.
    streak = 1
    stored_paths = target_paths

    if previous is not None:
        actions_since = self._action_index - previous.action_index
        invalidated = (
            previous.mutation_epoch != self._mutation_epoch
            and self._has_relevant_mutation_since(
                previous.mutation_epoch,
                target_paths or previous.target_paths,
            )
        )
        still_recent = actions_since <= self.OBSERVATION_REPEAT_WINDOW
        if not invalidated and still_recent:
            streak = previous.repeat_count + 1
            stored_paths = target_paths or previous.target_paths

    cache[key] = _ObservationRecord(
        mutation_epoch=self._mutation_epoch,
        action_index=self._action_index,
        repeat_count=streak,
        target_paths=stored_paths,
    )
| 608 | + |
def _has_relevant_mutation_since(
    self,
    epoch: int,
    target_paths: tuple[str, ...],
) -> bool:
    """Report whether any mutation after ``epoch`` may affect ``target_paths``.

    Args:
        epoch: Mutation epoch at which the observation was recorded.
        target_paths: Normalized paths the observation depends on.

    Returns:
        ``True`` when a later mutation overlaps one of the target paths, and
        also whenever that cannot be ruled out: the observation has no known
        target paths, a later mutation has no known paths, or part of the
        mutation history after ``epoch`` is no longer available.
    """
    if not target_paths:
        return True

    records = self._mutation_records
    # _note_mutation trims the history to MAX_SEQUENCE_LENGTH entries. If the
    # oldest retained record is newer than ``epoch + 1``, the mutations in
    # between were evicted (or never logged); answer conservatively instead of
    # reporting "nothing changed".
    oldest_known_epoch = records[0].epoch if records else self._mutation_epoch + 1
    if oldest_known_epoch > epoch + 1:
        return True

    for mutation in records:
        if mutation.epoch <= epoch:
            continue
        # A mutation with unknown scope could have touched anything.
        if not mutation.paths:
            return True
        if any(
            self._paths_overlap(target_path, mutation_path)
            for mutation_path in mutation.paths
            for target_path in target_paths
        ):
            return True
    return False
| 625 | + |
@staticmethod
def _paths_overlap(first: str, second: str) -> bool:
    """Return True when the paths are equal or one is an ancestor of the other.

    Both paths are normalized first so trailing separators, ``.`` segments
    and ``..`` segments do not cause false negatives. Paths that cannot be
    compared (``os.path.commonpath`` raises ``ValueError``, e.g. when mixing
    absolute and relative paths) never overlap.
    """
    left = os.path.normpath(first)
    right = os.path.normpath(second)
    try:
        shared = os.path.commonpath([left, right])
    except ValueError:
        return False
    # commonpath() returns "" when two relative paths share no leading
    # component, whereas normpath() spells that location as ".". Align the
    # two so that "." is recognised as an ancestor of every relative path.
    if not shared:
        shared = os.curdir
    return shared in (left, right)
| 633 | + |
def _read_target_paths(self, arguments: dict) -> tuple[str, ...]:
    """Return the normalized file a ``read`` call targets, or ``()`` if none.

    The path is taken from ``arguments["file_path"]``; a missing or blank
    value yields an empty tuple.
    """
    requested = str(arguments.get("file_path", "")).strip()
    if not requested:
        return ()
    return (self._normalize_path(requested),)
| 637 | + |
def _search_target_paths(self, arguments: dict) -> tuple[str, ...]:
    """Return the normalized directory or file a search call covers.

    An explicit ``arguments["path"]`` wins. Otherwise the location is
    inferred from ``arguments["pattern"]`` via ``_path_from_glob_pattern``.
    An empty tuple is returned when no location can be inferred.
    """
    explicit_path = str(arguments.get("path", "")).strip()
    if explicit_path:
        return (self._normalize_path(explicit_path),)

    pattern_text = str(arguments.get("pattern", "")).strip()
    base_dir = self._path_from_glob_pattern(pattern_text)
    if not base_dir:
        return ()
    return (self._normalize_path(base_dir),)
| 645 | + |
@staticmethod
def _path_from_glob_pattern(pattern: str) -> str:
    """Derive the directory a glob pattern is rooted in.

    The literal text before the first ``*``, ``?`` or ``[`` decides the
    result: if it ends with a path separator it is the directory itself,
    otherwise its parent directory is used. An empty pattern maps to ``"."``,
    and a pattern without wildcards maps to its parent directory.
    """
    if not pattern:
        return "."

    wildcard_at = next(
        (position for position, char in enumerate(pattern) if char in "*?["),
        -1,
    )
    literal_prefix = pattern if wildcard_at < 0 else pattern[:wildcard_at]

    if wildcard_at >= 0 and literal_prefix.endswith(("/", "\\")):
        # Stripping the separators from "/" alone leaves "", i.e. the root.
        return literal_prefix.rstrip("/\\") or "/"

    # Path.parent of a bare name is already ".", so no special case is needed.
    return str(Path(literal_prefix).parent)
| 662 | + |
def _bash_observation_target_paths(self, command: str) -> tuple[str, ...]:
    """Return the normalized paths a read-only shell probe looks at.

    Recognised programs:
      * ``ls`` / ``find``: their path operands, or ``.`` when none are given.
      * ``cat`` / ``head`` / ``tail`` / ``stat``: their path operands.
      * ``rg``: every operand after the first (the first is the search
        pattern), or ``.`` when none remain.
      * ``pwd``: ``.``.

    Anything else, an empty command, or a command ``shlex`` cannot split
    yields ``()``.
    """
    try:
        tokens = shlex.split(self._normalize_command(command))
    except ValueError:
        return ()
    if not tokens:
        return ()

    executable = Path(tokens[0]).name
    path_args = self._shell_path_operands(tokens[1:])

    if executable in {"ls", "find"}:
        selected = path_args or ["."]
    elif executable in {"cat", "head", "tail", "stat"}:
        selected = path_args
    elif executable == "rg":
        selected = path_args[1:] or ["."]
    elif executable == "pwd":
        selected = ["."]
    else:
        return ()

    return tuple(self._normalize_path(entry) for entry in selected)
| 682 | + |
def _bash_mutation_target_paths(self, command: str) -> tuple[str, ...]:
    """Return the normalized paths a mutating shell command writes to.

    A target reported by ``extract_shell_text_rewrite_target`` takes
    precedence. Otherwise ``cp`` contributes its last operand, and ``mkdir``,
    ``touch``, ``rm``, ``mv``, ``chmod`` and ``chown`` contribute all of
    their operands. Any other program, an empty command, or a command
    ``shlex`` cannot split yields ``()``.
    """
    squashed = self._normalize_command(command)

    rewritten_file = extract_shell_text_rewrite_target(squashed)
    if rewritten_file is not None:
        return (self._normalize_path(rewritten_file),)

    try:
        tokens = shlex.split(squashed)
    except ValueError:
        return ()
    if not tokens:
        return ()

    executable = Path(tokens[0]).name
    path_args = self._shell_path_operands(tokens[1:])

    if executable == "cp":
        # Only the destination changes; slicing keeps the empty case empty.
        written = path_args[-1:]
    elif executable in {"mkdir", "touch", "rm", "mv", "chmod", "chown"}:
        written = path_args
    else:
        return ()

    return tuple(self._normalize_path(entry) for entry in written)
| 703 | + |
@staticmethod
def _shell_path_operands(tokens: list[str]) -> list[str]:
    """Pick the tokens of a shell argument list that look like path operands.

    Args:
        tokens: Arguments of a command, without the program name.

    Returns:
        The tokens that are neither options, option values, nor bare
        numbers, in their original order.

    Filtering rules:
      * ``-n``, ``--lines``, ``-c``, ``--bytes``, ``-m`` and ``--max-count``
        consume the token that follows them as their value.
      * Any other token starting with ``-`` is treated as an option.
      * A standalone ``--`` ends option parsing: later tokens are operands
        even when they start with ``-``.
      * Tokens consisting only of digits are dropped everywhere (they are
        typically counts or modes such as ``755``).
    """
    value_flags = {"-n", "--lines", "-c", "--bytes", "-m", "--max-count"}
    operands: list[str] = []
    expecting_value = False
    options_ended = False

    for token in tokens:
        if expecting_value:
            expecting_value = False
            continue
        if not options_ended:
            if token == "--":
                options_ended = True
                continue
            if token in value_flags:
                expecting_value = True
                continue
            if token.startswith("-"):
                continue
        if re.fullmatch(r"\d+", token):
            continue
        operands.append(token)
    return operands
| 553 | 722 | |
| 554 | 723 | def _make_search_key(self, tool_name: str, arguments: dict) -> str | None: |
| 555 | 724 | pattern = str(arguments.get("pattern", "")).strip() |
@@ -633,6 +802,20 @@ class ActionTracker: |
| 633 | 802 | if len(self._recent_path_contexts) > self.RECENT_PATH_CONTEXT_LIMIT: |
| 634 | 803 | del self._recent_path_contexts[self.RECENT_PATH_CONTEXT_LIMIT :] |
| 635 | 804 | |
@dataclass
class _MutationRecord:
    """One entry in the tracker's mutation history."""

    # Value of the tracker's mutation epoch right after this mutation.
    epoch: int
    # Normalized paths the mutation touched; empty when the scope is unknown.
    paths: tuple[str, ...]
| 809 | + |
| 810 | + |
@dataclass
class _ObservationRecord:
    """Cached facts about the most recent occurrence of an observation."""

    # Mutation epoch that was current when the observation was recorded.
    mutation_epoch: int
    # Action index at which the observation was recorded.
    action_index: int
    # Consecutive repeats seen so far; a fresh entry starts at 1.
    repeat_count: int
    # Normalized paths the observation depends on; may be empty.
    target_paths: tuple[str, ...]
| 817 | + |
| 818 | + |
| 636 | 819 | @dataclass |
| 637 | 820 | class ValidationResult: |
| 638 | 821 | """Result of pre-action validation.""" |