| 1 | """Tests for doctor, status, and session inspection surfaces.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import sys |
| 6 | from pathlib import Path |
| 7 | |
| 8 | import pytest |
| 9 | from click.testing import CliRunner |
| 10 | |
| 11 | import loader.cli.main as cli_main_module |
| 12 | from loader.llm.base import Message, Role |
| 13 | from loader.runtime.completion_trace import CompletionTraceEntry |
| 14 | from loader.runtime.dod import ( |
| 15 | DefinitionOfDoneStore, |
| 16 | VerificationEvidence, |
| 17 | create_definition_of_done, |
| 18 | ) |
| 19 | from loader.runtime.evidence_provenance import EvidenceProvenance |
| 20 | from loader.runtime.explore_state import ExploreSnapshot, ExploreStateStore |
| 21 | from loader.runtime.inspection import ( |
| 22 | CheckStatus, |
| 23 | collect_doctor_report, |
| 24 | collect_permission_snapshot, |
| 25 | collect_prompt_diff, |
| 26 | collect_prompt_preview, |
| 27 | collect_status_snapshot, |
| 28 | collect_workflow_artifact_diffs, |
| 29 | collect_workflow_timeline, |
| 30 | dry_run_permission_check, |
| 31 | list_session_summaries, |
| 32 | load_session_detail, |
| 33 | ) |
| 34 | from loader.runtime.prompt_history import PromptSnapshot |
| 35 | from loader.runtime.session import SessionSnapshot, SessionStore |
| 36 | from loader.runtime.verification_observations import VerificationObservation |
| 37 | from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem |
| 38 | from loader.runtime.workflow_policy import WorkflowTimelineEntry |
| 39 | |
| 40 | |
| 41 | class FakeOllamaBackend: |
| 42 | """Small async backend stub for doctor tests.""" |
| 43 | |
| 44 | def __init__( |
| 45 | self, |
| 46 | *, |
| 47 | model: str, |
| 48 | health: bool, |
| 49 | models: list[dict[str, object]], |
| 50 | model_details: dict[str, object] | None = None, |
| 51 | ) -> None: |
| 52 | self.model = model |
| 53 | self._health = health |
| 54 | self._models = models |
| 55 | self._model_details = model_details |
| 56 | |
| 57 | async def list_models(self) -> list[dict[str, object]]: |
| 58 | return list(self._models) |
| 59 | |
| 60 | async def health_check(self) -> bool: |
| 61 | return self._health |
| 62 | |
| 63 | async def describe_model(self) -> dict[str, object] | None: |
| 64 | return self._model_details |
| 65 | |
| 66 | async def close(self) -> None: |
| 67 | return None |
| 68 | |
| 69 | |
def _write_python_workspace(temp_dir: Path) -> None:
    """Lay out a minimal Python project inside ``temp_dir``.

    Writes ``pyproject.toml`` (hatchling build backend plus a pytest
    ``testpaths`` entry) and makes sure the ``src`` and ``tests`` directories
    exist. Calling it again on the same directory rewrites the file and leaves
    the existing directories in place instead of raising ``FileExistsError``.

    Args:
        temp_dir: Existing directory that becomes the project root.
    """
    pyproject_lines = [
        "[build-system]",
        'requires = ["hatchling"]',
        'build-backend = "hatchling.build"',
        "",
        "[tool.pytest.ini_options]",
        'testpaths = ["tests"]',
        # The empty last entry plus the appended newline leave one blank line
        # at the end of the file.
        "",
    ]
    # Pin the encoding so the fixture does not depend on the host locale.
    (temp_dir / "pyproject.toml").write_text(
        "\n".join(pyproject_lines) + "\n",
        encoding="utf-8",
    )
    for directory in ("src", "tests"):
        # exist_ok matches _ensure_loader_dirs and keeps the helper re-runnable.
        (temp_dir / directory).mkdir(exist_ok=True)
| 87 | |
| 88 | |
def _ensure_loader_dirs(temp_dir: Path) -> None:
    """Create the ``.loader`` state directories and an empty project-memory file.

    Directories that already exist are left alone; ``project-memory.json`` is
    always rewritten with an empty JSON object.

    Args:
        temp_dir: Project root that receives the ``.loader`` tree.
    """
    state_root = temp_dir / ".loader"
    subdirectories = ["sessions", "state", "dod", "briefs", "plans"]
    for target in map(state_root.joinpath, subdirectories):
        target.mkdir(parents=True, exist_ok=True)
    state_root.joinpath("project-memory.json").write_text("{}\n")
| 94 | |
| 95 | |
def _persist_session_with_dod(temp_dir: Path) -> tuple[str, str]:
    """Save a "fixing" definition of done and a session snapshot that points at it.

    The fixture describes a turn in which ``pytest -q`` failed and the workflow
    re-entered execute mode. The definition of done is stored first so that the
    path returned by its store can be recorded on the snapshot.

    Args:
        temp_dir: Project root handed to both stores.

    Returns:
        ``(session_id, dod_path)``, where ``dod_path`` is ``str()`` of the value
        returned by ``DefinitionOfDoneStore.save``.
    """
    task = "Fix the failing tests"
    core_sections = ("Runtime Config", "Workflow Context", "Mode Guidance")
    loader_root = temp_dir / ".loader"
    plan_artifact = str(loader_root / "plans" / "fix-tests.md")
    reentry_code = "verification_failed_reentry"
    continued_summary = (
        "continued after verification failed and the runtime re-entered execute mode"
    )

    # Definition of done: one item finished, one pending, last verification failed.
    dod = create_definition_of_done(task)
    dod.status = "fixing"
    dod.pending_items = ["Re-run pytest"]
    dod.completed_items = ["Patch the broken parser"]
    dod.last_verification_result = "failed"
    failed_run = VerificationEvidence(
        command="pytest -q",
        passed=False,
        stderr="1 failed",
        kind="test",
    )
    dod.evidence = [failed_run]
    dod_path = DefinitionOfDoneStore(temp_dir).save(dod)

    # Timeline: hand-off into verify, then re-entry into execute one minute later.
    handoff_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T12:04:00Z",
        kind="handoff",
        mode="verify",
        reason_code="execute_completed",
        summary="verify: execution completed; verifying the parser fix",
        decision_kind="handoff",
        prompt_format="native",
        prompt_sections=list(core_sections),
        artifact_paths=[plan_artifact],
    )
    failed_observation = VerificationObservation(
        status="failed",
        summary="verification failed for `pytest -q`",
        command="pytest -q",
        kind="test",
        detail="1 failed",
    )
    reentry_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T12:05:00Z",
        kind="reentry",
        mode="execute",
        reason_code=reentry_code,
        summary="execute: verification failed; returning to execute for fixes",
        decision_kind="reentry",
        scheduled_next_mode="verify",
        runner_up_mode="verify",
        runner_up_score=0.52,
        verification_observations=[failed_observation],
        prompt_format="native",
        prompt_sections=list(core_sections),
        artifact_paths=[plan_artifact],
    )
    workflow_timeline = [handoff_entry, reentry_entry]

    # Prompt history: the later prompt carries an extra "Project Context" section.
    verify_prompt = PromptSnapshot(
        timestamp="2026-04-06T12:04:00Z",
        workflow_mode="verify",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=list(core_sections),
        content="# Introduction\nverify parser fix\n",
    )
    execute_prompt = PromptSnapshot(
        timestamp="2026-04-06T12:05:00Z",
        workflow_mode="execute",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=[*core_sections, "Project Context"],
        content="# Introduction\nexecute parser fix\n# Project Context\npython\n",
    )

    accepted_trace = CompletionTraceEntry(
        stage="continuation_check",
        outcome="accept",
        decision_code="completion_response_accepted",
        decision_summary=(
            "accepted the response because completion heuristics found no "
            "missing follow-through"
        ),
    )
    continued_trace = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="continue",
        decision_code=reentry_code,
        decision_summary=continued_summary,
    )

    snapshot = SessionSnapshot(
        session_id="20260406T120000Z-abcdef01",
        created_at="2026-04-06T12:00:00Z",
        updated_at="2026-04-06T12:05:00Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="I updated the parser."),
        ],
        usage={"turns": 1, "tool_calls": 2},
        active_dod_path=str(dod_path),
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="prompt",
        permission_prompting_enabled=True,
        permission_rule_counts={"allow": 1, "deny": 2, "ask": 1},
        permission_rules_source=str(loader_root / "permission-rules.json"),
        prompt_format="native",
        prompt_sections=list(core_sections),
        prompt_history=[verify_prompt, execute_prompt],
        workflow_reason_code=reentry_code,
        workflow_reason_summary="verification failed; returning to execute for fixes",
        workflow_decision_kind="reentry",
        workflow_ambiguity_score=0.1,
        workflow_complexity_score=0.7,
        workflow_scheduled_next_mode="verify",
        active_turn_phase="completion",
        last_completion_decision_code=reentry_code,
        last_completion_decision_summary=continued_summary,
        completion_trace=[accepted_trace, continued_trace],
        last_turn_transition_summary="completion -> finalize [terminal] Finalizing completed turn",
        last_turn_transition_kind="terminal",
        last_turn_transition_reason_code="turn_complete",
        workflow_timeline=workflow_timeline,
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id, str(dod_path)
| 225 | |
| 226 | |
def _persist_explore_snapshot(temp_dir: Path) -> None:
    """Save a two-turn explore conversation through ``ExploreStateStore``.

    The transcript has four messages; the final user/assistant pair is also
    recorded as the snapshot's last query and last response.

    Args:
        temp_dir: Project root handed to the store.
    """
    store = ExploreStateStore(temp_dir)
    follow_up = "What file did you mention?"
    answer = "I mentioned README.md."
    transcript = (
        (Role.USER, "Where should I start?"),
        (Role.ASSISTANT, "Start with README.md."),
        (Role.USER, follow_up),
        (Role.ASSISTANT, answer),
    )
    explore = ExploreSnapshot(
        turn_count=2,
        model_name="llama3.1:8b",
        messages=[Message(role=speaker, content=text) for speaker, text in transcript],
        last_history_mode="continue",
        last_query=follow_up,
        last_response=answer,
    )
    store.save(explore)
| 243 | |
| 244 | |
def _persist_session_with_rich_workflow(temp_dir: Path) -> str:
    """Write two generations of workflow artifacts and save a session that uses the newer one.

    On disk the helper produces, under ``temp_dir / ".loader"``:

    * two clarify briefs (the older mentions ``planned.txt``, the newer ``notes.txt``);
    * two plan directories, each with ``implementation.md`` and ``verification.md``;
    * a definition of done that references the newer brief and plan files;
    * a session snapshot with a three-entry workflow timeline and a workflow ledger.

    Args:
        temp_dir: Project root for the artifacts and both stores.

    Returns:
        The session id of the saved snapshot.
    """
    slug = "tighten-loader-workflow-behavior"
    task = "Tighten Loader workflow behavior"
    core_sections = ("Runtime Config", "Workflow Context", "Mode Guidance")
    briefs_dir = temp_dir / ".loader" / "briefs"
    plans_dir = temp_dir / ".loader" / "plans"

    # Create the briefs directory up front, mirroring the plan directories
    # below, so the helper also works when nothing has prepared `.loader` yet.
    briefs_dir.mkdir(parents=True, exist_ok=True)
    brief_old = briefs_dir / f"20260406T150000Z-{slug}.md"
    brief_new = briefs_dir / f"20260406T150200Z-{slug}.md"
    brief_old.write_text(
        "# Task Brief\n\n## Likely Touchpoints\n- planned.txt\n\n## Acceptance Criteria\n- planned.txt exists.\n"
    )
    brief_new.write_text(
        "# Task Brief\n\n## Likely Touchpoints\n- notes.txt\n\n## Acceptance Criteria\n- notes.txt exists.\n"
    )

    plan_old_root = plans_dir / f"20260406T150100Z-{slug}"
    plan_new_root = plans_dir / f"20260406T150300Z-{slug}"
    for plan_root in (plan_old_root, plan_new_root):
        plan_root.mkdir(parents=True, exist_ok=True)
    implementation_new = plan_new_root / "implementation.md"
    verification_new = plan_new_root / "verification.md"
    plan_documents = (
        (
            plan_old_root / "implementation.md",
            "# Implementation Plan\n\n## File Changes\n- Create planned.txt.\n",
        ),
        (
            plan_old_root / "verification.md",
            "# Verification Plan\n\n## Acceptance Criteria\n- planned.txt exists.\n",
        ),
        (
            implementation_new,
            "# Implementation Plan\n\n## File Changes\n- Keep notes.txt as the runtime artifact.\n",
        ),
        (
            verification_new,
            "# Verification Plan\n\n## Acceptance Criteria\n- notes.txt exists.\n",
        ),
    )
    for document_path, document_text in plan_documents:
        document_path.write_text(document_text)

    # The definition of done points only at the newer artifact generation.
    dod = create_definition_of_done(task)
    dod.status = "fixing"
    dod.clarify_brief = str(brief_new)
    dod.implementation_plan = str(implementation_new)
    dod.verification_plan = str(verification_new)
    dod.acceptance_criteria = ["notes.txt exists in the workspace root."]
    dod_path = DefinitionOfDoneStore(temp_dir).save(dod)

    coverage_gap = (
        "Failed verification exposed missing brief coverage for `notes.txt exists`."
    )

    plan_prompt = PromptSnapshot(
        timestamp="2026-04-06T15:02:00Z",
        workflow_mode="plan",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=list(core_sections),
        content="# Introduction\nplan around planned.txt\n",
    )
    execute_prompt = PromptSnapshot(
        timestamp="2026-04-06T15:04:00Z",
        workflow_mode="execute",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=[*core_sections, "Project Context"],
        content="# Introduction\nexecute around notes.txt\n# Project Context\npython\n",
    )

    # Timeline, one entry per minute: clarify pressure pass, full replan, skipped verify.
    clarify_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T15:01:00Z",
        kind="clarify_continue",
        mode="clarify",
        reason_code="clarify_pressure_pass_required",
        summary="clarify: Loader still needs a tradeoff pass around non-goals",
        decision_kind="forced",
        unresolved_questions=["Concrete files or subsystems are still not pinned down."],
        signal_summary=["ambiguity=0.82", "open_questions=1"],
        clarify_stage="readiness",
        clarify_pressure_kind="tradeoff",
        pressure_pass_complete=False,
        missing_readiness_gates=["non_goals", "decision_boundaries"],
    )
    replan_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T15:02:00Z",
        kind="reentry",
        mode="plan",
        reason_code="full_replan_required",
        summary="plan: clarify and plan artifacts drifted; rebuilding the plan",
        decision_kind="reentry",
        scheduled_next_mode="execute",
        unresolved_questions=["Touched files outside the current plan: notes.txt"],
        evidence_summary=[
            "confirmed touchpoint: `notes.txt` was already touched during execution.",
            f"verification contradiction: {coverage_gap}",
        ],
        signal_summary=["recent_reentry=1", "stale_plan=true"],
        artifact_paths=[
            str(brief_new),
            str(implementation_new),
            str(verification_new),
        ],
    )
    verify_skip_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T15:03:00Z",
        kind="verify_skip",
        mode="verify",
        reason_code="verify_skip_no_commands",
        summary="verify: no verification commands were available for this turn",
        decision_kind="forced",
        signal_summary=["verify_pressure=low"],
    )

    # Ledger: one item per category, each introduced in clarify and updated in recovery.
    ledger = WorkflowLedger(
        assumptions=[
            WorkflowLedgerItem(
                text="notes.txt stays out of scope unless clarified otherwise.",
                status="contradicted",
                introduced_phase="clarify",
                updated_phase="recovery",
                evidence=["Clarify scope assumed `notes.txt` stayed out of scope."],
            )
        ],
        acceptance_anchors=[
            WorkflowLedgerItem(
                text="notes.txt exists in the workspace root.",
                status="changed",
                introduced_phase="clarify",
                updated_phase="recovery",
                evidence=[coverage_gap],
            )
        ],
        decision_boundaries=[
            WorkflowLedgerItem(
                text="Escalate before broad UX changes.",
                status="reopened",
                introduced_phase="clarify",
                updated_phase="recovery",
                evidence=["The active task framing outgrew the persisted clarify brief."],
            )
        ],
    )

    snapshot = SessionSnapshot(
        session_id="20260406T150000Z-feedface",
        created_at="2026-04-06T15:00:00Z",
        updated_at="2026-04-06T15:04:00Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="I refreshed the workflow contract."),
        ],
        active_dod_path=str(dod_path),
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="prompt",
        permission_prompting_enabled=True,
        prompt_format="native",
        prompt_sections=list(core_sections),
        prompt_history=[plan_prompt, execute_prompt],
        workflow_reason_code="full_replan_completed",
        workflow_reason_summary="clarify and plan artifacts refreshed; returning to execute",
        workflow_decision_kind="handoff",
        workflow_timeline=[clarify_entry, replan_entry, verify_skip_entry],
        workflow_ledger=ledger,
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
| 410 | |
| 411 | |
def _persist_session_with_policy_accountability(temp_dir: Path) -> str:
    """Save a session whose timeline records three policy decisions.

    In order, the entries carry the policy outcomes ``retry`` (raw-text tool
    fallback), ``accept`` (continuation check) and ``continue`` (definition of
    done). Only the last one has evidence provenance and a failed
    ``pytest -q`` observation attached.

    Args:
        temp_dir: Project root handed to ``SessionStore``.

    Returns:
        The session id of the saved snapshot.
    """
    task = "Explain Loader policy accountability"
    core_sections = ("Runtime Config", "Workflow Context", "Mode Guidance")
    failed_summary = "verification failed for `pytest -q`"

    repair_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:01:00Z",
        kind="repair_retry",
        mode="execute",
        reason_code="raw_text_tool_recovered",
        summary="repair: recovered raw-text tool calls into executable tool invocations",
        decision_kind="forced",
        policy_stage="raw_text_tool_fallback",
        policy_outcome="retry",
        prompt_format="native",
        prompt_sections=list(core_sections),
    )
    accepted_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:02:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="completion_response_accepted",
        summary=(
            "completion: accepted the response because completion heuristics "
            "found no missing follow-through"
        ),
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="accept",
        prompt_format="native",
        prompt_sections=list(core_sections),
    )
    provenance = EvidenceProvenance(
        category="verification",
        source="dod.evidence",
        summary=failed_summary,
        status="contradicts",
        subject="pytest -q",
    )
    observation = VerificationObservation(
        status="failed",
        summary=failed_summary,
        command="pytest -q",
        kind="test",
        detail="1 failed",
    )
    continued_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:03:00Z",
        kind="completion_continue",
        mode="execute",
        reason_code="verification_failed_reentry",
        summary=(
            "completion: continued after verification failed and the runtime "
            "re-entered execute mode"
        ),
        decision_kind="forced",
        policy_stage="definition_of_done",
        policy_outcome="continue",
        evidence_provenance=[provenance],
        verification_observations=[observation],
        prompt_format="native",
        prompt_sections=list(core_sections),
    )

    snapshot = SessionSnapshot(
        session_id="20260406T160000Z-abcd1234",
        created_at="2026-04-06T16:00:00Z",
        updated_at="2026-04-06T16:03:00Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(
                role=Role.ASSISTANT,
                content="The runtime tracked repair and completion decisions.",
            ),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(core_sections),
        workflow_timeline=[repair_entry, accepted_entry, continued_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
| 487 | |
| 488 | |
def _persist_session_with_pending_verification(temp_dir: Path) -> str:
    """Save a verify-mode session with a single ``pending`` verification observation.

    The observation is for ``uv run pytest -q`` and is tagged as attempt
    number 2.

    Args:
        temp_dir: Project root handed to ``SessionStore``.

    Returns:
        The session id of the saved snapshot.
    """
    task = "Verify the runtime changes"
    core_sections = ("Runtime Config", "Workflow Context", "Mode Guidance")
    latest_timestamp = "2026-04-06T16:05:30Z"

    pending_observation = VerificationObservation(
        status="pending",
        summary="verification pending for `uv run pytest -q`",
        command="uv run pytest -q",
        kind="test",
        attempt_id="verification-attempt-2",
        attempt_number=2,
    )
    pending_entry = WorkflowTimelineEntry(
        timestamp=latest_timestamp,
        kind="verify_observation",
        mode="verify",
        reason_code="verification_pending",
        summary="verify: verification is pending for the active command set",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="pending",
        verification_observations=[pending_observation],
        prompt_format="native",
        prompt_sections=list(core_sections),
    )

    snapshot = SessionSnapshot(
        session_id="20260406T160500Z-pending123",
        created_at="2026-04-06T16:05:00Z",
        updated_at=latest_timestamp,
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="Entering verification."),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="verify",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(core_sections),
        workflow_timeline=[pending_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
| 532 | |
| 533 | |
def _persist_session_with_planned_verification(temp_dir: Path) -> str:
    """Save an execute-mode session with a single ``planned`` verification observation.

    The observation is for ``uv run pytest -q``, is tagged as attempt number 3,
    and its detail names the file write that was recorded with it.

    Args:
        temp_dir: Project root handed to ``SessionStore``.

    Returns:
        The session id of the saved snapshot.
    """
    task = "Keep editing the runtime"
    core_sections = ("Runtime Config", "Workflow Context", "Mode Guidance")
    latest_timestamp = "2026-04-06T16:04:50Z"

    planned_observation = VerificationObservation(
        status="planned",
        summary="verification planned for `uv run pytest -q`",
        command="uv run pytest -q",
        kind="runtime",
        detail="write changed src/loader/runtime/tool_batches.py",
        attempt_id="verification-attempt-3",
        attempt_number=3,
    )
    planned_entry = WorkflowTimelineEntry(
        timestamp=latest_timestamp,
        kind="verify_observation",
        mode="execute",
        reason_code="verification_planned",
        summary="verify: verification is planned after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="planned",
        verification_observations=[planned_observation],
        prompt_format="native",
        prompt_sections=list(core_sections),
    )

    snapshot = SessionSnapshot(
        session_id="20260406T160430Z-plan1234",
        created_at="2026-04-06T16:04:30Z",
        updated_at=latest_timestamp,
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="Verification will run after execution."),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(core_sections),
        workflow_timeline=[planned_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
| 578 | |
| 579 | |
def _persist_session_with_stale_verification(temp_dir: Path) -> str:
    """Save an execute-mode session with a single ``stale`` verification observation.

    The observation is for ``uv run pytest -q`` and its detail names the file
    write that was recorded with it.

    Args:
        temp_dir: Project root handed to ``SessionStore``.

    Returns:
        The session id of the saved snapshot.
    """
    task = "Keep working on the runtime"
    core_sections = ("Runtime Config", "Workflow Context", "Mode Guidance")
    latest_timestamp = "2026-04-06T16:07:30Z"

    # NOTE(review): attempt 1 is recorded as superseding attempt 2 — confirm
    # that direction is what the inspection surface expects.
    stale_observation = VerificationObservation(
        status="stale",
        summary="verification became stale for `uv run pytest -q` after new mutating work",
        command="uv run pytest -q",
        kind="runtime",
        detail="write changed src/loader/runtime/finalization.py",
        attempt_id="verification-attempt-1",
        attempt_number=1,
        supersedes_attempt_id="verification-attempt-2",
    )
    stale_entry = WorkflowTimelineEntry(
        timestamp=latest_timestamp,
        kind="verify_observation",
        mode="execute",
        reason_code="verification_stale",
        summary="verify: previous verification became stale after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="stale",
        verification_observations=[stale_observation],
        prompt_format="native",
        prompt_sections=list(core_sections),
    )

    snapshot = SessionSnapshot(
        session_id="20260406T160700Z-stale1234",
        created_at="2026-04-06T16:07:00Z",
        updated_at=latest_timestamp,
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="Fresh verification is required again."),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(core_sections),
        workflow_timeline=[stale_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
| 628 | |
| 629 | |
@pytest.mark.asyncio
async def test_collect_doctor_report_passes_for_healthy_workspace(temp_dir: Path) -> None:
    """A prepared workspace with a healthy backend passes the doctor report.

    Also pins the exact set of check names the report contains.
    """
    model_name = "qwen2.5-coder:14b"
    expected_check_names = {
        "backend",
        "capabilities",
        "workspace",
        "write_access",
        "commands",
        "state",
        "permissions",
    }

    def healthy_backend(model: str) -> FakeOllamaBackend:
        # Healthy stub that lists the requested model.
        return FakeOllamaBackend(
            model=model,
            health=True,
            models=[{"name": "qwen2.5-coder:14b"}],
            model_details={"details": {"family": "qwen2.5"}},
        )

    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)

    report = await collect_doctor_report(
        temp_dir,
        model=model_name,
        backend_factory=healthy_backend,
    )

    assert report.overall_status == CheckStatus.PASS
    assert {entry.name for entry in report.checks} == expected_check_names
    backend_check = next(entry for entry in report.checks if entry.name == "backend")
    state_check = next(entry for entry in report.checks if entry.name == "state")

    assert backend_check.status == CheckStatus.PASS
    assert state_check.status == CheckStatus.PASS
| 661 | |
| 662 | |
@pytest.mark.asyncio
async def test_collect_doctor_report_surfaces_backend_and_state_failures(temp_dir: Path) -> None:
    """An unhealthy backend and an unparsable project-memory file both fail the report.

    The backend stub lists only a different model than the one requested, and
    ``project-memory.json`` contains invalid JSON.
    """

    def unhealthy_backend(model: str) -> FakeOllamaBackend:
        # Unhealthy stub whose model list does not contain the requested model.
        return FakeOllamaBackend(
            model=model,
            health=False,
            models=[{"name": "llama3.1:8b"}],
            model_details=None,
        )

    _write_python_workspace(temp_dir)
    loader_root = temp_dir / ".loader"
    loader_root.mkdir()
    (loader_root / "project-memory.json").write_text("{broken json")

    report = await collect_doctor_report(
        temp_dir,
        model="missing-model:latest",
        backend_factory=unhealthy_backend,
    )

    backend_check = next(entry for entry in report.checks if entry.name == "backend")
    state_check = next(entry for entry in report.checks if entry.name == "state")

    assert report.overall_status == CheckStatus.FAIL
    assert backend_check.status == CheckStatus.FAIL
    assert "not pulled" in backend_check.message
    assert state_check.status == CheckStatus.FAIL
    assert "corrupted" in state_check.message
| 688 | |
| 689 | |
@pytest.mark.asyncio
async def test_collect_doctor_report_fails_closed_on_invalid_permission_rules(
    temp_dir: Path,
) -> None:
    """A malformed permission-rules file fails the report even with a healthy backend."""

    def healthy_backend(model: str) -> FakeOllamaBackend:
        # Healthy stub, so the permissions check is the only expected failure.
        return FakeOllamaBackend(
            model=model,
            health=True,
            models=[{"name": "qwen2.5-coder:14b"}],
        )

    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    rules_file = temp_dir / ".loader" / "permission-rules.json"
    # "allow" holds a string here; the test expects this to be reported as invalid.
    rules_file.write_text('{"allow": "nope"}\n')

    report = await collect_doctor_report(
        temp_dir,
        model="qwen2.5-coder:14b",
        permission_mode="prompt",
        backend_factory=healthy_backend,
    )

    permission_check = next(entry for entry in report.checks if entry.name == "permissions")
    assert report.overall_status == CheckStatus.FAIL
    assert permission_check.status == CheckStatus.FAIL
    assert report.permission_rules_valid is False
    assert "invalid" in permission_check.message.lower()
| 714 | |
| 715 | |
| 716 | def test_status_and_session_surfaces_reflect_persisted_state(temp_dir: Path) -> None: |
| 717 | _write_python_workspace(temp_dir) |
| 718 | _ensure_loader_dirs(temp_dir) |
| 719 | session_id, dod_path = _persist_session_with_dod(temp_dir) |
| 720 | _persist_explore_snapshot(temp_dir) |
| 721 | |
| 722 | snapshot = collect_status_snapshot( |
| 723 | temp_dir, |
| 724 | model="llama3.1:8b", |
| 725 | ) |
| 726 | sessions = list_session_summaries(temp_dir) |
| 727 | detail = load_session_detail(session_id, project_root=temp_dir) |
| 728 | |
| 729 | assert snapshot.active_session_id == session_id |
| 730 | assert snapshot.dod_status == "fixing" |
| 731 | assert snapshot.dod_pending_items_count == 1 |
| 732 | assert snapshot.last_verification_result == "failed" |
| 733 | assert snapshot.active_dod_path == dod_path |
| 734 | assert snapshot.permission_mode == "prompt" |
| 735 | assert snapshot.runtime_boundary_summary == "runtime-first via runtime-handle (RuntimeHandle)" |
| 736 | assert snapshot.runtime_owner_type == "RuntimeHandle" |
| 737 | assert snapshot.runtime_owner_path == "runtime-handle" |
| 738 | assert snapshot.permission_rule_counts == {"allow": 1, "deny": 2, "ask": 1} |
| 739 | assert snapshot.permission_prompting_enabled is True |
| 740 | assert snapshot.permission_rules_valid is True |
| 741 | assert snapshot.permission_rules_source == str( |
| 742 | temp_dir / ".loader" / "permission-rules.json" |
| 743 | ) |
| 744 | assert snapshot.prompt_format == "native" |
| 745 | assert snapshot.prompt_sections == [ |
| 746 | "Runtime Config", |
| 747 | "Workflow Context", |
| 748 | "Mode Guidance", |
| 749 | ] |
| 750 | assert snapshot.workflow_reason_code == "verification_failed_reentry" |
| 751 | assert snapshot.workflow_reason_summary == ( |
| 752 | "verification failed; returning to execute for fixes" |
| 753 | ) |
| 754 | assert snapshot.workflow_decision_kind == "reentry" |
| 755 | assert snapshot.workflow_scheduled_next_mode == "verify" |
| 756 | assert snapshot.active_turn_phase == "completion" |
| 757 | assert snapshot.completion_decision_code == "verification_failed_reentry" |
| 758 | assert snapshot.completion_decision_summary == ( |
| 759 | "continued after verification failed and the runtime re-entered execute mode" |
| 760 | ) |
| 761 | assert snapshot.last_turn_transition_summary == ( |
| 762 | "completion -> finalize [terminal] Finalizing completed turn" |
| 763 | ) |
| 764 | assert snapshot.explore_turn_count == 2 |
| 765 | assert snapshot.explore_message_count == 4 |
| 766 | assert snapshot.explore_history_mode == "continue" |
| 767 | assert snapshot.explore_last_query == "What file did you mention?" |
| 768 | assert snapshot.explore_last_response == "I mentioned README.md." |
| 769 | assert snapshot.explore_updated_at is not None |
| 770 | assert [item.status for item in snapshot.recent_verification] == ["failed"] |
| 771 | assert [item.command for item in snapshot.recent_verification] == ["pytest -q"] |
| 772 | assert [item.detail for item in snapshot.recent_verification] == ["1 failed"] |
| 773 | assert snapshot.verification_state_summary == "failed for pytest -q" |
| 774 | |
| 775 | assert len(sessions) == 1 |
| 776 | assert sessions[0].session_id == session_id |
| 777 | assert sessions[0].is_current is True |
| 778 | assert sessions[0].runtime_owner_type == "RuntimeHandle" |
| 779 | assert sessions[0].runtime_owner_path == "runtime-handle" |
| 780 | assert sessions[0].runtime_boundary_summary == ( |
| 781 | "runtime-first via runtime-handle (RuntimeHandle)" |
| 782 | ) |
| 783 | assert sessions[0].dod_status == "fixing" |
| 784 | assert sessions[0].permission_prompting_enabled is True |
| 785 | assert sessions[0].permission_rule_counts == {"allow": 1, "deny": 2, "ask": 1} |
| 786 | assert sessions[0].permission_rules_source == str( |
| 787 | temp_dir / ".loader" / "permission-rules.json" |
| 788 | ) |
| 789 | assert sessions[0].prompt_format == "native" |
| 790 | assert sessions[0].workflow_reason_code == "verification_failed_reentry" |
| 791 | assert sessions[0].workflow_reason_summary == ( |
| 792 | "verification failed; returning to execute for fixes" |
| 793 | ) |
| 794 | assert sessions[0].workflow_decision_kind == "reentry" |
| 795 | assert sessions[0].completion_decision_code == "verification_failed_reentry" |
| 796 | assert sessions[0].completion_decision_summary == ( |
| 797 | "continued after verification failed and the runtime re-entered execute mode" |
| 798 | ) |
| 799 | assert sessions[0].last_turn_transition_summary == ( |
| 800 | "completion -> finalize [terminal] Finalizing completed turn" |
| 801 | ) |
| 802 | |
| 803 | assert detail.snapshot.session_id == session_id |
| 804 | assert detail.is_current is True |
| 805 | assert detail.snapshot.runtime_owner_type == "RuntimeHandle" |
| 806 | assert detail.snapshot.runtime_owner_path == "runtime-handle" |
| 807 | assert detail.runtime_boundary_summary == ( |
| 808 | "runtime-first via runtime-handle (RuntimeHandle)" |
| 809 | ) |
| 810 | assert detail.verification_state_summary == "failed for pytest -q" |
| 811 | assert detail.definition_of_done is not None |
| 812 | assert detail.definition_of_done.status == "fixing" |
| 813 | assert detail.snapshot.permission_rules_source == str( |
| 814 | temp_dir / ".loader" / "permission-rules.json" |
| 815 | ) |
| 816 | assert detail.snapshot.workflow_reason_code == "verification_failed_reentry" |
| 817 | assert detail.snapshot.last_completion_decision_code == ( |
| 818 | "verification_failed_reentry" |
| 819 | ) |
| 820 | assert [entry.decision_code for entry in detail.snapshot.completion_trace] == [ |
| 821 | "completion_response_accepted", |
| 822 | "verification_failed_reentry", |
| 823 | ] |
| 824 | assert [item.status for item in detail.recent_verification] == ["failed"] |
| 825 | assert [item.command for item in detail.recent_verification] == ["pytest -q"] |
| 826 | assert detail.snapshot.last_turn_transition_reason_code == "turn_complete" |
| 827 | assert len(detail.snapshot.workflow_timeline) == 2 |
| 828 | assert detail.snapshot.workflow_timeline[-1].scheduled_next_mode == "verify" |
| 829 | |
| 830 | |
def test_collect_workflow_timeline_reflects_persisted_history(temp_dir: Path) -> None:
    """Collect the timeline for a session persisted with a DoD and check its fields."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id, _ = _persist_session_with_dod(temp_dir)

    timeline = collect_workflow_timeline(project_root=temp_dir)

    assert timeline.session_id == persisted_id
    assert timeline.is_current is True

    # Plain equality checks, tabulated so each field sits next to its value.
    expected_fields = {
        "runtime_owner_type": "RuntimeHandle",
        "runtime_owner_path": "runtime-handle",
        "runtime_boundary_summary": "runtime-first via runtime-handle (RuntimeHandle)",
        "workflow_mode": "execute",
        "current_task": "Fix the failing tests",
        "verification_state_summary": "failed for pytest -q",
        "total_entries": 2,
    }
    for field_name, wanted in expected_fields.items():
        assert getattr(timeline, field_name) == wanted

    entry_kinds = [entry.kind for entry in timeline.entries]
    assert entry_kinds == ["handoff", "reentry"]
    assert timeline.entries[-1].reason_code == "verification_failed_reentry"
| 851 | |
| 852 | |
def test_collect_workflow_timeline_supports_filters_and_highlights(
    temp_dir: Path,
) -> None:
    """Request the timeline with mode="clarify" and limit=1, then check the result."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_rich_workflow(temp_dir)

    filters = {"mode": "clarify", "limit": 1}
    timeline = collect_workflow_timeline(project_root=temp_dir, **filters)

    # Snapshot-level bookkeeping about the applied filters.
    assert timeline.session_id == persisted_id
    assert timeline.total_entries == 3
    assert timeline.selected_mode == "clarify"
    assert timeline.selected_kind is None
    assert timeline.entry_limit == 1
    assert len(timeline.entries) == 1

    # The single returned entry.
    first_entry_expectations = {
        "kind": "clarify_continue",
        "clarify_stage": "readiness",
        "clarify_pressure_kind": "tradeoff",
        "missing_readiness_gates": ["non_goals", "decision_boundaries"],
    }
    for field_name, wanted in first_entry_expectations.items():
        assert getattr(timeline.entries[0], field_name) == wanted

    # Highlights and the ledger state.
    assert any(text.startswith("Asked again:") for text in timeline.highlights)
    assert timeline.workflow_ledger.assumptions[0].status == "contradicted"
    assert any(
        text.startswith("Contradicted assumptions:") for text in timeline.highlights
    )
| 885 | |
| 886 | |
def test_collect_workflow_timeline_highlights_policy_accountability(
    temp_dir: Path,
) -> None:
    """Check entry kinds and highlight text for the policy-accountability session."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_policy_accountability(temp_dir)

    timeline = collect_workflow_timeline(project_root=temp_dir)

    assert timeline.session_id == persisted_id

    expected_kinds = ["repair_retry", "completion_check", "completion_continue"]
    assert [entry.kind for entry in timeline.entries] == expected_kinds

    # Each prefix must open at least one highlight line.
    for prefix in ("Repair path:", "Completion decision:"):
        assert any(text.startswith(prefix) for text in timeline.highlights)
    assert any(
        "policy-stage=definition_of_done" in text for text in timeline.highlights
    )
| 907 | |
| 908 | |
def test_collect_status_snapshot_includes_latest_policy_summary(
    temp_dir: Path,
) -> None:
    """Check the policy summary and recent verification on the status snapshot."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_policy_accountability(temp_dir)

    status = collect_status_snapshot(temp_dir)

    assert status.latest_policy_summary is not None
    summary_fragments = (
        "verification_failed_reentry",
        "observed=verification failed for `pytest -q` [1 failed]",
        "policy-stage=definition_of_done",
    )
    for fragment in summary_fragments:
        assert fragment in status.latest_policy_summary

    assert status.latest_policy_blocking_evidence == [
        "verification failed for `pytest -q`"
    ]
    assert status.latest_policy_observed_verification == [
        "verification failed for `pytest -q` [1 failed]"
    ]

    # One column of the recent-verification rows per check.
    expected_columns = (
        ("status", ["failed"]),
        ("command", ["pytest -q"]),
        ("detail", ["1 failed"]),
    )
    for column, wanted in expected_columns:
        assert [getattr(row, column) for row in status.recent_verification] == wanted
| 933 | |
| 934 | |
def test_collect_status_snapshot_surfaces_pending_verification(
    temp_dir: Path,
) -> None:
    """Check the status snapshot for the session persisted with pending verification."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_pending_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    assert status.latest_policy_summary is not None
    for fragment in ("verification_pending", "policy-outcome=pending"):
        assert fragment in status.latest_policy_summary

    assert status.latest_policy_observed_verification == [
        "verification pending for `uv run pytest -q` [attempt 2]"
    ]

    # One column of the recent-verification rows per check.
    expected_columns = (
        ("status", ["pending"]),
        ("command", ["uv run pytest -q"]),
        ("attempt", ["attempt 2"]),
    )
    for column, wanted in expected_columns:
        assert [getattr(row, column) for row in status.recent_verification] == wanted

    expected_state = "pending (attempt 2) for uv run pytest -q"
    assert status.verification_state_summary == expected_state
| 958 | |
| 959 | |
def test_collect_status_snapshot_surfaces_planned_verification(
    temp_dir: Path,
) -> None:
    """Check the status snapshot for the session persisted with planned verification."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_planned_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    assert status.latest_policy_summary is not None
    for fragment in ("verification_planned", "policy-outcome=planned"):
        assert fragment in status.latest_policy_summary

    assert status.latest_policy_observed_verification == [
        "verification planned for `uv run pytest -q` [write changed src/loader/runtime/tool_batches.py; attempt 3]"
    ]

    # One column of the recent-verification rows per check.
    expected_columns = (
        ("status", ["planned"]),
        ("command", ["uv run pytest -q"]),
        ("attempt", ["attempt 3"]),
        ("detail", ["write changed src/loader/runtime/tool_batches.py"]),
    )
    for column, wanted in expected_columns:
        assert [getattr(row, column) for row in status.recent_verification] == wanted

    expected_state = "planned (attempt 3) for uv run pytest -q"
    assert status.verification_state_summary == expected_state
| 986 | |
| 987 | |
def test_collect_status_snapshot_surfaces_stale_verification(
    temp_dir: Path,
) -> None:
    """Check the status snapshot for the session persisted with stale verification."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_stale_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    assert status.latest_policy_summary is not None
    for fragment in ("verification_stale", "policy-outcome=stale"):
        assert fragment in status.latest_policy_summary

    assert status.latest_policy_observed_verification == [
        "verification became stale for `uv run pytest -q` after new mutating work [write changed src/loader/runtime/finalization.py; attempt 1 -> attempt 2]"
    ]

    # One column of the recent-verification rows per check.
    expected_columns = (
        ("status", ["stale"]),
        ("command", ["uv run pytest -q"]),
        ("attempt", ["attempt 1 -> attempt 2"]),
        ("detail", ["write changed src/loader/runtime/finalization.py"]),
    )
    for column, wanted in expected_columns:
        assert [getattr(row, column) for row in status.recent_verification] == wanted

    expected_state = "stale (attempt 1 -> attempt 2) for uv run pytest -q"
    assert status.verification_state_summary == expected_state
| 1016 | |
| 1017 | |
def test_collect_prompt_diff_uses_persisted_prompt_history(temp_dir: Path) -> None:
    """Collect the prompt diff for a persisted session and check both sides of it."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id, _ = _persist_session_with_dod(temp_dir)

    prompt_diff = collect_prompt_diff(project_root=temp_dir)

    assert prompt_diff.session_id == persisted_id

    # Both snapshots must exist before their workflow modes are compared.
    assert prompt_diff.previous is not None
    assert prompt_diff.current is not None
    assert prompt_diff.current.workflow_mode == "execute"
    assert prompt_diff.previous.workflow_mode == "verify"

    assert any("Workflow mode changed:" in text for text in prompt_diff.highlights)
    for fragment in ("---", "execute parser fix"):
        assert fragment in prompt_diff.unified_diff
| 1033 | |
| 1034 | |
def test_collect_workflow_artifact_diffs_reads_versioned_artifacts(
    temp_dir: Path,
) -> None:
    """Collect artifact diffs for the rich-workflow session and check the entries."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_rich_workflow(temp_dir)

    artifact_diffs = collect_workflow_artifact_diffs(project_root=temp_dir)

    assert artifact_diffs.session_id == persisted_id
    assert len(artifact_diffs.entries) == 3

    expected_kinds = {"clarify_brief", "implementation_plan", "verification_plan"}
    assert set(entry.kind for entry in artifact_diffs.entries) == expected_kinds

    assert any("notes.txt" in entry.unified_diff for entry in artifact_diffs.entries)
    assert artifact_diffs.highlights
| 1053 | |
| 1054 | |
def test_status_and_session_commands_render_persisted_state(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Invoke status, session list/show and workflow show; check their output text."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id, _ = _persist_session_with_dod(temp_dir)
    _persist_explore_snapshot(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    # All four commands run before any assertion is evaluated.
    status_result = cli.invoke(cli_main_module.status_cli, ["--model", "llama3.1:8b"])
    list_result = cli.invoke(cli_main_module.session_cli, ["list"])
    show_result = cli.invoke(cli_main_module.session_cli, ["show", session_id])
    workflow_result = cli.invoke(cli_main_module.workflow_cli, ["show"])

    # --- status ---
    assert status_result.exit_code == 0
    status_fragments = (
        session_id,
        "fixing",
        "Runtime Owner",
        "Boundary",
        "runtime-handle (RuntimeHandle)",
        "runtime-first via runtime-handle (RuntimeHandle)",
        "1 allow / 2 deny / 1 ask",
        "native",
        "Runtime Config, Workflow Context, Mode Guidance",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "continued after verification failed",
        "completion -> finalize",
        "Finalizing completed turn",
        "Explore Turns",
        "Explore History",
        "What file did you mention?",
        "pytest -q",
        "1 failed",
        "Verification State",
        "failed for pytest -q",
    )
    for fragment in status_fragments:
        assert fragment in status_result.output

    # --- session list ---
    assert list_result.exit_code == 0
    list_fragments = (
        session_id,
        "Runtime Owner",
        "Boundary",
        "runtime-handle (RuntimeHandle)",
        "runtime-first via runtime-handle (RuntimeHandle)",
        "1 allow / 2 deny / 1 ask",
        "prompting enabled",
        "native",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "completion -> finalize",
    )
    for fragment in list_fragments:
        assert fragment in list_result.output

    # --- session show ---
    assert show_result.exit_code == 0
    show_fragments_before_policy_check = (
        session_id,
        "Runtime Owner",
        "Boundary",
        "runtime-handle (RuntimeHandle)",
        "runtime-first via runtime-handle (RuntimeHandle)",
        "Patch the broken parser",
        "1 allow / 2 deny / 1 ask",
        "enabled",
        "Runtime Config, Workflow Context, Mode Guidance",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "Completion Trace",
        "Recent Verification",
        "Verification State",
        "failed for pytest -q",
        "continuation_check",
        "completion -> finalize",
        "Finalizing completed turn",
    )
    for fragment in show_fragments_before_policy_check:
        assert fragment in show_result.output
    # The one negative check: this heading must be absent from `session show`.
    assert "Policy Timeline" not in show_result.output
    show_fragments_after_policy_check = (
        "Workflow Timeline",
        "handoff",
        "next=verify",
        "pytest -q",
        "1 failed",
    )
    for fragment in show_fragments_after_policy_check:
        assert fragment in show_result.output

    # --- workflow show ---
    assert workflow_result.exit_code == 0
    workflow_fragments = (
        "Loader Workflow",
        "Workflow Timeline",
        session_id,
        "Runtime Owner",
        "Boundary",
        "runtime-handle (RuntimeHandle)",
        "runtime-first via runtime-handle (RuntimeHandle)",
        "Verification State",
        "failed for pytest -q",
        "handoff",
        "next=verify",
    )
    for fragment in workflow_fragments:
        assert fragment in workflow_result.output
| 1149 | |
| 1150 | |
def test_workflow_command_renders_policy_accountability_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run `workflow show` with and without `--policy` and check the rendered text."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_policy_accountability(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    # Default view.
    default_view = cli.invoke(cli_main_module.workflow_cli, ["show"])

    assert default_view.exit_code == 0
    default_fragments = (
        session_id,
        "repair_retry",
        "Repair path:",
        "Completion decision:",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "policy-stage=raw_text_tool_fallback",
        "policy-outcome=continue",
        "provenance=contradicts:verification@dod.evidence",
        "observed=verification failed for `pytest -q` [1 failed]",
    )
    for fragment in default_fragments:
        assert fragment in default_view.output

    # Same command again, this time with the --policy flag.
    policy_view = cli.invoke(cli_main_module.workflow_cli, ["show", "--policy"])

    assert policy_view.exit_code == 0
    policy_fragments = (
        "Loader Workflow",
        "Policy Timeline",
        "policy-only",
        "repair_retry",
        "verification_failed_reentry",
    )
    for fragment in policy_fragments:
        assert fragment in policy_view.output
    assert "handoff" not in policy_view.output
| 1188 | |
| 1189 | |
def test_workflow_command_renders_stale_verification_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run `workflow show` for the stale-verification session and check its output."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_stale_verification(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli.invoke(cli_main_module.workflow_cli, ["show"])

    assert outcome.exit_code == 0
    expected_fragments = (
        session_id,
        "Verify stale:",
        "verification_stale",
        "policy-outcome=stale",
        "Observed Verification",
        "Verification State",
        "stale (attempt 1 -> attempt 2) for uv run pytest -q",
        "uv run pytest -q",
        "new mutating work",
    )
    for fragment in expected_fragments:
        assert fragment in outcome.output
| 1213 | |
| 1214 | |
def test_workflow_command_renders_planned_verification_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run `workflow show` for the planned-verification session and check its output."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_planned_verification(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli.invoke(cli_main_module.workflow_cli, ["show"])

    assert outcome.exit_code == 0
    expected_fragments = (
        session_id,
        "Verify planned:",
        "verification_planned",
        "policy-outcome=planned",
        "Observed Verification",
        "Verification State",
        "planned (attempt 3) for uv run pytest -q",
        "verification planned for `uv run pytest -q`",
        "uv run pytest -q",
    )
    for fragment in expected_fragments:
        assert fragment in outcome.output
| 1238 | |
| 1239 | |
def test_collect_workflow_timeline_can_focus_on_policy_accountability(
    temp_dir: Path,
) -> None:
    """Collect the timeline with accountability_only=True and check the entry kinds."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_policy_accountability(temp_dir)

    timeline = collect_workflow_timeline(project_root=temp_dir, accountability_only=True)

    assert timeline.session_id == persisted_id
    assert timeline.selected_accountability_only is True

    expected_kinds = ["repair_retry", "completion_check", "completion_continue"]
    observed_kinds = [entry.kind for entry in timeline.entries]
    assert observed_kinds == expected_kinds
| 1259 | |
| 1260 | |
def test_session_show_renders_policy_timeline_preview(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run `session show` for the policy-accountability session and check its output."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_policy_accountability(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli.invoke(cli_main_module.session_cli, ["show", session_id])

    assert outcome.exit_code == 0
    expected_fragments = (
        "Latest Policy",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "Policy Timeline",
        "repair_retry",
        "completion:",
        "provenance=contradicts:verification@dod.evidence",
    )
    for fragment in expected_fragments:
        assert fragment in outcome.output
| 1285 | |
| 1286 | |
def test_status_command_renders_latest_policy_summary(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run the status command with no arguments and check the policy-related output."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_policy_accountability(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli.invoke(cli_main_module.status_cli, [])

    assert outcome.exit_code == 0
    expected_fragments = (
        session_id,
        "Latest Policy",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "Recent Verification",
        "policy-stage=definition_of_done",
    )
    for fragment in expected_fragments:
        assert fragment in outcome.output
| 1310 | |
| 1311 | |
def test_workflow_show_renders_workflow_ledger(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run `workflow show` for the rich-workflow session and check the ledger text."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_rich_workflow(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli.invoke(cli_main_module.workflow_cli, ["show"])

    assert outcome.exit_code == 0
    expected_fragments = (
        "Workflow Ledger",
        "Assumptions",
        "contradicted",
        "notes.txt stays out of scope",
        "Acceptance Anchors",
        "Decision Boundaries",
    )
    for fragment in expected_fragments:
        assert fragment in outcome.output
| 1332 | |
| 1333 | |
def test_workflow_show_command_supports_filters_and_highlights(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run `workflow show` with --kind/--limit, then --mode/--limit; check both."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_rich_workflow(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    # First invocation: filter by entry kind.
    kind_args = ["show", "--kind", "reentry", "--limit", "1", session_id]
    kind_view = cli.invoke(cli_main_module.workflow_cli, kind_args)

    assert kind_view.exit_code == 0
    kind_fragments = (
        "Loader Workflow",
        "1 shown / 3 total",
        "kind=reentry, limit=1",
        "Workflow Answers",
        "Recovered workflow:",
        "full_replan_required",
        "evidence=confirmed touchpoint:",
    )
    for fragment in kind_fragments:
        assert fragment in kind_view.output

    # Second invocation: filter by workflow mode.
    mode_args = ["show", "--mode", "clarify", "--limit", "1", session_id]
    mode_view = cli.invoke(cli_main_module.workflow_cli, mode_args)

    assert mode_view.exit_code == 0
    mode_fragments = (
        "stage=readiness",
        "pressure=tradeoff",
        "gates=non_goals,decision_boundaries",
    )
    for fragment in mode_fragments:
        assert fragment in mode_view.output
| 1368 | |
| 1369 | |
def test_workflow_show_can_render_artifact_diffs(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run `workflow show --diff --full-diff` and check the artifact diff output."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_rich_workflow(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    diff_args = ["show", "--diff", "--full-diff"]
    outcome = cli.invoke(cli_main_module.workflow_cli, diff_args)

    assert outcome.exit_code == 0
    expected_fragments = (
        "Artifact Changes",
        "Artifact Diff Summary",
        "clarify_brief",
        "implementation_plan",
        "verification_plan",
        "notes.txt",
    )
    for fragment in expected_fragments:
        assert fragment in outcome.output
| 1393 | |
| 1394 | |
def test_collect_prompt_preview_uses_persisted_runtime_state(temp_dir: Path) -> None:
    """Collect a prompt preview after persisting a session and check its fields."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id, _ = _persist_session_with_dod(temp_dir)

    preview = collect_prompt_preview(temp_dir, model="qwen2.5-coder:14b")

    expected_fields = {
        "active_session_id": persisted_id,
        "workflow_mode": "execute",
        "workflow_reason_code": "verification_failed_reentry",
        "workflow_decision_kind": "reentry",
        "permission_mode": "prompt",
    }
    for field_name, wanted in expected_fields.items():
        assert getattr(preview, field_name) == wanted

    # The expected format follows the capability profile reported on the preview.
    if preview.capability_profile.supports_native_tools:
        expected_format = "native"
    else:
        expected_format = "react"
    assert preview.prompt_format == expected_format

    assert preview.prompt_sections == [
        "Runtime Config",
        "Workflow Context",
        "Mode Guidance",
        "Project Context",
        "Project Tips",
    ]
    for fragment in ("## Execute Mode", "Current task: Fix the failing tests"):
        assert fragment in preview.content
| 1422 | |
| 1423 | |
def test_prompt_show_command_renders_preview_without_model_call(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run `prompt show` and compare its output with a directly collected preview."""
    model_name = "qwen2.5-coder:14b"
    task_text = "Preview the current Loader contract"

    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_dod(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)
    # Collected directly so the CLI output can be checked against its prompt format.
    preview = collect_prompt_preview(
        temp_dir,
        model=model_name,
        current_task=task_text,
    )

    outcome = cli.invoke(
        cli_main_module.prompt_cli,
        ["show", "--model", model_name, task_text],
    )

    assert outcome.exit_code == 0
    expected_fragments = (
        "Prompt Preview",
        "Prompt Body",
        "Preview the current Loader contract",
        preview.prompt_format,
        "Workflow Context",
        "Execute Mode",
    )
    for fragment in expected_fragments:
        assert fragment in outcome.output
| 1452 | |
| 1453 | |
def test_prompt_diff_command_renders_persisted_prompt_changes(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run `prompt diff --full` for a persisted session and check the rendered text."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_dod(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli.invoke(cli_main_module.prompt_cli, ["diff", "--full"])

    assert outcome.exit_code == 0
    expected_fragments = (
        "Prompt Diff",
        "Prompt Changes",
        "Workflow mode changed:",
        "Prompt Unified Diff",
        "execute parser fix",
    )
    for fragment in expected_fragments:
        assert fragment in outcome.output
| 1473 | |
| 1474 | |
def test_permission_snapshot_and_dry_run_reflect_rules(temp_dir: Path) -> None:
    """Check the permission snapshot and a dry-run check against a rules file.

    The rules file declares one allow, one deny and one ask rule for the
    ``write`` tool. The dry-run writes "safe change" to ``README.md``, which
    matches both the allow rule (content) and the ask rule (path); the test
    expects the ask rule to be the one reported.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)

    rule_lines = [
        "{",
        ' "allow": [{"tool": "write", "contains": "safe change"}],',
        ' "deny": [{"tool": "write", "path_contains": "secrets"}],',
        ' "ask": [{"tool": "write", "path_contains": "README"}]',
        "}",
    ]
    rules_path = temp_dir / ".loader" / "permission-rules.json"
    rules_path.write_text("\n".join(rule_lines) + "\n")

    snapshot = collect_permission_snapshot(temp_dir, permission_mode="allow")
    tool_arguments = {
        "file_path": str(temp_dir / "README.md"),
        "content": "safe change\n",
    }
    check = dry_run_permission_check(
        "write",
        tool_arguments,
        project_root=temp_dir,
        permission_mode="allow",
    )

    # Snapshot: mode, validity and the parsed rule set.
    assert snapshot.active_mode == "allow"
    assert snapshot.prompting_enabled is True
    assert snapshot.rules_valid is True
    assert snapshot.rule_counts == {"allow": 1, "deny": 1, "ask": 1}
    first_allow_rule = snapshot.normalized_rules["allow"][0]
    assert first_allow_rule.tool_name == "write"
    assert first_allow_rule.contains == "safe change"

    # Dry run: the README path rule decides the outcome.
    assert check.required_mode == "workspace-write"
    assert check.decision == "ask"
    assert check.matched_disposition == "ask"
    assert check.matched_rule == "tool=write, path_contains=README"
    assert "file_path=" in check.input_summary
| 1514 | |
| 1515 | |
def test_status_snapshot_reports_invalid_permission_rules(temp_dir: Path) -> None:
    """Check that the status snapshot flags an unparseable rules file.

    A syntactically broken ``permission-rules.json`` is written into the
    workspace. The snapshot must report the rules as invalid, keep prompting
    enabled in ``prompt`` mode, and still name the rules file as its source.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    (temp_dir / ".loader" / "permission-rules.json").write_text("{broken json")

    snapshot = collect_status_snapshot(temp_dir, permission_mode="prompt")

    assert snapshot.permission_rules_valid is False
    assert snapshot.permission_prompting_enabled is True
    # Normalise separators before comparing so the check does not depend on
    # the platform's native path separator.
    rules_source = Path(snapshot.permission_rules_source).as_posix()
    assert rules_source.endswith(".loader/permission-rules.json")
| 1526 | |
| 1527 | |
def test_permissions_show_and_check_commands_render_policy(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check the output of ``permissions show`` and ``permissions check``.

    Both commands run in ``allow`` mode against a rules file with one allow
    rule and one ask rule. ``check`` evaluates a ``write`` to ``README.md``
    with content supplied through ``--args``.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)

    rules_text = "\n".join(
        (
            "{",
            ' "allow": [{"tool": "write", "contains": "safe change"}],',
            ' "ask": [{"tool": "write", "path_contains": "README"}]',
            "}",
        )
    )
    (temp_dir / ".loader" / "permission-rules.json").write_text(rules_text + "\n")

    runner = CliRunner()
    monkeypatch.chdir(temp_dir)
    mode_option = ["--permission-mode", "allow"]

    show_result = runner.invoke(
        cli_main_module.permissions_cli,
        ["show", *mode_option],
    )
    check_argv = [
        "check",
        *mode_option,
        "--args",
        '{"content":"safe change\\n"}',
        "write",
        "README.md",
    ]
    check_result = runner.invoke(cli_main_module.permissions_cli, check_argv)

    assert show_result.exit_code == 0
    for fragment in (
        "Loader Permissions",
        "Permission Mode",
        "Rules Source",
        "safe change",
        "README",
    ):
        assert fragment in show_result.output

    assert check_result.exit_code == 0
    for fragment in (
        "Permission Check",
        "workspace-write",
        "ask",
        "tool=write, path_contains=README",
    ):
        assert fragment in check_result.output
| 1578 | |
| 1579 | |
def test_permissions_check_rejects_invalid_json_args(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``permissions check`` fails on malformed ``--args`` JSON.

    The command must exit non-zero and name the offending option in its
    output.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    monkeypatch.chdir(temp_dir)

    malformed_args = "{broken json"
    outcome = CliRunner().invoke(
        cli_main_module.permissions_cli,
        ["check", "bash", "--args", malformed_args, "ls"],
    )

    assert outcome.exit_code != 0
    assert "`--args` must be valid JSON" in outcome.output
| 1597 | |
| 1598 | |
def test_permissions_show_surfaces_invalid_rule_file(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``permissions show`` reports a broken rules file.

    The command is expected to exit successfully while marking the rules as
    invalid and printing both the rule error and the rules source.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    broken_rules = temp_dir / ".loader" / "permission-rules.json"
    broken_rules.write_text("{broken json")

    monkeypatch.chdir(temp_dir)
    outcome = CliRunner().invoke(cli_main_module.permissions_cli, ["show"])

    assert outcome.exit_code == 0
    # Only the word "invalid" is matched case-insensitively.
    assert "invalid" in outcome.output.lower()
    for heading in ("Rule Error", "Rules Source"):
        assert heading in outcome.output
| 1616 | |
| 1617 | |
def test_explore_command_can_show_and_reset_continuity(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check ``explore --status`` and ``explore --reset`` on persisted state.

    ``--status`` must render the state stored by ``_persist_explore_snapshot``;
    ``--reset`` must confirm the clear and leave ``ExploreStateStore.load()``
    returning ``None``.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_explore_snapshot(temp_dir)

    monkeypatch.chdir(temp_dir)
    runner = CliRunner()
    explore_command = cli_main_module.explore_cli

    # Status is checked first, while the persisted snapshot still exists.
    status_result = runner.invoke(explore_command, ["--status"])

    assert status_result.exit_code == 0
    for fragment in (
        "Loader Explore State",
        "continue",
        "What file did you mention?",
    ):
        assert fragment in status_result.output

    reset_result = runner.invoke(explore_command, ["--reset"])

    assert reset_result.exit_code == 0
    assert "Cleared persisted explore continuity." in reset_result.output
    remaining_state = ExploreStateStore(temp_dir).load()
    assert remaining_state is None
| 1641 | |
| 1642 | |
def test_root_help_lists_special_commands() -> None:
    """Check that the root help text mentions each special command."""
    rendered_help = cli_main_module._loader_help_text()

    expected_usages = (
        "loader doctor",
        "loader status",
        "loader explore <prompt>",
        "loader permissions show",
        "loader session resume <id>",
    )
    for usage in expected_usages:
        assert usage in rendered_help
| 1651 | |
| 1652 | |
def test_main_dispatches_session_resume_to_primary_cli(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``loader session resume <id>`` reaches the primary CLI.

    ``cli.main`` is replaced with a recorder. With ``sys.argv`` set to
    ``loader session resume abc123 --no-tui``, ``main()`` must call it with
    ``--resume-target abc123 --no-tui`` and the program name ``loader``.
    """
    captured: dict[str, object] = {}

    def fake_cli_main(*, args: list[str], prog_name: str) -> None:
        # Record the keyword arguments instead of running the real CLI.
        captured.update(args=args, prog_name=prog_name)

    simulated_argv = ["loader", "session", "resume", "abc123", "--no-tui"]
    monkeypatch.setattr(cli_main_module.cli, "main", fake_cli_main)
    monkeypatch.setattr(sys, "argv", simulated_argv)

    cli_main_module.main()

    expected_call = {
        "args": ["--resume-target", "abc123", "--no-tui"],
        "prog_name": "loader",
    }
    assert captured == expected_call