| 1 | """Tests for doctor, status, and session inspection surfaces.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import sys |
| 6 | from pathlib import Path |
| 7 | |
| 8 | import pytest |
| 9 | from click.testing import CliRunner |
| 10 | |
| 11 | import loader.cli.main as cli_main_module |
| 12 | from loader.llm.base import Message, Role |
| 13 | from loader.runtime.completion_trace import CompletionTraceEntry |
| 14 | from loader.runtime.dod import ( |
| 15 | DefinitionOfDoneStore, |
| 16 | VerificationEvidence, |
| 17 | create_definition_of_done, |
| 18 | ) |
| 19 | from loader.runtime.evidence_provenance import EvidenceProvenance |
| 20 | from loader.runtime.explore_state import ExploreSnapshot, ExploreStateStore |
| 21 | from loader.runtime.inspection import ( |
| 22 | CheckStatus, |
| 23 | collect_doctor_report, |
| 24 | collect_permission_snapshot, |
| 25 | collect_prompt_diff, |
| 26 | collect_prompt_preview, |
| 27 | collect_status_snapshot, |
| 28 | collect_workflow_artifact_diffs, |
| 29 | collect_workflow_timeline, |
| 30 | dry_run_permission_check, |
| 31 | list_session_summaries, |
| 32 | load_session_detail, |
| 33 | ) |
| 34 | from loader.runtime.prompt_history import PromptSnapshot |
| 35 | from loader.runtime.session import SessionSnapshot, SessionStore |
| 36 | from loader.runtime.verification_observations import VerificationObservation |
| 37 | from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem |
| 38 | from loader.runtime.workflow_policy import WorkflowTimelineEntry |
| 39 | |
| 40 | |
| 41 | class FakeOllamaBackend: |
| 42 | """Small async backend stub for doctor tests.""" |
| 43 | |
| 44 | def __init__( |
| 45 | self, |
| 46 | *, |
| 47 | model: str, |
| 48 | health: bool, |
| 49 | models: list[dict[str, object]], |
| 50 | model_details: dict[str, object] | None = None, |
| 51 | ) -> None: |
| 52 | self.model = model |
| 53 | self._health = health |
| 54 | self._models = models |
| 55 | self._model_details = model_details |
| 56 | |
| 57 | async def list_models(self) -> list[dict[str, object]]: |
| 58 | return list(self._models) |
| 59 | |
| 60 | async def health_check(self) -> bool: |
| 61 | return self._health |
| 62 | |
| 63 | async def describe_model(self) -> dict[str, object] | None: |
| 64 | return self._model_details |
| 65 | |
| 66 | async def close(self) -> None: |
| 67 | return None |
| 68 | |
| 69 | |
def _write_python_workspace(temp_dir: Path) -> None:
    """Lay out a minimal Python project: ``pyproject.toml``, ``src/``, ``tests/``."""
    pyproject_lines = (
        "[build-system]",
        'requires = ["hatchling"]',
        'build-backend = "hatchling.build"',
        "",
        "[tool.pytest.ini_options]",
        'testpaths = ["tests"]',
        "",
    )
    # Each entry is newline-terminated, so the file ends with a blank line.
    pyproject_text = "".join(f"{line}\n" for line in pyproject_lines)
    (temp_dir / "pyproject.toml").write_text(pyproject_text)

    for package_dir in ("src", "tests"):
        (temp_dir / package_dir).mkdir()
| 87 | |
| 88 | |
def _ensure_loader_dirs(temp_dir: Path) -> None:
    """Create the ``.loader`` subdirectories and write ``{}`` to project-memory.json."""
    state_root = temp_dir / ".loader"
    for subdir in ("sessions", "state", "dod", "briefs", "plans"):
        state_root.joinpath(subdir).mkdir(parents=True, exist_ok=True)
    memory_file = state_root / "project-memory.json"
    memory_file.write_text("{}\n")
| 94 | |
| 95 | |
def _persist_session_with_dod(temp_dir: Path) -> tuple[str, str]:
    """Persist a "fixing" definition of done and a session that references it.

    The session carries a two-entry workflow timeline (handoff to verify, then
    re-entry into execute), two prompt snapshots, and a two-step completion
    trace.

    Returns:
        The saved session id and the definition-of-done path as a string.
    """
    task = "Fix the failing tests"
    loader_root = temp_dir / ".loader"
    core_sections = ["Runtime Config", "Workflow Context", "Mode Guidance"]
    reentry_summary = (
        "continued after verification failed and the runtime re-entered execute mode"
    )

    # Definition of done with one failed verification run attached.
    dod = create_definition_of_done(task)
    dod.status = "fixing"
    dod.pending_items = ["Re-run pytest"]
    dod.completed_items = ["Patch the broken parser"]
    dod.last_verification_result = "failed"
    failing_run = VerificationEvidence(
        command="pytest -q",
        passed=False,
        stderr="1 failed",
        kind="test",
    )
    dod.evidence = [failing_run]
    dod_path = DefinitionOfDoneStore(temp_dir).save(dod)

    # Both timeline entries point at the same plan artifact path.
    plan_artifact = str(loader_root / "plans" / "fix-tests.md")
    handoff_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T12:04:00Z",
        kind="handoff",
        mode="verify",
        reason_code="execute_completed",
        summary="verify: execution completed; verifying the parser fix",
        decision_kind="handoff",
        prompt_format="native",
        prompt_sections=list(core_sections),
        artifact_paths=[plan_artifact],
    )
    failed_observation = VerificationObservation(
        status="failed",
        summary="verification failed for `pytest -q`",
        command="pytest -q",
        kind="test",
        detail="1 failed",
    )
    reentry_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T12:05:00Z",
        kind="reentry",
        mode="execute",
        reason_code="verification_failed_reentry",
        summary="execute: verification failed; returning to execute for fixes",
        decision_kind="reentry",
        scheduled_next_mode="verify",
        runner_up_mode="verify",
        runner_up_score=0.52,
        verification_observations=[failed_observation],
        prompt_format="native",
        prompt_sections=list(core_sections),
        artifact_paths=[plan_artifact],
    )

    verify_prompt = PromptSnapshot(
        timestamp="2026-04-06T12:04:00Z",
        workflow_mode="verify",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=list(core_sections),
        content="# Introduction\nverify parser fix\n",
    )
    execute_prompt = PromptSnapshot(
        timestamp="2026-04-06T12:05:00Z",
        workflow_mode="execute",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=[*core_sections, "Project Context"],
        content="# Introduction\nexecute parser fix\n# Project Context\npython\n",
    )

    accepted_step = CompletionTraceEntry(
        stage="continuation_check",
        outcome="accept",
        decision_code="completion_response_accepted",
        decision_summary="accepted the response because completion heuristics found no missing follow-through",
    )
    reentry_step = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="continue",
        decision_code="verification_failed_reentry",
        decision_summary=reentry_summary,
    )

    snapshot = SessionSnapshot(
        session_id="20260406T120000Z-abcdef01",
        created_at="2026-04-06T12:00:00Z",
        updated_at="2026-04-06T12:05:00Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="I updated the parser."),
        ],
        usage={"turns": 1, "tool_calls": 2},
        active_dod_path=str(dod_path),
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="prompt",
        permission_prompting_enabled=True,
        permission_rule_counts={"allow": 1, "deny": 2, "ask": 1},
        permission_rules_source=str(loader_root / "permission-rules.json"),
        prompt_format="native",
        prompt_sections=list(core_sections),
        prompt_history=[verify_prompt, execute_prompt],
        workflow_reason_code="verification_failed_reentry",
        workflow_reason_summary="verification failed; returning to execute for fixes",
        workflow_decision_kind="reentry",
        workflow_ambiguity_score=0.1,
        workflow_complexity_score=0.7,
        workflow_scheduled_next_mode="verify",
        active_turn_phase="completion",
        last_completion_decision_code="verification_failed_reentry",
        last_completion_decision_summary=reentry_summary,
        completion_trace=[accepted_step, reentry_step],
        last_turn_transition_summary="completion -> finalize [terminal] Finalizing completed turn",
        last_turn_transition_kind="terminal",
        last_turn_transition_reason_code="turn_complete",
        workflow_timeline=[handoff_entry, reentry_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id, str(dod_path)
| 225 | |
| 226 | |
def _persist_explore_snapshot(temp_dir: Path) -> None:
    """Save a two-turn, four-message explore conversation via ``ExploreStateStore``."""
    closing_question = "What file did you mention?"
    closing_answer = "I mentioned README.md."
    transcript = (
        (Role.USER, "Where should I start?"),
        (Role.ASSISTANT, "Start with README.md."),
        (Role.USER, closing_question),
        (Role.ASSISTANT, closing_answer),
    )

    store = ExploreStateStore(temp_dir)
    explore_state = ExploreSnapshot(
        turn_count=2,
        model_name="llama3.1:8b",
        messages=[Message(role=role, content=text) for role, text in transcript],
        last_history_mode="continue",
        last_query=closing_question,
        last_response=closing_answer,
    )
    store.save(explore_state)
| 243 | |
| 244 | |
def _persist_session_with_rich_workflow(temp_dir: Path) -> str:
    """Persist a session with brief/plan artifacts, a timeline, and a ledger.

    Writes an older and a newer clarify brief and plan directory under
    ``.loader``, saves a definition of done that references the newer
    artifacts, and stores a session whose timeline has a clarify entry, a plan
    re-entry, and a skipped verify step.

    Returns:
        The saved session id.
    """
    slug = "tighten-loader-workflow-behavior"
    task = "Tighten Loader workflow behavior"
    briefs_dir = temp_dir / ".loader" / "briefs"
    plans_dir = temp_dir / ".loader" / "plans"
    core_sections = ["Runtime Config", "Workflow Context", "Mode Guidance"]

    # Clarify briefs; the briefs directory itself is not created here.
    brief_old = briefs_dir / f"20260406T150000Z-{slug}.md"
    brief_new = briefs_dir / f"20260406T150200Z-{slug}.md"
    brief_texts = (
        (
            brief_old,
            "# Task Brief\n\n## Likely Touchpoints\n- planned.txt\n\n## Acceptance Criteria\n- planned.txt exists.\n",
        ),
        (
            brief_new,
            "# Task Brief\n\n## Likely Touchpoints\n- notes.txt\n\n## Acceptance Criteria\n- notes.txt exists.\n",
        ),
    )
    for brief_path, brief_text in brief_texts:
        brief_path.write_text(brief_text)

    # Plan directories, each with an implementation and a verification plan.
    plan_old_root = plans_dir / f"20260406T150100Z-{slug}"
    plan_new_root = plans_dir / f"20260406T150300Z-{slug}"
    for plan_root in (plan_old_root, plan_new_root):
        plan_root.mkdir(parents=True, exist_ok=True)
    new_implementation = plan_new_root / "implementation.md"
    new_verification = plan_new_root / "verification.md"
    plan_texts = (
        (
            plan_old_root / "implementation.md",
            "# Implementation Plan\n\n## File Changes\n- Create planned.txt.\n",
        ),
        (
            plan_old_root / "verification.md",
            "# Verification Plan\n\n## Acceptance Criteria\n- planned.txt exists.\n",
        ),
        (
            new_implementation,
            "# Implementation Plan\n\n## File Changes\n- Keep notes.txt as the runtime artifact.\n",
        ),
        (
            new_verification,
            "# Verification Plan\n\n## Acceptance Criteria\n- notes.txt exists.\n",
        ),
    )
    for plan_path, plan_text in plan_texts:
        plan_path.write_text(plan_text)

    # The definition of done references only the newer artifacts.
    dod = create_definition_of_done(task)
    dod.status = "fixing"
    dod.clarify_brief = str(brief_new)
    dod.implementation_plan = str(new_implementation)
    dod.verification_plan = str(new_verification)
    dod.acceptance_criteria = ["notes.txt exists in the workspace root."]
    dod_path = DefinitionOfDoneStore(temp_dir).save(dod)

    plan_prompt = PromptSnapshot(
        timestamp="2026-04-06T15:02:00Z",
        workflow_mode="plan",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=list(core_sections),
        content="# Introduction\nplan around planned.txt\n",
    )
    execute_prompt = PromptSnapshot(
        timestamp="2026-04-06T15:04:00Z",
        workflow_mode="execute",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=[*core_sections, "Project Context"],
        content="# Introduction\nexecute around notes.txt\n# Project Context\npython\n",
    )

    clarify_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T15:01:00Z",
        kind="clarify_continue",
        mode="clarify",
        reason_code="clarify_pressure_pass_required",
        summary="clarify: Loader still needs a tradeoff pass around non-goals",
        decision_kind="forced",
        unresolved_questions=["Concrete files or subsystems are still not pinned down."],
        signal_summary=["ambiguity=0.82", "open_questions=1"],
        clarify_stage="readiness",
        clarify_pressure_kind="tradeoff",
        pressure_pass_complete=False,
        missing_readiness_gates=["non_goals", "decision_boundaries"],
    )
    replan_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T15:02:00Z",
        kind="reentry",
        mode="plan",
        reason_code="full_replan_required",
        summary="plan: clarify and plan artifacts drifted; rebuilding the plan",
        decision_kind="reentry",
        scheduled_next_mode="execute",
        unresolved_questions=["Touched files outside the current plan: notes.txt"],
        evidence_summary=[
            "confirmed touchpoint: `notes.txt` was already touched during execution.",
            (
                "verification contradiction: Failed verification exposed "
                "missing brief coverage for `notes.txt exists`."
            ),
        ],
        signal_summary=["recent_reentry=1", "stale_plan=true"],
        artifact_paths=[
            str(brief_new),
            str(new_implementation),
            str(new_verification),
        ],
    )
    verify_skip_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T15:03:00Z",
        kind="verify_skip",
        mode="verify",
        reason_code="verify_skip_no_commands",
        summary="verify: no verification commands were available for this turn",
        decision_kind="forced",
        signal_summary=["verify_pressure=low"],
    )

    # One ledger item per category, all introduced in clarify and updated in recovery.
    scope_assumption = WorkflowLedgerItem(
        text="notes.txt stays out of scope unless clarified otherwise.",
        status="contradicted",
        introduced_phase="clarify",
        updated_phase="recovery",
        evidence=["Clarify scope assumed `notes.txt` stayed out of scope."],
    )
    acceptance_anchor = WorkflowLedgerItem(
        text="notes.txt exists in the workspace root.",
        status="changed",
        introduced_phase="clarify",
        updated_phase="recovery",
        evidence=[
            (
                "Failed verification exposed missing brief coverage for "
                "`notes.txt exists`."
            )
        ],
    )
    escalation_boundary = WorkflowLedgerItem(
        text="Escalate before broad UX changes.",
        status="reopened",
        introduced_phase="clarify",
        updated_phase="recovery",
        evidence=["The active task framing outgrew the persisted clarify brief."],
    )
    ledger = WorkflowLedger(
        assumptions=[scope_assumption],
        acceptance_anchors=[acceptance_anchor],
        decision_boundaries=[escalation_boundary],
    )

    snapshot = SessionSnapshot(
        session_id="20260406T150000Z-feedface",
        created_at="2026-04-06T15:00:00Z",
        updated_at="2026-04-06T15:04:00Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="I refreshed the workflow contract."),
        ],
        active_dod_path=str(dod_path),
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="prompt",
        permission_prompting_enabled=True,
        prompt_format="native",
        prompt_sections=list(core_sections),
        prompt_history=[plan_prompt, execute_prompt],
        workflow_reason_code="full_replan_completed",
        workflow_reason_summary="clarify and plan artifacts refreshed; returning to execute",
        workflow_decision_kind="handoff",
        workflow_timeline=[clarify_entry, replan_entry, verify_skip_entry],
        workflow_ledger=ledger,
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
| 410 | |
| 411 | |
def _persist_session_with_policy_accountability(temp_dir: Path) -> str:
    """Persist a session whose timeline records three policy decisions.

    The entries are a repair retry, an accepted completion check, and a
    completion that continued after a failed verification.

    Returns:
        The saved session id.
    """
    task = "Explain Loader policy accountability"
    core_sections = ["Runtime Config", "Workflow Context", "Mode Guidance"]
    failure_summary = "verification failed for `pytest -q`"

    repair_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:01:00Z",
        kind="repair_retry",
        mode="execute",
        reason_code="raw_text_tool_recovered",
        summary="repair: recovered raw-text tool calls into executable tool invocations",
        decision_kind="forced",
        policy_stage="raw_text_tool_fallback",
        policy_outcome="retry",
        prompt_format="native",
        prompt_sections=list(core_sections),
    )
    accepted_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:02:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="completion_response_accepted",
        summary="completion: accepted the response because completion heuristics found no missing follow-through",
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="accept",
        prompt_format="native",
        prompt_sections=list(core_sections),
    )

    # The final entry carries both the provenance record and the observation.
    failure_provenance = EvidenceProvenance(
        category="verification",
        source="dod.evidence",
        summary=failure_summary,
        status="contradicts",
        subject="pytest -q",
    )
    failure_observation = VerificationObservation(
        status="failed",
        summary=failure_summary,
        command="pytest -q",
        kind="test",
        detail="1 failed",
    )
    continue_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:03:00Z",
        kind="completion_continue",
        mode="execute",
        reason_code="verification_failed_reentry",
        summary="completion: continued after verification failed and the runtime re-entered execute mode",
        decision_kind="forced",
        policy_stage="definition_of_done",
        policy_outcome="continue",
        evidence_provenance=[failure_provenance],
        verification_observations=[failure_observation],
        prompt_format="native",
        prompt_sections=list(core_sections),
    )

    snapshot = SessionSnapshot(
        session_id="20260406T160000Z-abcd1234",
        created_at="2026-04-06T16:00:00Z",
        updated_at="2026-04-06T16:03:00Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="The runtime tracked repair and completion decisions."),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(core_sections),
        workflow_timeline=[repair_entry, accepted_entry, continue_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
| 487 | |
| 488 | |
def _persist_session_with_pending_verification(temp_dir: Path) -> str:
    """Persist a verify-mode session whose only timeline entry is pending.

    Returns:
        The saved session id.
    """
    task = "Verify the runtime changes"
    core_sections = ["Runtime Config", "Workflow Context", "Mode Guidance"]

    pending_observation = VerificationObservation(
        status="pending",
        summary="verification pending for `uv run pytest -q`",
        command="uv run pytest -q",
        kind="test",
        attempt_id="verification-attempt-2",
        attempt_number=2,
    )
    pending_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:05:30Z",
        kind="verify_observation",
        mode="verify",
        reason_code="verification_pending",
        summary="verify: verification is pending for the active command set",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="pending",
        verification_observations=[pending_observation],
        prompt_format="native",
        prompt_sections=list(core_sections),
    )

    snapshot = SessionSnapshot(
        session_id="20260406T160500Z-pending123",
        created_at="2026-04-06T16:05:00Z",
        updated_at="2026-04-06T16:05:30Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="Entering verification."),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="verify",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(core_sections),
        workflow_timeline=[pending_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
| 532 | |
| 533 | |
def _persist_session_with_planned_verification(temp_dir: Path) -> str:
    """Persist an execute-mode session whose only timeline entry is planned.

    Returns:
        The saved session id.
    """
    task = "Keep editing the runtime"
    core_sections = ["Runtime Config", "Workflow Context", "Mode Guidance"]

    planned_observation = VerificationObservation(
        status="planned",
        summary="verification planned for `uv run pytest -q`",
        command="uv run pytest -q",
        kind="runtime",
        detail="write changed src/loader/runtime/tool_batches.py",
        attempt_id="verification-attempt-3",
        attempt_number=3,
    )
    planned_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:04:50Z",
        kind="verify_observation",
        mode="execute",
        reason_code="verification_planned",
        summary="verify: verification is planned after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="planned",
        verification_observations=[planned_observation],
        prompt_format="native",
        prompt_sections=list(core_sections),
    )

    snapshot = SessionSnapshot(
        session_id="20260406T160430Z-plan1234",
        created_at="2026-04-06T16:04:30Z",
        updated_at="2026-04-06T16:04:50Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="Verification will run after execution."),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(core_sections),
        workflow_timeline=[planned_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
| 578 | |
| 579 | |
def _persist_session_with_stale_verification(temp_dir: Path) -> str:
    """Persist an execute-mode session whose only timeline entry is stale.

    Returns:
        The saved session id.
    """
    task = "Keep working on the runtime"
    core_sections = ["Runtime Config", "Workflow Context", "Mode Guidance"]

    # NOTE(review): attempt 1 is recorded as superseding attempt 2 — confirm
    # that this ordering is what the fixture intends.
    stale_observation = VerificationObservation(
        status="stale",
        summary=(
            "verification became stale for `uv run pytest -q` "
            "after new mutating work"
        ),
        command="uv run pytest -q",
        kind="runtime",
        detail="write changed src/loader/runtime/finalization.py",
        attempt_id="verification-attempt-1",
        attempt_number=1,
        supersedes_attempt_id="verification-attempt-2",
    )
    stale_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:07:30Z",
        kind="verify_observation",
        mode="execute",
        reason_code="verification_stale",
        summary="verify: previous verification became stale after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="stale",
        verification_observations=[stale_observation],
        prompt_format="native",
        prompt_sections=list(core_sections),
    )

    snapshot = SessionSnapshot(
        session_id="20260406T160700Z-stale1234",
        created_at="2026-04-06T16:07:00Z",
        updated_at="2026-04-06T16:07:30Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="Fresh verification is required again."),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(core_sections),
        workflow_timeline=[stale_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
| 628 | |
| 629 | |
@pytest.mark.asyncio
async def test_collect_doctor_report_passes_for_healthy_workspace(temp_dir: Path) -> None:
    """A healthy backend plus an initialised workspace yields an all-PASS report."""
    model_name = "qwen2.5-coder:14b"

    def healthy_backend(model: str) -> FakeOllamaBackend:
        # The requested model is listed, and model details are available.
        return FakeOllamaBackend(
            model=model,
            health=True,
            models=[{"name": model_name}],
            model_details={"details": {"family": "qwen2.5"}},
        )

    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)

    report = await collect_doctor_report(
        temp_dir,
        model=model_name,
        backend_factory=healthy_backend,
    )

    expected_check_names = {
        "backend",
        "capabilities",
        "workspace",
        "write_access",
        "commands",
        "state",
        "permissions",
    }
    assert report.overall_status == CheckStatus.PASS
    assert {item.name for item in report.checks} == expected_check_names

    backend_check = next(item for item in report.checks if item.name == "backend")
    state_check = next(item for item in report.checks if item.name == "state")
    assert backend_check.status == CheckStatus.PASS
    assert state_check.status == CheckStatus.PASS
| 661 | |
| 662 | |
@pytest.mark.asyncio
async def test_collect_doctor_report_surfaces_backend_and_state_failures(temp_dir: Path) -> None:
    """An unhealthy backend and broken project memory both surface as FAIL."""

    def unhealthy_backend(model: str) -> FakeOllamaBackend:
        # Only an unrelated model is listed, and the health flag is off.
        return FakeOllamaBackend(
            model=model,
            health=False,
            models=[{"name": "llama3.1:8b"}],
            model_details=None,
        )

    _write_python_workspace(temp_dir)
    loader_root = temp_dir / ".loader"
    loader_root.mkdir()
    # Deliberately malformed JSON in the project memory file.
    (loader_root / "project-memory.json").write_text("{broken json")

    report = await collect_doctor_report(
        temp_dir,
        model="missing-model:latest",
        backend_factory=unhealthy_backend,
    )

    backend_check = next(item for item in report.checks if item.name == "backend")
    state_check = next(item for item in report.checks if item.name == "state")

    assert report.overall_status == CheckStatus.FAIL
    assert backend_check.status == CheckStatus.FAIL
    assert "not pulled" in backend_check.message
    assert state_check.status == CheckStatus.FAIL
    assert "corrupted" in state_check.message
| 688 | |
| 689 | |
@pytest.mark.asyncio
async def test_collect_doctor_report_fails_closed_on_invalid_permission_rules(
    temp_dir: Path,
) -> None:
    """A malformed permission-rules file fails the report despite a healthy backend."""
    model_name = "qwen2.5-coder:14b"

    def healthy_backend(model: str) -> FakeOllamaBackend:
        return FakeOllamaBackend(
            model=model,
            health=True,
            models=[{"name": model_name}],
        )

    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    rules_file = temp_dir / ".loader" / "permission-rules.json"
    # "allow" is given as a bare string here.
    rules_file.write_text('{"allow": "nope"}\n')

    report = await collect_doctor_report(
        temp_dir,
        model=model_name,
        permission_mode="prompt",
        backend_factory=healthy_backend,
    )

    permission_check = next(item for item in report.checks if item.name == "permissions")
    assert report.overall_status == CheckStatus.FAIL
    assert permission_check.status == CheckStatus.FAIL
    assert report.permission_rules_valid is False
    assert "invalid" in permission_check.message.lower()
| 714 | |
| 715 | |
| 716 | def test_status_and_session_surfaces_reflect_persisted_state(temp_dir: Path) -> None: |
| 717 | _write_python_workspace(temp_dir) |
| 718 | _ensure_loader_dirs(temp_dir) |
| 719 | session_id, dod_path = _persist_session_with_dod(temp_dir) |
| 720 | _persist_explore_snapshot(temp_dir) |
| 721 | |
| 722 | snapshot = collect_status_snapshot( |
| 723 | temp_dir, |
| 724 | model="llama3.1:8b", |
| 725 | ) |
| 726 | sessions = list_session_summaries(temp_dir) |
| 727 | detail = load_session_detail(session_id, project_root=temp_dir) |
| 728 | |
| 729 | assert snapshot.active_session_id == session_id |
| 730 | assert snapshot.dod_status == "fixing" |
| 731 | assert snapshot.dod_pending_items_count == 1 |
| 732 | assert snapshot.last_verification_result == "failed" |
| 733 | assert snapshot.active_dod_path == dod_path |
| 734 | assert snapshot.permission_mode == "prompt" |
| 735 | assert snapshot.runtime_owner_type == "RuntimeHandle" |
| 736 | assert snapshot.runtime_owner_path == "runtime-handle" |
| 737 | assert snapshot.permission_rule_counts == {"allow": 1, "deny": 2, "ask": 1} |
| 738 | assert snapshot.permission_prompting_enabled is True |
| 739 | assert snapshot.permission_rules_valid is True |
| 740 | assert snapshot.permission_rules_source == str( |
| 741 | temp_dir / ".loader" / "permission-rules.json" |
| 742 | ) |
| 743 | assert snapshot.prompt_format == "native" |
| 744 | assert snapshot.prompt_sections == [ |
| 745 | "Runtime Config", |
| 746 | "Workflow Context", |
| 747 | "Mode Guidance", |
| 748 | ] |
| 749 | assert snapshot.workflow_reason_code == "verification_failed_reentry" |
| 750 | assert snapshot.workflow_reason_summary == ( |
| 751 | "verification failed; returning to execute for fixes" |
| 752 | ) |
| 753 | assert snapshot.workflow_decision_kind == "reentry" |
| 754 | assert snapshot.workflow_scheduled_next_mode == "verify" |
| 755 | assert snapshot.active_turn_phase == "completion" |
| 756 | assert snapshot.completion_decision_code == "verification_failed_reentry" |
| 757 | assert snapshot.completion_decision_summary == ( |
| 758 | "continued after verification failed and the runtime re-entered execute mode" |
| 759 | ) |
| 760 | assert snapshot.last_turn_transition_summary == ( |
| 761 | "completion -> finalize [terminal] Finalizing completed turn" |
| 762 | ) |
| 763 | assert snapshot.explore_turn_count == 2 |
| 764 | assert snapshot.explore_message_count == 4 |
| 765 | assert snapshot.explore_history_mode == "continue" |
| 766 | assert snapshot.explore_last_query == "What file did you mention?" |
| 767 | assert snapshot.explore_last_response == "I mentioned README.md." |
| 768 | assert snapshot.explore_updated_at is not None |
| 769 | assert [item.status for item in snapshot.recent_verification] == ["failed"] |
| 770 | assert [item.command for item in snapshot.recent_verification] == ["pytest -q"] |
| 771 | assert [item.detail for item in snapshot.recent_verification] == ["1 failed"] |
| 772 | |
| 773 | assert len(sessions) == 1 |
| 774 | assert sessions[0].session_id == session_id |
| 775 | assert sessions[0].is_current is True |
| 776 | assert sessions[0].runtime_owner_type == "RuntimeHandle" |
| 777 | assert sessions[0].runtime_owner_path == "runtime-handle" |
| 778 | assert sessions[0].dod_status == "fixing" |
| 779 | assert sessions[0].permission_prompting_enabled is True |
| 780 | assert sessions[0].permission_rule_counts == {"allow": 1, "deny": 2, "ask": 1} |
| 781 | assert sessions[0].permission_rules_source == str( |
| 782 | temp_dir / ".loader" / "permission-rules.json" |
| 783 | ) |
| 784 | assert sessions[0].prompt_format == "native" |
| 785 | assert sessions[0].workflow_reason_code == "verification_failed_reentry" |
| 786 | assert sessions[0].workflow_reason_summary == ( |
| 787 | "verification failed; returning to execute for fixes" |
| 788 | ) |
| 789 | assert sessions[0].workflow_decision_kind == "reentry" |
| 790 | assert sessions[0].completion_decision_code == "verification_failed_reentry" |
| 791 | assert sessions[0].completion_decision_summary == ( |
| 792 | "continued after verification failed and the runtime re-entered execute mode" |
| 793 | ) |
| 794 | assert sessions[0].last_turn_transition_summary == ( |
| 795 | "completion -> finalize [terminal] Finalizing completed turn" |
| 796 | ) |
| 797 | |
| 798 | assert detail.snapshot.session_id == session_id |
| 799 | assert detail.is_current is True |
| 800 | assert detail.snapshot.runtime_owner_type == "RuntimeHandle" |
| 801 | assert detail.snapshot.runtime_owner_path == "runtime-handle" |
| 802 | assert detail.definition_of_done is not None |
| 803 | assert detail.definition_of_done.status == "fixing" |
| 804 | assert detail.snapshot.permission_rules_source == str( |
| 805 | temp_dir / ".loader" / "permission-rules.json" |
| 806 | ) |
| 807 | assert detail.snapshot.workflow_reason_code == "verification_failed_reentry" |
| 808 | assert detail.snapshot.last_completion_decision_code == ( |
| 809 | "verification_failed_reentry" |
| 810 | ) |
| 811 | assert [entry.decision_code for entry in detail.snapshot.completion_trace] == [ |
| 812 | "completion_response_accepted", |
| 813 | "verification_failed_reentry", |
| 814 | ] |
| 815 | assert [item.status for item in detail.recent_verification] == ["failed"] |
| 816 | assert [item.command for item in detail.recent_verification] == ["pytest -q"] |
| 817 | assert detail.snapshot.last_turn_transition_reason_code == "turn_complete" |
| 818 | assert len(detail.snapshot.workflow_timeline) == 2 |
| 819 | assert detail.snapshot.workflow_timeline[-1].scheduled_next_mode == "verify" |
| 820 | |
| 821 | |
def test_collect_workflow_timeline_reflects_persisted_history(temp_dir: Path) -> None:
    """Check the unfiltered timeline for a session stored via ``_persist_session_with_dod``.

    Asserts session identity, runtime ownership, the active workflow mode and
    task, and that two entries (a handoff followed by a re-entry) are reported.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id, _ = _persist_session_with_dod(temp_dir)

    timeline = collect_workflow_timeline(project_root=temp_dir)

    # Identity and ownership of the reported session.
    assert timeline.session_id == persisted_id
    assert timeline.is_current is True
    assert (timeline.runtime_owner_type, timeline.runtime_owner_path) == (
        "RuntimeHandle",
        "runtime-handle",
    )

    # Current workflow position.
    assert (timeline.workflow_mode, timeline.current_task) == (
        "execute",
        "Fix the failing tests",
    )

    # Timeline contents, oldest entry first.
    assert timeline.total_entries == 2
    entry_kinds = [entry.kind for entry in timeline.entries]
    assert entry_kinds == ["handoff", "reentry"]
    newest = timeline.entries[-1]
    assert newest.reason_code == "verification_failed_reentry"
| 838 | |
| 839 | |
def test_collect_workflow_timeline_supports_filters_and_highlights(
    temp_dir: Path,
) -> None:
    """Check a ``mode="clarify"``, ``limit=1`` timeline query.

    The snapshot must echo the selected filters, keep the unfiltered total,
    return exactly one clarify entry with its readiness metadata, and still
    expose the expected highlights and ledger state.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_rich_workflow(temp_dir)

    timeline = collect_workflow_timeline(project_root=temp_dir, mode="clarify", limit=1)

    # Filter bookkeeping.
    assert timeline.session_id == persisted_id
    assert timeline.total_entries == 3
    assert timeline.selected_mode == "clarify"
    assert timeline.selected_kind is None
    assert timeline.entry_limit == 1

    # The single entry that survives the filter.
    assert len(timeline.entries) == 1
    shown = timeline.entries[0]
    assert shown.kind == "clarify_continue"
    assert shown.clarify_stage == "readiness"
    assert shown.clarify_pressure_kind == "tradeoff"
    assert shown.missing_readiness_gates == ["non_goals", "decision_boundaries"]

    # Highlights and ledger.
    assert any(line.startswith("Asked again:") for line in timeline.highlights)
    first_assumption = timeline.workflow_ledger.assumptions[0]
    assert first_assumption.status == "contradicted"
    assert any(
        line.startswith("Contradicted assumptions:") for line in timeline.highlights
    )
| 872 | |
| 873 | |
def test_collect_workflow_timeline_highlights_policy_accountability(
    temp_dir: Path,
) -> None:
    """Check that repair/completion entries are listed in order and highlighted."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_policy_accountability(temp_dir)

    timeline = collect_workflow_timeline(project_root=temp_dir)

    assert timeline.session_id == persisted_id

    entry_kinds = [entry.kind for entry in timeline.entries]
    assert entry_kinds == ["repair_retry", "completion_check", "completion_continue"]

    # Highlights that must start with a known label.
    for prefix in ("Repair path:", "Completion decision:"):
        assert any(line.startswith(prefix) for line in timeline.highlights)

    # The policy stage may appear anywhere inside a highlight line.
    assert any(
        "policy-stage=definition_of_done" in line for line in timeline.highlights
    )
| 894 | |
| 895 | |
def test_collect_status_snapshot_includes_latest_policy_summary(
    temp_dir: Path,
) -> None:
    """Check the status snapshot's policy summary, evidence lists, and failed verification row."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_policy_accountability(temp_dir)

    status = collect_status_snapshot(temp_dir)

    policy_summary = status.latest_policy_summary
    assert policy_summary is not None
    for fragment in (
        "verification_failed_reentry",
        "observed=verification failed for `pytest -q` [1 failed]",
        "policy-stage=definition_of_done",
    ):
        assert fragment in policy_summary

    assert status.latest_policy_blocking_evidence == [
        "verification failed for `pytest -q`"
    ]
    assert status.latest_policy_observed_verification == [
        "verification failed for `pytest -q` [1 failed]"
    ]

    # Exactly one recent verification record, compared field by field.
    recent_rows = [
        (item.status, item.command, item.detail) for item in status.recent_verification
    ]
    assert recent_rows == [("failed", "pytest -q", "1 failed")]
| 920 | |
| 921 | |
def test_collect_status_snapshot_surfaces_pending_verification(
    temp_dir: Path,
) -> None:
    """Check that a pending verification appears in the policy summary and recent rows."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_pending_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    policy_summary = status.latest_policy_summary
    assert policy_summary is not None
    for fragment in ("verification_pending", "policy-outcome=pending"):
        assert fragment in policy_summary

    assert status.latest_policy_observed_verification == [
        "verification pending for `uv run pytest -q` [attempt 2]"
    ]

    # Exactly one recent verification record, compared field by field.
    recent_rows = [
        (item.status, item.command, item.attempt) for item in status.recent_verification
    ]
    assert recent_rows == [("pending", "uv run pytest -q", "attempt 2")]
| 942 | |
| 943 | |
def test_collect_status_snapshot_surfaces_planned_verification(
    temp_dir: Path,
) -> None:
    """Check that a planned verification appears in the policy summary and recent rows."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_planned_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    policy_summary = status.latest_policy_summary
    assert policy_summary is not None
    for fragment in ("verification_planned", "policy-outcome=planned"):
        assert fragment in policy_summary

    assert status.latest_policy_observed_verification == [
        "verification planned for `uv run pytest -q` [write changed src/loader/runtime/tool_batches.py; attempt 3]"
    ]

    # Exactly one recent verification record, compared field by field.
    recent_rows = [
        (item.status, item.command, item.attempt, item.detail)
        for item in status.recent_verification
    ]
    assert recent_rows == [
        (
            "planned",
            "uv run pytest -q",
            "attempt 3",
            "write changed src/loader/runtime/tool_batches.py",
        )
    ]
| 967 | |
| 968 | |
def test_collect_status_snapshot_surfaces_stale_verification(
    temp_dir: Path,
) -> None:
    """Check that a stale verification appears in the policy summary and recent rows."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_stale_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    policy_summary = status.latest_policy_summary
    assert policy_summary is not None
    for fragment in ("verification_stale", "policy-outcome=stale"):
        assert fragment in policy_summary

    assert status.latest_policy_observed_verification == [
        "verification became stale for `uv run pytest -q` after new mutating work [write changed src/loader/runtime/finalization.py; attempt 1 -> attempt 2]"
    ]

    # Exactly one recent verification record, compared field by field.
    recent_rows = [
        (item.status, item.command, item.attempt, item.detail)
        for item in status.recent_verification
    ]
    assert recent_rows == [
        (
            "stale",
            "uv run pytest -q",
            "attempt 1 -> attempt 2",
            "write changed src/loader/runtime/finalization.py",
        )
    ]
| 994 | |
| 995 | |
def test_collect_prompt_diff_uses_persisted_prompt_history(temp_dir: Path) -> None:
    """Check that the prompt diff exposes both snapshots, a mode-change highlight, and a unified diff."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id, _ = _persist_session_with_dod(temp_dir)

    prompt_diff = collect_prompt_diff(project_root=temp_dir)

    assert prompt_diff.session_id == persisted_id

    # Both sides of the comparison must exist before their modes are compared.
    older, newer = prompt_diff.previous, prompt_diff.current
    assert older is not None
    assert newer is not None
    assert newer.workflow_mode == "execute"
    assert older.workflow_mode == "verify"

    assert any("Workflow mode changed:" in line for line in prompt_diff.highlights)

    unified = prompt_diff.unified_diff
    for marker in ("---", "execute parser fix"):
        assert marker in unified
| 1011 | |
| 1012 | |
def test_collect_workflow_artifact_diffs_reads_versioned_artifacts(
    temp_dir: Path,
) -> None:
    """Check that artifact diffs cover the three expected artifact kinds and mention ``notes.txt``."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_rich_workflow(temp_dir)

    artifact_diffs = collect_workflow_artifact_diffs(project_root=temp_dir)
    diff_entries = artifact_diffs.entries

    assert artifact_diffs.session_id == persisted_id
    assert len(diff_entries) == 3

    # One entry per artifact kind; order is not asserted.
    seen_kinds = {entry.kind for entry in diff_entries}
    assert seen_kinds == {"clarify_brief", "implementation_plan", "verification_plan"}

    assert any("notes.txt" in entry.unified_diff for entry in diff_entries)
    assert artifact_diffs.highlights
| 1031 | |
| 1032 | |
def test_status_and_session_commands_render_persisted_state(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run ``status``, ``session list``, ``session show`` and ``workflow show``.

    All four commands are invoked from inside ``temp_dir`` before any output is
    inspected; each must exit with code 0 and contain the listed fragments.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id, _ = _persist_session_with_dod(temp_dir)
    _persist_explore_snapshot(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    status_run = cli.invoke(cli_main_module.status_cli, ["--model", "llama3.1:8b"])
    list_run = cli.invoke(cli_main_module.session_cli, ["list"])
    show_run = cli.invoke(cli_main_module.session_cli, ["show", session_id])
    workflow_run = cli.invoke(cli_main_module.workflow_cli, ["show"])

    # --- status ---
    assert status_run.exit_code == 0
    for fragment in (
        session_id,
        "fixing",
        "Runtime Owner",
        "runtime-handle (RuntimeHandle)",
        "1 allow / 2 deny / 1 ask",
        "native",
        "Runtime Config, Workflow Context, Mode Guidance",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "continued after verification failed",
        "completion -> finalize",
        "Finalizing completed turn",
        "Explore Turns",
        "Explore History",
        "What file did you mention?",
        "pytest -q",
        "1 failed",
    ):
        assert fragment in status_run.output

    # --- session list ---
    assert list_run.exit_code == 0
    for fragment in (
        session_id,
        "Runtime Owner",
        "runtime-handle (RuntimeHandle)",
        "1 allow / 2 deny / 1 ask",
        "prompting enabled",
        "native",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "completion -> finalize",
    ):
        assert fragment in list_run.output

    # --- session show ---
    assert show_run.exit_code == 0
    for fragment in (
        session_id,
        "Runtime Owner",
        "runtime-handle (RuntimeHandle)",
        "Patch the broken parser",
        "1 allow / 2 deny / 1 ask",
        "enabled",
        "Runtime Config, Workflow Context, Mode Guidance",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "Completion Trace",
        "Recent Verification",
        "continuation_check",
        "completion -> finalize",
        "Finalizing completed turn",
    ):
        assert fragment in show_run.output
    # This session must render the workflow timeline, not the policy timeline.
    assert "Policy Timeline" not in show_run.output
    for fragment in (
        "Workflow Timeline",
        "handoff",
        "next=verify",
        "pytest -q",
        "1 failed",
    ):
        assert fragment in show_run.output

    # --- workflow show ---
    assert workflow_run.exit_code == 0
    for fragment in (
        "Loader Workflow",
        "Workflow Timeline",
        session_id,
        "Runtime Owner",
        "runtime-handle (RuntimeHandle)",
        "handoff",
        "next=verify",
    ):
        assert fragment in workflow_run.output
| 1113 | |
| 1114 | |
def test_workflow_command_renders_policy_accountability_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check ``workflow show`` and ``workflow show --policy`` output for the policy fragments.

    The ``--policy`` invocation runs only after the default view has been
    checked, and its output must not contain ``handoff``.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_policy_accountability(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    default_run = cli.invoke(cli_main_module.workflow_cli, ["show"])

    assert default_run.exit_code == 0
    for fragment in (
        persisted_id,
        "repair_retry",
        "Repair path:",
        "Completion decision:",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "policy-stage=raw_text_tool_fallback",
        "policy-outcome=continue",
        "provenance=contradicts:verification@dod.evidence",
        "observed=verification failed for `pytest -q` [1 failed]",
    ):
        assert fragment in default_run.output

    policy_run = cli.invoke(cli_main_module.workflow_cli, ["show", "--policy"])

    assert policy_run.exit_code == 0
    for fragment in (
        "Loader Workflow",
        "Policy Timeline",
        "policy-only",
        "repair_retry",
        "verification_failed_reentry",
    ):
        assert fragment in policy_run.output
    assert "handoff" not in policy_run.output
| 1152 | |
| 1153 | |
def test_workflow_command_renders_stale_verification_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``workflow show`` exits 0 and prints the stale-verification fragments."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_stale_verification(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    run = cli.invoke(cli_main_module.workflow_cli, ["show"])

    assert run.exit_code == 0
    for fragment in (
        persisted_id,
        "Verify stale:",
        "verification_stale",
        "policy-outcome=stale",
        "Observed Verification",
        "uv run pytest -q",
        "new mutating work",
    ):
        assert fragment in run.output
| 1175 | |
| 1176 | |
def test_workflow_command_renders_planned_verification_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``workflow show`` exits 0 and prints the planned-verification fragments."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_planned_verification(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    run = cli.invoke(cli_main_module.workflow_cli, ["show"])

    assert run.exit_code == 0
    for fragment in (
        persisted_id,
        "Verify planned:",
        "verification_planned",
        "policy-outcome=planned",
        "Observed Verification",
        "verification planned for `uv run pytest -q`",
        "uv run pytest -q",
    ):
        assert fragment in run.output
| 1198 | |
| 1199 | |
def test_collect_workflow_timeline_can_focus_on_policy_accountability(
    temp_dir: Path,
) -> None:
    """Check that ``accountability_only=True`` is echoed back and yields the three policy entries."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_policy_accountability(temp_dir)

    timeline = collect_workflow_timeline(project_root=temp_dir, accountability_only=True)

    assert timeline.session_id == persisted_id
    assert timeline.selected_accountability_only is True

    entry_kinds = [entry.kind for entry in timeline.entries]
    assert entry_kinds == ["repair_retry", "completion_check", "completion_continue"]
| 1219 | |
| 1220 | |
def test_session_show_renders_policy_timeline_preview(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``session show <id>`` exits 0 and prints the policy summary and policy timeline."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_policy_accountability(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    show_run = cli.invoke(cli_main_module.session_cli, ["show", persisted_id])

    assert show_run.exit_code == 0
    for fragment in (
        "Latest Policy",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "Policy Timeline",
        "repair_retry",
        "completion:",
        "provenance=contradicts:verification@dod.evidence",
    ):
        assert fragment in show_run.output
| 1245 | |
| 1246 | |
def test_status_command_renders_latest_policy_summary(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``status`` with no arguments exits 0 and prints the latest policy details."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_policy_accountability(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    run = cli.invoke(cli_main_module.status_cli, [])

    assert run.exit_code == 0
    for fragment in (
        persisted_id,
        "Latest Policy",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "Recent Verification",
        "policy-stage=definition_of_done",
    ):
        assert fragment in run.output
| 1270 | |
| 1271 | |
def test_workflow_show_renders_workflow_ledger(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``workflow show`` exits 0 and prints the workflow ledger sections."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_rich_workflow(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    run = cli.invoke(cli_main_module.workflow_cli, ["show"])

    assert run.exit_code == 0
    for fragment in (
        "Workflow Ledger",
        "Assumptions",
        "contradicted",
        "notes.txt stays out of scope",
        "Acceptance Anchors",
        "Decision Boundaries",
    ):
        assert fragment in run.output
| 1292 | |
| 1293 | |
def test_workflow_show_command_supports_filters_and_highlights(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check ``workflow show`` with ``--kind``/``--limit`` and then ``--mode``/``--limit``.

    The clarify-mode invocation runs only after the re-entry view has been
    checked.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_rich_workflow(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    reentry_args = ["show", "--kind", "reentry", "--limit", "1", persisted_id]
    reentry_run = cli.invoke(cli_main_module.workflow_cli, reentry_args)

    assert reentry_run.exit_code == 0
    for fragment in (
        "Loader Workflow",
        "1 shown / 3 total",
        "kind=reentry, limit=1",
        "Workflow Answers",
        "Recovered workflow:",
        "full_replan_required",
        "evidence=confirmed touchpoint:",
    ):
        assert fragment in reentry_run.output

    clarify_args = ["show", "--mode", "clarify", "--limit", "1", persisted_id]
    clarify_run = cli.invoke(cli_main_module.workflow_cli, clarify_args)

    assert clarify_run.exit_code == 0
    for fragment in (
        "stage=readiness",
        "pressure=tradeoff",
        "gates=non_goals,decision_boundaries",
    ):
        assert fragment in clarify_run.output
| 1328 | |
| 1329 | |
def test_workflow_show_can_render_artifact_diffs(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``workflow show --diff --full-diff`` exits 0 and prints the artifact diff sections."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_rich_workflow(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    run = cli.invoke(cli_main_module.workflow_cli, ["show", "--diff", "--full-diff"])

    assert run.exit_code == 0
    for fragment in (
        "Artifact Changes",
        "Artifact Diff Summary",
        "clarify_brief",
        "implementation_plan",
        "verification_plan",
        "notes.txt",
    ):
        assert fragment in run.output
| 1353 | |
| 1354 | |
def test_collect_prompt_preview_uses_persisted_runtime_state(temp_dir: Path) -> None:
    """Check the prompt preview's session, workflow, permission, format, sections, and body text.

    The expected prompt format is derived from the preview's own capability
    profile: ``native`` when native tools are supported, ``react`` otherwise.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id, _ = _persist_session_with_dod(temp_dir)

    preview = collect_prompt_preview(temp_dir, model="qwen2.5-coder:14b")

    assert preview.active_session_id == persisted_id
    assert preview.workflow_mode == "execute"
    assert preview.workflow_reason_code == "verification_failed_reentry"
    assert preview.workflow_decision_kind == "reentry"
    assert preview.permission_mode == "prompt"

    if preview.capability_profile.supports_native_tools:
        expected_format = "native"
    else:
        expected_format = "react"
    assert preview.prompt_format == expected_format

    expected_sections = [
        "Runtime Config",
        "Workflow Context",
        "Mode Guidance",
        "Project Context",
        "Project Tips",
    ]
    assert preview.prompt_sections == expected_sections

    for fragment in ("## Execute Mode", "Current task: Fix the failing tests"):
        assert fragment in preview.content
| 1382 | |
| 1383 | |
def test_prompt_show_command_renders_preview_without_model_call(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``prompt show`` exits 0 and prints the preview for the given task.

    A preview is also collected directly with the same model and task so the
    CLI output can be checked against its ``prompt_format``.
    """
    model_name = "qwen2.5-coder:14b"
    task_text = "Preview the current Loader contract"

    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_dod(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)
    reference_preview = collect_prompt_preview(
        temp_dir,
        model=model_name,
        current_task=task_text,
    )

    run = cli.invoke(
        cli_main_module.prompt_cli,
        ["show", "--model", model_name, task_text],
    )

    assert run.exit_code == 0
    for fragment in (
        "Prompt Preview",
        "Prompt Body",
        task_text,
        reference_preview.prompt_format,
        "Workflow Context",
        "Execute Mode",
    ):
        assert fragment in run.output
| 1412 | |
| 1413 | |
def test_prompt_diff_command_renders_persisted_prompt_changes(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``prompt diff --full`` exits 0 and prints the change summary and unified diff."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_dod(temp_dir)
    cli = CliRunner()

    monkeypatch.chdir(temp_dir)

    run = cli.invoke(cli_main_module.prompt_cli, ["diff", "--full"])

    assert run.exit_code == 0
    for fragment in (
        "Prompt Diff",
        "Prompt Changes",
        "Workflow mode changed:",
        "Prompt Unified Diff",
        "execute parser fix",
    ):
        assert fragment in run.output
| 1433 | |
| 1434 | |
def test_permission_snapshot_and_dry_run_reflect_rules(temp_dir: Path) -> None:
    """Check the permission snapshot and a dry-run check against a hand-written rules file.

    The rules file holds one allow, one deny, and one ask rule for ``write``.
    The dry-run writes "safe change" to ``README.md``, which matches both the
    allow rule and the ask rule; the test expects the ask rule to be reported.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)

    rules_path = temp_dir / ".loader" / "permission-rules.json"
    rules_lines = (
        "{",
        ' "allow": [{"tool": "write", "contains": "safe change"}],',
        ' "deny": [{"tool": "write", "path_contains": "secrets"}],',
        ' "ask": [{"tool": "write", "path_contains": "README"}]',
        "}",
    )
    rules_path.write_text("\n".join(rules_lines) + "\n")

    permissions = collect_permission_snapshot(temp_dir, permission_mode="allow")
    write_args = {
        "file_path": str(temp_dir / "README.md"),
        "content": "safe change\n",
    }
    dry_run = dry_run_permission_check(
        "write",
        write_args,
        project_root=temp_dir,
        permission_mode="allow",
    )

    # Snapshot of the loaded rules.
    assert permissions.active_mode == "allow"
    assert permissions.prompting_enabled is True
    assert permissions.rules_valid is True
    assert permissions.rule_counts == {"allow": 1, "deny": 1, "ask": 1}
    first_allow = permissions.normalized_rules["allow"][0]
    assert first_allow.tool_name == "write"
    assert first_allow.contains == "safe change"

    # Outcome of the dry-run check.
    assert dry_run.required_mode == "workspace-write"
    assert dry_run.decision == "ask"
    assert dry_run.matched_disposition == "ask"
    assert dry_run.matched_rule == "tool=write, path_contains=README"
    assert "file_path=" in dry_run.input_summary
| 1474 | |
| 1475 | |
def test_status_snapshot_reports_invalid_permission_rules(temp_dir: Path) -> None:
    """Check that a malformed rules file is reported as invalid by the status snapshot.

    The snapshot must flag the rules as invalid, keep prompting enabled in
    ``prompt`` mode, and still point at ``.loader/permission-rules.json``.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    rules_path = temp_dir / ".loader" / "permission-rules.json"
    # Deliberately unparseable JSON; the encoding is pinned so the bytes on
    # disk do not depend on the locale default.
    rules_path.write_text("{broken json", encoding="utf-8")

    snapshot = collect_status_snapshot(temp_dir, permission_mode="prompt")

    assert snapshot.permission_rules_valid is False
    assert snapshot.permission_prompting_enabled is True
    # Compare trailing path components instead of a "/"-joined suffix so the
    # check also holds where paths are rendered with a different separator.
    assert Path(snapshot.permission_rules_source).parts[-2:] == (
        ".loader",
        "permission-rules.json",
    )
| 1486 | |
| 1487 | |
def test_permissions_show_and_check_commands_render_policy(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Exercise ``permissions show`` and ``permissions check`` with a rule file.

    The workspace gets one allow rule and one ask rule for the ``write`` tool.
    ``show`` must render the policy headings and both rules; ``check`` for a
    ``write`` to ``README.md`` must render the required mode, the ``ask``
    outcome, and the matched README rule.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    (temp_dir / ".loader" / "permission-rules.json").write_text(
        "\n".join(
            [
                "{",
                ' "allow": [{"tool": "write", "contains": "safe change"}],',
                ' "ask": [{"tool": "write", "path_contains": "README"}]',
                "}",
            ]
        )
        + "\n",
        # Explicit encoding keeps the fixture bytes independent of the locale.
        encoding="utf-8",
    )
    runner = CliRunner()

    # Run from inside the workspace; the check target below is the relative
    # path "README.md".
    monkeypatch.chdir(temp_dir)

    show_result = runner.invoke(
        cli_main_module.permissions_cli,
        ["show", "--permission-mode", "allow"],
    )
    check_result = runner.invoke(
        cli_main_module.permissions_cli,
        [
            "check",
            "--permission-mode",
            "allow",
            "--args",
            '{"content":"safe change\\n"}',
            "write",
            "README.md",
        ],
    )

    # Attach the captured output so a non-zero exit is diagnosable from the
    # failure report alone.
    assert show_result.exit_code == 0, show_result.output
    assert "Loader Permissions" in show_result.output
    assert "Permission Mode" in show_result.output
    assert "Rules Source" in show_result.output
    assert "safe change" in show_result.output
    assert "README" in show_result.output

    assert check_result.exit_code == 0, check_result.output
    assert "Permission Check" in check_result.output
    assert "workspace-write" in check_result.output
    assert "ask" in check_result.output
    assert "tool=write, path_contains=README" in check_result.output
| 1538 | |
| 1539 | |
def test_permissions_check_rejects_invalid_json_args(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Ensure ``permissions check`` fails when ``--args`` is not valid JSON.

    The command must exit with a non-zero status and print the
    "`--args` must be valid JSON" message.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    monkeypatch.chdir(temp_dir)

    # "{broken json" is intentionally unparseable.
    malformed_argv = ["check", "bash", "--args", "{broken json", "ls"]
    outcome = CliRunner().invoke(cli_main_module.permissions_cli, malformed_argv)

    assert outcome.exit_code != 0
    assert "`--args` must be valid JSON" in outcome.output
| 1557 | |
| 1558 | |
def test_permissions_show_surfaces_invalid_rule_file(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Verify ``permissions show`` reports a malformed rule file without failing.

    With an unparseable ``.loader/permission-rules.json`` in place, the command
    must still exit with status 0 and its output must mention that the rules
    are invalid, include a "Rule Error" entry, and show the rules source.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    rules_file = temp_dir / ".loader" / "permission-rules.json"
    # Explicit encoding keeps the fixture bytes independent of the locale.
    rules_file.write_text("{broken json", encoding="utf-8")
    runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    result = runner.invoke(cli_main_module.permissions_cli, ["show"])

    # Attach the captured output so a non-zero exit is diagnosable from the
    # failure report alone.
    assert result.exit_code == 0, result.output
    assert "invalid" in result.output.lower()
    assert "Rule Error" in result.output
    assert "Rules Source" in result.output
| 1576 | |
| 1577 | |
def test_explore_command_can_show_and_reset_continuity(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Drive ``explore --status`` and ``explore --reset`` on persisted state.

    After an explore snapshot has been persisted, ``--status`` must render it
    and ``--reset`` must clear it so the store loads ``None`` afterwards.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_explore_snapshot(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    # Step 1: the status view renders the persisted snapshot.
    shown = cli_runner.invoke(cli_main_module.explore_cli, ["--status"])

    assert shown.exit_code == 0
    status_fragments = (
        "Loader Explore State",
        "continue",
        "What file did you mention?",
    )
    for fragment in status_fragments:
        assert fragment in shown.output

    # Step 2: resetting confirms the wipe and leaves nothing to load.
    cleared = cli_runner.invoke(cli_main_module.explore_cli, ["--reset"])

    assert cleared.exit_code == 0
    assert "Cleared persisted explore continuity." in cleared.output
    remaining_state = ExploreStateStore(temp_dir).load()
    assert remaining_state is None
| 1601 | |
| 1602 | |
def test_root_help_lists_special_commands() -> None:
    """Confirm the root help text advertises each special subcommand usage."""
    rendered_help = cli_main_module._loader_help_text()

    advertised_usages = (
        "loader doctor",
        "loader status",
        "loader explore <prompt>",
        "loader permissions show",
        "loader session resume <id>",
    )
    for usage in advertised_usages:
        assert usage in rendered_help
| 1611 | |
| 1612 | |
def test_main_dispatches_session_resume_to_primary_cli(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that ``loader session resume <id>`` is forwarded to the main CLI.

    ``cli.main`` is replaced with a recorder and ``sys.argv`` is patched; after
    calling ``main()`` the recorder must have received the session id as
    ``--resume-target`` along with the remaining flag and the program name.
    """
    recorded: dict[str, object] = {}

    def record_invocation(*, args: list[str], prog_name: str) -> None:
        # Capture the keyword arguments the dispatcher hands to the CLI.
        recorded.update(args=args, prog_name=prog_name)

    patched_argv = ["loader", "session", "resume", "abc123", "--no-tui"]
    monkeypatch.setattr(cli_main_module.cli, "main", record_invocation)
    monkeypatch.setattr(sys, "argv", patched_argv)

    cli_main_module.main()

    expected_call = {
        "args": ["--resume-target", "abc123", "--no-tui"],
        "prog_name": "loader",
    }
    assert recorded == expected_call