Python · 63766 bytes Raw Blame History
1 """Tests for doctor, status, and session inspection surfaces."""
2
3 from __future__ import annotations
4
5 import sys
6 from pathlib import Path
7
8 import pytest
9 from click.testing import CliRunner
10
11 import loader.cli.main as cli_main_module
12 from loader.llm.base import Message, Role
13 from loader.runtime.completion_trace import CompletionTraceEntry
14 from loader.runtime.dod import (
15 DefinitionOfDoneStore,
16 VerificationEvidence,
17 create_definition_of_done,
18 )
19 from loader.runtime.evidence_provenance import EvidenceProvenance
20 from loader.runtime.explore_state import ExploreSnapshot, ExploreStateStore
21 from loader.runtime.inspection import (
22 CheckStatus,
23 collect_doctor_report,
24 collect_permission_snapshot,
25 collect_prompt_diff,
26 collect_prompt_preview,
27 collect_status_snapshot,
28 collect_workflow_artifact_diffs,
29 collect_workflow_timeline,
30 dry_run_permission_check,
31 list_session_summaries,
32 load_session_detail,
33 )
34 from loader.runtime.prompt_history import PromptSnapshot
35 from loader.runtime.session import SessionSnapshot, SessionStore
36 from loader.runtime.verification_observations import VerificationObservation
37 from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem
38 from loader.runtime.workflow_policy import WorkflowTimelineEntry
39
40
class FakeOllamaBackend:
    """Small async backend stub for doctor tests.

    Every method returns the canned value supplied at construction time.
    ``list_models`` and ``describe_model`` both hand back shallow copies, so
    a caller that mutates a result cannot change what later calls return.
    """

    def __init__(
        self,
        *,
        model: str,
        health: bool,
        models: list[dict[str, object]],
        model_details: dict[str, object] | None = None,
    ) -> None:
        self.model = model
        self._health = health
        self._models = models
        self._model_details = model_details

    async def list_models(self) -> list[dict[str, object]]:
        """Return a fresh list holding the configured model entries."""
        return list(self._models)

    async def health_check(self) -> bool:
        """Return the configured health flag."""
        return self._health

    async def describe_model(self) -> dict[str, object] | None:
        """Return a shallow copy of the configured details, or ``None``."""
        if self._model_details is None:
            return None
        # Copy for the same reason ``list_models`` does: keep the canned data
        # isolated from whatever the caller does with the returned mapping.
        return dict(self._model_details)

    async def close(self) -> None:
        """Do nothing; the stub holds no resources."""
        return None
68
69
70 def _write_python_workspace(temp_dir: Path) -> None:
71 (temp_dir / "pyproject.toml").write_text(
72 "\n".join(
73 [
74 "[build-system]",
75 'requires = ["hatchling"]',
76 'build-backend = "hatchling.build"',
77 "",
78 "[tool.pytest.ini_options]",
79 'testpaths = ["tests"]',
80 "",
81 ]
82 )
83 + "\n"
84 )
85 (temp_dir / "src").mkdir()
86 (temp_dir / "tests").mkdir()
87
88
89 def _ensure_loader_dirs(temp_dir: Path) -> None:
90 loader_root = temp_dir / ".loader"
91 for name in ("sessions", "state", "dod", "briefs", "plans"):
92 (loader_root / name).mkdir(parents=True, exist_ok=True)
93 (loader_root / "project-memory.json").write_text("{}\n")
94
95
def _persist_session_with_dod(temp_dir: Path) -> tuple[str, str]:
    """Save a session whose definition of done is mid-fix after a failed check.

    The definition of done is stored with status "fixing" and one failed
    ``pytest -q`` evidence record. The session snapshot carries a handoff and
    a re-entry timeline entry, two prompt snapshots, and a two-step
    completion trace.

    Returns:
        The session id and the value returned by the definition-of-done
        store's ``save``, both as strings.
    """
    task = "Fix the failing tests"
    base_sections = ("Runtime Config", "Workflow Context", "Mode Guidance")
    plan_artifact = str(temp_dir / ".loader" / "plans" / "fix-tests.md")
    rules_source = str(temp_dir / ".loader" / "permission-rules.json")
    reentry_code = "verification_failed_reentry"
    reentry_summary = (
        "continued after verification failed and the runtime re-entered execute mode"
    )

    definition = create_definition_of_done(task)
    definition.status = "fixing"
    definition.pending_items = ["Re-run pytest"]
    definition.completed_items = ["Patch the broken parser"]
    definition.last_verification_result = "failed"
    definition.evidence = [
        VerificationEvidence(
            command="pytest -q",
            passed=False,
            stderr="1 failed",
            kind="test",
        )
    ]
    stored_dod = str(DefinitionOfDoneStore(temp_dir).save(definition))

    handoff_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T12:04:00Z",
        kind="handoff",
        mode="verify",
        reason_code="execute_completed",
        summary="verify: execution completed; verifying the parser fix",
        decision_kind="handoff",
        prompt_format="native",
        prompt_sections=list(base_sections),
        artifact_paths=[plan_artifact],
    )
    failed_observation = VerificationObservation(
        status="failed",
        summary="verification failed for `pytest -q`",
        command="pytest -q",
        kind="test",
        detail="1 failed",
    )
    reentry_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T12:05:00Z",
        kind="reentry",
        mode="execute",
        reason_code=reentry_code,
        summary="execute: verification failed; returning to execute for fixes",
        decision_kind="reentry",
        scheduled_next_mode="verify",
        runner_up_mode="verify",
        runner_up_score=0.52,
        verification_observations=[failed_observation],
        prompt_format="native",
        prompt_sections=list(base_sections),
        artifact_paths=[plan_artifact],
    )

    verify_prompt = PromptSnapshot(
        timestamp="2026-04-06T12:04:00Z",
        workflow_mode="verify",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=list(base_sections),
        content="# Introduction\nverify parser fix\n",
    )
    execute_prompt = PromptSnapshot(
        timestamp="2026-04-06T12:05:00Z",
        workflow_mode="execute",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=[*base_sections, "Project Context"],
        content="# Introduction\nexecute parser fix\n# Project Context\npython\n",
    )

    trace = [
        CompletionTraceEntry(
            stage="continuation_check",
            outcome="accept",
            decision_code="completion_response_accepted",
            decision_summary=(
                "accepted the response because completion heuristics "
                "found no missing follow-through"
            ),
        ),
        CompletionTraceEntry(
            stage="definition_of_done",
            outcome="continue",
            decision_code=reentry_code,
            decision_summary=reentry_summary,
        ),
    ]

    session = SessionSnapshot(
        session_id="20260406T120000Z-abcdef01",
        created_at="2026-04-06T12:00:00Z",
        updated_at="2026-04-06T12:05:00Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="I updated the parser."),
        ],
        usage={"turns": 1, "tool_calls": 2},
        active_dod_path=stored_dod,
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="prompt",
        permission_prompting_enabled=True,
        permission_rule_counts={"allow": 1, "deny": 2, "ask": 1},
        permission_rules_source=rules_source,
        prompt_format="native",
        prompt_sections=list(base_sections),
        prompt_history=[verify_prompt, execute_prompt],
        workflow_reason_code=reentry_code,
        workflow_reason_summary="verification failed; returning to execute for fixes",
        workflow_decision_kind="reentry",
        workflow_ambiguity_score=0.1,
        workflow_complexity_score=0.7,
        workflow_scheduled_next_mode="verify",
        active_turn_phase="completion",
        last_completion_decision_code=reentry_code,
        last_completion_decision_summary=reentry_summary,
        completion_trace=trace,
        last_turn_transition_summary=(
            "completion -> finalize [terminal] Finalizing completed turn"
        ),
        last_turn_transition_kind="terminal",
        last_turn_transition_reason_code="turn_complete",
        workflow_timeline=[handoff_entry, reentry_entry],
    )
    SessionStore(temp_dir).save(session)
    return session.session_id, stored_dod
225
226
def _persist_explore_snapshot(temp_dir: Path) -> None:
    """Save a two-turn, four-message explore conversation about README.md."""
    store = ExploreStateStore(temp_dir)
    final_question = "What file did you mention?"
    final_answer = "I mentioned README.md."
    transcript = [
        Message(role=Role.USER, content="Where should I start?"),
        Message(role=Role.ASSISTANT, content="Start with README.md."),
        Message(role=Role.USER, content=final_question),
        Message(role=Role.ASSISTANT, content=final_answer),
    ]
    explore_state = ExploreSnapshot(
        turn_count=2,
        model_name="llama3.1:8b",
        messages=transcript,
        last_history_mode="continue",
        last_query=final_question,
        last_response=final_answer,
    )
    store.save(explore_state)
243
244
def _persist_session_with_rich_workflow(temp_dir: Path) -> str:
    """Save a session with superseded and current brief/plan artifacts.

    Writes two generations of clarify briefs and plan directories under
    ``.loader``, a "fixing" definition of done that points at the newer
    generation, and a session snapshot carrying a three-entry workflow
    timeline plus a populated workflow ledger.

    Returns:
        The id of the saved session.
    """
    slug = "tighten-loader-workflow-behavior"
    briefs_dir = temp_dir / ".loader" / "briefs"
    plans_dir = temp_dir / ".loader" / "plans"
    # Create the briefs directory here instead of relying on an earlier
    # ``_ensure_loader_dirs`` call; the plan directories below are already
    # created on demand and the briefs need the same treatment.
    briefs_dir.mkdir(parents=True, exist_ok=True)

    brief_old = briefs_dir / f"20260406T150000Z-{slug}.md"
    brief_new = briefs_dir / f"20260406T150200Z-{slug}.md"
    brief_old.write_text(
        "# Task Brief\n\n## Likely Touchpoints\n- planned.txt\n\n"
        "## Acceptance Criteria\n- planned.txt exists.\n"
    )
    brief_new.write_text(
        "# Task Brief\n\n## Likely Touchpoints\n- notes.txt\n\n"
        "## Acceptance Criteria\n- notes.txt exists.\n"
    )

    plan_old_root = plans_dir / f"20260406T150100Z-{slug}"
    plan_new_root = plans_dir / f"20260406T150300Z-{slug}"
    plan_old_root.mkdir(parents=True, exist_ok=True)
    plan_new_root.mkdir(parents=True, exist_ok=True)
    (plan_old_root / "implementation.md").write_text(
        "# Implementation Plan\n\n## File Changes\n- Create planned.txt.\n"
    )
    (plan_old_root / "verification.md").write_text(
        "# Verification Plan\n\n## Acceptance Criteria\n- planned.txt exists.\n"
    )
    (plan_new_root / "implementation.md").write_text(
        "# Implementation Plan\n\n## File Changes\n"
        "- Keep notes.txt as the runtime artifact.\n"
    )
    (plan_new_root / "verification.md").write_text(
        "# Verification Plan\n\n## Acceptance Criteria\n- notes.txt exists.\n"
    )

    # The definition of done references only the newer brief and plan files.
    dod = create_definition_of_done("Tighten Loader workflow behavior")
    dod.status = "fixing"
    dod.clarify_brief = str(brief_new)
    dod.implementation_plan = str(plan_new_root / "implementation.md")
    dod.verification_plan = str(plan_new_root / "verification.md")
    dod.acceptance_criteria = ["notes.txt exists in the workspace root."]
    dod_path = DefinitionOfDoneStore(temp_dir).save(dod)

    snapshot = SessionSnapshot(
        session_id="20260406T150000Z-feedface",
        created_at="2026-04-06T15:00:00Z",
        updated_at="2026-04-06T15:04:00Z",
        messages=[
            Message(role=Role.USER, content="Tighten Loader workflow behavior"),
            Message(role=Role.ASSISTANT, content="I refreshed the workflow contract."),
        ],
        active_dod_path=str(dod_path),
        current_task="Tighten Loader workflow behavior",
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="prompt",
        permission_prompting_enabled=True,
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context", "Mode Guidance"],
        prompt_history=[
            PromptSnapshot(
                timestamp="2026-04-06T15:02:00Z",
                workflow_mode="plan",
                permission_mode="prompt",
                current_task="Tighten Loader workflow behavior",
                prompt_format="native",
                prompt_sections=["Runtime Config", "Workflow Context", "Mode Guidance"],
                content="# Introduction\nplan around planned.txt\n",
            ),
            PromptSnapshot(
                timestamp="2026-04-06T15:04:00Z",
                workflow_mode="execute",
                permission_mode="prompt",
                current_task="Tighten Loader workflow behavior",
                prompt_format="native",
                prompt_sections=[
                    "Runtime Config",
                    "Workflow Context",
                    "Mode Guidance",
                    "Project Context",
                ],
                content=(
                    "# Introduction\nexecute around notes.txt\n"
                    "# Project Context\npython\n"
                ),
            ),
        ],
        workflow_reason_code="full_replan_completed",
        workflow_reason_summary="clarify and plan artifacts refreshed; returning to execute",
        workflow_decision_kind="handoff",
        workflow_timeline=[
            WorkflowTimelineEntry(
                timestamp="2026-04-06T15:01:00Z",
                kind="clarify_continue",
                mode="clarify",
                reason_code="clarify_pressure_pass_required",
                summary="clarify: Loader still needs a tradeoff pass around non-goals",
                decision_kind="forced",
                unresolved_questions=[
                    "Concrete files or subsystems are still not pinned down."
                ],
                signal_summary=["ambiguity=0.82", "open_questions=1"],
                clarify_stage="readiness",
                clarify_pressure_kind="tradeoff",
                pressure_pass_complete=False,
                missing_readiness_gates=["non_goals", "decision_boundaries"],
            ),
            WorkflowTimelineEntry(
                timestamp="2026-04-06T15:02:00Z",
                kind="reentry",
                mode="plan",
                reason_code="full_replan_required",
                summary="plan: clarify and plan artifacts drifted; rebuilding the plan",
                decision_kind="reentry",
                scheduled_next_mode="execute",
                unresolved_questions=["Touched files outside the current plan: notes.txt"],
                evidence_summary=[
                    "confirmed touchpoint: `notes.txt` was already touched during execution.",
                    (
                        "verification contradiction: Failed verification exposed "
                        "missing brief coverage for `notes.txt exists`."
                    ),
                ],
                signal_summary=["recent_reentry=1", "stale_plan=true"],
                artifact_paths=[
                    str(brief_new),
                    str(plan_new_root / "implementation.md"),
                    str(plan_new_root / "verification.md"),
                ],
            ),
            WorkflowTimelineEntry(
                timestamp="2026-04-06T15:03:00Z",
                kind="verify_skip",
                mode="verify",
                reason_code="verify_skip_no_commands",
                summary="verify: no verification commands were available for this turn",
                decision_kind="forced",
                signal_summary=["verify_pressure=low"],
            ),
        ],
        workflow_ledger=WorkflowLedger(
            assumptions=[
                WorkflowLedgerItem(
                    text="notes.txt stays out of scope unless clarified otherwise.",
                    status="contradicted",
                    introduced_phase="clarify",
                    updated_phase="recovery",
                    evidence=["Clarify scope assumed `notes.txt` stayed out of scope."],
                )
            ],
            acceptance_anchors=[
                WorkflowLedgerItem(
                    text="notes.txt exists in the workspace root.",
                    status="changed",
                    introduced_phase="clarify",
                    updated_phase="recovery",
                    evidence=[
                        (
                            "Failed verification exposed missing brief coverage for "
                            "`notes.txt exists`."
                        )
                    ],
                )
            ],
            decision_boundaries=[
                WorkflowLedgerItem(
                    text="Escalate before broad UX changes.",
                    status="reopened",
                    introduced_phase="clarify",
                    updated_phase="recovery",
                    evidence=["The active task framing outgrew the persisted clarify brief."],
                )
            ],
        ),
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
410
411
def _persist_session_with_policy_accountability(temp_dir: Path) -> str:
    """Save a session whose timeline records repair and completion policy steps.

    The timeline holds three forced execute-mode entries: a repair retry, an
    accepted completion check, and a completion continue that carries
    contradicting evidence and a failed verification observation.

    Returns:
        The id of the saved session.
    """
    task = "Explain Loader policy accountability"
    base_sections = ("Runtime Config", "Workflow Context", "Mode Guidance")

    def forced_execute_entry(**fields: object) -> WorkflowTimelineEntry:
        # All three entries share mode, decision kind and prompt metadata.
        return WorkflowTimelineEntry(
            mode="execute",
            decision_kind="forced",
            prompt_format="native",
            prompt_sections=list(base_sections),
            **fields,
        )

    repair_entry = forced_execute_entry(
        timestamp="2026-04-06T16:01:00Z",
        kind="repair_retry",
        reason_code="raw_text_tool_recovered",
        summary="repair: recovered raw-text tool calls into executable tool invocations",
        policy_stage="raw_text_tool_fallback",
        policy_outcome="retry",
    )
    accepted_entry = forced_execute_entry(
        timestamp="2026-04-06T16:02:00Z",
        kind="completion_check",
        reason_code="completion_response_accepted",
        summary=(
            "completion: accepted the response because completion heuristics "
            "found no missing follow-through"
        ),
        policy_stage="continuation_check",
        policy_outcome="accept",
    )
    continue_entry = forced_execute_entry(
        timestamp="2026-04-06T16:03:00Z",
        kind="completion_continue",
        reason_code="verification_failed_reentry",
        summary=(
            "completion: continued after verification failed and the runtime "
            "re-entered execute mode"
        ),
        policy_stage="definition_of_done",
        policy_outcome="continue",
        evidence_provenance=[
            EvidenceProvenance(
                category="verification",
                source="dod.evidence",
                summary="verification failed for `pytest -q`",
                status="contradicts",
                subject="pytest -q",
            )
        ],
        verification_observations=[
            VerificationObservation(
                status="failed",
                summary="verification failed for `pytest -q`",
                command="pytest -q",
                kind="test",
                detail="1 failed",
            )
        ],
    )

    session = SessionSnapshot(
        session_id="20260406T160000Z-abcd1234",
        created_at="2026-04-06T16:00:00Z",
        updated_at="2026-04-06T16:03:00Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(
                role=Role.ASSISTANT,
                content="The runtime tracked repair and completion decisions.",
            ),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(base_sections),
        workflow_timeline=[repair_entry, accepted_entry, continue_entry],
    )
    SessionStore(temp_dir).save(session)
    return session.session_id
487
488
def _persist_session_with_pending_verification(temp_dir: Path) -> str:
    """Save a verify-mode session whose only timeline entry is a pending check.

    Returns:
        The id of the saved session.
    """
    task = "Verify the runtime changes"
    base_sections = ("Runtime Config", "Workflow Context", "Mode Guidance")

    pending_observation = VerificationObservation(
        status="pending",
        summary="verification pending for `uv run pytest -q`",
        command="uv run pytest -q",
        kind="test",
        attempt_id="verification-attempt-2",
        attempt_number=2,
    )
    pending_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:05:30Z",
        kind="verify_observation",
        mode="verify",
        reason_code="verification_pending",
        summary="verify: verification is pending for the active command set",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="pending",
        verification_observations=[pending_observation],
        prompt_format="native",
        prompt_sections=list(base_sections),
    )
    session = SessionSnapshot(
        session_id="20260406T160500Z-pending123",
        created_at="2026-04-06T16:05:00Z",
        updated_at="2026-04-06T16:05:30Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="Entering verification."),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="verify",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(base_sections),
        workflow_timeline=[pending_entry],
    )
    SessionStore(temp_dir).save(session)
    return session.session_id
532
533
def _persist_session_with_planned_verification(temp_dir: Path) -> str:
    """Save an execute-mode session whose only timeline entry is a planned check.

    Returns:
        The id of the saved session.
    """
    task = "Keep editing the runtime"
    base_sections = ("Runtime Config", "Workflow Context", "Mode Guidance")

    planned_observation = VerificationObservation(
        status="planned",
        summary="verification planned for `uv run pytest -q`",
        command="uv run pytest -q",
        kind="runtime",
        detail="write changed src/loader/runtime/tool_batches.py",
        attempt_id="verification-attempt-3",
        attempt_number=3,
    )
    planned_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:04:50Z",
        kind="verify_observation",
        mode="execute",
        reason_code="verification_planned",
        summary="verify: verification is planned after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="planned",
        verification_observations=[planned_observation],
        prompt_format="native",
        prompt_sections=list(base_sections),
    )
    session = SessionSnapshot(
        session_id="20260406T160430Z-plan1234",
        created_at="2026-04-06T16:04:30Z",
        updated_at="2026-04-06T16:04:50Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(
                role=Role.ASSISTANT,
                content="Verification will run after execution.",
            ),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(base_sections),
        workflow_timeline=[planned_entry],
    )
    SessionStore(temp_dir).save(session)
    return session.session_id
578
579
def _persist_session_with_stale_verification(temp_dir: Path) -> str:
    """Save an execute-mode session whose only timeline entry is a stale check.

    Returns:
        The id of the saved session.
    """
    task = "Keep working on the runtime"
    base_sections = ("Runtime Config", "Workflow Context", "Mode Guidance")

    stale_observation = VerificationObservation(
        status="stale",
        summary=(
            "verification became stale for `uv run pytest -q` "
            "after new mutating work"
        ),
        command="uv run pytest -q",
        kind="runtime",
        detail="write changed src/loader/runtime/finalization.py",
        attempt_id="verification-attempt-1",
        attempt_number=1,
        supersedes_attempt_id="verification-attempt-2",
    )
    stale_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:07:30Z",
        kind="verify_observation",
        mode="execute",
        reason_code="verification_stale",
        summary="verify: previous verification became stale after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="stale",
        verification_observations=[stale_observation],
        prompt_format="native",
        prompt_sections=list(base_sections),
    )
    session = SessionSnapshot(
        session_id="20260406T160700Z-stale1234",
        created_at="2026-04-06T16:07:00Z",
        updated_at="2026-04-06T16:07:30Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(
                role=Role.ASSISTANT,
                content="Fresh verification is required again.",
            ),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=list(base_sections),
        workflow_timeline=[stale_entry],
    )
    SessionStore(temp_dir).save(session)
    return session.session_id
628
629
@pytest.mark.asyncio
async def test_collect_doctor_report_passes_for_healthy_workspace(temp_dir: Path) -> None:
    """A healthy fake backend plus prepared state dirs gives an all-pass report."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)

    def healthy_backend(model: str) -> FakeOllamaBackend:
        return FakeOllamaBackend(
            model=model,
            health=True,
            models=[{"name": "qwen2.5-coder:14b"}],
            model_details={"details": {"family": "qwen2.5"}},
        )

    report = await collect_doctor_report(
        temp_dir,
        model="qwen2.5-coder:14b",
        backend_factory=healthy_backend,
    )

    assert report.overall_status == CheckStatus.PASS
    expected_names = {
        "backend",
        "capabilities",
        "workspace",
        "write_access",
        "commands",
        "state",
        "permissions",
    }
    assert {check.name for check in report.checks} == expected_names

    backend = next(check for check in report.checks if check.name == "backend")
    state = next(check for check in report.checks if check.name == "state")

    assert backend.status == CheckStatus.PASS
    assert state.status == CheckStatus.PASS
661
662
@pytest.mark.asyncio
async def test_collect_doctor_report_surfaces_backend_and_state_failures(temp_dir: Path) -> None:
    """An unhealthy backend and unparsable project memory both report FAIL."""
    _write_python_workspace(temp_dir)
    loader_root = temp_dir / ".loader"
    loader_root.mkdir()
    (loader_root / "project-memory.json").write_text("{broken json")

    def unhealthy_backend(model: str) -> FakeOllamaBackend:
        # Only a different model is listed, so the requested one is absent.
        return FakeOllamaBackend(
            model=model,
            health=False,
            models=[{"name": "llama3.1:8b"}],
            model_details=None,
        )

    report = await collect_doctor_report(
        temp_dir,
        model="missing-model:latest",
        backend_factory=unhealthy_backend,
    )

    backend = next(check for check in report.checks if check.name == "backend")
    state = next(check for check in report.checks if check.name == "state")

    assert report.overall_status == CheckStatus.FAIL
    assert backend.status == CheckStatus.FAIL
    assert "not pulled" in backend.message
    assert state.status == CheckStatus.FAIL
    assert "corrupted" in state.message
688
689
@pytest.mark.asyncio
async def test_collect_doctor_report_fails_closed_on_invalid_permission_rules(
    temp_dir: Path,
) -> None:
    """A permission-rules file with a non-list ``allow`` value fails the report."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    rules_file = temp_dir / ".loader" / "permission-rules.json"
    rules_file.write_text('{"allow": "nope"}\n')

    def healthy_backend(model: str) -> FakeOllamaBackend:
        return FakeOllamaBackend(
            model=model,
            health=True,
            models=[{"name": "qwen2.5-coder:14b"}],
        )

    report = await collect_doctor_report(
        temp_dir,
        model="qwen2.5-coder:14b",
        permission_mode="prompt",
        backend_factory=healthy_backend,
    )

    permissions = next(
        check for check in report.checks if check.name == "permissions"
    )
    assert report.overall_status == CheckStatus.FAIL
    assert permissions.status == CheckStatus.FAIL
    assert report.permission_rules_valid is False
    assert "invalid" in permissions.message.lower()
714
715
def test_status_and_session_surfaces_reflect_persisted_state(temp_dir: Path) -> None:
    """Status, session list and session detail all mirror the saved session.

    Persists the definition-of-done session and the explore snapshot, then
    checks the same stored values through each of the three read surfaces.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id, dod_path = _persist_session_with_dod(temp_dir)
    _persist_explore_snapshot(temp_dir)

    status = collect_status_snapshot(temp_dir, model="llama3.1:8b")
    summaries = list_session_summaries(temp_dir)
    detail = load_session_detail(session_id, project_root=temp_dir)

    # Expected values shared by more than one surface.
    rules_source = str(temp_dir / ".loader" / "permission-rules.json")
    rule_counts = {"allow": 1, "deny": 2, "ask": 1}
    reentry_code = "verification_failed_reentry"
    reason_summary = "verification failed; returning to execute for fixes"
    completion_summary = (
        "continued after verification failed and the runtime re-entered execute mode"
    )
    transition_summary = "completion -> finalize [terminal] Finalizing completed turn"

    # --- status snapshot ---
    assert status.active_session_id == session_id
    assert status.dod_status == "fixing"
    assert status.dod_pending_items_count == 1
    assert status.last_verification_result == "failed"
    assert status.active_dod_path == dod_path
    assert status.permission_mode == "prompt"
    assert status.runtime_owner_type == "RuntimeHandle"
    assert status.runtime_owner_path == "runtime-handle"
    assert status.permission_rule_counts == rule_counts
    assert status.permission_prompting_enabled is True
    assert status.permission_rules_valid is True
    assert status.permission_rules_source == rules_source
    assert status.prompt_format == "native"
    assert status.prompt_sections == [
        "Runtime Config",
        "Workflow Context",
        "Mode Guidance",
    ]
    assert status.workflow_reason_code == reentry_code
    assert status.workflow_reason_summary == reason_summary
    assert status.workflow_decision_kind == "reentry"
    assert status.workflow_scheduled_next_mode == "verify"
    assert status.active_turn_phase == "completion"
    assert status.completion_decision_code == reentry_code
    assert status.completion_decision_summary == completion_summary
    assert status.last_turn_transition_summary == transition_summary
    assert status.explore_turn_count == 2
    assert status.explore_message_count == 4
    assert status.explore_history_mode == "continue"
    assert status.explore_last_query == "What file did you mention?"
    assert status.explore_last_response == "I mentioned README.md."
    assert status.explore_updated_at is not None
    assert [item.status for item in status.recent_verification] == ["failed"]
    assert [item.command for item in status.recent_verification] == ["pytest -q"]
    assert [item.detail for item in status.recent_verification] == ["1 failed"]

    # --- session list ---
    assert len(summaries) == 1
    assert summaries[0].session_id == session_id
    assert summaries[0].is_current is True
    assert summaries[0].runtime_owner_type == "RuntimeHandle"
    assert summaries[0].runtime_owner_path == "runtime-handle"
    assert summaries[0].dod_status == "fixing"
    assert summaries[0].permission_prompting_enabled is True
    assert summaries[0].permission_rule_counts == rule_counts
    assert summaries[0].permission_rules_source == rules_source
    assert summaries[0].prompt_format == "native"
    assert summaries[0].workflow_reason_code == reentry_code
    assert summaries[0].workflow_reason_summary == reason_summary
    assert summaries[0].workflow_decision_kind == "reentry"
    assert summaries[0].completion_decision_code == reentry_code
    assert summaries[0].completion_decision_summary == completion_summary
    assert summaries[0].last_turn_transition_summary == transition_summary

    # --- session detail ---
    assert detail.snapshot.session_id == session_id
    assert detail.is_current is True
    assert detail.snapshot.runtime_owner_type == "RuntimeHandle"
    assert detail.snapshot.runtime_owner_path == "runtime-handle"
    assert detail.definition_of_done is not None
    assert detail.definition_of_done.status == "fixing"
    assert detail.snapshot.permission_rules_source == rules_source
    assert detail.snapshot.workflow_reason_code == reentry_code
    assert detail.snapshot.last_completion_decision_code == reentry_code
    assert [entry.decision_code for entry in detail.snapshot.completion_trace] == [
        "completion_response_accepted",
        reentry_code,
    ]
    assert [item.status for item in detail.recent_verification] == ["failed"]
    assert [item.command for item in detail.recent_verification] == ["pytest -q"]
    assert detail.snapshot.last_turn_transition_reason_code == "turn_complete"
    assert len(detail.snapshot.workflow_timeline) == 2
    assert detail.snapshot.workflow_timeline[-1].scheduled_next_mode == "verify"
820
821
def test_collect_workflow_timeline_reflects_persisted_history(temp_dir: Path) -> None:
    """The unfiltered timeline returns both saved entries in stored order."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id, _unused_dod_path = _persist_session_with_dod(temp_dir)

    timeline = collect_workflow_timeline(project_root=temp_dir)

    assert timeline.session_id == session_id
    assert timeline.is_current is True
    assert timeline.runtime_owner_type == "RuntimeHandle"
    assert timeline.runtime_owner_path == "runtime-handle"
    assert timeline.workflow_mode == "execute"
    assert timeline.current_task == "Fix the failing tests"
    assert timeline.total_entries == 2
    entry_kinds = [entry.kind for entry in timeline.entries]
    assert entry_kinds == ["handoff", "reentry"]
    assert timeline.entries[-1].reason_code == "verification_failed_reentry"
838
839
def test_collect_workflow_timeline_supports_filters_and_highlights(
    temp_dir: Path,
) -> None:
    """Filtering by mode with a limit narrows entries but keeps the total count."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_rich_workflow(temp_dir)

    timeline = collect_workflow_timeline(
        project_root=temp_dir,
        mode="clarify",
        limit=1,
    )

    assert timeline.session_id == session_id
    assert timeline.total_entries == 3
    assert timeline.selected_mode == "clarify"
    assert timeline.selected_kind is None
    assert timeline.entry_limit == 1
    assert len(timeline.entries) == 1
    assert timeline.entries[0].kind == "clarify_continue"
    assert timeline.entries[0].clarify_stage == "readiness"
    assert timeline.entries[0].clarify_pressure_kind == "tradeoff"
    expected_gates = ["non_goals", "decision_boundaries"]
    assert timeline.entries[0].missing_readiness_gates == expected_gates
    assert any(item.startswith("Asked again:") for item in timeline.highlights)
    assert timeline.workflow_ledger.assumptions[0].status == "contradicted"
    assert any(
        item.startswith("Contradicted assumptions:") for item in timeline.highlights
    )
872
873
def test_collect_workflow_timeline_highlights_policy_accountability(
    temp_dir: Path,
) -> None:
    """Repair and completion policy entries show up in the timeline highlights."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_policy_accountability(temp_dir)

    timeline = collect_workflow_timeline(project_root=temp_dir)

    assert timeline.session_id == session_id
    entry_kinds = [entry.kind for entry in timeline.entries]
    assert entry_kinds == [
        "repair_retry",
        "completion_check",
        "completion_continue",
    ]
    assert any(item.startswith("Repair path:") for item in timeline.highlights)
    assert any(item.startswith("Completion decision:") for item in timeline.highlights)
    assert any(
        "policy-stage=definition_of_done" in item for item in timeline.highlights
    )
894
895
def test_collect_status_snapshot_includes_latest_policy_summary(
    temp_dir: Path,
) -> None:
    """Expect the status snapshot to carry the latest policy summary and evidence."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_policy_accountability(temp_dir)

    status = collect_status_snapshot(temp_dir)

    summary = status.latest_policy_summary
    assert summary is not None
    for fragment in (
        "verification_failed_reentry",
        "observed=verification failed for `pytest -q` [1 failed]",
        "policy-stage=definition_of_done",
    ):
        assert fragment in summary

    assert status.latest_policy_blocking_evidence == [
        "verification failed for `pytest -q`"
    ]
    assert status.latest_policy_observed_verification == [
        "verification failed for `pytest -q` [1 failed]"
    ]

    # A single failed verification record is expected.
    recent = status.recent_verification
    assert [record.status for record in recent] == ["failed"]
    assert [record.command for record in recent] == ["pytest -q"]
    assert [record.detail for record in recent] == ["1 failed"]
920
921
def test_collect_status_snapshot_surfaces_pending_verification(
    temp_dir: Path,
) -> None:
    """Expect a pending verification to appear in the status snapshot."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_pending_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    summary = status.latest_policy_summary
    assert summary is not None
    for fragment in ("verification_pending", "policy-outcome=pending"):
        assert fragment in summary

    assert status.latest_policy_observed_verification == [
        "verification pending for `uv run pytest -q` [attempt 2]"
    ]

    recent = status.recent_verification
    assert [record.status for record in recent] == ["pending"]
    assert [record.command for record in recent] == [
        "uv run pytest -q"
    ]
    assert [record.attempt for record in recent] == ["attempt 2"]
942
943
def test_collect_status_snapshot_surfaces_planned_verification(
    temp_dir: Path,
) -> None:
    """Expect a planned verification to appear in the status snapshot."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_planned_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    summary = status.latest_policy_summary
    assert summary is not None
    for fragment in ("verification_planned", "policy-outcome=planned"):
        assert fragment in summary

    assert status.latest_policy_observed_verification == [
        "verification planned for `uv run pytest -q` [write changed src/loader/runtime/tool_batches.py; attempt 3]"
    ]

    recent = status.recent_verification
    assert [record.status for record in recent] == ["planned"]
    assert [record.command for record in recent] == [
        "uv run pytest -q"
    ]
    assert [record.attempt for record in recent] == ["attempt 3"]
    assert [record.detail for record in recent] == [
        "write changed src/loader/runtime/tool_batches.py"
    ]
967
968
def test_collect_status_snapshot_surfaces_stale_verification(
    temp_dir: Path,
) -> None:
    """Expect a stale verification to appear in the status snapshot."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_stale_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    summary = status.latest_policy_summary
    assert summary is not None
    for fragment in ("verification_stale", "policy-outcome=stale"):
        assert fragment in summary

    assert status.latest_policy_observed_verification == [
        "verification became stale for `uv run pytest -q` after new mutating work [write changed src/loader/runtime/finalization.py; attempt 1 -> attempt 2]"
    ]

    recent = status.recent_verification
    assert [record.status for record in recent] == ["stale"]
    assert [record.command for record in recent] == [
        "uv run pytest -q"
    ]
    assert [record.attempt for record in recent] == [
        "attempt 1 -> attempt 2"
    ]
    assert [record.detail for record in recent] == [
        "write changed src/loader/runtime/finalization.py"
    ]
994
995
def test_collect_prompt_diff_uses_persisted_prompt_history(temp_dir: Path) -> None:
    """Expect the prompt diff to compare the two persisted prompt snapshots."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    expected_session, _ = _persist_session_with_dod(temp_dir)

    prompt_diff = collect_prompt_diff(project_root=temp_dir)

    assert prompt_diff.session_id == expected_session

    before, after = prompt_diff.previous, prompt_diff.current
    assert before is not None
    assert after is not None
    assert after.workflow_mode == "execute"
    assert before.workflow_mode == "verify"

    assert any("Workflow mode changed:" in note for note in prompt_diff.highlights)
    unified = prompt_diff.unified_diff
    assert "---" in unified
    assert "execute parser fix" in unified
1011
1012
def test_collect_workflow_artifact_diffs_reads_versioned_artifacts(
    temp_dir: Path,
) -> None:
    """Expect one diff entry for each of the three workflow artifact kinds."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    expected_session = _persist_session_with_rich_workflow(temp_dir)

    artifact_diffs = collect_workflow_artifact_diffs(project_root=temp_dir)

    assert artifact_diffs.session_id == expected_session
    assert len(artifact_diffs.entries) == 3

    seen_kinds = {diff_entry.kind for diff_entry in artifact_diffs.entries}
    assert seen_kinds == {
        "clarify_brief",
        "implementation_plan",
        "verification_plan",
    }
    assert any(
        "notes.txt" in diff_entry.unified_diff
        for diff_entry in artifact_diffs.entries
    )
    assert artifact_diffs.highlights
1031
1032
def test_status_and_session_commands_render_persisted_state(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run the status, session list/show, and workflow show commands.

    All four commands are invoked from inside ``temp_dir`` before any output
    is inspected; each output is then checked for the expected fragments in
    the listed order.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id, _ = _persist_session_with_dod(temp_dir)
    _persist_explore_snapshot(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    status_result = cli_runner.invoke(
        cli_main_module.status_cli, ["--model", "llama3.1:8b"]
    )
    list_result = cli_runner.invoke(cli_main_module.session_cli, ["list"])
    show_result = cli_runner.invoke(cli_main_module.session_cli, ["show", session_id])
    workflow_result = cli_runner.invoke(cli_main_module.workflow_cli, ["show"])

    # --- status ---
    assert status_result.exit_code == 0
    for fragment in (
        session_id,
        "fixing",
        "Runtime Owner",
        "runtime-handle (RuntimeHandle)",
        "1 allow / 2 deny / 1 ask",
        "native",
        "Runtime Config, Workflow Context, Mode Guidance",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "continued after verification failed",
        "completion -> finalize",
        "Finalizing completed turn",
        "Explore Turns",
        "Explore History",
        "What file did you mention?",
        "pytest -q",
        "1 failed",
    ):
        assert fragment in status_result.output

    # --- session list ---
    assert list_result.exit_code == 0
    for fragment in (
        session_id,
        "Runtime Owner",
        "runtime-handle (RuntimeHandle)",
        "1 allow / 2 deny / 1 ask",
        "prompting enabled",
        "native",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "completion -> finalize",
    ):
        assert fragment in list_result.output

    # --- session show ---
    assert show_result.exit_code == 0
    for fragment in (
        session_id,
        "Runtime Owner",
        "runtime-handle (RuntimeHandle)",
        "Patch the broken parser",
        "1 allow / 2 deny / 1 ask",
        "enabled",
        "Runtime Config, Workflow Context, Mode Guidance",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "Completion Trace",
        "Recent Verification",
        "continuation_check",
        "completion -> finalize",
        "Finalizing completed turn",
    ):
        assert fragment in show_result.output
    # This session's show output must not contain a policy timeline heading.
    assert "Policy Timeline" not in show_result.output
    for fragment in (
        "Workflow Timeline",
        "handoff",
        "next=verify",
        "pytest -q",
        "1 failed",
    ):
        assert fragment in show_result.output

    # --- workflow show ---
    assert workflow_result.exit_code == 0
    for fragment in (
        "Loader Workflow",
        "Workflow Timeline",
        session_id,
        "Runtime Owner",
        "runtime-handle (RuntimeHandle)",
        "handoff",
        "next=verify",
    ):
        assert fragment in workflow_result.output
1113
1114
def test_workflow_command_renders_policy_accountability_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check ``workflow show`` output with and without the ``--policy`` flag."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_policy_accountability(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    full_view = cli_runner.invoke(cli_main_module.workflow_cli, ["show"])

    assert full_view.exit_code == 0
    for fragment in (
        session_id,
        "repair_retry",
        "Repair path:",
        "Completion decision:",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "policy-stage=raw_text_tool_fallback",
        "policy-outcome=continue",
        "provenance=contradicts:verification@dod.evidence",
        "observed=verification failed for `pytest -q` [1 failed]",
    ):
        assert fragment in full_view.output

    policy_view = cli_runner.invoke(
        cli_main_module.workflow_cli, ["show", "--policy"]
    )

    assert policy_view.exit_code == 0
    for fragment in (
        "Loader Workflow",
        "Policy Timeline",
        "policy-only",
        "repair_retry",
        "verification_failed_reentry",
    ):
        assert fragment in policy_view.output
    # The policy-only view is expected to omit this word entirely.
    assert "handoff" not in policy_view.output
1152
1153
def test_workflow_command_renders_stale_verification_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect ``workflow show`` to describe a stale verification."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_stale_verification(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.workflow_cli, ["show"])

    assert outcome.exit_code == 0
    for fragment in (
        session_id,
        "Verify stale:",
        "verification_stale",
        "policy-outcome=stale",
        "Observed Verification",
        "uv run pytest -q",
        "new mutating work",
    ):
        assert fragment in outcome.output
1175
1176
def test_workflow_command_renders_planned_verification_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect ``workflow show`` to describe a planned verification."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_planned_verification(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.workflow_cli, ["show"])

    assert outcome.exit_code == 0
    for fragment in (
        session_id,
        "Verify planned:",
        "verification_planned",
        "policy-outcome=planned",
        "Observed Verification",
        "verification planned for `uv run pytest -q`",
        "uv run pytest -q",
    ):
        assert fragment in outcome.output
1198
1199
def test_collect_workflow_timeline_can_focus_on_policy_accountability(
    temp_dir: Path,
) -> None:
    """Request the timeline with ``accountability_only`` and check its entries."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    expected_session = _persist_session_with_policy_accountability(temp_dir)

    timeline = collect_workflow_timeline(
        project_root=temp_dir,
        accountability_only=True,
    )

    assert timeline.session_id == expected_session
    assert timeline.selected_accountability_only is True

    observed_kinds = [entry.kind for entry in timeline.entries]
    assert observed_kinds == [
        "repair_retry",
        "completion_check",
        "completion_continue",
    ]
1219
1220
def test_session_show_renders_policy_timeline_preview(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect ``session show`` to include policy sections for this session."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_policy_accountability(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    shown = cli_runner.invoke(cli_main_module.session_cli, ["show", session_id])

    assert shown.exit_code == 0
    for fragment in (
        "Latest Policy",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "Policy Timeline",
        "repair_retry",
        "completion:",
        "provenance=contradicts:verification@dod.evidence",
    ):
        assert fragment in shown.output
1245
1246
def test_status_command_renders_latest_policy_summary(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect the status command (no arguments) to print the latest policy summary."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_policy_accountability(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.status_cli, [])

    assert outcome.exit_code == 0
    for fragment in (
        session_id,
        "Latest Policy",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "Recent Verification",
        "policy-stage=definition_of_done",
    ):
        assert fragment in outcome.output
1270
1271
def test_workflow_show_renders_workflow_ledger(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect ``workflow show`` to print the workflow ledger sections."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_rich_workflow(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.workflow_cli, ["show"])

    assert outcome.exit_code == 0
    for fragment in (
        "Workflow Ledger",
        "Assumptions",
        "contradicted",
        "notes.txt stays out of scope",
        "Acceptance Anchors",
        "Decision Boundaries",
    ):
        assert fragment in outcome.output
1292
1293
def test_workflow_show_command_supports_filters_and_highlights(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Exercise ``workflow show`` with ``--kind``/``--mode`` and ``--limit`` filters."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_rich_workflow(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    # First pass: filter by entry kind.
    reentry_view = cli_runner.invoke(
        cli_main_module.workflow_cli,
        ["show", "--kind", "reentry", "--limit", "1", session_id],
    )

    assert reentry_view.exit_code == 0
    for fragment in (
        "Loader Workflow",
        "1 shown / 3 total",
        "kind=reentry, limit=1",
        "Workflow Answers",
        "Recovered workflow:",
        "full_replan_required",
        "evidence=confirmed touchpoint:",
    ):
        assert fragment in reentry_view.output

    # Second pass: filter by workflow mode.
    clarify_view = cli_runner.invoke(
        cli_main_module.workflow_cli,
        ["show", "--mode", "clarify", "--limit", "1", session_id],
    )

    assert clarify_view.exit_code == 0
    for fragment in (
        "stage=readiness",
        "pressure=tradeoff",
        "gates=non_goals,decision_boundaries",
    ):
        assert fragment in clarify_view.output
1328
1329
def test_workflow_show_can_render_artifact_diffs(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect ``workflow show --diff --full-diff`` to print the artifact diffs."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_rich_workflow(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(
        cli_main_module.workflow_cli,
        ["show", "--diff", "--full-diff"],
    )

    assert outcome.exit_code == 0
    for fragment in (
        "Artifact Changes",
        "Artifact Diff Summary",
        "clarify_brief",
        "implementation_plan",
        "verification_plan",
        "notes.txt",
    ):
        assert fragment in outcome.output
1353
1354
def test_collect_prompt_preview_uses_persisted_runtime_state(temp_dir: Path) -> None:
    """Expect the prompt preview to reflect the persisted session state."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    expected_session, _ = _persist_session_with_dod(temp_dir)

    preview = collect_prompt_preview(
        temp_dir,
        model="qwen2.5-coder:14b",
    )

    assert preview.active_session_id == expected_session
    assert preview.workflow_mode == "execute"
    assert preview.workflow_reason_code == "verification_failed_reentry"
    assert preview.workflow_decision_kind == "reentry"
    assert preview.permission_mode == "prompt"

    # The expected format follows the preview's own capability profile.
    if preview.capability_profile.supports_native_tools:
        expected_format = "native"
    else:
        expected_format = "react"
    assert preview.prompt_format == expected_format

    assert preview.prompt_sections == [
        "Runtime Config",
        "Workflow Context",
        "Mode Guidance",
        "Project Context",
        "Project Tips",
    ]
    body = preview.content
    assert "## Execute Mode" in body
    assert "Current task: Fix the failing tests" in body
1382
1383
def test_prompt_show_command_renders_preview_without_model_call(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Compare ``prompt show`` output with a directly collected preview."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_dod(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)
    # Collected with the same model and task as the CLI call below, so its
    # prompt format can be looked for in the command output.
    reference_preview = collect_prompt_preview(
        temp_dir,
        model="qwen2.5-coder:14b",
        current_task="Preview the current Loader contract",
    )

    outcome = cli_runner.invoke(
        cli_main_module.prompt_cli,
        ["show", "--model", "qwen2.5-coder:14b", "Preview the current Loader contract"],
    )

    assert outcome.exit_code == 0
    for fragment in (
        "Prompt Preview",
        "Prompt Body",
        "Preview the current Loader contract",
        reference_preview.prompt_format,
        "Workflow Context",
        "Execute Mode",
    ):
        assert fragment in outcome.output
1412
1413
def test_prompt_diff_command_renders_persisted_prompt_changes(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect ``prompt diff --full`` to print the persisted prompt changes."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_dod(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.prompt_cli, ["diff", "--full"])

    assert outcome.exit_code == 0
    for fragment in (
        "Prompt Diff",
        "Prompt Changes",
        "Workflow mode changed:",
        "Prompt Unified Diff",
        "execute parser fix",
    ):
        assert fragment in outcome.output
1433
1434
def test_permission_snapshot_and_dry_run_reflect_rules(temp_dir: Path) -> None:
    """Write a rules file, then check the permission snapshot and a dry run.

    The dry-run input is a ``write`` to ``README.md`` whose content contains
    "safe change"; the test expects the ``ask`` rule to be the one reported.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    rules_lines = [
        "{",
        ' "allow": [{"tool": "write", "contains": "safe change"}],',
        ' "deny": [{"tool": "write", "path_contains": "secrets"}],',
        ' "ask": [{"tool": "write", "path_contains": "README"}]',
        "}",
    ]
    rules_path = temp_dir / ".loader" / "permission-rules.json"
    rules_path.write_text("\n".join(rules_lines) + "\n")

    permissions = collect_permission_snapshot(temp_dir, permission_mode="allow")
    tool_input = {
        "file_path": str(temp_dir / "README.md"),
        "content": "safe change\n",
    }
    dry_run = dry_run_permission_check(
        "write",
        tool_input,
        project_root=temp_dir,
        permission_mode="allow",
    )

    # Snapshot of the loaded rules.
    assert permissions.active_mode == "allow"
    assert permissions.prompting_enabled is True
    assert permissions.rules_valid is True
    assert permissions.rule_counts == {"allow": 1, "deny": 1, "ask": 1}
    first_allow_rule = permissions.normalized_rules["allow"][0]
    assert first_allow_rule.tool_name == "write"
    assert first_allow_rule.contains == "safe change"

    # Outcome of the dry-run check.
    assert dry_run.required_mode == "workspace-write"
    assert dry_run.decision == "ask"
    assert dry_run.matched_disposition == "ask"
    assert dry_run.matched_rule == "tool=write, path_contains=README"
    assert "file_path=" in dry_run.input_summary
1474
1475
def test_status_snapshot_reports_invalid_permission_rules(temp_dir: Path) -> None:
    """Expect a malformed rules file to be flagged as invalid in the status snapshot.

    The snapshot should still report prompting as enabled and point at the
    rules file it tried to load.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    (temp_dir / ".loader" / "permission-rules.json").write_text("{broken json")

    snapshot = collect_status_snapshot(temp_dir, permission_mode="prompt")

    assert snapshot.permission_rules_valid is False
    assert snapshot.permission_prompting_enabled is True
    # Normalise separators before comparing so the check does not depend on
    # the platform's native path separator.
    rules_source = Path(snapshot.permission_rules_source).as_posix()
    assert rules_source.endswith(".loader/permission-rules.json")
1486
1487
def test_permissions_show_and_check_commands_render_policy(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run ``permissions show`` and ``permissions check`` against a rules file."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    rules_lines = [
        "{",
        ' "allow": [{"tool": "write", "contains": "safe change"}],',
        ' "ask": [{"tool": "write", "path_contains": "README"}]',
        "}",
    ]
    rules_path = temp_dir / ".loader" / "permission-rules.json"
    rules_path.write_text("\n".join(rules_lines) + "\n")
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    # Both commands run before either output is inspected.
    shown = cli_runner.invoke(
        cli_main_module.permissions_cli,
        ["show", "--permission-mode", "allow"],
    )
    check_args = [
        "check",
        "--permission-mode",
        "allow",
        "--args",
        '{"content":"safe change\\n"}',
        "write",
        "README.md",
    ]
    checked = cli_runner.invoke(cli_main_module.permissions_cli, check_args)

    assert shown.exit_code == 0
    for fragment in (
        "Loader Permissions",
        "Permission Mode",
        "Rules Source",
        "safe change",
        "README",
    ):
        assert fragment in shown.output

    assert checked.exit_code == 0
    for fragment in (
        "Permission Check",
        "workspace-write",
        "ask",
        "tool=write, path_contains=README",
    ):
        assert fragment in checked.output
1538
1539
def test_permissions_check_rejects_invalid_json_args(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect ``permissions check`` to fail when ``--args`` is not valid JSON."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    bad_args = ["check", "bash", "--args", "{broken json", "ls"]
    outcome = cli_runner.invoke(cli_main_module.permissions_cli, bad_args)

    # Any non-zero exit code is accepted; the message is what matters.
    assert outcome.exit_code != 0
    assert "`--args` must be valid JSON" in outcome.output
1557
1558
def test_permissions_show_surfaces_invalid_rule_file(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect ``permissions show`` to exit 0 and report a malformed rules file."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    rules_path = temp_dir / ".loader" / "permission-rules.json"
    rules_path.write_text("{broken json")
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.permissions_cli, ["show"])

    assert outcome.exit_code == 0
    # Case-insensitive match for the word "invalid".
    assert "invalid" in outcome.output.lower()
    for fragment in ("Rule Error", "Rules Source"):
        assert fragment in outcome.output
1576
1577
def test_explore_command_can_show_and_reset_continuity(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Show the persisted explore state, reset it, and confirm the store is empty."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_explore_snapshot(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    shown = cli_runner.invoke(cli_main_module.explore_cli, ["--status"])

    assert shown.exit_code == 0
    for fragment in (
        "Loader Explore State",
        "continue",
        "What file did you mention?",
    ):
        assert fragment in shown.output

    cleared = cli_runner.invoke(cli_main_module.explore_cli, ["--reset"])

    assert cleared.exit_code == 0
    assert "Cleared persisted explore continuity." in cleared.output
    # After the reset the store has nothing left to load.
    assert ExploreStateStore(temp_dir).load() is None
1601
1602
def test_root_help_lists_special_commands() -> None:
    """Expect the root help text to mention each special command."""
    rendered_help = cli_main_module._loader_help_text()

    for command in (
        "loader doctor",
        "loader status",
        "loader explore <prompt>",
        "loader permissions show",
        "loader session resume <id>",
    ):
        assert command in rendered_help
1611
1612
def test_main_dispatches_session_resume_to_primary_cli(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Expect ``loader session resume <id>`` to be rewritten for the primary CLI.

    ``cli.main`` is replaced with a recorder so the arguments that ``main()``
    forwards can be compared with the expected ``--resume-target`` form.
    """
    captured: dict[str, object] = {}

    def record_invocation(*, args: list[str], prog_name: str) -> None:
        captured.update(args=args, prog_name=prog_name)

    monkeypatch.setattr(cli_main_module.cli, "main", record_invocation)
    monkeypatch.setattr(
        sys, "argv", ["loader", "session", "resume", "abc123", "--no-tui"]
    )

    cli_main_module.main()

    expected_call = {
        "args": ["--resume-target", "abc123", "--no-tui"],
        "prog_name": "loader",
    }
    assert captured == expected_call