Python · 65883 bytes Raw Blame History
1 """Tests for doctor, status, and session inspection surfaces."""
2
3 from __future__ import annotations
4
5 import sys
6 from pathlib import Path
7
8 import pytest
9 from click.testing import CliRunner
10
11 import loader.cli.main as cli_main_module
12 from loader.llm.base import Message, Role
13 from loader.runtime.completion_trace import CompletionTraceEntry
14 from loader.runtime.dod import (
15 DefinitionOfDoneStore,
16 VerificationEvidence,
17 create_definition_of_done,
18 )
19 from loader.runtime.evidence_provenance import EvidenceProvenance
20 from loader.runtime.explore_state import ExploreSnapshot, ExploreStateStore
21 from loader.runtime.inspection import (
22 CheckStatus,
23 collect_doctor_report,
24 collect_permission_snapshot,
25 collect_prompt_diff,
26 collect_prompt_preview,
27 collect_status_snapshot,
28 collect_workflow_artifact_diffs,
29 collect_workflow_timeline,
30 dry_run_permission_check,
31 list_session_summaries,
32 load_session_detail,
33 )
34 from loader.runtime.prompt_history import PromptSnapshot
35 from loader.runtime.session import SessionSnapshot, SessionStore
36 from loader.runtime.verification_observations import VerificationObservation
37 from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem
38 from loader.runtime.workflow_policy import WorkflowTimelineEntry
39
40
41 class FakeOllamaBackend:
42 """Small async backend stub for doctor tests."""
43
44 def __init__(
45 self,
46 *,
47 model: str,
48 health: bool,
49 models: list[dict[str, object]],
50 model_details: dict[str, object] | None = None,
51 ) -> None:
52 self.model = model
53 self._health = health
54 self._models = models
55 self._model_details = model_details
56
57 async def list_models(self) -> list[dict[str, object]]:
58 return list(self._models)
59
60 async def health_check(self) -> bool:
61 return self._health
62
63 async def describe_model(self) -> dict[str, object] | None:
64 return self._model_details
65
66 async def close(self) -> None:
67 return None
68
69
def _write_python_workspace(temp_dir: Path) -> None:
    """Lay out a minimal Python project inside ``temp_dir``.

    Writes a ``pyproject.toml`` (hatchling build backend, pytest ``testpaths``
    pointing at ``tests``) and creates empty ``src`` and ``tests``
    directories. Calling it again on the same directory rewrites the file
    and leaves the existing directories in place.

    Args:
        temp_dir: Existing directory that becomes the project root.
    """
    pyproject_lines = [
        "[build-system]",
        'requires = ["hatchling"]',
        'build-backend = "hatchling.build"',
        "",
        "[tool.pytest.ini_options]",
        'testpaths = ["tests"]',
        "",
    ]
    # Explicit encoding keeps the file identical regardless of the locale.
    (temp_dir / "pyproject.toml").write_text(
        "\n".join(pyproject_lines) + "\n",
        encoding="utf-8",
    )
    for name in ("src", "tests"):
        # exist_ok mirrors _ensure_loader_dirs so repeated setup cannot fail.
        (temp_dir / name).mkdir(exist_ok=True)
87
88
def _ensure_loader_dirs(temp_dir: Path) -> None:
    """Create the ``.loader`` state layout under ``temp_dir``.

    Makes the ``sessions``, ``state``, ``dod``, ``briefs`` and ``plans``
    directories (parents included, existing ones tolerated) and writes an
    empty JSON object to ``.loader/project-memory.json``.

    Args:
        temp_dir: Directory that will contain the ``.loader`` folder.
    """
    loader_root = temp_dir / ".loader"
    for name in ("sessions", "state", "dod", "briefs", "plans"):
        (loader_root / name).mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the file identical regardless of the locale.
    (loader_root / "project-memory.json").write_text("{}\n", encoding="utf-8")
94
95
def _persist_session_with_dod(temp_dir: Path) -> tuple[str, str]:
    """Persist a session whose definition of done is in the ``fixing`` state.

    Saves a definition of done carrying one failed ``pytest -q`` evidence
    entry, then saves a session snapshot that references it. The snapshot
    holds a two-entry workflow timeline (verify handoff, then execute
    re-entry), two prompt snapshots and a two-step completion trace.

    Returns:
        ``(session_id, dod_path)``, with the saved definition-of-done path
        converted to a string.
    """
    task = "Fix the failing tests"
    reentry_code = "verification_failed_reentry"
    completion_summary = (
        "continued after verification failed and the runtime re-entered execute mode"
    )
    loader_root = temp_dir / ".loader"
    plan_artifact = str(loader_root / "plans" / "fix-tests.md")

    def base_sections() -> list[str]:
        # A fresh list per record so no two records share one list object.
        return ["Runtime Config", "Workflow Context", "Mode Guidance"]

    dod = create_definition_of_done(task)
    dod.status = "fixing"
    dod.pending_items = ["Re-run pytest"]
    dod.completed_items = ["Patch the broken parser"]
    dod.last_verification_result = "failed"
    failed_run = VerificationEvidence(
        command="pytest -q",
        passed=False,
        stderr="1 failed",
        kind="test",
    )
    dod.evidence = [failed_run]
    dod_path = DefinitionOfDoneStore(temp_dir).save(dod)

    verify_handoff = WorkflowTimelineEntry(
        timestamp="2026-04-06T12:04:00Z",
        kind="handoff",
        mode="verify",
        reason_code="execute_completed",
        summary="verify: execution completed; verifying the parser fix",
        decision_kind="handoff",
        prompt_format="native",
        prompt_sections=base_sections(),
        artifact_paths=[plan_artifact],
    )
    failed_observation = VerificationObservation(
        status="failed",
        summary="verification failed for `pytest -q`",
        command="pytest -q",
        kind="test",
        detail="1 failed",
    )
    execute_reentry = WorkflowTimelineEntry(
        timestamp="2026-04-06T12:05:00Z",
        kind="reentry",
        mode="execute",
        reason_code=reentry_code,
        summary="execute: verification failed; returning to execute for fixes",
        decision_kind="reentry",
        scheduled_next_mode="verify",
        runner_up_mode="verify",
        runner_up_score=0.52,
        verification_observations=[failed_observation],
        prompt_format="native",
        prompt_sections=base_sections(),
        artifact_paths=[plan_artifact],
    )
    workflow_timeline = [verify_handoff, execute_reentry]

    verify_prompt = PromptSnapshot(
        timestamp="2026-04-06T12:04:00Z",
        workflow_mode="verify",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=base_sections(),
        content="# Introduction\nverify parser fix\n",
    )
    execute_prompt = PromptSnapshot(
        timestamp="2026-04-06T12:05:00Z",
        workflow_mode="execute",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=[*base_sections(), "Project Context"],
        content="# Introduction\nexecute parser fix\n# Project Context\npython\n",
    )

    accepted_step = CompletionTraceEntry(
        stage="continuation_check",
        outcome="accept",
        decision_code="completion_response_accepted",
        decision_summary=(
            "accepted the response because completion heuristics found "
            "no missing follow-through"
        ),
    )
    continued_step = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="continue",
        decision_code=reentry_code,
        decision_summary=completion_summary,
    )

    snapshot = SessionSnapshot(
        session_id="20260406T120000Z-abcdef01",
        created_at="2026-04-06T12:00:00Z",
        updated_at="2026-04-06T12:05:00Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="I updated the parser."),
        ],
        usage={"turns": 1, "tool_calls": 2},
        active_dod_path=str(dod_path),
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="prompt",
        permission_prompting_enabled=True,
        permission_rule_counts={"allow": 1, "deny": 2, "ask": 1},
        permission_rules_source=str(loader_root / "permission-rules.json"),
        prompt_format="native",
        prompt_sections=base_sections(),
        prompt_history=[verify_prompt, execute_prompt],
        workflow_reason_code=reentry_code,
        workflow_reason_summary="verification failed; returning to execute for fixes",
        workflow_decision_kind="reentry",
        workflow_ambiguity_score=0.1,
        workflow_complexity_score=0.7,
        workflow_scheduled_next_mode="verify",
        active_turn_phase="completion",
        last_completion_decision_code=reentry_code,
        last_completion_decision_summary=completion_summary,
        completion_trace=[accepted_step, continued_step],
        last_turn_transition_summary=(
            "completion -> finalize [terminal] Finalizing completed turn"
        ),
        last_turn_transition_kind="terminal",
        last_turn_transition_reason_code="turn_complete",
        workflow_timeline=workflow_timeline,
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id, str(dod_path)
225
226
def _persist_explore_snapshot(temp_dir: Path) -> None:
    """Save a two-turn explore conversation about README.md for ``temp_dir``."""
    exchanges = [
        (Role.USER, "Where should I start?"),
        (Role.ASSISTANT, "Start with README.md."),
        (Role.USER, "What file did you mention?"),
        (Role.ASSISTANT, "I mentioned README.md."),
    ]
    transcript = [Message(role=role, content=text) for role, text in exchanges]
    explore_state = ExploreSnapshot(
        turn_count=2,
        model_name="llama3.1:8b",
        messages=transcript,
        last_history_mode="continue",
        last_query="What file did you mention?",
        last_response="I mentioned README.md.",
    )
    ExploreStateStore(temp_dir).save(explore_state)
243
244
def _persist_session_with_rich_workflow(temp_dir: Path) -> str:
    """Persist a session with old/new workflow artifacts and a populated ledger.

    Writes two generations of clarify briefs and of implementation and
    verification plans under ``.loader``, saves a definition of done that
    points at the newer generation, and saves a session snapshot with a
    three-entry timeline (clarify_continue, reentry, verify_skip) plus a
    workflow ledger holding one assumption, one acceptance anchor and one
    decision boundary.

    The ``.loader/briefs`` directory must already exist; the plan
    directories are created here.

    Returns:
        The saved session id.
    """
    slug = "tighten-loader-workflow-behavior"
    task = "Tighten Loader workflow behavior"
    anchor_text = "notes.txt exists in the workspace root."
    coverage_gap = (
        "Failed verification exposed missing brief coverage for `notes.txt exists`."
    )

    briefs_dir = temp_dir / ".loader" / "briefs"
    plans_dir = temp_dir / ".loader" / "plans"
    brief_old = briefs_dir / f"20260406T150000Z-{slug}.md"
    brief_new = briefs_dir / f"20260406T150200Z-{slug}.md"
    plan_old_root = plans_dir / f"20260406T150100Z-{slug}"
    plan_new_root = plans_dir / f"20260406T150300Z-{slug}"
    implementation_new = plan_new_root / "implementation.md"
    verification_new = plan_new_root / "verification.md"

    # Briefs are written first, then plan directories, then plan files.
    brief_old.write_text(
        "# Task Brief\n\n## Likely Touchpoints\n- planned.txt\n\n"
        "## Acceptance Criteria\n- planned.txt exists.\n"
    )
    brief_new.write_text(
        "# Task Brief\n\n## Likely Touchpoints\n- notes.txt\n\n"
        "## Acceptance Criteria\n- notes.txt exists.\n"
    )
    for plan_root in (plan_old_root, plan_new_root):
        plan_root.mkdir(parents=True, exist_ok=True)
    plan_files = {
        plan_old_root / "implementation.md": (
            "# Implementation Plan\n\n## File Changes\n- Create planned.txt.\n"
        ),
        plan_old_root / "verification.md": (
            "# Verification Plan\n\n## Acceptance Criteria\n- planned.txt exists.\n"
        ),
        implementation_new: (
            "# Implementation Plan\n\n## File Changes\n"
            "- Keep notes.txt as the runtime artifact.\n"
        ),
        verification_new: (
            "# Verification Plan\n\n## Acceptance Criteria\n- notes.txt exists.\n"
        ),
    }
    for plan_path, plan_text in plan_files.items():
        plan_path.write_text(plan_text)

    dod = create_definition_of_done(task)
    dod.status = "fixing"
    dod.clarify_brief = str(brief_new)
    dod.implementation_plan = str(implementation_new)
    dod.verification_plan = str(verification_new)
    dod.acceptance_criteria = [anchor_text]
    dod_path = DefinitionOfDoneStore(temp_dir).save(dod)

    def base_sections() -> list[str]:
        # A fresh list per record so no two records share one list object.
        return ["Runtime Config", "Workflow Context", "Mode Guidance"]

    plan_prompt = PromptSnapshot(
        timestamp="2026-04-06T15:02:00Z",
        workflow_mode="plan",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=base_sections(),
        content="# Introduction\nplan around planned.txt\n",
    )
    execute_prompt = PromptSnapshot(
        timestamp="2026-04-06T15:04:00Z",
        workflow_mode="execute",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=[*base_sections(), "Project Context"],
        content="# Introduction\nexecute around notes.txt\n# Project Context\npython\n",
    )

    clarify_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T15:01:00Z",
        kind="clarify_continue",
        mode="clarify",
        reason_code="clarify_pressure_pass_required",
        summary="clarify: Loader still needs a tradeoff pass around non-goals",
        decision_kind="forced",
        unresolved_questions=["Concrete files or subsystems are still not pinned down."],
        signal_summary=["ambiguity=0.82", "open_questions=1"],
        clarify_stage="readiness",
        clarify_pressure_kind="tradeoff",
        pressure_pass_complete=False,
        missing_readiness_gates=["non_goals", "decision_boundaries"],
    )
    replan_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T15:02:00Z",
        kind="reentry",
        mode="plan",
        reason_code="full_replan_required",
        summary="plan: clarify and plan artifacts drifted; rebuilding the plan",
        decision_kind="reentry",
        scheduled_next_mode="execute",
        unresolved_questions=["Touched files outside the current plan: notes.txt"],
        evidence_summary=[
            "confirmed touchpoint: `notes.txt` was already touched during execution.",
            f"verification contradiction: {coverage_gap}",
        ],
        signal_summary=["recent_reentry=1", "stale_plan=true"],
        artifact_paths=[
            str(brief_new),
            str(implementation_new),
            str(verification_new),
        ],
    )
    verify_skip_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T15:03:00Z",
        kind="verify_skip",
        mode="verify",
        reason_code="verify_skip_no_commands",
        summary="verify: no verification commands were available for this turn",
        decision_kind="forced",
        signal_summary=["verify_pressure=low"],
    )

    ledger = WorkflowLedger(
        assumptions=[
            WorkflowLedgerItem(
                text="notes.txt stays out of scope unless clarified otherwise.",
                status="contradicted",
                introduced_phase="clarify",
                updated_phase="recovery",
                evidence=["Clarify scope assumed `notes.txt` stayed out of scope."],
            )
        ],
        acceptance_anchors=[
            WorkflowLedgerItem(
                text=anchor_text,
                status="changed",
                introduced_phase="clarify",
                updated_phase="recovery",
                evidence=[coverage_gap],
            )
        ],
        decision_boundaries=[
            WorkflowLedgerItem(
                text="Escalate before broad UX changes.",
                status="reopened",
                introduced_phase="clarify",
                updated_phase="recovery",
                evidence=["The active task framing outgrew the persisted clarify brief."],
            )
        ],
    )

    snapshot = SessionSnapshot(
        session_id="20260406T150000Z-feedface",
        created_at="2026-04-06T15:00:00Z",
        updated_at="2026-04-06T15:04:00Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="I refreshed the workflow contract."),
        ],
        active_dod_path=str(dod_path),
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="prompt",
        permission_prompting_enabled=True,
        prompt_format="native",
        prompt_sections=base_sections(),
        prompt_history=[plan_prompt, execute_prompt],
        workflow_reason_code="full_replan_completed",
        workflow_reason_summary=(
            "clarify and plan artifacts refreshed; returning to execute"
        ),
        workflow_decision_kind="handoff",
        workflow_timeline=[clarify_entry, replan_entry, verify_skip_entry],
        workflow_ledger=ledger,
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
410
411
def _persist_session_with_policy_accountability(temp_dir: Path) -> str:
    """Persist a session whose timeline records three policy decisions.

    The entries are, in order: a raw-text tool repair retry, an accepted
    completion check, and a completion continue that carries one
    contradicting evidence-provenance record and one failed ``pytest -q``
    verification observation.

    Returns:
        The saved session id.
    """
    task = "Explain Loader policy accountability"
    failure_summary = "verification failed for `pytest -q`"

    def base_sections() -> list[str]:
        # A fresh list per record so no two records share one list object.
        return ["Runtime Config", "Workflow Context", "Mode Guidance"]

    repair_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:01:00Z",
        kind="repair_retry",
        mode="execute",
        reason_code="raw_text_tool_recovered",
        summary=(
            "repair: recovered raw-text tool calls into executable tool invocations"
        ),
        decision_kind="forced",
        policy_stage="raw_text_tool_fallback",
        policy_outcome="retry",
        prompt_format="native",
        prompt_sections=base_sections(),
    )
    accepted_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:02:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="completion_response_accepted",
        summary=(
            "completion: accepted the response because completion heuristics "
            "found no missing follow-through"
        ),
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="accept",
        prompt_format="native",
        prompt_sections=base_sections(),
    )
    continued_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:03:00Z",
        kind="completion_continue",
        mode="execute",
        reason_code="verification_failed_reentry",
        summary=(
            "completion: continued after verification failed and the runtime "
            "re-entered execute mode"
        ),
        decision_kind="forced",
        policy_stage="definition_of_done",
        policy_outcome="continue",
        evidence_provenance=[
            EvidenceProvenance(
                category="verification",
                source="dod.evidence",
                summary=failure_summary,
                status="contradicts",
                subject="pytest -q",
            )
        ],
        verification_observations=[
            VerificationObservation(
                status="failed",
                summary=failure_summary,
                command="pytest -q",
                kind="test",
                detail="1 failed",
            )
        ],
        prompt_format="native",
        prompt_sections=base_sections(),
    )

    snapshot = SessionSnapshot(
        session_id="20260406T160000Z-abcd1234",
        created_at="2026-04-06T16:00:00Z",
        updated_at="2026-04-06T16:03:00Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(
                role=Role.ASSISTANT,
                content="The runtime tracked repair and completion decisions.",
            ),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=base_sections(),
        workflow_timeline=[repair_entry, accepted_entry, continued_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
487
488
def _persist_session_with_pending_verification(temp_dir: Path) -> str:
    """Persist a verify-mode session whose only timeline entry is pending.

    The single ``verify_observation`` entry carries one observation with
    status ``pending`` for ``uv run pytest -q`` (attempt 2).

    Returns:
        The saved session id.
    """
    task = "Verify the runtime changes"
    command = "uv run pytest -q"

    def base_sections() -> list[str]:
        # A fresh list per record so no two records share one list object.
        return ["Runtime Config", "Workflow Context", "Mode Guidance"]

    pending_observation = VerificationObservation(
        status="pending",
        summary="verification pending for `uv run pytest -q`",
        command=command,
        kind="test",
        attempt_id="verification-attempt-2",
        attempt_number=2,
    )
    pending_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:05:30Z",
        kind="verify_observation",
        mode="verify",
        reason_code="verification_pending",
        summary="verify: verification is pending for the active command set",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="pending",
        verification_observations=[pending_observation],
        prompt_format="native",
        prompt_sections=base_sections(),
    )
    snapshot = SessionSnapshot(
        session_id="20260406T160500Z-pending123",
        created_at="2026-04-06T16:05:00Z",
        updated_at="2026-04-06T16:05:30Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(role=Role.ASSISTANT, content="Entering verification."),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="verify",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=base_sections(),
        workflow_timeline=[pending_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
532
533
def _persist_session_with_planned_verification(temp_dir: Path) -> str:
    """Persist an execute-mode session whose only timeline entry is planned.

    The single ``verify_observation`` entry carries one observation with
    status ``planned`` for ``uv run pytest -q`` (attempt 3).

    Returns:
        The saved session id.
    """
    task = "Keep editing the runtime"
    command = "uv run pytest -q"

    def base_sections() -> list[str]:
        # A fresh list per record so no two records share one list object.
        return ["Runtime Config", "Workflow Context", "Mode Guidance"]

    planned_observation = VerificationObservation(
        status="planned",
        summary="verification planned for `uv run pytest -q`",
        command=command,
        kind="runtime",
        detail="write changed src/loader/runtime/tool_batches.py",
        attempt_id="verification-attempt-3",
        attempt_number=3,
    )
    planned_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:04:50Z",
        kind="verify_observation",
        mode="execute",
        reason_code="verification_planned",
        summary="verify: verification is planned after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="planned",
        verification_observations=[planned_observation],
        prompt_format="native",
        prompt_sections=base_sections(),
    )
    snapshot = SessionSnapshot(
        session_id="20260406T160430Z-plan1234",
        created_at="2026-04-06T16:04:30Z",
        updated_at="2026-04-06T16:04:50Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(
                role=Role.ASSISTANT,
                content="Verification will run after execution.",
            ),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=base_sections(),
        workflow_timeline=[planned_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
578
579
def _persist_session_with_stale_verification(temp_dir: Path) -> str:
    """Persist an execute-mode session whose only timeline entry is stale.

    The single ``verify_observation`` entry carries one observation with
    status ``stale`` for ``uv run pytest -q``.

    Returns:
        The saved session id.
    """
    task = "Keep working on the runtime"
    command = "uv run pytest -q"

    def base_sections() -> list[str]:
        # A fresh list per record so no two records share one list object.
        return ["Runtime Config", "Workflow Context", "Mode Guidance"]

    # NOTE(review): attempt 1 names attempt 2 in supersedes_attempt_id, which
    # reads as inverted; values kept as found -- confirm the intended direction.
    stale_observation = VerificationObservation(
        status="stale",
        summary=(
            "verification became stale for `uv run pytest -q` "
            "after new mutating work"
        ),
        command=command,
        kind="runtime",
        detail="write changed src/loader/runtime/finalization.py",
        attempt_id="verification-attempt-1",
        attempt_number=1,
        supersedes_attempt_id="verification-attempt-2",
    )
    stale_entry = WorkflowTimelineEntry(
        timestamp="2026-04-06T16:07:30Z",
        kind="verify_observation",
        mode="execute",
        reason_code="verification_stale",
        summary="verify: previous verification became stale after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="stale",
        verification_observations=[stale_observation],
        prompt_format="native",
        prompt_sections=base_sections(),
    )
    snapshot = SessionSnapshot(
        session_id="20260406T160700Z-stale1234",
        created_at="2026-04-06T16:07:00Z",
        updated_at="2026-04-06T16:07:30Z",
        messages=[
            Message(role=Role.USER, content=task),
            Message(
                role=Role.ASSISTANT,
                content="Fresh verification is required again.",
            ),
        ],
        current_task=task,
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        workflow_mode="execute",
        permission_mode="workspace-write",
        prompt_format="native",
        prompt_sections=base_sections(),
        workflow_timeline=[stale_entry],
    )
    SessionStore(temp_dir).save(snapshot)
    return snapshot.session_id
628
629
@pytest.mark.asyncio
async def test_collect_doctor_report_passes_for_healthy_workspace(temp_dir: Path) -> None:
    """A healthy backend plus a complete workspace yields an all-PASS report."""
    model_name = "qwen2.5-coder:14b"
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)

    def healthy_backend(model: str) -> FakeOllamaBackend:
        # The requested model is listed, so the backend reports it as available.
        return FakeOllamaBackend(
            model=model,
            health=True,
            models=[{"name": model_name}],
            model_details={"details": {"family": "qwen2.5"}},
        )

    report = await collect_doctor_report(
        temp_dir,
        model=model_name,
        backend_factory=healthy_backend,
    )

    def check_named(name: str):
        return next(check for check in report.checks if check.name == name)

    expected_names = {
        "backend",
        "capabilities",
        "workspace",
        "write_access",
        "commands",
        "state",
        "permissions",
    }
    assert report.overall_status == CheckStatus.PASS
    assert {check.name for check in report.checks} == expected_names

    backend_check = check_named("backend")
    state_check = check_named("state")

    assert backend_check.status == CheckStatus.PASS
    assert state_check.status == CheckStatus.PASS
661
662
@pytest.mark.asyncio
async def test_collect_doctor_report_surfaces_backend_and_state_failures(temp_dir: Path) -> None:
    """An unhealthy backend and broken project memory both surface as FAIL."""
    _write_python_workspace(temp_dir)
    loader_root = temp_dir / ".loader"
    loader_root.mkdir()
    # Deliberately malformed JSON so the state check has something to reject.
    (loader_root / "project-memory.json").write_text("{broken json")

    def unhealthy_backend(model: str) -> FakeOllamaBackend:
        # Only a different model is listed, and the health probe fails.
        return FakeOllamaBackend(
            model=model,
            health=False,
            models=[{"name": "llama3.1:8b"}],
            model_details=None,
        )

    report = await collect_doctor_report(
        temp_dir,
        model="missing-model:latest",
        backend_factory=unhealthy_backend,
    )

    def check_named(name: str):
        return next(check for check in report.checks if check.name == name)

    backend_check = check_named("backend")
    state_check = check_named("state")

    assert report.overall_status == CheckStatus.FAIL
    assert backend_check.status == CheckStatus.FAIL
    assert "not pulled" in backend_check.message
    assert state_check.status == CheckStatus.FAIL
    assert "corrupted" in state_check.message
688
689
@pytest.mark.asyncio
async def test_collect_doctor_report_fails_closed_on_invalid_permission_rules(
    temp_dir: Path,
) -> None:
    """A malformed permission-rules file fails the report."""
    model_name = "qwen2.5-coder:14b"
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    rules_path = temp_dir / ".loader" / "permission-rules.json"
    # "allow" holds a string here; the assertions below expect that to be rejected.
    rules_path.write_text('{"allow": "nope"}\n')

    def healthy_backend(model: str) -> FakeOllamaBackend:
        return FakeOllamaBackend(
            model=model,
            health=True,
            models=[{"name": model_name}],
        )

    report = await collect_doctor_report(
        temp_dir,
        model=model_name,
        permission_mode="prompt",
        backend_factory=healthy_backend,
    )

    permission_check = next(
        check for check in report.checks if check.name == "permissions"
    )
    assert report.overall_status == CheckStatus.FAIL
    assert permission_check.status == CheckStatus.FAIL
    assert report.permission_rules_valid is False
    assert "invalid" in permission_check.message.lower()
714
715
def test_status_and_session_surfaces_reflect_persisted_state(temp_dir: Path) -> None:
    """Status, session list and session detail all mirror the persisted session."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id, dod_path = _persist_session_with_dod(temp_dir)
    _persist_explore_snapshot(temp_dir)

    # Values written by _persist_session_with_dod that several surfaces repeat.
    boundary = "runtime-first via runtime-handle (RuntimeHandle)"
    rules_source = str(temp_dir / ".loader" / "permission-rules.json")
    rule_counts = {"allow": 1, "deny": 2, "ask": 1}
    reentry_code = "verification_failed_reentry"
    reason_summary = "verification failed; returning to execute for fixes"
    completion_summary = (
        "continued after verification failed and the runtime re-entered execute mode"
    )
    transition_summary = "completion -> finalize [terminal] Finalizing completed turn"
    verification_summary = "failed for pytest -q"

    snapshot = collect_status_snapshot(
        temp_dir,
        model="llama3.1:8b",
    )
    sessions = list_session_summaries(temp_dir)
    detail = load_session_detail(session_id, project_root=temp_dir)

    # --- status snapshot ---
    assert snapshot.active_session_id == session_id
    assert snapshot.dod_status == "fixing"
    assert snapshot.dod_pending_items_count == 1
    assert snapshot.last_verification_result == "failed"
    assert snapshot.active_dod_path == dod_path
    assert snapshot.permission_mode == "prompt"
    assert snapshot.runtime_boundary_summary == boundary
    assert snapshot.runtime_owner_type == "RuntimeHandle"
    assert snapshot.runtime_owner_path == "runtime-handle"
    assert snapshot.permission_rule_counts == rule_counts
    assert snapshot.permission_prompting_enabled is True
    assert snapshot.permission_rules_valid is True
    assert snapshot.permission_rules_source == rules_source
    assert snapshot.prompt_format == "native"
    assert snapshot.prompt_sections == [
        "Runtime Config",
        "Workflow Context",
        "Mode Guidance",
    ]
    assert snapshot.workflow_reason_code == reentry_code
    assert snapshot.workflow_reason_summary == reason_summary
    assert snapshot.workflow_decision_kind == "reentry"
    assert snapshot.workflow_scheduled_next_mode == "verify"
    assert snapshot.active_turn_phase == "completion"
    assert snapshot.completion_decision_code == reentry_code
    assert snapshot.completion_decision_summary == completion_summary
    assert snapshot.last_turn_transition_summary == transition_summary
    assert snapshot.explore_turn_count == 2
    assert snapshot.explore_message_count == 4
    assert snapshot.explore_history_mode == "continue"
    assert snapshot.explore_last_query == "What file did you mention?"
    assert snapshot.explore_last_response == "I mentioned README.md."
    assert snapshot.explore_updated_at is not None
    assert [item.status for item in snapshot.recent_verification] == ["failed"]
    assert [item.command for item in snapshot.recent_verification] == ["pytest -q"]
    assert [item.detail for item in snapshot.recent_verification] == ["1 failed"]
    assert snapshot.verification_state_summary == verification_summary

    # --- session list ---
    assert len(sessions) == 1
    summary = sessions[0]
    assert summary.session_id == session_id
    assert summary.is_current is True
    assert summary.runtime_owner_type == "RuntimeHandle"
    assert summary.runtime_owner_path == "runtime-handle"
    assert summary.runtime_boundary_summary == boundary
    assert summary.dod_status == "fixing"
    assert summary.permission_prompting_enabled is True
    assert summary.permission_rule_counts == rule_counts
    assert summary.permission_rules_source == rules_source
    assert summary.prompt_format == "native"
    assert summary.workflow_reason_code == reentry_code
    assert summary.workflow_reason_summary == reason_summary
    assert summary.workflow_decision_kind == "reentry"
    assert summary.completion_decision_code == reentry_code
    assert summary.completion_decision_summary == completion_summary
    assert summary.last_turn_transition_summary == transition_summary

    # --- session detail ---
    assert detail.snapshot.session_id == session_id
    assert detail.is_current is True
    assert detail.snapshot.runtime_owner_type == "RuntimeHandle"
    assert detail.snapshot.runtime_owner_path == "runtime-handle"
    assert detail.runtime_boundary_summary == boundary
    assert detail.verification_state_summary == verification_summary
    assert detail.definition_of_done is not None
    assert detail.definition_of_done.status == "fixing"
    assert detail.snapshot.permission_rules_source == rules_source
    assert detail.snapshot.workflow_reason_code == reentry_code
    assert detail.snapshot.last_completion_decision_code == reentry_code
    assert [entry.decision_code for entry in detail.snapshot.completion_trace] == [
        "completion_response_accepted",
        reentry_code,
    ]
    assert [item.status for item in detail.recent_verification] == ["failed"]
    assert [item.command for item in detail.recent_verification] == ["pytest -q"]
    assert detail.snapshot.last_turn_transition_reason_code == "turn_complete"
    assert len(detail.snapshot.workflow_timeline) == 2
    assert detail.snapshot.workflow_timeline[-1].scheduled_next_mode == "verify"
829
830
def test_collect_workflow_timeline_reflects_persisted_history(temp_dir: Path) -> None:
    """The unfiltered timeline returns both persisted entries in order."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id, _dod_path = _persist_session_with_dod(temp_dir)

    timeline = collect_workflow_timeline(project_root=temp_dir)

    # Session identity and runtime ownership.
    assert timeline.session_id == session_id
    assert timeline.is_current is True
    assert timeline.runtime_owner_type == "RuntimeHandle"
    assert timeline.runtime_owner_path == "runtime-handle"
    assert timeline.runtime_boundary_summary == (
        "runtime-first via runtime-handle (RuntimeHandle)"
    )

    # Current workflow state.
    assert timeline.workflow_mode == "execute"
    assert timeline.current_task == "Fix the failing tests"
    assert timeline.verification_state_summary == "failed for pytest -q"

    # Entries come back in persisted order.
    assert timeline.total_entries == 2
    entry_kinds = [entry.kind for entry in timeline.entries]
    assert entry_kinds == ["handoff", "reentry"]
    assert timeline.entries[-1].reason_code == "verification_failed_reentry"
851
852
def test_collect_workflow_timeline_supports_filters_and_highlights(
    temp_dir: Path,
) -> None:
    """Filtering by mode with a limit narrows entries but keeps the total count."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_rich_workflow(temp_dir)

    timeline = collect_workflow_timeline(
        project_root=temp_dir,
        mode="clarify",
        limit=1,
    )

    assert timeline.session_id == session_id
    # total_entries counts the whole persisted timeline, not the filtered view.
    assert timeline.total_entries == 3
    assert timeline.selected_mode == "clarify"
    assert timeline.selected_kind is None
    assert timeline.entry_limit == 1
    assert len(timeline.entries) == 1

    clarify_entry = timeline.entries[0]
    assert clarify_entry.kind == "clarify_continue"
    assert clarify_entry.clarify_stage == "readiness"
    assert clarify_entry.clarify_pressure_kind == "tradeoff"
    assert clarify_entry.missing_readiness_gates == [
        "non_goals",
        "decision_boundaries",
    ]

    assert any(item.startswith("Asked again:") for item in timeline.highlights)
    assert timeline.workflow_ledger.assumptions[0].status == "contradicted"
    assert any(
        item.startswith("Contradicted assumptions:") for item in timeline.highlights
    )
885
886
def test_collect_workflow_timeline_highlights_policy_accountability(
    temp_dir: Path,
) -> None:
    """Collect the timeline for the policy-accountability session.

    Asserts the three entry kinds in order and the repair, completion and
    policy-stage highlights.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_policy_accountability(temp_dir)

    timeline = collect_workflow_timeline(project_root=temp_dir)

    assert timeline.session_id == persisted_id
    entry_kinds = [entry.kind for entry in timeline.entries]
    assert entry_kinds == ["repair_retry", "completion_check", "completion_continue"]

    for prefix in ("Repair path:", "Completion decision:"):
        assert any(note.startswith(prefix) for note in timeline.highlights)
    assert any(
        "policy-stage=definition_of_done" in note for note in timeline.highlights
    )
907
908
def test_collect_status_snapshot_includes_latest_policy_summary(
    temp_dir: Path,
) -> None:
    """Collect the status snapshot for the policy-accountability session.

    Asserts the latest policy summary text, its blocking/observed evidence
    lists, and the single failed entry in recent verification.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_policy_accountability(temp_dir)

    status = collect_status_snapshot(temp_dir)

    policy_summary = status.latest_policy_summary
    assert policy_summary is not None
    for fragment in (
        "verification_failed_reentry",
        "observed=verification failed for `pytest -q` [1 failed]",
        "policy-stage=definition_of_done",
    ):
        assert fragment in policy_summary

    blocking = ["verification failed for `pytest -q`"]
    observed = ["verification failed for `pytest -q` [1 failed]"]
    assert status.latest_policy_blocking_evidence == blocking
    assert status.latest_policy_observed_verification == observed

    recent = status.recent_verification
    assert [record.status for record in recent] == ["failed"]
    assert [record.command for record in recent] == ["pytest -q"]
    assert [record.detail for record in recent] == ["1 failed"]
933
934
def test_collect_status_snapshot_surfaces_pending_verification(
    temp_dir: Path,
) -> None:
    """Collect the status snapshot for a session with pending verification.

    Asserts the policy summary markers, the observed verification line, the
    recent-verification record and the verification state summary.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_pending_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    policy_summary = status.latest_policy_summary
    assert policy_summary is not None
    assert "verification_pending" in policy_summary
    assert "policy-outcome=pending" in policy_summary

    observed = ["verification pending for `uv run pytest -q` [attempt 2]"]
    assert status.latest_policy_observed_verification == observed

    recent = status.recent_verification
    assert [record.status for record in recent] == ["pending"]
    assert [record.command for record in recent] == ["uv run pytest -q"]
    assert [record.attempt for record in recent] == ["attempt 2"]

    expected_state = "pending (attempt 2) for uv run pytest -q"
    assert status.verification_state_summary == expected_state
958
959
def test_collect_status_snapshot_surfaces_planned_verification(
    temp_dir: Path,
) -> None:
    """Collect the status snapshot for a session with planned verification.

    Asserts the policy summary markers, the observed verification line, the
    recent-verification record and the verification state summary.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_planned_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    policy_summary = status.latest_policy_summary
    assert policy_summary is not None
    assert "verification_planned" in policy_summary
    assert "policy-outcome=planned" in policy_summary

    # Adjacent literals concatenate into the single expected line.
    observed_line = (
        "verification planned for `uv run pytest -q` "
        "[write changed src/loader/runtime/tool_batches.py; attempt 3]"
    )
    assert status.latest_policy_observed_verification == [observed_line]

    recent = status.recent_verification
    assert [record.status for record in recent] == ["planned"]
    assert [record.command for record in recent] == ["uv run pytest -q"]
    assert [record.attempt for record in recent] == ["attempt 3"]
    expected_details = ["write changed src/loader/runtime/tool_batches.py"]
    assert [record.detail for record in recent] == expected_details

    expected_state = "planned (attempt 3) for uv run pytest -q"
    assert status.verification_state_summary == expected_state
986
987
def test_collect_status_snapshot_surfaces_stale_verification(
    temp_dir: Path,
) -> None:
    """Collect the status snapshot for a session whose verification went stale.

    Asserts the policy summary markers, the observed verification line, the
    recent-verification record and the verification state summary.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_stale_verification(temp_dir)

    status = collect_status_snapshot(temp_dir)

    policy_summary = status.latest_policy_summary
    assert policy_summary is not None
    assert "verification_stale" in policy_summary
    assert "policy-outcome=stale" in policy_summary

    # Adjacent literals concatenate into the single expected line.
    observed_line = (
        "verification became stale for `uv run pytest -q` after new mutating work "
        "[write changed src/loader/runtime/finalization.py; attempt 1 -> attempt 2]"
    )
    assert status.latest_policy_observed_verification == [observed_line]

    recent = status.recent_verification
    assert [record.status for record in recent] == ["stale"]
    assert [record.command for record in recent] == ["uv run pytest -q"]
    assert [record.attempt for record in recent] == ["attempt 1 -> attempt 2"]
    expected_details = ["write changed src/loader/runtime/finalization.py"]
    assert [record.detail for record in recent] == expected_details

    expected_state = "stale (attempt 1 -> attempt 2) for uv run pytest -q"
    assert status.verification_state_summary == expected_state
1016
1017
def test_collect_prompt_diff_uses_persisted_prompt_history(temp_dir: Path) -> None:
    """Collect the prompt diff for a session persisted with a DoD.

    Asserts that both snapshots are present, that their workflow modes are
    ``verify`` then ``execute``, and that the highlights and unified diff
    carry the expected text.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id, _ = _persist_session_with_dod(temp_dir)

    prompt_diff = collect_prompt_diff(project_root=temp_dir)

    assert prompt_diff.session_id == persisted_id
    assert prompt_diff.previous is not None
    assert prompt_diff.current is not None
    assert prompt_diff.current.workflow_mode == "execute"
    assert prompt_diff.previous.workflow_mode == "verify"
    assert any("Workflow mode changed:" in note for note in prompt_diff.highlights)
    for fragment in ("---", "execute parser fix"):
        assert fragment in prompt_diff.unified_diff
1033
1034
def test_collect_workflow_artifact_diffs_reads_versioned_artifacts(
    temp_dir: Path,
) -> None:
    """Collect artifact diffs for the rich-workflow session.

    Asserts three entries covering the clarify brief, implementation plan and
    verification plan, a diff mentioning ``notes.txt``, and non-empty
    highlights.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_rich_workflow(temp_dir)

    artifact_diffs = collect_workflow_artifact_diffs(project_root=temp_dir)

    assert artifact_diffs.session_id == persisted_id
    assert len(artifact_diffs.entries) == 3
    artifact_kinds = {entry.kind for entry in artifact_diffs.entries}
    assert artifact_kinds == {
        "clarify_brief",
        "implementation_plan",
        "verification_plan",
    }
    assert any("notes.txt" in entry.unified_diff for entry in artifact_diffs.entries)
    assert artifact_diffs.highlights
1053
1054
def test_status_and_session_commands_render_persisted_state(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render persisted state through the status, session and workflow CLIs.

    All four commands are invoked from ``temp_dir`` before any assertion runs;
    each output is then checked for its expected fragments in order.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id, _ = _persist_session_with_dod(temp_dir)
    _persist_explore_snapshot(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    status_result = cli_runner.invoke(
        cli_main_module.status_cli, ["--model", "llama3.1:8b"]
    )
    list_result = cli_runner.invoke(cli_main_module.session_cli, ["list"])
    show_result = cli_runner.invoke(cli_main_module.session_cli, ["show", session_id])
    workflow_result = cli_runner.invoke(cli_main_module.workflow_cli, ["show"])

    # --- `status` ---------------------------------------------------------
    assert status_result.exit_code == 0
    status_output = status_result.output
    for fragment in (
        session_id,
        "fixing",
        "Runtime Owner",
        "Boundary",
        "runtime-handle (RuntimeHandle)",
        "runtime-first via runtime-handle (RuntimeHandle)",
        "1 allow / 2 deny / 1 ask",
        "native",
        "Runtime Config, Workflow Context, Mode Guidance",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "continued after verification failed",
        "completion -> finalize",
        "Finalizing completed turn",
        "Explore Turns",
        "Explore History",
        "What file did you mention?",
        "pytest -q",
        "1 failed",
        "Verification State",
        "failed for pytest -q",
    ):
        assert fragment in status_output

    # --- `session list` ---------------------------------------------------
    assert list_result.exit_code == 0
    list_output = list_result.output
    for fragment in (
        session_id,
        "Runtime Owner",
        "Boundary",
        "runtime-handle (RuntimeHandle)",
        "runtime-first via runtime-handle (RuntimeHandle)",
        "1 allow / 2 deny / 1 ask",
        "prompting enabled",
        "native",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "completion -> finalize",
    ):
        assert fragment in list_output

    # --- `session show <id>` ----------------------------------------------
    assert show_result.exit_code == 0
    show_output = show_result.output
    for fragment in (
        session_id,
        "Runtime Owner",
        "Boundary",
        "runtime-handle (RuntimeHandle)",
        "runtime-first via runtime-handle (RuntimeHandle)",
        "Patch the broken parser",
        "1 allow / 2 deny / 1 ask",
        "enabled",
        "Runtime Config, Workflow Context, Mode Guidance",
        "Rules Source",
        "verification failed; returning to execute for fixes",
        "Completion Decision",
        "Completion Trace",
        "Recent Verification",
        "Verification State",
        "failed for pytest -q",
        "continuation_check",
        "completion -> finalize",
        "Finalizing completed turn",
    ):
        assert fragment in show_output
    # This session is expected to show the workflow timeline, not the policy one.
    assert "Policy Timeline" not in show_output
    for fragment in (
        "Workflow Timeline",
        "handoff",
        "next=verify",
        "pytest -q",
        "1 failed",
    ):
        assert fragment in show_output

    # --- `workflow show` --------------------------------------------------
    assert workflow_result.exit_code == 0
    workflow_output = workflow_result.output
    for fragment in (
        "Loader Workflow",
        "Workflow Timeline",
        session_id,
        "Runtime Owner",
        "Boundary",
        "runtime-handle (RuntimeHandle)",
        "runtime-first via runtime-handle (RuntimeHandle)",
        "Verification State",
        "failed for pytest -q",
        "handoff",
        "next=verify",
    ):
        assert fragment in workflow_output
1149
1150
def test_workflow_command_renders_policy_accountability_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render ``workflow show`` with and without ``--policy``.

    The default view is checked for repair/completion context and policy
    metadata. The ``--policy`` view is checked for the policy timeline and
    for the absence of ``handoff``.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_policy_accountability(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    default_view = cli_runner.invoke(cli_main_module.workflow_cli, ["show"])

    assert default_view.exit_code == 0
    default_output = default_view.output
    for fragment in (
        session_id,
        "repair_retry",
        "Repair path:",
        "Completion decision:",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "policy-stage=raw_text_tool_fallback",
        "policy-outcome=continue",
        "provenance=contradicts:verification@dod.evidence",
        "observed=verification failed for `pytest -q` [1 failed]",
    ):
        assert fragment in default_output

    policy_view = cli_runner.invoke(
        cli_main_module.workflow_cli, ["show", "--policy"]
    )

    assert policy_view.exit_code == 0
    policy_output = policy_view.output
    for fragment in (
        "Loader Workflow",
        "Policy Timeline",
        "policy-only",
        "repair_retry",
        "verification_failed_reentry",
    ):
        assert fragment in policy_output
    assert "handoff" not in policy_output
1188
1189
def test_workflow_command_renders_stale_verification_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render ``workflow show`` for a session whose verification went stale."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_stale_verification(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.workflow_cli, ["show"])

    assert outcome.exit_code == 0
    rendered = outcome.output
    for fragment in (
        session_id,
        "Verify stale:",
        "verification_stale",
        "policy-outcome=stale",
        "Observed Verification",
        "Verification State",
        "stale (attempt 1 -> attempt 2) for uv run pytest -q",
        "uv run pytest -q",
        "new mutating work",
    ):
        assert fragment in rendered
1213
1214
def test_workflow_command_renders_planned_verification_context(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render ``workflow show`` for a session with planned verification."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_planned_verification(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.workflow_cli, ["show"])

    assert outcome.exit_code == 0
    rendered = outcome.output
    for fragment in (
        session_id,
        "Verify planned:",
        "verification_planned",
        "policy-outcome=planned",
        "Observed Verification",
        "Verification State",
        "planned (attempt 3) for uv run pytest -q",
        "verification planned for `uv run pytest -q`",
        "uv run pytest -q",
    ):
        assert fragment in rendered
1238
1239
def test_collect_workflow_timeline_can_focus_on_policy_accountability(
    temp_dir: Path,
) -> None:
    """Collect the timeline with ``accountability_only=True``.

    Asserts the flag is echoed on the snapshot and the three accountability
    entry kinds come back in order.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id = _persist_session_with_policy_accountability(temp_dir)

    timeline = collect_workflow_timeline(
        project_root=temp_dir, accountability_only=True
    )

    assert timeline.session_id == persisted_id
    assert timeline.selected_accountability_only is True
    entry_kinds = [entry.kind for entry in timeline.entries]
    assert entry_kinds == ["repair_retry", "completion_check", "completion_continue"]
1259
1260
def test_session_show_renders_policy_timeline_preview(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render ``session show <id>`` for the policy-accountability session.

    Asserts the latest-policy block, the evidence sections and the policy
    timeline preview appear in the output.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_policy_accountability(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.session_cli, ["show", session_id])

    assert outcome.exit_code == 0
    rendered = outcome.output
    for fragment in (
        "Latest Policy",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "Policy Timeline",
        "repair_retry",
        "completion:",
        "provenance=contradicts:verification@dod.evidence",
    ):
        assert fragment in rendered
1285
1286
def test_status_command_renders_latest_policy_summary(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render the status CLI (no arguments) for the policy-accountability session."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_policy_accountability(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.status_cli, [])

    assert outcome.exit_code == 0
    rendered = outcome.output
    for fragment in (
        session_id,
        "Latest Policy",
        "verification_failed_reentry",
        "Policy Evidence Needed",
        "verification failed for `pytest -q`",
        "Observed Verification",
        "verification failed for `pytest -q` [1 failed]",
        "Recent Verification",
        "policy-stage=definition_of_done",
    ):
        assert fragment in rendered
1310
1311
def test_workflow_show_renders_workflow_ledger(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render ``workflow show`` for the rich-workflow session and check the ledger."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_rich_workflow(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.workflow_cli, ["show"])

    assert outcome.exit_code == 0
    rendered = outcome.output
    for fragment in (
        "Workflow Ledger",
        "Assumptions",
        "contradicted",
        "notes.txt stays out of scope",
        "Acceptance Anchors",
        "Decision Boundaries",
    ):
        assert fragment in rendered
1332
1333
def test_workflow_show_command_supports_filters_and_highlights(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render ``workflow show`` with ``--kind`` and ``--mode`` filters.

    The ``--kind reentry`` view is checked first. The ``--mode clarify`` view
    is invoked only after those assertions pass.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    session_id = _persist_session_with_rich_workflow(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    reentry_args = ["show", "--kind", "reentry", "--limit", "1", session_id]
    reentry_view = cli_runner.invoke(cli_main_module.workflow_cli, reentry_args)

    assert reentry_view.exit_code == 0
    reentry_output = reentry_view.output
    for fragment in (
        "Loader Workflow",
        "1 shown / 3 total",
        "kind=reentry, limit=1",
        "Workflow Answers",
        "Recovered workflow:",
        "full_replan_required",
        "evidence=confirmed touchpoint:",
    ):
        assert fragment in reentry_output

    clarify_args = ["show", "--mode", "clarify", "--limit", "1", session_id]
    clarify_view = cli_runner.invoke(cli_main_module.workflow_cli, clarify_args)

    assert clarify_view.exit_code == 0
    clarify_output = clarify_view.output
    for fragment in (
        "stage=readiness",
        "pressure=tradeoff",
        "gates=non_goals,decision_boundaries",
    ):
        assert fragment in clarify_output
1368
1369
def test_workflow_show_can_render_artifact_diffs(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render ``workflow show --diff --full-diff`` for the rich-workflow session."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_rich_workflow(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    diff_args = ["show", "--diff", "--full-diff"]
    outcome = cli_runner.invoke(cli_main_module.workflow_cli, diff_args)

    assert outcome.exit_code == 0
    rendered = outcome.output
    for fragment in (
        "Artifact Changes",
        "Artifact Diff Summary",
        "clarify_brief",
        "implementation_plan",
        "verification_plan",
        "notes.txt",
    ):
        assert fragment in rendered
1393
1394
def test_collect_prompt_preview_uses_persisted_runtime_state(temp_dir: Path) -> None:
    """Collect a prompt preview for a session persisted with a DoD.

    Asserts the workflow metadata, permission mode, prompt format (derived
    from the capability profile), section list and key body lines.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    persisted_id, _ = _persist_session_with_dod(temp_dir)

    preview = collect_prompt_preview(temp_dir, model="qwen2.5-coder:14b")

    assert preview.active_session_id == persisted_id
    assert preview.workflow_mode == "execute"
    assert preview.workflow_reason_code == "verification_failed_reentry"
    assert preview.workflow_decision_kind == "reentry"
    assert preview.permission_mode == "prompt"

    # The expected format follows whatever the capability profile reports.
    if preview.capability_profile.supports_native_tools:
        expected_format = "native"
    else:
        expected_format = "react"
    assert preview.prompt_format == expected_format

    expected_sections = [
        "Runtime Config",
        "Workflow Context",
        "Mode Guidance",
        "Project Context",
        "Project Tips",
    ]
    assert preview.prompt_sections == expected_sections
    for fragment in ("## Execute Mode", "Current task: Fix the failing tests"):
        assert fragment in preview.content
1422
1423
def test_prompt_show_command_renders_preview_without_model_call(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render ``prompt show`` and compare it with a directly collected preview.

    The preview is collected first so its ``prompt_format`` can be looked up
    in the CLI output.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_dod(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)
    expected_preview = collect_prompt_preview(
        temp_dir,
        model="qwen2.5-coder:14b",
        current_task="Preview the current Loader contract",
    )

    show_args = [
        "show",
        "--model",
        "qwen2.5-coder:14b",
        "Preview the current Loader contract",
    ]
    outcome = cli_runner.invoke(cli_main_module.prompt_cli, show_args)

    assert outcome.exit_code == 0
    rendered = outcome.output
    for fragment in (
        "Prompt Preview",
        "Prompt Body",
        "Preview the current Loader contract",
        expected_preview.prompt_format,
        "Workflow Context",
        "Execute Mode",
    ):
        assert fragment in rendered
1452
1453
def test_prompt_diff_command_renders_persisted_prompt_changes(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render ``prompt diff --full`` for a session persisted with a DoD."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_session_with_dod(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.prompt_cli, ["diff", "--full"])

    assert outcome.exit_code == 0
    rendered = outcome.output
    for fragment in (
        "Prompt Diff",
        "Prompt Changes",
        "Workflow mode changed:",
        "Prompt Unified Diff",
        "execute parser fix",
    ):
        assert fragment in rendered
1473
1474
def test_permission_snapshot_and_dry_run_reflect_rules(temp_dir: Path) -> None:
    """Write allow/deny/ask rules, then inspect the snapshot and a dry-run check.

    The dry run writes ``safe change`` to ``README.md``, which matches both the
    allow rule (content) and the ask rule (path). The test expects ``ask``.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    # Adjacent literals build the same newline-terminated JSON document.
    rules_text = (
        "{\n"
        ' "allow": [{"tool": "write", "contains": "safe change"}],\n'
        ' "deny": [{"tool": "write", "path_contains": "secrets"}],\n'
        ' "ask": [{"tool": "write", "path_contains": "README"}]\n'
        "}\n"
    )
    rules_file = temp_dir / ".loader" / "permission-rules.json"
    rules_file.write_text(rules_text)

    snapshot = collect_permission_snapshot(temp_dir, permission_mode="allow")
    tool_input = {
        "file_path": str(temp_dir / "README.md"),
        "content": "safe change\n",
    }
    check = dry_run_permission_check(
        "write",
        tool_input,
        project_root=temp_dir,
        permission_mode="allow",
    )

    assert snapshot.active_mode == "allow"
    assert snapshot.prompting_enabled is True
    assert snapshot.rules_valid is True
    assert snapshot.rule_counts == {"allow": 1, "deny": 1, "ask": 1}
    first_allow_rule = snapshot.normalized_rules["allow"][0]
    assert first_allow_rule.tool_name == "write"
    assert first_allow_rule.contains == "safe change"

    assert check.required_mode == "workspace-write"
    assert check.decision == "ask"
    assert check.matched_disposition == "ask"
    assert check.matched_rule == "tool=write, path_contains=README"
    assert "file_path=" in check.input_summary
1514
1515
def test_status_snapshot_reports_invalid_permission_rules(temp_dir: Path) -> None:
    """Write a malformed rules file and check the status snapshot flags it.

    The snapshot must mark the rules invalid, keep prompting enabled in
    ``prompt`` mode, and still report the rules file as the source.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    rules_file = temp_dir / ".loader" / "permission-rules.json"
    rules_file.write_text("{broken json", encoding="utf-8")

    snapshot = collect_status_snapshot(temp_dir, permission_mode="prompt")

    assert snapshot.permission_rules_valid is False
    assert snapshot.permission_prompting_enabled is True
    # Compare trailing path components rather than a "/"-joined suffix so the
    # check does not depend on the platform's path separator.
    source_parts = Path(snapshot.permission_rules_source).parts
    assert source_parts[-2:] == (".loader", "permission-rules.json")
1526
1527
def test_permissions_show_and_check_commands_render_policy(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render ``permissions show`` and ``permissions check`` against a rules file.

    Both commands are invoked with ``--permission-mode allow`` before any
    assertion runs.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    # Adjacent literals build the same newline-terminated JSON document.
    rules_text = (
        "{\n"
        ' "allow": [{"tool": "write", "contains": "safe change"}],\n'
        ' "ask": [{"tool": "write", "path_contains": "README"}]\n'
        "}\n"
    )
    rules_file = temp_dir / ".loader" / "permission-rules.json"
    rules_file.write_text(rules_text)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    show_args = ["show", "--permission-mode", "allow"]
    check_args = [
        "check",
        "--permission-mode",
        "allow",
        "--args",
        '{"content":"safe change\\n"}',
        "write",
        "README.md",
    ]
    show_result = cli_runner.invoke(cli_main_module.permissions_cli, show_args)
    check_result = cli_runner.invoke(cli_main_module.permissions_cli, check_args)

    assert show_result.exit_code == 0
    show_output = show_result.output
    for fragment in (
        "Loader Permissions",
        "Permission Mode",
        "Rules Source",
        "safe change",
        "README",
    ):
        assert fragment in show_output

    assert check_result.exit_code == 0
    check_output = check_result.output
    for fragment in (
        "Permission Check",
        "workspace-write",
        "ask",
        "tool=write, path_contains=README",
    ):
        assert fragment in check_output
1578
1579
def test_permissions_check_rejects_invalid_json_args(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Pass malformed JSON to ``permissions check --args`` and expect a failure."""
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    bad_args = ["check", "bash", "--args", "{broken json", "ls"]
    outcome = cli_runner.invoke(cli_main_module.permissions_cli, bad_args)

    assert outcome.exit_code != 0
    assert "`--args` must be valid JSON" in outcome.output
1597
1598
def test_permissions_show_surfaces_invalid_rule_file(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Render ``permissions show`` with a malformed rules file on disk.

    The command is still expected to exit 0 while reporting the rule error.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    rules_file = temp_dir / ".loader" / "permission-rules.json"
    rules_file.write_text("{broken json")
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    outcome = cli_runner.invoke(cli_main_module.permissions_cli, ["show"])

    assert outcome.exit_code == 0
    rendered = outcome.output
    # Case-insensitive match for the "invalid" marker only.
    assert "invalid" in rendered.lower()
    assert "Rule Error" in rendered
    assert "Rules Source" in rendered
1616
1617
def test_explore_command_can_show_and_reset_continuity(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Exercise ``explore --status`` and then ``explore --reset``.

    After the reset, the explore state store for ``temp_dir`` must load
    ``None``.
    """
    _write_python_workspace(temp_dir)
    _ensure_loader_dirs(temp_dir)
    _persist_explore_snapshot(temp_dir)
    cli_runner = CliRunner()

    monkeypatch.chdir(temp_dir)

    status_view = cli_runner.invoke(cli_main_module.explore_cli, ["--status"])

    assert status_view.exit_code == 0
    status_output = status_view.output
    for fragment in (
        "Loader Explore State",
        "continue",
        "What file did you mention?",
    ):
        assert fragment in status_output

    reset_view = cli_runner.invoke(cli_main_module.explore_cli, ["--reset"])

    assert reset_view.exit_code == 0
    assert "Cleared persisted explore continuity." in reset_view.output
    remaining_state = ExploreStateStore(temp_dir).load()
    assert remaining_state is None
1641
1642
def test_root_help_lists_special_commands() -> None:
    """Check that the loader help text mentions each special command."""
    rendered_help = cli_main_module._loader_help_text()

    for command in (
        "loader doctor",
        "loader status",
        "loader explore <prompt>",
        "loader permissions show",
        "loader session resume <id>",
    ):
        assert command in rendered_help
1651
1652
def test_main_dispatches_session_resume_to_primary_cli(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Run ``main()`` with ``session resume`` argv and capture the forwarded call.

    ``cli.main`` is replaced by a recorder. The test expects it to be called
    with ``--resume-target <id>`` followed by the remaining flags.
    """
    recorded: dict[str, object] = {}

    def _record_cli_call(*, args: list[str], prog_name: str) -> None:
        # Store the keyword arguments that main() passes to cli.main.
        recorded.update(args=args, prog_name=prog_name)

    simulated_argv = ["loader", "session", "resume", "abc123", "--no-tui"]
    monkeypatch.setattr(cli_main_module.cli, "main", _record_cli_call)
    monkeypatch.setattr(sys, "argv", simulated_argv)

    cli_main_module.main()

    expected_call = {
        "args": ["--resume-target", "abc123", "--no-tui"],
        "prog_name": "loader",
    }
    assert recorded == expected_call