"""Tests for definition-of-done state and persistence.""" import json import subprocess from pathlib import Path from loader.llm.base import ToolCall from loader.runtime.dod import ( DefinitionOfDoneStore, VerificationEvidence, all_planned_artifact_outputs_exist, all_planned_artifacts_exist, begin_new_verification_attempt, build_verification_summary, collect_planned_artifact_targets, create_definition_of_done, derive_verification_commands, determine_task_size, ensure_active_verification_attempt, record_successful_tool_call, sanitize_verification_commands, ) def test_determine_task_size_boundaries() -> None: assert determine_task_size(1, 10) == "small" assert determine_task_size(3, 99) == "small" assert determine_task_size(4, 99) == "standard" assert determine_task_size(15, 499) == "standard" assert determine_task_size(16, 499) == "large" assert determine_task_size(15, 500) == "large" def test_definition_of_done_round_trip(tmp_path: Path) -> None: store = DefinitionOfDoneStore(tmp_path) dod = create_definition_of_done( "Create hello.py and verify it runs.", retry_budget=2, ) dod.status = "fixing" dod.retry_count = 1 dod.verification_commands = ["python hello.py"] dod.touched_files = [str(tmp_path / "hello.py")] attempt = begin_new_verification_attempt(dod) saved_path = store.save(dod) reloaded = store.load(saved_path) assert reloaded.task_statement == dod.task_statement assert reloaded.status == "fixing" assert reloaded.retry_count == 1 assert reloaded.verification_commands == ["python hello.py"] assert reloaded.touched_files == [str(tmp_path / "hello.py")] assert reloaded.active_verification_attempt_id == attempt.attempt_id assert reloaded.active_verification_attempt_number == attempt.attempt_number def test_ensure_active_verification_attempt_rehydrates_missing_active_attempt() -> None: dod = create_definition_of_done("Verify the runtime output.") dod.verification_attempt_counter = 2 attempt = ensure_active_verification_attempt(dod) assert attempt.attempt_id == 
"verification-attempt-2" assert attempt.attempt_number == 2 assert dod.active_verification_attempt_id == "verification-attempt-2" assert dod.active_verification_attempt_number == 2 def test_verification_command_derivation_prefers_runtime_evidence(tmp_path: Path) -> None: project_root = tmp_path dod = create_definition_of_done("Create hello.py and make sure it runs.") hello_path = project_root / "hello.py" record_successful_tool_call( dod, ToolCall( id="write-1", name="write", arguments={"file_path": str(hello_path), "content": "print('hi')\n"}, ), ) record_successful_tool_call( dod, ToolCall( id="bash-1", name="bash", arguments={"command": "python hello.py"}, ), ) commands = derive_verification_commands( dod, project_root=project_root, task_statement=dod.task_statement, ) assert commands == ["python hello.py"] def test_record_successful_tool_call_preserves_absolute_path_string(tmp_path: Path) -> None: dod = create_definition_of_done("Create hello.py and verify it exists.") absolute_path = tmp_path / "hello.py" record_successful_tool_call( dod, ToolCall( id="write-1", name="write", arguments={"file_path": str(absolute_path), "content": "print('hi')\n"}, ), ) assert dod.touched_files == [str(absolute_path)] def test_record_successful_tool_call_counts_json_string_patch_hunks( tmp_path: Path, ) -> None: dod = create_definition_of_done("Patch generated HTML content.") target = tmp_path / "chapter.html" hunks = json.dumps( [ { "old_start": 10, "old_lines": 2, "new_start": 10, "new_lines": 8, "lines": ["-old", "-body", "+new", "+expanded"], } ] )[:-1] record_successful_tool_call( dod, ToolCall( id="patch-1", name="patch", arguments={"file_path": str(target), "hunks": hunks}, ), ) assert dod.touched_files == [str(target)] assert dod.line_changes == 8 def test_record_successful_tool_call_counts_path_content_edit(tmp_path: Path) -> None: dod = create_definition_of_done("Replace generated HTML content.") target = tmp_path / "index.html" record_successful_tool_call( dod, 
# NOTE(review): everything from here to the end of this chunk appears garbled
# by text extraction: string literals are truncated mid-line (e.g. the
# "content" argument immediately below spans a raw newline), the markup
# payloads inside the .write_text(...) calls have been stripped (bare
# "{'x' * 300}"-style fragments with no f-prefix remain), and the names
# `reference_chapters` and `reference` are used without any visible
# definition. The final test (`..._flags_malformed_document_structure`) is
# also cut off at the end of this chunk. Do not hand-edit this region —
# restore it from version control. Lines preserved byte-for-byte below.
ToolCall( id="edit-1", name="edit", arguments={ "path": str(target), "content": "
Expanded.
\n", }, ), ) assert dod.touched_files == [str(target)] assert dod.line_changes == 3 def test_derive_verification_commands_adds_semantic_html_toc_check(tmp_path: Path) -> None: chapters = tmp_path / "chapters" chapters.mkdir() (chapters / "01-introduction.html").write_text( "" + "i" * 1600 + "
") for index in range(1, 5): (reference_chapters / f"0{index}-topic.html").write_text( "{'x' * 300}
" for section in range(10)) ) guide = tmp_path / "guide" chapters = guide / "chapters" chapters.mkdir(parents=True) (guide / "index.html").write_text("" + "i" * 1000 + "
") (chapters / "01-introduction.html").write_text( "{'x' * 110}
" for section in range(10)) ) for index in range(2, 5): (chapters / f"0{index}-topic.html").write_text( "{'x' * 220}
" for section in range(10)) ) implementation_plan = tmp_path / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide / 'index.html'}`", f"- `{chapters / '01-introduction.html'}`", f"- `{chapters / '02-topic.html'}`", f"- `{chapters / '03-topic.html'}`", f"- `{chapters / '04-topic.html'}`", "", ] ) ) dod = create_definition_of_done( f"Create an equally thorough HTML guide modeled on {reference} at {guide}." ) dod.implementation_plan = str(implementation_plan) commands = derive_verification_commands( dod, project_root=tmp_path, task_statement=dod.task_statement, supplement_existing=True, ) quality_command = next( command for command in commands if "HTML guide content quality issues:" in command ) result = subprocess.run( quality_command, shell=True, cwd=tmp_path, capture_output=True, text=True, check=False, ) assert result.returncode == 1 assert "01-introduction.html: thin content" in result.stdout assert "expected at least 15" in result.stdout def test_html_guide_quality_check_flags_malformed_document_structure( tmp_path: Path, ) -> None: def rich_doc(title: str) -> str: body = "".join( f"{'x' * 180}