| 1 | """Tests for definition-of-done state and persistence.""" |
| 2 | |
| 3 | from pathlib import Path |
| 4 | |
| 5 | from loader.llm.base import ToolCall |
| 6 | from loader.runtime.dod import ( |
| 7 | DefinitionOfDoneStore, |
| 8 | begin_new_verification_attempt, |
| 9 | create_definition_of_done, |
| 10 | derive_verification_commands, |
| 11 | determine_task_size, |
| 12 | ensure_active_verification_attempt, |
| 13 | record_successful_tool_call, |
| 14 | ) |
| 15 | |
| 16 | |
def test_determine_task_size_boundaries() -> None:
    """Pin the label `determine_task_size` returns around each label change."""
    # Each entry is ((first_arg, second_arg), expected_label); the pairs sit
    # directly on either side of the points where the expected label flips.
    boundary_cases = (
        ((1, 10), "small"),
        ((3, 99), "small"),
        ((4, 99), "standard"),
        ((15, 499), "standard"),
        ((16, 499), "large"),
        ((15, 500), "large"),
    )
    for (first_arg, second_arg), expected_label in boundary_cases:
        assert determine_task_size(first_arg, second_arg) == expected_label
| 24 | |
| 25 | |
def test_definition_of_done_round_trip(tmp_path: Path) -> None:
    """Save a mutated definition of done and check a reload returns the same fields."""
    persistence = DefinitionOfDoneStore(tmp_path)
    expected_touched = [str(tmp_path / "hello.py")]

    original = create_definition_of_done(
        "Create hello.py and verify it runs.",
        retry_budget=2,
    )
    original.status = "fixing"
    original.retry_count = 1
    original.verification_commands = ["python hello.py"]
    original.touched_files = expected_touched
    # Start an attempt before saving so the active-attempt fields are part of
    # what gets persisted and compared below.
    started_attempt = begin_new_verification_attempt(original)

    written_to = persistence.save(original)
    restored = persistence.load(written_to)

    assert restored.task_statement == original.task_statement
    assert restored.status == "fixing"
    assert restored.retry_count == 1
    assert restored.verification_commands == ["python hello.py"]
    assert restored.touched_files == [str(tmp_path / "hello.py")]
    assert restored.active_verification_attempt_id == started_attempt.attempt_id
    assert (
        restored.active_verification_attempt_number
        == started_attempt.attempt_number
    )
| 48 | |
| 49 | |
def test_ensure_active_verification_attempt_rehydrates_missing_active_attempt() -> None:
    """Check the attempt produced for a fresh definition of done whose counter is 2.

    The returned attempt and the active-attempt fields on the definition of
    done are both expected to carry id "verification-attempt-2" and number 2.
    """
    expected_identity = ("verification-attempt-2", 2)

    definition = create_definition_of_done("Verify the runtime output.")
    definition.verification_attempt_counter = 2

    active = ensure_active_verification_attempt(definition)

    assert (active.attempt_id, active.attempt_number) == expected_identity
    assert (
        definition.active_verification_attempt_id,
        definition.active_verification_attempt_number,
    ) == expected_identity
| 60 | |
| 61 | |
def test_verification_command_derivation_prefers_runtime_evidence(tmp_path: Path) -> None:
    """Check that a recorded bash run is the only derived verification command.

    A write of hello.py and a bash run of ``python hello.py`` are recorded as
    successful tool calls; the derived command list must equal that bash
    command alone.
    """
    workspace = tmp_path
    definition = create_definition_of_done("Create hello.py and make sure it runs.")
    script_path = workspace / "hello.py"

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(script_path), "content": "print('hi')\n"},
    )
    record_successful_tool_call(definition, write_call)

    run_call = ToolCall(
        id="bash-1",
        name="bash",
        arguments={"command": "python hello.py"},
    )
    record_successful_tool_call(definition, run_call)

    derived = derive_verification_commands(
        definition,
        project_root=workspace,
        task_statement=definition.task_statement,
    )

    assert derived == ["python hello.py"]
| 90 | |
| 91 | |
def test_record_successful_tool_call_preserves_absolute_path_string(tmp_path: Path) -> None:
    """Check a recorded write leaves its ``file_path`` string unchanged in ``touched_files``."""
    definition = create_definition_of_done("Create hello.py and verify it exists.")
    target_path_text = str(tmp_path / "hello.py")

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": target_path_text, "content": "print('hi')\n"},
    )
    record_successful_tool_call(definition, write_call)

    # The single touched entry must be the same string that was passed in.
    assert definition.touched_files == [target_path_text]
| 106 | |
| 107 | |
def test_derive_verification_commands_adds_semantic_html_toc_check(tmp_path: Path) -> None:
    """Check the commands derived for an index.html table-of-contents task.

    With one chapter file, an index.html linking to it, and acceptance
    criteria about TOC links and titles, the derived commands must include one
    that starts with a ``/usr/bin/python3`` heredoc and must not include the
    plain ``test -f`` check on index.html.
    """
    # Fixture: a single chapter page plus an index whose list links to it.
    chapter_dir = tmp_path / "chapters"
    chapter_dir.mkdir()
    chapter_page = chapter_dir / "01-introduction.html"
    chapter_page.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n")

    toc_lines = [
        '<ul class="chapter-list">',
        ' <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>',
        "</ul>",
    ]
    toc_page = tmp_path / "index.html"
    toc_page.write_text("\n".join(toc_lines))

    definition = create_definition_of_done(
        "Update index.html so the table of contents links and hrefs are correct."
    )
    definition.acceptance_criteria = [
        "All table of contents links in index.html point to existing chapter files.",
        "All link texts match the actual chapter titles.",
    ]
    definition.touched_files = [str(toc_page)]

    derived = derive_verification_commands(
        definition,
        project_root=tmp_path,
        task_statement=definition.task_statement,
    )

    heredoc_prefix = "/usr/bin/python3 - <<'PY'"
    existence_check = f"test -f {toc_page}"
    assert any(entry.startswith(heredoc_prefix) for entry in derived)
    assert existence_check not in derived