Python · 4672 bytes Raw Blame History
1 """Tests for definition-of-done state and persistence."""
2
3 from pathlib import Path
4
5 from loader.llm.base import ToolCall
6 from loader.runtime.dod import (
7 DefinitionOfDoneStore,
8 begin_new_verification_attempt,
9 create_definition_of_done,
10 derive_verification_commands,
11 determine_task_size,
12 ensure_active_verification_attempt,
13 record_successful_tool_call,
14 )
15
16
def test_determine_task_size_boundaries() -> None:
    """Pin the size label returned on each side of the label cut-offs."""
    # Each row is (first argument, second argument, expected label). The rows
    # sit directly on either side of the points where the label changes.
    boundary_cases = (
        (1, 10, "small"),
        (3, 99, "small"),
        (4, 99, "standard"),
        (15, 499, "standard"),
        (16, 499, "large"),
        (15, 500, "large"),
    )
    for first, second, expected_label in boundary_cases:
        assert determine_task_size(first, second) == expected_label
24
25
def test_definition_of_done_round_trip(tmp_path: Path) -> None:
    """Save a mutated definition of done and check that a reload matches it."""
    hello_file = str(tmp_path / "hello.py")

    store = DefinitionOfDoneStore(tmp_path)
    original = create_definition_of_done(
        "Create hello.py and verify it runs.",
        retry_budget=2,
    )

    # Move the record away from its freshly created state so the reload has
    # explicitly assigned values to compare against.
    original.status = "fixing"
    original.retry_count = 1
    original.verification_commands = ["python hello.py"]
    original.touched_files = [hello_file]
    started_attempt = begin_new_verification_attempt(original)

    restored = store.load(store.save(original))

    assert restored.task_statement == original.task_statement
    assert restored.status == "fixing"
    assert restored.retry_count == 1
    assert restored.verification_commands == ["python hello.py"]
    assert restored.touched_files == [hello_file]
    # The attempt started before saving must still be the active one.
    assert restored.active_verification_attempt_id == started_attempt.attempt_id
    assert (
        restored.active_verification_attempt_number
        == started_attempt.attempt_number
    )
48
49
def test_ensure_active_verification_attempt_rehydrates_missing_active_attempt() -> None:
    """With only the attempt counter set to 2, the ensured attempt is number 2."""
    expected_id = "verification-attempt-2"
    expected_number = 2

    dod = create_definition_of_done("Verify the runtime output.")
    # Only the counter is assigned; no attempt is started explicitly.
    dod.verification_attempt_counter = 2

    ensured = ensure_active_verification_attempt(dod)

    # The returned attempt carries the expected identity...
    assert ensured.attempt_id == expected_id
    assert ensured.attempt_number == expected_number
    # ...and the same identity is recorded on the definition of done.
    assert dod.active_verification_attempt_id == expected_id
    assert dod.active_verification_attempt_number == expected_number
60
61
def test_verification_command_derivation_prefers_runtime_evidence(tmp_path: Path) -> None:
    """Derived commands equal the bash command recorded after the file write."""
    dod = create_definition_of_done("Create hello.py and make sure it runs.")
    script_path = tmp_path / "hello.py"

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(script_path), "content": "print('hi')\n"},
    )
    run_call = ToolCall(
        id="bash-1",
        name="bash",
        arguments={"command": "python hello.py"},
    )
    # Record the write first and the run second.
    for successful_call in (write_call, run_call):
        record_successful_tool_call(dod, successful_call)

    derived = derive_verification_commands(
        dod,
        project_root=tmp_path,
        task_statement=dod.task_statement,
    )

    assert derived == ["python hello.py"]
90
91
def test_record_successful_tool_call_preserves_absolute_path_string(tmp_path: Path) -> None:
    """A recorded write leaves the exact file_path string in touched_files."""
    target = str(tmp_path / "hello.py")
    dod = create_definition_of_done("Create hello.py and verify it exists.")

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": target, "content": "print('hi')\n"},
    )
    record_successful_tool_call(dod, write_call)

    # The stored entry must be the same string that was passed in.
    assert dod.touched_files == [target]
106
107
def test_derive_verification_commands_adds_semantic_html_toc_check(tmp_path: Path) -> None:
    """Derived commands include a python3 heredoc and no bare existence check.

    The fixture is one chapter file plus an index.html whose single link
    points at that chapter with matching link text. Presumably the heredoc
    command is the "semantic TOC check" named in the test title; this test
    only inspects the command's prefix.
    """
    chapter_dir = tmp_path / "chapters"
    chapter_dir.mkdir()
    chapter_file = chapter_dir / "01-introduction.html"
    chapter_file.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n")

    toc_lines = (
        '<ul class="chapter-list">',
        ' <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>',
        "</ul>",
    )
    index_page = tmp_path / "index.html"
    index_page.write_text("\n".join(toc_lines))

    dod = create_definition_of_done(
        "Update index.html so the table of contents links and hrefs are correct."
    )
    dod.acceptance_criteria = [
        "All table of contents links in index.html point to existing chapter files.",
        "All link texts match the actual chapter titles.",
    ]
    dod.touched_files = [str(index_page)]

    derived = derive_verification_commands(
        dod,
        project_root=tmp_path,
        task_statement=dod.task_statement,
    )

    heredoc_prefix = "/usr/bin/python3 - <<'PY'"
    assert any(entry.startswith(heredoc_prefix) for entry in derived)
    # A plain file-existence check on index.html must not be emitted.
    assert f"test -f {index_page}" not in derived