loader Public

Watch 0 Fork 0 Star 0

Python · 4672 bytes Raw Blame History

  
        1
        """Tests for definition-of-done state and persistence."""
      
        2
        
        3
        from pathlib import Path
      
        4
        
        5
        from loader.llm.base import ToolCall
      
        6
        from loader.runtime.dod import (
      
        7
            DefinitionOfDoneStore,
      
        8
            begin_new_verification_attempt,
      
        9
            create_definition_of_done,
      
        10
            derive_verification_commands,
      
        11
            determine_task_size,
      
        12
            ensure_active_verification_attempt,
      
        13
            record_successful_tool_call,
      
        14
        )
      
        15
        
        16
        
        17
        def test_determine_task_size_boundaries() -> None:
            """Check the size label reported on either side of each boundary."""
            # Each entry pairs the positional arguments with the expected label.
            expected_labels = (
                ((1, 10), "small"),
                ((3, 99), "small"),
                ((4, 99), "standard"),
                ((15, 499), "standard"),
                ((16, 499), "large"),
                ((15, 500), "large"),
            )
            for arguments, label in expected_labels:
                assert determine_task_size(*arguments) == label
      
        24
        
        25
        
        26
        def test_definition_of_done_round_trip(tmp_path: Path) -> None:
            """Save a definition of done, load it back, and compare the fields."""
            dod_store = DefinitionOfDoneStore(tmp_path)
            definition = create_definition_of_done(
                "Create hello.py and verify it runs.",
                retry_budget=2,
            )
            hello_file = str(tmp_path / "hello.py")

            # Populate the state before starting the attempt and saving.
            definition.status = "fixing"
            definition.retry_count = 1
            definition.verification_commands = ["python hello.py"]
            definition.touched_files = [hello_file]
            started_attempt = begin_new_verification_attempt(definition)
            location = dod_store.save(definition)

            restored = dod_store.load(location)

            assert restored.task_statement == definition.task_statement
            assert restored.status == "fixing"
            assert restored.retry_count == 1
            assert restored.verification_commands == ["python hello.py"]
            assert restored.touched_files == [hello_file]
            assert restored.active_verification_attempt_id == started_attempt.attempt_id
            assert (
                restored.active_verification_attempt_number
                == started_attempt.attempt_number
            )
      
        48
        
        49
        
        50
        def test_ensure_active_verification_attempt_rehydrates_missing_active_attempt() -> None:
            """Expect attempt 2 to become active when the attempt counter is 2.

            The definition is freshly created and only its counter is set before
            calling ensure_active_verification_attempt.
            """
            expected_id = "verification-attempt-2"
            expected_number = 2

            definition = create_definition_of_done("Verify the runtime output.")
            definition.verification_attempt_counter = expected_number

            active = ensure_active_verification_attempt(definition)

            # The returned attempt and the definition's fields must agree.
            assert active.attempt_id == expected_id
            assert active.attempt_number == expected_number
            assert definition.active_verification_attempt_id == expected_id
            assert definition.active_verification_attempt_number == expected_number
      
        60
        
        61
        
        62
        def test_verification_command_derivation_prefers_runtime_evidence(tmp_path: Path) -> None:
            """Expect the recorded bash command to be the only derived command.

            A write call for hello.py and a `python hello.py` bash call are
            recorded before the verification commands are derived.
            """
            run_command = "python hello.py"
            definition = create_definition_of_done("Create hello.py and make sure it runs.")
            script_path = tmp_path / "hello.py"

            write_call = ToolCall(
                id="write-1",
                name="write",
                arguments={"file_path": str(script_path), "content": "print('hi')\n"},
            )
            record_successful_tool_call(definition, write_call)

            bash_call = ToolCall(
                id="bash-1",
                name="bash",
                arguments={"command": run_command},
            )
            record_successful_tool_call(definition, bash_call)

            derived = derive_verification_commands(
                definition,
                project_root=tmp_path,
                task_statement=definition.task_statement,
            )

            assert derived == [run_command]
      
        90
        
        91
        
        92
        def test_record_successful_tool_call_preserves_absolute_path_string(tmp_path: Path) -> None:
            """Expect touched_files to hold the write call's path string unchanged."""
            definition = create_definition_of_done("Create hello.py and verify it exists.")
            written_path = str(tmp_path / "hello.py")

            write_call = ToolCall(
                id="write-1",
                name="write",
                arguments={"file_path": written_path, "content": "print('hi')\n"},
            )
            record_successful_tool_call(definition, write_call)

            assert definition.touched_files == [written_path]
      
        106
        
        107
        
        108
        def test_derive_verification_commands_adds_semantic_html_toc_check(tmp_path: Path) -> None:
            """Expect a Python heredoc check instead of a bare file-existence test.

            The fixture is one chapter file plus an index.html whose list links
            to it; the definition of done names index.html as the touched file.
            """
            chapter_dir = tmp_path / "chapters"
            chapter_dir.mkdir()
            chapter_file = chapter_dir / "01-introduction.html"
            chapter_file.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n")

            toc_lines = [
                '<ul class="chapter-list">',
                '  <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>',
                "</ul>",
            ]
            index = tmp_path / "index.html"
            index.write_text("\n".join(toc_lines))

            definition = create_definition_of_done(
                "Update index.html so the table of contents links and hrefs are correct."
            )
            definition.acceptance_criteria = [
                "All table of contents links in index.html point to existing chapter files.",
                "All link texts match the actual chapter titles.",
            ]
            definition.touched_files = [str(index)]

            derived = derive_verification_commands(
                definition,
                project_root=tmp_path,
                task_statement=definition.task_statement,
            )

            heredoc_prefix = "/usr/bin/python3 - <<'PY'"
            existence_check = f"test -f {index}"
            assert any(command.startswith(heredoc_prefix) for command in derived)
            assert existence_check not in derived

1	"""Tests for definition-of-done state and persistence."""
2
3	from pathlib import Path
4
5	from loader.llm.base import ToolCall
6	from loader.runtime.dod import (
7	DefinitionOfDoneStore,
8	begin_new_verification_attempt,
9	create_definition_of_done,
10	derive_verification_commands,
11	determine_task_size,
12	ensure_active_verification_attempt,
13	record_successful_tool_call,
14	)
15
16
def test_determine_task_size_boundaries() -> None:
    """Check the size label reported on either side of each boundary."""
    # Each entry pairs the positional arguments with the expected label.
    expected_labels = (
        ((1, 10), "small"),
        ((3, 99), "small"),
        ((4, 99), "standard"),
        ((15, 499), "standard"),
        ((16, 499), "large"),
        ((15, 500), "large"),
    )
    for arguments, label in expected_labels:
        assert determine_task_size(*arguments) == label
24
25
def test_definition_of_done_round_trip(tmp_path: Path) -> None:
    """Save a definition of done, load it back, and compare the fields."""
    dod_store = DefinitionOfDoneStore(tmp_path)
    definition = create_definition_of_done(
        "Create hello.py and verify it runs.",
        retry_budget=2,
    )
    hello_file = str(tmp_path / "hello.py")

    # Populate the state before starting the attempt and saving.
    definition.status = "fixing"
    definition.retry_count = 1
    definition.verification_commands = ["python hello.py"]
    definition.touched_files = [hello_file]
    started_attempt = begin_new_verification_attempt(definition)
    location = dod_store.save(definition)

    restored = dod_store.load(location)

    assert restored.task_statement == definition.task_statement
    assert restored.status == "fixing"
    assert restored.retry_count == 1
    assert restored.verification_commands == ["python hello.py"]
    assert restored.touched_files == [hello_file]
    assert restored.active_verification_attempt_id == started_attempt.attempt_id
    assert (
        restored.active_verification_attempt_number
        == started_attempt.attempt_number
    )
48
49
def test_ensure_active_verification_attempt_rehydrates_missing_active_attempt() -> None:
    """Expect attempt 2 to become active when the attempt counter is 2.

    The definition is freshly created and only its counter is set before
    calling ensure_active_verification_attempt.
    """
    expected_id = "verification-attempt-2"
    expected_number = 2

    definition = create_definition_of_done("Verify the runtime output.")
    definition.verification_attempt_counter = expected_number

    active = ensure_active_verification_attempt(definition)

    # The returned attempt and the definition's fields must agree.
    assert active.attempt_id == expected_id
    assert active.attempt_number == expected_number
    assert definition.active_verification_attempt_id == expected_id
    assert definition.active_verification_attempt_number == expected_number
60
61
def test_verification_command_derivation_prefers_runtime_evidence(tmp_path: Path) -> None:
    """Expect the recorded bash command to be the only derived command.

    A write call for hello.py and a `python hello.py` bash call are
    recorded before the verification commands are derived.
    """
    run_command = "python hello.py"
    definition = create_definition_of_done("Create hello.py and make sure it runs.")
    script_path = tmp_path / "hello.py"

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(script_path), "content": "print('hi')\n"},
    )
    record_successful_tool_call(definition, write_call)

    bash_call = ToolCall(
        id="bash-1",
        name="bash",
        arguments={"command": run_command},
    )
    record_successful_tool_call(definition, bash_call)

    derived = derive_verification_commands(
        definition,
        project_root=tmp_path,
        task_statement=definition.task_statement,
    )

    assert derived == [run_command]
90
91
def test_record_successful_tool_call_preserves_absolute_path_string(tmp_path: Path) -> None:
    """Expect touched_files to hold the write call's path string unchanged."""
    definition = create_definition_of_done("Create hello.py and verify it exists.")
    written_path = str(tmp_path / "hello.py")

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": written_path, "content": "print('hi')\n"},
    )
    record_successful_tool_call(definition, write_call)

    assert definition.touched_files == [written_path]
106
107
def test_derive_verification_commands_adds_semantic_html_toc_check(tmp_path: Path) -> None:
    """Expect a Python heredoc check instead of a bare file-existence test.

    The fixture is one chapter file plus an index.html whose list links
    to it; the definition of done names index.html as the touched file.
    """
    chapter_dir = tmp_path / "chapters"
    chapter_dir.mkdir()
    chapter_file = chapter_dir / "01-introduction.html"
    chapter_file.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n")

    toc_lines = [
        '<ul class="chapter-list">',
        ' <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>',
        "</ul>",
    ]
    index = tmp_path / "index.html"
    index.write_text("\n".join(toc_lines))

    definition = create_definition_of_done(
        "Update index.html so the table of contents links and hrefs are correct."
    )
    definition.acceptance_criteria = [
        "All table of contents links in index.html point to existing chapter files.",
        "All link texts match the actual chapter titles.",
    ]
    definition.touched_files = [str(index)]

    derived = derive_verification_commands(
        definition,
        project_root=tmp_path,
        task_statement=definition.task_statement,
    )

    heredoc_prefix = "/usr/bin/python3 - <<'PY'"
    existence_check = f"test -f {index}"
    assert any(command.startswith(heredoc_prefix) for command in derived)
    assert existence_check not in derived