JSON · 7390 bytes Raw Blame History
1 [
2 {
3 "name": "streaming_text",
4 "category": "baseline",
5 "description": "Simple streamed answer with no tool use."
6 },
7 {
8 "name": "read_file_roundtrip",
9 "category": "tool-roundtrip",
10 "description": "Native tool call reads a file, then the model summarizes the result."
11 },
12 {
13 "name": "multi_tool_turn_roundtrip",
14 "category": "tool-roundtrip",
15 "description": "A single assistant turn executes multiple tools before finalizing."
16 },
17 {
18 "name": "turn_summary_smoke_for_multi_tool_turn",
19 "category": "summary",
20 "description": "Completed multi-tool turns populate TurnSummary with assistant messages, tool results, and trace data."
21 },
22 {
23 "name": "write_file_allowed",
24 "category": "confirmation",
25 "description": "A destructive write goes through the confirmation path and succeeds."
26 },
27 {
28 "name": "write_file_denied",
29 "category": "confirmation",
30 "description": "A destructive write is declined and does not touch the filesystem."
31 },
32 {
33 "name": "bash_stdout_roundtrip",
34 "category": "shell",
35 "description": "Bash returns stdout cleanly through the runtime."
36 },
37 {
38 "name": "bash_confirmation_prompt_approved",
39 "category": "shell",
40 "description": "A destructive bash command is approved through the callback path."
41 },
42 {
43 "name": "bash_confirmation_prompt_denied",
44 "category": "shell",
45 "description": "A destructive bash command is denied through the callback path."
46 },
47 {
48 "name": "read_only_mode_denies_write",
49 "category": "permissions",
50 "description": "Read-only mode blocks write tool calls before execution."
51 },
52 {
53 "name": "read_only_mode_denies_mutating_bash",
54 "category": "permissions",
55 "description": "Read-only mode blocks mutating shell commands while keeping the runtime alive."
56 },
57 {
58 "name": "read_only_mode_allows_safe_bash",
59 "category": "permissions",
60 "description": "Read-only mode still allows safe read-only shell commands."
61 },
62 {
63 "name": "workspace_write_denies_write_outside_root",
64 "category": "permissions",
65 "description": "Workspace-write mode rejects writes that escape the configured workspace root."
66 },
67 {
68 "name": "danger_full_access_allows_dangerous_bash",
69 "category": "permissions",
70 "description": "Danger-full-access mode permits dangerous shell operations when approved."
71 },
72 {
73 "name": "prompt_mode_prompts_destructive_write",
74 "category": "permissions",
75 "description": "Prompt mode routes destructive writes through the policy approval path before execution."
76 },
77 {
78 "name": "allow_mode_skips_prompt_for_destructive_write",
79 "category": "permissions",
80 "description": "Allow mode executes destructive writes without a second legacy confirmation prompt."
81 },
82 {
83 "name": "deny_rule_blocks_allowed_mode",
84 "category": "permissions",
85 "description": "A deny rule still blocks tool execution even when the active permission mode would otherwise allow it."
86 },
87 {
88 "name": "ask_rule_prompts_even_when_mode_would_allow",
89 "category": "permissions",
90 "description": "An ask rule forces interactive approval even when the active permission mode would otherwise allow the tool."
91 },
92 {
93 "name": "raw_json_tool_call_fallback",
94 "category": "fallback",
95 "description": "Raw JSON tool syntax is recovered when native tool calls are absent."
96 },
97 {
98 "name": "raw_json_todowrite_tool_call_fallback",
99 "category": "fallback",
100 "description": "Raw JSON fallback can recover TodoWrite calls with nested todo items."
101 },
102 {
103 "name": "raw_json_patch_tool_call_fallback",
104 "category": "fallback",
105 "description": "Raw JSON fallback can recover patch calls with nested structured hunks."
106 },
107 {
108 "name": "raw_json_ask_user_question_tool_call_fallback",
109 "category": "fallback",
110 "description": "Raw JSON fallback can recover AskUserQuestion calls with structured option objects."
111 },
112 {
113 "name": "raw_bracket_ask_user_question_tool_call_fallback",
114 "category": "fallback",
115 "description": "Bracket-format fallback canonicalizes mixed-case workflow tool names against the registry."
116 },
117 {
118 "name": "native_and_raw_tool_paths_share_executor_trace",
119 "category": "executor",
120 "description": "Native and extracted tool calls emit the same executor trace events, annotated by source."
121 },
122 {
123 "name": "backend_capability_probe_refreshes_native_tool_mode",
124 "category": "capabilities",
125 "description": "Turn startup can refine backend capabilities before the first request and enable native tool use."
126 },
127 {
128 "name": "run_streaming_delegates_to_primary_runtime",
129 "category": "runtime",
130 "description": "The streaming helper delegates into the main runtime path instead of maintaining a second loop."
131 },
132 {
133 "name": "definition_of_done_verify_phase",
134 "category": "definition-of-done",
135 "description": "Mutating tasks enter a verify phase before completion and return evidence-backed responses."
136 },
137 {
138 "name": "verify_failure_routes_to_fix_loop",
139 "category": "definition-of-done",
140 "description": "Verification failures route back into execution with a structured fix loop."
141 },
142 {
143 "name": "verify_retry_budget_exhaustion",
144 "category": "definition-of-done",
145 "description": "Verification escalates to the user once the fix-loop retry budget is exhausted."
146 },
147 {
148 "name": "ambiguous_prompt_routes_to_clarify",
149 "category": "workflow",
150 "description": "Ambiguous prompts enter clarify mode, ask one structured question, persist a single-question brief artifact, and hand off to execute."
151 },
152 {
153 "name": "complex_prompt_routes_to_plan",
154 "category": "workflow",
155 "description": "Complex prompts enter plan mode, persist single-pass implementation and verification artifacts, and use planned verification commands without legacy decomposition."
156 },
157 {
158 "name": "verify_failure_fix_loop_does_not_reroute_workflow",
159 "category": "workflow",
160 "description": "A verify-fix retry returns to execute mode without re-triggering clarify or plan."
161 },
162 {
163 "name": "conversational_task_skips_verify_phase",
164 "category": "definition-of-done",
165 "description": "Conversational tasks skip the verify phase entirely and avoid DoD overhead."
166 },
167 {
168 "name": "explore_mode_skips_dod_and_router",
169 "category": "explore",
170 "description": "Explore mode answers lookup questions without entering workflow routing or creating DoD artifacts."
171 },
172 {
173 "name": "explore_mode_denies_write",
174 "category": "explore",
175 "description": "Explore mode stays read-only even when the broader session would otherwise allow workspace writes."
176 },
177 {
178 "name": "explore_mode_ignores_global_allow_policy",
179 "category": "explore",
180 "description": "Explore mode ignores global allow rules so the read-only lane cannot be elevated into write access."
181 },
182 {
183 "name": "non_mutating_completion_no_longer_forces_continuation",
184 "category": "behavior",
185 "description": "Non-mutating tasks now return the model response directly instead of injecting a continuation prompt."
186 },
187 {
188 "name": "tool_result_contract_regression",
189 "category": "known-failure",
190 "description": "Duplicate-suppression and pre-validation branches should not build invalid Message objects."
191 }
192 ]