loader Public

Watch 0 Fork 0 Star 0

JSON · 7390 bytes Raw Blame History

  
        1
        [
      
        2
          {
      
        3
            "name": "streaming_text",
      
        4
            "category": "baseline",
      
        5
            "description": "Simple streamed answer with no tool use."
      
        6
          },
      
        7
          {
      
        8
            "name": "read_file_roundtrip",
      
        9
            "category": "tool-roundtrip",
      
        10
            "description": "Native tool call reads a file, then the model summarizes the result."
      
        11
          },
      
        12
          {
      
        13
            "name": "multi_tool_turn_roundtrip",
      
        14
            "category": "tool-roundtrip",
      
        15
            "description": "A single assistant turn executes multiple tools before finalizing."
      
        16
          },
      
        17
          {
      
        18
            "name": "turn_summary_smoke_for_multi_tool_turn",
      
        19
            "category": "summary",
      
        20
            "description": "Completed multi-tool turns populate TurnSummary with assistant messages, tool results, and trace data."
      
        21
          },
      
        22
          {
      
        23
            "name": "write_file_allowed",
      
        24
            "category": "confirmation",
      
        25
            "description": "A destructive write goes through the confirmation path and succeeds."
      
        26
          },
      
        27
          {
      
        28
            "name": "write_file_denied",
      
        29
            "category": "confirmation",
      
        30
            "description": "A destructive write is declined and does not touch the filesystem."
      
        31
          },
      
        32
          {
      
        33
            "name": "bash_stdout_roundtrip",
      
        34
            "category": "shell",
      
        35
            "description": "Bash returns stdout cleanly through the runtime."
      
        36
          },
      
        37
          {
      
        38
            "name": "bash_confirmation_prompt_approved",
      
        39
            "category": "shell",
      
        40
            "description": "A destructive bash command is approved through the callback path."
      
        41
          },
      
        42
          {
      
        43
            "name": "bash_confirmation_prompt_denied",
      
        44
            "category": "shell",
      
        45
            "description": "A destructive bash command is denied through the callback path."
      
        46
          },
      
        47
          {
      
        48
            "name": "read_only_mode_denies_write",
      
        49
            "category": "permissions",
      
        50
            "description": "Read-only mode blocks write tool calls before execution."
      
        51
          },
      
        52
          {
      
        53
            "name": "read_only_mode_denies_mutating_bash",
      
        54
            "category": "permissions",
      
        55
            "description": "Read-only mode blocks mutating shell commands while keeping the runtime alive."
      
        56
          },
      
        57
          {
      
        58
            "name": "read_only_mode_allows_safe_bash",
      
        59
            "category": "permissions",
      
        60
            "description": "Read-only mode still allows safe read-only shell commands."
      
        61
          },
      
        62
          {
      
        63
            "name": "workspace_write_denies_write_outside_root",
      
        64
            "category": "permissions",
      
        65
            "description": "Workspace-write mode rejects writes that escape the configured workspace root."
      
        66
          },
      
        67
          {
      
        68
            "name": "danger_full_access_allows_dangerous_bash",
      
        69
            "category": "permissions",
      
        70
            "description": "Danger-full-access mode permits dangerous shell operations when approved."
      
        71
          },
      
        72
          {
      
        73
            "name": "prompt_mode_prompts_destructive_write",
      
        74
            "category": "permissions",
      
        75
            "description": "Prompt mode routes destructive writes through the policy approval path before execution."
      
        76
          },
      
        77
          {
      
        78
            "name": "allow_mode_skips_prompt_for_destructive_write",
      
        79
            "category": "permissions",
      
        80
            "description": "Allow mode executes destructive writes without a second legacy confirmation prompt."
      
        81
          },
      
        82
          {
      
        83
            "name": "deny_rule_blocks_allowed_mode",
      
        84
            "category": "permissions",
      
        85
            "description": "A deny rule still blocks tool execution even when the active permission mode would otherwise allow it."
      
        86
          },
      
        87
          {
      
        88
            "name": "ask_rule_prompts_even_when_mode_would_allow",
      
        89
            "category": "permissions",
      
        90
            "description": "An ask rule forces interactive approval even when the active permission mode would otherwise allow the tool."
      
        91
          },
      
        92
          {
      
        93
            "name": "raw_json_tool_call_fallback",
      
        94
            "category": "fallback",
      
        95
            "description": "Raw JSON tool syntax is recovered when native tool calls are absent."
      
        96
          },
      
        97
          {
      
        98
            "name": "raw_json_todowrite_tool_call_fallback",
      
        99
            "category": "fallback",
      
        100
            "description": "Raw JSON fallback can recover TodoWrite calls with nested todo items."
      
        101
          },
      
        102
          {
      
        103
            "name": "raw_json_patch_tool_call_fallback",
      
        104
            "category": "fallback",
      
        105
            "description": "Raw JSON fallback can recover patch calls with nested structured hunks."
      
        106
          },
      
        107
          {
      
        108
            "name": "raw_json_ask_user_question_tool_call_fallback",
      
        109
            "category": "fallback",
      
        110
            "description": "Raw JSON fallback can recover AskUserQuestion calls with structured option objects."
      
        111
          },
      
        112
          {
      
        113
            "name": "raw_bracket_ask_user_question_tool_call_fallback",
      
        114
            "category": "fallback",
      
        115
            "description": "Bracket-format fallback canonicalizes mixed-case workflow tool names against the registry."
      
        116
          },
      
        117
          {
      
        118
            "name": "native_and_raw_tool_paths_share_executor_trace",
      
        119
            "category": "executor",
      
        120
            "description": "Native and extracted tool calls emit the same executor trace events, annotated by source."
      
        121
          },
      
        122
          {
      
        123
            "name": "backend_capability_probe_refreshes_native_tool_mode",
      
        124
            "category": "capabilities",
      
        125
            "description": "Turn startup can refine backend capabilities before the first request and enable native tool use."
      
        126
          },
      
        127
          {
      
        128
            "name": "run_streaming_delegates_to_primary_runtime",
      
        129
            "category": "runtime",
      
        130
            "description": "The streaming helper delegates into the main runtime path instead of maintaining a second loop."
      
        131
          },
      
        132
          {
      
        133
            "name": "definition_of_done_verify_phase",
      
        134
            "category": "definition-of-done",
      
        135
            "description": "Mutating tasks enter a verify phase before completion and return evidence-backed responses."
      
        136
          },
      
        137
          {
      
        138
            "name": "verify_failure_routes_to_fix_loop",
      
        139
            "category": "definition-of-done",
      
        140
            "description": "Verification failures route back into execution with a structured fix loop."
      
        141
          },
      
        142
          {
      
        143
            "name": "verify_retry_budget_exhaustion",
      
        144
            "category": "definition-of-done",
      
        145
            "description": "Verification escalates to the user once the fix-loop retry budget is exhausted."
      
        146
          },
      
        147
          {
      
        148
            "name": "ambiguous_prompt_routes_to_clarify",
      
        149
            "category": "workflow",
      
        150
            "description": "Ambiguous prompts enter clarify mode, ask one structured question, persist a single-question brief artifact, and hand off to execute."
      
        151
          },
      
        152
          {
      
        153
            "name": "complex_prompt_routes_to_plan",
      
        154
            "category": "workflow",
      
        155
            "description": "Complex prompts enter plan mode, persist single-pass implementation and verification artifacts, and use planned verification commands without legacy decomposition."
      
        156
          },
      
        157
          {
      
        158
            "name": "verify_failure_fix_loop_does_not_reroute_workflow",
      
        159
            "category": "workflow",
      
        160
            "description": "A verify-fix retry returns to execute mode without re-triggering clarify or plan."
      
        161
          },
      
        162
          {
      
        163
            "name": "conversational_task_skips_verify_phase",
      
        164
            "category": "definition-of-done",
      
        165
            "description": "Conversational tasks skip the verify phase entirely and avoid DoD overhead."
      
        166
          },
      
        167
          {
      
        168
            "name": "explore_mode_skips_dod_and_router",
      
        169
            "category": "explore",
      
        170
            "description": "Explore mode answers lookup questions without entering workflow routing or creating DoD artifacts."
      
        171
          },
      
        172
          {
      
        173
            "name": "explore_mode_denies_write",
      
        174
            "category": "explore",
      
        175
            "description": "Explore mode stays read-only even when the broader session would otherwise allow workspace writes."
      
        176
          },
      
        177
          {
      
        178
            "name": "explore_mode_ignores_global_allow_policy",
      
        179
            "category": "explore",
      
        180
            "description": "Explore mode ignores global allow rules so the read-only lane cannot be elevated into write access."
      
        181
          },
      
        182
          {
      
        183
            "name": "non_mutating_completion_no_longer_forces_continuation",
      
        184
            "category": "behavior",
      
        185
            "description": "Non-mutating tasks now return the model response directly instead of injecting a continuation prompt."
      
        186
          },
      
        187
          {
      
        188
            "name": "tool_result_contract_regression",
      
        189
            "category": "known-failure",
      
        190
            "description": "Duplicate-suppression and pre-validation branches should not build invalid Message objects."
      
        191
          }
      
        192
        ]

1	[
2	{
3	"name": "streaming_text",
4	"category": "baseline",
5	"description": "Simple streamed answer with no tool use."
6	},
7	{
8	"name": "read_file_roundtrip",
9	"category": "tool-roundtrip",
10	"description": "Native tool call reads a file, then the model summarizes the result."
11	},
12	{
13	"name": "multi_tool_turn_roundtrip",
14	"category": "tool-roundtrip",
15	"description": "A single assistant turn executes multiple tools before finalizing."
16	},
17	{
18	"name": "turn_summary_smoke_for_multi_tool_turn",
19	"category": "summary",
20	"description": "Completed multi-tool turns populate TurnSummary with assistant messages, tool results, and trace data."
21	},
22	{
23	"name": "write_file_allowed",
24	"category": "confirmation",
25	"description": "A destructive write goes through the confirmation path and succeeds."
26	},
27	{
28	"name": "write_file_denied",
29	"category": "confirmation",
30	"description": "A destructive write is declined and does not touch the filesystem."
31	},
32	{
33	"name": "bash_stdout_roundtrip",
34	"category": "shell",
35	"description": "Bash returns stdout cleanly through the runtime."
36	},
37	{
38	"name": "bash_confirmation_prompt_approved",
39	"category": "shell",
40	"description": "A destructive bash command is approved through the callback path."
41	},
42	{
43	"name": "bash_confirmation_prompt_denied",
44	"category": "shell",
45	"description": "A destructive bash command is denied through the callback path."
46	},
47	{
48	"name": "read_only_mode_denies_write",
49	"category": "permissions",
50	"description": "Read-only mode blocks write tool calls before execution."
51	},
52	{
53	"name": "read_only_mode_denies_mutating_bash",
54	"category": "permissions",
55	"description": "Read-only mode blocks mutating shell commands while keeping the runtime alive."
56	},
57	{
58	"name": "read_only_mode_allows_safe_bash",
59	"category": "permissions",
60	"description": "Read-only mode still allows safe read-only shell commands."
61	},
62	{
63	"name": "workspace_write_denies_write_outside_root",
64	"category": "permissions",
65	"description": "Workspace-write mode rejects writes that escape the configured workspace root."
66	},
67	{
68	"name": "danger_full_access_allows_dangerous_bash",
69	"category": "permissions",
70	"description": "Danger-full-access mode permits dangerous shell operations when approved."
71	},
72	{
73	"name": "prompt_mode_prompts_destructive_write",
74	"category": "permissions",
75	"description": "Prompt mode routes destructive writes through the policy approval path before execution."
76	},
77	{
78	"name": "allow_mode_skips_prompt_for_destructive_write",
79	"category": "permissions",
80	"description": "Allow mode executes destructive writes without a second legacy confirmation prompt."
81	},
82	{
83	"name": "deny_rule_blocks_allowed_mode",
84	"category": "permissions",
85	"description": "A deny rule still blocks tool execution even when the active permission mode would otherwise allow it."
86	},
87	{
88	"name": "ask_rule_prompts_even_when_mode_would_allow",
89	"category": "permissions",
90	"description": "An ask rule forces interactive approval even when the active permission mode would otherwise allow the tool."
91	},
92	{
93	"name": "raw_json_tool_call_fallback",
94	"category": "fallback",
95	"description": "Raw JSON tool syntax is recovered when native tool calls are absent."
96	},
97	{
98	"name": "raw_json_todowrite_tool_call_fallback",
99	"category": "fallback",
100	"description": "Raw JSON fallback can recover TodoWrite calls with nested todo items."
101	},
102	{
103	"name": "raw_json_patch_tool_call_fallback",
104	"category": "fallback",
105	"description": "Raw JSON fallback can recover patch calls with nested structured hunks."
106	},
107	{
108	"name": "raw_json_ask_user_question_tool_call_fallback",
109	"category": "fallback",
110	"description": "Raw JSON fallback can recover AskUserQuestion calls with structured option objects."
111	},
112	{
113	"name": "raw_bracket_ask_user_question_tool_call_fallback",
114	"category": "fallback",
115	"description": "Bracket-format fallback canonicalizes mixed-case workflow tool names against the registry."
116	},
117	{
118	"name": "native_and_raw_tool_paths_share_executor_trace",
119	"category": "executor",
120	"description": "Native and extracted tool calls emit the same executor trace events, annotated by source."
121	},
122	{
123	"name": "backend_capability_probe_refreshes_native_tool_mode",
124	"category": "capabilities",
125	"description": "Turn startup can refine backend capabilities before the first request and enable native tool use."
126	},
127	{
128	"name": "run_streaming_delegates_to_primary_runtime",
129	"category": "runtime",
130	"description": "The streaming helper delegates into the main runtime path instead of maintaining a second loop."
131	},
132	{
133	"name": "definition_of_done_verify_phase",
134	"category": "definition-of-done",
135	"description": "Mutating tasks enter a verify phase before completion and return evidence-backed responses."
136	},
137	{
138	"name": "verify_failure_routes_to_fix_loop",
139	"category": "definition-of-done",
140	"description": "Verification failures route back into execution with a structured fix loop."
141	},
142	{
143	"name": "verify_retry_budget_exhaustion",
144	"category": "definition-of-done",
145	"description": "Verification escalates to the user once the fix-loop retry budget is exhausted."
146	},
147	{
148	"name": "ambiguous_prompt_routes_to_clarify",
149	"category": "workflow",
150	"description": "Ambiguous prompts enter clarify mode, ask one structured question, persist a single-question brief artifact, and hand off to execute."
151	},
152	{
153	"name": "complex_prompt_routes_to_plan",
154	"category": "workflow",
155	"description": "Complex prompts enter plan mode, persist single-pass implementation and verification artifacts, and use planned verification commands without legacy decomposition."
156	},
157	{
158	"name": "verify_failure_fix_loop_does_not_reroute_workflow",
159	"category": "workflow",
160	"description": "A verify-fix retry returns to execute mode without re-triggering clarify or plan."
161	},
162	{
163	"name": "conversational_task_skips_verify_phase",
164	"category": "definition-of-done",
165	"description": "Conversational tasks skip the verify phase entirely and avoid DoD overhead."
166	},
167	{
168	"name": "explore_mode_skips_dod_and_router",
169	"category": "explore",
170	"description": "Explore mode answers lookup questions without entering workflow routing or creating DoD artifacts."
171	},
172	{
173	"name": "explore_mode_denies_write",
174	"category": "explore",
175	"description": "Explore mode stays read-only even when the broader session would otherwise allow workspace writes."
176	},
177	{
178	"name": "explore_mode_ignores_global_allow_policy",
179	"category": "explore",
180	"description": "Explore mode ignores global allow rules so the read-only lane cannot be elevated into write access."
181	},
182	{
183	"name": "non_mutating_completion_no_longer_forces_continuation",
184	"category": "behavior",
185	"description": "Non-mutating tasks now return the model response directly instead of injecting a continuation prompt."
186	},
187	{
188	"name": "tool_result_contract_regression",
189	"category": "known-failure",
190	"description": "Duplicate-suppression and pre-validation branches should not build invalid Message objects."
191	}
192	]