| 1 | [ |
| 2 | { |
| 3 | "name": "streaming_text", |
| 4 | "category": "baseline", |
| 5 | "description": "Simple streamed answer with no tool use." |
| 6 | }, |
| 7 | { |
| 8 | "name": "read_file_roundtrip", |
| 9 | "category": "tool-roundtrip", |
| 10 | "description": "Native tool call reads a file, then the model summarizes the result." |
| 11 | }, |
| 12 | { |
| 13 | "name": "multi_tool_turn_roundtrip", |
| 14 | "category": "tool-roundtrip", |
| 15 | "description": "A single assistant turn executes multiple tools before finalizing." |
| 16 | }, |
| 17 | { |
| 18 | "name": "turn_summary_smoke_for_multi_tool_turn", |
| 19 | "category": "summary", |
| 20 | "description": "Completed multi-tool turns populate TurnSummary with assistant messages, tool results, and trace data." |
| 21 | }, |
| 22 | { |
| 23 | "name": "write_file_allowed", |
| 24 | "category": "confirmation", |
| 25 | "description": "A destructive write goes through the confirmation path and succeeds." |
| 26 | }, |
| 27 | { |
| 28 | "name": "write_file_denied", |
| 29 | "category": "confirmation", |
| 30 | "description": "A destructive write is declined and does not touch the filesystem." |
| 31 | }, |
| 32 | { |
| 33 | "name": "bash_stdout_roundtrip", |
| 34 | "category": "shell", |
| 35 | "description": "Bash returns stdout cleanly through the runtime." |
| 36 | }, |
| 37 | { |
| 38 | "name": "bash_confirmation_prompt_approved", |
| 39 | "category": "shell", |
| 40 | "description": "A destructive bash command is approved through the callback path." |
| 41 | }, |
| 42 | { |
| 43 | "name": "bash_confirmation_prompt_denied", |
| 44 | "category": "shell", |
| 45 | "description": "A destructive bash command is denied through the callback path." |
| 46 | }, |
| 47 | { |
| 48 | "name": "read_only_mode_denies_write", |
| 49 | "category": "permissions", |
| 50 | "description": "Read-only mode blocks write tool calls before execution." |
| 51 | }, |
| 52 | { |
| 53 | "name": "read_only_mode_denies_mutating_bash", |
| 54 | "category": "permissions", |
| 55 | "description": "Read-only mode blocks mutating shell commands while keeping the runtime alive." |
| 56 | }, |
| 57 | { |
| 58 | "name": "read_only_mode_allows_safe_bash", |
| 59 | "category": "permissions", |
| 60 | "description": "Read-only mode still allows safe read-only shell commands." |
| 61 | }, |
| 62 | { |
| 63 | "name": "workspace_write_denies_write_outside_root", |
| 64 | "category": "permissions", |
| 65 | "description": "Workspace-write mode rejects writes that escape the configured workspace root." |
| 66 | }, |
| 67 | { |
| 68 | "name": "danger_full_access_allows_dangerous_bash", |
| 69 | "category": "permissions", |
| 70 | "description": "Danger-full-access mode permits dangerous shell operations when approved." |
| 71 | }, |
| 72 | { |
| 73 | "name": "prompt_mode_prompts_destructive_write", |
| 74 | "category": "permissions", |
| 75 | "description": "Prompt mode routes destructive writes through the policy approval path before execution." |
| 76 | }, |
| 77 | { |
| 78 | "name": "allow_mode_skips_prompt_for_destructive_write", |
| 79 | "category": "permissions", |
| 80 | "description": "Allow mode executes destructive writes without a second legacy confirmation prompt." |
| 81 | }, |
| 82 | { |
| 83 | "name": "deny_rule_blocks_allowed_mode", |
| 84 | "category": "permissions", |
| 85 | "description": "A deny rule still blocks tool execution even when the active permission mode would otherwise allow it." |
| 86 | }, |
| 87 | { |
| 88 | "name": "ask_rule_prompts_even_when_mode_would_allow", |
| 89 | "category": "permissions", |
| 90 | "description": "An ask rule forces interactive approval even when the active permission mode would otherwise allow the tool." |
| 91 | }, |
| 92 | { |
| 93 | "name": "raw_json_tool_call_fallback", |
| 94 | "category": "fallback", |
| 95 | "description": "Raw JSON tool syntax is recovered when native tool calls are absent." |
| 96 | }, |
| 97 | { |
| 98 | "name": "raw_json_todowrite_tool_call_fallback", |
| 99 | "category": "fallback", |
| 100 | "description": "Raw JSON fallback can recover TodoWrite calls with nested todo items." |
| 101 | }, |
| 102 | { |
| 103 | "name": "raw_json_patch_tool_call_fallback", |
| 104 | "category": "fallback", |
| 105 | "description": "Raw JSON fallback can recover patch calls with nested structured hunks." |
| 106 | }, |
| 107 | { |
| 108 | "name": "raw_json_ask_user_question_tool_call_fallback", |
| 109 | "category": "fallback", |
| 110 | "description": "Raw JSON fallback can recover AskUserQuestion calls with structured option objects." |
| 111 | }, |
| 112 | { |
| 113 | "name": "raw_bracket_ask_user_question_tool_call_fallback", |
| 114 | "category": "fallback", |
| 115 | "description": "Bracket-format fallback canonicalizes mixed-case workflow tool names against the registry." |
| 116 | }, |
| 117 | { |
| 118 | "name": "native_and_raw_tool_paths_share_executor_trace", |
| 119 | "category": "executor", |
| 120 | "description": "Native and extracted tool calls emit the same executor trace events, annotated by source." |
| 121 | }, |
| 122 | { |
| 123 | "name": "backend_capability_probe_refreshes_native_tool_mode", |
| 124 | "category": "capabilities", |
| 125 | "description": "Turn startup can refine backend capabilities before the first request and enable native tool use." |
| 126 | }, |
| 127 | { |
| 128 | "name": "run_streaming_delegates_to_primary_runtime", |
| 129 | "category": "runtime", |
| 130 | "description": "The streaming helper delegates into the main runtime path instead of maintaining a second loop." |
| 131 | }, |
| 132 | { |
| 133 | "name": "definition_of_done_verify_phase", |
| 134 | "category": "definition-of-done", |
| 135 | "description": "Mutating tasks enter a verify phase before completion and return evidence-backed responses." |
| 136 | }, |
| 137 | { |
| 138 | "name": "verify_failure_routes_to_fix_loop", |
| 139 | "category": "definition-of-done", |
| 140 | "description": "Verification failures route back into execution with a structured fix loop." |
| 141 | }, |
| 142 | { |
| 143 | "name": "verify_retry_budget_exhaustion", |
| 144 | "category": "definition-of-done", |
| 145 | "description": "Verification escalates to the user once the fix-loop retry budget is exhausted." |
| 146 | }, |
| 147 | { |
| 148 | "name": "ambiguous_prompt_routes_to_clarify", |
| 149 | "category": "workflow", |
| 150 | "description": "Ambiguous prompts enter clarify mode, ask one structured question, persist a single-question brief artifact, and hand off to execute." |
| 151 | }, |
| 152 | { |
| 153 | "name": "complex_prompt_routes_to_plan", |
| 154 | "category": "workflow", |
| 155 | "description": "Complex prompts enter plan mode, persist single-pass implementation and verification artifacts, and use planned verification commands without legacy decomposition." |
| 156 | }, |
| 157 | { |
| 158 | "name": "verify_failure_fix_loop_does_not_reroute_workflow", |
| 159 | "category": "workflow", |
| 160 | "description": "A verify-fix retry returns to execute mode without re-triggering clarify or plan." |
| 161 | }, |
| 162 | { |
| 163 | "name": "conversational_task_skips_verify_phase", |
| 164 | "category": "definition-of-done", |
| 165 | "description": "Conversational tasks skip the verify phase entirely and avoid DoD overhead." |
| 166 | }, |
| 167 | { |
| 168 | "name": "explore_mode_skips_dod_and_router", |
| 169 | "category": "explore", |
| 170 | "description": "Explore mode answers lookup questions without entering workflow routing or creating DoD artifacts." |
| 171 | }, |
| 172 | { |
| 173 | "name": "explore_mode_denies_write", |
| 174 | "category": "explore", |
| 175 | "description": "Explore mode stays read-only even when the broader session would otherwise allow workspace writes." |
| 176 | }, |
| 177 | { |
| 178 | "name": "explore_mode_ignores_global_allow_policy", |
| 179 | "category": "explore", |
| 180 | "description": "Explore mode ignores global allow rules so the read-only lane cannot be elevated into write access." |
| 181 | }, |
| 182 | { |
| 183 | "name": "non_mutating_completion_no_longer_forces_continuation", |
| 184 | "category": "behavior", |
| 185 | "description": "Non-mutating tasks now return the model response directly instead of injecting a continuation prompt." |
| 186 | }, |
| 187 | { |
| 188 | "name": "tool_result_contract_regression", |
| 189 | "category": "known-failure", |
| 190 | "description": "Duplicate-suppression and pre-validation branches should not build invalid Message objects." |
| 191 | } |
| 192 | ] |