| 1 | /** |
| 2 | * fortsh_strings.h - C string operations for Fortran interop |
| 3 | * |
| 4 | * Purpose: Bypass flang-new ARM64 heap corruption bugs on strings >128 bytes |
| 5 | * by implementing critical string operations in C. |
| 6 | * |
| 7 | * The flang-new compiler on macOS ARM64 has a bug where substring operations |
| 8 | * and assignments on strings >128 bytes cause heap corruption. By implementing |
| 9 | * these operations in C, we bypass the Fortran runtime entirely. |
| 10 | */ |
| 11 | |
| 12 | #ifndef FORTSH_STRINGS_H |
| 13 | #define FORTSH_STRINGS_H |
| 14 | |
| 15 | #include <stddef.h> |
| 16 | #include <stdint.h> |
| 17 | |
| 18 | #ifdef __cplusplus |
| 19 | extern "C" { |
| 20 | #endif |
| 21 | |
| 22 | /** |
| 23 | * Command buffer handle - opaque to callers (only a forward declaration is exposed here, so Fortran just holds the pointer) |
| 24 | * Internally manages a dynamically allocated string buffer on the C side |
| 25 | */ |
| 26 | typedef struct fortsh_buffer fortsh_buffer_t; |
| 27 | |
| 28 | /* ============================================================================ |
| 29 | * Buffer Management |
| 30 | * ============================================================================ */ |
| 31 | |
| 32 | /** |
| 33 | * Create a new string buffer with the specified capacity. |
| 34 | * @param capacity Maximum size of the buffer (e.g., 1024 for a command line) |
| 35 | * @return Handle to the new buffer, or NULL on failure; release it with fortsh_buffer_destroy() |
| 36 | */ |
| 37 | fortsh_buffer_t* fortsh_buffer_create(size_t capacity); |
| 38 | |
| 39 | /** |
| 40 | * Destroy a buffer and free its memory; the handle must not be used afterwards. |
| 41 | * @param buf Buffer handle (NOTE(review): behavior for a NULL handle is not stated in this header - confirm in the implementation) |
| 42 | */ |
| 43 | void fortsh_buffer_destroy(fortsh_buffer_t* buf); |
| 44 | |
| 45 | /** |
| 46 | * Clear the buffer contents, leaving it holding the empty string. |
| 47 | * @param buf Buffer handle |
| 48 | */ |
| 49 | void fortsh_buffer_clear(fortsh_buffer_t* buf); |
| 50 | |
| 51 | /** |
| 52 | * Get the current length of the string held in the buffer (analogous to Fortran's len_trim). |
| 53 | * @param buf Buffer handle |
| 54 | * @return Length of the string, not counting trailing spaces |
| 55 | */ |
| 56 | size_t fortsh_buffer_length(const fortsh_buffer_t* buf); |
| 57 | |
| 58 | /** |
| 59 | * Get the capacity of the buffer. |
| 60 | * @param buf Buffer handle |
| 61 | * @return Maximum capacity (NOTE(review): presumably in bytes, and whether it counts a terminating NUL is not stated here - confirm) |
| 62 | */ |
| 63 | size_t fortsh_buffer_capacity(const fortsh_buffer_t* buf); |
| 64 | |
| 65 | /* ============================================================================ |
| 66 | * String Operations (safe for >128 bytes) |
| 67 | * ============================================================================ */ |
| 68 | |
| 69 | /** |
| 70 | * Replace the buffer contents with a copy of a C string (like buffer = "string"). |
| 71 | * @param buf Buffer handle |
| 72 | * @param str C string to copy (null-terminated) |
| 73 | * @return 0 on success, -1 on failure (overflow) |
| 74 | */ |
| 75 | int fortsh_buffer_set(fortsh_buffer_t* buf, const char* str); |
| 76 | |
| 77 | /** |
| 78 | * Copy the contents of one buffer into another (like buffer1 = buffer2). |
| 79 | * @param dest Destination buffer (receives the copy) |
| 80 | * @param src Source buffer (read-only: passed as a pointer to const) |
| 81 | * @return 0 on success, -1 on failure |
| 82 | */ |
| 83 | int fortsh_buffer_copy(fortsh_buffer_t* dest, const fortsh_buffer_t* src); |
| 84 | |
| 85 | /** |
| 86 | * Extract a substring into the destination buffer (like dest = src(start:end), but with 0-based indices rather than Fortran's default 1-based bounds). |
| 87 | * @param dest Destination buffer |
| 88 | * @param src Source buffer |
| 89 | * @param start Start index (0-based, inclusive) |
| 90 | * @param end End index (0-based, inclusive) |
| 91 | * @return 0 on success, -1 on failure |
| 92 | */ |
| 93 | int fortsh_buffer_substring(fortsh_buffer_t* dest, const fortsh_buffer_t* src, |
| 94 | size_t start, size_t end); |
| 95 | |
| 96 | /** |
| 97 | * Get the character at a position (like ch = buffer(i:i), but with a 0-based index). |
| 98 | * @param buf Buffer handle |
| 99 | * @param pos Position (0-based) |
| 100 | * @return Character at pos, or '\0' if pos is out of bounds |
| 101 | */ |
| 102 | char fortsh_buffer_get_char(const fortsh_buffer_t* buf, size_t pos); |
| 103 | |
| 104 | /** |
| 105 | * Set the character at a position (like buffer(i:i) = 'x', but with a 0-based index). |
| 106 | * @param buf Buffer handle |
| 107 | * @param pos Position (0-based) |
| 108 | * @param ch Character to store at pos |
| 109 | * @return 0 on success, -1 on failure |
| 110 | */ |
| 111 | int fortsh_buffer_set_char(fortsh_buffer_t* buf, size_t pos, char ch); |
| 112 | |
| 113 | /* ============================================================================ |
| 114 | * Buffer Manipulation |
| 115 | * ============================================================================ */ |
| 116 | |
| 117 | /** |
| 118 | * Insert a string into the buffer at the given position. |
| 119 | * @param buf Buffer handle |
| 120 | * @param pos Position to insert at (0-based) |
| 121 | * @param str String to insert (NOTE(review): presumably null-terminated, since no length is passed - confirm) |
| 122 | * @return 0 on success, -1 on failure (overflow) |
| 123 | */ |
| 124 | int fortsh_buffer_insert(fortsh_buffer_t* buf, size_t pos, const char* str); |
| 125 | |
| 126 | /** |
| 127 | * Delete a run of characters from the buffer. |
| 128 | * @param buf Buffer handle |
| 129 | * @param start Position of the first character to delete (0-based) |
| 130 | * @param count Number of characters to delete |
| 131 | * @return 0 on success, -1 on failure |
| 132 | */ |
| 133 | int fortsh_buffer_delete(fortsh_buffer_t* buf, size_t start, size_t count); |
| 134 | |
| 135 | /** |
| 136 | * Append a null-terminated string to the end of the buffer. |
| 137 | * @param buf Buffer handle |
| 138 | * @param str String to append (null-terminated) |
| 139 | * @return 0 on success, -1 on failure (overflow) (NOTE(review): unlike fortsh_buffer_append_chars(), this is not documented as auto-growing - confirm) |
| 140 | */ |
| 141 | int fortsh_buffer_append(fortsh_buffer_t* buf, const char* str); |
| 142 | |
| 143 | /** |
| 144 | * Append len bytes from a Fortran string (not null-terminated). Auto-grows the buffer. |
| 145 | * @param buf Buffer handle |
| 146 | * @param str Fortran character data (not null-terminated; length is given by len) |
| 147 | * @param len Number of bytes to append |
| 148 | * @return 0 on success, -1 on failure |
| 149 | */ |
| 150 | int fortsh_buffer_append_chars(fortsh_buffer_t* buf, const char* str, size_t len); |
| 151 | |
| 152 | /** |
| 153 | * Append a single character to the end of the buffer. Auto-grows the buffer. |
| 154 | * @param buf Buffer handle |
| 155 | * @param ch Character to append |
| 156 | * @return 0 on success, -1 on failure |
| 157 | */ |
| 158 | int fortsh_buffer_append_char(fortsh_buffer_t* buf, char ch); |
| 159 | |
| 160 | /** |
| 161 | * Grow the buffer capacity via realloc, preserving the existing contents. |
| 162 | * @param buf Buffer handle |
| 163 | * @param new_capacity Requested capacity (must be >= current length) |
| 164 | * @return 0 on success, -1 on failure |
| 165 | */ |
| 166 | int fortsh_buffer_grow(fortsh_buffer_t* buf, size_t new_capacity); |
| 167 | |
| 168 | /** |
| 169 | * Trim trailing whitespace (like trim(buffer)). |
| 170 | * Modifies the buffer in place; nothing is returned. |
| 171 | * @param buf Buffer handle |
| 172 | */ |
| 173 | void fortsh_buffer_trim(fortsh_buffer_t* buf); |
| 174 | |
| 175 | /* ============================================================================ |
| 176 | * Fortran Interop Helpers |
| 177 | * ============================================================================ */ |
| 178 | |
| 179 | /** |
| 180 | * Copy the buffer contents into a Fortran character array. |
| 181 | * @param buf Buffer handle |
| 182 | * @param fortran_str Destination Fortran character array (NOT null-terminated) |
| 183 | * @param fortran_len Length of the Fortran array |
| 184 | * @return Number of characters copied (NOTE(review): whether the rest of the array is blank-padded is not stated here - confirm) |
| 185 | */ |
| 186 | size_t fortsh_buffer_to_fortran(const fortsh_buffer_t* buf, char* fortran_str, |
| 187 | size_t fortran_len); |
| 188 | |
| 189 | /** |
| 190 | * Set the buffer contents from a Fortran character array. |
| 191 | * @param buf Buffer handle |
| 192 | * @param fortran_str Source Fortran character array (NOT null-terminated) |
| 193 | * @param fortran_len Number of characters of the Fortran string to use |
| 194 | * @return 0 on success, -1 on failure |
| 195 | */ |
| 196 | int fortsh_buffer_from_fortran(fortsh_buffer_t* buf, const char* fortran_str, |
| 197 | size_t fortran_len); |
| 198 | |
| 199 | /** |
| 200 | * Get a pointer to the buffer's internal C string (null-terminated). |
| 201 | * WARNING: The pointer is only valid until the next buffer operation - copy the data if it must outlive that! |
| 202 | * @param buf Buffer handle |
| 203 | * @return Read-only pointer to the internal C string |
| 204 | */ |
| 205 | const char* fortsh_buffer_c_str(const fortsh_buffer_t* buf); |
| 206 | |
| 207 | /* ============================================================================ |
| 208 | * Utility Functions |
| 209 | * ============================================================================ */ |
| 210 | |
| 211 | /** |
| 212 | * Find a substring in the buffer (like index(buffer, pattern), except the result is 0-based and "not found" is -1 rather than 0). |
| 213 | * @param buf Buffer handle |
| 214 | * @param pattern Pattern to search for (NOTE(review): presumably a null-terminated literal substring - confirm) |
| 215 | * @return Position of first match (0-based), or -1 if not found |
| 216 | */ |
| 217 | int fortsh_buffer_find(const fortsh_buffer_t* buf, const char* pattern); |
| 218 | |
| 219 | /** |
| 220 | * Compare the buffer contents with a C string (three-way result, same sign convention as strcmp). |
| 221 | * @param buf Buffer handle |
| 222 | * @param str String to compare against |
| 223 | * @return 0 if equal, <0 if buf < str, >0 if buf > str |
| 224 | */ |
| 225 | int fortsh_buffer_compare(const fortsh_buffer_t* buf, const char* str); |
| 226 | |
| 227 | /* ============================================================================ |
| 228 | * String Operations (non-buffer, direct C string functions) |
| 229 | * ============================================================================ */ |
| 230 | |
| 231 | /** |
| 232 | * Pattern replace on raw C strings — bypasses the Fortran runtime entirely. |
| 233 | * Replaces occurrences of pattern in input, writing the result to the caller-supplied output buffer. |
| 234 | * @param input Input string (null-terminated) |
| 235 | * @param input_len Length of input string |
| 236 | * @param pattern Pattern to find (null-terminated) |
| 237 | * @param pat_len Length of pattern |
| 238 | * @param replacement Replacement string (null-terminated) |
| 239 | * @param repl_len Length of replacement |
| 240 | * @param replace_all 1 = replace all occurrences, 0 = first occurrence only |
| 241 | * @param output Output buffer (caller-allocated, must be large enough to hold the result) |
| 242 | * @param output_cap Capacity of output buffer (NOTE(review): whether this must include room for a terminating NUL is not stated here - confirm) |
| 243 | * @return Length of the result string, or -1 on error |
| 244 | */ |
| 245 | int fortsh_pattern_replace(const char* input, int input_len, |
| 246 | const char* pattern, int pat_len, |
| 247 | const char* replacement, int repl_len, |
| 248 | int replace_all, |
| 249 | char* output, int output_cap); |
| 250 | |
| 251 | /** |
| 252 | * Pattern replace with C-managed allocation. |
| 253 | * All memory is malloc'd in C — no Fortran allocatable involved. (NOTE(review): input/pattern/replacement, their lengths and replace_all are undocumented here; presumably they match fortsh_pattern_replace() - confirm.) |
| 254 | * @param result_out Pointer to receive the C-allocated result string |
| 255 | * @return Length of result, or -1 on error. Caller must free the result with fortsh_free_string(). |
| 256 | */ |
| 257 | int fortsh_pattern_replace_alloc(const char* input, int input_len, |
| 258 | const char* pattern, int pat_len, |
| 259 | const char* replacement, int repl_len, |
| 260 | int replace_all, |
| 261 | char** result_out); |
| 262 | |
| 263 | /** |
| 264 | * Free a string allocated by fortsh_pattern_replace_alloc() (ptr is the pointer received through its result_out argument). |
| 265 | */ |
| 266 | void fortsh_free_string(char* ptr); |
| 267 | |
| 268 | #ifdef __cplusplus |
| 269 | } |
| 270 | #endif |
| 271 | |
| 272 | #endif /* FORTSH_STRINGS_H */ |
| 273 |