/**
 * fortsh_strings.c - Implementation of C string operations for Fortran interop
 *
 * This library provides string buffer operations that work around flang-new
 * ARM64 bugs related to substring operations on strings longer than 128 bytes.
 */
7
#include "fortsh_strings.h"

#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
12
/*
 * Internal buffer structure.
 *
 * `data` always has room for `capacity` characters plus one terminating
 * NUL byte; `length` counts the characters currently stored.
 */
struct fortsh_buffer {
    /* Actual string data (null-terminated) */
    char* data;
    /* Current length of string */
    size_t length;
    /* Maximum capacity (excluding null terminator) */
    size_t capacity;
};
19
20 /* ============================================================================
21 * Buffer Management
22 * ============================================================================ */
23
24 fortsh_buffer_t* fortsh_buffer_create(size_t capacity) {
25 if (capacity == 0 || capacity > 1024*1024) { /* Sanity check: max 1MB */
26 return NULL;
27 }
28
29 fortsh_buffer_t* buf = (fortsh_buffer_t*)malloc(sizeof(fortsh_buffer_t));
30 if (!buf) {
31 return NULL;
32 }
33
34 /* Allocate capacity + 1 for null terminator */
35 buf->data = (char*)calloc(capacity + 1, sizeof(char));
36 if (!buf->data) {
37 free(buf);
38 return NULL;
39 }
40
41 buf->length = 0;
42 buf->capacity = capacity;
43 buf->data[0] = '\0';
44
45 return buf;
46 }
47
48 void fortsh_buffer_destroy(fortsh_buffer_t* buf) {
49 if (buf) {
50 if (buf->data) {
51 free(buf->data);
52 }
53 free(buf);
54 }
55 }
56
57 void fortsh_buffer_clear(fortsh_buffer_t* buf) {
58 if (buf && buf->data) {
59 buf->data[0] = '\0';
60 buf->length = 0;
61 }
62 }
63
64 size_t fortsh_buffer_length(const fortsh_buffer_t* buf) {
65 return buf ? buf->length : 0;
66 }
67
68 size_t fortsh_buffer_capacity(const fortsh_buffer_t* buf) {
69 return buf ? buf->capacity : 0;
70 }
71
72 /* ============================================================================
73 * String Operations
74 * ============================================================================ */
75
76 int fortsh_buffer_set(fortsh_buffer_t* buf, const char* str) {
77 if (!buf || !buf->data || !str) {
78 return -1;
79 }
80
81 size_t len = strlen(str);
82 if (len > buf->capacity) {
83 return -1; /* Overflow */
84 }
85
86 strcpy(buf->data, str);
87 buf->length = len;
88 return 0;
89 }
90
91 int fortsh_buffer_copy(fortsh_buffer_t* dest, const fortsh_buffer_t* src) {
92 if (!dest || !src || !dest->data || !src->data) {
93 return -1;
94 }
95
96 if (src->length > dest->capacity) {
97 return -1; /* Overflow */
98 }
99
100 memcpy(dest->data, src->data, src->length);
101 dest->data[src->length] = '\0';
102 dest->length = src->length;
103 return 0;
104 }
105
106 int fortsh_buffer_substring(fortsh_buffer_t* dest, const fortsh_buffer_t* src,
107 size_t start, size_t end) {
108 if (!dest || !src || !dest->data || !src->data) {
109 return -1;
110 }
111
112 /* Validate indices */
113 if (start > end || end >= src->length) {
114 return -1;
115 }
116
117 size_t sub_len = end - start + 1;
118 if (sub_len > dest->capacity) {
119 return -1; /* Overflow */
120 }
121
122 memcpy(dest->data, src->data + start, sub_len);
123 dest->data[sub_len] = '\0';
124 dest->length = sub_len;
125 return 0;
126 }
127
128 char fortsh_buffer_get_char(const fortsh_buffer_t* buf, size_t pos) {
129 if (!buf || !buf->data || pos >= buf->length) {
130 return '\0';
131 }
132 return buf->data[pos];
133 }
134
135 int fortsh_buffer_set_char(fortsh_buffer_t* buf, size_t pos, char ch) {
136 if (!buf || !buf->data || pos >= buf->capacity) {
137 return -1;
138 }
139
140 buf->data[pos] = ch;
141
142 /* Update length if we extended the string */
143 if (pos >= buf->length) {
144 buf->length = pos + 1;
145 buf->data[buf->length] = '\0';
146 }
147
148 return 0;
149 }
150
151 /* ============================================================================
152 * Buffer Manipulation
153 * ============================================================================ */
154
155 int fortsh_buffer_insert(fortsh_buffer_t* buf, size_t pos, const char* str) {
156 if (!buf || !buf->data || !str) {
157 return -1;
158 }
159
160 size_t str_len = strlen(str);
161 size_t new_len = buf->length + str_len;
162
163 /* Check for overflow */
164 if (new_len > buf->capacity || pos > buf->length) {
165 return -1;
166 }
167
168 /* Shift existing content right to make room */
169 if (pos < buf->length) {
170 memmove(buf->data + pos + str_len, buf->data + pos, buf->length - pos);
171 }
172
173 /* Insert new string */
174 memcpy(buf->data + pos, str, str_len);
175 buf->length = new_len;
176 buf->data[buf->length] = '\0';
177
178 return 0;
179 }
180
181 int fortsh_buffer_delete(fortsh_buffer_t* buf, size_t start, size_t count) {
182 if (!buf || !buf->data) {
183 return -1;
184 }
185
186 /* Validate bounds */
187 if (start >= buf->length || count == 0) {
188 return 0; /* Nothing to delete */
189 }
190
191 /* Adjust count if it would go past end */
192 if (start + count > buf->length) {
193 count = buf->length - start;
194 }
195
196 /* Shift content left */
197 memmove(buf->data + start, buf->data + start + count,
198 buf->length - start - count);
199
200 buf->length -= count;
201 buf->data[buf->length] = '\0';
202
203 return 0;
204 }
205
206 int fortsh_buffer_append(fortsh_buffer_t* buf, const char* str) {
207 if (!buf || !buf->data || !str) {
208 return -1;
209 }
210
211 size_t str_len = strlen(str);
212 size_t new_len = buf->length + str_len;
213
214 if (new_len > buf->capacity) {
215 return -1; /* Overflow */
216 }
217
218 strcpy(buf->data + buf->length, str);
219 buf->length = new_len;
220
221 return 0;
222 }
223
224 int fortsh_buffer_grow(fortsh_buffer_t* buf, size_t new_capacity) {
225 if (!buf || !buf->data) return -1;
226 if (new_capacity <= buf->capacity) return 0; /* already big enough */
227 if (new_capacity > 16*1024*1024) return -1; /* 16MB sanity cap */
228
229 char* new_data = (char*)realloc(buf->data, new_capacity + 1);
230 if (!new_data) return -1;
231
232 buf->data = new_data;
233 buf->capacity = new_capacity;
234 return 0;
235 }
236
237 int fortsh_buffer_append_chars(fortsh_buffer_t* buf, const char* str, size_t len) {
238 if (!buf || !buf->data || !str || len == 0) return (buf && len == 0) ? 0 : -1;
239
240 size_t new_len = buf->length + len;
241 if (new_len > buf->capacity) {
242 /* Double or add 256, whichever is larger */
243 size_t grow_to = buf->capacity * 2;
244 if (grow_to < new_len + 256) grow_to = new_len + 256;
245 if (fortsh_buffer_grow(buf, grow_to) != 0) return -1;
246 }
247
248 memcpy(buf->data + buf->length, str, len);
249 buf->length = new_len;
250 buf->data[new_len] = '\0';
251 return 0;
252 }
253
254 int fortsh_buffer_append_char(fortsh_buffer_t* buf, char ch) {
255 return fortsh_buffer_append_chars(buf, &ch, 1);
256 }
257
258 void fortsh_buffer_trim(fortsh_buffer_t* buf) {
259 if (!buf || !buf->data || buf->length == 0) {
260 return;
261 }
262
263 /* Trim trailing whitespace */
264 while (buf->length > 0 && isspace((unsigned char)buf->data[buf->length - 1])) {
265 buf->length--;
266 }
267 buf->data[buf->length] = '\0';
268 }
269
270 /* ============================================================================
271 * Fortran Interop Helpers
272 * ============================================================================ */
273
274 size_t fortsh_buffer_to_fortran(const fortsh_buffer_t* buf, char* fortran_str,
275 size_t fortran_len) {
276 if (!buf || !buf->data || !fortran_str || fortran_len == 0) {
277 return 0;
278 }
279
280 size_t copy_len = (buf->length < fortran_len) ? buf->length : fortran_len;
281
282 /* Copy data */
283 memcpy(fortran_str, buf->data, copy_len);
284
285 /* Pad with spaces (Fortran convention) */
286 if (copy_len < fortran_len) {
287 memset(fortran_str + copy_len, ' ', fortran_len - copy_len);
288 }
289
290 return copy_len;
291 }
292
293 int fortsh_buffer_from_fortran(fortsh_buffer_t* buf, const char* fortran_str,
294 size_t fortran_len) {
295 if (!buf || !buf->data || !fortran_str) {
296 return -1;
297 }
298
299 /* Find actual length by trimming trailing spaces */
300 size_t actual_len = fortran_len;
301 while (actual_len > 0 && (fortran_str[actual_len - 1] == ' ' ||
302 fortran_str[actual_len - 1] == '\0')) {
303 actual_len--;
304 }
305
306 if (actual_len > buf->capacity) {
307 return -1; /* Overflow */
308 }
309
310 memcpy(buf->data, fortran_str, actual_len);
311 buf->data[actual_len] = '\0';
312 buf->length = actual_len;
313
314 return 0;
315 }
316
317 const char* fortsh_buffer_c_str(const fortsh_buffer_t* buf) {
318 return (buf && buf->data) ? buf->data : "";
319 }
320
321 /* ============================================================================
322 * Utility Functions
323 * ============================================================================ */
324
325 int fortsh_buffer_find(const fortsh_buffer_t* buf, const char* pattern) {
326 if (!buf || !buf->data || !pattern) {
327 return -1;
328 }
329
330 const char* pos = strstr(buf->data, pattern);
331 if (!pos) {
332 return -1; /* Not found */
333 }
334
335 return (int)(pos - buf->data); /* Return 0-based index */
336 }
337
338 int fortsh_buffer_compare(const fortsh_buffer_t* buf, const char* str) {
339 if (!buf || !buf->data || !str) {
340 return -1;
341 }
342
343 return strcmp(buf->data, str);
344 }
345
346 /* ============================================================================
347 * String Operations (non-buffer, direct C string functions)
348 * ============================================================================ */
349
/**
 * Replace occurrences of a literal byte pattern and write the result into a
 * caller-supplied buffer.
 *
 * None of the inputs need to be NUL-terminated; their lengths are given
 * explicitly. Matching is a plain left-to-right byte comparison. The output
 * is always NUL-terminated and is silently truncated to output_cap - 1
 * characters when it does not fit.
 *
 * @param input        text to scan (input_len bytes)
 * @param pattern      bytes to search for (pat_len bytes, pat_len > 0)
 * @param replacement  bytes substituted for each match (repl_len bytes;
 *                     a length <= 0 removes the match)
 * @param replace_all  non-zero: replace every match; zero: only the first
 * @param output       destination buffer
 * @param output_cap   size of the destination buffer in bytes
 * @return number of characters written (excluding the NUL). When a pointer
 *         is NULL, pat_len <= 0 or output_cap <= 0, the result is 0 and the
 *         output (if usable) is set to the empty string.
 */
int fortsh_pattern_replace(const char* input, int input_len,
                           const char* pattern, int pat_len,
                           const char* replacement, int repl_len,
                           int replace_all,
                           char* output, int output_cap) {
    if (!input || !pattern || !replacement || !output || pat_len <= 0 || output_cap <= 0) {
        if (output && output_cap > 0) output[0] = '\0';
        return 0;
    }

    const int limit = output_cap - 1; /* last index is reserved for the NUL */
    int out_pos = 0;
    int i = 0;

    while (i < input_len && out_pos < limit) {
        /* All bounds checks below use subtraction from a known-larger value
         * instead of addition, so they cannot overflow int even for extreme
         * length arguments. Here 0 <= i < input_len. */
        if (pat_len <= input_len - i &&
            memcmp(input + i, pattern, (size_t)pat_len) == 0) {
            /* Copy replacement, truncated to the space that is left */
            int room = limit - out_pos;
            int copy_len = (repl_len < room) ? repl_len : room;
            if (copy_len > 0) {
                memcpy(output + out_pos, replacement, (size_t)copy_len);
                out_pos += copy_len;
            }
            i += pat_len;

            if (!replace_all) {
                /* Copy rest of input verbatim and stop scanning */
                int rest = input_len - i;
                room = limit - out_pos;
                if (rest > room) {
                    rest = room;
                }
                if (rest > 0) {
                    memcpy(output + out_pos, input + i, (size_t)rest);
                    out_pos += rest;
                }
                break;
            }
        } else {
            output[out_pos++] = input[i++];
        }
    }

    output[out_pos] = '\0';
    return out_pos;
}
397
/*
 * Full pattern replace with C-managed memory.
 * Fortran passes raw strings (not null-terminated), C handles all allocation.
 * Returns result via caller-provided pointer to a C-allocated buffer.
 * Caller must call fortsh_free_string() on the result.
 *
 * Return value and *result_out:
 *   - result_out == NULL: returns -1.
 *   - pattern or replacement NULL, or pat_len <= 0: *result_out is a
 *     NUL-terminated copy of the input and its length is returned. A NULL
 *     input or negative input_len is treated as the empty string. If the
 *     copy cannot be allocated, returns 0 with *result_out == NULL.
 *   - otherwise: *result_out holds the NUL-terminated replaced text and its
 *     length is returned; returns -1 with *result_out == NULL when the
 *     required size does not fit in an int or the allocation fails.
 */
int fortsh_pattern_replace_alloc(const char* input, int input_len,
                                 const char* pattern, int pat_len,
                                 const char* replacement, int repl_len,
                                 int replace_all,
                                 char** result_out) {
    if (!result_out) {
        return -1;
    }
    *result_out = NULL;

    /* A missing or negative-length input is handled as the empty string so
     * that no copy below ever reads through NULL or uses a negative size. */
    if (!input || input_len < 0) {
        input_len = 0;
    }

    if (!input || !pattern || !replacement || pat_len <= 0) {
        /* No pattern: return copy of input */
        char* copy = (char*)malloc((size_t)input_len + 1);
        if (!copy) {
            return 0;
        }
        if (input_len > 0) {
            memcpy(copy, input, (size_t)input_len);
        }
        copy[input_len] = '\0';
        *result_out = copy;
        return input_len;
    }

    /* Worst-case output size, computed in 64 bits so that long inputs
     * combined with long replacements cannot overflow int. The text only
     * grows when the replacement is longer than the pattern; a single
     * replacement (replace_all == 0) grows it at most once. */
    long long needed = (long long)input_len + 2;
    if (repl_len > pat_len) {
        long long max_matches = replace_all ? (long long)(input_len / pat_len) + 1 : 1;
        needed += max_matches * ((long long)repl_len - (long long)pat_len);
    }
    if (needed > (long long)INT_MAX) {
        return -1; /* fortsh_pattern_replace() takes its capacity as an int */
    }

    int out_cap = (int)needed;
    char* output = (char*)malloc((size_t)out_cap);
    if (!output) {
        return -1;
    }

    int result_len = fortsh_pattern_replace(input, input_len, pattern, pat_len,
                                            replacement, repl_len, replace_all,
                                            output, out_cap);
    *result_out = output;
    return result_len;
}
437
/**
 * Release a string obtained with malloc() by this library. Passing NULL is
 * allowed and does nothing.
 */
void fortsh_free_string(char* ptr) {
    free(ptr); /* free(NULL) is a no-op */
}
441
442 /*
443 * Pattern replace reading input from a fortsh_buffer_t handle.
444 * Avoids passing large Fortran strings through the Fortran→C boundary.
445 * Result is malloc'd; caller must free via fortsh_free_string.
446 */
447 int fortsh_buffer_pattern_replace(const fortsh_buffer_t* input_buf,
448 const char* pattern, int pat_len,
449 const char* replacement, int repl_len,
450 int replace_all,
451 char** result_out) {
452 if (!input_buf || !input_buf->data) {
453 *result_out = NULL;
454 return 0;
455 }
456 return fortsh_pattern_replace_alloc(
457 input_buf->data, (int)input_buf->length,
458 pattern, pat_len,
459 replacement, repl_len,
460 replace_all, result_out);
461 }