fortrangoingonforty/armfortas / 4a7578b

Browse files

Add fuzzing harness: cargo-fuzz targets and deterministic smoke tests

Lexer fuzzer, parser fuzzer (cargo fuzz), seed corpus, and
deterministic smoke tests (10K random ASCII through lexer, 5K
through parser, 5K Fortran fragment combinations through parser).
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
4a7578b82d19d6b51469b88ec576755f503d2f3a
Parents
d9bb467
Tree
aedf4f3

8 changed files

StatusFile+-
A fuzz/Cargo.toml 26 0
A fuzz/corpus/fuzz_lexer/seed_hello.f90 4 0
A fuzz/corpus/fuzz_lexer/seed_module.f90 10 0
A fuzz/corpus/fuzz_parser/seed_complex.f90 23 0
A fuzz/corpus/fuzz_parser/seed_hello.f90 4 0
A fuzz/fuzz_targets/fuzz_lexer.rs 12 0
A fuzz/fuzz_targets/fuzz_parser.rs 15 0
A tests/fuzz_smoke.rs 118 0
fuzz/Cargo.tomladded
@@ -0,0 +1,26 @@
1
+[package]
2
+name = "armfortas-fuzz"
3
+version = "0.0.0"
4
+publish = false
5
+edition = "2021"
6
+
7
+[package.metadata]
8
+cargo-fuzz = true
9
+
10
+[dependencies]
11
+libfuzzer-sys = "0.4"
12
+armfortas = { path = ".." }
13
+
14
+# Prevent this from interfering with workspaces
15
+[workspace]
16
+members = ["."]
17
+
18
+[[bin]]
19
+name = "fuzz_lexer"
20
+path = "fuzz_targets/fuzz_lexer.rs"
21
+doc = false
22
+
23
+[[bin]]
24
+name = "fuzz_parser"
25
+path = "fuzz_targets/fuzz_parser.rs"
26
+doc = false
fuzz/corpus/fuzz_lexer/seed_hello.f90added
@@ -0,0 +1,4 @@
1
+program hello
2
+  implicit none
3
+  print *, "Hello, world!"
4
+end program
fuzz/corpus/fuzz_lexer/seed_module.f90added
@@ -0,0 +1,10 @@
1
+module m
2
+  implicit none
3
+  integer, parameter :: N = 42
4
+  real, allocatable :: buf(:)
5
+contains
6
+  pure integer function f(x)
7
+    integer, intent(in) :: x
8
+    f = x * 2
9
+  end function
10
+end module
fuzz/corpus/fuzz_parser/seed_complex.f90added
@@ -0,0 +1,23 @@
1
+module m
2
+  implicit none
3
+  type :: point
4
+    real :: x, y
5
+  end type
6
+contains
7
+  subroutine process(arr, n)
8
+    integer, intent(in) :: n
9
+    type(point), intent(inout) :: arr(:)
10
+    integer :: i
11
+    do i = 1, n
12
+      arr(i)%x = real(i)
13
+      arr(i)%y = real(i) * 2.0
14
+    end do
15
+  end subroutine
16
+end module
17
+program p
18
+  use m
19
+  implicit none
20
+  type(point) :: pts(10)
21
+  call process(pts, 10)
22
+  print *, pts(5)%x, pts(5)%y
23
+end program
fuzz/corpus/fuzz_parser/seed_hello.f90added
@@ -0,0 +1,4 @@
1
+program hello
2
+  implicit none
3
+  print *, "Hello, world!"
4
+end program
fuzz/fuzz_targets/fuzz_lexer.rsadded
@@ -0,0 +1,12 @@
1
+#![no_main]
2
+
3
+use libfuzzer_sys::fuzz_target;
4
+use armfortas::lexer::{self, SourceForm};
5
+
6
+fuzz_target!(|data: &[u8]| {
7
+    // Feed arbitrary bytes to the lexer. It must never panic —
8
+    // errors are fine, panics are bugs.
9
+    if let Ok(src) = std::str::from_utf8(data) {
10
+        let _ = lexer::tokenize(src, 0, SourceForm::FreeForm);
11
+    }
12
+});
fuzz/fuzz_targets/fuzz_parser.rsadded
@@ -0,0 +1,15 @@
1
+#![no_main]
2
+
3
+use libfuzzer_sys::fuzz_target;
4
+use armfortas::lexer::{self, SourceForm};
5
+use armfortas::parser::Parser;
6
+
7
+fuzz_target!(|data: &[u8]| {
8
+    // Feed arbitrary bytes through lexer → parser. Neither may panic.
9
+    if let Ok(src) = std::str::from_utf8(data) {
10
+        if let Ok(tokens) = lexer::tokenize(src, 0, SourceForm::FreeForm) {
11
+            let mut parser = Parser::new(&tokens);
12
+            let _ = parser.parse_file();
13
+        }
14
+    }
15
+});
tests/fuzz_smoke.rsadded
@@ -0,0 +1,118 @@
1
+//! Smoke-level fuzz tests for the lexer and parser.
2
+//!
3
+//! These don't use libfuzzer — they run deterministic random inputs
4
+//! through the lexer and parser to verify no panics. For real fuzzing,
5
+//! use `cargo fuzz run fuzz_lexer` / `cargo fuzz run fuzz_parser`.
6
+
7
+use armfortas::lexer::{self, SourceForm};
8
+use armfortas::parser::Parser;
9
+
10
+/// Feed N random-ish strings through the lexer.
11
+fn fuzz_lexer_deterministic(seed: u64, count: usize) {
12
+    let mut state = seed;
13
+    for _ in 0..count {
14
+        // Simple xorshift64 PRNG.
15
+        state ^= state << 13;
16
+        state ^= state >> 7;
17
+        state ^= state << 17;
18
+
19
+        let len = (state % 256) as usize;
20
+        let bytes: Vec<u8> = (0..len)
21
+            .map(|i| {
22
+                let mut s = state.wrapping_add(i as u64);
23
+                s ^= s << 13;
24
+                s ^= s >> 7;
25
+                s ^= s << 17;
26
+                (s & 0x7F) as u8 // ASCII range
27
+            })
28
+            .collect();
29
+
30
+        if let Ok(src) = std::str::from_utf8(&bytes) {
31
+            let _ = lexer::tokenize(src, 0, SourceForm::FreeForm);
32
+        }
33
+    }
34
+}
35
+
36
+/// Feed N random-ish strings through lexer → parser.
37
+fn fuzz_parser_deterministic(seed: u64, count: usize) {
38
+    let mut state = seed;
39
+    for _ in 0..count {
40
+        state ^= state << 13;
41
+        state ^= state >> 7;
42
+        state ^= state << 17;
43
+
44
+        let len = (state % 512) as usize;
45
+        let bytes: Vec<u8> = (0..len)
46
+            .map(|i| {
47
+                let mut s = state.wrapping_add(i as u64);
48
+                s ^= s << 13;
49
+                s ^= s >> 7;
50
+                s ^= s << 17;
51
+                (s & 0x7F) as u8
52
+            })
53
+            .collect();
54
+
55
+        if let Ok(src) = std::str::from_utf8(&bytes) {
56
+            if let Ok(tokens) = lexer::tokenize(src, 0, SourceForm::FreeForm) {
57
+                let mut parser = Parser::new(&tokens);
58
+                let _ = parser.parse_file();
59
+            }
60
+        }
61
+    }
62
+}
63
+
64
+/// Feed malformed but plausible Fortran through the parser.
65
+/// Runs without threads — each input goes directly through the parser.
66
+fn fuzz_parser_with_fortran_fragments(count: usize) {
67
+    let fragments = [
68
+        "program p\nend program\n", "module m\nend module\n",
69
+        "integer :: x\n", "real, allocatable :: a(:,:)\n",
70
+        "do i = 1, 10\nend do\n", "if (x > 0) then\nend if\n",
71
+        "select case (x)\ncase (1)\nend select\n",
72
+        "type :: t\n  integer :: f\nend type\n",
73
+        "interface\nend interface\n",
74
+        "goto 100\n", "100 continue\n",
75
+        "use iso_c_binding, only: c_int\n",
76
+        "", "!\n", "! comment\n",
77
+        "end\n",
78
+    ];
79
+
80
+    let mut state: u64 = 0xDEADBEEF;
81
+    for _ in 0..count {
82
+        state ^= state << 13;
83
+        state ^= state >> 7;
84
+        state ^= state << 17;
85
+
86
+        // Pick 1-3 random fragments and concatenate.
87
+        let n_frags = 1 + (state % 3) as usize;
88
+        let mut src = String::new();
89
+        for _ in 0..n_frags {
90
+            state ^= state << 13;
91
+            state ^= state >> 7;
92
+            state ^= state << 17;
93
+            let idx = (state as usize) % fragments.len();
94
+            src.push_str(fragments[idx]);
95
+        }
96
+
97
+        if let Ok(tokens) = lexer::tokenize(&src, 0, SourceForm::FreeForm) {
98
+            if tokens.len() > 100 { continue; }
99
+            let mut parser = Parser::new(&tokens);
100
+            let _ = parser.parse_file();
101
+        }
102
+    }
103
+}
104
+
105
/// The lexer must get through 10,000 generated ASCII inputs without panicking.
#[test]
fn lexer_no_panic_on_random_ascii() {
    const SEED: u64 = 0x1234_5678;
    const INPUTS: usize = 10_000;

    fuzz_lexer_deterministic(SEED, INPUTS);
}
109
+
110
/// Lexer plus parser must get through 5,000 generated ASCII inputs without panicking.
#[test]
fn parser_no_panic_on_random_ascii() {
    const SEED: u64 = 0x8765_4321;
    const INPUTS: usize = 5_000;

    fuzz_parser_deterministic(SEED, INPUTS);
}
114
+
115
/// The parser must get through 5,000 snippet combinations without panicking.
#[test]
fn parser_no_panic_on_fortran_fragments() {
    const INPUTS: usize = 5_000;

    fuzz_parser_with_fortran_fragments(INPUTS);
}