Rust · 9048 bytes Raw Blame History
1 //! Dependency scanner — quick-scan Fortran source files to extract
2 //! MODULE definitions and USE dependencies without a full parse.
3 //!
4 //! Used by the multi-source driver mode to determine compilation
5 //! order via topological sort.
6
7 use std::collections::{HashMap, VecDeque};
8 use std::path::{Path, PathBuf};
9
10 /// Information extracted from a single source file.
11 #[derive(Debug)]
12 pub struct FileDeps {
13 pub path: PathBuf,
14 /// Module names this file defines (lowercase).
15 pub defines: Vec<String>,
16 /// Module names this file USEs (lowercase).
17 pub uses: Vec<String>,
18 }
19
20 /// Scan a source file for MODULE and USE statements.
21 /// Uses a simple line-by-line keyword scan — no lexer or parser needed.
22 pub fn scan_file(path: &Path) -> Result<FileDeps, String> {
23 let content = std::fs::read_to_string(path)
24 .map_err(|e| format!("cannot read '{}': {}", path.display(), e))?;
25
26 let mut defines = Vec::new();
27 let mut uses = Vec::new();
28
29 for line in content.lines() {
30 let trimmed = line.trim().to_lowercase();
31 // Skip comments and empty lines.
32 if trimmed.starts_with('!') || trimmed.is_empty() {
33 continue;
34 }
35
36 // MODULE <name> — but not "module procedure" or "module function"
37 if let Some(rest) = trimmed.strip_prefix("module ") {
38 let rest = rest.trim();
39 if rest.starts_with("procedure")
40 || rest.starts_with("function")
41 || rest.starts_with("subroutine")
42 {
43 continue;
44 }
45 // Extract the module name (first identifier after "module").
46 if let Some(name) = rest.split_whitespace().next() {
47 let clean = name.trim_end_matches(|c: char| !c.is_alphanumeric() && c != '_');
48 if !clean.is_empty() {
49 defines.push(clean.to_string());
50 }
51 }
52 }
53
54 // USE <name> [, ...]
55 if trimmed.starts_with("use ") || trimmed.starts_with("use,") {
56 let mut rest = if let Some(after_comma) = trimmed.strip_prefix("use,") {
57 // USE, intrinsic :: name
58 if let Some(idx) = trimmed.find("::") {
59 trimmed[idx + 2..].trim()
60 } else {
61 after_comma
62 }
63 } else if let Some(after_use) = trimmed.strip_prefix("use ") {
64 after_use.trim()
65 } else {
66 unreachable!()
67 };
68 // Strip leading :: (USE :: module_name syntax).
69 if rest.starts_with("::") {
70 rest = rest[2..].trim();
71 }
72 if let Some(name) = rest
73 .split(|c: char| c == ',' || c == ':' || c.is_whitespace())
74 .next()
75 {
76 let clean = name.trim();
77 if !clean.is_empty() && clean != "only" {
78 // Skip intrinsic modules — they don't need .amod files.
79 if !is_intrinsic_module(clean) {
80 uses.push(clean.to_string());
81 }
82 }
83 }
84 }
85 }
86
87 // Deduplicate.
88 uses.sort();
89 uses.dedup();
90 defines.sort();
91 defines.dedup();
92
93 Ok(FileDeps {
94 path: path.to_path_buf(),
95 defines,
96 uses,
97 })
98 }
99
100 fn is_intrinsic_module(name: &str) -> bool {
101 matches!(
102 name,
103 "iso_c_binding"
104 | "iso_fortran_env"
105 | "ieee_arithmetic"
106 | "ieee_exceptions"
107 | "ieee_features"
108 )
109 }
110
111 /// Determine compilation order for a set of source files.
112 /// Returns the files in topological order (dependencies first).
113 /// Errors on circular dependencies.
114 pub fn resolve_compilation_order(files: &[FileDeps]) -> Result<Vec<usize>, String> {
115 // Build: module_name → file_index that defines it.
116 let mut module_to_file: HashMap<&str, usize> = HashMap::new();
117 for (i, f) in files.iter().enumerate() {
118 for def in &f.defines {
119 module_to_file.insert(def.as_str(), i);
120 }
121 }
122
123 // Build adjacency list: file i depends on file j if i USEs a module defined by j.
124 let n = files.len();
125 let mut in_degree = vec![0usize; n];
126 let mut dependents: Vec<Vec<usize>> = vec![vec![]; n]; // j → [files that depend on j]
127
128 for (i, f) in files.iter().enumerate() {
129 for used in &f.uses {
130 if let Some(&j) = module_to_file.get(used.as_str()) {
131 if i != j {
132 dependents[j].push(i);
133 in_degree[i] += 1;
134 }
135 }
136 // If used module is not defined by any file in the set,
137 // it's either intrinsic or external — skip (not an error
138 // here; the compiler will diagnose at USE-resolution time).
139 }
140 }
141
142 // Kahn's algorithm for topological sort.
143 let mut queue: VecDeque<usize> = VecDeque::new();
144 for (i, &deg) in in_degree.iter().enumerate().take(n) {
145 if deg == 0 {
146 queue.push_back(i);
147 }
148 }
149
150 let mut order = Vec::with_capacity(n);
151 while let Some(j) = queue.pop_front() {
152 order.push(j);
153 for &dep in &dependents[j] {
154 in_degree[dep] -= 1;
155 if in_degree[dep] == 0 {
156 queue.push_back(dep);
157 }
158 }
159 }
160
161 if order.len() != n {
162 // Cycle detected — find the modules involved.
163 let cycle_files: Vec<&str> = (0..n)
164 .filter(|i| in_degree[*i] > 0)
165 .map(|i| {
166 files[i]
167 .path
168 .file_name()
169 .unwrap_or_default()
170 .to_str()
171 .unwrap_or("?")
172 })
173 .collect();
174 return Err(format!(
175 "circular module dependency detected among: {}",
176 cycle_files.join(", ")
177 ));
178 }
179
180 Ok(order)
181 }
182
183 #[cfg(test)]
184 mod tests {
185 use super::*;
186
187 #[test]
188 fn scan_simple_module() {
189 let dir = std::env::temp_dir().join("dep_scan_test_1");
190 std::fs::create_dir_all(&dir).unwrap();
191 let f = dir.join("mod.f90");
192 std::fs::write(
193 &f,
194 "module mymod\n use other_mod\n implicit none\nend module\n",
195 )
196 .unwrap();
197 let deps = scan_file(&f).unwrap();
198 assert_eq!(deps.defines, vec!["mymod"]);
199 assert_eq!(deps.uses, vec!["other_mod"]);
200 let _ = std::fs::remove_dir_all(&dir);
201 }
202
203 #[test]
204 fn topo_sort_chain() {
205 let files = vec![
206 FileDeps {
207 path: "c.f90".into(),
208 defines: vec!["c".into()],
209 uses: vec![],
210 },
211 FileDeps {
212 path: "b.f90".into(),
213 defines: vec!["b".into()],
214 uses: vec!["c".into()],
215 },
216 FileDeps {
217 path: "a.f90".into(),
218 defines: vec!["a".into()],
219 uses: vec!["b".into()],
220 },
221 ];
222 let order = resolve_compilation_order(&files).unwrap();
223 // c must come before b, b before a.
224 let pos_c = order.iter().position(|&i| i == 0).unwrap();
225 let pos_b = order.iter().position(|&i| i == 1).unwrap();
226 let pos_a = order.iter().position(|&i| i == 2).unwrap();
227 assert!(pos_c < pos_b);
228 assert!(pos_b < pos_a);
229 }
230
231 #[test]
232 fn topo_sort_cycle() {
233 let files = vec![
234 FileDeps {
235 path: "a.f90".into(),
236 defines: vec!["a".into()],
237 uses: vec!["b".into()],
238 },
239 FileDeps {
240 path: "b.f90".into(),
241 defines: vec!["b".into()],
242 uses: vec!["a".into()],
243 },
244 ];
245 let err = resolve_compilation_order(&files).unwrap_err();
246 assert!(
247 err.contains("circular"),
248 "expected cycle error, got: {}",
249 err
250 );
251 }
252
253 #[test]
254 fn topo_sort_diamond() {
255 let files = vec![
256 FileDeps {
257 path: "d.f90".into(),
258 defines: vec!["d".into()],
259 uses: vec![],
260 },
261 FileDeps {
262 path: "b.f90".into(),
263 defines: vec!["b".into()],
264 uses: vec!["d".into()],
265 },
266 FileDeps {
267 path: "c.f90".into(),
268 defines: vec!["c".into()],
269 uses: vec!["d".into()],
270 },
271 FileDeps {
272 path: "a.f90".into(),
273 defines: vec!["a".into()],
274 uses: vec!["b".into(), "c".into()],
275 },
276 ];
277 let order = resolve_compilation_order(&files).unwrap();
278 let pos_d = order.iter().position(|&i| i == 0).unwrap();
279 let pos_a = order.iter().position(|&i| i == 3).unwrap();
280 assert!(pos_d < pos_a);
281 assert_eq!(order.len(), 4);
282 }
283 }
284