| 1 | //! Dependency scanner — quick-scan Fortran source files to extract |
| 2 | //! MODULE definitions and USE dependencies without a full parse. |
| 3 | //! |
| 4 | //! Used by the multi-source driver mode to determine compilation |
| 5 | //! order via topological sort. |
| 6 | |
| 7 | use std::collections::{HashMap, VecDeque}; |
| 8 | use std::path::{Path, PathBuf}; |
| 9 | |
| 10 | /// Information extracted from a single source file. |
| 11 | #[derive(Debug)] |
| 12 | pub struct FileDeps { |
| 13 | pub path: PathBuf, |
| 14 | /// Module names this file defines (lowercase). |
| 15 | pub defines: Vec<String>, |
| 16 | /// Module names this file USEs (lowercase). |
| 17 | pub uses: Vec<String>, |
| 18 | } |
| 19 | |
| 20 | /// Scan a source file for MODULE and USE statements. |
| 21 | /// Uses a simple line-by-line keyword scan — no lexer or parser needed. |
| 22 | pub fn scan_file(path: &Path) -> Result<FileDeps, String> { |
| 23 | let content = std::fs::read_to_string(path) |
| 24 | .map_err(|e| format!("cannot read '{}': {}", path.display(), e))?; |
| 25 | |
| 26 | let mut defines = Vec::new(); |
| 27 | let mut uses = Vec::new(); |
| 28 | |
| 29 | for line in content.lines() { |
| 30 | let trimmed = line.trim().to_lowercase(); |
| 31 | // Skip comments and empty lines. |
| 32 | if trimmed.starts_with('!') || trimmed.is_empty() { |
| 33 | continue; |
| 34 | } |
| 35 | |
| 36 | // MODULE <name> — but not "module procedure" or "module function" |
| 37 | if let Some(rest) = trimmed.strip_prefix("module ") { |
| 38 | let rest = rest.trim(); |
| 39 | if rest.starts_with("procedure") |
| 40 | || rest.starts_with("function") |
| 41 | || rest.starts_with("subroutine") |
| 42 | { |
| 43 | continue; |
| 44 | } |
| 45 | // Extract the module name (first identifier after "module"). |
| 46 | if let Some(name) = rest.split_whitespace().next() { |
| 47 | let clean = name.trim_end_matches(|c: char| !c.is_alphanumeric() && c != '_'); |
| 48 | if !clean.is_empty() { |
| 49 | defines.push(clean.to_string()); |
| 50 | } |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | // USE <name> [, ...] |
| 55 | if trimmed.starts_with("use ") || trimmed.starts_with("use,") { |
| 56 | let mut rest = if let Some(after_comma) = trimmed.strip_prefix("use,") { |
| 57 | // USE, intrinsic :: name |
| 58 | if let Some(idx) = trimmed.find("::") { |
| 59 | trimmed[idx + 2..].trim() |
| 60 | } else { |
| 61 | after_comma |
| 62 | } |
| 63 | } else if let Some(after_use) = trimmed.strip_prefix("use ") { |
| 64 | after_use.trim() |
| 65 | } else { |
| 66 | unreachable!() |
| 67 | }; |
| 68 | // Strip leading :: (USE :: module_name syntax). |
| 69 | if rest.starts_with("::") { |
| 70 | rest = rest[2..].trim(); |
| 71 | } |
| 72 | if let Some(name) = rest |
| 73 | .split(|c: char| c == ',' || c == ':' || c.is_whitespace()) |
| 74 | .next() |
| 75 | { |
| 76 | let clean = name.trim(); |
| 77 | if !clean.is_empty() && clean != "only" { |
| 78 | // Skip intrinsic modules — they don't need .amod files. |
| 79 | if !is_intrinsic_module(clean) { |
| 80 | uses.push(clean.to_string()); |
| 81 | } |
| 82 | } |
| 83 | } |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | // Deduplicate. |
| 88 | uses.sort(); |
| 89 | uses.dedup(); |
| 90 | defines.sort(); |
| 91 | defines.dedup(); |
| 92 | |
| 93 | Ok(FileDeps { |
| 94 | path: path.to_path_buf(), |
| 95 | defines, |
| 96 | uses, |
| 97 | }) |
| 98 | } |
| 99 | |
| 100 | fn is_intrinsic_module(name: &str) -> bool { |
| 101 | matches!( |
| 102 | name, |
| 103 | "iso_c_binding" |
| 104 | | "iso_fortran_env" |
| 105 | | "ieee_arithmetic" |
| 106 | | "ieee_exceptions" |
| 107 | | "ieee_features" |
| 108 | ) |
| 109 | } |
| 110 | |
| 111 | /// Determine compilation order for a set of source files. |
| 112 | /// Returns the files in topological order (dependencies first). |
| 113 | /// Errors on circular dependencies. |
| 114 | pub fn resolve_compilation_order(files: &[FileDeps]) -> Result<Vec<usize>, String> { |
| 115 | // Build: module_name → file_index that defines it. |
| 116 | let mut module_to_file: HashMap<&str, usize> = HashMap::new(); |
| 117 | for (i, f) in files.iter().enumerate() { |
| 118 | for def in &f.defines { |
| 119 | module_to_file.insert(def.as_str(), i); |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | // Build adjacency list: file i depends on file j if i USEs a module defined by j. |
| 124 | let n = files.len(); |
| 125 | let mut in_degree = vec![0usize; n]; |
| 126 | let mut dependents: Vec<Vec<usize>> = vec![vec![]; n]; // j → [files that depend on j] |
| 127 | |
| 128 | for (i, f) in files.iter().enumerate() { |
| 129 | for used in &f.uses { |
| 130 | if let Some(&j) = module_to_file.get(used.as_str()) { |
| 131 | if i != j { |
| 132 | dependents[j].push(i); |
| 133 | in_degree[i] += 1; |
| 134 | } |
| 135 | } |
| 136 | // If used module is not defined by any file in the set, |
| 137 | // it's either intrinsic or external — skip (not an error |
| 138 | // here; the compiler will diagnose at USE-resolution time). |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | // Kahn's algorithm for topological sort. |
| 143 | let mut queue: VecDeque<usize> = VecDeque::new(); |
| 144 | for (i, °) in in_degree.iter().enumerate().take(n) { |
| 145 | if deg == 0 { |
| 146 | queue.push_back(i); |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | let mut order = Vec::with_capacity(n); |
| 151 | while let Some(j) = queue.pop_front() { |
| 152 | order.push(j); |
| 153 | for &dep in &dependents[j] { |
| 154 | in_degree[dep] -= 1; |
| 155 | if in_degree[dep] == 0 { |
| 156 | queue.push_back(dep); |
| 157 | } |
| 158 | } |
| 159 | } |
| 160 | |
| 161 | if order.len() != n { |
| 162 | // Cycle detected — find the modules involved. |
| 163 | let cycle_files: Vec<&str> = (0..n) |
| 164 | .filter(|i| in_degree[*i] > 0) |
| 165 | .map(|i| { |
| 166 | files[i] |
| 167 | .path |
| 168 | .file_name() |
| 169 | .unwrap_or_default() |
| 170 | .to_str() |
| 171 | .unwrap_or("?") |
| 172 | }) |
| 173 | .collect(); |
| 174 | return Err(format!( |
| 175 | "circular module dependency detected among: {}", |
| 176 | cycle_files.join(", ") |
| 177 | )); |
| 178 | } |
| 179 | |
| 180 | Ok(order) |
| 181 | } |
| 182 | |
| 183 | #[cfg(test)] |
| 184 | mod tests { |
| 185 | use super::*; |
| 186 | |
| 187 | #[test] |
| 188 | fn scan_simple_module() { |
| 189 | let dir = std::env::temp_dir().join("dep_scan_test_1"); |
| 190 | std::fs::create_dir_all(&dir).unwrap(); |
| 191 | let f = dir.join("mod.f90"); |
| 192 | std::fs::write( |
| 193 | &f, |
| 194 | "module mymod\n use other_mod\n implicit none\nend module\n", |
| 195 | ) |
| 196 | .unwrap(); |
| 197 | let deps = scan_file(&f).unwrap(); |
| 198 | assert_eq!(deps.defines, vec!["mymod"]); |
| 199 | assert_eq!(deps.uses, vec!["other_mod"]); |
| 200 | let _ = std::fs::remove_dir_all(&dir); |
| 201 | } |
| 202 | |
| 203 | #[test] |
| 204 | fn topo_sort_chain() { |
| 205 | let files = vec![ |
| 206 | FileDeps { |
| 207 | path: "c.f90".into(), |
| 208 | defines: vec!["c".into()], |
| 209 | uses: vec![], |
| 210 | }, |
| 211 | FileDeps { |
| 212 | path: "b.f90".into(), |
| 213 | defines: vec!["b".into()], |
| 214 | uses: vec!["c".into()], |
| 215 | }, |
| 216 | FileDeps { |
| 217 | path: "a.f90".into(), |
| 218 | defines: vec!["a".into()], |
| 219 | uses: vec!["b".into()], |
| 220 | }, |
| 221 | ]; |
| 222 | let order = resolve_compilation_order(&files).unwrap(); |
| 223 | // c must come before b, b before a. |
| 224 | let pos_c = order.iter().position(|&i| i == 0).unwrap(); |
| 225 | let pos_b = order.iter().position(|&i| i == 1).unwrap(); |
| 226 | let pos_a = order.iter().position(|&i| i == 2).unwrap(); |
| 227 | assert!(pos_c < pos_b); |
| 228 | assert!(pos_b < pos_a); |
| 229 | } |
| 230 | |
| 231 | #[test] |
| 232 | fn topo_sort_cycle() { |
| 233 | let files = vec![ |
| 234 | FileDeps { |
| 235 | path: "a.f90".into(), |
| 236 | defines: vec!["a".into()], |
| 237 | uses: vec!["b".into()], |
| 238 | }, |
| 239 | FileDeps { |
| 240 | path: "b.f90".into(), |
| 241 | defines: vec!["b".into()], |
| 242 | uses: vec!["a".into()], |
| 243 | }, |
| 244 | ]; |
| 245 | let err = resolve_compilation_order(&files).unwrap_err(); |
| 246 | assert!( |
| 247 | err.contains("circular"), |
| 248 | "expected cycle error, got: {}", |
| 249 | err |
| 250 | ); |
| 251 | } |
| 252 | |
| 253 | #[test] |
| 254 | fn topo_sort_diamond() { |
| 255 | let files = vec![ |
| 256 | FileDeps { |
| 257 | path: "d.f90".into(), |
| 258 | defines: vec!["d".into()], |
| 259 | uses: vec![], |
| 260 | }, |
| 261 | FileDeps { |
| 262 | path: "b.f90".into(), |
| 263 | defines: vec!["b".into()], |
| 264 | uses: vec!["d".into()], |
| 265 | }, |
| 266 | FileDeps { |
| 267 | path: "c.f90".into(), |
| 268 | defines: vec!["c".into()], |
| 269 | uses: vec!["d".into()], |
| 270 | }, |
| 271 | FileDeps { |
| 272 | path: "a.f90".into(), |
| 273 | defines: vec!["a".into()], |
| 274 | uses: vec!["b".into(), "c".into()], |
| 275 | }, |
| 276 | ]; |
| 277 | let order = resolve_compilation_order(&files).unwrap(); |
| 278 | let pos_d = order.iter().position(|&i| i == 0).unwrap(); |
| 279 | let pos_a = order.iter().position(|&i| i == 3).unwrap(); |
| 280 | assert!(pos_d < pos_a); |
| 281 | assert_eq!(order.len(), 4); |
| 282 | } |
| 283 | } |
| 284 |