@@ -15,9 +15,11 @@ |
| 15 | 15 | //! happened (inserted / replaced / kept / pending archive fetch), and they |
| 16 | 16 | //! drive the outer loop. |
| 17 | 17 | |
| 18 | | -use std::collections::HashMap; |
| 19 | | -use std::path::PathBuf; |
| 18 | +use std::collections::{HashMap, HashSet, VecDeque}; |
| 19 | +use std::path::{Path, PathBuf}; |
| 20 | 20 | use std::rc::Rc; |
| 21 | +use std::sync::{mpsc, Arc, Mutex}; |
| 22 | +use std::thread; |
| 21 | 23 | |
| 22 | 24 | use crate::archive::{Archive, ArchiveError}; |
| 23 | 25 | use crate::input::ObjectFile; |
@@ -156,7 +158,7 @@ pub struct ArchiveInput { |
| 156 | 158 | /// the fixed-point loop from re-ingesting the same object twice — |
| 157 | 159 | /// important both for correctness (no duplicate-strong errors from |
| 158 | 160 | /// our own symbols) and for keeping transitions deterministic. |
| 159 | | - pub fetched: std::collections::HashSet<u32>, |
| 161 | + pub fetched: HashSet<u32>, |
| 160 | 162 | } |
| 161 | 163 | |
| 162 | 164 | #[derive(Debug)] |
@@ -1168,47 +1170,164 @@ pub struct DrainReport { |
| 1168 | 1170 | pub referrers: ReferrerLog, |
| 1169 | 1171 | } |
| 1170 | 1172 | |
/// Identifies one member inside one archive: the (archive, member-offset)
/// pair used to deduplicate demand-driven fetches across the drain loop.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct ArchiveMemberKey {
    archive: ArchiveId,
    member: MemberId,
}
| 1178 | + |
/// One unit of parallel work: everything a worker thread needs to open an
/// archive and extract/parse a single member, borrowing the archive's raw
/// bytes from `Inputs` rather than copying them.
#[derive(Clone, Copy)]
struct ArchiveMemberLoadJob<'a> {
    // Position in the submitted batch; used to restore submission order
    // after workers finish out of order.
    index: usize,
    key: ArchiveMemberKey,
    archive_path: &'a Path,
    archive_bytes: &'a [u8],
    // Propagated onto the ingested `ObjectInput` so resolution order
    // matches the archive's original load order.
    archive_load_order: usize,
}
| 1187 | + |
/// A member that has been extracted and parsed but not yet ingested into
/// the registry/symbol table. Produced by the (possibly parallel) loader,
/// consumed serially by ingestion to keep symbol transitions deterministic.
struct LoadedArchiveMember {
    key: ArchiveMemberKey,
    archive_load_order: usize,
    // Synthetic "archive(member)" path used for diagnostics.
    logical_path: PathBuf,
    bytes: Vec<u8>,
    parsed: ObjectFile,
}
| 1195 | + |
| 1196 | +fn make_archive_member_jobs<'a>( |
| 1197 | + inputs: &'a Inputs, |
| 1198 | + keys: Vec<ArchiveMemberKey>, |
| 1199 | +) -> Vec<ArchiveMemberLoadJob<'a>> { |
| 1200 | + keys.into_iter() |
| 1201 | + .enumerate() |
| 1202 | + .map(|(index, key)| { |
| 1203 | + let archive = &inputs.archives[key.archive.0 as usize]; |
| 1204 | + ArchiveMemberLoadJob { |
| 1205 | + index, |
| 1206 | + key, |
| 1207 | + archive_path: &archive.path, |
| 1208 | + archive_bytes: &archive.bytes, |
| 1209 | + archive_load_order: archive.load_order, |
| 1210 | + } |
| 1211 | + }) |
| 1212 | + .collect() |
| 1213 | +} |
| 1214 | + |
| 1215 | +fn load_archive_members_parallel( |
| 1216 | + inputs: &Inputs, |
| 1217 | + keys: Vec<ArchiveMemberKey>, |
| 1218 | +) -> Vec<(ArchiveMemberKey, Result<LoadedArchiveMember, FetchError>)> { |
| 1219 | + let jobs = make_archive_member_jobs(inputs, keys); |
| 1220 | + if jobs.is_empty() { |
| 1221 | + return Vec::new(); |
| 1222 | + } |
| 1223 | + let job_count = thread::available_parallelism() |
| 1224 | + .map(usize::from) |
| 1225 | + .unwrap_or(1) |
| 1226 | + .min(jobs.len()) |
| 1227 | + .max(1); |
| 1228 | + if job_count == 1 { |
| 1229 | + return jobs |
| 1230 | + .into_iter() |
| 1231 | + .map(load_archive_member_job) |
| 1232 | + .map(|(_, key, result)| (key, result)) |
| 1233 | + .collect(); |
| 1234 | + } |
| 1235 | + |
| 1236 | + let queue = Arc::new(Mutex::new(VecDeque::from(jobs))); |
| 1237 | + let (tx, rx) = mpsc::channel(); |
| 1238 | + let mut results = thread::scope(|scope| { |
| 1239 | + for _ in 0..job_count { |
| 1240 | + let queue = Arc::clone(&queue); |
| 1241 | + let tx = tx.clone(); |
| 1242 | + scope.spawn(move || loop { |
| 1243 | + let Some(job) = queue |
| 1244 | + .lock() |
| 1245 | + .expect("archive member load queue mutex poisoned") |
| 1246 | + .pop_front() |
| 1247 | + else { |
| 1248 | + break; |
| 1249 | + }; |
| 1250 | + tx.send(load_archive_member_job(job)) |
| 1251 | + .expect("archive member load receiver should stay live"); |
| 1252 | + }); |
| 1253 | + } |
| 1254 | + drop(tx); |
| 1255 | + rx.into_iter().collect::<Vec<_>>() |
| 1256 | + }); |
| 1257 | + results.sort_by_key(|(index, _, _)| *index); |
| 1258 | + results |
| 1259 | + .into_iter() |
| 1260 | + .map(|(_, key, result)| (key, result)) |
| 1261 | + .collect() |
| 1262 | +} |
| 1263 | + |
| 1264 | +fn load_archive_member_job( |
| 1265 | + job: ArchiveMemberLoadJob<'_>, |
| 1266 | +) -> ( |
| 1267 | + usize, |
| 1268 | + ArchiveMemberKey, |
| 1269 | + Result<LoadedArchiveMember, FetchError>, |
| 1270 | +) { |
| 1271 | + let result = (|| { |
| 1272 | + let archive = Archive::open(job.archive_path, job.archive_bytes)?; |
| 1273 | + let member = |
| 1274 | + archive |
| 1275 | + .member_at_offset(job.key.member.0) |
| 1276 | + .ok_or(FetchError::MemberNotFound { |
| 1277 | + archive: job.key.archive, |
| 1278 | + member: job.key.member, |
| 1279 | + })?; |
| 1280 | + let logical_path = |
| 1281 | + PathBuf::from(format!("{}({})", job.archive_path.display(), member.name)); |
| 1282 | + let bytes = member.body.to_vec(); |
| 1283 | + let parsed = ObjectFile::parse(&logical_path, &bytes)?; |
| 1284 | + Ok(LoadedArchiveMember { |
| 1285 | + key: job.key, |
| 1286 | + archive_load_order: job.archive_load_order, |
| 1287 | + logical_path, |
| 1288 | + bytes, |
| 1289 | + parsed, |
| 1290 | + }) |
| 1291 | + })(); |
| 1292 | + (job.index, job.key, result) |
| 1293 | +} |
| 1294 | + |
| 1295 | +fn archive_member_key(pending: PendingFetch) -> ArchiveMemberKey { |
| 1296 | + ArchiveMemberKey { |
| 1297 | + archive: pending.archive, |
| 1298 | + member: pending.member, |
| 1299 | + } |
| 1300 | +} |
| 1301 | + |
| 1302 | +fn archive_member_is_fetched(inputs: &Inputs, key: ArchiveMemberKey) -> bool { |
| 1303 | + inputs.archives[key.archive.0 as usize] |
| 1304 | + .fetched |
| 1305 | + .contains(&key.member.0) |
| 1306 | +} |
| 1307 | + |
| 1171 | 1308 | /// Shared ingest: copy one archive member's body into a fresh |
| 1172 | 1309 | /// `ObjectInput`, mark it fetched, and seed its symbols. Callers either |
| 1173 | 1310 | /// respond to a demand-driven `PendingFetch` or force-pull the member. |
| 1174 | | -fn ingest_member_bytes( |
| 1311 | +fn ingest_loaded_member( |
| 1175 | 1312 | inputs: &mut Inputs, |
| 1176 | 1313 | table: &mut SymbolTable, |
| 1177 | | - archive_id: ArchiveId, |
| 1178 | | - member_id: MemberId, |
| 1314 | + loaded: LoadedArchiveMember, |
| 1179 | 1315 | report: &mut DrainReport, |
| 1180 | 1316 | ) -> Result<Vec<PendingFetch>, FetchError> { |
| 1181 | | - let archive_load_order = inputs.archives[archive_id.0 as usize].load_order; |
| 1182 | | - let ai = &inputs.archives[archive_id.0 as usize]; |
| 1183 | | - if ai.fetched.contains(&member_id.0) { |
| 1317 | + if archive_member_is_fetched(inputs, loaded.key) { |
| 1184 | 1318 | return Ok(Vec::new()); |
| 1185 | 1319 | } |
| 1186 | 1320 | |
| 1187 | | - // Extract owned data before mutating the registry. |
| 1188 | | - let (logical_path, member_bytes) = { |
| 1189 | | - let archive = Archive::open(&ai.path, &ai.bytes)?; |
| 1190 | | - let member = archive |
| 1191 | | - .member_at_offset(member_id.0) |
| 1192 | | - .ok_or(FetchError::MemberNotFound { |
| 1193 | | - archive: archive_id, |
| 1194 | | - member: member_id, |
| 1195 | | - })?; |
| 1196 | | - let logical = format!("{}({})", ai.path.display(), member.name); |
| 1197 | | - (logical, member.body.to_vec()) |
| 1198 | | - }; |
| 1199 | | - let logical_path = PathBuf::from(logical_path); |
| 1200 | | - let parsed = ObjectFile::parse(&logical_path, &member_bytes)?; |
| 1201 | | - |
| 1202 | | - inputs.archives[archive_id.0 as usize] |
| 1321 | + inputs.archives[loaded.key.archive.0 as usize] |
| 1203 | 1322 | .fetched |
| 1204 | | - .insert(member_id.0); |
| 1323 | + .insert(loaded.key.member.0); |
| 1205 | 1324 | let input_id = InputId(inputs.objects.len() as u32); |
| 1206 | 1325 | inputs.objects.push(ObjectInput { |
| 1207 | | - path: logical_path, |
| 1208 | | - load_order: archive_load_order, |
| 1209 | | - archive_member_offset: Some(member_id.0), |
| 1210 | | - bytes: member_bytes, |
| 1211 | | - parsed, |
| 1326 | + path: loaded.logical_path, |
| 1327 | + load_order: loaded.archive_load_order, |
| 1328 | + archive_member_offset: Some(loaded.key.member.0), |
| 1329 | + bytes: loaded.bytes, |
| 1330 | + parsed: loaded.parsed, |
| 1212 | 1331 | }); |
| 1213 | 1332 | report.fetched_members += 1; |
| 1214 | 1333 | report |
@@ -1222,6 +1341,23 @@ fn ingest_member_bytes( |
| 1222 | 1341 | Ok(sub_report.pending_fetches) |
| 1223 | 1342 | } |
| 1224 | 1343 | |
| 1344 | +fn load_and_ingest_member( |
| 1345 | + inputs: &mut Inputs, |
| 1346 | + table: &mut SymbolTable, |
| 1347 | + key: ArchiveMemberKey, |
| 1348 | + report: &mut DrainReport, |
| 1349 | +) -> Result<Vec<PendingFetch>, FetchError> { |
| 1350 | + if archive_member_is_fetched(inputs, key) { |
| 1351 | + return Ok(Vec::new()); |
| 1352 | + } |
| 1353 | + let loaded = load_archive_members_parallel(inputs, vec![key]) |
| 1354 | + .into_iter() |
| 1355 | + .next() |
| 1356 | + .expect("single archive member load should produce one result") |
| 1357 | + .1?; |
| 1358 | + ingest_loaded_member(inputs, table, loaded, report) |
| 1359 | +} |
| 1360 | + |
| 1225 | 1361 | /// Pull `pending`'s member only if the symbol slot is still a |
| 1226 | 1362 | /// `LazyArchive` (i.e., a strong Defined hasn't superseded it). Returns |
| 1227 | 1363 | /// any new `PendingFetch` entries triggered by the inserted member. |
@@ -1235,7 +1371,7 @@ fn fetch_and_ingest_one( |
| 1235 | 1371 | if !slot_is_still_lazy { |
| 1236 | 1372 | return Ok(Vec::new()); |
| 1237 | 1373 | } |
| 1238 | | - ingest_member_bytes(inputs, table, pending.archive, pending.member, report) |
| 1374 | + load_and_ingest_member(inputs, table, archive_member_key(pending), report) |
| 1239 | 1375 | } |
| 1240 | 1376 | |
| 1241 | 1377 | /// Pull every member of one archive (bypasses demand tracking). Respects |
@@ -1255,9 +1391,16 @@ pub fn force_load_archive( |
| 1255 | 1391 | .map(|m| m.header_offset as u32) |
| 1256 | 1392 | .collect() |
| 1257 | 1393 | }; |
| 1394 | + let keys = member_offsets |
| 1395 | + .into_iter() |
| 1396 | + .map(|offset| ArchiveMemberKey { |
| 1397 | + archive: archive_id, |
| 1398 | + member: MemberId(offset), |
| 1399 | + }) |
| 1400 | + .collect(); |
| 1258 | 1401 | let mut queue: Vec<PendingFetch> = Vec::new(); |
| 1259 | | - for offset in member_offsets { |
| 1260 | | - let new = ingest_member_bytes(inputs, table, archive_id, MemberId(offset), report)?; |
| 1402 | + for (_, loaded) in load_archive_members_parallel(inputs, keys) { |
| 1403 | + let new = ingest_loaded_member(inputs, table, loaded?, report)?; |
| 1261 | 1404 | queue.extend(new); |
| 1262 | 1405 | } |
| 1263 | 1406 | while let Some(p) = queue.pop() { |
@@ -1556,14 +1699,59 @@ pub fn drain_fetches( |
| 1556 | 1699 | initial: Vec<PendingFetch>, |
| 1557 | 1700 | ) -> Result<DrainReport, FetchError> { |
| 1558 | 1701 | let mut queue = initial; |
| 1702 | + let mut prepared = HashMap::new(); |
| 1559 | 1703 | let mut report = DrainReport::default(); |
| 1560 | 1704 | while let Some(p) = queue.pop() { |
| 1561 | | - let new_pending = fetch_and_ingest_one(inputs, table, p, &mut report)?; |
| 1705 | + let key = archive_member_key(p); |
| 1706 | + let slot_is_still_lazy = matches!(table.get(p.id), Symbol::LazyArchive { .. }); |
| 1707 | + if !slot_is_still_lazy || archive_member_is_fetched(inputs, key) { |
| 1708 | + prepared.remove(&key); |
| 1709 | + continue; |
| 1710 | + } |
| 1711 | + // Parse siblings ahead of time, but only ingest the current stack |
| 1712 | + // entry after re-checking its lazy slot. This keeps member order stable. |
| 1713 | + if !prepared.contains_key(&key) { |
| 1714 | + preparse_pending_fetches(inputs, table, p, &queue, &mut prepared); |
| 1715 | + } |
| 1716 | + let Some(loaded) = prepared.remove(&key) else { |
| 1717 | + continue; |
| 1718 | + }; |
| 1719 | + let loaded = loaded?; |
| 1720 | + let slot_is_still_lazy = matches!(table.get(p.id), Symbol::LazyArchive { .. }); |
| 1721 | + if !slot_is_still_lazy || archive_member_is_fetched(inputs, key) { |
| 1722 | + continue; |
| 1723 | + } |
| 1724 | + let new_pending = ingest_loaded_member(inputs, table, loaded, &mut report)?; |
| 1562 | 1725 | queue.extend(new_pending); |
| 1563 | 1726 | } |
| 1564 | 1727 | Ok(report) |
| 1565 | 1728 | } |
| 1566 | 1729 | |
| 1730 | +fn preparse_pending_fetches( |
| 1731 | + inputs: &Inputs, |
| 1732 | + table: &SymbolTable, |
| 1733 | + current: PendingFetch, |
| 1734 | + queue: &[PendingFetch], |
| 1735 | + prepared: &mut HashMap<ArchiveMemberKey, Result<LoadedArchiveMember, FetchError>>, |
| 1736 | +) { |
| 1737 | + let mut seen = HashSet::new(); |
| 1738 | + let mut keys = Vec::new(); |
| 1739 | + for pending in std::iter::once(¤t).chain(queue.iter().rev()) { |
| 1740 | + let key = archive_member_key(*pending); |
| 1741 | + if prepared.contains_key(&key) |
| 1742 | + || archive_member_is_fetched(inputs, key) |
| 1743 | + || !matches!(table.get(pending.id), Symbol::LazyArchive { .. }) |
| 1744 | + || !seen.insert(key) |
| 1745 | + { |
| 1746 | + continue; |
| 1747 | + } |
| 1748 | + keys.push(key); |
| 1749 | + } |
| 1750 | + for (key, result) in load_archive_members_parallel(inputs, keys) { |
| 1751 | + prepared.insert(key, result); |
| 1752 | + } |
| 1753 | +} |
| 1754 | + |
| 1567 | 1755 | /// Turn a wire-form `InputSymbol` into a resolver-side `Symbol`. Returns |
| 1568 | 1756 | /// `None` for kinds the resolver does not track (currently: aliases with |
| 1569 | 1757 | /// unresolved target strx — Sprint 8's resolver defers those for now). |