@@ -55,7 +55,8 @@ pub fn run() -> io::Result<()> { |
| 55 | requests, | 55 | requests, |
| 56 | running: true, | 56 | running: true, |
| 57 | }; | 57 | }; |
| 58 | - persist_registry_state(&paths.request_store, &state.requests); | 58 | + let persisted = persist_registry_state(&paths.request_store, &state.requests); |
| | 59 | + update_health_after_persist(&mut state, persisted); |
| 59 | | 60 | |
| 60 | while state.running { | 61 | while state.running { |
| 61 | let expired = state.requests.expire_stale(Instant::now()); | 62 | let expired = state.requests.expire_stale(Instant::now()); |
@@ -63,7 +64,8 @@ pub fn run() -> io::Result<()> { |
| 63 | logging::warn(&format!("request_expired id={id}")); | 64 | logging::warn(&format!("request_expired id={id}")); |
| 64 | } | 65 | } |
| 65 | if !expired.is_empty() { | 66 | if !expired.is_empty() { |
| 66 | - persist_registry_state(&paths.request_store, &state.requests); | 67 | + let persisted = persist_registry_state(&paths.request_store, &state.requests); |
| | 68 | + update_health_after_persist(&mut state, persisted); |
| 67 | } | 69 | } |
| 68 | | 70 | |
| 69 | let pruned = state | 71 | let pruned = state |
@@ -73,7 +75,8 @@ pub fn run() -> io::Result<()> { |
| 73 | logging::info(&format!("request_pruned id={id}")); | 75 | logging::info(&format!("request_pruned id={id}")); |
| 74 | } | 76 | } |
| 75 | if !pruned.is_empty() { | 77 | if !pruned.is_empty() { |
| 76 | - persist_registry_state(&paths.request_store, &state.requests); | 78 | + let persisted = persist_registry_state(&paths.request_store, &state.requests); |
| | 79 | + update_health_after_persist(&mut state, persisted); |
| 77 | } | 80 | } |
| 78 | | 81 | |
| 79 | match listener.accept() { | 82 | match listener.accept() { |
@@ -81,7 +84,8 @@ pub fn run() -> io::Result<()> { |
| 81 | if let Err(error) = handle_connection(stream, &mut state) { | 84 | if let Err(error) = handle_connection(stream, &mut state) { |
| 82 | logging::warn(&format!("request_error={error}")); | 85 | logging::warn(&format!("request_error={error}")); |
| 83 | } else { | 86 | } else { |
| 84 | - persist_registry_state(&paths.request_store, &state.requests); | 87 | + let persisted = persist_registry_state(&paths.request_store, &state.requests); |
| | 88 | + update_health_after_persist(&mut state, persisted); |
| 85 | } | 89 | } |
| 86 | } | 90 | } |
| 87 | Err(error) if error.kind() == io::ErrorKind::WouldBlock => { | 91 | Err(error) if error.kind() == io::ErrorKind::WouldBlock => { |
@@ -512,9 +516,25 @@ fn peer_uid(stream: &UnixStream) -> io::Result<u32> { |
| 512 | } | 516 | } |
| 513 | } | 517 | } |
| 514 | | 518 | |
| 515 | -fn persist_registry_state(path: &std::path::Path, registry: &RequestRegistry) { | 519 | +fn persist_registry_state(path: &std::path::Path, registry: &RequestRegistry) -> bool { |
| 516 | if let Err(error) = request_store::persist_registry(path, registry) { | 520 | if let Err(error) = request_store::persist_registry(path, registry) { |
| 517 | logging::warn(&format!("request_store_write_failed error={error}")); | 521 | logging::warn(&format!("request_store_write_failed error={error}")); |
| | 522 | + return false; |
| | 523 | + } |
| | 524 | + true |
| | 525 | +} |
| | 526 | + |
| | 527 | +fn update_health_after_persist(state: &mut DaemonState, persisted: bool) { |
| | 528 | + match (state.health, persisted) { |
| | 529 | + (HealthStatus::Healthy, false) => { |
| | 530 | + state.health = HealthStatus::Degraded; |
| | 531 | + logging::warn("health_degraded reason=request_store_write_failed"); |
| | 532 | + } |
| | 533 | + (HealthStatus::Degraded, true) => { |
| | 534 | + state.health = HealthStatus::Healthy; |
| | 535 | + logging::info("health_recovered source=request_store_write"); |
| | 536 | + } |
| | 537 | + _ => {} |
| 518 | } | 538 | } |
| 519 | } | 539 | } |
| 520 | | 540 | |
@@ -523,7 +543,7 @@ mod tests { |
| 523 | use super::{ | 543 | use super::{ |
| 524 | DaemonState, MAX_CONTROL_LINE_BYTES, canonical_sender_for_uid, handle_connection, | 544 | DaemonState, MAX_CONTROL_LINE_BYTES, canonical_sender_for_uid, handle_connection, |
| 525 | is_trusted_control_peer, load_registry_with_fallback, load_registry_with_recovery, | 545 | is_trusted_control_peer, load_registry_with_fallback, load_registry_with_recovery, |
| 526 | - peer_uid, set_control_socket_permissions, | 546 | + peer_uid, set_control_socket_permissions, update_health_after_persist, |
| 527 | }; | 547 | }; |
| 528 | use garwarp_ipc::{ControlResponse, HealthStatus}; | 548 | use garwarp_ipc::{ControlResponse, HealthStatus}; |
| 529 | use std::fs; | 549 | use std::fs; |
@@ -572,6 +592,28 @@ mod tests { |
| 572 | let _ = fs::remove_file(path); | 592 | let _ = fs::remove_file(path); |
| 573 | } | 593 | } |
| 574 | | 594 | |
| | 595 | + #[test] |
| | 596 | + fn health_degrades_when_persist_fails() { |
| | 597 | + let mut state = DaemonState { |
| | 598 | + health: HealthStatus::Healthy, |
| | 599 | + requests: RequestRegistry::new(Duration::from_secs(5)), |
| | 600 | + running: true, |
| | 601 | + }; |
| | 602 | + update_health_after_persist(&mut state, false); |
| | 603 | + assert_eq!(state.health, HealthStatus::Degraded); |
| | 604 | + } |
| | 605 | + |
| | 606 | + #[test] |
| | 607 | + fn health_recovers_when_persist_succeeds() { |
| | 608 | + let mut state = DaemonState { |
| | 609 | + health: HealthStatus::Degraded, |
| | 610 | + requests: RequestRegistry::new(Duration::from_secs(5)), |
| | 611 | + running: true, |
| | 612 | + }; |
| | 613 | + update_health_after_persist(&mut state, true); |
| | 614 | + assert_eq!(state.health, HealthStatus::Healthy); |
| | 615 | + } |
| | 616 | + |
| 575 | #[test] | 617 | #[test] |
| 576 | fn status_request_returns_status_response() { | 618 | fn status_request_returns_status_response() { |
| 577 | let (mut client, server) = UnixStream::pair().expect("pair should be created"); | 619 | let (mut client, server) = UnixStream::pair().expect("pair should be created"); |