gardesk/garwarp / 1f26b54

Browse files

add health recovery policy

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
1f26b54a6c64e6367bcc64a5d52f798776c107a1
Parents
921cd0e
Tree
a211ede

1 changed file

Status File + -
M garwarp/src/daemon.rs 48 6
garwarp/src/daemon.rs modified
@@ -55,7 +55,8 @@ pub fn run() -> io::Result<()> {
5555
         requests,
5656
         running: true,
5757
     };
58
-    persist_registry_state(&paths.request_store, &state.requests);
58
+    let persisted = persist_registry_state(&paths.request_store, &state.requests);
59
+    update_health_after_persist(&mut state, persisted);
5960
 
6061
     while state.running {
6162
         let expired = state.requests.expire_stale(Instant::now());
@@ -63,7 +64,8 @@ pub fn run() -> io::Result<()> {
6364
             logging::warn(&format!("request_expired id={id}"));
6465
         }
6566
         if !expired.is_empty() {
66
-            persist_registry_state(&paths.request_store, &state.requests);
67
+            let persisted = persist_registry_state(&paths.request_store, &state.requests);
68
+            update_health_after_persist(&mut state, persisted);
6769
         }
6870
 
6971
         let pruned = state
@@ -73,7 +75,8 @@ pub fn run() -> io::Result<()> {
7375
             logging::info(&format!("request_pruned id={id}"));
7476
         }
7577
         if !pruned.is_empty() {
76
-            persist_registry_state(&paths.request_store, &state.requests);
78
+            let persisted = persist_registry_state(&paths.request_store, &state.requests);
79
+            update_health_after_persist(&mut state, persisted);
7780
         }
7881
 
7982
         match listener.accept() {
@@ -81,7 +84,8 @@ pub fn run() -> io::Result<()> {
8184
                 if let Err(error) = handle_connection(stream, &mut state) {
8285
                     logging::warn(&format!("request_error={error}"));
8386
                 } else {
84
-                    persist_registry_state(&paths.request_store, &state.requests);
87
+                    let persisted = persist_registry_state(&paths.request_store, &state.requests);
88
+                    update_health_after_persist(&mut state, persisted);
8589
                 }
8690
             }
8791
             Err(error) if error.kind() == io::ErrorKind::WouldBlock => {
@@ -512,9 +516,25 @@ fn peer_uid(stream: &UnixStream) -> io::Result<u32> {
512516
     }
513517
 }
514518
 
515
-fn persist_registry_state(path: &std::path::Path, registry: &RequestRegistry) {
519
+fn persist_registry_state(path: &std::path::Path, registry: &RequestRegistry) -> bool {
516520
     if let Err(error) = request_store::persist_registry(path, registry) {
517521
         logging::warn(&format!("request_store_write_failed error={error}"));
522
+        return false;
523
+    }
524
+    true
525
+}
526
+
527
+fn update_health_after_persist(state: &mut DaemonState, persisted: bool) {
528
+    match (state.health, persisted) {
529
+        (HealthStatus::Healthy, false) => {
530
+            state.health = HealthStatus::Degraded;
531
+            logging::warn("health_degraded reason=request_store_write_failed");
532
+        }
533
+        (HealthStatus::Degraded, true) => {
534
+            state.health = HealthStatus::Healthy;
535
+            logging::info("health_recovered source=request_store_write");
536
+        }
537
+        _ => {}
518538
     }
519539
 }
520540
 
@@ -523,7 +543,7 @@ mod tests {
523543
     use super::{
524544
         DaemonState, MAX_CONTROL_LINE_BYTES, canonical_sender_for_uid, handle_connection,
525545
         is_trusted_control_peer, load_registry_with_fallback, load_registry_with_recovery,
526
-        peer_uid, set_control_socket_permissions,
546
+        peer_uid, set_control_socket_permissions, update_health_after_persist,
527547
     };
528548
     use garwarp_ipc::{ControlResponse, HealthStatus};
529549
     use std::fs;
@@ -572,6 +592,28 @@ mod tests {
572592
         let _ = fs::remove_file(path);
573593
     }
574594
 
595
+    #[test]
596
+    fn health_degrades_when_persist_fails() {
597
+        let mut state = DaemonState {
598
+            health: HealthStatus::Healthy,
599
+            requests: RequestRegistry::new(Duration::from_secs(5)),
600
+            running: true,
601
+        };
602
+        update_health_after_persist(&mut state, false);
603
+        assert_eq!(state.health, HealthStatus::Degraded);
604
+    }
605
+
606
+    #[test]
607
+    fn health_recovers_when_persist_succeeds() {
608
+        let mut state = DaemonState {
609
+            health: HealthStatus::Degraded,
610
+            requests: RequestRegistry::new(Duration::from_secs(5)),
611
+            running: true,
612
+        };
613
+        update_health_after_persist(&mut state, true);
614
+        assert_eq!(state.health, HealthStatus::Healthy);
615
+    }
616
+
575617
     #[test]
576618
     fn status_request_returns_status_response() {
577619
         let (mut client, server) = UnixStream::pair().expect("pair should be created");