gardesk/garwarp / 1f26b54

Browse files

add health recovery policy

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
1f26b54a6c64e6367bcc64a5d52f798776c107a1
Parents
921cd0e
Tree
a211ede

1 changed file

StatusFile+-
M garwarp/src/daemon.rs 48 6
garwarp/src/daemon.rsmodified
@@ -55,7 +55,8 @@ pub fn run() -> io::Result<()> {
55
         requests,
55
         requests,
56
         running: true,
56
         running: true,
57
     };
57
     };
58
-    persist_registry_state(&paths.request_store, &state.requests);
58
+    let persisted = persist_registry_state(&paths.request_store, &state.requests);
59
+    update_health_after_persist(&mut state, persisted);
59
 
60
 
60
     while state.running {
61
     while state.running {
61
         let expired = state.requests.expire_stale(Instant::now());
62
         let expired = state.requests.expire_stale(Instant::now());
@@ -63,7 +64,8 @@ pub fn run() -> io::Result<()> {
63
             logging::warn(&format!("request_expired id={id}"));
64
             logging::warn(&format!("request_expired id={id}"));
64
         }
65
         }
65
         if !expired.is_empty() {
66
         if !expired.is_empty() {
66
-            persist_registry_state(&paths.request_store, &state.requests);
67
+            let persisted = persist_registry_state(&paths.request_store, &state.requests);
68
+            update_health_after_persist(&mut state, persisted);
67
         }
69
         }
68
 
70
 
69
         let pruned = state
71
         let pruned = state
@@ -73,7 +75,8 @@ pub fn run() -> io::Result<()> {
73
             logging::info(&format!("request_pruned id={id}"));
75
             logging::info(&format!("request_pruned id={id}"));
74
         }
76
         }
75
         if !pruned.is_empty() {
77
         if !pruned.is_empty() {
76
-            persist_registry_state(&paths.request_store, &state.requests);
78
+            let persisted = persist_registry_state(&paths.request_store, &state.requests);
79
+            update_health_after_persist(&mut state, persisted);
77
         }
80
         }
78
 
81
 
79
         match listener.accept() {
82
         match listener.accept() {
@@ -81,7 +84,8 @@ pub fn run() -> io::Result<()> {
81
                 if let Err(error) = handle_connection(stream, &mut state) {
84
                 if let Err(error) = handle_connection(stream, &mut state) {
82
                     logging::warn(&format!("request_error={error}"));
85
                     logging::warn(&format!("request_error={error}"));
83
                 } else {
86
                 } else {
84
-                    persist_registry_state(&paths.request_store, &state.requests);
87
+                    let persisted = persist_registry_state(&paths.request_store, &state.requests);
88
+                    update_health_after_persist(&mut state, persisted);
85
                 }
89
                 }
86
             }
90
             }
87
             Err(error) if error.kind() == io::ErrorKind::WouldBlock => {
91
             Err(error) if error.kind() == io::ErrorKind::WouldBlock => {
@@ -512,9 +516,25 @@ fn peer_uid(stream: &UnixStream) -> io::Result<u32> {
512
     }
516
     }
513
 }
517
 }
514
 
518
 
515
-fn persist_registry_state(path: &std::path::Path, registry: &RequestRegistry) {
519
+fn persist_registry_state(path: &std::path::Path, registry: &RequestRegistry) -> bool {
516
     if let Err(error) = request_store::persist_registry(path, registry) {
520
     if let Err(error) = request_store::persist_registry(path, registry) {
517
         logging::warn(&format!("request_store_write_failed error={error}"));
521
         logging::warn(&format!("request_store_write_failed error={error}"));
522
+        return false;
523
+    }
524
+    true
525
+}
526
+
527
+fn update_health_after_persist(state: &mut DaemonState, persisted: bool) {
528
+    match (state.health, persisted) {
529
+        (HealthStatus::Healthy, false) => {
530
+            state.health = HealthStatus::Degraded;
531
+            logging::warn("health_degraded reason=request_store_write_failed");
532
+        }
533
+        (HealthStatus::Degraded, true) => {
534
+            state.health = HealthStatus::Healthy;
535
+            logging::info("health_recovered source=request_store_write");
536
+        }
537
+        _ => {}
518
     }
538
     }
519
 }
539
 }
520
 
540
 
@@ -523,7 +543,7 @@ mod tests {
523
     use super::{
543
     use super::{
524
         DaemonState, MAX_CONTROL_LINE_BYTES, canonical_sender_for_uid, handle_connection,
544
         DaemonState, MAX_CONTROL_LINE_BYTES, canonical_sender_for_uid, handle_connection,
525
         is_trusted_control_peer, load_registry_with_fallback, load_registry_with_recovery,
545
         is_trusted_control_peer, load_registry_with_fallback, load_registry_with_recovery,
526
-        peer_uid, set_control_socket_permissions,
546
+        peer_uid, set_control_socket_permissions, update_health_after_persist,
527
     };
547
     };
528
     use garwarp_ipc::{ControlResponse, HealthStatus};
548
     use garwarp_ipc::{ControlResponse, HealthStatus};
529
     use std::fs;
549
     use std::fs;
@@ -572,6 +592,28 @@ mod tests {
572
         let _ = fs::remove_file(path);
592
         let _ = fs::remove_file(path);
573
     }
593
     }
574
 
594
 
595
+    #[test]
596
+    fn health_degrades_when_persist_fails() {
597
+        let mut state = DaemonState {
598
+            health: HealthStatus::Healthy,
599
+            requests: RequestRegistry::new(Duration::from_secs(5)),
600
+            running: true,
601
+        };
602
+        update_health_after_persist(&mut state, false);
603
+        assert_eq!(state.health, HealthStatus::Degraded);
604
+    }
605
+
606
+    #[test]
607
+    fn health_recovers_when_persist_succeeds() {
608
+        let mut state = DaemonState {
609
+            health: HealthStatus::Degraded,
610
+            requests: RequestRegistry::new(Duration::from_secs(5)),
611
+            running: true,
612
+        };
613
+        update_health_after_persist(&mut state, true);
614
+        assert_eq!(state.health, HealthStatus::Healthy);
615
+    }
616
+
575
     #[test]
617
     #[test]
576
     fn status_request_returns_status_response() {
618
     fn status_request_returns_status_response() {
577
         let (mut client, server) = UnixStream::pair().expect("pair should be created");
619
         let (mut client, server) = UnixStream::pair().expect("pair should be created");