Bash · 4007 bytes Raw Blame History
1 #!/bin/bash
2
3 set -euo pipefail
4
# Write a timestamped message to stdout.
# Arguments: $@ - message words; they are joined into a single line using the
#                 first character of IFS (a space by default).
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" followed by a newline.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
8
# Time a single HTTP request and compare its total duration against a limit.
# Arguments:
#   $1 - URL to request
#   $2 - maximum acceptable total time in seconds (may be fractional, e.g. 0.5)
#   $3 - human-readable name used in the log messages
# Outputs:   progress and PASS/FAIL lines via log
# Returns:   0 when the request completed within the limit;
#            1 when it was slower, when curl failed, or when curl did not
#            report a usable timing value
check_response_time() {
  local url="$1"
  local max_time="$2"
  local name="$3"

  log "Checking response time for $name..."

  # curl emits the -w value even when the transfer fails, so a sentinel must
  # not be appended to its output; the exit status is inspected instead.
  local response_time curl_status=0
  response_time=$(curl -o /dev/null -s -w '%{time_total}' "$url") || curl_status=$?

  if ((curl_status != 0)); then
    log "FAIL: $name request failed (curl exit code $curl_status)"
    return 1
  fi

  local numeric_re='^[0-9]+([.][0-9]+)?$'
  if [[ ! "$response_time" =~ $numeric_re ]]; then
    log "FAIL: $name returned an unusable response time '$response_time'"
    return 1
  fi

  # Convert both values to milliseconds for display. LC_ALL=C keeps awk's
  # number formatting independent of the caller's locale.
  local response_ms max_ms
  response_ms=$(LC_ALL=C awk -v s="$response_time" 'BEGIN { printf "%.1f", s * 1000 }')
  max_ms=$(LC_ALL=C awk -v s="$max_time" 'BEGIN { printf "%g", s * 1000 }')

  # awk exits 0 (shell "true") exactly when the measured time exceeds the limit.
  if LC_ALL=C awk -v t="$response_time" -v m="$max_time" \
    'BEGIN { exit !(t + 0 > m + 0) }'; then
    log "FAIL: $name response time ${response_ms}ms > ${max_ms}ms"
    return 1
  fi

  log "PASS: $name response time ${response_ms}ms"
  return 0
}
30
# Probe a service's health endpoint with curl.
# Arguments:
#   $1 - service name used in the log messages
#   $2 - URL of the health endpoint
# Outputs:   progress and PASS/FAIL lines via log
# Returns:   0 when `curl -f -s` succeeds for the URL, 1 otherwise
check_service_health() {
  local service="$1" url="$2"

  log "Checking health of $service..."

  # Handle the failing probe first so the success path reads straight through.
  if ! curl -f -s "$url" >/dev/null; then
    log "FAIL: $service health check failed"
    return 1
  fi

  log "PASS: $service is healthy"
  return 0
}
45
# Verify that every Docker service matching the "zephyrfs_" name filter has
# at least one running replica.
# Outputs:   one PASS/FAIL line per service via log, or a WARNING when the
#            filter matches no service
# Returns:   the number of services with zero running replicas (capped at
#            255, the largest representable exit status); 1 when the service
#            list cannot be obtained from docker
check_docker_services() {
  log "Checking Docker services status..."

  # Request only the two fields that are used, and without the "table"
  # directive, so no header row is printed and mistaken for a service.
  # Capturing the output first also makes a docker failure visible.
  local listing
  if ! listing=$(docker service ls --filter name="zephyrfs_" \
    --format '{{.Name}} {{.Replicas}}'); then
    log "FAIL: Unable to list Docker services"
    return 1
  fi

  local failed_services=0
  local seen_services=0
  local service_name replicas

  while read -r service_name replicas; do
    # The here-string yields one empty line when the listing is empty.
    [[ -n "$service_name" ]] || continue
    seen_services=$((seen_services + 1))

    # Keep only the first token ("running/desired") in case the replicas
    # column is followed by extra text.
    replicas=${replicas%% *}

    # Anchored at the start so that e.g. "10/10" is not read as "0/...".
    if [[ "$replicas" == 0/* ]]; then
      log "FAIL: Service $service_name has 0 running replicas"
      # Plain assignment: ((var++)) returns status 1 when var is 0, which
      # would abort the script under `set -e`.
      failed_services=$((failed_services + 1))
    else
      log "PASS: Service $service_name is running ($replicas)"
    fi
  done <<<"$listing"

  if ((seen_services == 0)); then
    log "WARNING: No zephyrfs_ services found"
  fi

  if ((failed_services > 255)); then
    failed_services=255
  fi
  return "$failed_services"
}
65
# Load-test an endpoint with Apache Bench (ab) and check the mean time per
# request against a threshold.
# Arguments (all optional; the defaults are the values that used to be
# hard-coded):
#   $1 - URL to test                      (default http://localhost/api/health)
#   $2 - number of concurrent requests    (default 10)
#   $3 - total number of requests         (default 100)
#   $4 - maximum acceptable mean time, ms (default 500)
# Outputs:   progress and PASS/FAIL lines via log
# Returns:   0 when the mean time is within the threshold; 1 when ab fails,
#            its output cannot be parsed, or the mean time is too high
run_performance_test() {
  local url="${1:-http://localhost/api/health}"
  local concurrent_requests="${2:-10}"
  local total_requests="${3:-100}"
  local max_mean_ms="${4:-500}"

  log "Running performance test..."
  log "Testing with $concurrent_requests concurrent requests ($total_requests total)..."

  # Check ab's exit status directly. Appending a marker string to its output
  # cannot be matched reliably because ab may already have printed something.
  local output
  if ! output=$(ab -n "$total_requests" -c "$concurrent_requests" -q "$url" 2>/dev/null); then
    log "FAIL: Performance test failed"
    return 1
  fi

  # Take the 4th field of the first "Time per request:" line. A single awk
  # reading a here-string always exits 0, so a missing line results in an
  # empty value rather than a pipeline failure under `pipefail`.
  local mean_time
  mean_time=$(awk '/Time per request:/ { print $4; exit }' <<<"$output")

  local numeric_re='^[0-9]+([.][0-9]+)?$'
  if [[ ! "$mean_time" =~ $numeric_re ]]; then
    log "FAIL: Could not parse performance test results"
    return 1
  fi

  log "Mean response time: ${mean_time}ms"

  # awk exits 0 (shell "true") exactly when the mean exceeds the threshold.
  if LC_ALL=C awk -v t="$mean_time" -v m="$max_mean_ms" \
    'BEGIN { exit !(t + 0 > m + 0) }'; then
    log "FAIL: Mean response time exceeds ${max_mean_ms}ms"
    return 1
  fi

  log "PASS: Mean response time is acceptable"
  return 0
}
101
# Run every health, response-time and performance check, then report a
# summary.
# Outputs:   progress and result lines via log
# Exits:     0 when every check passed, 1 when at least one check failed
#
# The failure counter is advanced with var=$((var + 1)) rather than
# ((var++)): the latter returns status 1 when var is 0, and under `set -e`
# that would terminate the script on the first failed check, before the
# remaining checks and the summary are produced.
main() {
  local failed_tests=0

  log "Starting comprehensive health check..."

  # Check Docker services
  if ! check_docker_services; then
    failed_tests=$((failed_tests + 1))
  fi

  # Health checks
  check_service_health "Web Frontend" "http://localhost/health" ||
    failed_tests=$((failed_tests + 1))
  check_service_health "API Server" "http://localhost/api/health" ||
    failed_tests=$((failed_tests + 1))
  check_service_health "Prometheus" "http://localhost:9090/-/healthy" ||
    failed_tests=$((failed_tests + 1))
  check_service_health "Grafana" "http://localhost:3001/api/health" ||
    failed_tests=$((failed_tests + 1))

  # Response time checks (sub-500ms requirement)
  check_response_time "http://localhost/" 0.5 "Frontend" ||
    failed_tests=$((failed_tests + 1))
  check_response_time "http://localhost/api/health" 0.5 "API Health" ||
    failed_tests=$((failed_tests + 1))
  check_response_time "http://localhost/api/files" 0.5 "File Listing" ||
    failed_tests=$((failed_tests + 1))

  # Performance test (skipped, not failed, when ab is not installed)
  if command -v ab >/dev/null 2>&1; then
    run_performance_test || failed_tests=$((failed_tests + 1))
  else
    log "WARNING: Apache Bench (ab) not available, skipping performance test"
  fi

  log "Health check completed"

  if ((failed_tests == 0)); then
    log "SUCCESS: All tests passed"
    exit 0
  fi

  log "FAILURE: $failed_tests test(s) failed"
  exit 1
}
140
# Invoke main only when this file is executed directly. When the file is
# sourced, BASH_SOURCE[0] differs from $0, the test succeeds and main is
# skipped.
[[ "${BASH_SOURCE[0]}" != "${0}" ]] || main "$@"