#!/bin/bash
#
# Health check script: verifies Docker service replicas, HTTP health
# endpoints and response times, and (when Apache Bench is installed) runs a
# small load test. Exits 0 when every check passes and 1 otherwise.

# Strict mode: abort on unhandled errors (-e), on use of unset variables (-u)
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail
#######################################
# Print a timestamped log line.
# Arguments: $@ - message words; joined using the first character of IFS
#                 (a space by default)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout
#######################################
log() {
  # printf keeps the message literal (no option or backslash interpretation).
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
#######################################
# Measure the total request time for a URL and compare it to a limit.
# Arguments:
#   $1 - URL to request
#   $2 - maximum acceptable time in seconds (may be fractional, e.g. 0.5)
#   $3 - human-readable name used in log messages
#   $4 - optional hard timeout for the request in seconds (default: 10)
# Outputs:   progress and PASS/FAIL lines via log
# Returns:   0 if the request succeeded within the limit, 1 otherwise
#######################################
check_response_time() {
  local url="$1"
  local max_time="$2"
  local name="$3"
  local timeout="${4:-10}"

  log "Checking response time for $name..."

  # Capture curl's exit status separately. curl still prints the -w output
  # when the transfer fails, so echoing a sentinel after it would be appended
  # to the measurement (e.g. "0.000321999") instead of replacing it.
  local response_time curl_status=0
  response_time=$(curl -o /dev/null -s --max-time "$timeout" \
    -w '%{time_total}' "$url") || curl_status=$?

  if (( curl_status != 0 )); then
    log "FAIL: $name request failed (curl exit code $curl_status)"
    return 1
  fi

  # Tolerate a decimal comma, then insist on a plain decimal number so a
  # garbled value can never be silently treated as "fast enough".
  response_time=${response_time/,/.}
  if [[ ! "$response_time" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    log "FAIL: $name returned an unparsable response time '$response_time'"
    return 1
  fi

  # Convert both values to whole milliseconds for the log messages.
  local response_ms max_ms
  response_ms=$(awk -v t="$response_time" 'BEGIN { printf "%.0f", t * 1000 }')
  max_ms=$(awk -v t="$max_time" 'BEGIN { printf "%.0f", t * 1000 }')

  # Floating-point comparison; awk exits 0 when the limit is exceeded.
  if awk -v t="$response_time" -v m="$max_time" \
    'BEGIN { exit !(t + 0 > m + 0) }'; then
    log "FAIL: $name response time ${response_ms}ms > ${max_ms}ms"
    return 1
  fi

  log "PASS: $name response time ${response_ms}ms"
  return 0
}
#######################################
# Probe an HTTP health endpoint.
# Arguments:
#   $1 - service name used in log messages
#   $2 - URL of the health endpoint
#   $3 - optional request timeout in seconds (default: 10)
# Outputs:   progress and PASS/FAIL lines via log
# Returns:   0 if curl succeeds, 1 otherwise
#######################################
check_service_health() {
  local service="$1"
  local url="$2"
  local timeout="${3:-10}"

  log "Checking health of $service..."

  # -f turns HTTP error responses into a non-zero exit status, -s silences
  # progress output, and --max-time keeps an unresponsive endpoint from
  # blocking the whole run.
  if ! curl -f -s --max-time "$timeout" "$url" >/dev/null; then
    log "FAIL: $service health check failed"
    return 1
  fi

  log "PASS: $service is healthy"
  return 0
}
#######################################
# Verify that matching Docker services have running replicas.
# Arguments:
#   $1 - optional service-name filter (default: zephyrfs_)
# Outputs:   one PASS/FAIL line per service via log
# Returns:   number of services with 0 running replicas (capped at 255),
#            or 1 if the service list could not be obtained
#######################################
check_docker_services() {
  local name_filter="${1:-zephyrfs_}"

  log "Checking Docker services status..."

  # Fetch the list up front so a failing docker command is reported instead
  # of being treated as "no services, nothing failed". A plain (non-table)
  # format is used so no header row is mistaken for a service.
  local listing
  if ! listing=$(docker service ls --filter name="$name_filter" \
    --format '{{.Name}} {{.Replicas}}'); then
    log "FAIL: Could not list Docker services"
    return 1
  fi

  if [[ -z "$listing" ]]; then
    log "WARNING: No Docker services matching '$name_filter' found"
    return 0
  fi

  local failed_services=0
  local service_name replicas _rest
  # _rest absorbs any trailing annotation after the "running/desired" field.
  while read -r service_name replicas _rest; do
    [[ -n "$service_name" ]] || continue

    # Anchor the match at the start: "10/10" contains "0/" but is healthy.
    if [[ "$replicas" == 0/* ]]; then
      log "FAIL: Service $service_name has 0 running replicas"
      failed_services=$((failed_services + 1))
    else
      log "PASS: Service $service_name is running ($replicas)"
    fi
  done <<<"$listing"

  # Exit statuses wrap at 256; cap so a large count can never read as 0.
  return $(( failed_services > 255 ? 255 : failed_services ))
}
#######################################
# Run a short Apache Bench load test and check the mean time per request.
# Arguments (all optional):
#   $1 - URL to test              (default: http://localhost/api/health)
#   $2 - concurrent requests      (default: 10)
#   $3 - total requests           (default: 100)
#   $4 - maximum mean time in ms  (default: 500)
# Outputs:   progress and PASS/FAIL lines via log
# Returns:   0 if the mean time is within the limit, 1 otherwise
#######################################
run_performance_test() {
  local url="${1:-http://localhost/api/health}"
  local concurrent_requests="${2:-10}"
  local total_requests="${3:-100}"
  local max_mean_ms="${4:-500}"

  log "Running performance test..."
  log "Testing with $concurrent_requests concurrent requests ($total_requests total)..."

  # Test ab's exit status directly; appending a marker string to its output
  # would not match an exact comparison once ab has printed anything.
  local output
  if ! output=$(ab -n "$total_requests" -c "$concurrent_requests" -q "$url" \
    2>/dev/null); then
    log "FAIL: Performance test failed"
    return 1
  fi

  # ab reports "Time per request:" twice; the first occurrence is the mean
  # per request, and its value is the fourth whitespace-separated field.
  local mean_time
  mean_time=$(awk '/Time per request:/ { print $4; exit }' <<<"$output")

  if [[ -z "$mean_time" ]]; then
    log "FAIL: Could not parse performance test results"
    return 1
  fi

  log "Mean response time: ${mean_time}ms"

  # Floating-point comparison; awk exits 0 when the limit is exceeded.
  if awk -v t="$mean_time" -v m="$max_mean_ms" \
    'BEGIN { exit !(t + 0 > m + 0) }'; then
    log "FAIL: Mean response time exceeds ${max_mean_ms}ms"
    return 1
  fi

  log "PASS: Mean response time is acceptable"
  return 0
}
#######################################
# Run every health check, count the failures and report a summary.
# Arguments: none used
# Outputs:   progress and summary lines via log
# Exits:     0 when all checks pass, 1 when at least one check fails
#######################################
main() {
  local failed_tests=0

  log "Starting comprehensive health check..."

  # Failures are counted with an arithmetic assignment rather than
  # ((failed_tests++)): the post-increment evaluates to 0 on the first
  # failure, which is exit status 1 and would abort the script under
  # `set -e` before the summary is printed.

  # Check Docker services
  check_docker_services || failed_tests=$((failed_tests + 1))

  # Health checks
  check_service_health "Web Frontend" "http://localhost/health" \
    || failed_tests=$((failed_tests + 1))
  check_service_health "API Server" "http://localhost/api/health" \
    || failed_tests=$((failed_tests + 1))
  check_service_health "Prometheus" "http://localhost:9090/-/healthy" \
    || failed_tests=$((failed_tests + 1))
  check_service_health "Grafana" "http://localhost:3001/api/health" \
    || failed_tests=$((failed_tests + 1))

  # Response time checks (sub-500ms requirement)
  check_response_time "http://localhost/" 0.5 "Frontend" \
    || failed_tests=$((failed_tests + 1))
  check_response_time "http://localhost/api/health" 0.5 "API Health" \
    || failed_tests=$((failed_tests + 1))
  check_response_time "http://localhost/api/files" 0.5 "File Listing" \
    || failed_tests=$((failed_tests + 1))

  # Performance test (skipped when Apache Bench is not installed)
  if command -v ab >/dev/null 2>&1; then
    run_performance_test || failed_tests=$((failed_tests + 1))
  else
    log "WARNING: Apache Bench (ab) not available, skipping performance test"
  fi

  log "Health check completed"

  if (( failed_tests == 0 )); then
    log "SUCCESS: All tests passed"
    exit 0
  fi

  log "FAILURE: $failed_tests test(s) failed"
  exit 1
}
# Run main only when this file is executed directly. When it is sourced,
# BASH_SOURCE[0] differs from $0, so only the functions get defined.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
fi