# Critical alert: fires for every scraped target whose `up` series has been 0
# for 30 seconds. One alert is raised per failing target, and the target's
# `instance` label is interpolated into the description.
ALERT monitor_service_down
  IF up == 0
  FOR 30s
  LABELS { severity = "critical" }
  ANNOTATIONS {
    summary = "Monitor service non-operational",
    description = "Service {{ $labels.instance }} is down.",
  }
# Warning alert: fires when `node_load1` stays above 1.5 for 30 seconds.
# The threshold is an absolute load value; it is not scaled by any CPU count
# in this rule. The comparison keeps the series labels, so `instance` and
# `job` are available to the description template.
ALERT high_cpu_load
  IF node_load1 > 1.5
  FOR 30s
  LABELS { severity = "warning" }
  ANNOTATIONS {
    summary = "Server under high load",
    description = "Docker host is under high load, the avg load 1m is at {{ $value}}. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.",
  }
# Warning alert: fires when used memory exceeds 85% of total for 30 seconds.
# Used memory is computed as MemTotal - (MemFree + Buffers + Cached).
#
# The sums are grouped by (instance, job). An un-grouped sum() discards every
# label, which would leave {{ $labels.instance }} and {{ $labels.job }} empty
# in the description and would blend all scraped hosts into a single ratio.
# Grouping keeps one result (and one alert) per host; with a single host the
# computed value is the same as the un-grouped form.
ALERT high_memory_load
  IF (
       sum(node_memory_MemTotal) by (instance, job)
       - sum(node_memory_MemFree + node_memory_Buffers + node_memory_Cached) by (instance, job)
     )
     / sum(node_memory_MemTotal) by (instance, job) * 100 > 85
  FOR 30s
  LABELS { severity = "warning" }
  ANNOTATIONS {
    summary = "Server memory is almost full",
    description = "Docker host memory usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.",
  }
# Warning alert: fires when a filesystem with fstype="aufs" is more than 85%
# used ((size - free) / size * 100) for 30 seconds. Only aufs filesystems are
# matched; other filesystem types are not evaluated by this rule.
#
# NOTE(review): the alert name looks like a typo for "high_storage_load". It
# is left unchanged because the name becomes the `alertname` label, which may
# be referenced by routing or silences outside this file; confirm before
# renaming.
ALERT hight_storage_load
  IF (
       node_filesystem_size{fstype="aufs"}
       - node_filesystem_free{fstype="aufs"}
     )
     / node_filesystem_size{fstype="aufs"} * 100 > 85
  FOR 30s
  LABELS { severity = "warning" }
  ANNOTATIONS {
    summary = "Server storage is almost full",
    description = "Docker host storage usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.",
  }
# Critical alert: fires when no `container_memory_usage_bytes` series with
# name="jenkins" has existed for 30 seconds. absent() yields a result only
# when the selector matches nothing, so a missing series is treated as the
# container being down.
ALERT jenkins_down
  IF absent(container_memory_usage_bytes{name="jenkins"})
  FOR 30s
  LABELS { severity = "critical" }
  ANNOTATIONS {
    summary = "Jenkins down",
    description = "Jenkins container is down for more than 30 seconds.",
  }
# Warning alert: fires when the Jenkins container's CPU usage exceeds 10% for
# 30 seconds. Usage is the 1-minute rate of `container_cpu_usage_seconds_total`
# for name="jenkins", divided by the number of `node_cpu{mode="system"}`
# series and scaled to a percentage. Both operands are aggregated without
# labels, so the result carries no labels.
ALERT jenkins_high_cpu
  IF sum(rate(container_cpu_usage_seconds_total{name="jenkins"}[1m]))
     / count(node_cpu{mode="system"}) * 100 > 10
  FOR 30s
  LABELS { severity = "warning" }
  ANNOTATIONS {
    summary = "Jenkins high CPU usage",
    description = "Jenkins CPU usage is {{ humanize $value}}%.",
  }
# Warning alert: fires when the summed `container_memory_usage_bytes` of all
# series with name="jenkins" stays above 1200000000 for 30 seconds. The value
# is rendered in the description through the `humanize` template function.
ALERT jenkins_high_memory
  IF sum(container_memory_usage_bytes{name="jenkins"}) > 1200000000
  FOR 30s
  LABELS { severity = "warning" }
  ANNOTATIONS {
    summary = "Jenkins high memory usage",
    description = "Jenkins memory consumption is at {{ humanize $value}}.",
  }