DevOps运维技术栈

Prometheus 使用 node_exporter 组件监控 Linux

安装组件

# Install node_exporter under /usr/local/prometheus/node-exporter and run it
# as a systemd service listening on port 9100.
# download: https://prometheus.io/download/
mkdir -p /usr/local/prometheus/node-exporter
# Dedicated service account: no login shell (-s /sbin/nologin), no home (-M).
useradd -s /sbin/nologin -M prometheus
pkg=node_exporter-1.7.0.linux-amd64.tar.gz
# --strip-components=1 removes the leading path component of every archive
# member, so the files are unpacked directly into the target directory.
tar xf "${pkg}" -C /usr/local/prometheus/node-exporter --strip-components=1
chown -R prometheus:prometheus /usr/local/prometheus
# The heredoc delimiter is quoted so the shell writes the unit file verbatim;
# $MAINPID must reach systemd unexpanded.
cat > /etc/systemd/system/node_exporter.service <<'eof'
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/
After=network.target

[Service]
User=prometheus
Group=prometheus
ExecStart=/usr/local/prometheus/node-exporter/node_exporter --web.listen-address=:9100
ExecReload=/bin/kill -s HUP $MAINPID
ExecStop=/bin/kill -s QUIT $MAINPID

[Install]
WantedBy=multi-user.target
eof
# Pick up the new unit, then enable it at boot and start it immediately.
systemctl daemon-reload && systemctl enable node_exporter --now
systemctl status node_exporter
添加监控
# Add a scrape job for the host running node_exporter.
scrape_configs:
  # ... (keep the existing scrape jobs here)
  - job_name: "project-01"
    static_configs:
      - targets:
          - "10.0.0.68:9100"
        # Extra label attached to every series scraped from these targets.
        labels:
          uat: webserver
# Validate the configuration first and only trigger a reload when it passes.
# NOTE: the /-/reload endpoint is only served when Prometheus was started
# with --web.enable-lifecycle.
./promtool check config /usr/local/prometheus/prometheus.yml \
  && curl -X POST http://10.0.0.67:9090/-/reload
配置告警
# Create the rules directory, then edit the rule file inside it.
# (Running mkdir on the .yml path would create a directory with that name.)
mkdir -p rules.d
vim rules.d/linux-alert.yml
--

# Alerting rules evaluated against node_exporter metrics.
groups:
  - name: node-exporter
    rules:
      # Available memory below 10% of total for 2 minutes.
      - alert: 内存不足
        expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "当前实例:{{ $labels.instance }}主机内存不足"
          description: "内存可用率<10,当前值: {{ $value }}"

      # Filesystem usage above 85% for 2 minutes (tmpfs devices excluded).
      # $value is the USED percentage, so the description reports usage.
      - alert: 系统盘空间不足
        expr: (100 - (node_filesystem_free_bytes{device!="tmpfs"}/node_filesystem_size_bytes{device!="tmpfs"}) * 100) > 85
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "当前实例:{{ $labels.instance }} 系统盘空间不足"
          description: "磁盘使用率>85%, 当前值: {{ $value }}"

      # Scrape target has been unreachable for 30 seconds.
      - alert: 机器宕机
        expr: up == 0
        for: 30s
        labels:
          severity: warning
        annotations:
          summary: "当前实例:{{ $labels.instance }}离线,无法通信"
          description: "主机疑似关机状态, 当前值: {{ $value }}"

      # Average non-idle CPU share per instance above 80% for 2 minutes.
      - alert: cpu高使用率
        expr: (1 - (avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[2m])))) * 100 > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "当前实例:{{ $labels.instance }}cpu负载高"
          description: "cpu使用率>80%, 当前值: {{ $value }}"
# Edit the main Prometheus configuration to register the rule files.
vim prometheus.yml
--

# Load every .yml rule file found in the rules.d directory.
rule_files:
  - "rules.d/*.yml"
# Validate the configuration (including the referenced rule files) first and
# only trigger a reload when the check passes.
# NOTE: the /-/reload endpoint is only served when Prometheus was started
# with --web.enable-lifecycle.
./promtool check config /usr/local/prometheus/prometheus.yml \
  && curl -X POST http://10.0.0.67:9090/-/reload
退出移动版