==================================================================================== === On client ==================================================================================== ==================================================================================== === Node exporter installing: ==================================================================================== # read -s -p "Enter web password: " WEB_PASSWORD groupadd --system prometheus useradd -s /sbin/nologin --system -g prometheus prometheus curl -s https://api.github.com/repos/prometheus/node_exporter/releases/latest | grep browser_download_url | grep linux-amd64 | cut -d '"' -f 4 | wget -qi - tar xvf node_exporter-*linux-amd64.tar.gz cd node_exporter*/ mv node_exporter /usr/local/bin/ mkdir /etc/node_exporter chown root:root /usr/local/bin/node_exporter cd .. rm -rf node_exporter* echo "[Unit] Description=Prometheus Wants=network-online.target After=network-online.target [Service] Type=simple User=prometheus Group=prometheus ExecReload=/bin/kill -HUP \$MAINPID ExecStart=/usr/local/bin/node_exporter --collector.cpu --collector.diskstats --collector.filesystem --collector.loadavg --collector.meminfo --collector.filefd --collector.netdev --collector.stat --collector.netstat --collector.systemd --collector.uname --collector.vmstat --collector.time --collector.mdadm --collector.zfs --collector.tcpstat --collector.bonding --collector.hwmon --collector.arp --web.listen-address=:9100 --web.telemetry-path="/metrics" --web.config.file=/etc/node_exporter/web.yml [Install] WantedBy=multi-user.target" > /etc/systemd/system/node_exporter.service == Basic auth: == apt-get install python3-bcrypt echo '#!/usr/bin/python import os import getpass import bcrypt password = "nonsecure" hashed_password = bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()) file = open("hash.txt","w") file.write(str(hashed_password.decode())) file.close()' > ./auth.py sed -i "s/nonsecure/$WEB_PASSWORD/" ./auth.py python3 
./auth.py HASHVAR=$(cat hash.txt) echo "basic_auth_users: user: $HASHVAR" > /etc/node_exporter/web.yml chown prometheus:prometheus /etc/node_exporter/web.yml chmod 400 /etc/node_exporter/web.yml rm auth.py rm hash.txt systemctl daemon-reload systemctl start node_exporter systemctl enable node_exporter ### Check the firewall if needed ==================================================================================== === Processes exporter installing: ==================================================================================== # VERSION="0.7.10" wget https://github.com/ncabatoff/process-exporter/releases/download/v$VERSION/process-exporter-$VERSION.linux-amd64.tar.gz tar -xvzf process-exporter-$VERSION.linux-amd64.tar.gz mv process-exporter-$VERSION.linux-amd64/process-exporter /usr/local/bin/ rm -rf process-exporter-$VERSION.linux-amd64* echo "process_names: - name: \"{{.Comm}}\" cmdline: - '.+'" > /etc/process-exporter.yml echo "[Unit] Description=Process Exporter for Prometheus [Service] User=root Type=simple ExecStart=/usr/local/bin/process-exporter --config.path /etc/process-exporter.yml --web.listen-address=:9256 KillMode=process Restart=always [Install] WantedBy=multi-user.target" > /etc/systemd/system/process-exporter.service systemctl daemon-reload systemctl enable process-exporter systemctl start process-exporter ==================================================================================== === Cadvisor installing: ==================================================================================== # cd /usr/local/bin wget https://github.com/Sergey1000/tools/raw/master/cadvisor chmod 755 cadvisor echo "[Unit] Description=cadvisor Wants=network-online.target After=network-online.target [Service] Type=simple ExecStop=/bin/kill -HUP ExecStart=/usr/local/bin/cadvisor -port 8091 [Install] WantedBy=multi-user.target" > /etc/systemd/system/cadvisor.service systemctl daemon-reload systemctl enable cadvisor systemctl start cadvisor 
==================================================================================== === Fluentbit installing: ==================================================================================== # == https://docs.fluentd.org/installation/before-install == https://docs.fluentd.org/installation/install-by-deb wget -qO - https://packages.fluentbit.io/fluentbit.key | sudo apt-key add - echo "deb https://packages.fluentbit.io/ubuntu/focal focal main" > /etc/apt/sources.list.d/fluentbit.list sudo apt-get update sudo apt-get -y install td-agent-bit echo "[Unit] Description=Fluentbit for Prometheus Wants=network-online.target After=network-online.target [Service] Type=simple ExecStop=/bin/kill -HUP ExecStart=/opt/td-agent-bit/bin/td-agent-bit -c /etc/td-agent-bit/td-agent-bit.conf [Install] WantedBy=multi-user.target" > /etc/systemd/system/fluentbit.service systemctl daemon-reload systemctl enable fluentbit systemctl start fluentbit curl -fsSL https://calyptia-fluentd.s3.us-east-2.amazonaws.com/calyptia-fluentd-1-ubuntu-focal.sh | sh ==================================================================================== === Loki for Docker configuring: ==================================================================================== == install docker plugin for loki: == # docker plugin install grafana/loki-docker-driver:latest --alias loki --grant-all-permissions # be careful with rewriting: echo '{ "debug" : true, "log-driver": "loki", "log-opts": { "loki-url": "https://user:password@l.example.com/loki/api/v1/push", "loki-batch-size": "400" } }' > /etc/docker/daemon.json /etc/init.d/docker restart # start containers and check the new log driver within "docker inspect" ==================================================================================== === JMX configuring ==================================================================================== # VERSION="0.17.1" wget 
https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_httpserver/$VERSION/jmx_prometheus_httpserver-$VERSION.jar mv jmx_prometheus_httpserver-$VERSION.jar /usr/local/bin/jmx_prometheus_httpserver.jar wget https://bitbucket.org/s_alekseev/tools/raw/master/jdk-8u261-linux-x64.tar.gz mkdir /usr/lib/jvm/ tar -zxvf jdk-8u261-linux-x64.tar.gz -C /usr/lib/jvm/ update-alternatives --install /usr/bin/java java /usr/lib/jvm/jdk1.8.0_261/bin/java 3 sudo update-alternatives --config java # You should edit update-alternatives if another Java was installed echo "PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin" JAVA_HOME=/usr/lib/jvm/jdk1.8.0_261/ JRE_HOME=/usr/lib/jvm/jdk1.8.0_261/jre/ J2SDKDIR=/usr/lib/jvm/jdk1.8.0_261/ J2REDIR=/usr/lib/jvm/jdk1.8.0_261/jre/ JAVA_INCLUDE_DIR=/usr/lib/jvm/jdk1.8.0_261/include" >> /etc/environment echo "[Unit] Description=jmx_exp [Service] User=root Type=simple ExecStart=/usr/bin/java -jar /usr/local/bin/jmx_prometheus_httpserver.jar 12345 /etc/jmx_exporter/jmx_exp.yml [Install] WantedBy=multi-user.target" > /etc/systemd/system/jmx_exp.service systemctl daemon-reload systemctl enable jmx_exp systemctl start jmx_exp ==================================================================================== === Nginx configuring: ==================================================================================== # VERSION="0.11.0" read -p "Enter Nginx domain: " NGINX_DOMAIN wget https://github.com/nginxinc/nginx-prometheus-exporter/releases/download/v$VERSION/nginx-prometheus-exporter_${VERSION}_linux_amd64.tar.gz tar -xvzf nginx-prometheus-exporter_${VERSION}_linux_amd64.tar.gz mv nginx-prometheus-exporter /usr/local/bin/ chown root:root /usr/local/bin/nginx-prometheus-exporter echo "[Unit] Description=Prometheus Wants=network-online.target After=network-online.target [Service] Type=simple ExecReload=/bin/kill -HUP \$MAINPID ExecStart=/usr/local/bin/nginx-prometheus-exporter 
-nginx.scrape-uri=http://$NGINX_DOMAIN/stub_status [Install] WantedBy=multi-user.target" > /etc/systemd/system/nginx_exporter.service systemctl daemon-reload systemctl start nginx_exporter systemctl enable nginx_exporter ==================================================================================== === Mysql configuring: ==================================================================================== # CREATE USER 'exporter'@'localhost' IDENTIFIED BY 'password' WITH MAX_USER_CONNECTIONS 3; GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'exporter'@'localhost'; == On newer versions of mysql-server (>= 8.x), you may need to change the plugin used for auth: == == ALTER USER 'exporter'@'%' IDENTIFIED WITH mysql_native_password BY 'password'; == # VERSION="0.14.0" wget https://github.com/prometheus/mysqld_exporter/releases/download/v$VERSION/mysqld_exporter-$VERSION.linux-amd64.tar.gz tar -xvzf mysqld_exporter-$VERSION.linux-amd64.tar.gz mv mysqld_exporter-$VERSION.linux-amd64/mysqld_exporter /usr/local/bin/ rm -rf mysqld_exporter-$VERSION.linux-amd64* chown root:root /usr/local/bin/mysqld_exporter chmod 755 /usr/local/bin/mysqld_exporter echo "[Unit] Description=Prometheus Wants=network-online.target After=network-online.target [Service] Type=simple User=prometheus Group=prometheus Environment='DATA_SOURCE_NAME=exporter:password@(host:mysql_port)/' ExecReload=/bin/kill -HUP ExecStart=/usr/local/bin/mysqld_exporter --collect.global_status --collect.info_schema.innodb_metrics --collect.auto_increment.columns --collect.info_schema.processlist --collect.binlog_size --collect.info_schema.tablestats --collect.global_variables --collect.info_schema.query_response_time --collect.info_schema.userstats --collect.info_schema.tables --collect.perf_schema.tablelocks --collect.perf_schema.file_events --collect.perf_schema.eventswaits --collect.perf_schema.indexiowaits --collect.perf_schema.tableiowaits --collect.slave_status 
--web.listen-address=host:exporter_port [Install] WantedBy=multi-user.target" > /etc/systemd/system/mysqld_exporter.service systemctl daemon-reload systemctl start mysqld_exporter systemctl enable mysqld_exporter ==================================================================================== === Iptables_exporter: ==================================================================================== # VERSION="0.3.0" wget https://github.com/kbknapp/iptables_exporter/releases/download/v$VERSION/iptables_exporter-v$VERSION-x86_64-linux-musl.tar.gz tar -xvzf iptables_exporter-v$VERSION-x86_64-linux-musl.tar.gz mv iptables_exporter /usr/local/bin/ echo "[Unit] Description=Iptables Exporter for Prometheus [Service] User=root Type=simple ExecStart=/usr/local/bin/iptables_exporter --scrape-targets iptables --listen-address 127.0.0.1 KillMode=process Restart=always [Install] WantedBy=multi-user.target" > /etc/systemd/system/iptables_exporter.service systemctl daemon-reload systemctl start iptables_exporter systemctl enable iptables_exporter ==================================================================================== === Windows exporter: ==================================================================================== # == Установка pwsh для использования Remove-Service. $ProgressPreference = 'SilentlyContinue' Invoke-WebRequest "https://github.com/PowerShell/PowerShell/releases/download/v7.4.1/PowerShell-7.4.1-win-x64.msi" -OutFile "$env:HOMEPATH\Desktop\pwsh.msi" msiexec.exe /package "$env:HOMEPATH\Desktop\pwsh.msi" /quiet ADD_EXPLORER_CONTEXT_MENU_OPENPOWERSHELL=1 ADD_FILE_CONTEXT_MENU_RUNPOWERSHELL=1 ENABLE_PSREMOTING=1 REGISTER_MANIFEST=1 USE_MU=1 ENABLE_MU=1 ADD_PATH=1 == Установка windows_exporter как сервиса с его кастомизацией с помощью конфига после удаления дефолтного сервиса. 
Invoke-WebRequest "https://github.com/prometheus-community/windows_exporter/releases/download/v0.25.1/windows_exporter-0.25.1-amd64.msi" -OutFile "$env:HOMEPATH\Desktop\windows_exporter.msi" msiexec /i "$env:HOMEPATH\Desktop\windows_exporter.msi" Stop-Service windows_exporter Remove-Service windows_exporter # В "service-where" в качестве имени выступает "Service Name" из списка services.msc, не "Display Name" $windows_exporter_config = @" collectors: enabled: service,cpu,logical_disk,memory,net,os collector: service: services-where: "Name='windows_exporter' or Name='Some Service Name'" log: level: warn web: listen-address: ":1234" "@ Set-Content "C:\Program Files\windows_exporter\config.yml" $windows_exporter_config $params = @{ Name = "windows_exporter" BinaryPathName = '"C:\Program Files\windows_exporter\windows_exporter.exe" --config.file="C:\Program Files\windows_exporter\config.yml" --config.file.insecure-skip-verify' DependsOn = "wmiApSrv" DisplayName = "windows_exporter" StartupType = "Automatic" Description = "This is a windows_exporter service." 
} New-Service @params Start-Service windows_exporter New-NetFirewallRule -DisplayName "windows_exporter (HTTP 1234)" -Description "windows_exporter HTTP endpoint" -Direction Inbound -Protocol TCP -LocalPort 1234 -Program "C:\Program Files\windows_exporter\windows_exporter.exe" -Profile Any -Action Allow ==================================================================================== === On server ==================================================================================== ==================================================================================== === Grafana installing: ==================================================================================== # read -p "Enter Grafana domain: " GRAFANA_DOMAIN curl -s https://packages.grafana.com/gpg.key | gpg --no-default-keyring --keyring gnupg-ring:/etc/apt/trusted.gpg.d/grafana.gpg --import chmod 644 /etc/apt/trusted.gpg.d/grafana.gpg gpg --no-default-keyring --keyring gnupg-ring:/etc/apt/trusted.gpg.d/grafana.gpg --keyserver keyserver.ubuntu.com --recv 6B73A36E6026DFCA echo "deb https://packages.grafana.com/oss/deb stable main" > /etc/apt/sources.list.d/grafana.list apt-get update apt-get -y install grafana systemctl start grafana-server systemctl enable grafana-server.service ### Nginx configuring: rm /etc/nginx/conf.d/default.conf echo "server { server_name $GRAFANA_DOMAIN; location / { proxy_pass http://127.0.0.1:3000; proxy_set_header Host $GRAFANA_DOMAIN; proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; proxy_set_header X-Real-IP \$remote_addr; } }" > /etc/nginx/conf.d/m.conf # Please, configure TLS certificate within the acme.sh by following this URL: http://blog.abccba.xyz/acme.txt #certbot --nginx --agree-tos --register-unsafely-without-email -d $GRAFANA_DOMAIN ==================================================================================== === Prometheus installing: ==================================================================================== # read -s -p "Enter 
web password: " WEB_PASSWORD VERSION="2.38.0" useradd --no-create-home --shell /bin/false prometheus useradd --no-create-home --shell /bin/false node_exporter mkdir /etc/prometheus mkdir /var/lib/prometheus chown prometheus:prometheus /etc/prometheus chown prometheus:prometheus /var/lib/prometheus cd /opt/ wget https://github.com/prometheus/prometheus/releases/download/v$VERSION/prometheus-$VERSION.linux-amd64.tar.gz tar -xvf prometheus-$VERSION.linux-amd64.tar.gz cd prometheus-$VERSION.linux-amd64 cp /opt/prometheus-$VERSION.linux-amd64/prometheus /usr/local/bin/ cp /opt/prometheus-$VERSION.linux-amd64/promtool /usr/local/bin/ chown prometheus:prometheus /usr/local/bin/prometheus chown prometheus:prometheus /usr/local/bin/promtool cp -r /opt/prometheus-$VERSION.linux-amd64/consoles /etc/prometheus cp -r /opt/prometheus-$VERSION.linux-amd64/console_libraries /etc/prometheus cp -r /opt/prometheus-$VERSION.linux-amd64/prometheus.yml /etc/prometheus chown -R prometheus:prometheus /etc/prometheus/consoles chown -R prometheus:prometheus /etc/prometheus/console_libraries chown -R prometheus:prometheus /etc/prometheus/prometheus.yml echo "[Unit] Description=Prometheus Wants=network-online.target After=network-online.target [Service] User=prometheus Group=prometheus Type=simple ExecStart=/usr/local/bin/prometheus --config.file /etc/prometheus/prometheus.yml --storage.tsdb.path /var/lib/prometheus/ --web.console.templates=/etc/prometheus/consoles --web.console.libraries=/etc/prometheus/console_libraries --web.config.file=/etc/prometheus/web.yml --storage.tsdb.retention.time=180d [Install] WantedBy=multi-user.target" > /etc/systemd/system/prometheus.service systemctl daemon-reload systemctl start prometheus systemctl enable prometheus ##### Basic auth: apt-get install python3-bcrypt echo '#!/usr/bin/python import os import getpass import bcrypt password = "nonsecure" hashed_password = bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()) file = open("hash.txt","w") 
file.write(str(hashed_password.decode())) file.close()' > ./auth.py sed -i "s/nonsecure/$WEB_PASSWORD/" ./auth.py python3 ./auth.py HASHVAR=$(cat hash.txt) echo "basic_auth_users: user: $HASHVAR" > /etc/prometheus/web.yml rm auth.py rm hash.txt ##### Add exporter URL to Grafana server's Prometheus in /etc/prometheus/prometheus.yml: - job_name: test-server static_configs: - targets: ["192.168.0.1:9100"] labels: alias: test-server systemctl restart prometheus ==================================================================================== === Loki installing: ==================================================================================== # read -p "Enter Loki domain: " LOKI_DOMAIN read -s -p "Enter web password: " WEB_PASSWORD apt-get install unzip VERSION="2.9.4" wget https://github.com/grafana/loki/releases/download/v$VERSION/loki-linux-amd64.zip wget https://github.com/grafana/loki/releases/download/v$VERSION/promtail-linux-amd64.zip unzip loki-linux-amd64.zip unzip promtail-linux-amd64.zip mv loki-linux-amd64 /usr/local/bin/ mv promtail-linux-amd64 /usr/local/bin/ rm loki-linux-amd64.zip rm promtail-linux-amd64.zip wget https://raw.githubusercontent.com/grafana/loki/master/cmd/loki/loki-local-config.yaml wget https://raw.githubusercontent.com/grafana/loki/main/clients/cmd/promtail/promtail-local-config.yaml mkdir /etc/loki mkdir /var/log/loki chown -R prometheus:prometheus /var/log/loki mv *yaml /etc/loki/ mkdir /etc/nginx/auth_conf echo "[Unit] Description=Loki Wants=network-online.target After=network-online.target [Service] User=prometheus Group=prometheus Type=simple ExecReload=/bin/kill -HUP ExecStart=/usr/local/bin/loki-linux-amd64 -config.file=/etc/loki/loki-local-config.yaml [Install] WantedBy=multi-user.target" > /etc/systemd/system/loki.service echo "[Unit] Description=Promtail Wants=network-online.target After=network-online.target [Service] User=prometheus Group=prometheus Type=simple ExecReload=/bin/kill -HUP 
ExecStart=/usr/local/bin/promtail-linux-amd64 -config.file=/etc/loki/promtail-local-config.yaml [Install] WantedBy=multi-user.target" > /etc/systemd/system/promtail.service echo "auth_enabled: false server: http_listen_port: 3100 common: path_prefix: /tmp/loki storage: filesystem: chunks_directory: /tmp/loki/chunks rules_directory: /tmp/loki/rules replication_factor: 1 ring: instance_addr: 127.0.0.1 kvstore: store: inmemory schema_config: configs: - from: 2020-09-07 store: boltdb-shipper object_store: filesystem schema: v12 index: prefix: loki_index_ period: 24h ruler: storage: type: local local: directory: /etc/loki rule_path: /tmp/loki/rules alertmanager_url: http://localhost:9093 ring: kvstore: store: inmemory enable_api: true enable_alertmanager_v2: true alertmanager_client: basic_auth_username: user basic_auth_password: password" > /etc/loki/loki-local-config.yaml echo "server: http_listen_port: 9080 grpc_listen_port: 0 positions: filename: /var/log/loki/positions.yaml clients: - url: http://localhost:3100/loki/api/v1/push scrape_configs: - job_name: system static_configs: - targets: - localhost labels: job: varlogs __path__: /var/log/*log" > /etc/loki/promtail-local-config.yaml systemctl daemon-reload systemctl start loki systemctl start promtail systemctl enable loki systemctl enable promtail ##### Basic auth: apt-get install python3-bcrypt echo '#!/usr/bin/python import os import getpass import bcrypt password = "nonsecure" hashed_password = bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()) file = open("hash.txt","w") file.write(str(hashed_password.decode())) file.close()' > ./auth.py sed -i "s/nonsecure/$WEB_PASSWORD/" ./auth.py python3 ./auth.py HASHVAR=$(cat hash.txt) echo "user:$HASHVAR" > /etc/nginx/auth_conf/$LOKI_DOMAIN.passwords /etc/init.d/nginx reload rm auth.py rm hash.txt echo "server { server_name $LOKI_DOMAIN; auth_basic \"loki auth\"; auth_basic_user_file /etc/nginx/auth_conf/$LOKI_DOMAIN.passwords; location / { proxy_pass 
http://127.0.0.1:3100; proxy_read_timeout 1800s; proxy_connect_timeout 1600s; proxy_http_version 1.1; proxy_set_header Upgrade \$http_upgrade; proxy_set_header Connection "Keep-Alive"; proxy_set_header Proxy-Connection "Keep-Alive"; proxy_redirect off; } location /ready { proxy_pass http://127.0.0.1:3100; proxy_http_version 1.1; proxy_set_header Connection "Keep-Alive"; proxy_set_header Proxy-Connection "Keep-Alive"; proxy_redirect off; auth_basic "off"; } }" > /etc/nginx/conf.d/l.conf ==================================================================================== === Alertmanager installing: ==================================================================================== # read -s -p "Enter web password: " WEB_PASSWORD VERSION="0.26.0" wget https://github.com/prometheus/alertmanager/releases/download/v$VERSION/alertmanager-$VERSION.linux-amd64.tar.gz tar -xvzf alertmanager-$VERSION.linux-amd64.tar.gz mv alertmanager-$VERSION.linux-amd64/alertmanager /usr/local/bin/ mv alertmanager-$VERSION.linux-amd64/amtool /usr/local/bin/ mkdir /etc/amtool/ mv alertmanager-$VERSION.linux-amd64/alertmanager.yml /etc/amtool/ chown prometheus:prometheus /usr/local/bin/alertmanager chown prometheus:prometheus /usr/local/bin/amtool chown -R prometheus:root /etc/amtool chown prometheus:prometheus /etc/amtool/alertmanager.yml rm -rf alertmanager-$VERSION.linux-amd64* echo "[Unit] Description=Alertmanager Wants=network-online.target After=network-online.target [Service] User=prometheus Group=prometheus Type=simple # change the localhost IP to the one, suited in your case #ExecStart=/usr/local/bin/alertmanager --config.file=/etc/amtool/alertmanager.yml --storage.path=/etc/amtool/data --web.external-url http://127.0.0.1:9093 --web.config.file /etc/amtool/web.yml ExecStart=/usr/local/bin/alertmanager --config.file=/etc/amtool/alertmanager.yml --storage.path=/etc/amtool/data --web.external-url http://127.0.0.1:9093 --cluster.advertise-address=127.0.0.1:9093 --web.config.file 
/etc/amtool/web.yml [Install] WantedBy=multi-user.target" > /etc/systemd/system/alertmanager.service ##### Basic auth: apt-get install python3-bcrypt echo '#!/usr/bin/python import os import getpass import bcrypt password = "nonsecure" hashed_password = bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()) file = open("hash.txt","w") file.write(str(hashed_password.decode())) file.close()' > ./auth.py sed -i "s/nonsecure/$WEB_PASSWORD/" ./auth.py python3 ./auth.py HASHVAR=$(cat hash.txt) # Enter password and copy output to file: #echo "tls_server_config: # cert_file: /path/to/cert.pem # key_file: /path/to/privkey.pem #basic_auth_users: # user: $HASHVAR" > /etc/amtool/web.yml echo "basic_auth_users: user: $HASHVAR" > /etc/amtool/web.yml chown prometheus:prometheus /etc/amtool/web.yml chmod 400 /etc/amtool/web.yml rm auth.py rm hash.txt systemctl daemon-reload systemctl start alertmanager systemctl enable alertmanager == Alertmanager with basic_auth in prometheus.yml: == alerting: alertmanagers: - scheme: http static_configs: - targets: - "localhost:9093" basic_auth: username: user password: password_not_hash ==================================================================================== === Blackbox exporter installing: ==================================================================================== # VERSION="0.22.0" wget https://github.com/prometheus/blackbox_exporter/releases/download/v$VERSION/blackbox_exporter-$VERSION.linux-amd64.tar.gz tar -xvzf blackbox_exporter-$VERSION.linux-amd64.tar.gz mv blackbox_exporter-$VERSION.linux-amd64/blackbox_exporter /usr/local/bin/ chown prometheus:prometheus /usr/local/bin/blackbox_exporter mkdir /etc/blackbox_exporter mv blackbox_exporter-$VERSION.linux-amd64/blackbox.yml /etc/blackbox_exporter/ chown -R prometheus:prometheus /etc/blackbox_exporter rm -rf blackbox_exporter-$VERSION.linux-amd64* echo "[Unit] Description=blackbox_exporter Wants=network-online.target After=network-online.target [Service] 
User=prometheus Group=prometheus Type=simple ExecStart=/usr/local/bin/blackbox_exporter --config.file=/etc/blackbox_exporter/blackbox.yml [Install] WantedBy=multi-user.target" > /etc/systemd/system/blackbox_exporter.service systemctl daemon-reload systemctl start blackbox_exporter systemctl enable blackbox_exporter ==================================================================================== === Default configs: ==================================================================================== ==================================================================================== === /etc/amtool/alertmanager.yml ==================================================================================== # global: slack_api_url: 'https://hooks.slack.com/services/T111/7222C/caasqw' route: group_by: ['...'] # group_by: [alertname, alertstate] group_wait: 1m group_interval: 1m repeat_interval: 24h receiver: webhook_team # - match: # severity: test-telegram # receiver: stardata-telegram receivers: - name: 'webhook_team' email_configs: - to: 'name@example.com' from: 'name2@example.com' smarthost: example.com:587 auth_username: 'name2@example.com' auth_identity: 'name2@example.com' auth_password: 'password' send_resolved: true ==================================================================================== === /etc/prometheus/prometheus.yml ==================================================================================== # global: scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. # scrape_timeout is set to the global default (10s). # Alertmanager configuration alerting: alertmanagers: - scheme: http static_configs: - targets: - "localhost:9093" basic_auth: username: user password: password # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. 
rule_files: # - "first_rules.yml" # - "second_rules.yml" scrape_configs: - job_name: "prometheus" scheme: https static_configs: - targets: ["localhost:9090"] - job_name: "test-vm" static_configs: - targets: ["11.11.11.11:1111"] labels: alias: "test-vm" ==================================================================================== === Дать права Server Admin пользователю Grafana из консоли: ==================================================================================== # apt install sqlite3 /etc/init.d/grafana-server stop sqlite3 /var/lib/grafana/grafana.db update user set is_admin=1 where login='user'; ctrl+d /etc/init.d/grafana-server start ==================================================================================== === Отправка тестового алерта: ==================================================================================== # #amtool --alertmanager.url=http://localhost:9093/ alert add alertname="test123" severity="test-telegram" job="test-alert" instance="localhost" exporter="none" cluster="test" == Здесь есть избыточные ключи: amtool --alertmanager.url=https://example.com alert add alertname="test123" severity="test-telegram" job="test-alert" instance="localhost" exporter="none" cluster="test" --http.config.file=/etc/amtool/http_config.yml /etc/amtool/http_config.yml: basic_auth: username: user password: password_plain_text ==================================================================================== === Выполнять скрипт при срабатывании алерта: ==================================================================================== # apt-get install webhook vi /etc/webhook/webhook.json: [ { "id": "test-webhook", "execute-command": "/opt/scripts/test_webhook.sh", "command-working-directory": "/opt/scripts", "response-message": "test ok\n" } ] /usr/bin/webhook -nopanic -hooks /etc/webhook/webhook.json == Сервер будет висеть на порту 9000 == vi /etc/amtool/alertmanager: receivers: - name: 'webhook_team' . . 
webhook_configs: - url: 'http://localhost:9000/hooks/test-webhook' . == При срабатывании алерта, какой дергает webhook_team, будет отправляться POST на localhost:9000, где уже будет триггериться скрипт. ==================================================================================== === Алерт на событие в логах Loki (с концептом конфига): ==================================================================================== # vi /etc/loki/loki-local-config.yaml: auth_enabled: false server: http_listen_port: 3100 grpc_listen_port: 9096 common: path_prefix: /var/log/loki storage: filesystem: chunks_directory: /var/log/loki/chunks rules_directory: /tmp/loki/rules-temp replication_factor: 1 ring: instance_addr: 127.0.0.1 kvstore: store: inmemory schema_config: configs: - from: 2020-10-24 store: boltdb-shipper object_store: filesystem schema: v11 index: prefix: index_ period: 24h ruler: storage: type: local local: directory: /etc/loki rule_path: /tmp/loki/rules-temp alertmanager_url: http://localhost:9093 ring: kvstore: store: inmemory enable_api: true enable_alertmanager_v2: true alertmanager_client: basic_auth_username: user basic_auth_password: password querier: max_concurrent: 2048 frontend: max_outstanding_per_tenant: 4096 compress_responses: true == С дефолтными настройками Loki файл правил должен лежать в /etc/loki/fake/rules.yml. 
Правила написаны с помощью LogQL: groups: - name: webhook_team rules: - alert: oom_checker expr: sum(rate({container_name="api1", host="server1"} |= `OutOfMemoryError` [10s])) by (host) > 0.00001 for: 1s labels: severity: critical annotations: summary: OOM happened description: "OOM happened description" == Для дашборда по правилу выше можно использовать это: sum(rate({container_name="api1", host="server1"} |= `OutOfMemoryError` [1m])) by (host) ==================================================================================== === Отправлять алерты в телеграм: ==================================================================================== # vi /etc/amtool/alertmanager.yml - name: 'team' telegram_configs: - bot_token: '$TOKEN' api_url: https://api.telegram.org chat_id: $CHAT_ID parse_mode: '' == Создать бота можно здесь: https://core.telegram.org/bots#6-botfather == После создания бота нужно создать канал и сделать бота админом канала. Нужно написать хотя бы одно сообщение в чат и перейти по линку https://api.telegram.org/botYOUR_BOT_TOKEN/getUpdates. Там найти секцию "chat -> id", скопировать значение и вставить в конфиг выше (id может быть отрицательным). 
==================================================================================== === Сбросить пароль админа в grafana: ==================================================================================== # grafana-cli admin reset-admin-password pwd123 ==================================================================================== === Скрыть версию Grafana для анонимных пользователей: ==================================================================================== # vi grafana.ini hide_version = true ==================================================================================== === Работа с Value Mappings: ==================================================================================== # == Получить из строки "/subscriptions/11-11-11/resourceGroups/test-rg1/providers/Microsoft.Compute/virtualMachines/test-vm1", которая является value в ячейке таблицы, часть "virtualMachines/test-vm1": В настройках нужного Visualizations перейти в Overrides -> Field with name -> Value mappings -> Regex и указать следующее: Regex: ^.*(virtualMachines.*).*"$ Display text: $1 == Убрать кавычки по обе стороны паттерна "dev-proj-mssqlserver1.database.windows.net": Regex: ^"(.*)"$ Display text: $1 == Не забывать, что "Transform data" рядом с "Query" только для заголовков таблицы. ==================================================================================== === Как с помощью Azure Resource Graph запросить значение, находящееся за "[{": ==================================================================================== # == Колонка "Properties" в портянке json: {"provisioningState":"Succeeded","resourceGuid":"111-11-111-11","enableAcceleratedNetworking":false,"disableTcpStateTracking":false,"ipConfigurations":[{"type":"Microsoft.Network/networkInterfaces/ipConfigurations"... 
== Запрос для ARG: resources | where type == "microsoft.network/networkinterfaces" | mv-expand ip_properties = properties.ipConfigurations | project name, private_ip = ip_properties.properties.privateIPAddress == mv-expand ip_properties = properties.ipConfigurations раскрывает квадратные скобки, после чего в project уже можно работать с переменной ip_properties. ==================================================================================== === Просмотр списка алертов alertmanager из терминала: ==================================================================================== # wget https://github.com/pehlicd/amtui/releases/download/v0.1.5/amtui_v0.1.5_linux_amd64.tar.gz tar -xvzf amtui_v0.1.5_linux_amd64.tar.gz ./amtui --host 127.0.0.1 --port 9093 --scheme http --username user --password password ==================================================================================== === Установка promtail под windows: ==================================================================================== # == В виде сервиса под windows по состоянию на начало 2024-го не устанавливается $version = "2.9.4" $temp_dir = "$ENV:HOMEPATH\Desktop" $app_name = "promtail" $app_url = "https://github.com/grafana/loki/releases/download/v$version/promtail-windows-amd64.exe.zip" $installation_dir = "C:\Program Files" $app_binary_name = "promtail-windows-amd64.exe" $app_config_name = "promtail-local-config.yaml" $app_config = @" server: http_listen_port: 9080 grpc_listen_port: 0 positions: filename: C:\Users\user\Desktop\positions.yaml clients: - url: https://example.com/loki/api/v1/push basic_auth: username: user password: password scrape_configs: - job_name: system static_configs: - targets: - localhost labels: job: logs __path__: c:\logs\subfolder\*txt "@ New-Item -Path "$installation_dir" -Name "$app_name" -ItemType "directory" Invoke-WebRequest "$app_url" -OutFile "$temp_dir\$app_name.zip" Expand-Archive -LiteralPath "$temp_dir\$app_name.zip" -DestinationPath 
"$installation_dir\$app_name" # Before the next command, copy the config to "c:\promtail\promtail-local-config.yaml" Set-Content "$installation_dir\$app_name\$app_config_name" $app_config Remove-Item "$temp_dir\$app_name.zip" & "C:\Program Files\promtail\promtail-windows-amd64.exe" --config.file="C:\Program Files\promtail\promtail-local-config.yaml" ==================================================================================== === Настройка grafana-infinity-datasource для подключения к подписке Azure: ==================================================================================== # https://github.com/grafana/grafana-infinity-datasource/discussions/404 ==================================================================================== === Отправка сообщения из скрипта в бот Телеграма: ==================================================================================== # #!/bin/bash TOKEN="1111111111111111111111111" CHAT_ID="222222" MESSAGE="test" curl -s -X POST https://api.telegram.org/bot$TOKEN/sendMessage -d chat_id=$CHAT_ID -d text="$MESSAGE" > /dev/null