mirror of
https://gitee.com/milvus-io/milvus.git
synced 2025-12-08 01:58:34 +08:00
Merge branch 'branch-0.5.0' into Refactor_Knowhere
Former-commit-id: a57472c60518d5fe322469561bd71304192df9f9
This commit is contained in:
commit
ad1fe081f0
@ -140,6 +140,7 @@ Please mark all change in change log and use the ticket from JIRA.
|
|||||||
- MS-523 - Config file validation
|
- MS-523 - Config file validation
|
||||||
- MS-539 - Remove old task code
|
- MS-539 - Remove old task code
|
||||||
- MS-546 - Add simple mode resource_config
|
- MS-546 - Add simple mode resource_config
|
||||||
|
- MS-570 - Add prometheus docker-compose file
|
||||||
|
|
||||||
## New Feature
|
## New Feature
|
||||||
- MS-343 - Implement ResourceMgr
|
- MS-343 - Implement ResourceMgr
|
||||||
|
|||||||
@ -245,6 +245,7 @@ SystemInfo::CPUTemperature(){
|
|||||||
float temp;
|
float temp;
|
||||||
fscanf(file, "%f", &temp);
|
fscanf(file, "%f", &temp);
|
||||||
result.push_back(temp / 1000);
|
result.push_back(temp / 1000);
|
||||||
|
fclose(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
19
docker/alertmanager.yml
Normal file
19
docker/alertmanager.yml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
global:
|
||||||
|
resolve_timeout: 5m
|
||||||
|
|
||||||
|
route:
|
||||||
|
group_by: ['alertname']
|
||||||
|
group_wait: 10s
|
||||||
|
group_interval: 10s
|
||||||
|
repeat_interval: 1h
|
||||||
|
receiver: 'web.hook'
|
||||||
|
receivers:
|
||||||
|
- name: 'web.hook'
|
||||||
|
webhook_configs:
|
||||||
|
- url: 'http://127.0.0.1:5001/'
|
||||||
|
inhibit_rules:
|
||||||
|
- source_match:
|
||||||
|
severity: 'critical'
|
||||||
|
target_match:
|
||||||
|
severity: 'warning'
|
||||||
|
equal: ['alertname', 'dev', 'instance']
|
||||||
56
docker/docker-compose-monitor.yml
Normal file
56
docker/docker-compose-monitor.yml
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
version: '2.3'
|
||||||
|
|
||||||
|
networks:
|
||||||
|
monitor:
|
||||||
|
driver: bridge
|
||||||
|
|
||||||
|
services:
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus:v2.11.1
|
||||||
|
container_name: prometheus
|
||||||
|
hostname: prometheus
|
||||||
|
restart: always
|
||||||
|
volumes:
|
||||||
|
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||||
|
- ./server_down.yml:/etc/prometheus/node_down.yml
|
||||||
|
ports:
|
||||||
|
- "9090:9090"
|
||||||
|
networks:
|
||||||
|
- monitor
|
||||||
|
|
||||||
|
alertmanager:
|
||||||
|
image: prom/alertmanager
|
||||||
|
container_name: alertmanager
|
||||||
|
hostname: alertmanager
|
||||||
|
restart: always
|
||||||
|
volumes:
|
||||||
|
- ./alertmanager.yml:/etc/alertmanager/alertmanager.yml
|
||||||
|
ports:
|
||||||
|
- "9093:9093"
|
||||||
|
networks:
|
||||||
|
- monitor
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana
|
||||||
|
container_name: grafana
|
||||||
|
hostname: grafana
|
||||||
|
restart: always
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
networks:
|
||||||
|
- monitor
|
||||||
|
|
||||||
|
milvus:
|
||||||
|
runtime: nvidia
|
||||||
|
image: registry.zilliz.com/milvus/engine:branch-0.4.0-release
|
||||||
|
container_name: milvus
|
||||||
|
hostname: milvus
|
||||||
|
restart: always
|
||||||
|
volumes:
|
||||||
|
- ../cpp/conf/server_config.yaml:/opt/milvus/cpp/conf/server_config.yaml
|
||||||
|
- ../cpp/conf/log_config.conf:/opt/milvus/cpp/conf/log_config.conf
|
||||||
|
ports:
|
||||||
|
- "8080:8080"
|
||||||
|
- "19530:19530"
|
||||||
|
networks:
|
||||||
|
- monitor
|
||||||
46
docker/prometheus.yml
Normal file
46
docker/prometheus.yml
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
# my global config
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
|
||||||
|
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
|
||||||
|
# scrape_timeout is set to the global default (10s).
|
||||||
|
|
||||||
|
# Alertmanager configuration
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- static_configs:
|
||||||
|
- targets: ['localhost:9093']
|
||||||
|
|
||||||
|
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||||
|
rule_files:
|
||||||
|
- "serverdown.yml" # add alerting rules
|
||||||
|
|
||||||
|
# A scrape configuration containing exactly one endpoint to scrape:
|
||||||
|
# Here it's Prometheus itself.
|
||||||
|
scrape_configs:
|
||||||
|
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||||
|
- job_name: 'prometheus'
|
||||||
|
|
||||||
|
# metrics_path defaults to '/metrics'
|
||||||
|
# scheme defaults to 'http'.
|
||||||
|
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:9090']
|
||||||
|
|
||||||
|
# scrape metrics of server
|
||||||
|
- job_name: 'milvus_server'
|
||||||
|
scrape_interval: 1s
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:8080']
|
||||||
|
|
||||||
|
# scrape metrics of server
|
||||||
|
- job_name: 'milvus_server_1'
|
||||||
|
scrape_interval: 1s
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:8080']
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# under development
|
||||||
|
- job_name: 'pushgateway'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:9091']
|
||||||
8
docker/server_down.yml
Normal file
8
docker/server_down.yml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
groups:
|
||||||
|
- name: milvus
|
||||||
|
rules:
|
||||||
|
- alert: MilvusServerDown
|
||||||
|
expr: up{job="milvus_server"}
|
||||||
|
for: 1s
|
||||||
|
labels:
|
||||||
|
serverity: page
|
||||||
Loading…
x
Reference in New Issue
Block a user