diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f1dc0c4c7..5e9840a63c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Please mark all change in change log and use the issue from GitHub - \#4484 Milvus only search default partition if search parameter 'partition_tags' contains '_default' ## Feature +- \#4504 Add a metric to display the number of files opened by Milvus ## Improvement - \#4454 Optimize the process of indexing and querying diff --git a/docker/docker-compose-monitor.yml b/docker/docker-compose-monitor.yml index 185ad86cc9..7a484bd797 100644 --- a/docker/docker-compose-monitor.yml +++ b/docker/docker-compose-monitor.yml @@ -4,13 +4,19 @@ networks: monitor: driver: bridge +volumes: + prometheus_data: {} + grafana_data: {} + services: prometheus: - image: prom/prometheus:v2.11.1 + image: prom/prometheus:v2.17.1 container_name: prometheus hostname: prometheus restart: always volumes: + - ./prometheus:/etc/prometheus + - prometheus_data:/prometheus - ./prometheus.yml:/etc/prometheus/prometheus.yml - ./server_down.yml:/etc/prometheus/node_down.yml ports: @@ -30,29 +36,54 @@ services: networks: - monitor + pushgateway: + image: prom/pushgateway + container_name: pushgateway + restart: unless-stopped + expose: + - 9091 + ports: + - "9091:9091" + networks: + - monitor + labels: + org.label-schema.group: "monitoring" + + nodeexporter: + image: prom/node-exporter:v0.18.1 + container_name: nodeexporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.rootfs=/rootfs' + - '--path.sysfs=/host/sys' + - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' + restart: unless-stopped + expose: + - 9100 + networks: + - monitor + labels: + org.label-schema.group: "monitoring" + grafana: - image: grafana/grafana + image: grafana/grafana:6.7.2 container_name: grafana + volumes: + - grafana_data:/var/lib/grafana + - 
./grafana/provisioning:/etc/grafana/provisioning + environment: + - GF_SECURITY_ADMIN_USER=${ADMIN_USER} + - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD} + - GF_USERS_ALLOW_SIGN_UP=false + restart: unless-stopped + expose: + - 3000 hostname: grafana - restart: always ports: - "3000:3000" networks: - monitor - - milvus_server: - runtime: nvidia - image: milvusdb/milvus:latest - restart: always - links: - - prometheus - environment: - WEB_APP: host.docker.internal - volumes: - - ../core/conf/server_config.yaml:/var/lib/milvus/conf/server_config.yaml - - ../core/conf/log_config.conf:/var/lib/milvus/conf/log_config.conf - ports: - - "8080:8080" - - "19530:19530" - networks: - - monitor diff --git a/docker/grafana/provisioning/dashboards/dashboard.json b/docker/grafana/provisioning/dashboards/dashboard.json new file mode 100644 index 0000000000..26e520e414 --- /dev/null +++ b/docker/grafana/provisioning/dashboards/dashboard.json @@ -0,0 +1,1527 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 2, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Number of queries completed in every minute", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 16, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 86, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 0, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "query_vector_response_summary_count-(query_vector_response_summary_count offset 1m)", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": "1h", + "timeRegions": [], + "timeShift": null, + "title": "QPM (Query per minute)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "datasource": null, + "description": "The time (in seconds) Milvus server has been working and available", + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 0 + }, + "hideTimeOverride": true, + "id": 90, + "links": [], + "options": { + "fieldOptions": { + "calcs": [ + "mean" + ], + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "semi-dark-green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [], + "values": false + }, + "orientation": "auto", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.7.2", + "targets": [ + { + "expr": "keeping_alive_seconds_total", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{job}}", + "refId": "A" + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "Uptime", + "transparent": true, + "type": "gauge" + }, + { + "datasource": null, + "description": "Cache 
utilization ratio(%)", + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 46, + "links": [], + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": null, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [], + "values": false + }, + "orientation": "auto", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.7.2", + "targets": [ + { + "expr": "cache_usage_bytes", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{job}}", + "refId": "A" + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "Cache utilization ratio", + "transparent": true, + "type": "gauge" + }, + { + "datasource": null, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 4 + }, + "id": 100, + "links": [], + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#6ED0E0", + "value": 200 + }, + { + "color": "red", + "value": 500 + }, + { + "color": "#EAB839", + "value": 800 + } + ] + }, + "unit": "percent" + }, + "overrides": [], + "values": false + }, + "orientation": "auto", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.7.2", + "targets": [ + { + "expr": "CPU_usage_percent{CPU=\"avg\"}", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{job}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "milvus CPU utilization ratio", + "transparent": true, + "type": "gauge" + }, + { + "datasource": null, + "gridPos": { + "h": 4, + 
"w": 4, + "x": 20, + "y": 4 + }, + "id": 104, + "links": [], + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "degree" + }, + "overrides": [], + "values": false + }, + "orientation": "auto", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.7.2", + "targets": [ + { + "expr": "CPU_temperature", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{job}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU temperature", + "transparent": true, + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": null, + "description": "GPU utilization(MB)", + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 8 + }, + "hideTimeOverride": true, + "id": 68, + "links": [], + "options": { + "displayMode": "gradient", + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": null, + "mappings": [], + "max": "6000", + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "semi-dark-blue", + "value": null + }, + { + "color": "semi-dark-orange", + "value": 2000 + }, + { + "color": "light-purple", + "value": 4000 + }, + { + "color": "red", + "value": 5000 + } + ] + }, + "unit": "decmbytes" + }, + "overrides": [], + "values": false + }, + "orientation": "horizontal", + "showUnfilled": true + }, + "pluginVersion": "6.7.2", + "targets": [ + { + "expr": "GPU_memory_usage_total", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}:gpu{{DeviceNum}}", + "refId": "A" + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "GPU utilization", + "transparent": true, + 
"type": "bargauge" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Percentage of memory currently consumed by Milvus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 16, + "x": 0, + "y": 10 + }, + "hiddenSeries": false, + "id": 64, + "interval": "1s", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "RAM_usage_percent", + "format": "time_series", + "intervalFactor": 1, + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": "1m", + "timeRegions": [], + "timeShift": null, + "title": "Memory usage ratio", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "datasource": null, + "description": "GPU utilization rate(%)", + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 13 + }, + "hideTimeOverride": true, + "id": 66, + "links": [], + "options": { + "displayMode": "gradient", + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": null, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "light-blue", + "value": null + }, + { + "color": "semi-dark-orange", + "value": 80 + }, + { + "color": "semi-dark-red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [], + "values": false + }, + "orientation": "horizontal", + "showUnfilled": true + }, + "pluginVersion": "6.7.2", + "repeat": null, + "targets": [ + { + "expr": "Gpu_usage_percent", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{job}}", + "refId": "A" + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "GPU utilization ratio", + "transparent": true, + "type": "bargauge" + }, + { + "datasource": null, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 18 + }, + "id": 102, + "links": [], + "options": { + "fieldOptions": { + "calcs": [ + "last" + ], + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "degree" + }, + "overrides": [], + "values": false + }, + "orientation": "auto", + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "6.7.2", + "targets": [ + { + "expr": "GPU_temperature", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{job}}:gpu{{GPU}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GPU temperature", + "transparent": true, + "type": "gauge" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "System wide metric. 
It is the total query elapsed time divided by total number of queries", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 16, + "x": 0, + "y": 20 + }, + "hiddenSeries": false, + "id": 44, + "interval": "1s", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "query_response_summary/1e6", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": "1m", + "timeRegions": [], + "timeShift": null, + "title": "Query elapsed time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 22 + }, + "hiddenSeries": false, + "id": 106, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "GPU_memory_usage_total", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GPU utilization history", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Current number of files in Milvus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 50, + "interval": "1s", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "data_file_size_bytes", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": "1m", + "timeRegions": [], + "timeShift": null, + "title": "Total file", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, 
+ "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 30 + }, + "hiddenSeries": false, + "id": 108, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filefd_allocated", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Open FDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Disk write speed", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 38 + }, + "hiddenSeries": false, + "id": 42, + "interval": "1s", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pluginVersion": "6.1.6", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "disk_store_IO_speed_bytes_per_microseconds*1e6", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": "1m", + "timeRegions": [], + "timeShift": null, + "title": "Disk write speed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Disk read speed", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 38 + }, + "hiddenSeries": false, + "id": 38, + "interval": "1s", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "disk_load_IO_speed_byte_per_microsec*1e6", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": "1m", 
+ "timeRegions": [], + "timeShift": null, + "title": "Disk read speed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Number of vectors that are inserted in a second.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 46 + }, + "hiddenSeries": false, + "id": 94, + "interval": "1s", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "add_vectors_throughput_per_microsecond*(1e6)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": "1m", + "timeRegions": [], + "timeShift": null, + "title": "Insert per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Total amount of data stored in Milvus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 46 + }, + "hiddenSeries": false, + "id": 2, + "interval": "1s", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "add_vectors_request_total", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": "1m", + "timeRegions": [], + "timeShift": null, + "title": "Data size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Network IO read/write speed (per second)", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 57 + }, + "hiddenSeries": false, + "id": 92, + "interval": "1s", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + 
}, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "octets_bytes_per_second", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": "1m", + "timeRegions": [], + "timeShift": null, + "title": "Network IO", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "5s", + "schemaVersion": 22, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Milvus Monitor", + "uid": "B4pw8fNWz", + "variables": { + "list": [] + }, + "version": 1 +} \ No newline at end of file diff --git a/docker/grafana/provisioning/dashboards/dashboard.yml b/docker/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000000..d83b43c7b0 --- /dev/null +++ b/docker/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: 'Prometheus' + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + allowUiUpdates: true + options: + 
path: /etc/grafana/provisioning/dashboards \ No newline at end of file diff --git a/docker/grafana/provisioning/datasources/datasource.yml b/docker/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000000..bb37f13d99 --- /dev/null +++ b/docker/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + orgId: 1 + url: http://prometheus:9090 + basicAuth: false + isDefault: true + editable: true \ No newline at end of file diff --git a/docker/prometheus.yml b/docker/prometheus.yml index 91c64458a5..bea0f62a85 100644 --- a/docker/prometheus.yml +++ b/docker/prometheus.yml @@ -1,7 +1,7 @@ # my global config global: - scrape_interval: 15s # Set the scrape interval to every 1 seconds. Default is every 1 minute. - evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. + scrape_interval: 2s # Set the scrape interval to every 2 seconds. Default is every 1 minute. + evaluation_interval: 2s # Evaluate rules every 2 seconds. The default is every 1 minute. # scrape_timeout is set to the global default (10s). # Alertmanager configuration @@ -19,10 +19,8 @@ rule_files: scrape_configs: # The job name is added as a label `job=` to any timeseries scraped from this config. - job_name: 'prometheus' - # metrics_path defaults to '/metrics' # scheme defaults to 'http'. - static_configs: - targets: ['prometheus:9090'] @@ -32,6 +30,11 @@ scrape_configs: static_configs: - targets: ['milvus_server:8080'] + - job_name: 'nodeexporter' + scrape_interval: 5s + static_configs: + - targets: ['nodeexporter:9100'] + # under development - job_name: 'pushgateway' static_configs: diff --git a/docker/prometheus/node_down.yml b/docker/prometheus/node_down.yml new file mode 100755 index 0000000000..e69de29bb2 diff --git a/docker/prometheus/prometheus.yml b/docker/prometheus/prometheus.yml new file mode 100755 index 0000000000..e69de29bb2