From 78f1ae94d2bd5561dd07ce898bd44a0587c919f7 Mon Sep 17 00:00:00 2001 From: navinko Date: Sun, 31 May 2026 12:20:39 +0530 Subject: [PATCH 1/2] HDDS-15395. Grafana dashboard for container balancer --- .../Ozone - Container Balancer Metrics.json | 1379 +++++++++++++++++ 1 file changed, 1379 insertions(+) create mode 100644 hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - Container Balancer Metrics.json diff --git a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - Container Balancer Metrics.json b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - Container Balancer Metrics.json new file mode 100644 index 000000000000..ad20ad6fbfa1 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - Container Balancer Metrics.json @@ -0,0 +1,1379 @@ +{ + "annotations": [ + { + "kind": "AnnotationQuery", + "spec": { + "builtIn": true, + "enable": true, + "hide": true, + "iconColor": "", + "name": "Annotations & Alerts", + "query": { + "group": "grafana", + "kind": "DataQuery", + "spec": {}, + "version": "v0" + } + } + } + ], + "cursorSync": "Crosshair", + "description": "Comprehensive tracking of Ozone cluster balancing operations. 
Monitors real-time DataNode capacity convergence, current iteration health (Scheduled vs Completed), and lifetime data movement metrics.", + "editable": true, + "elements": { + "panel-1": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(container_balancer_metrics_num_datanodes_unbalanced)", + "legendFormat": "Unbalanced DataNodes", + "range": false + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "Tracks the total number of DataNodes whose capacity usage falls outside the configured cluster balance threshold. A healthy, fully balanced cluster should ideally maintain a value of 0.", + "id": 1, + "links": [], + "title": "Unbalanced DataNodes", + "vizConfig": { + "group": "stat", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + } + }, + "version": "13.0.1+security-01" + } + } + }, + "panel-2": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": 
"prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(container_balancer_metrics_data_size_unbalanced_gb * 1024 * 1024 * 1024)", + "legendFormat": "Total Unbalanced Data Size", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "Represents the total volume of data (reported by the balancer in GB and converted to bytes for display) currently residing on over-utilized nodes that must be moved to under-utilized nodes to satisfy the configured container balancing thresholds.", + "id": 2, + "links": [], + "title": "Cluster Unbalanced Data Size Over Time", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + } + }, + "version": "13.0.1+security-01" + } + } + }, + "panel-3": { + "kind": "Panel", + "spec": { + 
"data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(increase(container_balancer_metrics_data_size_moved_gb[$__range]))", + "legendFormat": "Moved Data Size (GB)", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "Measures the total volume of data in gigabytes successfully transferred between source and target DataNodes during the most recently executed balancer iteration loop.", + "id": 3, + "links": [], + "title": "Size Moved (Latest)", + "vizConfig": { + "group": "stat", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1000 + } + ] + }, + "unit": "decgbytes" + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + } + }, + "version": "13.0.1+security-01" + } + } + }, + "panel-4": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(container_balancer_metrics_num_datanodes_involved_in_latest_iteration)", + "legendFormat": "Datanode Involved", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + 
"queryOptions": {}, + "transformations": [] + } + }, + "description": "The count of unique DataNode hosts that actively participated as either a source (sender) or target (receiver) of containers in the latest iteration.", + "id": 4, + "links": [], + "title": "Nodes Involved (Latest)", + "vizConfig": { + "group": "stat", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + } + }, + "version": "13.0.1+security-01" + } + } + }, + "panel-5": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(container_balancer_metrics_num_container_moves_scheduled_in_latest_iteration)", + "legendFormat": "Scheduled", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + }, + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(container_balancer_metrics_num_container_moves_completed_in_latest_iteration)", + "legendFormat": "Completed", + "range": true + }, + "version": "v0" + }, + "refId": "B" + } + }, + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": 
"prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(container_balancer_metrics_num_container_moves_failed_in_latest_iteration)", + "legendFormat": "Failed", + "range": true + }, + "version": "v0" + }, + "refId": "C" + } + }, + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(container_balancer_metrics_num_container_moves_timeout_in_latest_iteration)", + "legendFormat": "Timeout", + "range": true + }, + "version": "v0" + }, + "refId": "D" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "A real-time status breakdown of individual container transfers during the current or latest iteration. Displays the exact counts of Scheduled, Completed, Failed, and Timeout movements.", + "id": 5, + "links": [], + "title": "Container Move Operations Breakdown (Latest Iteration)", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": 
[] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + } + }, + "version": "13.0.1+security-01" + } + } + }, + "panel-6": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "expr": "container_balancer_metrics_data_size_moved_gb * 1024 * 1024 * 1024", + "legendFormat": "Total Data Moved" + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "An accumulating historical counter showing the total volume of data moved across the cluster since tracking began. This acts as a lifetime indicator of balancer workload.", + "id": 6, + "links": [], + "title": "Cumulative Data Volume Moved", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 
+ }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + } + }, + "version": "13.0.1+security-01" + } + } + }, + "panel-7": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(container_balancer_metrics_num_container_moves_completed)", + "legendFormat": "Completed", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + }, + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(container_balancer_metrics_num_container_moves_failed)", + "instant": false, + "legendFormat": "Failed", + "range": true + }, + "version": "v0" + }, + "refId": "B" + } + }, + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(container_balancer_metrics_num_container_moves_scheduled)", + "instant": false, + "legendFormat": "Scheduled", + "range": true + }, + "version": "v0" + }, + "refId": "C" + } + }, + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "sum(container_balancer_metrics_num_container_moves_timeout)", + "instant": false, 
+ "legendFormat": "Timeout", + "range": true + }, + "version": "v0" + }, + "refId": "D" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "Cluster-wide historical aggregation of all attempted container migrations since inception. Compares total Scheduled vs Completed moves alongside long-term Failed and Timeout counts to assess network and disk reliability.", + "id": 7, + "links": [], + "title": "Cumulative Executed Container Moves", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "Completed" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": true, + "viz": true + } + } + ] + } + ] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + 
"hideZeros": false, + "mode": "single", + "sort": "none" + } + } + }, + "version": "13.0.1+security-01" + } + } + }, + "panel-8": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "${datasource}" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "expr": "container_balancer_metrics_num_iterations", + "legendFormat": "Completed Iterations" + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "The lifetime count of fully executed, successful balancing loops completed by the Storage Container Manager (SCM). If the balancer exits during initialization due to an already balanced cluster, this counter does not increment.", + "id": 8, + "links": [], + "title": "Total Balancer Iterations Completed", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + 
"options": { + "mode": "exclude", + "names": [ + "Completed Iterations" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": true, + "viz": true + } + } + ] + } + ] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + } + }, + "version": "13.0.1+security-01" + } + } + }, + "panel-9": { + "kind": "Panel", + "spec": { + "data": { + "kind": "QueryGroup", + "spec": { + "queries": [ + { + "kind": "PanelQuery", + "spec": { + "hidden": false, + "query": { + "datasource": { + "name": "dfm13nk97y9kwf" + }, + "group": "prometheus", + "kind": "DataQuery", + "spec": { + "editorMode": "code", + "expr": "volume_info_metrics_used", + "legendFormat": "{{hostname}}", + "range": true + }, + "version": "v0" + }, + "refId": "A" + } + } + ], + "queryOptions": {}, + "transformations": [] + } + }, + "description": "Tracks the raw physical bytes consumed across individual DataNode storage volumes over time. 
This panel visualizes how storage distribution scales and shifts across nodes during active cluster balancing.", + "id": 9, + "links": [], + "title": "Datanode Disk Usage (Convergence)", + "vizConfig": { + "group": "timeseries", + "kind": "VizConfig", + "spec": { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "options": { + "annotations": { + "clustering": -1, + "multiLane": false + }, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + } + }, + "version": "13.0.1+security-01" + } + } + } + }, + "layout": { + "kind": "RowsLayout", + "spec": { + "rows": [ + { + "kind": "RowsLayoutRow", + "spec": { + "collapse": false, + "layout": { + "kind": "GridLayout", + "spec": { + "items": [ + { + "kind": "GridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-1" + }, + "height": 4, + "width": 10, + "x": 0, + "y": 0 + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-9" + }, + "height": 4, + 
"width": 14, + "x": 10, + "y": 0 + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-2" + }, + "height": 4, + "width": 24, + "x": 0, + "y": 4 + } + } + ] + } + }, + "title": "Cluster Imbalance Status" + } + }, + { + "kind": "RowsLayoutRow", + "spec": { + "collapse": false, + "layout": { + "kind": "GridLayout", + "spec": { + "items": [ + { + "kind": "GridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-3" + }, + "height": 4, + "width": 8, + "x": 0, + "y": 0 + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-4" + }, + "height": 4, + "width": 8, + "x": 8, + "y": 0 + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-5" + }, + "height": 4, + "width": 8, + "x": 16, + "y": 0 + } + } + ] + } + }, + "title": "Latest Iteration Metrics" + } + }, + { + "kind": "RowsLayoutRow", + "spec": { + "collapse": false, + "layout": { + "kind": "GridLayout", + "spec": { + "items": [ + { + "kind": "GridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-6" + }, + "height": 5, + "width": 8, + "x": 0, + "y": 0 + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-7" + }, + "height": 5, + "width": 8, + "x": 8, + "y": 0 + } + }, + { + "kind": "GridLayoutItem", + "spec": { + "element": { + "kind": "ElementReference", + "name": "panel-8" + }, + "height": 5, + "width": 8, + "x": 16, + "y": 0 + } + } + ] + } + }, + "title": "Lifetime Metrics" + } + } + ] + } + }, + "links": [], + "liveNow": false, + "preload": false, + "tags": [ + "Ozone", + "SCM" + ], + "timeSettings": { + "autoRefresh": "5s", + "autoRefreshIntervals": [ + "5s", + "10s", + "30s" + ], + "fiscalYearStartMonth": 0, + "from": "now-24h", + "hideTimepicker": false, + "timezone": "browser", + "to": "now" + }, + 
"title": "Ozone - Container Balancer", + "variables": [ + { + "kind": "DatasourceVariable", + "spec": { + "allowCustomValue": true, + "current": { + "text": "prometheus", + "value": "dfm13nk97y9kwf" + }, + "hide": "dontHide", + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "datasource", + "options": [], + "pluginId": "prometheus", + "refresh": "onDashboardLoad", + "regex": "", + "skipUrlSync": false + } + } + ] +} From bf263eb50ca5bfb0215f7324b804aed3e8444818 Mon Sep 17 00:00:00 2001 From: navinko Date: Tue, 2 Jun 2026 02:25:03 +0530 Subject: [PATCH 2/2] HDDS-15395. updated container balancer dashboard json --- .../dashboards/Ozone - Container Balancer Metrics.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - Container Balancer Metrics.json b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - Container Balancer Metrics.json index ad20ad6fbfa1..c68235c81c60 100644 --- a/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - Container Balancer Metrics.json +++ b/hadoop-ozone/dist/src/main/compose/common/grafana/dashboards/Ozone - Container Balancer Metrics.json @@ -248,7 +248,7 @@ "kind": "DataQuery", "spec": { "editorMode": "code", - "expr": "sum(increase(container_balancer_metrics_data_size_moved_gb[$__range]))", + "expr": "sum(container_balancer_metrics_data_size_moved_gb_in_latest_iteration)", "legendFormat": "Moved Data Size (GB)", "range": true }, @@ -1060,7 +1060,7 @@ "hidden": false, "query": { "datasource": { - "name": "dfm13nk97y9kwf" + "name": "${datasource}" }, "group": "prometheus", "kind": "DataQuery", @@ -1360,8 +1360,8 @@ "spec": { "allowCustomValue": true, "current": { - "text": "prometheus", - "value": "dfm13nk97y9kwf" + "text": "default", + "value": "default" }, "hide": "dontHide", "includeAll": false,