Skip to content

Commit d319d82

Browse files
committed
add more dcgm metrics
1 parent 9759645 commit d319d82

File tree

2 files changed

+18
-0
lines changed

2 files changed

+18
-0
lines changed

cmd/do-agent/aggregation.go

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,15 @@ var gpuAggregationSpec = map[string][]string{
115115
"dcgm_fi_dev_fb_free": nvidiaAggregatedLabels,
116116
"dcgm_fi_dev_fb_used": nvidiaAggregatedLabels,
117117
"dcgm_fi_dev_nvlink_bandwidth_total": nvidiaAggregatedLabels,
118+
"dcgm_fi_prof_sm_occupancy": nvidiaAggregatedLabels,
119+
"dcgm_fi_prof_pipe_tensor_active": nvidiaAggregatedLabels,
120+
"dcgm_fi_prof_pcie_rx_bytes": nvidiaAggregatedLabels,
121+
"dcgm_fi_prof_pcie_tx_bytes": nvidiaAggregatedLabels,
122+
"dcgm_fi_dev_gpu_temp": nvidiaAggregatedLabels,
123+
"dcgm_fi_dev_memory_temp": nvidiaAggregatedLabels,
124+
"dcgm_fi_dev_power_usage": nvidiaAggregatedLabels,
125+
"dcgm_fi_dev_power_violation": nvidiaAggregatedLabels,
126+
"dcgm_fi_dev_thermal_violation": nvidiaAggregatedLabels,
118127

119128
// GPU Utilization metrics
120129
"amd_gpu_prof_gui_util_percent": amdAggregatedLabels,

cmd/do-agent/whitelist.go

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,15 @@ var gpuWhitelist = map[string]bool{
170170
"DCGM_FI_DEV_FB_FREE": true,
171171
"DCGM_FI_DEV_FB_USED": true,
172172
"DCGM_FI_DEV_NVLINK_BANDWIDTH_TOTAL": true,
173+
"DCGM_FI_PROF_SM_OCCUPANCY": true,
174+
"DCGM_FI_PROF_PIPE_TENSOR_ACTIVE": true,
175+
"DCGM_FI_PROF_PCIE_RX_BYTES": true,
176+
"DCGM_FI_PROF_PCIE_TX_BYTES": true,
177+
"DCGM_FI_DEV_GPU_TEMP": true,
178+
"DCGM_FI_DEV_MEMORY_TEMP": true,
179+
"DCGM_FI_DEV_POWER_USAGE": true,
180+
"DCGM_FI_DEV_POWER_VIOLATION": true,
181+
"DCGM_FI_DEV_THERMAL_VIOLATION": true,
173182

174183
// GPU Utilization
175184
"amd_gpu_prof_gui_util_percent": true,

0 commit comments

Comments
 (0)