diff --git a/cmd/do-agent/aggregation.go b/cmd/do-agent/aggregation.go
index 847a7fb0..23f0ab31 100644
--- a/cmd/do-agent/aggregation.go
+++ b/cmd/do-agent/aggregation.go
@@ -95,3 +95,73 @@ var k8sAggregationSpec = map[string][]string{
 var mongoAggregationSpec = map[string][]string{
 	"mongoagent_data_usage_percentage": {"cluster_uuid"},
 }
+
+// amdAggregatedLabels is the label set aggregated on for every AMD GPU metric listed in gpuAggregationSpec.
+// Intended outcome (per the original note): only gpu_id and hostname are kept — TODO confirm against the aggregator.
+var amdAggregatedLabels = []string{
+	"card_model", "card_series", "card_vendor", "cluster_name", "container",
+	"driver_version", "gpu_compute_partition_type", "gpu_memory_partition_type",
+	"gpu_partition_id", "gpu_uuid", "job_id", "job_partition", "job_user",
+	"namespace", "pod", "serial_number", "usergroup_id", "vbios_version", "workload_id",
+}
+
+var gpuAggregationSpec = map[string][]string{ // metric name -> labels to aggregate on; merged by initAggregatorSpecs only when --gpu-metrics-path is set
+	// GPU Utilization metrics
+	"amd_gpu_prof_gui_util_percent":      amdAggregatedLabels,
+	"amd_gpu_prof_valu_pipe_issue_util":  amdAggregatedLabels,
+	"amd_gpu_prof_tensor_active_percent": amdAggregatedLabels,
+	"amd_gpu_prof_occupancy_percent":     amdAggregatedLabels,
+	"amd_gpu_prof_fetch_size":            amdAggregatedLabels,
+	"amd_gpu_prof_write_size":            amdAggregatedLabels,
+
+	// GPU VRAM usage metrics
+	"amd_gpu_used_vram":  amdAggregatedLabels,
+	"amd_gpu_total_vram": amdAggregatedLabels,
+	"amd_gpu_free_vram":  amdAggregatedLabels,
+
+	// XGMI Bandwidth metrics - all neighbors 0-7
+	"amd_xgmi_neighbor_0_tx_throughput": amdAggregatedLabels,
+	"amd_xgmi_neighbor_1_tx_throughput": amdAggregatedLabels,
+	"amd_xgmi_neighbor_2_tx_throughput": amdAggregatedLabels,
+	"amd_xgmi_neighbor_3_tx_throughput": amdAggregatedLabels,
+	"amd_xgmi_neighbor_4_tx_throughput": amdAggregatedLabels,
+	"amd_xgmi_neighbor_5_tx_throughput": amdAggregatedLabels,
+	"amd_xgmi_neighbor_6_tx_throughput": amdAggregatedLabels,
+	"amd_xgmi_neighbor_7_tx_throughput": amdAggregatedLabels,
+
+	"amd_xgmi_neighbor_0_response_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_1_response_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_2_response_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_3_response_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_4_response_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_5_response_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_6_response_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_7_response_tx": amdAggregatedLabels,
+
+	"amd_xgmi_neighbor_0_request_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_1_request_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_2_request_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_3_request_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_4_request_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_5_request_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_6_request_tx": amdAggregatedLabels,
+	"amd_xgmi_neighbor_7_request_tx": amdAggregatedLabels,
+
+	// PCIe bandwidth
+	"amd_pcie_bandwidth": amdAggregatedLabels,
+
+	"amd_gpu_ecc_uncorrect_total":                         amdAggregatedLabels,
+	"amd_pcie_replay_count":                               amdAggregatedLabels,
+	"amd_pcie_recovery_count":                             amdAggregatedLabels,
+	"amd_pcie_replay_rollover_count":                      amdAggregatedLabels,
+	"amd_pcie_max_speed":                                  amdAggregatedLabels,
+	"amd_pcie_speed":                                      amdAggregatedLabels,
+	"amd_gpu_prof_cpf_cpf_stat_stall":                     amdAggregatedLabels,
+	"amd_gpu_clock":                                       amdAggregatedLabels,
+	"amd_gpu_violation_proc_hot_residency_accumulated":    amdAggregatedLabels,
+	"amd_gpu_violation_soc_thermal_residency_accumulated": amdAggregatedLabels,
+	"amd_gpu_violation_ppt_residency_accumulated":         amdAggregatedLabels,
+	"amd_gpu_violation_hbm_thermal_residency_accumulated": amdAggregatedLabels,
+	"amd_gpu_violation_vr_thermal_tracking_accumulated":   amdAggregatedLabels,
+	"amd_gpu_junction_temperature":                        amdAggregatedLabels,
+}
diff --git a/cmd/do-agent/config.go b/cmd/do-agent/config.go
index 4da83090..278866fd 100644
--- a/cmd/do-agent/config.go
+++ b/cmd/do-agent/config.go
@@ -10,11 +10,11 @@ import (
 	"time"
 
 	"github.com/alecthomas/kingpin/v2"
+	"github.com/digitalocean/do-agent/internal/flags"
 	"github.com/prometheus/client_golang/prometheus"
 	dto "github.com/prometheus/client_model/go"
 	"github.com/prometheus/common/model"
 
-	"github.com/digitalocean/do-agent/internal/flags"
 	"github.com/digitalocean/do-agent/internal/log"
 	"github.com/digitalocean/do-agent/internal/process"
 	"github.com/digitalocean/do-agent/pkg/clients/tsclient"
@@ -46,6 +46,7 @@ var (
 		defaultMaxBatchSize    int
 		defaultMaxMetricLength int
 		promAddr               string
+		gpuMetricsPath         string
 		topK                   int
 		scrapeTimeout          time.Duration
 	}
@@ -123,6 +124,9 @@ func init() {
 	kingpin.Flag("metrics-path", "enable metrics collection from a prometheus endpoint").
 		StringVar(&config.promAddr)
 
+	kingpin.Flag("gpu-metrics-path", "enable GPU metrics collection from a prometheus endpoint (e.g., AMD device-metrics-exporter)").
+		StringVar(&config.gpuMetricsPath)
+
 	kingpin.Flag("web.listen", "enable a local endpoint for scrapeable prometheus metrics as well").
 		Default("false").
 		BoolVar(&config.webListen)
@@ -155,6 +159,7 @@ func initConfig() {
 	// parse all command line flags which are defined across the app
 	kingpin.HelpFlag.Short('h')
 	kingpin.Parse()
+
 }
 
 func checkConfig() error {
@@ -247,6 +252,12 @@ func initAggregatorSpecs() map[string][]string {
 		}
 	}
 
+	if config.gpuMetricsPath != "" {
+		for k, v := range gpuAggregationSpec {
+			aggregateSpecs[k] = append(aggregateSpecs[k], v...)
+		}
+	}
+
 	return aggregateSpecs
 }
 
@@ -321,6 +332,15 @@ func initCollectors() []prometheus.Collector {
 		}
 	}
 
+	if config.gpuMetricsPath != "" {
+		gpu, err := collector.NewScraper("gpu", config.gpuMetricsPath, nil, gpuWhitelist, collector.WithTimeout(config.scrapeTimeout))
+		if err != nil {
+			log.Error("Failed to initialize GPU metrics collector: %+v", err)
+		} else {
+			cols = append(cols, gpu)
+		}
+	}
+
 	// create the default DO agent to collect metrics about
 	// this device
 	if !config.noNode {
diff --git a/cmd/do-agent/whitelist.go b/cmd/do-agent/whitelist.go
index 5ff50d69..7341ca6e 100644
--- a/cmd/do-agent/whitelist.go
+++ b/cmd/do-agent/whitelist.go
@@ -163,3 +163,64 @@ var dbaasWhitelist = map[string]bool{
 
 	"opensearch_http_total_opened": true,
 }
+
+var gpuWhitelist = map[string]bool{ // metric names handed to collector.NewScraper for the "gpu" scraper; keys mirror gpuAggregationSpec, keep both in sync
+	// GPU Utilization
+	"amd_gpu_prof_gui_util_percent":      true,
+	"amd_gpu_prof_valu_pipe_issue_util":  true,
+	"amd_gpu_prof_tensor_active_percent": true,
+	"amd_gpu_prof_occupancy_percent":     true,
+	"amd_gpu_prof_fetch_size":            true,
+	"amd_gpu_prof_write_size":            true,
+
+	// GPU VRAM usage
+	"amd_gpu_used_vram":  true,
+	"amd_gpu_total_vram": true,
+	"amd_gpu_free_vram":  true,
+
+	// XGMI Bandwidth
+	"amd_xgmi_neighbor_0_tx_throughput": true,
+	"amd_xgmi_neighbor_1_tx_throughput": true,
+	"amd_xgmi_neighbor_2_tx_throughput": true,
+	"amd_xgmi_neighbor_3_tx_throughput": true,
+	"amd_xgmi_neighbor_4_tx_throughput": true,
+	"amd_xgmi_neighbor_5_tx_throughput": true,
+	"amd_xgmi_neighbor_6_tx_throughput": true,
+	"amd_xgmi_neighbor_7_tx_throughput": true,
+
+	"amd_xgmi_neighbor_0_response_tx": true,
+	"amd_xgmi_neighbor_1_response_tx": true,
+	"amd_xgmi_neighbor_2_response_tx": true,
+	"amd_xgmi_neighbor_3_response_tx": true,
+	"amd_xgmi_neighbor_4_response_tx": true,
+	"amd_xgmi_neighbor_5_response_tx": true,
+	"amd_xgmi_neighbor_6_response_tx": true,
+	"amd_xgmi_neighbor_7_response_tx": true,
+
+	"amd_xgmi_neighbor_0_request_tx": true,
+	"amd_xgmi_neighbor_1_request_tx": true,
+	"amd_xgmi_neighbor_2_request_tx": true,
+	"amd_xgmi_neighbor_3_request_tx": true,
+	"amd_xgmi_neighbor_4_request_tx": true,
+	"amd_xgmi_neighbor_5_request_tx": true,
+	"amd_xgmi_neighbor_6_request_tx": true,
+	"amd_xgmi_neighbor_7_request_tx": true,
+
+	// PCIe bandwidth
+	"amd_pcie_bandwidth": true,
+
+	"amd_gpu_ecc_uncorrect_total":                         true,
+	"amd_pcie_replay_count":                               true,
+	"amd_pcie_recovery_count":                             true,
+	"amd_pcie_replay_rollover_count":                      true,
+	"amd_pcie_max_speed":                                  true,
+	"amd_pcie_speed":                                      true,
+	"amd_gpu_prof_cpf_cpf_stat_stall":                     true,
+	"amd_gpu_clock":                                       true,
+	"amd_gpu_violation_proc_hot_residency_accumulated":    true,
+	"amd_gpu_violation_soc_thermal_residency_accumulated": true,
+	"amd_gpu_violation_ppt_residency_accumulated":         true,
+	"amd_gpu_violation_hbm_thermal_residency_accumulated": true,
+	"amd_gpu_violation_vr_thermal_tracking_accumulated":   true,
+	"amd_gpu_junction_temperature":                        true,
+}