From 56f71cf4ca1106b0a5c58f431652c0db29a58080 Mon Sep 17 00:00:00 2001 From: Xuhui Zhu Date: Thu, 22 Aug 2024 16:42:48 -0400 Subject: [PATCH 1/4] add gpu clk Signed-off-by: Xuhui Zhu --- collector/hwmon_linux.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/collector/hwmon_linux.go b/collector/hwmon_linux.go index ed46f0f808..3b0f0a2aef 100644 --- a/collector/hwmon_linux.go +++ b/collector/hwmon_linux.go @@ -44,7 +44,7 @@ var ( hwmonSensorTypes = []string{ "vrm", "beep_enable", "update_interval", "in", "cpu", "fan", "pwm", "temp", "curr", "power", "energy", "humidity", - "intrusion", + "intrusion", "freq", } ) @@ -354,6 +354,15 @@ func (c *hwMonCollector) updateHwmon(ch chan<- prometheus.Metric, dir string) er continue } + if sensorType == "freq" && element == "input" { + if label, ok := sensorData["label"]; ok { + sensorLabel := cleanMetricName(label) + desc := prometheus.NewDesc("node_hwmon_freq_hertz", "Hardware monitor for GPU frequency in MHz", hwmonLabelDesc, nil) + ch <- prometheus.MustNewConstMetric( + desc, prometheus.GaugeValue, parsedValue/1000000.0, append(labels[:len(labels)-1], sensorLabel)...) + } + continue + } // fallback, just dump the metric as is desc := prometheus.NewDesc(name, "Hardware monitor "+sensorType+" element "+element, hwmonLabelDesc, nil) From 77e6a6546cd5eaabab2861642ed112cf51d1e717 Mon Sep 17 00:00:00 2001 From: Xuhui Zhu Date: Fri, 23 Aug 2024 16:34:23 -0400 Subject: [PATCH 2/4] restruct code Signed-off-by: Xuhui Zhu --- collector/hwmon_linux.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collector/hwmon_linux.go b/collector/hwmon_linux.go index 3b0f0a2aef..e90ce7e98e 100644 --- a/collector/hwmon_linux.go +++ b/collector/hwmon_linux.go @@ -357,7 +357,7 @@ func (c *hwMonCollector) updateHwmon(ch chan<- prometheus.Metric, dir string) er if sensorType == "freq" && element == "input" { if label, ok := sensorData["label"]; ok { sensorLabel := cleanMetricName(label) - desc := prometheus.NewDesc("node_hwmon_freq_hertz", "Hardware monitor for GPU frequency in MHz", hwmonLabelDesc, nil) + desc := prometheus.NewDesc(name+"_freq_mhz", "Hardware monitor for GPU frequency in MHz", hwmonLabelDesc, nil) ch <- prometheus.MustNewConstMetric( desc, prometheus.GaugeValue, parsedValue/1000000.0, append(labels[:len(labels)-1], sensorLabel)...) } From c43b2b67b6b3f8816fbf521b4a070827f9bf45ad Mon Sep 17 00:00:00 2001 From: Xuhui Zhu Date: Tue, 27 Aug 2024 16:48:32 -0400 Subject: [PATCH 3/4] add test Signed-off-by: Xuhui Zhu --- collector/fixtures/e2e-output.txt | 10 ++++++++-- collector/fixtures/sys.ttar | 26 +++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 39b8c2b940..111a7543bf 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -889,6 +889,10 @@ node_hwmon_fan_target_rpm{chip="nct6779",sensor="fan2"} 27000 # HELP node_hwmon_fan_tolerance Hardware monitor fan element tolerance # TYPE node_hwmon_fan_tolerance gauge node_hwmon_fan_tolerance{chip="nct6779",sensor="fan2"} 0 +# HELP node_hwmon_freq_freq_mhz Hardware monitor for GPU frequency in MHz +# TYPE node_hwmon_freq_freq_mhz gauge +node_hwmon_freq_freq_mhz{chip="hwmon4",sensor="mclk"} 300 +node_hwmon_freq_freq_mhz{chip="hwmon4",sensor="sclk"} 214 # HELP node_hwmon_in_alarm Hardware sensor alarm status (in) # TYPE node_hwmon_in_alarm gauge node_hwmon_in_alarm{chip="nct6779",sensor="in0"} 0 @@ -1002,8 +1006,10 @@ node_hwmon_pwm_weight_temp_step_tol{chip="nct6779",sensor="pwm1"} 0 # TYPE node_hwmon_sensor_label gauge node_hwmon_sensor_label{chip="hwmon4",label="foosensor",sensor="temp1"} 1 node_hwmon_sensor_label{chip="hwmon4",label="foosensor",sensor="temp2"} 1 -node_hwmon_sensor_label{chip="platform_applesmc_768",label="Left side ",sensor="fan1"} 1 -node_hwmon_sensor_label{chip="platform_applesmc_768",label="Right side ",sensor="fan2"} 1 +node_hwmon_sensor_label{chip="hwmon4",label="mclk",sensor="freq2"} 1 +node_hwmon_sensor_label{chip="hwmon4",label="sclk",sensor="freq1"} 1 +node_hwmon_sensor_label{chip="platform_applesmc_768",label="Left side",sensor="fan1"} 1 +node_hwmon_sensor_label{chip="platform_applesmc_768",label="Right side",sensor="fan2"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 0",sensor="temp2"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 1",sensor="temp3"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 2",sensor="temp4"} 1 diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index 6fcf094dd1..7b2f27b8e2 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -437,6 +437,26 @@ Lines: 1 100000 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/hwmon/hwmon4/freq1_input +Lines: 1 +214000000 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/hwmon/hwmon4/freq1_label +Lines: 1 +sclk +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/hwmon/hwmon4/freq2_input +Lines: 1 +300000000 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/hwmon/hwmon4/freq2_label +Lines: 1 +mclk +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/class/hwmon/hwmon5 SymlinkTo: ../../devices/platform/bogus.0/hwmon/hwmon5/ # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1337,7 +1357,7 @@ Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/class/nvme/nvme0/model Lines: 1 -Samsung SSD 970 PRO 512GB +Samsung SSD 970 PRO 512GB Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/class/nvme/nvme0/serial @@ -2750,7 +2770,7 @@ Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/devices/platform/applesmc.768/fan1_label Lines: 1 -Left side +Left side Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/devices/platform/applesmc.768/fan1_manual @@ -2784,7 +2804,7 @@ Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/devices/platform/applesmc.768/fan2_label Lines: 1 -Right side +Right side Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/devices/platform/applesmc.768/fan2_manual From 0bae94673cd43dfcac051b32a7fe05ff569ae173 Mon Sep 17 00:00:00 2001 From: Xuhui Zhu Date: Tue, 27 Aug 2024 16:59:20 -0400 Subject: [PATCH 4/4] add arm test Signed-off-by: Xuhui Zhu --- collector/fixtures/e2e-64k-page-output.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index d649896a57..0276002fd0 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -867,6 +867,10 @@ node_hwmon_fan_target_rpm{chip="nct6779",sensor="fan2"} 27000 # HELP node_hwmon_fan_tolerance Hardware monitor fan element tolerance # TYPE node_hwmon_fan_tolerance gauge node_hwmon_fan_tolerance{chip="nct6779",sensor="fan2"} 0 +# HELP node_hwmon_freq_freq_mhz Hardware monitor for GPU frequency in MHz +# TYPE node_hwmon_freq_freq_mhz gauge +node_hwmon_freq_freq_mhz{chip="hwmon4",sensor="mclk"} 300 +node_hwmon_freq_freq_mhz{chip="hwmon4",sensor="sclk"} 214 # HELP node_hwmon_in_alarm Hardware sensor alarm status (in) # TYPE node_hwmon_in_alarm gauge node_hwmon_in_alarm{chip="nct6779",sensor="in0"} 0 @@ -980,8 +984,10 @@ node_hwmon_pwm_weight_temp_step_tol{chip="nct6779",sensor="pwm1"} 0 # TYPE node_hwmon_sensor_label gauge node_hwmon_sensor_label{chip="hwmon4",label="foosensor",sensor="temp1"} 1 node_hwmon_sensor_label{chip="hwmon4",label="foosensor",sensor="temp2"} 1 -node_hwmon_sensor_label{chip="platform_applesmc_768",label="Left side ",sensor="fan1"} 1 -node_hwmon_sensor_label{chip="platform_applesmc_768",label="Right side ",sensor="fan2"} 1 +node_hwmon_sensor_label{chip="hwmon4",label="mclk",sensor="freq2"} 1 +node_hwmon_sensor_label{chip="hwmon4",label="sclk",sensor="freq1"} 1 +node_hwmon_sensor_label{chip="platform_applesmc_768",label="Left side",sensor="fan1"} 1 +node_hwmon_sensor_label{chip="platform_applesmc_768",label="Right side",sensor="fan2"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 0",sensor="temp2"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 1",sensor="temp3"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 2",sensor="temp4"} 1