From 59e30111234194573ba1692dd74c36e4f06d1edf Mon Sep 17 00:00:00 2001 From: "Taylor, Perry" Date: Thu, 9 Nov 2023 13:25:04 -0700 Subject: [PATCH] Adding metric for UPI receive bandwidth. --- CLX/metrics/cascadelakex_metrics.json | 21 ++++++++++++++++++- .../perf/cascadelakex_metrics_perf.json | 7 +++++++ EMR/metrics/emeraldrapids_metrics.json | 21 ++++++++++++++++++- .../perf/emeraldrapids_metrics_perf.json | 7 +++++++ ICX/metrics/icelakex_metrics.json | 21 ++++++++++++++++++- ICX/metrics/perf/icelakex_metrics_perf.json | 7 +++++++ SKX/metrics/perf/skylakex_metrics_perf.json | 7 +++++++ SKX/metrics/skylakex_metrics.json | 21 ++++++++++++++++++- .../perf/sapphirerapids_metrics_perf.json | 7 +++++++ SPR/metrics/sapphirerapids_metrics.json | 21 ++++++++++++++++++- 10 files changed, 135 insertions(+), 5 deletions(-) diff --git a/CLX/metrics/cascadelakex_metrics.json b/CLX/metrics/cascadelakex_metrics.json index 5b5af0a3..1b66fe44 100644 --- a/CLX/metrics/cascadelakex_metrics.json +++ b/CLX/metrics/cascadelakex_metrics.json @@ -2,7 +2,7 @@ "Header": { "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.", "Info": "Performance Monitoring Metrics for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V", - "DatePublished": "05/12/2023", + "DatePublished": "11/09/2023", "Version": "", "Legend": "" }, @@ -984,6 +984,25 @@ "ResolutionLevels": "CHA, SOCKET, SYSTEM", "MetricGroup": "" }, + { + "MetricName": "upi_data_receive_bw", + "LegacyName": "metric_UPI Data receive BW (MB/sec) (only data)", + "Level": 1, + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "UnitOfMeasure": "MB/sec", + "Events": [ + { + "Name": "UNC_UPI_RxL_FLITS.ALL_DATA", + "Alias": "a" + } + ], + "Constants": [], + "Formula": "(a * (64 / 9.0) / 1000000) / DURATIONTIMEINSECONDS", + "Category": "", + "Threshold": "", + "ResolutionLevels": "UPI, SOCKET, SYSTEM", + "MetricGroup": "" + }, { "MetricName": "Frontend_Bound", "LegacyName": "metric_TMA_Frontend_Bound(%)", diff --git a/CLX/metrics/perf/cascadelakex_metrics_perf.json b/CLX/metrics/perf/cascadelakex_metrics_perf.json index 94fe3381..6307e27a 100644 --- a/CLX/metrics/perf/cascadelakex_metrics_perf.json +++ b/CLX/metrics/perf/cascadelakex_metrics_perf.json @@ -279,6 +279,13 @@ "MetricName": "llc_miss_remote_memory_bandwidth_read", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "( UNC_UPI_RxL_FLITS.ALL_DATA * (64 / 9.0) / 1000000) / duration_time", + "MetricGroup": "", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "MetricExpr": "( IDQ_UOPS_NOT_DELIVERED.CORE / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) )", diff --git a/EMR/metrics/emeraldrapids_metrics.json b/EMR/metrics/emeraldrapids_metrics.json index 6230b92d..9a863c17 100644 --- a/EMR/metrics/emeraldrapids_metrics.json +++ b/EMR/metrics/emeraldrapids_metrics.json @@ -2,7 +2,7 @@ "Header": { "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.", "Info": "Performance Monitoring Metrics for 5th Generation Intel(R) Xeon(R) Processor Scalable Family", - "DatePublished": "10/25/2023", + "DatePublished": "11/09/2023", "Version": "", "Legend": "" }, @@ -1067,6 +1067,25 @@ "ResolutionLevels": "CHA, SOCKET, SYSTEM", "MetricGroup": "" }, + { + "MetricName": "upi_data_receive_bw", + "LegacyName": "metric_UPI Data receive BW (MB/sec) (only data)", + "Level": 1, + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "UnitOfMeasure": "MB/sec", + "Events": [ + { + "Name": "UNC_UPI_RxL_FLITS.ALL_DATA", + "Alias": "a" + } + ], + "Constants": [], + "Formula": "(a * (64 / 9.0) / 1000000) / DURATIONTIMEINSECONDS", + "Category": "", + "Threshold": "", + "ResolutionLevels": "UPI, SOCKET, SYSTEM", + "MetricGroup": "" + }, { "MetricName": "llc_miss_remote_memory_bandwidth_write", "LegacyName": "metric_llc_miss_remote_memory_bandwidth_write_MB/s", diff --git a/EMR/metrics/perf/emeraldrapids_metrics_perf.json b/EMR/metrics/perf/emeraldrapids_metrics_perf.json index 4c4ad466..dd2dd1a9 100644 --- a/EMR/metrics/perf/emeraldrapids_metrics_perf.json +++ b/EMR/metrics/perf/emeraldrapids_metrics_perf.json @@ -293,6 +293,13 @@ "MetricName": "llc_miss_remote_memory_bandwidth_read", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "( UNC_UPI_RxL_FLITS.ALL_DATA * (64 / 9.0) / 1000000) / duration_time", + "MetricGroup": "", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to remote memory.", "MetricExpr": "( UNC_CHA_REQUESTS.WRITES_REMOTE * 64 / 1000000) / duration_time", diff --git a/ICX/metrics/icelakex_metrics.json b/ICX/metrics/icelakex_metrics.json index 67d87eff..80f0a155 100644 --- a/ICX/metrics/icelakex_metrics.json +++ b/ICX/metrics/icelakex_metrics.json @@ -2,7 +2,7 @@ "Header": { "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.", "Info": "Performance Monitoring Metrics for 3rd Generation Intel(R) Xeon(R) Processor Scalable Family based on Ice Lake microarchitecture - V", - "DatePublished": "05/12/2023", + "DatePublished": "11/09/2023", "Version": "", "Legend": "" }, @@ -1122,6 +1122,25 @@ "ResolutionLevels": "CHA, SOCKET, SYSTEM", "MetricGroup": "" }, + { + "MetricName": "upi_data_receive_bw", + "LegacyName": "metric_UPI Data receive BW (MB/sec) (only data)", + "Level": 1, + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "UnitOfMeasure": "MB/sec", + "Events": [ + { + "Name": "UNC_UPI_RxL_FLITS.ALL_DATA", + "Alias": "a" + } + ], + "Constants": [], + "Formula": "(a * (64 / 9.0) / 1000000) / DURATIONTIMEINSECONDS", + "Category": "", + "Threshold": "", + "ResolutionLevels": "UPI, SOCKET, SYSTEM", + "MetricGroup": "" + }, { "MetricName": "Frontend_Bound", "LegacyName": "metric_TMA_Frontend_Bound(%)", diff --git a/ICX/metrics/perf/icelakex_metrics_perf.json b/ICX/metrics/perf/icelakex_metrics_perf.json index b43676fb..fec9fe95 100644 --- a/ICX/metrics/perf/icelakex_metrics_perf.json +++ b/ICX/metrics/perf/icelakex_metrics_perf.json @@ -307,6 +307,13 @@ "MetricName": "llc_miss_remote_memory_bandwidth_write", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "( UNC_UPI_RxL_FLITS.ALL_DATA * (64 / 9.0) / 1000000) / duration_time", + "MetricGroup": "", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "MetricExpr": "( topdown\\-fe\\-bound / ( topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound ) - INT_MISC.UOP_DROPPING / ( slots ) )", diff --git a/SKX/metrics/perf/skylakex_metrics_perf.json b/SKX/metrics/perf/skylakex_metrics_perf.json index 7feb184e..25f717d1 100644 --- a/SKX/metrics/perf/skylakex_metrics_perf.json +++ b/SKX/metrics/perf/skylakex_metrics_perf.json @@ -258,6 +258,13 @@ "MetricName": "llc_miss_remote_memory_bandwidth_read", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "( UNC_UPI_RxL_FLITS.ALL_DATA * (64 / 9.0) / 1000000) / duration_time", + "MetricGroup": "", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "MetricExpr": "( IDQ_UOPS_NOT_DELIVERED.CORE / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) )", diff --git a/SKX/metrics/skylakex_metrics.json b/SKX/metrics/skylakex_metrics.json index 4ffaf7db..2825b5dc 100644 --- a/SKX/metrics/skylakex_metrics.json +++ b/SKX/metrics/skylakex_metrics.json @@ -2,7 +2,7 @@ "Header": { "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.", "Info": "Performance Monitoring Metrics for Intel(R) Xeon(R) Processor Scalable Family based on Skylake microarchitecture - V", - "DatePublished": "05/12/2023", + "DatePublished": "11/09/2023", "Version": "", "Legend": "" }, @@ -899,6 +899,25 @@ "ResolutionLevels": "CHA, SOCKET, SYSTEM", "MetricGroup": "" }, + { + "MetricName": "upi_data_receive_bw", + "LegacyName": "metric_UPI Data receive BW (MB/sec) (only data)", + "Level": 1, + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "UnitOfMeasure": "MB/sec", + "Events": [ + { + "Name": "UNC_UPI_RxL_FLITS.ALL_DATA", + "Alias": "a" + } + ], + "Constants": [], + "Formula": "(a * (64 / 9.0) / 1000000) / DURATIONTIMEINSECONDS", + "Category": "", + "Threshold": "", + "ResolutionLevels": "UPI, SOCKET, SYSTEM", + "MetricGroup": "" + }, { "MetricName": "Frontend_Bound", "LegacyName": "metric_TMA_Frontend_Bound(%)", diff --git a/SPR/metrics/perf/sapphirerapids_metrics_perf.json b/SPR/metrics/perf/sapphirerapids_metrics_perf.json index 1d105301..b9979e7f 100644 --- a/SPR/metrics/perf/sapphirerapids_metrics_perf.json +++ b/SPR/metrics/perf/sapphirerapids_metrics_perf.json @@ -307,6 +307,13 @@ "MetricName": "llc_miss_remote_memory_bandwidth_write", "ScaleUnit": "1MB/s" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "( UNC_UPI_RxL_FLITS.ALL_DATA * (64 / 9.0) / 1000000) / duration_time", + "MetricGroup": "", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "MetricExpr": "( topdown\\-fe\\-bound / ( topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound ) - INT_MISC.UOP_DROPPING / ( slots ) )", diff --git a/SPR/metrics/sapphirerapids_metrics.json b/SPR/metrics/sapphirerapids_metrics.json index 846ea592..69f99d14 100644 --- a/SPR/metrics/sapphirerapids_metrics.json +++ b/SPR/metrics/sapphirerapids_metrics.json @@ -2,7 +2,7 @@ "Header": { "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.", "Info": "Performance Monitoring Metrics for 4th Generation Intel(R) Xeon(R) Processor Scalable Family based on Sapphire Rapids microarchitecture - V", - "DatePublished": "05/12/2023", + "DatePublished": "11/09/2023", "Version": "", "Legend": "" }, @@ -1106,6 +1106,25 @@ "ResolutionLevels": "CHA, SOCKET, SYSTEM", "MetricGroup": "" }, + { + "MetricName": "upi_data_receive_bw", + "LegacyName": "metric_UPI Data receive BW (MB/sec) (only data)", + "Level": 1, + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "UnitOfMeasure": "MB/sec", + "Events": [ + { + "Name": "UNC_UPI_RxL_FLITS.ALL_DATA", + "Alias": "a" + } + ], + "Constants": [], + "Formula": "(a * (64 / 9.0) / 1000000) / DURATIONTIMEINSECONDS", + "Category": "", + "Threshold": "", + "ResolutionLevels": "UPI, SOCKET, SYSTEM", + "MetricGroup": "" + }, { "MetricName": "Frontend_Bound", "LegacyName": "metric_TMA_Frontend_Bound(%)",