From a9d82759cadff575dd6ee37b22b8c7cb20888208 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Fri, 5 May 2023 15:07:20 -0500 Subject: [PATCH 01/32] Implement custom merge utility for rocprof Signed-off-by: coleramos425 --- src/omniperf | 26 ++++++++++--- src/utils/perfagg.py | 89 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 5 deletions(-) diff --git a/src/omniperf b/src/omniperf index f3388bdf4..41fce2fbb 100755 --- a/src/omniperf +++ b/src/omniperf @@ -38,7 +38,7 @@ import warnings from parser import parse from utils import specs -from utils.perfagg import perfmon_filter, pmc_filter +from utils.perfagg import perfmon_filter, pmc_filter, pmc_perf_split, join_prof from utils import remove_workload from utils import csv_converter # Import workload from omniperf_analyze.omniperf_analyze import roofline_only # Standalone roofline @@ -163,11 +163,13 @@ def isWorkloadEmpty(my_parser, path): def replace_timestamps(workload_dir): df_stamps = pd.read_csv(workload_dir + "/timestamps.csv") if "BeginNs" in df_stamps.columns and "EndNs" in df_stamps.columns: - df_pmc_perf = pd.read_csv(workload_dir + "/pmc_perf.csv") + # Update timestamps for all *.csv output files + for fname in glob.glob(workload_dir + "/" + "*.csv"): + df_pmc_perf = pd.read_csv(fname) - df_pmc_perf["BeginNs"] = df_stamps["BeginNs"] - df_pmc_perf["EndNs"] = df_stamps["EndNs"] - df_pmc_perf.to_csv(workload_dir + "/pmc_perf.csv", index=False) + df_pmc_perf["BeginNs"] = df_stamps["BeginNs"] + df_pmc_perf["EndNs"] = df_stamps["EndNs"] + df_pmc_perf.to_csv(fname, index=False) else: warnings.warn( "WARNING: Incomplete profiling data detected. Unable to update timestamps." @@ -395,6 +397,9 @@ def characterize_app(args, VER): # Perfmon filtering pmc_filter(workload_dir, perfmon_dir, args.target) + # Separate pmc_perf runs + pmc_perf_split(workload_dir, perfmon_dir) + # Set up a log file log = open(workload_dir + "/log.txt", "w") print("Log: ", workload_dir + "/log.txt\n") @@ -449,6 +454,10 @@ def characterize_app(args, VER): # Update pmc_perf.csv timestamps replace_timestamps(workload_dir) + # Manually join each pmc_perf*.csv output + if args.use_rocscope == False: + join_prof(workload_dir, workload_dir + "/pmc_perf_NEW.csv") + ################################################ # Profiling Helpers @@ -551,6 +560,9 @@ def omniperf_profile(args, VER): # Perfmon filtering perfmon_filter(workload_dir, perfmon_dir, args) + # Separate pmc_perf runs + pmc_perf_split(workload_dir) + # Set up a log file log = open(workload_dir + "/log.txt", "w") print("Log: ", workload_dir + "/log.txt\n") @@ -670,6 +682,10 @@ def omniperf_profile(args, VER): ) # Update pmc_perf.csv timestamps replace_timestamps(workload_dir) + + # Manually join each pmc_perf*.csv output + if args.use_rocscope == False: + join_prof(workload_dir, workload_dir + "/pmc_perf.csv") # Generate sysinfo gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 0606b4dc4..6dd5eb4f4 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -25,6 +25,7 @@ import sys, os, pathlib, shutil, subprocess, argparse, glob, re import numpy as np import math +import pandas as pd prog = "omniperf" @@ -85,6 +86,94 @@ }, } +# joins disparate runs less dumbly than rocprof +def join_prof(workload_dir, out): + files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv") + df = None + + for i, file in enumerate(files): + #_df = parse_rocprof_kernels(file) + _df = pd.read_csv(file) + key = _df.groupby("KernelName").cumcount() + _df['key'] = _df.KernelName + ' - ' + key.astype(str) + + if df is None: + df = _df + else: + # join by unique index of kernel + df = pd.merge(df, _df, how='inner', on='key', suffixes=('', f'_{i}')) + # now, we can: + #   A) throw away any of the "boring" duplicats + df = df[[k for k in df.keys() if not any( + check in k for check in [ + 'gpu', 'queue-id', 'queue-index', 'pid', 'tid', 'grd', 'wgr', + 'lds', 'scr', 'vgpr', 'sgpr', 'fbar', 'sig', 'obj'])]] + #   B) any timestamps that are _not_ the duration, which is the one we care + #   about + df = df[[k for k in df.keys() if not any( + check in k for check in [ + 'stop', 'start', 'DispatchNs', 'CompleteNs'])]] + #   C) sanity check the name and key + namekeys = [k for k in df.keys() if 'KernelName' in k] + assert len(namekeys) + for k in namekeys[1:]: + assert (df[namekeys[0]] == df[k]).all() + df = df.drop(columns=namekeys[1:]) + # now take the median of the durations + dkeys = [k for k in df.keys() if 'duration' in k] + duration = df[dkeys].median(axis=1) + # compute min and max, just for sanity + min_duration = df[dkeys].min(axis=1) + max_duration = df[dkeys].max(axis=1) + std_duration = df[dkeys].std(axis=1) + mean_duration = df[dkeys].mean(axis=1) + # and replace + df = df.drop(columns=dkeys) + df['duration'] = duration + df['duration[max]'] = max_duration + df['duration[min]'] = min_duration + df['duration[std]'] = std_duration + df['duration[mean]'] = mean_duration + # finally, join the drop key + df = df.drop(columns=['key']) + # and save to file + df.to_csv(out, index=False) + # and delete old file(s) + for file in files: + os.remove(file) + +def pmc_perf_split(workload_dir): + workload_perfmon_dir = workload_dir + "/perfmon" + lines = open(workload_perfmon_dir + "/pmc_perf.txt", "r").read().splitlines() + + # Iterate over each line in pmc_perf.txt + mpattern = r"^pmc:(.*)" + i = 0 + for line in lines: + # Verify no comments + stext = line.split("#")[0].strip() + if not stext: + continue + + # all pmc counters start with "pmc:" + m = re.match(mpattern, stext) + if m is None: + continue + + # Create separate file for each line + fd = open(workload_perfmon_dir + "/pmc_perf_" + str(i) + ".txt", "w") + fd.write(stext + "\n\n") + fd.write("gpu:\n") + fd.write("range:\n") + fd.write("kernel:\n") + fd.close() + + i += 1 + + # Remove old pmc_perf.txt input from perfmon dir + os.remove(workload_perfmon_dir + "/pmc_perf.txt") + + def perfmon_coalesce(pmc_files_list, workload_dir, soc): workload_perfmon_dir = workload_dir + "/perfmon" From 298271e1d083c75c0da11dea120a0f31fac553eb Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 8 May 2023 11:25:05 -0500 Subject: [PATCH 02/32] Update mean timestamp calculation Signed-off-by: coleramos425 --- src/utils/perfagg.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 6dd5eb4f4..3c09a648b 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -112,7 +112,7 @@ def join_prof(workload_dir, out): #   about df = df[[k for k in df.keys() if not any( check in k for check in [ - 'stop', 'start', 'DispatchNs', 'CompleteNs'])]] + 'DispatchNs', 'CompleteNs'])]] #   C) sanity check the name and key namekeys = [k for k in df.keys() if 'KernelName' in k] assert len(namekeys) @@ -120,20 +120,21 @@ def join_prof(workload_dir, out): assert (df[namekeys[0]] == df[k]).all() df = df.drop(columns=namekeys[1:]) # now take the median of the durations - dkeys = [k for k in df.keys() if 'duration' in k] - duration = df[dkeys].median(axis=1) - # compute min and max, just for sanity - min_duration = df[dkeys].min(axis=1) - max_duration = df[dkeys].max(axis=1) - std_duration = df[dkeys].std(axis=1) - mean_duration = df[dkeys].mean(axis=1) + bkeys = [] + ekeys = [] + for k in df.keys(): + if 'Begin' in k: + bkeys.append(k) + if 'End' in k: + ekeys.append(k) + # compute mean begin and end timestamps + endNs = df[ekeys].mean(axis=1) + beginNs = df[bkeys].mean(axis=1) # and replace - df = df.drop(columns=dkeys) - df['duration'] = duration - df['duration[max]'] = max_duration - df['duration[min]'] = min_duration - df['duration[std]'] = std_duration - df['duration[mean]'] = mean_duration + df= df.drop(columns=bkeys) + df= df.drop(columns=ekeys) + df['BeginNs'] = beginNs + df['EndNs'] = endNs # finally, join the drop key df = df.drop(columns=['key']) # and save to file From f499408c7b042120a8f6501de6546b21a70fddd8 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 8 May 2023 11:55:45 -0500 Subject: [PATCH 03/32] Fix join_prof() call for --roof-only mode Signed-off-by: coleramos425 --- src/omniperf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omniperf b/src/omniperf index 41fce2fbb..520af8446 100755 --- a/src/omniperf +++ b/src/omniperf @@ -398,7 +398,7 @@ def characterize_app(args, VER): pmc_filter(workload_dir, perfmon_dir, args.target) # Separate pmc_perf runs - pmc_perf_split(workload_dir, perfmon_dir) + pmc_perf_split(workload_dir) # Set up a log file log = open(workload_dir + "/log.txt", "w") @@ -456,7 +456,7 @@ def characterize_app(args, VER): # Manually join each pmc_perf*.csv output if args.use_rocscope == False: - join_prof(workload_dir, workload_dir + "/pmc_perf_NEW.csv") + join_prof(workload_dir, workload_dir + "/pmc_perf.csv") ################################################ From 8c173446d2909db48c6cfe024ab54205fbfef1e8 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 8 May 2023 11:56:49 -0500 Subject: [PATCH 04/32] Comply to Python formatting Signed-off-by: coleramos425 --- src/utils/perfagg.py | 80 +++++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 3c09a648b..81537d626 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -86,35 +86,61 @@ }, } + # joins disparate runs less dumbly than rocprof def join_prof(workload_dir, out): files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv") df = None - + for i, file in enumerate(files): - #_df = parse_rocprof_kernels(file) + # _df = parse_rocprof_kernels(file) _df = pd.read_csv(file) key = _df.groupby("KernelName").cumcount() - _df['key'] = _df.KernelName + ' - ' + key.astype(str) - + _df["key"] = _df.KernelName + " - " + key.astype(str) + if df is None: df = _df else: # join by unique index of kernel - df = pd.merge(df, _df, how='inner', on='key', suffixes=('', f'_{i}')) + df = pd.merge(df, _df, how="inner", on="key", suffixes=("", f"_{i}")) # now, we can: - #   A) throw away any of the "boring" duplicats - df = df[[k for k in df.keys() if not any( - check in k for check in [ - 'gpu', 'queue-id', 'queue-index', 'pid', 'tid', 'grd', 'wgr', - 'lds', 'scr', 'vgpr', 'sgpr', 'fbar', 'sig', 'obj'])]] - #   B) any timestamps that are _not_ the duration, which is the one we care - #   about - df = df[[k for k in df.keys() if not any( - check in k for check in [ - 'DispatchNs', 'CompleteNs'])]] - #   C) sanity check the name and key - namekeys = [k for k in df.keys() if 'KernelName' in k] + #   A) throw away any of the "boring" duplicats + df = df[ + [ + k + for k in df.keys() + if not any( + check in k + for check in [ + "gpu", + "queue-id", + "queue-index", + "pid", + "tid", + "grd", + "wgr", + "lds", + "scr", + "vgpr", + "sgpr", + "fbar", + "sig", + "obj", + ] + ) + ] + ] + #   B) any timestamps that are _not_ the duration, which is the one we care + #   about + df = df[ + [ + k + for k in df.keys() + if not any(check in k for check in ["DispatchNs", "CompleteNs"]) + ] + ] + #   C) sanity check the name and key + namekeys = [k for k in df.keys() if "KernelName" in k] assert len(namekeys) for k in namekeys[1:]: assert (df[namekeys[0]] == df[k]).all() @@ -123,26 +149,27 @@ def join_prof(workload_dir, out): bkeys = [] ekeys = [] for k in df.keys(): - if 'Begin' in k: + if "Begin" in k: bkeys.append(k) - if 'End' in k: + if "End" in k: ekeys.append(k) # compute mean begin and end timestamps endNs = df[ekeys].mean(axis=1) beginNs = df[bkeys].mean(axis=1) # and replace - df= df.drop(columns=bkeys) - df= df.drop(columns=ekeys) - df['BeginNs'] = beginNs - df['EndNs'] = endNs + df = df.drop(columns=bkeys) + df = df.drop(columns=ekeys) + df["BeginNs"] = beginNs + df["EndNs"] = endNs # finally, join the drop key - df = df.drop(columns=['key']) + df = df.drop(columns=["key"]) # and save to file df.to_csv(out, index=False) # and delete old file(s) for file in files: os.remove(file) + def pmc_perf_split(workload_dir): workload_perfmon_dir = workload_dir + "/perfmon" lines = open(workload_perfmon_dir + "/pmc_perf.txt", "r").read().splitlines() @@ -160,7 +187,7 @@ def pmc_perf_split(workload_dir): m = re.match(mpattern, stext) if m is None: continue - + # Create separate file for each line fd = open(workload_perfmon_dir + "/pmc_perf_" + str(i) + ".txt", "w") fd.write(stext + "\n\n") @@ -170,12 +197,11 @@ def pmc_perf_split(workload_dir): fd.close() i += 1 - + # Remove old pmc_perf.txt input from perfmon dir os.remove(workload_perfmon_dir + "/pmc_perf.txt") - def perfmon_coalesce(pmc_files_list, workload_dir, soc): workload_perfmon_dir = workload_dir + "/perfmon" From 419e2fcac6ef38bc6f96ff3d8d56c1773e4213ff Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Thu, 11 May 2023 13:00:30 -0500 Subject: [PATCH 05/32] Add grid size option and cmd line option for --join-type Signed-off-by: coleramos425 --- src/omniperf | 4 ++-- src/parser.py | 8 ++++++++ src/utils/perfagg.py | 16 ++++++++++++++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/omniperf b/src/omniperf index 520af8446..c22aaa4e6 100755 --- a/src/omniperf +++ b/src/omniperf @@ -456,7 +456,7 @@ def characterize_app(args, VER): # Manually join each pmc_perf*.csv output if args.use_rocscope == False: - join_prof(workload_dir, workload_dir + "/pmc_perf.csv") + join_prof(workload_dir, args.join_type) ################################################ @@ -685,7 +685,7 @@ def omniperf_profile(args, VER): # Manually join each pmc_perf*.csv output if args.use_rocscope == False: - join_prof(workload_dir, workload_dir + "/pmc_perf.csv") + join_prof(workload_dir, args.join_type) # Generate sysinfo gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof) diff --git a/src/parser.py b/src/parser.py index a18ad5d22..65b29f12e 100644 --- a/src/parser.py +++ b/src/parser.py @@ -182,6 +182,14 @@ def parse(my_parser): required=False, help="\t\t\tDispatch ID filtering.", ) + profile_group.add_argument( + "--join-type", + metavar="", + required=False, + choices=["kernel", "grid"], + default="grid", + help="\t\t\tChoose how to join rocprof runs: (DEFAULT: grid)\n\t\t\t kernel (i.e. By unique kernel name dispatches)\n\t\t\t grid (i.e. By unique kernel name + grid size dispatches)", + ) profile_group.add_argument( "--no-roof", required=False, diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 81537d626..333498b2c 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -88,15 +88,27 @@ # joins disparate runs less dumbly than rocprof -def join_prof(workload_dir, out): +def join_prof(workload_dir, join_type, out=None): + # Set default output directory if not specified + if out == None: + out = workload_dir + "/pmc_perf.csv" files = glob.glob(workload_dir + "/" + "pmc_perf_*.csv") df = None for i, file in enumerate(files): # _df = parse_rocprof_kernels(file) + _df = pd.read_csv(file) key = _df.groupby("KernelName").cumcount() - _df["key"] = _df.KernelName + " - " + key.astype(str) + if join_type == "kernel": + _df["key"] = _df.KernelName + " - " + key.astype(str) + elif join_type == "grid": + _df["key"] = ( + _df.KernelName + " - " + key.astype(str) + " - " + _df.grd.astype(str) + ) + else: + print("ERROR: Unrecognized --join-type") + sys.exit(1) if df is None: df = _df From 444102d92726baf286d1cb321fc2eb3f1bc30ee3 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Thu, 11 May 2023 14:04:02 -0500 Subject: [PATCH 06/32] Redefine boring merge values Signed-off-by: coleramos425 --- src/utils/perfagg.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 333498b2c..8d77ff12d 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -124,17 +124,17 @@ def join_prof(workload_dir, join_type, out=None): if not any( check in k for check in [ - "gpu", + # "gpu", "queue-id", "queue-index", "pid", "tid", - "grd", - "wgr", - "lds", - "scr", - "vgpr", - "sgpr", + # "grd", + # "wgr", + # "lds", + # "scr", + # "vgpr", + # "sgpr", "fbar", "sig", "obj", From d8d834ff608c42a60d0fb58c55e1b0cedc66d0d6 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 15 May 2023 17:04:08 -0500 Subject: [PATCH 07/32] Update VGPR counter (#117) Signed-off-by: coleramos425 --- dashboards/Omniperf_v1.0.8_pub.json | 744 +++++++++--------- .../configs/gfx906/0700_wavefront-launch.yaml | 12 +- .../configs/gfx908/0700_wavefront-launch.yaml | 12 +- .../configs/gfx90a/0700_wavefront-launch.yaml | 12 +- 4 files changed, 395 insertions(+), 385 deletions(-) diff --git a/dashboards/Omniperf_v1.0.8_pub.json b/dashboards/Omniperf_v1.0.8_pub.json index f75a00f0e..25538d117 100644 --- a/dashboards/Omniperf_v1.0.8_pub.json +++ b/dashboards/Omniperf_v1.0.8_pub.json @@ -25,7 +25,7 @@ "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": 43, - "iteration": 1683039685468, + "iteration": 1684188009505, "links": [], "liveNow": false, "panels": [ @@ -1539,7 +1539,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1817,7 +1818,7 @@ }, "rawQuery": true, "refId": "pmc_perf", - "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_life\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&SQ_WAVES\", 0] },\n { \"$multiply\": [4, { \"$divide\": [\"&SQ_WAVE_CYCLES\", \"&SQ_WAVES\"] }] },\n null\n ]\n }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"valu\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VALU\", \"&denom\"] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_MFMA\", \"&denom\"] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VMEM\", \"&denom\"] }\n },\n \"lds_instr\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n },\n \"gws\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_GDS\", \"&denom\"] }\n },\n \"br\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_BRANCH\", \"&denom\"] }\n },\n \"vgpr\": {\n \"$avg\": \"&vgpr\"\n },\n \"sgpr\": {\n \"$avg\": \"&sgpr\"\n },\n \"lds_alloc\": {\n \"$avg\": \"&lds\"\n },\n \"scratch_alloc\": {\n \"$avg\": \"&scr\"\n },\n \"wavefronts\": {\n \"$avg\": \"&SPI_CSN_WAVE\"\n },\n \"workgroups\": {\n \"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"\n },\n \"lds_req\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n }, \n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n },\n \"vl1_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_READ_sum\", \"&denom\"] }\n },\n \"vl1_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_WRITE_sum\", \"&denom\"] }\n },\n \"vl1_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"] }\n },\n \"il1_fetch\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"il1_hit\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_REQ\"] }\n },\n \"il1_l2_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_INST_REQ\", \"&denom\"] }\n },\n \"sl1_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"sl1_hit\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQC_DCACHE_REQ\", 0]},\n { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_REQ\"] },\n \"\"\n ]\n }\n},\n \"sl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"sl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"sl1_l2_atom\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"vl1_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vl1_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0] },\n { \"$divide\": [\"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\"] },\n null\n ]\n }\n },\n \"vl1_coales\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n 0\n ]\n }\n },\n \"vl1_stall\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n \"\"\n ]\n }},\n \"vl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_READ_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }\n },\n \"l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_READ_sum\", \"&denom\"] }\n },\n \"l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_WRITE_sum\", \"&denom\"] }\n },\n \"l2_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_ATOMIC_sum\", \"&denom\"] }\n },\n \"l2_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0] },\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null\n ]\n }\n },\n \"l2_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"l2_wr_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"fabric_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_RDREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_wr_lat\": { \n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_WRREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_atom_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\"] },\n null\n ]\n }\n },\n \"l2_fabric_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_ATOMIC_sum\", \"&denom\"] }\n },\n \"hbm_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\"] }\n },\n \"hbm_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Life\",\n \"Alias\": \"wave_life_\",\n \"Value\": { \"$round\": [\"&wave_life\", 0] }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Alias\": \"active_cu_\",\n \"Value\": {\"$concat\": [\"$numActiveCUs\", \"/\", \"$numCU\"]}\n },\n {\n \"Metric\": \"SALU\",\n \"Alias\": \"salu_\",\n \"Value\": { \"$round\": [\"&salu\", 0] }\n },\n {\n \"Metric\": \"SMEM\",\n \"Alias\": \"smem_\",\n \"Value\": { \"$round\": [\"&smem\", 0] }\n },\n {\n \"Metric\": \"VALU\",\n \"Alias\": \"valu_\",\n \"Value\": { \"$round\": [\"&valu\", 0] }\n },\n {\n \"Metric\": \"MFMA\",\n \"Alias\": \"mfma_\",\n \"Value\": { \"$round\": [\"&mfma\", 0] }\n },\n {\n \"Metric\": \"VMEM\",\n \"Alias\": \"vmem_\",\n \"Value\": { \"$round\": [\"&vmem\", 0] }\n },\n {\n \"Metric\": \"LDS\",\n \"Alias\": \"lds_\",\n \"Value\": { \"$round\": [\"&lds_instr\", 0] }\n },\n {\n \"Metric\": \"GWS\",\n \"Alias\": \"gws_\",\n \"Value\": { \"$round\": [\"&gws\", 0] }\n },\n {\n \"Metric\": \"BR\",\n \"Alias\": \"br_\",\n \"Value\": { \"$round\": [\"&br\", 0] }\n },\n {\n \"Metric\": \"VGPR\",\n \"Alias\": \"vgpr_\",\n \"Value\": { \"$round\": [\"&vgpr\", 0] }\n },\n {\n \"Metric\": \"SGPR\",\n \"Alias\": \"sgpr_\",\n \"Value\": { \"$round\": [\"&sgpr\", 0] }\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Alias\": \"lds_alloc_\",\n \"Value\": { \"$round\": [\"&lds_alloc\", 0] }\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Alias\": \"scratch_alloc_\",\n \"Value\": { \"$round\": [\"&scratch_alloc\", 0] }\n },\n {\n \"Metric\": \"Wavefronts\",\n \"Alias\": \"wavefronts_\",\n \"Value\": { \"$round\": [\"&wavefronts\", 0] }\n },\n {\n \"Metric\": \"Workgroups\",\n \"Alias\": \"workgroups_\",\n \"Value\": { \"$round\": [\"&workgroups\", 0] }\n },\n {\n \"Metric\": \"LDS Req\",\n \"Alias\": \"lds_req_\",\n \"Value\": { \"$round\": [\"&lds_req\", 0] }\n },\n {\n \"Metric\": \"IL1 Fetch\",\n \"Alias\": \"il1_fetch_\",\n \"Value\": { \"$round\": [\"&il1_fetch\", 0] }\n },\n {\n \"Metric\": \"IL1 Hit\",\n \"Alias\": \"il1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&il1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"IL1_L2 Rd\",\n \"Alias\": \"il1_l2_req_\",\n \"Value\": { \"$round\": [\"&il1_l2_req\", 0] }\n },\n {\n \"Metric\": \"vL1D Rd\",\n \"Alias\": \"sl1_rd_\",\n \"Value\": { \"$round\": [\"&sl1_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D Hit\",\n \"Alias\": \"sl1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&sl1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Rd\",\n \"Alias\": \"sl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&sl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Wr\",\n \"Alias\": \"sl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&sl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Atomic\",\n \"Alias\": \"sl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&sl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Rd\",\n \"Alias\": \"vl1_rd_\",\n \"Value\": { \"$round\": [\"&vl1_rd\", 0] }\n },\n {\n \"Metric\": \"VL1 Wr\",\n \"Alias\": \"vl1_wr_\",\n \"Value\": { \"$round\": [\"&vl1_wr\", 0] }\n },\n {\n \"Metric\": \"VL1 Atomic\",\n \"Alias\": \"vl1_atom_\",\n \"Value\": { \"$round\": [\"&vl1_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Hit\",\n \"Alias\": \"vl1_hit_\",\n \"Value\": { \"$round\": [\"&vl1_hit\", 0] }\n },\n {\n \"Metric\": \"VL1 Lat\",\n \"Alias\": \"vl1_lat_\",\n \"Value\": { \"$round\": [\"&vl1_lat\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Rd\",\n \"Alias\": \"vl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&vl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Wr\",\n \"Alias\": \"vl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&vl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1_L2 Atomic\",\n \"Alias\": \"vl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&vl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Rd\",\n \"Alias\": \"l2_rd_\",\n \"Value\": { \"$round\": [\"&l2_rd\", 0] }\n },\n {\n \"Metric\": \"L2 Wr\",\n \"Alias\": \"l2_wr_\",\n \"Value\": { \"$round\": [\"&l2_wr\", 0] }\n },\n {\n \"Metric\": \"L2 Atomic\",\n \"Alias\": \"l2_atom_\",\n \"Value\": { \"$round\": [\"&l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Hit\",\n \"Alias\": \"l2_hit_\",\n \"Value\": { \"$round\": [\"&l2_hit\", 0] }\n },\n {\n \"Metric\": \"L2 Rd Lat\",\n \"Alias\": \"l2_rd_lat_\",\n \"Value\": { \"$round\": [\"&l2_rd_lat\", 0] }\n },\n {\n \"Metric\": \"L2 Wr Lat\",\n \"Alias\": \"l2_wr_lat_\",\n \"Value\": { \"$round\": [\"&l2_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Rd Lat\",\n \"Alias\": \"fabric_rd_lat_\",\n \"Value\": { \"$round\": [\"&fabric_rd_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Wr Lat\",\n \"Alias\": \"fabric_wr_lat_\",\n \"Value\": { \"$round\": [\"&fabric_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Atomic Lat\",\n \"Alias\": \"fabric_atom_lat_\",\n \"Value\": { \"$round\": [\"&fabric_atom_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Rd\",\n \"Alias\": \"l2_fabric_rd_\",\n \"Value\": { \"$round\": [\"&l2_fabric_rd\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Wr\",\n \"Alias\": \"l2_fabric_wr_\",\n \"Value\": { \"$round\": [\"&l2_fabric_wr\", 0] }\n },\n {\n \"Metric\": \"Fabric_l2 Atomic\",\n \"Alias\": \"l2_fabric_atom_\",\n \"Value\": { \"$round\": [\"&l2_fabric_atom\", 0] }\n },\n {\n \"Metric\": \"HBM Rd\",\n \"Alias\": \"hbm_rd_\",\n \"Value\": { \"$round\": [\"&hbm_rd\", 0] }\n },\n {\n \"Metric\": \"HBM Wr\",\n \"Alias\": \"hbm_wr_\",\n \"Value\": { \"$round\": [\"&hbm_wr\", 0] }\n },\n {\n \"Metric\": \"LDS Util\",\n \"Alias\": \"lds_util_\",\n \"Value\": { \"$round\": [\"&lds_util\", 0] }\n },\n {\n \"Metric\": \"VL1 Coalesce\",\n \"Alias\": \"vl1_coales_\",\n \"Value\": { \"$round\": [\"&vl1_coales\", 0]}\n },\n {\n \"Metric\": \"VL1 Stall\",\n \"Alias\": \"vl1_stall_\",\n \"Value\": { \"$round\": [\"&vl1_stall\", 0]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"wave_life\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&SQ_WAVES\", 0] },\n { \"$multiply\": [4, { \"$divide\": [\"&SQ_WAVE_CYCLES\", \"&SQ_WAVES\"] }] },\n null\n ]\n }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"valu\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VALU\", \"&denom\"] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_MFMA\", \"&denom\"] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_VMEM\", \"&denom\"] }\n },\n \"lds_instr\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n },\n \"gws\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_GDS\", \"&denom\"] }\n },\n \"br\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_BRANCH\", \"&denom\"] }\n },\n \"vgpr\": {\n \"$avg\": \"&arch_vgpr\"\n },\n \"sgpr\": {\n \"$avg\": \"&sgpr\"\n },\n \"lds_alloc\": {\n \"$avg\": \"&lds\"\n },\n \"scratch_alloc\": {\n \"$avg\": \"&scr\"\n },\n \"wavefronts\": {\n \"$avg\": \"&SPI_CSN_WAVE\"\n },\n \"workgroups\": {\n \"$avg\": \"&SPI_CSN_NUM_THREADGROUPS\"\n },\n \"lds_req\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\", \"&denom\"] }\n }, \n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n },\n \"vl1_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_READ_sum\", \"&denom\"] }\n },\n \"vl1_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TOTAL_WRITE_sum\", \"&denom\"] }\n },\n \"vl1_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TOTAL_ATOMIC_WITH_RET_sum\", \"&TCP_TOTAL_ATOMIC_WITHOUT_RET_sum\"] }, \"&denom\"] }\n },\n \"il1_fetch\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_REQ\", \"&denom\"] }\n },\n \"il1_hit\": {\n \"$avg\": { \"$divide\": [\"&SQC_ICACHE_HITS\", \"&SQC_ICACHE_REQ\"] }\n },\n \"il1_l2_req\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_INST_REQ\", \"&denom\"] }\n },\n \"sl1_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_DCACHE_REQ\", \"&denom\"] }\n },\n \"sl1_hit\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQC_DCACHE_REQ\", 0]},\n { \"$divide\": [\"&SQC_DCACHE_HITS\", \"&SQC_DCACHE_REQ\"] },\n \"\"\n ]\n }\n},\n \"sl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_READ_REQ\", \"&denom\"] }\n },\n \"sl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_WRITE_REQ\", \"&denom\"] }\n },\n \"sl1_l2_atom\": {\n \"$avg\": { \"$divide\": [\"&SQC_TC_DATA_ATOMIC_REQ\", \"&denom\"] }\n },\n \"vl1_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCP_TOTAL_CACHE_ACCESSES_sum\", 0] },\n { \"$subtract\": [100, { \"$divide\": [{ \"$multiply\": [100, { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] }, \"&TCP_TOTAL_CACHE_ACCESSES_sum\"] }] },\n null\n ]\n }\n },\n \"vl1_lat\": {\n \"$avg\": { \n \"$cond\": [\n { \"$ne\": [\"&TCP_TA_TCP_STATE_READ_sum\", 0] },\n { \"$divide\": [\"&TCP_TCP_LATENCY_sum\", \"&TCP_TA_TCP_STATE_READ_sum\"] },\n null\n ]\n }\n },\n \"vl1_coales\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [\"&TCP_TOTAL_ACCESSES_sum\", 0] },\n { \"$divide\": [{\"$multiply\": [\"&TA_TOTAL_WAVEFRONTS_sum\", 64, 100]}, {\"$multiply\": [\"&TCP_TOTAL_ACCESSES_sum\", 4]}] },\n 0\n ]\n }\n },\n \"vl1_stall\": {\"$avg\":{\n \"$cond\":[\n {\"$ne\": [\"&TCP_GATE_EN1_sum\", 0]}, \n {\"$divide\": [ { \"$multiply\": [100, \"&TCP_TCR_TCP_STALL_CYCLES_sum\"] }, \"&TCP_GATE_EN1_sum\"] },\n \"\"\n ]\n }},\n \"vl1_l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_READ_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&denom\"] }\n },\n \"vl1_l2_atom\": {\n \"$avg\": { \"$divide\": [{ \"$add\": [\"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, \"&denom\"] }\n },\n \"l2_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_READ_sum\", \"&denom\"] }\n },\n \"l2_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_WRITE_sum\", \"&denom\"] }\n },\n \"l2_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_ATOMIC_sum\", \"&denom\"] }\n },\n \"l2_hit\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }, 0] },\n { \"$divide\": [{ \"$multiply\": [100, \"&TCC_HIT_sum\"] }, { \"$add\": [\"&TCC_HIT_sum\", \"&TCC_MISS_sum\"] }] },\n null\n ]\n }\n },\n \"l2_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_READ_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_READ_REQ_sum\", \"&TCP_TCC_ATOMIC_WITH_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"l2_wr_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [{ \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }, 0] },\n { \"$divide\": [\"&TCP_TCC_WRITE_REQ_LATENCY_sum\", { \"$add\": [\"&TCP_TCC_WRITE_REQ_sum\", \"&TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum\"] }] },\n null\n ]\n }\n },\n \"fabric_rd_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_RDREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_RDREQ_LEVEL_sum\", \"&TCC_EA_RDREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_wr_lat\": { \n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_WRREQ_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_WRREQ_LEVEL_sum\", \"&TCC_EA_WRREQ_sum\"] },\n null\n ]\n }\n },\n \"fabric_atom_lat\": {\n \"$avg\": {\n \"$cond\": [\n { \"$ne\": [\"&TCC_EA_ATOMIC_sum\", 0] },\n { \"$divide\": [\"&TCC_EA_ATOMIC_LEVEL_sum\", \"&TCC_EA_ATOMIC_sum\"] },\n null\n ]\n }\n },\n \"l2_fabric_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_sum\", \"&denom\"] }\n },\n \"l2_fabric_atom\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_ATOMIC_sum\", \"&denom\"] }\n },\n \"hbm_rd\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_RDREQ_DRAM_sum\", \"&denom\"] }\n },\n \"hbm_wr\": {\n \"$avg\": { \"$divide\": [\"&TCC_EA_WRREQ_DRAM_sum\", \"&denom\"] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Wave Life\",\n \"Alias\": \"wave_life_\",\n \"Value\": { \"$round\": [\"&wave_life\", 0] }\n },\n {\n \"Metric\": \"Active CUs\",\n \"Alias\": \"active_cu_\",\n \"Value\": {\"$concat\": [\"$numActiveCUs\", \"/\", \"$numCU\"]}\n },\n {\n \"Metric\": \"SALU\",\n \"Alias\": \"salu_\",\n \"Value\": { \"$round\": [\"&salu\", 0] }\n },\n {\n \"Metric\": \"SMEM\",\n \"Alias\": \"smem_\",\n \"Value\": { \"$round\": [\"&smem\", 0] }\n },\n {\n \"Metric\": \"VALU\",\n \"Alias\": \"valu_\",\n \"Value\": { \"$round\": [\"&valu\", 0] }\n },\n {\n \"Metric\": \"MFMA\",\n \"Alias\": \"mfma_\",\n \"Value\": { \"$round\": [\"&mfma\", 0] }\n },\n {\n \"Metric\": \"VMEM\",\n \"Alias\": \"vmem_\",\n \"Value\": { \"$round\": [\"&vmem\", 0] }\n },\n {\n \"Metric\": \"LDS\",\n \"Alias\": \"lds_\",\n \"Value\": { \"$round\": [\"&lds_instr\", 0] }\n },\n {\n \"Metric\": \"GWS\",\n \"Alias\": \"gws_\",\n \"Value\": { \"$round\": [\"&gws\", 0] }\n },\n {\n \"Metric\": \"BR\",\n \"Alias\": \"br_\",\n \"Value\": { \"$round\": [\"&br\", 0] }\n },\n {\n \"Metric\": \"VGPR\",\n \"Alias\": \"vgpr_\",\n \"Value\": { \"$round\": [\"&vgpr\", 0] }\n },\n {\n \"Metric\": \"SGPR\",\n \"Alias\": \"sgpr_\",\n \"Value\": { \"$round\": [\"&sgpr\", 0] }\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Alias\": \"lds_alloc_\",\n \"Value\": { \"$round\": [\"&lds_alloc\", 0] }\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Alias\": \"scratch_alloc_\",\n \"Value\": { \"$round\": [\"&scratch_alloc\", 0] }\n },\n {\n \"Metric\": \"Wavefronts\",\n \"Alias\": \"wavefronts_\",\n \"Value\": { \"$round\": [\"&wavefronts\", 0] }\n },\n {\n \"Metric\": \"Workgroups\",\n \"Alias\": \"workgroups_\",\n \"Value\": { \"$round\": [\"&workgroups\", 0] }\n },\n {\n \"Metric\": \"LDS Req\",\n \"Alias\": \"lds_req_\",\n \"Value\": { \"$round\": [\"&lds_req\", 0] }\n },\n {\n \"Metric\": \"IL1 Fetch\",\n \"Alias\": \"il1_fetch_\",\n \"Value\": { \"$round\": [\"&il1_fetch\", 0] }\n },\n {\n \"Metric\": \"IL1 Hit\",\n \"Alias\": \"il1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&il1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"IL1_L2 Rd\",\n \"Alias\": \"il1_l2_req_\",\n \"Value\": { \"$round\": [\"&il1_l2_req\", 0] }\n },\n {\n \"Metric\": \"vL1D Rd\",\n \"Alias\": \"sl1_rd_\",\n \"Value\": { \"$round\": [\"&sl1_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D Hit\",\n \"Alias\": \"sl1_hit_\",\n \"Value\": { \"$round\": [{ \"$multiply\": [\"&sl1_hit\", 100] }, 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Rd\",\n \"Alias\": \"sl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&sl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Wr\",\n \"Alias\": \"sl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&sl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1D_L2 Atomic\",\n \"Alias\": \"sl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&sl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Rd\",\n \"Alias\": \"vl1_rd_\",\n \"Value\": { \"$round\": [\"&vl1_rd\", 0] }\n },\n {\n \"Metric\": \"VL1 Wr\",\n \"Alias\": \"vl1_wr_\",\n \"Value\": { \"$round\": [\"&vl1_wr\", 0] }\n },\n {\n \"Metric\": \"VL1 Atomic\",\n \"Alias\": \"vl1_atom_\",\n \"Value\": { \"$round\": [\"&vl1_atom\", 0] }\n },\n {\n \"Metric\": \"VL1 Hit\",\n \"Alias\": \"vl1_hit_\",\n \"Value\": { \"$round\": [\"&vl1_hit\", 0] }\n },\n {\n \"Metric\": \"VL1 Lat\",\n \"Alias\": \"vl1_lat_\",\n \"Value\": { \"$round\": [\"&vl1_lat\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Rd\",\n \"Alias\": \"vl1_l2_rd_\",\n \"Value\": { \"$round\": [\"&vl1_l2_rd\", 0] }\n },\n {\n \"Metric\": \"VL1_L2 Wr\",\n \"Alias\": \"vl1_l2_wr_\",\n \"Value\": { \"$round\": [\"&vl1_l2_wr\", 0] }\n },\n {\n \"Metric\": \"vL1_L2 Atomic\",\n \"Alias\": \"vl1_l2_atom_\",\n \"Value\": { \"$round\": [\"&vl1_l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Rd\",\n \"Alias\": \"l2_rd_\",\n \"Value\": { \"$round\": [\"&l2_rd\", 0] }\n },\n {\n \"Metric\": \"L2 Wr\",\n \"Alias\": \"l2_wr_\",\n \"Value\": { \"$round\": [\"&l2_wr\", 0] }\n },\n {\n \"Metric\": \"L2 Atomic\",\n \"Alias\": \"l2_atom_\",\n \"Value\": { \"$round\": [\"&l2_atom\", 0] }\n },\n {\n \"Metric\": \"L2 Hit\",\n \"Alias\": \"l2_hit_\",\n \"Value\": { \"$round\": [\"&l2_hit\", 0] }\n },\n {\n \"Metric\": \"L2 Rd Lat\",\n \"Alias\": \"l2_rd_lat_\",\n \"Value\": { \"$round\": [\"&l2_rd_lat\", 0] }\n },\n {\n \"Metric\": \"L2 Wr Lat\",\n \"Alias\": \"l2_wr_lat_\",\n \"Value\": { \"$round\": [\"&l2_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Rd Lat\",\n \"Alias\": \"fabric_rd_lat_\",\n \"Value\": { \"$round\": [\"&fabric_rd_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Wr Lat\",\n \"Alias\": \"fabric_wr_lat_\",\n \"Value\": { \"$round\": [\"&fabric_wr_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric Atomic Lat\",\n \"Alias\": \"fabric_atom_lat_\",\n \"Value\": { \"$round\": [\"&fabric_atom_lat\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Rd\",\n \"Alias\": \"l2_fabric_rd_\",\n \"Value\": { \"$round\": [\"&l2_fabric_rd\", 0] }\n },\n {\n \"Metric\": \"Fabric_L2 Wr\",\n \"Alias\": \"l2_fabric_wr_\",\n \"Value\": { \"$round\": [\"&l2_fabric_wr\", 0] }\n },\n {\n \"Metric\": \"Fabric_l2 Atomic\",\n \"Alias\": \"l2_fabric_atom_\",\n \"Value\": { \"$round\": [\"&l2_fabric_atom\", 0] }\n },\n {\n \"Metric\": \"HBM Rd\",\n \"Alias\": \"hbm_rd_\",\n \"Value\": { \"$round\": [\"&hbm_rd\", 0] }\n },\n {\n \"Metric\": \"HBM Wr\",\n \"Alias\": \"hbm_wr_\",\n \"Value\": { \"$round\": [\"&hbm_wr\", 0] }\n },\n {\n \"Metric\": \"LDS Util\",\n \"Alias\": \"lds_util_\",\n \"Value\": { \"$round\": [\"&lds_util\", 0] }\n },\n {\n \"Metric\": \"VL1 Coalesce\",\n \"Alias\": \"vl1_coales_\",\n \"Value\": { \"$round\": [\"&vl1_coales\", 0]}\n },\n {\n \"Metric\": \"VL1 Stall\",\n \"Alias\": \"vl1_stall_\",\n \"Value\": { \"$round\": [\"&vl1_stall\", 0]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"$array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"$array\"\n }}\n]);", "type": "table" }, { @@ -4122,7 +4123,6 @@ "id": 185, "panels": [ { - "datasource": {}, "fieldConfig": { "defaults": { "color": { @@ -4322,7 +4322,7 @@ "hide": false, "rawQuery": true, "refId": "B", - "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": \"&grd\"},\n \"gridSize_min\":{\"$min\": \"&grd\"},\n \"gridSize_max\":{\"$max\": \"&grd\"},\n\n \"workSize_avg\":{\"$avg\": \"&wgr\"},\n \"workSize_min\":{\"$min\": \"&wgr\"},\n \"workSize_max\":{\"$max\": \"&wgr\"},\n\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n\n \"vgprs_avg\":{\"$avg\": \"&vgpr\"},\n \"vgprs_min\":{\"$min\": \"&vgpr\"},\n \"vgprs_max\":{\"$max\": \"&vgpr\"},\n\n \"sgprs_avg\":{\"$avg\": \"&sgpr\"},\n \"sgprs_min\":{\"$min\": \"&sgpr\"},\n \"sgprs_max\":{\"$max\": \"&sgpr\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&lds\"},\n \"ldsAlloc_min\":{\"$min\": \"&lds\"},\n \"ldsAlloc_max\":{\"$max\": \"&lds\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&scr\"},\n \"scratchAlloc_min\":{\"$min\": \"&scr\"},\n \"scratchAlloc_max\":{\"$max\": \"&scr\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": \"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored Wavefronts\",\n \"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": \"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Wavefront)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n \"gridSize_avg\":{\"$avg\": \"&grd\"},\n \"gridSize_min\":{\"$min\": \"&grd\"},\n \"gridSize_max\":{\"$max\": \"&grd\"},\n\n \"workSize_avg\":{\"$avg\": \"&wgr\"},\n \"workSize_min\":{\"$min\": \"&wgr\"},\n \"workSize_max\":{\"$max\": \"&wgr\"},\n\n \"totWavefront_avg\":{\"$avg\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_min\":{\"$min\": \"&SPI_CSN_WAVE\"},\n \"totWavefront_max\":{\"$max\": \"&SPI_CSN_WAVE\"},\n\n \"savedWavefront_avg\":{\"$avg\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_min\":{\"$min\": \"&SQ_WAVES_SAVED\"},\n \"savedWavefront_max\":{\"$max\": \"&SQ_WAVES_SAVED\"},\n\n \"restoredWavefront_avg\":{\"$avg\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_min\":{\"$min\": \"&SQ_WAVES_RESTORED\"},\n \"restoredWavefront_max\":{\"$max\": \"&SQ_WAVES_RESTORED\"},\n\n \"vgprs_avg\":{\n \"$avg\": {\"$cond\": [\n {\"$ne\": [{\"$type\": \"&vgpr\"}, \"missing\"]},\n \"&vgpr\",\n \"&arch_vgpr\"\n ] \n }\n },\n \"vgprs_min\":{\n \"$min\": {\"$cond\": [\n {\"$ne\": [{\"$type\": \"&vgpr\"}, \"missing\"]},\n \"&vgpr\",\n \"&arch_vgpr\"\n ] \n }\n },\n \"vgprs_max\":{\n \"$max\": {\"$cond\": [\n {\"$ne\": [{\"$type\": \"&vgpr\"}, \"missing\"]},\n \"&vgpr\",\n \"&arch_vgpr\"\n ] \n }\n },\n\n \"agprs_avg\":{\n \"$avg\": \"&accum_vgpr\"\n },\n \"agprs_min\":{\n \"$min\": \"&accum_vgpr\"\n },\n \"agprs_max\":{\n \"$max\": \"&accum_vgpr\"\n },\n\n \"sgprs_avg\":{\"$avg\": \"&sgpr\"},\n \"sgprs_min\":{\"$min\": \"&sgpr\"},\n \"sgprs_max\":{\"$max\": \"&sgpr\"},\n\n \"ldsAlloc_avg\":{\"$avg\": \"&lds\"},\n \"ldsAlloc_min\":{\"$min\": \"&lds\"},\n \"ldsAlloc_max\":{\"$max\": \"&lds\"},\n\n \"scratchAlloc_avg\":{\"$avg\": \"&scr\"},\n \"scratchAlloc_min\":{\"$min\": \"&scr\"},\n \"scratchAlloc_max\":{\"$max\": \"&scr\"}\n\n }},\n {\"$set\": {\n \"array\": [\n {\n \"Metric\": \"Grid Size\",\n \"Avg\": \"&gridSize_avg\",\n \"Min\": \"&gridSize_min\",\n \"Max\": \"&gridSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Workgroup Size\",\n \"Avg\": \"&workSize_avg\",\n \"Min\": \"&workSize_min\",\n \"Max\": \"&workSize_max\",\n \"Unit\": \"Work Items\"\n },\n {\n \"Metric\": \"Total Wavefronts\",\n \"Avg\": \"&totWavefront_avg\",\n \"Min\": \"&totWavefront_min\",\n \"Max\": \"&totWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Saved Wavefronts\",\n \"Avg\": \"&savedWavefront_avg\",\n \"Min\": \"&savedWavefront_min\",\n \"Max\": \"&savedWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"Restored Wavefronts\",\n \"Avg\": \"&restoredWavefront_avg\",\n \"Min\": \"&restoredWavefront_min\",\n \"Max\": \"&restoredWavefront_max\",\n \"Unit\": \"Wavefronts\"\n },\n {\n \"Metric\": \"VGPRs\",\n \"Avg\": \"&vgprs_avg\",\n \"Min\": \"&vgprs_min\",\n \"Max\": \"&vgprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"AGPRs\",\n \"Avg\": \"&agprs_avg\",\n \"Min\": \"&agprs_min\",\n \"Max\": \"&agprs_max\",\n \"Unit\": \"Registers\"\n },\n {\n \"Metric\": \"SGPRs\",\n \"Avg\": \"&sgprs_avg\",\n \"Min\": \"&sgprs_min\",\n \"Max\": \"&sgprs_max\",\n \"Unit\":\"Registers\"\n },\n {\n \"Metric\": \"LDS Allocation\",\n \"Avg\": \"&ldsAlloc_avg\",\n \"Min\": \"&ldsAlloc_min\",\n \"Max\": \"&ldsAlloc_max\",\n \"Unit\": \"Bytes\"\n },\n {\n \"Metric\": \"Scratch Allocation\",\n \"Avg\": \"&scratchAlloc_avg\",\n \"Min\": \"&scratchAlloc_min\",\n \"Max\": \"&scratchAlloc_max\",\n \"Unit\": \"Bytes\"\n }\n \n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n]);", "type": "table" } ], @@ -4578,7 +4578,7 @@ "type": "row" }, { - "collapsed": false, + "collapsed": true, "datasource": { "type": "amd-miperf-data-plugin", "uid": "oVK0I__nk" @@ -4590,394 +4590,391 @@ "y": 8 }, "id": 209, - "panels": [], - "targets": [ + "panels": [ { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "oVK0I__nk" + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [] }, - "refId": "A" - } - ], - "title": "Compute Unit - Instruction Mix", - "type": "row" - }, - { - "datasource": {}, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" + "gridPos": { + "h": 17, + "w": 12, + "x": 0, + "y": 9 }, - "decimals": 0, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "id": 12, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": true + }, + "showUnfilled": true, + "text": { + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" }, - { - "color": "red", - "value": 80 + "rawQuery": true, + "refId": "A", + "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"valu\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector\",\n \"count\": \"&valu\"\n },\n {\n \"metric\": \"VMEM\",\n \"count\": \"&vmem\"\n },\n {\n \"metric\": \"LDS\",\n \"count\": \"&lds\"\n },\n {\n \"metric\": \"VALU - MFMA\",\n \"count\": \"&mfma\"\n },\n {\n \"metric\": \"SALU\",\n \"count\": \"&salu\"\n },\n {\n \"metric\": \"SMEM\",\n \"count\": \"&smem\"\n },\n {\n \"metric\": \"Branch\",\n \"count\": \"&branch\"\n },\n {\n \"metric\": \"GDS\",\n \"count\": \"&gds\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n \n {\"$group\": {\n \"_id\": null,\n \"valu\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector (Baseline)\",\n \"count\": \"&valu\"\n },\n {\n \"metric\": \"VMEM (Baseline)\",\n \"count\": \"&vmem\"\n },\n {\n \"metric\": \"LDS (Baseline)\",\n \"count\": \"&lds\"\n },\n {\n \"metric\": \"VALU - MFMA (Baseline)\",\n \"count\": \"&mfma\"\n },\n {\n \"metric\": \"SALU (Baseline)\",\n \"count\": \"&salu\"\n },\n {\n \"metric\": \"SMEM (Baseline)\",\n \"count\": \"&smem\"\n },\n {\n \"metric\": \"Branch (Baseline)\",\n \"count\": \"&branch\"\n },\n {\n \"metric\": \"GDS (Baseline)\",\n \"count\": \"&gds\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "Instruction Mix", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] } - ] - }, - "unit": "locale" - }, - "overrides": [] - }, - "gridPos": { - "h": 17, - "w": 12, - "x": 0, - "y": 9 - }, - "id": 12, - "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": true + } + ], + "transparent": true, + "type": "bargauge" }, - "showUnfilled": true, - "text": { - "valueSize": 16 - } - }, - "pluginVersion": "8.3.4", - "targets": [ { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] }, - "rawQuery": true, - "refId": "A", - "target": "$Workload1.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"valu\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector\",\n \"count\": \"&valu\"\n },\n {\n \"metric\": \"VMEM\",\n \"count\": \"&vmem\"\n },\n {\n \"metric\": \"LDS\",\n \"count\": \"&lds\"\n },\n {\n \"metric\": \"VALU - MFMA\",\n \"count\": \"&mfma\"\n },\n {\n \"metric\": \"SALU\",\n \"count\": \"&salu\"\n },\n {\n \"metric\": \"SMEM\",\n \"count\": \"&smem\"\n },\n {\n \"metric\": \"Branch\",\n \"count\": \"&branch\"\n },\n {\n \"metric\": \"GDS\",\n \"count\": \"&gds\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", - "type": "table" - }, - { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" + "gridPos": { + "h": 24, + "w": 12, + "x": 12, + "y": 9 }, - "hide": false, - "rawQuery": true, - "refId": "B", - "target": "$Workload2.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n \n {\"$group\": {\n \"_id\": null,\n \"valu\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VALU\", \"&SQ_INSTS_MFMA\"]}, \"&denom\" ] }\n },\n \"mfma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_MFMA\", \"&denom\" ] }\n },\n \"vmem\": {\n \"$avg\": { \"$divide\": [ { \"$subtract\": [\"&SQ_INSTS_VMEM\", \"&SQ_INSTS_FLAT_LDS_ONLY\"]}, \"&denom\" ] }\n },\n \"lds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_LDS\", \"&denom\" ] }\n },\n \"salu\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SALU\", \"&denom\" ] }\n },\n \"smem\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_SMEM\", \"&denom\" ] }\n },\n \"branch\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_BRANCH\", \"&denom\" ] }\n },\n \"gds\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_GDS\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"VALU - Vector (Baseline)\",\n \"count\": \"&valu\"\n },\n {\n \"metric\": \"VMEM (Baseline)\",\n \"count\": \"&vmem\"\n },\n {\n \"metric\": \"LDS (Baseline)\",\n \"count\": \"&lds\"\n },\n {\n \"metric\": \"VALU - MFMA (Baseline)\",\n \"count\": \"&mfma\"\n },\n {\n \"metric\": \"SALU (Baseline)\",\n \"count\": \"&salu\"\n },\n {\n \"metric\": \"SMEM (Baseline)\",\n \"count\": \"&smem\"\n },\n {\n \"metric\": \"Branch (Baseline)\",\n \"count\": \"&branch\"\n },\n {\n \"metric\": \"GDS (Baseline)\",\n \"count\": \"&gds\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", - "type": "table" - } - ], - "title": "Instruction Mix", - "transformations": [ - { - "id": "merge", - "options": {} - }, - { - "id": "sortBy", + "id": 24, "options": { - "fields": {}, - "sort": [ - { - "field": "metric" - } - ] - } - } - ], - "transparent": true, - "type": "bargauge" - }, - { - "datasource": {}, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^count$/", + "limit": 100, + "values": true + }, + "showUnfilled": true, + "text": {} }, - "mappings": [], - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green", - "value": null + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" }, - { - "color": "red", - "value": 80 + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32 (Baseline)\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64 (Baseline)\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD (Baseline)\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL (Baseline)\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA (Baseline)\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans (Baseline)\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD (Baseline)\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL (Baseline)\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA (Baseline)\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans (Baseline)\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD (Baseline)\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL (Baseline)\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA (Baseline)\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans (Baseline)\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion (Baseline)\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VALU Arithmetic Instr Mix", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "metric" + } + ] } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 24, - "w": 12, - "x": 12, - "y": 9 - }, - "id": 24, - "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "/^count$/", - "limit": 100, - "values": true + } + ], + "transparent": true, + "type": "bargauge" }, - "showUnfilled": true, - "text": {} - }, - "pluginVersion": "8.3.4", - "targets": [ { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] }, - "hide": false, - "rawQuery": true, - "refId": "A", - "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", - "type": "table" - }, - { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 26 }, - "hide": false, - "rawQuery": true, - "refId": "B", - "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"count_int_i32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT32\", \"&denom\" ] }\n },\n \"count_int_i64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_INT64\", \"&denom\" ] }\n },\n \"count_f16_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F16\", \"&denom\" ] }\n },\n \"count_f16_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F16\", \"&denom\" ] }\n },\n \"count_f16_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F16\", \"&denom\" ] }\n },\n \"count_f16_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F16\", \"&denom\" ] }\n },\n \"count_f32_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F32\", \"&denom\" ] }\n },\n \"count_f32_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F32\", \"&denom\" ] }\n },\n \"count_f32_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F32\", \"&denom\" ] }\n },\n \"count_f32_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F32\", \"&denom\" ] }\n },\n \"count_f64_add\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_ADD_F64\", \"&denom\" ] }\n },\n \"count_f64_MUL\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MUL_F64\", \"&denom\" ] }\n },\n \"count_f64_fma\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_FMA_F64\", \"&denom\" ] }\n },\n \"count_f64_trans\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_TRANS_F64\", \"&denom\" ] }\n },\n \"conversion\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_CVT\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"INT32 (Baseline)\",\n \"count\": \"&count_int_i32\"\n },\n {\n \"metric\": \"INT64 (Baseline)\",\n \"count\": \"&count_int_i64\"\n },\n {\n \"metric\": \"F16-ADD (Baseline)\",\n \"count\": \"&count_f16_add\"\n },\n {\n \"metric\": \"F16-MUL (Baseline)\",\n \"count\": \"&count_f16_MUL\"\n },\n {\n \"metric\": \"F16-FMA (Baseline)\",\n \"count\": \"&count_f16_fma\"\n },\n {\n \"metric\": \"F16-Trans (Baseline)\",\n \"count\": \"&count_f16_trans\"\n },\n {\n \"metric\": \"F32-ADD (Baseline)\",\n \"count\": \"&count_f32_add\"\n },\n {\n \"metric\": \"F32-MUL (Baseline)\",\n \"count\": \"&count_f32_MUL\"\n },\n {\n \"metric\": \"F32-FMA (Baseline)\",\n \"count\": \"&count_f32_fma\"\n },\n {\n \"metric\": \"F32-Trans (Baseline)\",\n \"count\": \"&count_f32_trans\"\n },\n {\n \"metric\": \"F64-ADD (Baseline)\",\n \"count\": \"&count_f64_add\"\n },\n {\n \"metric\": \"F64-MUL (Baseline)\",\n \"count\": \"&count_f64_MUL\"\n },\n {\n \"metric\": \"F64-FMA (Baseline)\",\n \"count\": \"&count_f64_fma\"\n },\n {\n \"metric\": \"F64-Trans (Baseline)\",\n \"count\": \"&count_f64_trans\"\n },\n {\n \"metric\": \"Conversion (Baseline)\",\n \"count\": \"&conversion\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", - "type": "table" - } - ], - "title": "VALU Arithmetic Instr Mix", - "transformations": [ - { - "id": "merge", - "options": {} - }, - { - "id": "sortBy", + "id": 275, "options": { - "fields": {}, - "sort": [ - { - "field": "metric" - } - ] - } - } - ], - "transparent": true, - "type": "bargauge" - }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "displayMode": "auto" + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" }, - { - "color": "red", - "value": 80 + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n\n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr (Baseline)\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read (Baseline)\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write (Baseline)\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic (Baseline)\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr (Baseline)\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read (Baseline)\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write (Baseline)\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic (Baseline)\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "VMEM Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "VMEM Instr", + "type 1": "VMEM Instr" + } } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 26 - }, - "id": 275, - "options": { - "footer": { - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "8.3.4", - "targets": [ - { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" - }, - "rawQuery": true, - "refId": "A", - "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n \n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + } + ], + "transparent": true, "type": "table" }, { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] }, - "hide": false, - "rawQuery": true, - "refId": "B", - "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n\n {\"$group\": {\n \"_id\": null,\n \"buffer_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"buffer_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_BUFFER_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n\n \"flat_instr\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_read\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_READ_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_write\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_WRITE_WAVEFRONTS_sum\", \"&denom\" ] }\n },\n \"flat_atomic\": {\n \"$avg\": { \"$divide\": [ \"&TA_FLAT_ATOMIC_WAVEFRONTS_sum\", \"&denom\" ] }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"Buffer Instr (Baseline)\",\n \"count\": \"&buffer_instr\"\n },\n {\n \"type\": \"Buffer Read (Baseline)\",\n \"count\": \"&buffer_read\"\n },\n {\n \"type\": \"Buffer Write (Baseline)\",\n \"count\": \"&buffer_write\"\n },\n {\n \"type\": \"Buffer Atomic (Baseline)\",\n \"count\": \"&buffer_atomic\"\n },\n {\n \"type\": \"Flat Instr (Baseline)\",\n \"count\": \"&flat_instr\"\n },\n {\n \"type\": \"Flat Read (Baseline)\",\n \"count\": \"&flat_read\"\n },\n {\n \"type\": \"Flat Write (Baseline)\",\n \"count\": \"&flat_write\"\n },\n {\n \"type\": \"Flat Atomic (Baseline)\",\n \"count\": \"&flat_atomic\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", - "type": "table" - } - ], - "title": "VMEM Instr Mix", - "transformations": [ - { - "id": "concatenate", - "options": {} - }, - { - "id": "organize", + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 16, "options": { - "excludeByName": { - "type 2": true + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false }, - "indexByName": {}, - "renameByName": { - "count": "Count", - "count 1": "Avg (Current)", - "count 2": "Avg (Baseline)", - "type": "VMEM Instr", - "type 1": "VMEM Instr" - } - } - } - ], - "transparent": true, - "type": "table" - }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "displayMode": "auto" + "showHeader": true }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" }, - { - "color": "red", - "value": 80 + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"mfma_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&denom\" ] }\n },\n \"mfma_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&denom\" ] }\n },\n \"mfma_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&denom\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&denom\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8\",\n \"count\": \"&mfma_i8\"\n },\n {\n \"type\": \"MFMA-F16\",\n \"count\": \"&mfma_f16\"\n },\n {\n \"type\": \"MFMA-BF16\",\n \"count\": \"&mfma_bf16\"\n },\n {\n \"type\": \"MFMA-F32\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"mfma_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&denom\" ] }\n },\n \"mfma_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&denom\" ] }\n },\n \"mfma_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&denom\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&denom\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8 (Baseline)\",\n \"count\": \"&mfma_i8\"\n },\n {\n \"type\": \"MFMA-F16 (Baseline)\",\n \"count\": \"&mfma_f16\"\n },\n {\n \"type\": \"MFMA-BF16 (Baseline)\",\n \"count\": \"&mfma_bf16\"\n },\n {\n \"type\": \"MFMA-F32 (Baseline)\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64 (Baseline)\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + "type": "table" + } + ], + "title": "MFMA Arithmetic Instr Mix", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "type 2": true + }, + "indexByName": {}, + "renameByName": { + "count": "Count", + "count 1": "Avg (Current)", + "count 2": "Avg (Baseline)", + "type": "MFMA Instr", + "type 1": "MFMA Instr" + } } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 33 - }, - "id": 16, - "options": { - "footer": { - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "8.3.4", - "targets": [ - { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" - }, - "rawQuery": true, - "refId": "A", - "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"mfma_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&denom\" ] }\n },\n \"mfma_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&denom\" ] }\n },\n \"mfma_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&denom\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&denom\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8\",\n \"count\": \"&mfma_i8\"\n },\n {\n \"type\": \"MFMA-F16\",\n \"count\": \"&mfma_f16\"\n },\n {\n \"type\": \"MFMA-BF16\",\n \"count\": \"&mfma_bf16\"\n },\n {\n \"type\": \"MFMA-F32\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", + } + ], + "transparent": true, "type": "table" - }, + } + ], + "targets": [ { "datasource": { "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" + "uid": "oVK0I__nk" }, - "hide": false, - "rawQuery": true, - "refId": "B", - "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Instruction Mix)\"}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"mfma_i8\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_I8\", \"&denom\" ] }\n },\n \"mfma_f16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F16\", \"&denom\" ] }\n },\n \"mfma_bf16\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_BF16\", \"&denom\" ] }\n },\n \"mfma_f32\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F32\", \"&denom\" ] }\n },\n \"mfma_f64\": {\n \"$avg\": { \"$divide\": [ \"&SQ_INSTS_VALU_MFMA_F64\", \"&denom\" ] }\n }\n }\n },\n {\"$set\": {\n \"array\": [\n {\n \"type\": \"MFMA-I8 (Baseline)\",\n \"count\": \"&mfma_i8\"\n },\n {\n \"type\": \"MFMA-F16 (Baseline)\",\n \"count\": \"&mfma_f16\"\n },\n {\n \"type\": \"MFMA-BF16 (Baseline)\",\n \"count\": \"&mfma_bf16\"\n },\n {\n \"type\": \"MFMA-F32 (Baseline)\",\n \"count\": \"&mfma_f32\"\n },\n {\n \"type\": \"MFMA-F64 (Baseline)\",\n \"count\": \"&mfma_f64\"\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n ]);", - "type": "table" - } - ], - "title": "MFMA Arithmetic Instr Mix", - "transformations": [ - { - "id": "concatenate", - "options": {} - }, - { - "id": "organize", - "options": { - "excludeByName": { - "type 2": true - }, - "indexByName": {}, - "renameByName": { - "count": "Count", - "count 1": "Avg (Current)", - "count 2": "Avg (Baseline)", - "type": "MFMA Instr", - "type 1": "MFMA Instr" - } - } + "refId": "A" } ], - "transparent": true, - "type": "table" + "title": "Compute Unit - Instruction Mix", + "type": "row" }, { "collapsed": true, @@ -4989,7 +4986,7 @@ "h": 1, "w": 24, "x": 0, - "y": 43 + "y": 9 }, "id": 8, "panels": [ @@ -5008,8 +5005,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "#EAB839", @@ -5166,8 +5162,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5266,8 +5261,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5449,8 +5443,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5624,7 +5617,7 @@ "h": 1, "w": 24, "x": 0, - "y": 44 + "y": 10 }, "id": 98, "panels": [ @@ -5964,7 +5957,7 @@ "h": 1, "w": 24, "x": 0, - "y": 45 + "y": 11 }, "id": 44, "panels": [ @@ -6212,7 +6205,7 @@ "h": 1, "w": 24, "x": 0, - "y": 46 + "y": 12 }, "id": 203, "panels": [ @@ -6633,7 +6626,7 @@ "h": 1, "w": 24, "x": 0, - "y": 47 + "y": 13 }, "id": 130, "panels": [ @@ -6976,7 +6969,7 @@ "h": 1, "w": 24, "x": 0, - "y": 48 + "y": 14 }, "id": 112, "panels": [ @@ -7719,7 +7712,7 @@ "h": 1, "w": 24, "x": 0, - "y": 49 + "y": 15 }, "id": 56, "panels": [ @@ -8441,7 +8434,7 @@ "h": 1, "w": 24, "x": 0, - "y": 50 + "y": 16 }, "id": 66, "panels": [ @@ -8460,8 +8453,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -13386,6 +13378,6 @@ "timezone": "", "title": "Omniperf_v1.0.8_pub", "uid": "MIPerf_v1_0_063020221121", - "version": 9, + "version": 11, "weekStart": "" } \ No newline at end of file diff --git a/src/omniperf_analyze/configs/gfx906/0700_wavefront-launch.yaml b/src/omniperf_analyze/configs/gfx906/0700_wavefront-launch.yaml index c0592783e..70141193e 100644 --- a/src/omniperf_analyze/configs/gfx906/0700_wavefront-launch.yaml +++ b/src/omniperf_analyze/configs/gfx906/0700_wavefront-launch.yaml @@ -50,9 +50,15 @@ Panel Config: unit: Wavefronts tips: VGPRs: - avg: AVG(vgpr) - min: MIN(vgpr) - max: MAX(vgpr) + avg: AVG(arch_vgpr) + min: MIN(arch_vgpr) + max: MAX(arch_vgpr) + unit: Registers + tips: + AGPRs: + avg: AVG(accum_vgpr) + min: MIN(accum_vgpr) + max: MAX(accum_vgpr) unit: Registers tips: SGPRs: diff --git a/src/omniperf_analyze/configs/gfx908/0700_wavefront-launch.yaml b/src/omniperf_analyze/configs/gfx908/0700_wavefront-launch.yaml index c0592783e..70141193e 100644 --- a/src/omniperf_analyze/configs/gfx908/0700_wavefront-launch.yaml +++ b/src/omniperf_analyze/configs/gfx908/0700_wavefront-launch.yaml @@ -50,9 +50,15 @@ Panel Config: unit: Wavefronts tips: VGPRs: - avg: AVG(vgpr) - min: MIN(vgpr) - max: MAX(vgpr) + avg: AVG(arch_vgpr) + min: MIN(arch_vgpr) + max: MAX(arch_vgpr) + unit: Registers + tips: + AGPRs: + avg: AVG(accum_vgpr) + min: MIN(accum_vgpr) + max: MAX(accum_vgpr) unit: Registers tips: SGPRs: diff --git a/src/omniperf_analyze/configs/gfx90a/0700_wavefront-launch.yaml b/src/omniperf_analyze/configs/gfx90a/0700_wavefront-launch.yaml index fa53bbff7..13ba5b8e1 100644 --- a/src/omniperf_analyze/configs/gfx90a/0700_wavefront-launch.yaml +++ b/src/omniperf_analyze/configs/gfx90a/0700_wavefront-launch.yaml @@ -50,9 +50,15 @@ Panel Config: unit: Wavefronts tips: VGPRs: - avg: AVG(vgpr) - min: MIN(vgpr) - max: MAX(vgpr) + avg: AVG(arch_vgpr) + min: MIN(arch_vgpr) + max: MAX(arch_vgpr) + unit: Registers + tips: + AGPRs: + avg: AVG(accum_vgpr) + min: MIN(accum_vgpr) + max: MAX(accum_vgpr) unit: Registers tips: SGPRs: From 8eb7f2b7e5fd786914ef99b1bb996f3c9c205bf8 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Mon, 15 May 2023 17:21:09 -0500 Subject: [PATCH 08/32] Remove wave cycles from LDS section (#130) Signed-off-by: coleramos425 --- dashboards/Omniperf_v1.0.8_pub.json | 609 +++++++++--------- .../configs/gfx906/1200_lds.yaml | 6 - .../configs/gfx908/1200_lds.yaml | 6 - .../configs/gfx90a/1200_lds.yaml | 6 - 4 files changed, 305 insertions(+), 322 deletions(-) diff --git a/dashboards/Omniperf_v1.0.8_pub.json b/dashboards/Omniperf_v1.0.8_pub.json index 25538d117..fbebb0d44 100644 --- a/dashboards/Omniperf_v1.0.8_pub.json +++ b/dashboards/Omniperf_v1.0.8_pub.json @@ -25,7 +25,7 @@ "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": 43, - "iteration": 1684188009505, + "iteration": 1684189070197, "links": [], "liveNow": false, "panels": [ @@ -5608,7 +5608,7 @@ "type": "row" }, { - "collapsed": true, + "collapsed": false, "datasource": { "type": "amd-miperf-data-plugin", "uid": "oVK0I__nk" @@ -5620,332 +5620,333 @@ "y": 10 }, "id": 98, - "panels": [ + "panels": [], + "targets": [ { - "datasource": {}, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "oVK0I__nk" + }, + "refId": "A" + } + ], + "title": "Local Data Share (LDS)", + "type": "row" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "decimals": 1, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "#EAB839", - "value": 50 - }, - { - "color": "red", - "value": 90 - } - ] + { + "color": "#EAB839", + "value": 50 }, - "unit": "percent" - }, - "overrides": [] + { + "color": "red", + "value": 90 + } + ] }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 11 + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 205, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/.*/", + "values": true + }, + "showUnfilled": true, + "text": { + "titleSize": 14, + "valueSize": 16 + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk, $numCU, 0.00128]}\n ]}\n },\n\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n }\n \n }},\n \n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n \n ]\n }},\n \n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n \n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" }, - "id": 205, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n\n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n ]}\n },\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n }\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", + "type": "table" + } + ], + "title": "Speed-of-Light: LDS", + "transformations": [ + { + "id": "concatenate", + "options": {} + }, + { + "id": "organize", "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "/.*/", - "values": true - }, - "showUnfilled": true, - "text": { - "titleSize": 14, - "valueSize": 16 + "excludeByName": {}, + "indexByName": { + "Access Rate 1": 6, + "Access Rate 2": 7, + "Bandwith (Pct-of-Peak) 1": 0, + "Bandwith (Pct-of-Peak) 2": 1, + "Bank Conflict Rate 1": 2, + "Bank Conflict Rate 2": 3, + "Utilization 1": 4, + "Utilization 2": 5 + }, + "renameByName": { + "Access Rate 1": "Access Rate (Current)", + "Access Rate 2": "Access Rate (Baseline)", + "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", + "Bandwith (Pct-of-Peak)": "", + "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", + "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", + "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", + "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)", + "Cache Hit 1": "Cache Hit (Current)", + "Cache Hit 2": "Cache Hit (Baseline)", + "Latency (Cycles) 1": "Latency (Current) [Cycles]", + "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", + "Stall 1": "Stall (Current)", + "Stall 2": "Stall (Baseline)", + "Util 1": "Util (Current)", + "Util 2": "Util (Baseline)", + "Utilization 1": "Util (Current)", + "Utilization 2": "Util (Baseline)" } + } + } + ], + "transparent": true, + "type": "bargauge" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, - "pluginVersion": "8.3.4", - "targets": [ - { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" - }, - "rawQuery": true, - "refId": "A", - "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$group\": {\n \"_id\": null,\n \n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n \n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk, $numCU, 0.00128]}\n ]}\n },\n\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU]}\n ]}\n }\n \n }},\n \n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n \n ]\n }},\n \n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n \n ]);", - "type": "table" - }, - { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "min": -100000000000000000000, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "hide": false, - "rawQuery": true, - "refId": "B", - "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n {\"$group\": {\n \"_id\": null,\n\n \"bconf_rate\": {\n \"$avg\": {\"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n {\"$multiply\": [\"&SQ_LDS_BANK_CONFLICT\", 3.125]},\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n } \n },\n\n \"bw_pop\": {\n \"$avg\": {\"$divide\": [\n {\"$divide\":[ \n { \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, \n 4, \n {\"$toInt\": \"$L2Banks2\"}\n ]}, \n {\"$subtract\": [\"&EndNs\", \"&BeginNs\"]}\n ]},\n \n {\"$multiply\": [$sclk2, $numCU2, 0.00128]}\n ]}\n },\n \"lds_util\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [ 100, \"&SQ_LDS_IDX_ACTIVE\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n },\n\n \"lds_access_rate\": {\n \"$avg\": {\"$divide\":[ \n { \"$multiply\": [200, \"&SQ_ACTIVE_INST_LDS\" ]}, \n {\"$multiply\": [\"&GRBM_GUI_ACTIVE\", $numCU2]}\n ]}\n }\n\n\n }},\n\n {\"$set\": {\n \"array\": [\n {\n \"Utilization\": \"&lds_util\",\n \"Access Rate\": \"&lds_access_rate\",\n \"Bandwith (Pct-of-Peak)\": \"&bw_pop\",\n \"Bank Conflict Rate\": \"&bconf_rate\"\n }\n\n ]\n }},\n\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n \n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }}\n\n]);", - "type": "table" - } - ], - "title": "Speed-of-Light: LDS", - "transformations": [ - { - "id": "concatenate", - "options": {} - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": { - "Access Rate 1": 6, - "Access Rate 2": 7, - "Bandwith (Pct-of-Peak) 1": 0, - "Bandwith (Pct-of-Peak) 2": 1, - "Bank Conflict Rate 1": 2, - "Bank Conflict Rate 2": 3, - "Utilization 1": 4, - "Utilization 2": 5 - }, - "renameByName": { - "Access Rate 1": "Access Rate (Current)", - "Access Rate 2": "Access Rate (Baseline)", - "BW (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", - "BW (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline)", - "Bandwith (Pct-of-Peak)": "", - "Bandwith (Pct-of-Peak) 1": "BW Pct-of-Peak (Current)", - "Bandwith (Pct-of-Peak) 2": "BW Pct-of-Peak (Baseline) ", - "Bank Conflict Rate 1": "Bank Conflict Rate (Current)", - "Bank Conflict Rate 2": "Bank Conflict Rate (Baseline)", - "Cache Hit 1": "Cache Hit (Current)", - "Cache Hit 2": "Cache Hit (Baseline)", - "Latency (Cycles) 1": "Latency (Current) [Cycles]", - "Latency (Cycles) 2": "Latency (Baseline) [Cycles]", - "Stall 1": "Stall (Current)", - "Stall 2": "Stall (Baseline)", - "Util 1": "Util (Current)", - "Util 2": "Util (Baseline)", - "Utilization 1": "Util (Current)", - "Utilization 2": "Util (Baseline)" - } + { + "color": "red", + "value": 80 } - } - ], - "transparent": true, - "type": "bargauge" + ] + }, + "unit": "locale" }, - { - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "displayMode": "auto" - }, - "decimals": 0, - "mappings": [], - "min": -100000000000000000000, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "locale" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Avg (Current)" }, - "overrides": [ + "properties": [ { - "matcher": { - "id": "byName", - "options": "Avg (Current)" - }, - "properties": [ - { - "id": "custom.width", - "value": 114 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Min (Current)" - }, - "properties": [ - { - "id": "custom.width", - "value": 107 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Min (Baseline)" - }, - "properties": [ - { - "id": "custom.width", - "value": 128 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Max (Current)" - }, - "properties": [ - { - "id": "custom.width", - "value": 115 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unit" - }, - "properties": [ - { - "id": "custom.width", - "value": 138 - } - ] - }, + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min (Current)" + }, + "properties": [ { - "matcher": { - "id": "byName", - "options": "Avg (Baseline)" - }, - "properties": [ - { - "id": "custom.width", - "value": 141 - } - ] + "id": "custom.width", + "value": 107 } ] }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 11 + { + "matcher": { + "id": "byName", + "options": "Min (Baseline)" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] }, - "id": 100, - "options": { - "footer": { - "fields": "", - "reducer": [ - "sum" - ], - "show": false + { + "matcher": { + "id": "byName", + "options": "Max (Current)" }, - "showHeader": true, - "sortBy": [] + "properties": [ + { + "id": "custom.width", + "value": 115 + } + ] }, - "pluginVersion": "8.3.4", - "targets": [ - { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" - }, - "hide": false, - "rawQuery": true, - "refId": "A", - "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_waveCycles\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"min_waveCycles\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"max_waveCycles\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \n \n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Wave Cycles\",\n \"avg\": \"&avg_waveCycles\",\n \"min\": \"&min_waveCycles\",\n \"max\": \"&max_waveCycles\",\n \"Unit\": \"Cycles/Wave\"\n },\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conficts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", - "type": "table" + { + "matcher": { + "id": "byName", + "options": "Unit" }, - { - "datasource": { - "type": "amd-miperf-data-plugin", - "uid": "Zzw1yR27k" - }, - "hide": false, - "rawQuery": true, - "refId": "B", - "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n \n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_waveCycles\": {\n \"$avg\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"min_waveCycles\": {\n \"$min\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \"max_waveCycles\": {\n \"$max\": { \"$divide\": [ { \"$multiply\": [4, \"&SQ_WAVE_CYCLES\"] }, \"&SQ_WAVES\" ] }\n },\n \n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"Wave Cycles\",\n \"avg\": \"&avg_waveCycles\",\n \"min\": \"&min_waveCycles\",\n \"max\": \"&max_waveCycles\",\n \"Unit\": \"Cycles/Wave\"\n },\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conficts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", - "type": "table" - } - ], - "title": "LDS Stats", - "transformations": [ - { - "id": "concatenate", - "options": { - "frameNameLabel": "frame", - "frameNameMode": "field" + "properties": [ + { + "id": "custom.width", + "value": 138 } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg (Baseline)" }, - { - "id": "organize", - "options": { - "excludeByName": { - "Unit 2": true, - "metric 2": true - }, - "indexByName": { - "Unit 1": 9, - "Unit 2": 8, - "avg 1": 1, - "avg 2": 2, - "max 1": 5, - "max 2": 6, - "metric 1": 0, - "metric 2": 7, - "min 1": 3, - "min 2": 4 - }, - "renameByName": { - "avg 1": "Avg (Current)", - "avg 2": "Avg (Baseline)", - "max 1": "Max (Current)", - "max 2": "Max (Baseline)", - "min 1": "Min (Current)", - "min 2": "Min (Baseline)" - } + "properties": [ + { + "id": "custom.width", + "value": 141 } - } - ], - "type": "table" - } - ], + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 100, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.3.4", "targets": [ { "datasource": { "type": "amd-miperf-data-plugin", - "uid": "oVK0I__nk" + "uid": "Zzw1yR27k" }, - "refId": "A" + "hide": false, + "rawQuery": true, + "refId": "A", + "target": "${Workload1}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \n \n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conficts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter:json}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" + }, + { + "datasource": { + "type": "amd-miperf-data-plugin", + "uid": "Zzw1yR27k" + }, + "hide": false, + "rawQuery": true, + "refId": "B", + "target": "${Workload2}.pmc_perf.aggregate([\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}}, \n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n \n }},\n {\"$addFields\": {\n \"denom\": {\n \"$switch\" : {\n \"branches\": [\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Wave\"]} ,\n \"then\": \"&SQ_WAVES\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Cycle\"]} ,\n \"then\": \"&GRBM_GUI_ACTIVE\"\n },\n {\n \"case\": { \"$eq\": [ $normUnit, \"per Sec\"]} ,\n \"then\": {\"$divide\":[{\"$subtract\": [\"&EndNs\", \"&BeginNs\" ]}, 1000000000]}\n }\n ],\n \"default\": 1\n } \n } \n }},\n {\"$group\": {\n \"_id\": null,\n \"avg_ldsInstrs\": {\n \"$avg\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"min_ldsInstrs\": {\n \"$min\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \"max_ldsInstrs\": {\n \"$max\": { \"$divide\": [\"&SQ_INSTS_LDS\" , \"&denom\"] }\n },\n \n \"avg_indexAccesses\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"min_indexAccesses\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \"max_indexAccesses\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_IDX_ACTIVE\" , \"&denom\"] }\n },\n \n \"avg_atomicCycles\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"min_atomicCycles\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \"max_atomicCycles\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ATOMIC_RETURN\" , \"&denom\" ] }\n },\n \n \"avg_bankConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"min_bankConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \"max_bankConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_BANK_CONFLICT\" , \"&denom\" ] }\n },\n \n \"avg_addrConflicts\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"min_addrConflicts\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"max_addrConflicts\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_ADDR_CONFLICT\" , \"&denom\" ] }\n },\n \"avg_unalignedStall\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"min_unalignedStall\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \"max_unalignedStall\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_UNALIGNED_STALL\" , \"&denom\" ] }\n },\n \n \"avg_memViolations\": {\n \"$avg\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"min_memViolations\": {\n \"$min\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \"max_memViolations\": {\n \"$max\": { \"$divide\": [\"&SQ_LDS_MEM_VIOLATIONS\" , \"&denom\" ] }\n },\n \n \n \"avg_bconf_per_op\": {\n \"$avg\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"min_bconf_per_op\": {\n \"$min\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \"max_bconf_per_op\": {\n \"$max\": {\n \"$cond\": [ {\"$ne\": [{\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}, 0]},\n {\"$divide\": [\n \"&SQ_LDS_BANK_CONFLICT\",\n {\"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]}\n ]},\n null\n ]\n }\n },\n \n \"avg_bw\": {\n \"$avg\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"min_bw\": {\n \"$min\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n },\n \"max_bw\": {\n \"$max\": {\n \"$divide\":\n [ { \"$multiply\": [{ \"$multiply\": [ { \"$subtract\": [\"&SQ_LDS_IDX_ACTIVE\", \"&SQ_LDS_BANK_CONFLICT\"]} , 4]}, {\"$toInt\": \"$L2Banks\"}]}, \n \"&denom\"\n ]\n }\n }\n }},\n {\"$set\": {\n \"array\": [\n {\n \"metric\": \"LDS Instrs\",\n \"avg\": \"&avg_ldsInstrs\",\n \"min\": \"&min_ldsInstrs\",\n \"max\": \"&max_ldsInstrs\",\n \"Unit\":{\"$concat\": [\"Instr \", $normUnit]} \n },\n {\n \"metric\": \"Bandwidth\",\n \"avg\": \"&avg_bw\",\n \"min\": \"&min_bw\",\n \"max\": \"&max_bw\",\n \"Unit\": {\"$concat\": [\"Bytes \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conficts/Access\",\n \"avg\": \"&avg_bconf_per_op\",\n \"min\": \"&min_bconf_per_op\",\n \"max\": \"&max_bconf_per_op\",\n \"Unit\": \"Conflicts/Access\"\n },\n {\n \"metric\": \"Index Accesses\",\n \"avg\": \"&avg_indexAccesses\",\n \"min\": \"&min_indexAccesses\",\n \"max\": \"&max_indexAccesses\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Atomic Cycles\",\n \"avg\": \"&avg_atomicCycles\",\n \"min\": \"&min_atomicCycles\",\n \"max\": \"&max_atomicCycles\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Bank Conflict\",\n \"avg\": \"&avg_bankConflicts\",\n \"min\": \"&min_bankConflicts\",\n \"max\": \"&max_bankConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Addr Conflict\",\n \"avg\": \"&avg_addrConflicts\",\n \"min\": \"&min_addrConflicts\",\n \"max\": \"&max_addrConflicts\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Unaligned Stall\",\n \"avg\": \"&avg_unalignedStall\",\n \"min\": \"&min_unalignedStall\",\n \"max\": \"&max_unalignedStall\",\n \"Unit\": {\"$concat\": [\"Cycles \", $normUnit]}\n },\n {\n \"metric\": \"Mem Violations\",\n \"avg\": \"&avg_memViolations\",\n \"min\": \"&min_memViolations\",\n \"max\": \"&max_memViolations\",\n \"Unit\": {\"$concat\": [\"\", $normUnit]}\n }\n ]\n }},\n {\"$unwind\": {\n \"path\": \"&array\"\n }},\n {\"$replaceRoot\": {\n \"newRoot\": \"&array\"\n }},\n {\"$unionWith\": {\n \"coll\": \"SQ_INST_LEVEL_LDS\",\n \"pipeline\": [\n {\"$match\": {\n \"Index\": { \"$in\": [${DispatchIDFilter2:raw}] },\n \"gpu-id\": { \"$in\": [${gpuFilter2:raw}] },\n \"KernelName\": { \"$in\": ${KernelNameFilter2:json}},\n \"$expr\":{\"$regexMatch\": {\"input\":\"${select}\",\"regex\":\"(Local Data Share)\"}}\n }},\n \n {\"$group\": {\n \"_id\": null,\n \"avg_ldsLatency\": {\n \"$avg\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"min_ldsLatency\": {\n \"$min\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n },\n \"max_ldsLatency\": {\n \"$max\": { \n \"$cond\": [\n {\"$ne\": [\"&SQ_INSTS_LDS\", 0]},\n {\"$divide\": [ \"&SQ_ACCUM_PREV_HIRES\", \"&SQ_INSTS_LDS\" ]},\n null\n ] \n }\n }\n }},\n {\"$project\": {\n \"_id\": 0,\n \"metric\": \"LDS Latency\",\n \"avg\": \"&avg_ldsLatency\",\n \"min\": \"&min_ldsLatency\",\n \"max\": \"&max_ldsLatency\",\n \"Unit\": \"Cycles\"\n }}\n ]\n }}\n ]);", + "type": "table" } ], - "title": "Local Data Share (LDS)", - "type": "row" + "title": "LDS Stats", + "transformations": [ + { + "id": "concatenate", + "options": { + "frameNameLabel": "frame", + "frameNameMode": "field" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Unit 2": true, + "metric 2": true + }, + "indexByName": { + "Unit 1": 9, + "Unit 2": 8, + "avg 1": 1, + "avg 2": 2, + "max 1": 5, + "max 2": 6, + "metric 1": 0, + "metric 2": 7, + "min 1": 3, + "min 2": 4 + }, + "renameByName": { + "avg 1": "Avg (Current)", + "avg 2": "Avg (Baseline)", + "max 1": "Max (Current)", + "max 2": "Max (Baseline)", + "min 1": "Min (Current)", + "min 2": "Min (Baseline)" + } + } + } + ], + "type": "table" }, { "collapsed": true, @@ -5957,7 +5958,7 @@ "h": 1, "w": 24, "x": 0, - "y": 11 + "y": 23 }, "id": 44, "panels": [ @@ -6205,7 +6206,7 @@ "h": 1, "w": 24, "x": 0, - "y": 12 + "y": 24 }, "id": 203, "panels": [ @@ -6626,7 +6627,7 @@ "h": 1, "w": 24, "x": 0, - "y": 13 + "y": 25 }, "id": 130, "panels": [ @@ -6969,7 +6970,7 @@ "h": 1, "w": 24, "x": 0, - "y": 14 + "y": 26 }, "id": 112, "panels": [ @@ -7712,7 +7713,7 @@ "h": 1, "w": 24, "x": 0, - "y": 15 + "y": 27 }, "id": 56, "panels": [ @@ -8434,7 +8435,7 @@ "h": 1, "w": 24, "x": 0, - "y": 16 + "y": 28 }, "id": 66, "panels": [ @@ -13378,6 +13379,6 @@ "timezone": "", "title": "Omniperf_v1.0.8_pub", "uid": "MIPerf_v1_0_063020221121", - "version": 11, + "version": 12, "weekStart": "" } \ No newline at end of file diff --git a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml index 253b2fc23..178849c17 100644 --- a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml +++ b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml @@ -42,12 +42,6 @@ Panel Config: unit: Unit tips: Tips metric: - Wave Cycles: - avg: AVG(((4 * SQ_WAVE_CYCLES) / SQ_WAVES)) - min: MIN(((4 * SQ_WAVE_CYCLES) / SQ_WAVES)) - max: MAX(((4 * SQ_WAVE_CYCLES) / SQ_WAVES)) - unit: Cycles/Wave - tips: LDS Instrs: avg: AVG((SQ_INSTS_LDS / $denom)) min: MIN((SQ_INSTS_LDS / $denom)) diff --git a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml index 253b2fc23..178849c17 100644 --- a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml +++ b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml @@ -42,12 +42,6 @@ Panel Config: unit: Unit tips: Tips metric: - Wave Cycles: - avg: AVG(((4 * SQ_WAVE_CYCLES) / SQ_WAVES)) - min: MIN(((4 * SQ_WAVE_CYCLES) / SQ_WAVES)) - max: MAX(((4 * SQ_WAVE_CYCLES) / SQ_WAVES)) - unit: Cycles/Wave - tips: LDS Instrs: avg: AVG((SQ_INSTS_LDS / $denom)) min: MIN((SQ_INSTS_LDS / $denom)) diff --git a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml index 253b2fc23..178849c17 100644 --- a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml +++ b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml @@ -42,12 +42,6 @@ Panel Config: unit: Unit tips: Tips metric: - Wave Cycles: - avg: AVG(((4 * SQ_WAVE_CYCLES) / SQ_WAVES)) - min: MIN(((4 * SQ_WAVE_CYCLES) / SQ_WAVES)) - max: MAX(((4 * SQ_WAVE_CYCLES) / SQ_WAVES)) - unit: Cycles/Wave - tips: LDS Instrs: avg: AVG((SQ_INSTS_LDS / $denom)) min: MIN((SQ_INSTS_LDS / $denom)) From 44edef4635c18ee3f2bb764f7078db05dfbbecd6 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Tue, 16 May 2023 15:39:45 -0500 Subject: [PATCH 09/32] Enhance logging and warning reporting Signed-off-by: coleramos425 --- src/omniperf | 38 +++++++++++++++---------- src/utils/perfagg.py | 66 ++++++++++++++++++++++++++++++++------------ 2 files changed, 73 insertions(+), 31 deletions(-) diff --git a/src/omniperf b/src/omniperf index c22aaa4e6..71208c177 100755 --- a/src/omniperf +++ b/src/omniperf @@ -160,7 +160,7 @@ def isWorkloadEmpty(my_parser, path): ) -def replace_timestamps(workload_dir): +def replace_timestamps(workload_dir, log_file): df_stamps = pd.read_csv(workload_dir + "/timestamps.csv") if "BeginNs" in df_stamps.columns and "EndNs" in df_stamps.columns: # Update timestamps for all *.csv output files @@ -171,9 +171,11 @@ def replace_timestamps(workload_dir): df_pmc_perf["EndNs"] = df_stamps["EndNs"] df_pmc_perf.to_csv(fname, index=False) else: + warning = "WARNING: Incomplete profiling data detected. Unable to update timestamps." warnings.warn( - "WARNING: Incomplete profiling data detected. Unable to update timestamps." + warning ) + log_file.write(warning + "\n") def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof): @@ -408,7 +410,7 @@ def characterize_app(args, VER): for fname in glob.glob(workload_dir + "/perfmon/*.txt"): # Kernel filtering (in-place replacement) if not args.kernel == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -417,10 +419,11 @@ def characterize_app(args, VER): fname, ] ) + log.write(output) # Dispatch filtering (inplace replacement) if not args.dispatch == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -429,17 +432,17 @@ def characterize_app(args, VER): fname, ] ) + log.write(output) print(fname) if args.use_rocscope == True: run_rocscope(args, fname) else: run_prof(fname, workload_dir, perfmon_dir, app_cmd, args.target, log, args.verbose) - # Close log - log.close() + # run again with timestamps - run_subprocess( + success, output = capture_subprocess_output( [ rocprof_cmd, # "-i", fname, @@ -451,12 +454,16 @@ def characterize_app(args, VER): '"' + app_cmd + '"', ] ) + log.write(output) # Update pmc_perf.csv timestamps - replace_timestamps(workload_dir) + replace_timestamps(workload_dir, log) # Manually join each pmc_perf*.csv output if args.use_rocscope == False: - join_prof(workload_dir, args.join_type) + join_prof(workload_dir, args.join_type, log, args.verbose) + + # Close log + log.close() ################################################ @@ -640,7 +647,7 @@ def omniperf_profile(args, VER): for fname in glob.glob(workload_dir + "/perfmon/*.txt"): # Kernel filtering (in-place replacement) if not args.kernel == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -649,10 +656,11 @@ def omniperf_profile(args, VER): fname, ] ) + log.write(output) # Dispatch filtering (inplace replacement) if not args.dispatch == None: - run_subprocess( + success, output = capture_subprocess_output( [ "sed", "-i", @@ -661,6 +669,7 @@ def omniperf_profile(args, VER): fname, ] ) + log.write(output) print(fname) if args.use_rocscope == True: run_rocscope(args, fname) @@ -668,7 +677,7 @@ def omniperf_profile(args, VER): run_prof(fname, workload_dir, perfmon_dir, args.remaining, args.target, log, args.verbose) # run again with timestamps - run_subprocess( + success, output = capture_subprocess_output( [ rocprof_cmd, # "-i", fname, @@ -680,12 +689,13 @@ def omniperf_profile(args, VER): '"' + args.remaining + '"', ] ) + log.write(output) # Update pmc_perf.csv timestamps - replace_timestamps(workload_dir) + replace_timestamps(workload_dir, log) # Manually join each pmc_perf*.csv output if args.use_rocscope == False: - join_prof(workload_dir, args.join_type) + join_prof(workload_dir, args.join_type, log, args.verbose) # Generate sysinfo gen_sysinfo(args.name, workload_dir, args.ipblocks, args.remaining, args.no_roof) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 8d77ff12d..fa14a60b0 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -25,6 +25,7 @@ import sys, os, pathlib, shutil, subprocess, argparse, glob, re import numpy as np import math +import warnings import pandas as pd prog = "omniperf" @@ -87,8 +88,12 @@ } +def test_df_column_equality(df): + return df.eq(df.iloc[:, 0], axis=0).all(1).all() + + # joins disparate runs less dumbly than rocprof -def join_prof(workload_dir, join_type, out=None): +def join_prof(workload_dir, join_type, log_file, verbose, out=None): # Set default output directory if not specified if out == None: out = workload_dir + "/pmc_perf.csv" @@ -96,25 +101,48 @@ def join_prof(workload_dir, join_type, out=None): df = None for i, file in enumerate(files): - # _df = parse_rocprof_kernels(file) - _df = pd.read_csv(file) - key = _df.groupby("KernelName").cumcount() if join_type == "kernel": - _df["key"] = _df.KernelName + " - " + key.astype(str) + key = _df.groupby("KernelName").cumcount() elif join_type == "grid": - _df["key"] = ( - _df.KernelName + " - " + key.astype(str) + " - " + _df.grd.astype(str) - ) + key = _df.groupby(["KernelName", "grd"]).cumcount() else: print("ERROR: Unrecognized --join-type") sys.exit(1) + _df["key"] = _df.KernelName + " - " + key.astype(str) if df is None: df = _df else: # join by unique index of kernel df = pd.merge(df, _df, how="inner", on="key", suffixes=("", f"_{i}")) + + # TODO: check for any mismatch in joins + duplicate_cols = { + "gpu": [col for col in df.columns if "gpu" in col], + "grd": [col for col in df.columns if "grd" in col], + "wpr": [col for col in df.columns if "wgr" in col], + "lds": [col for col in df.columns if "lds" in col], + "scr": [col for col in df.columns if "scr" in col], + "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col], + "accum_vgpr": [col for col in df.columns if "accum_vgpr" in col], + "spgr": [col for col in df.columns if "sgpr" in col], + } + for key, cols in duplicate_cols.items(): + _df = df[cols] + if not test_df_column_equality(_df): + msg = ( + "WARNING: Detected differing {} values while joining pmc_perf.csv".format( + key + ) + ) + warnings.warn(msg) + log_file.write(msg + "\n") + if test_df_column_equality(_df) and verbose: + msg = "Successfully joined {} in pmc_perf.csv".format(key) + print(msg) + log_file.write(msg + "\n") + # now, we can: #   A) throw away any of the "boring" duplicats df = df[ @@ -124,17 +152,20 @@ def join_prof(workload_dir, join_type, out=None): if not any( check in k for check in [ - # "gpu", + # removed merged counters, keep original + "gpu-id_", + "grd_", + "wgr_", + "lds_", + "scr_", + "vgpr_", + "sgpr_", + "Index_", + # un-mergable, remove all "queue-id", "queue-index", "pid", "tid", - # "grd", - # "wgr", - # "lds", - # "scr", - # "vgpr", - # "sgpr", "fbar", "sig", "obj", @@ -178,8 +209,9 @@ def join_prof(workload_dir, join_type, out=None): # and save to file df.to_csv(out, index=False) # and delete old file(s) - for file in files: - os.remove(file) + if not verbose: + for file in files: + os.remove(file) def pmc_perf_split(workload_dir): From ae3f562b6ce921db8a06ebd5b5adf3958f0da0a7 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Fri, 19 May 2023 12:41:00 -0500 Subject: [PATCH 10/32] Remove private submodules Signed-off-by: coleramos425 --- .gitmodules | 10 ---------- src/mibench | 1 - src/multevent | 1 - src/waveparser | 1 - 4 files changed, 13 deletions(-) delete mode 100644 .gitmodules delete mode 160000 src/mibench delete mode 160000 src/multevent delete mode 160000 src/waveparser diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 52b4e0f96..000000000 --- a/.gitmodules +++ /dev/null @@ -1,10 +0,0 @@ -[submodule "src/mibench"] - path = src/mibench - url = git@github.com:AARInternal/mibench.git - branch = main -[submodule "src/waveparser"] - path = src/waveparser - url = git@github.com:AARInternal/waveparser.git -[submodule "src/multevent"] - path = src/multevent - url = git@github.com:AARInternal/multevent.git diff --git a/src/mibench b/src/mibench deleted file mode 160000 index 9151f75a0..000000000 --- a/src/mibench +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9151f75a05b17a66cc99b60b511f4a6e2c4b9880 diff --git a/src/multevent b/src/multevent deleted file mode 160000 index 2367a3f34..000000000 --- a/src/multevent +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2367a3f341fc9f966b1fd9b485d0dd3d163fb7f8 diff --git a/src/waveparser b/src/waveparser deleted file mode 160000 index 0b6c8ef95..000000000 --- a/src/waveparser +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0b6c8ef95686046d360800e600838bbab91c263a From d23efe461ee4d7a1dbe9815144b77dcff30d9e09 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Fri, 19 May 2023 13:38:19 -0500 Subject: [PATCH 11/32] Enhance logging Signed-off-by: coleramos425 --- src/utils/perfagg.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index fa14a60b0..fdaea47bb 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -138,10 +138,12 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): ) warnings.warn(msg) log_file.write(msg + "\n") - if test_df_column_equality(_df) and verbose: + else: msg = "Successfully joined {} in pmc_perf.csv".format(key) - print(msg) log_file.write(msg + "\n") + if test_df_column_equality(_df) and verbose: + print(msg) + # now, we can: #   A) throw away any of the "boring" duplicats From 514b91b210e4cfa36b0a7177a0b695fe2d7c9bbd Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Fri, 19 May 2023 16:08:23 -0500 Subject: [PATCH 12/32] Add LDSBanks for LDS bw calc (#107) Signed-off-by: coleramos425 --- src/omniperf | 8 ++++---- .../configs/gfx906/0200_system-speed-of-light.yaml | 4 ++-- src/omniperf_analyze/configs/gfx906/1200_lds.yaml | 8 ++++---- .../configs/gfx908/0200_system-speed-of-light.yaml | 4 ++-- src/omniperf_analyze/configs/gfx908/1200_lds.yaml | 8 ++++---- .../configs/gfx90a/0200_system-speed-of-light.yaml | 4 ++-- src/omniperf_analyze/configs/gfx90a/1200_lds.yaml | 8 ++++---- src/omniperf_analyze/utils/parser.py | 1 + src/soc_params/mi100.csv | 4 ++-- src/soc_params/mi200.csv | 4 ++-- src/soc_params/mi50.csv | 4 ++-- 11 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/omniperf b/src/omniperf index 71208c177..3b38e419b 100755 --- a/src/omniperf +++ b/src/omniperf @@ -188,7 +188,7 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof): header += "command," header += "host_name,host_cpu,host_distro,host_kernel,host_rocmver,date," header += "gpu_soc,numSE,numCU,numSIMD,waveSize,maxWavesPerCU,maxWorkgroupSize," - header += "L1,L2,sclk,mclk,cur_sclk,cur_mclk,L2Banks,name,numSQC,hbmBW," + header += "L1,L2,sclk,mclk,cur_sclk,cur_mclk,L2Banks,LDSBanks,name,numSQC,hbmBW," header += "ip_blocks\n" sysinfo.write(header) @@ -232,11 +232,11 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof): blocks = [] hbmBW = int(mspec.cur_MCLK) / 1000 * 4096 / 8 * 2 if mspec.GPU == "gfx906": - param += ["16", "mi50", str(int(mspec.CU) // 4), str(hbmBW)] + param += ["16", "32", "mi50", str(int(mspec.CU) // 4), str(hbmBW)] elif mspec.GPU == "gfx908": - param += ["32", "mi100", "48", str(hbmBW)] + param += ["32", "32", "mi100", "48", str(hbmBW)] elif mspec.GPU == "gfx90a": - param += ["32", "mi200", "56", str(hbmBW)] + param += ["32", "32", "mi200", "56", str(hbmBW)] if not skip_roof: blocks.append("roofline") diff --git a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml index 8ad6d9d9a..74de040b2 100644 --- a/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml +++ b/src/omniperf_analyze/configs/gfx906/0200_system-speed-of-light.yaml @@ -104,11 +104,11 @@ Panel Config: / SQ_ACTIVE_INST_ANY))) / 5) tips: LDS BW: - value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) - pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: diff --git a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml index 178849c17..4f12a2ac3 100644 --- a/src/omniperf_analyze/configs/gfx906/1200_lds.yaml +++ b/src/omniperf_analyze/configs/gfx906/1200_lds.yaml @@ -23,7 +23,7 @@ Panel Config: value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU))) tips: Bandwidth (Pct-of-Peak): - value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: Bank Conflict Rate: @@ -49,11 +49,11 @@ Panel Config: unit: (Instr + $normUnit) tips: Bandwidth: - avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) unit: (Bytes + $normUnit) tips: diff --git a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml index 8ad6d9d9a..74de040b2 100644 --- a/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml +++ b/src/omniperf_analyze/configs/gfx908/0200_system-speed-of-light.yaml @@ -104,11 +104,11 @@ Panel Config: / SQ_ACTIVE_INST_ANY))) / 5) tips: LDS BW: - value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) - pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: diff --git a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml index 178849c17..4f12a2ac3 100644 --- a/src/omniperf_analyze/configs/gfx908/1200_lds.yaml +++ b/src/omniperf_analyze/configs/gfx908/1200_lds.yaml @@ -23,7 +23,7 @@ Panel Config: value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU))) tips: Bandwidth (Pct-of-Peak): - value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: Bank Conflict Rate: @@ -49,11 +49,11 @@ Panel Config: unit: (Instr + $normUnit) tips: Bandwidth: - avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) unit: (Bytes + $normUnit) tips: diff --git a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml index e26910abf..f10d7630f 100644 --- a/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml +++ b/src/omniperf_analyze/configs/gfx90a/0200_system-speed-of-light.yaml @@ -121,11 +121,11 @@ Panel Config: / SQ_ACTIVE_INST_ANY))) / 5) tips: LDS BW: - value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs))) unit: GB/sec peak: (($sclk * $numCU) * 0.128) - pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + pop: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: LDS Bank Conflict: diff --git a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml index 178849c17..4f12a2ac3 100644 --- a/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml +++ b/src/omniperf_analyze/configs/gfx90a/1200_lds.yaml @@ -23,7 +23,7 @@ Panel Config: value: AVG(((200 * SQ_ACTIVE_INST_LDS) / (GRBM_GUI_ACTIVE * $numCU))) tips: Bandwidth (Pct-of-Peak): - value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + value: AVG((((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / (EndNs - BeginNs)) / (($sclk * $numCU) * 0.00128))) tips: Bank Conflict Rate: @@ -49,11 +49,11 @@ Panel Config: unit: (Instr + $normUnit) tips: Bandwidth: - avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + avg: AVG(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + min: MIN(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) - max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($L2Banks)) + max: MAX(((((SQ_LDS_IDX_ACTIVE - SQ_LDS_BANK_CONFLICT) * 4) * TO_INT($LDSBanks)) / $denom)) unit: (Bytes + $normUnit) tips: diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index fa3dec159..6cc5676a2 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -478,6 +478,7 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): ammolite__numWavesPerCU = sys_info.maxWavesPerCU # todo: check do we still need it ammolite__numSQC = sys_info.numSQC ammolite__L2Banks = sys_info.L2Banks + ammolite__LDSBanks = soc_spec.LDSBanks # todo: eventually switch this over to sys_info. its a new spec so trying not to break compatibility ammolite__freq = sys_info.cur_sclk # todo: check do we still need it ammolite__mclk = sys_info.cur_mclk ammolite__sclk = sys_info.sclk diff --git a/src/soc_params/mi100.csv b/src/soc_params/mi100.csv index fa85f5def..c52a4e1bb 100644 --- a/src/soc_params/mi100.csv +++ b/src/soc_params/mi100.csv @@ -1,2 +1,2 @@ -name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,Freq,mclk -mi100,8,120,480,40,30,32,1502,1200 +name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,LDSBanks,Freq,mclk +mi100,8,120,480,40,30,32,32,1502,1200 diff --git a/src/soc_params/mi200.csv b/src/soc_params/mi200.csv index e60ca2b69..bf6343fc0 100644 --- a/src/soc_params/mi200.csv +++ b/src/soc_params/mi200.csv @@ -1,2 +1,2 @@ -name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,Freq,mclk -mi200,8,110,440,32,56,32,1700,1600 +name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,LDSBanks,Freq,mclk +mi200,8,110,440,32,56,32,32,1700,1600 diff --git a/src/soc_params/mi50.csv b/src/soc_params/mi50.csv index 959985fb2..f5e1bda0b 100644 --- a/src/soc_params/mi50.csv +++ b/src/soc_params/mi50.csv @@ -1,2 +1,2 @@ -name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,Freq,mclk -mi50,4,60,240,40,15,16,1725,1000 +name,numSE,numCU,numSIMD,numWavesPerCU,numSQC,L2Banks,LDSBanks,Freq,mclk +mi50,4,60,240,40,15,16,32,1725,1000 From dc89d64f11f226d4adabc6936eccc8bf2a810370 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Fri, 19 May 2023 16:10:20 -0500 Subject: [PATCH 13/32] Comply to Python formatting Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/parser.py | 4 +++- src/utils/perfagg.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index 6cc5676a2..5fad02acb 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -478,7 +478,9 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): ammolite__numWavesPerCU = sys_info.maxWavesPerCU # todo: check do we still need it ammolite__numSQC = sys_info.numSQC ammolite__L2Banks = sys_info.L2Banks - ammolite__LDSBanks = soc_spec.LDSBanks # todo: eventually switch this over to sys_info. its a new spec so trying not to break compatibility + ammolite__LDSBanks = ( + soc_spec.LDSBanks + ) # todo: eventually switch this over to sys_info. its a new spec so trying not to break compatibility ammolite__freq = sys_info.cur_sclk # todo: check do we still need it ammolite__mclk = sys_info.cur_mclk ammolite__sclk = sys_info.sclk diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index fdaea47bb..4560d89cf 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -143,7 +143,6 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): log_file.write(msg + "\n") if test_df_column_equality(_df) and verbose: print(msg) - # now, we can: #   A) throw away any of the "boring" duplicats From 14df0584b43e87b7aa59b59d250868e1ef15bb7b Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Fri, 19 May 2023 16:25:50 -0500 Subject: [PATCH 14/32] Display first X chars in --kernel-names (#115) Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/gui_components/roofline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/omniperf_analyze/utils/gui_components/roofline.py b/src/omniperf_analyze/utils/gui_components/roofline.py index 429d613f0..1c6727a80 100644 --- a/src/omniperf_analyze/utils/gui_components/roofline.py +++ b/src/omniperf_analyze/utils/gui_components/roofline.py @@ -225,6 +225,7 @@ def get_roofline( margin=dict(b=0, r=0), xaxis_range=[-1, 1], xaxis_side="top", + yaxis_side="right", height=400, width=1000, ) From 10e6b1f068e6a2a0bd3195c7282e9fe63da6996d Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Wed, 24 May 2023 14:22:00 -0400 Subject: [PATCH 15/32] Fix issue where join was being done on the incorrect key --- src/utils/perfagg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 4560d89cf..54e23dfd0 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -104,13 +104,14 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): _df = pd.read_csv(file) if join_type == "kernel": key = _df.groupby("KernelName").cumcount() + _df["key"] = _df.KernelName + " - " + key.astype(str) elif join_type == "grid": key = _df.groupby(["KernelName", "grd"]).cumcount() + _df["key"] = _df.KernelName + " - " + _df.grd.astype(str) + " - " + key.astype(str) else: print("ERROR: Unrecognized --join-type") sys.exit(1) - _df["key"] = _df.KernelName + " - " + key.astype(str) if df is None: df = _df else: From 2e403232db802f1c32db1f7ac87ffb156b94a9f6 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Wed, 24 May 2023 15:29:29 -0400 Subject: [PATCH 16/32] fix column name --- src/utils/perfagg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 54e23dfd0..95b1d9c98 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -122,7 +122,7 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): duplicate_cols = { "gpu": [col for col in df.columns if "gpu" in col], "grd": [col for col in df.columns if "grd" in col], - "wpr": [col for col in df.columns if "wgr" in col], + "wgr": [col for col in df.columns if "wgr" in col], "lds": [col for col in df.columns if "lds" in col], "scr": [col for col in df.columns if "scr" in col], "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col], From b48fb1e4cb36f971049cf5610c0b6daf433c5171 Mon Sep 17 00:00:00 2001 From: Nicholas Curtis Date: Wed, 24 May 2023 16:08:09 -0400 Subject: [PATCH 17/32] reformat --- src/utils/perfagg.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 95b1d9c98..e965b784c 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -107,7 +107,9 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): _df["key"] = _df.KernelName + " - " + key.astype(str) elif join_type == "grid": key = _df.groupby(["KernelName", "grd"]).cumcount() - _df["key"] = _df.KernelName + " - " + _df.grd.astype(str) + " - " + key.astype(str) + _df["key"] = ( + _df.KernelName + " - " + _df.grd.astype(str) + " - " + key.astype(str) + ) else: print("ERROR: Unrecognized --join-type") sys.exit(1) From 8af6efc0be5288dadd2e758556154e52f7406e5d Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Thu, 25 May 2023 10:26:59 -0500 Subject: [PATCH 18/32] Remove submodule checkout from CI Signed-off-by: coleramos425 --- .github/workflows/rhel-8.yml | 3 --- .github/workflows/ubuntu-focal.yml | 3 --- 2 files changed, 6 deletions(-) diff --git a/.github/workflows/rhel-8.yml b/.github/workflows/rhel-8.yml index 5fe6d5aed..c2d7a4c1e 100644 --- a/.github/workflows/rhel-8.yml +++ b/.github/workflows/rhel-8.yml @@ -33,9 +33,6 @@ jobs: yum -y install which - name: Checkout uses: actions/checkout@v3 - with: - submodules: recursive - token: ${{ secrets.GH_PAT }} - name: Install Python prereqs run: | python3.9 -m pip install -r requirements.txt diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml index d9683af8c..d72aaad70 100644 --- a/.github/workflows/ubuntu-focal.yml +++ b/.github/workflows/ubuntu-focal.yml @@ -32,9 +32,6 @@ jobs: sudo apt-get install -y cmake - name: Checkout uses: actions/checkout@v3 - with: - submodules: recursive - token: ${{ secrets.GH_PAT }} - name: Install Python prereqs run: | python3 -m pip install -r requirements.txt From 70405f184f2eef9b22b94652feb9ef62cc51750c Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Thu, 25 May 2023 15:45:05 -0500 Subject: [PATCH 19/32] Detect illogical counter values and throw warning (#103) Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/parser.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index 5fad02acb..3fc18d432 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -467,6 +467,11 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): Execute the expr string for each metric in the df. """ + # confirm no illogical counter values + if (raw_pmc_df['pmc_perf']['GRBM_GUI_ACTIVE'] == 0).any(): + print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.") + sys.exit(1) + # NB: # Following with Omniperf 0.2.0, we are using HW spec from sys_info instead. # The soc_spec is not in using right now, but can be used to do verification From efc4c0b9c4c5db2cf91d27f25f73c04307bfa09d Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Tue, 30 May 2023 10:57:58 -0500 Subject: [PATCH 20/32] Modify unhashable default values in Workload class definition for Python 3.11 (#131) Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/schema.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/omniperf_analyze/utils/schema.py b/src/omniperf_analyze/utils/schema.py index 2c6d7048d..bcfc0bff5 100644 --- a/src/omniperf_analyze/utils/schema.py +++ b/src/omniperf_analyze/utils/schema.py @@ -55,9 +55,9 @@ class ArchConfig: @dataclass class Workload: - sys_info: pd.DataFrame = pd.DataFrame() - soc_spec: pd.DataFrame = pd.DataFrame() # TODO: might move it to ArchConfig - raw_pmc: pd.DataFrame = pd.DataFrame() + sys_info: pd.DataFrame = None + soc_spec: pd.DataFrame = None # TODO: might move it to ArchConfig + raw_pmc: pd.DataFrame = None dfs: Dict[int, pd.DataFrame] = field(default_factory=dict) dfs_type: Dict[int, str] = field(default_factory=dict) filter_kernel_ids: List[int] = field(default_factory=list) From ce4e8d6e935cae82c6aff56d12f392aa4fe1f4c4 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Tue, 30 May 2023 11:00:41 -0500 Subject: [PATCH 21/32] Comply to Python formatting Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index 3fc18d432..e3bcfbbf7 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -468,7 +468,7 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): """ # confirm no illogical counter values - if (raw_pmc_df['pmc_perf']['GRBM_GUI_ACTIVE'] == 0).any(): + if (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.") sys.exit(1) From 73ed4743954a110770052b15b722510166fdc2e9 Mon Sep 17 00:00:00 2001 From: Cole Ramos Date: Tue, 30 May 2023 11:28:44 -0500 Subject: [PATCH 22/32] Update changelog for v1.0.8 Signed-off-by: Cole Ramos --- CHANGES | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/CHANGES b/CHANGES index 148cd44a9..651e87144 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,26 @@ +Version 1.0.8 (30 May 2023) + + * add `--kernel-names` option to toggle kernelName overlay in standalone roofline plot (#93) + * remove unused python modules (#96) + * fix empirical roofline calculation for single dispatch workloads (#97) + * match color of arithmetic intensity points to corresponding bw lines + + * ux improvements in standalone GUI (#101) + * enhanced readability for filtering dropdowns in standalone GUI (#102) + * new logfile to capture rocprofiler output (#106) + * roofline support for sles15 sp4 and future service packs (#109) + * adding dockerfiles for all supported Linux distos + * new examples for `--roof-only` and `--kernel` options added to documentation + + * enable cli analysis in Windows (#110) + * optional random port number in standalone GUI (#111) + * limit length of visable kernel name in `--kernel-name` option (#115) + * adjust metric definitions (#117, #130) + * manually merge rocprof runs, overriding default rocprofiler implementation (#125) + * fixed compatibility issues with Python 3.11 (#131) + Version 1.0.8-PR2 (17 Apr 2023) + * ux improvements in standalone GUI (#101) * enhanced readability for filtering dropdowns in standalone GUI (#102) * new logfile to capture rocprofiler output (#106) From 7253bb742169248d2207cde5218c4e0baee8bd82 Mon Sep 17 00:00:00 2001 From: Cole Ramos Date: Tue, 30 May 2023 11:34:33 -0500 Subject: [PATCH 23/32] Fix spelling mistakes Signed-off-by: Cole Ramos --- CHANGES | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index 651e87144..bcc92f180 100644 --- a/CHANGES +++ b/CHANGES @@ -9,12 +9,12 @@ Version 1.0.8 (30 May 2023) * enhanced readability for filtering dropdowns in standalone GUI (#102) * new logfile to capture rocprofiler output (#106) * roofline support for sles15 sp4 and future service packs (#109) - * adding dockerfiles for all supported Linux distos + * adding dockerfiles for all supported Linux distros * new examples for `--roof-only` and `--kernel` options added to documentation * enable cli analysis in Windows (#110) * optional random port number in standalone GUI (#111) - * limit length of visable kernel name in `--kernel-name` option (#115) + * limit length of visible kernelName in `--kernel-names` option (#115) * adjust metric definitions (#117, #130) * manually merge rocprof runs, overriding default rocprofiler implementation (#125) * fixed compatibility issues with Python 3.11 (#131) @@ -25,7 +25,7 @@ Version 1.0.8-PR2 (17 Apr 2023) * enhanced readability for filtering dropdowns in standalone GUI (#102) * new logfile to capture rocprofiler output (#106) * roofline support for sles15 sp4 and future service packs (#109) - * adding dockerfiles for all supported Linux distos + * adding dockerfiles for all supported Linux distros * new examples for `--roof-only` and `--kernel` options added to documentation Version 1.0.8-PR1 (13 Mar 2023) From 1b151e23a14295031edd70a3c526d0d1af017e48 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Tue, 30 May 2023 13:30:39 -0500 Subject: [PATCH 24/32] Skip GRBM_GUI_ACTIVE checker in --roof-only mode Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index e3bcfbbf7..c46c7a91b 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -467,8 +467,9 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): Execute the expr string for each metric in the df. """ - # confirm no illogical counter values - if (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): + # confirm no illogical counter values (only consider non-roofline runs) + roof_only_run = (sys_info.ip_blocks == "roofline") + if not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.") sys.exit(1) From 2049e56eb27c2a684a12471c4999353b7889fa66 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Tue, 30 May 2023 13:40:49 -0500 Subject: [PATCH 25/32] Comply to Python formatting Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index c46c7a91b..d11cbbbfc 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -468,7 +468,7 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): """ # confirm no illogical counter values (only consider non-roofline runs) - roof_only_run = (sys_info.ip_blocks == "roofline") + roof_only_run = sys_info.ip_blocks == "roofline" if not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.") sys.exit(1) From b8cf891e5e91166e52255b193bd22c66612fd8a3 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Tue, 30 May 2023 14:50:39 -0500 Subject: [PATCH 26/32] Update version Signed-off-by: coleramos425 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 4fb9f845c..b0f3d96f8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.8-PR2 +1.0.8 From 2daeb1bc749e77907298b11c43089d68ecc223d4 Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Tue, 30 May 2023 15:44:43 -0500 Subject: [PATCH 27/32] Correct typo in Agg L2 per channel stats for mi100 Signed-off-by: coleramos425 --- .../configs/gfx906/1800_L2_cache_per_channel.yaml | 14 +++++++------- .../configs/gfx908/1800_L2_cache_per_channel.yaml | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml index 01375ab28..95bba22e8 100644 --- a/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml +++ b/src/omniperf_analyze/configs/gfx906/1800_L2_cache_per_channel.yaml @@ -204,7 +204,7 @@ Panel Config: + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L1 - L2 Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) @@ -247,7 +247,7 @@ Panel Config: + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) + TO_INT(TCC_READ[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L1 - L2 Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) @@ -294,7 +294,7 @@ Panel Config: + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L1 - L2 Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) @@ -345,7 +345,7 @@ Panel Config: + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L2 - EA Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) @@ -396,7 +396,7 @@ Panel Config: + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L2 - EA Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) @@ -447,7 +447,7 @@ Panel Config: + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L2 - EA Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) @@ -498,7 +498,7 @@ Panel Config: + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L2 - EA Read Lat: mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) diff --git a/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml b/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml index 1b25734fe..e68511e9e 100644 --- a/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml +++ b/src/omniperf_analyze/configs/gfx908/1800_L2_cache_per_channel.yaml @@ -204,7 +204,7 @@ Panel Config: + TO_INT(TCC_REQ[22])) + TO_INT(TCC_REQ[23])) + TO_INT(TCC_REQ[24])) + TO_INT(TCC_REQ[25])) + TO_INT(TCC_REQ[26])) + TO_INT(TCC_REQ[27])) + TO_INT(TCC_REQ[28])) + TO_INT(TCC_REQ[29])) + TO_INT(TCC_REQ[30])) + TO_INT(TCC_REQ[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L1 - L2 Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_READ[0]) + TO_INT(TCC_READ[1])) @@ -247,7 +247,7 @@ Panel Config: + TO_INT(TCC_READ[24])) + TO_INT(TCC_READ[25])) + TO_INT(TCC_READ[26])) + TO_INT(TCC_READ[27])) + TO_INT(TCC_READ[28])) + TO_INT(TCC_READ[29])) + TO_INT(TCC_READ[30])) + TO_INT(TCC_READ[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L1 - L2 Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_WRITE[0]) + TO_INT(TCC_WRITE[1])) @@ -294,7 +294,7 @@ Panel Config: + TO_INT(TCC_WRITE[24])) + TO_INT(TCC_WRITE[25])) + TO_INT(TCC_WRITE[26])) + TO_INT(TCC_WRITE[27])) + TO_INT(TCC_WRITE[28])) + TO_INT(TCC_WRITE[29])) + TO_INT(TCC_WRITE[30])) + TO_INT(TCC_WRITE[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L1 - L2 Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_ATOMIC[0]) + TO_INT(TCC_ATOMIC[1])) @@ -345,7 +345,7 @@ Panel Config: + TO_INT(TCC_ATOMIC[26])) + TO_INT(TCC_ATOMIC[27])) + TO_INT(TCC_ATOMIC[28])) + TO_INT(TCC_ATOMIC[29])) + TO_INT(TCC_ATOMIC[30])) + TO_INT(TCC_ATOMIC[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L2 - EA Read Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_RDREQ[0]) + TO_INT(TCC_EA_RDREQ[1])) @@ -396,7 +396,7 @@ Panel Config: + TO_INT(TCC_EA_RDREQ[26])) + TO_INT(TCC_EA_RDREQ[27])) + TO_INT(TCC_EA_RDREQ[28])) + TO_INT(TCC_EA_RDREQ[29])) + TO_INT(TCC_EA_RDREQ[30])) + TO_INT(TCC_EA_RDREQ[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L2 - EA Write Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_WRREQ[0]) + TO_INT(TCC_EA_WRREQ[1])) @@ -447,7 +447,7 @@ Panel Config: + TO_INT(TCC_EA_WRREQ[26])) + TO_INT(TCC_EA_WRREQ[27])) + TO_INT(TCC_EA_WRREQ[28])) + TO_INT(TCC_EA_WRREQ[29])) + TO_INT(TCC_EA_WRREQ[30])) + TO_INT(TCC_EA_WRREQ[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L2 - EA Atomic Req: mean: AVG((((((((((((((((((((((((((((((((((TO_INT(TCC_EA_ATOMIC[0]) + TO_INT(TCC_EA_ATOMIC[1])) @@ -498,7 +498,7 @@ Panel Config: + TO_INT(TCC_EA_ATOMIC[26])) + TO_INT(TCC_EA_ATOMIC[27])) + TO_INT(TCC_EA_ATOMIC[28])) + TO_INT(TCC_EA_ATOMIC[29])) + TO_INT(TCC_EA_ATOMIC[30])) + TO_INT(TCC_EA_ATOMIC[31])) / 32) / $denom)) - units: ( $normUnit ) + units: ( + $normUnit) tips: L2 - EA Read Lat: mean: AVG((((((((((((((((((((((((((((((((((TCC_EA_RDREQ_LEVEL[0] + TCC_EA_RDREQ_LEVEL[1]) From 62304b8543beaef3ef3629a20b454c4992642f8a Mon Sep 17 00:00:00 2001 From: coleramos425 Date: Tue, 30 May 2023 17:03:22 -0500 Subject: [PATCH 28/32] Suppress verbose output Signed-off-by: coleramos425 --- src/omniperf_analyze/utils/gui_components/roofline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/omniperf_analyze/utils/gui_components/roofline.py b/src/omniperf_analyze/utils/gui_components/roofline.py index 1c6727a80..36718e5b3 100644 --- a/src/omniperf_analyze/utils/gui_components/roofline.py +++ b/src/omniperf_analyze/utils/gui_components/roofline.py @@ -50,7 +50,8 @@ def generate_plots( fig = go.Figure() plotMode = "lines+text" if is_standalone else "lines" line_data = roofline_calc.empirical_roof(roof_info, mem_level, verbose) - print("Line data:\n", line_data) + if verbose: + print("Line data:\n", line_data) ####################### # Plot BW Lines From a346db7646b0a935f4cac51d131b4a585f065c05 Mon Sep 17 00:00:00 2001 From: Cole Ramos Date: Wed, 31 May 2023 10:31:39 -0500 Subject: [PATCH 29/32] Update Zenodo Signed-off-by: Cole Ramos --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index aa6e0413e..c1cce5697 100644 --- a/README.md +++ b/README.md @@ -40,18 +40,19 @@ This software can be cited using a Zenodo style reference is provided below for convenience: ``` -@software{xiamin_lu_2022_7314631 +@software{xiaomin_lu_2022_7314631 author = {Xiaomin Lu and Cole Ramos and Fei Zheng and Karl W. Schulz and Jose Santos and - Keith Lowery}, - title = {AMDResearch/omniperf: v1.0.8-PR2 (17 April 2023)}, - month = apr, + Keith Lowery and + Cristian Di Pietrantonio}, + title = {AMDResearch/omniperf: v1.0.8 (30 May 2023)}, + month = may, year = 2023, publisher = {Zenodo}, - version = {v1.0.8-PR1}, + version = {v1.0.8}, doi = {10.5281/zenodo.7314631}, url = {https://doi.org/10.5281/zenodo.7314631} } From acb972954091d14bf91797383ebf0017810a4168 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Fri, 9 Jun 2023 10:00:56 -0500 Subject: [PATCH 30/32] Fix VGPR issue (#139) Signed-off-by: colramos-amd --- src/utils/perfagg.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index e965b784c..651bcb86d 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -127,11 +127,17 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): "wgr": [col for col in df.columns if "wgr" in col], "lds": [col for col in df.columns if "lds" in col], "scr": [col for col in df.columns if "scr" in col], - "arch_vgpr": [col for col in df.columns if "arch_vgpr" in col], - "accum_vgpr": [col for col in df.columns if "accum_vgpr" in col], "spgr": [col for col in df.columns if "sgpr" in col], } + # Check for vgpr counter in ROCm < 5.3 + if "vgpr" in df.columns: + duplicate_cols["vgpr"] = [col for col in df.columns if "vgpr" in col] + # Check for vgpr counter in ROCm >= 5.3 + else: + duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col] + duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] for key, cols in duplicate_cols.items(): + print("Key is ", key) _df = df[cols] if not test_df_column_equality(_df): msg = ( From 5f6c776170f01bd62c4eac16a0ec4257583c32c3 Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Fri, 9 Jun 2023 10:01:37 -0500 Subject: [PATCH 31/32] Omniperf rocomni changes Signed-off-by: colramos-amd --- src/omniperf_analyze/omniperf_analyze.py | 38 +++++++++++++++------- src/omniperf_analyze/utils/parser.py | 41 ++++++++++++++++++++++-- src/omniperf_analyze/utils/schema.py | 2 ++ 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py index 58991e8b3..c15181c6f 100644 --- a/src/omniperf_analyze/omniperf_analyze.py +++ b/src/omniperf_analyze/omniperf_analyze.py @@ -47,36 +47,50 @@ from omniperf_analyze.utils.gui_components.roofline import get_roofline -def initialize_run(args, normalization_filter=None): - import pandas as pd - from collections import OrderedDict +################################################ +# Helper Functions +################################################ +def generate_configs(config_dir, list_kernels, filter_metrics): from omniperf_analyze.utils import schema - from tabulate import tabulate - # Fixme: cur_root.parent.joinpath('soc_params') - soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params") - soc_spec_df = file_io.load_soc_params(soc_params_dir) - - single_panel_config = file_io.is_single_panel_config(Path(args.config_dir)) + single_panel_config = file_io.is_single_panel_config(Path(config_dir)) global archConfigs archConfigs = {} for arch in file_io.supported_arch.keys(): ac = schema.ArchConfig() - if args.list_kernels: + if list_kernels: ac.panel_configs = file_io.top_stats_build_in_config else: arch_panel_config = ( - args.config_dir if single_panel_config else args.config_dir.joinpath(arch) + config_dir if single_panel_config else config_dir.joinpath(arch) ) ac.panel_configs = file_io.load_panel_configs(arch_panel_config) # TODO: filter_metrics should/might be one per arch # print(ac) - parser.build_dfs(ac, args.filter_metrics) + parser.build_dfs(ac, filter_metrics) archConfigs[arch] = ac + return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin + + +################################################ +# Core Functions +################################################ +def initialize_run(args, normalization_filter=None): + import pandas as pd + from collections import OrderedDict + from tabulate import tabulate + from omniperf_analyze.utils import schema + + # Fixme: cur_root.parent.joinpath('soc_params') + soc_params_dir = os.path.join(os.path.dirname(__file__), "..", "soc_params") + soc_spec_df = file_io.load_soc_params(soc_params_dir) + + generate_configs(args.config_dir, args.list_kernels, args.filter_metrics) + if args.list_metrics in file_io.supported_arch.keys(): print( tabulate( diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index d11cbbbfc..5fb03c39a 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -320,6 +320,26 @@ def update_normUnit_string(equation, unit): str(equation), ).capitalize() +def gen_counter_list(formula): + function_filter = {"MIN": None, "MAX": None, "AVG": None, "ROUND": None, "TO_INT": None, "GB": None, "STD": None, "GFLOP": None, "GOP": None, "OP": None, "CU": None, "NC": None, "UC": None, "CC": None, "RW": None, "GIOP": None} + + counters = [] + if not isinstance(formula,str): + return counters + try: + tree = ast.parse( + formula + .replace("$normUnit", "SQ_WAVES") + .replace("$denom", "SQ_WAVES") + .replace("$","") + ) + for node in ast.walk(tree): + if isinstance(node, ast.Name) and node.id.rstrip("_sum").isupper() and node.id not in function_filter: + counters.append(node.id.rstrip("_sum")) + except: + pass + return counters + def build_dfs(archConfigs, filter_metrics): """ @@ -338,6 +358,7 @@ def build_dfs(archConfigs, filter_metrics): d = {} metric_list = {} dfs_type = {} + metric_counters = {} for panel_id, panel in archConfigs.panel_configs.items(): for data_source in panel["data source"]: for type, data_cofig in data_source.items(): @@ -362,6 +383,7 @@ def build_dfs(archConfigs, filter_metrics): ) metric_idx = data_source_idx + "." + str(i) values = [] + eqn_content = [] if ( (not filter_metrics) @@ -378,6 +400,7 @@ def build_dfs(archConfigs, filter_metrics): for k, v in entries.items(): if k != "tips" and k != "coll_level" and k != "alias": values.append(v) + eqn_content.append(v) if "alias" in entries.keys(): values.append(entries["alias"]) @@ -396,6 +419,15 @@ def build_dfs(archConfigs, filter_metrics): # collect metric_list metric_list[metric_idx] = key.replace(" ", "_") + # generate mapping of counters and metrics + filter = {} + for formula in eqn_content: + if formula is not None and formula != "None": + for k in gen_counter_list(formula): + filter[k] = None + if len(filter) > 0: + metric_counters[key] = list(filter) + i += 1 df.set_index("Index", inplace=True) @@ -431,6 +463,7 @@ def build_dfs(archConfigs, filter_metrics): setattr(archConfigs, "dfs", d) setattr(archConfigs, "metric_list", metric_list) setattr(archConfigs, "dfs_type", dfs_type) + setattr(archConfigs, "metric_counters", metric_counters) def build_metric_value_string(dfs, dfs_type, normal_unit): @@ -469,7 +502,8 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): # confirm no illogical counter values (only consider non-roofline runs) roof_only_run = sys_info.ip_blocks == "roofline" - if not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): + rocscope_run = sys_info.ip_blocks == "rocscope" + if not rocscope_run and not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.") sys.exit(1) @@ -711,12 +745,13 @@ def load_kernel_top(workload, dir): workload.dfs.update(tmp) -def load_table_data(workload, dir, is_gui, debug, verbose): +def load_table_data(workload, dir, is_gui, debug, verbose, skipKernelTop=False): """ Load data for all "raw_csv_table". Calculate mertric value for all "metric_table". """ - load_kernel_top(workload, dir) + if not skipKernelTop: + load_kernel_top(workload, dir) eval_metric( workload.dfs, diff --git a/src/omniperf_analyze/utils/schema.py b/src/omniperf_analyze/utils/schema.py index bcfc0bff5..6e147fcae 100644 --- a/src/omniperf_analyze/utils/schema.py +++ b/src/omniperf_analyze/utils/schema.py @@ -52,6 +52,8 @@ class ArchConfig: # [Index: Metric name] pairs metric_list: Dict[str, str] = field(default_factory=dict) + # [Metric name: Counters] pairs + metric_counters: Dict[str, list] = field(default_factory=dict) @dataclass class Workload: From 79eecb445e4cc4fdc02bdf20fe638bb9c10f755d Mon Sep 17 00:00:00 2001 From: colramos-amd Date: Fri, 9 Jun 2023 10:04:32 -0500 Subject: [PATCH 32/32] Comply to Python formatting Signed-off-by: colramos-amd --- src/omniperf_analyze/omniperf_analyze.py | 2 +- src/omniperf_analyze/utils/parser.py | 41 +++++++++++++++++++----- src/omniperf_analyze/utils/schema.py | 1 + src/utils/perfagg.py | 2 +- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/omniperf_analyze/omniperf_analyze.py b/src/omniperf_analyze/omniperf_analyze.py index c15181c6f..6415ed285 100644 --- a/src/omniperf_analyze/omniperf_analyze.py +++ b/src/omniperf_analyze/omniperf_analyze.py @@ -73,7 +73,7 @@ def generate_configs(config_dir, list_kernels, filter_metrics): archConfigs[arch] = ac - return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin + return archConfigs # Note: This return comes in handy for rocScope which borrows generate_configs() in its rocomni plugin ################################################ diff --git a/src/omniperf_analyze/utils/parser.py b/src/omniperf_analyze/utils/parser.py index 5fb03c39a..b6573566b 100644 --- a/src/omniperf_analyze/utils/parser.py +++ b/src/omniperf_analyze/utils/parser.py @@ -320,26 +320,47 @@ def update_normUnit_string(equation, unit): str(equation), ).capitalize() + def gen_counter_list(formula): - function_filter = {"MIN": None, "MAX": None, "AVG": None, "ROUND": None, "TO_INT": None, "GB": None, "STD": None, "GFLOP": None, "GOP": None, "OP": None, "CU": None, "NC": None, "UC": None, "CC": None, "RW": None, "GIOP": None} + function_filter = { + "MIN": None, + "MAX": None, + "AVG": None, + "ROUND": None, + "TO_INT": None, + "GB": None, + "STD": None, + "GFLOP": None, + "GOP": None, + "OP": None, + "CU": None, + "NC": None, + "UC": None, + "CC": None, + "RW": None, + "GIOP": None, + } counters = [] - if not isinstance(formula,str): + if not isinstance(formula, str): return counters try: tree = ast.parse( - formula - .replace("$normUnit", "SQ_WAVES") + formula.replace("$normUnit", "SQ_WAVES") .replace("$denom", "SQ_WAVES") - .replace("$","") + .replace("$", "") ) for node in ast.walk(tree): - if isinstance(node, ast.Name) and node.id.rstrip("_sum").isupper() and node.id not in function_filter: + if ( + isinstance(node, ast.Name) + and node.id.rstrip("_sum").isupper() + and node.id not in function_filter + ): counters.append(node.id.rstrip("_sum")) except: pass return counters - + def build_dfs(archConfigs, filter_metrics): """ @@ -503,7 +524,11 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): # confirm no illogical counter values (only consider non-roofline runs) roof_only_run = sys_info.ip_blocks == "roofline" rocscope_run = sys_info.ip_blocks == "rocscope" - if not rocscope_run and not roof_only_run and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any(): + if ( + not rocscope_run + and not roof_only_run + and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any() + ): print("WARNING: Dectected GRBM_GUI_ACTIVE == 0\nHaulting execution.") sys.exit(1) diff --git a/src/omniperf_analyze/utils/schema.py b/src/omniperf_analyze/utils/schema.py index 6e147fcae..f9b59868f 100644 --- a/src/omniperf_analyze/utils/schema.py +++ b/src/omniperf_analyze/utils/schema.py @@ -55,6 +55,7 @@ class ArchConfig: # [Metric name: Counters] pairs metric_counters: Dict[str, list] = field(default_factory=dict) + @dataclass class Workload: sys_info: pd.DataFrame = None diff --git a/src/utils/perfagg.py b/src/utils/perfagg.py index 651bcb86d..109fdecda 100755 --- a/src/utils/perfagg.py +++ b/src/utils/perfagg.py @@ -135,7 +135,7 @@ def join_prof(workload_dir, join_type, log_file, verbose, out=None): # Check for vgpr counter in ROCm >= 5.3 else: duplicate_cols["arch_vgpr"] = [col for col in df.columns if "arch_vgpr" in col] - duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] + duplicate_cols["accum_vgpr"] = [col for col in df.columns if "accum_vgpr" in col] for key, cols in duplicate_cols.items(): print("Key is ", key) _df = df[cols]