From 7bcf8f6eb0dfc91b0b2a7ba383884e4630ecbd95 Mon Sep 17 00:00:00 2001 From: Karl W Schulz Date: Fri, 16 Feb 2024 15:34:28 -0600 Subject: [PATCH 1/3] code formatting updates Signed-off-by: Karl W Schulz --- src/argparser.py | 42 ++-- src/config.py | 2 +- src/docs/conf.py | 2 +- src/omniperf_analyze/analysis_base.py | 96 ++++++---- src/omniperf_analyze/analysis_cli.py | 35 ++-- src/omniperf_base.py | 123 +++++++----- src/omniperf_profile/profiler_base.py | 154 +++++++++------ src/omniperf_profile/profiler_rocprof_v1.py | 34 ++-- src/omniperf_profile/profiler_rocprof_v2.py | 37 ++-- src/omniperf_profile/profiler_rocscope.py | 30 +-- src/omniperf_soc/soc_base.py | 90 +++++---- src/omniperf_soc/soc_gfx906.py | 30 +-- src/omniperf_soc/soc_gfx908.py | 31 +-- src/omniperf_soc/soc_gfx90a.py | 51 +++-- src/omniperf_soc/soc_gfx940.py | 44 +++-- src/omniperf_soc/soc_gfx941.py | 44 +++-- src/omniperf_soc/soc_gfx942.py | 44 +++-- src/roofline.py | 200 +++++++++++--------- src/utils/db_connector.py | 97 ++++++---- src/utils/file_io.py | 5 +- src/utils/gui.py | 55 +++--- src/utils/gui_components/memchart.py | 38 ++-- src/utils/kernel_name_shortener.py | 9 +- src/utils/mem_chart.py | 2 +- src/utils/parser.py | 34 ++-- src/utils/roofline_calc.py | 64 +++++-- src/utils/specs.py | 122 ++++++------ src/utils/tty.py | 11 +- tests/test_profile_general.py | 5 +- 29 files changed, 894 insertions(+), 637 deletions(-) diff --git a/src/argparser.py b/src/argparser.py index 714252e3b..150095e8d 100644 --- a/src/argparser.py +++ b/src/argparser.py @@ -26,12 +26,14 @@ import shutil import os + def print_avail_arch(avail_arch: list): ret_str = "\t\tList all available metrics for analysis on specified arch:" for arch in avail_arch: ret_str += "\n\t\t {}".format(arch) return ret_str + def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): # ----------------------------------------- # Parse arguments (dependent on mode) @@ -42,8 +44,12 @@ def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): general_group = parser.add_argument_group("General Options") parser._positionals.title = "Modes" parser._optionals.title = "Help" - general_group.add_argument("-v", "--version", action="version", version=omniperf_version["ver_pretty"]) - general_group.add_argument("-s", "--specs", action="store_true", help="Print system specs.") + general_group.add_argument( + "-v", "--version", action="version", version=omniperf_version["ver_pretty"] + ) + general_group.add_argument( + "-s", "--specs", action="store_true", help="Print system specs." 
+ ) subparsers = parser.add_subparsers( dest="mode", help="Select mode of interaction with the target application:" @@ -78,7 +84,9 @@ def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): profile_group = profile_parser.add_argument_group("Profile Options") roofline_group = profile_parser.add_argument_group("Standalone Roofline Options") - general_group.add_argument("-v", "--version", action="version", version=omniperf_version["ver_pretty"]) + general_group.add_argument( + "-v", "--version", action="version", version=omniperf_version["ver_pretty"] + ) general_group.add_argument( "-V", "--verbose", help="Increase output verbosity", action="count", default=0 ) @@ -283,11 +291,15 @@ def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): interaction_group = db_parser.add_argument_group("Interaction Type") connection_group = db_parser.add_argument_group("Connection Options") - general_group.add_argument("-v", "--version", action="version", version=omniperf_version["ver_pretty"]) + general_group.add_argument( + "-v", "--version", action="version", version=omniperf_version["ver_pretty"] + ) general_group.add_argument( "-V", "--verbose", help="Increase output verbosity", action="count", default=0 ) - general_group.add_argument("-s", "--specs", action="store_true", help="Print system specs.") + general_group.add_argument( + "-s", "--specs", action="store_true", help="Print system specs." + ) interaction_group.add_argument( "-i", @@ -374,11 +386,15 @@ def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): analyze_group = analyze_parser.add_argument_group("Analyze Options") analyze_advanced_group = analyze_parser.add_argument_group("Advanced Options") - general_group.add_argument("-v", "--version", action="version", version=omniperf_version["ver_pretty"]) + general_group.add_argument( + "-v", "--version", action="version", version=omniperf_version["ver_pretty"] + ) general_group.add_argument( "-V", "--verbose", help="Increase output verbosity", action="count", default=0 ) - general_group.add_argument("-s", "--specs", action="store_true", help="Print system specs.") + general_group.add_argument( + "-s", "--specs", action="store_true", help="Print system specs." + ) analyze_group.add_argument( "-p", @@ -398,7 +414,7 @@ def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): analyze_group.add_argument( "--list-metrics", metavar="", - choices=supported_archs.keys(),#["gfx906", "gfx908", "gfx90a"], + choices=supported_archs.keys(), # ["gfx906", "gfx908", "gfx90a"], help=print_avail_arch(supported_archs.keys()), ) analyze_group.add_argument( @@ -460,7 +476,7 @@ def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): metavar="", type=int, default=10, - help="\t\tSpecify the maximum number of stats shown in \"Top Stats\" tables (DEFAULT: 10)", + help='\t\tSpecify the maximum number of stats shown in "Top Stats" tables (DEFAULT: 10)', ) analyze_advanced_group.add_argument( "-n", @@ -508,7 +524,9 @@ def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): nargs="+", help="\t\tSpecify column indices to display.", ) - analyze_advanced_group.add_argument("-g", dest="debug", action="store_true", help="\t\tDebug single metric.") + analyze_advanced_group.add_argument( + "-g", dest="debug", action="store_true", help="\t\tDebug single metric." + ) analyze_advanced_group.add_argument( "--dependency", action="store_true", help="\t\tList the installation dependency." 
) @@ -527,5 +545,5 @@ def omniarg_parser(parser, omniperf_home, supported_archs, omniperf_version): "--specs-correction", type=str, metavar="", - help="\t\tSpecify the specs to correct." - ) \ No newline at end of file + help="\t\tSpecify the specs to correct.", + ) diff --git a/src/config.py b/src/config.py index fcb017aae..6cd07ff1c 100644 --- a/src/config.py +++ b/src/config.py @@ -26,4 +26,4 @@ # NB: Creating a new module to share global vars across modules omniperf_home = Path(__file__).resolve().parent -prog = "omniperf" \ No newline at end of file +prog = "omniperf" diff --git a/src/docs/conf.py b/src/docs/conf.py index f1f26ff80..81e6bd3f2 100644 --- a/src/docs/conf.py +++ b/src/docs/conf.py @@ -116,7 +116,7 @@ def install(package): html_static_path = ["_static"] latex_elements = { - "sphinxsetup": 'verbatimwrapslines=true, verbatimforcewraps=true', + "sphinxsetup": "verbatimwrapslines=true, verbatimforcewraps=true", } diff --git a/src/omniperf_analyze/analysis_base.py b/src/omniperf_analyze/analysis_base.py index d9c166b8c..18cab8b55 100644 --- a/src/omniperf_analyze/analysis_base.py +++ b/src/omniperf_analyze/analysis_base.py @@ -34,26 +34,31 @@ import pandas as pd from tabulate import tabulate -class OmniAnalyze_Base(): - def __init__(self,args,supported_archs): + +class OmniAnalyze_Base: + def __init__(self, args, supported_archs): self.__args = args - self._runs = OrderedDict() - self._arch_configs = {} + self._runs = OrderedDict() + self._arch_configs = {} self.__supported_archs = supported_archs - self._output = None - self.__socs = None # available OmniSoC objs + self._output = None + self.__socs = None # available OmniSoC objs def get_args(self): return self.__args + def set_soc(self, omni_socs): self.__socs = omni_socs + def get_socs(self): return self.__socs - + @demarcate def generate_configs(self, arch, config_dir, list_stats, filter_metrics, sys_info): - single_panel_config = file_io.is_single_panel_config(Path(config_dir), self.__supported_archs) - + single_panel_config = file_io.is_single_panel_config( + Path(config_dir), self.__supported_archs + ) + ac = schema.ArchConfig() if list_stats: ac.panel_configs = file_io.top_stats_build_in_config @@ -66,22 +71,26 @@ def generate_configs(self, arch, config_dir, list_stats, filter_metrics, sys_inf # TODO: filter_metrics should/might be one per arch # print(ac) - parser.build_dfs( - archConfigs=ac, - filter_metrics=filter_metrics, - sys_info=sys_info - ) + parser.build_dfs(archConfigs=ac, filter_metrics=filter_metrics, sys_info=sys_info) self._arch_configs[arch] = ac return self._arch_configs - + @demarcate def list_metrics(self): args = self.__args if args.list_metrics in self.__supported_archs.keys(): arch = args.list_metrics if arch not in self._arch_configs.keys(): - sys_info = file_io.load_sys_info(Path(self.__args.path[0][0], "sysinfo.csv")) - self.generate_configs(arch, args.config_dir, args.list_stats, args.filter_metrics, sys_info.iloc[0]) + sys_info = file_io.load_sys_info( + Path(self.__args.path[0][0], "sysinfo.csv") + ) + self.generate_configs( + arch, + args.config_dir, + args.list_stats, + args.filter_metrics, + sys_info.iloc[0], + ) for key, value in self._arch_configs[args.list_metrics].metric_list.items(): prefix = "" @@ -100,11 +109,13 @@ def list_metrics(self): def load_options(self, normalization_filter): if not normalization_filter: for k, v in self._arch_configs.items(): - parser.build_metric_value_string(v.dfs, v.dfs_type, self.__args.normal_unit) + parser.build_metric_value_string( + v.dfs, 
v.dfs_type, self.__args.normal_unit + ) else: for k, v in self._arch_configs.items(): parser.build_metric_value_string(v.dfs, v.dfs_type, normalization_filter) - + args = self.__args # Error checking for multiple runs and multiple gpu_kernel filters if args.gpu_kernel and (len(args.path) != len(args.gpu_kernel)): @@ -112,27 +123,37 @@ def load_options(self, normalization_filter): for i in range(len(args.path) - 1): args.gpu_kernel.extend(args.gpu_kernel) else: - error("Error: the number of --filter-kernels doesn't match the number of --dir.") - + error( + "Error: the number of --filter-kernels doesn't match the number of --dir." + ) + @demarcate def initalize_runs(self, normalization_filter=None): if self.__args.list_metrics: self.list_metrics() - + # load required configs for d in self.__args.path: sys_info = file_io.load_sys_info(Path(d[0], "sysinfo.csv")) arch = sys_info.iloc[0]["gpu_soc"] args = self.__args - self.generate_configs(arch, args.config_dir, args.list_stats, args.filter_metrics, sys_info.iloc[0]) + self.generate_configs( + arch, + args.config_dir, + args.list_stats, + args.filter_metrics, + sys_info.iloc[0], + ) self.load_options(normalization_filter) - + for d in self.__args.path: w = schema.Workload() w.sys_info = file_io.load_sys_info(Path(d[0], "sysinfo.csv")) if self.__args.specs_correction: - w.sys_info = parser.correct_sys_info(w.sys_info, self.__args.specs_correction) + w.sys_info = parser.correct_sys_info( + w.sys_info, self.__args.specs_correction + ) w.avail_ips = w.sys_info["ip_blocks"].item().split("|") arch = w.sys_info.iloc[0]["gpu_soc"] w.dfs = copy.deepcopy(self._arch_configs[arch].dfs) @@ -142,11 +163,9 @@ def initalize_runs(self, normalization_filter=None): return self._runs - @demarcate def sanitize(self): - """Perform sanitization of inputs - """ + """Perform sanitization of inputs""" if not self.__args.path: error("The following arguments are required: -p/--path") # verify not accessing parent directories @@ -160,23 +179,23 @@ def sanitize(self): error("Invalid directory {}\nPlease try again.".format(dir[0])) # validate profiling data is_workload_empty(dir[0]) - - - #---------------------------------------------------- + + # ---------------------------------------------------- # Required methods to be implemented by child classes - #---------------------------------------------------- + # ---------------------------------------------------- @abstractmethod def pre_processing(self): - """Perform initialization prior to analysis. - """ + """Perform initialization prior to analysis.""" logging.debug("[analysis] prepping to do some analysis") logging.info("[analysis] deriving Omniperf metrics...") # initalize output file - self._output = open(self.__args.output_file, "w+") if self.__args.output_file else sys.stdout - + self._output = ( + open(self.__args.output_file, "w+") if self.__args.output_file else sys.stdout + ) + # initalize runs self._runs = self.initalize_runs() - + # set filters if self.__args.gpu_kernel: for d, gk in zip(self.__args.path, self.__args.gpu_kernel): @@ -196,6 +215,5 @@ def pre_processing(self): @abstractmethod def run_analysis(self): - """Run analysis. 
- """ + """Run analysis.""" logging.debug("[analysis] generating analysis") diff --git a/src/omniperf_analyze/analysis_cli.py b/src/omniperf_analyze/analysis_cli.py index 422b7ea91..b240b7246 100644 --- a/src/omniperf_analyze/analysis_cli.py +++ b/src/omniperf_analyze/analysis_cli.py @@ -27,15 +27,14 @@ from utils import file_io, parser, tty from utils.kernel_name_shortener import kernel_name_shortener -class cli_analysis(OmniAnalyze_Base): - #----------------------- +class cli_analysis(OmniAnalyze_Base): + # ----------------------- # Required child methods - #----------------------- + # ----------------------- @demarcate def pre_processing(self): - """Perform any pre-processing steps prior to analysis. - """ + """Perform any pre-processing steps prior to analysis.""" super().pre_processing() if self.get_args().random_port: error("--gui flag is required to enable --random-port") @@ -48,7 +47,7 @@ def pre_processing(self): filter_gpu_ids=self._runs[d[0]].filter_gpu_ids, filter_dispatch_ids=self._runs[d[0]].filter_dispatch_ids, time_unit=self.get_args().time_unit, - max_stat_num=self.get_args().max_stat_num + max_stat_num=self.get_args().max_stat_num, ) # create 'mega dataframe' self._runs[d[0]].raw_pmc = file_io.create_df_pmc( @@ -56,30 +55,32 @@ def pre_processing(self): ) # create the loaded table parser.load_table_data( - workload=self._runs[d[0]], - dir=d[0], + workload=self._runs[d[0]], + dir=d[0], is_gui=False, - debug=self.get_args().debug, - verbose=self.get_args().verbose + debug=self.get_args().debug, + verbose=self.get_args().verbose, ) - @demarcate def run_analysis(self): - """Run CLI analysis. - """ + """Run CLI analysis.""" super().run_analysis() if self.get_args().list_stats: tty.show_kernel_stats( self.get_args(), self._runs, - self._arch_configs[self._runs[self.get_args().path[0][0]].sys_info.iloc[0]["gpu_soc"]], - self._output + self._arch_configs[ + self._runs[self.get_args().path[0][0]].sys_info.iloc[0]["gpu_soc"] + ], + self._output, ) else: tty.show_all( self.get_args(), self._runs, - self._arch_configs[self._runs[self.get_args().path[0][0]].sys_info.iloc[0]["gpu_soc"]], - self._output + self._arch_configs[ + self._runs[self.get_args().path[0][0]].sys_info.iloc[0]["gpu_soc"] + ], + self._output, ) diff --git a/src/omniperf_base.py b/src/omniperf_base.py index 512d1535b..71cf5ade4 100644 --- a/src/omniperf_base.py +++ b/src/omniperf_base.py @@ -29,7 +29,15 @@ from pathlib import Path import shutil from utils.specs import get_machine_specs -from utils.utils import demarcate, trace_logger, get_version, get_version_display, detect_rocprof, error, get_submodules +from utils.utils import ( + demarcate, + trace_logger, + get_version, + get_version_display, + detect_rocprof, + error, + get_submodules, +) from argparser import omniarg_parser import config import pandas as pd @@ -44,13 +52,16 @@ "gfx942": {"mi300": ["MI300A_A1", "MI300X_A1"]}, } + class Omniperf: def __init__(self): self.__args = None self.__profiler_mode = None self.__analyze_mode = None - self.__soc_name = set() # gpu name, or in case of analyze mode, all loaded gpu name(s) - self.__soc = dict() # set of key, value pairs. Where arch->OmniSoc() obj + self.__soc_name = ( + set() + ) # gpu name, or in case of analyze mode, all loaded gpu name(s) + self.__soc = dict() # set of key, value pairs. 
Where arch->OmniSoc() obj self.__version = { "ver": None, "ver_pretty": None, @@ -68,22 +79,21 @@ def __init__(self): self.detect_profiler() elif self.__mode == "analyze": self.detect_analyze() - + logging.info("Execution mode = %s" % self.__mode) - + def print_graphic(self): - """Log program name as ascii art to terminal. - """ - ascii_art = ''' + """Log program name as ascii art to terminal.""" + ascii_art = """ ___ _ __ / _ \ _ __ ___ _ __ (_)_ __ ___ _ __ / _| | | | | '_ ` _ \| '_ \| | '_ \ / _ \ '__| |_ | |_| | | | | | | | | | | |_) | __/ | | _| \___/|_| |_| |_|_| |_|_| .__/ \___|_| |_| |_| -''' +""" logging.info(ascii_art) - + def setup_logging(self): # register a trace level logger logging.TRACE = logging.DEBUG - 5 @@ -92,16 +102,16 @@ def setup_logging(self): setattr(logging, "trace", trace_logger) # demonstrate override of default loglevel via env variable - loglevel=logging.INFO + loglevel = logging.INFO if "OMNIPERF_LOGLEVEL" in os.environ.keys(): - loglevel = os.environ['OMNIPERF_LOGLEVEL'] - if loglevel in {"DEBUG","debug"}: + loglevel = os.environ["OMNIPERF_LOGLEVEL"] + if loglevel in {"DEBUG", "debug"}: loglevel = logging.DEBUG - elif loglevel in {"TRACE","trace"}: + elif loglevel in {"TRACE", "trace"}: loglevel = logging.TRACE - elif loglevel in {"INFO","info"}: + elif loglevel in {"INFO", "info"}: loglevel = logging.INFO - elif loglevel in {"ERROR","error"}: + elif loglevel in {"ERROR", "error"}: loglevel = logging.ERROR else: print("Ignoring unsupported OMNIPERF_LOGLEVEL setting (%s)" % loglevel) @@ -111,15 +121,21 @@ def setup_logging(self): def get_mode(self): return self.__mode - + def set_version(self): vData = get_version(config.omniperf_home) self.__version["ver"] = vData["version"] - self.__version["ver_pretty"] = get_version_display(vData["version"], vData["sha"], vData["mode"]) + self.__version["ver_pretty"] = get_version_display( + vData["version"], vData["sha"], vData["mode"] + ) return - + def detect_profiler(self): - if self.__args.lucky == True or self.__args.summaries == True or self.__args.use_rocscope: + if ( + self.__args.lucky == True + or self.__args.summaries == True + or self.__args.use_rocscope + ): if not shutil.which("rocscope"): logging.error("Rocscope must be in PATH") sys.exit(1) @@ -132,10 +148,13 @@ def detect_profiler(self): elif str(rocprof_cmd).endswith("rocprofv2"): self.__profiler_mode = "rocprofv2" else: - error("Incompatible profiler: %s. Supported profilers include: %s" % (rocprof_cmd, get_submodules('omniperf_profile'))) - + error( + "Incompatible profiler: %s. 
Supported profilers include: %s" + % (rocprof_cmd, get_submodules("omniperf_profile")) + ) return + def detect_analyze(self): if self.__args.gui: self.__analyze_mode = "web_ui" @@ -145,8 +164,7 @@ def detect_analyze(self): @demarcate def detect_soc(self, sys_info=pd.DataFrame()): - """Load OmniSoC instance for Omniperf run - """ + """Load OmniSoC instance for Omniperf run""" # in case of analyze mode, we can explicitly specify an arch # rather than detect from rocminfo if sys_info.empty: @@ -163,11 +181,11 @@ def detect_soc(self, sys_info=pd.DataFrame()): error("%s is an unsupported SoC" % arch) else: self.__soc_name.add(target) - if hasattr(self.__args, 'target'): + if hasattr(self.__args, "target"): self.__args.target = target - soc_module = importlib.import_module('omniperf_soc.soc_'+arch) - soc_class = getattr(soc_module, arch+'_soc') + soc_module = importlib.import_module("omniperf_soc.soc_" + arch) + soc_class = getattr(soc_module, arch + "_soc") self.__soc[arch] = soc_class(self.__args) logging.info("SoC = %s" % self.__soc_name) @@ -176,14 +194,16 @@ def detect_soc(self, sys_info=pd.DataFrame()): @demarcate def parse_args(self): parser = argparse.ArgumentParser( - description="Command line interface for AMD's GPU profiler, Omniperf", - prog="tool", - formatter_class=lambda prog: argparse.RawTextHelpFormatter( + description="Command line interface for AMD's GPU profiler, Omniperf", + prog="tool", + formatter_class=lambda prog: argparse.RawTextHelpFormatter( prog, max_help_position=30 ), usage="omniperf [mode] [options]", ) - omniarg_parser(parser, config.omniperf_home, self.__supported_archs ,self.__version) + omniarg_parser( + parser, config.omniperf_home, self.__supported_archs, self.__version + ) self.__args = parser.parse_args() if self.__args.specs: @@ -202,27 +222,38 @@ def run_profiler(self): # Update default path if self.__args.path == os.path.join(os.getcwd(), "workloads"): - self.__args.path = os.path.join(self.__args.path, self.__args.name, self.__args.target) + self.__args.path = os.path.join( + self.__args.path, self.__args.name, self.__args.target + ) logging.info("Profiler choice = %s" % self.__profiler_mode) # instantiate desired profiler if self.__profiler_mode == "rocprofv1": from omniperf_profile.profiler_rocprof_v1 import rocprof_v1_profiler - profiler = rocprof_v1_profiler(self.__args, self.__profiler_mode, self.__soc[targ_arch]) + + profiler = rocprof_v1_profiler( + self.__args, self.__profiler_mode, self.__soc[targ_arch] + ) elif self.__profiler_mode == "rocprofv2": from omniperf_profile.profiler_rocprof_v2 import rocprof_v2_profiler - profiler = rocprof_v2_profiler(self.__args, self.__profiler_mode, self.__soc[targ_arch]) + + profiler = rocprof_v2_profiler( + self.__args, self.__profiler_mode, self.__soc[targ_arch] + ) elif self.__profiler_mode == "rocscope": from omniperf_profile.profiler_rocscope import rocscope_profiler - profiler = rocscope_profiler(self.__args, self.__profiler_mode, self.__soc[targ_arch]) + + profiler = rocscope_profiler( + self.__args, self.__profiler_mode, self.__soc[targ_arch] + ) else: logging.error("Unsupported profiler") sys.exit(1) - #----------------------- + # ----------------------- # run profiling workflow - #----------------------- + # ----------------------- self.__soc[targ_arch].profiling_setup() profiler.pre_processing() profiler.run_profiling(self.__version["ver"], config.prog) @@ -235,17 +266,18 @@ def run_profiler(self): def update_db(self): self.print_graphic() from utils.db_connector import DatabaseConnector + 
db_connection = DatabaseConnector(self.__args)
-        
-        #-----------------------
+
+        # -----------------------
         # run database workflow
-        #-----------------------
+        # -----------------------
         db_connection.pre_processing()
         if self.__args.upload:
             db_connection.db_import()
         else:
             db_connection.db_remove()
-        
+
         return
 
     @demarcate
@@ -256,18 +288,20 @@
 
         if self.__analyze_mode == "cli":
             from omniperf_analyze.analysis_cli import cli_analysis
+
             analyzer = cli_analysis(self.__args, self.__supported_archs)
         elif self.__analyze_mode == "web_ui":
             from omniperf_analyze.analysis_webui import webui_analysis
+
             analyzer = webui_analysis(self.__args, self.__supported_archs)
         else:
             error("Unsupported anlaysis mode -> %s" % self.__analyze_mode)
 
-        #-----------------------
+        # -----------------------
         # run analysis workflow
-        #-----------------------
+        # -----------------------
         analyzer.sanitize()
-        
+
         # Load required SoC(s) from input
         for d in analyzer.get_args().path:
             sys_info = pd.read_csv(Path(d[0], "sysinfo.csv"))
@@ -278,4 +312,3 @@
             analyzer.set_soc(self.__soc)
 
         analyzer.run_analysis()
 
         return
-
diff --git a/src/omniperf_profile/profiler_base.py b/src/omniperf_profile/profiler_base.py
index 5fbf8ebe1..3915425bc 100644
--- a/src/omniperf_profile/profiler_base.py
+++ b/src/omniperf_profile/profiler_base.py
@@ -28,31 +28,44 @@
 import sys
 import os
 import re
-from utils.utils import capture_subprocess_output, run_prof, gen_sysinfo, run_rocscope, error, demarcate
+from utils.utils import (
+    capture_subprocess_output,
+    run_prof,
+    gen_sysinfo,
+    run_rocscope,
+    error,
+    demarcate,
+)
 import config
 import pandas as pd
 
-class OmniProfiler_Base():
+
+class OmniProfiler_Base:
     def __init__(self, args, profiler_mode, soc):
         self.__args = args
         self.__profiler = profiler_mode
-        self._soc = soc # OmniSoC obj
-        self.__perfmon_dir = os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs")
+        self._soc = soc  # OmniSoC obj
+        self.__perfmon_dir = os.path.join(
+            str(config.omniperf_home), "omniperf_soc", "profile_configs"
+        )
 
     def get_args(self):
         return self.__args
+
     def get_profiler_options(self, fname):
-        """Fetch any version specific arguments required by profiler
-        """
+        """Fetch any version specific arguments required by profiler"""
        # assume no SoC specific options and return empty list by default
         return []
-    
+
     @demarcate
     def pmc_perf_split(self):
-        """Avoid default rocprof join utility by spliting each line into a separate input file
-        """
+        """Avoid default rocprof join utility by splitting each line into a separate input file"""
         workload_perfmon_dir = os.path.join(self.__args.path, "perfmon")
-        lines = open(os.path.join(workload_perfmon_dir, "pmc_perf.txt"), "r").read().splitlines()
+        lines = (
+            open(os.path.join(workload_perfmon_dir, "pmc_perf.txt"), "r")
+            .read()
+            .splitlines()
+        )
 
         # Iterate over each line in pmc_perf.txt
         mpattern = r"^pmc:(.*)"
@@ -83,8 +96,7 @@
 
     @demarcate
     def join_prof(self, out=None):
-        """Manually join separated rocprof runs
-        """
+        """Manually join separated rocprof runs"""
         # Set default output directory if not specified
         if type(self.__args.path) == str:
             if out is None:
@@ -105,7 +117,11 @@
         elif self.__args.join_type == "grid":
             key = _df.groupby(["Kernel_Name", "Grid_Size"]).cumcount()
             _df["key"] = (
-                _df["Kernel_Name"] + " - " + _df["Grid_Size"].astype(str) + " - " + key.astype(str)
+                _df["Kernel_Name"]
+                + " - "
+                + _df["Grid_Size"].astype(str)
+                + " - "
+                + key.astype(str)
             )
         else:
             print("ERROR: Unrecognized --join-type")
@@ -121,9 +137,15 @@
         duplicate_cols = {
             "GPU_ID": [col for col in df.columns if col.startswith("GPU_ID")],
             "Grid_Size": [col for col in df.columns if col.startswith("Grid_Size")],
-            "Workgroup_Size": [col for col in df.columns if col.startswith("Workgroup_Size")],
-            "LDS_Per_Workgroup": [col for col in df.columns if col.startswith("LDS_Per_Workgroup")],
-            "Scratch_Per_Workitem": [col for col in df.columns if col.startswith("Scratch_Per_Workitem")],
+            "Workgroup_Size": [
+                col for col in df.columns if col.startswith("Workgroup_Size")
+            ],
+            "LDS_Per_Workgroup": [
+                col for col in df.columns if col.startswith("LDS_Per_Workgroup")
+            ],
+            "Scratch_Per_Workitem": [
+                col for col in df.columns if col.startswith("Scratch_Per_Workitem")
+            ],
             "SGPR": [col for col in df.columns if col.startswith("SGPR")],
         }
         # Check for vgpr counter in ROCm < 5.3
@@ -131,15 +153,17 @@
             duplicate_cols["vgpr"] = [col for col in df.columns if col.startswith("vgpr")]
         # Check for vgpr counter in ROCm >= 5.3
         else:
-            duplicate_cols["Arch_VGPR"] = [col for col in df.columns if col.startswith("Arch_VGPR")]
-            duplicate_cols["Accum_VGPR"] = [col for col in df.columns if col.startswith("Accum_VGPR")]
+            duplicate_cols["Arch_VGPR"] = [
+                col for col in df.columns if col.startswith("Arch_VGPR")
+            ]
+            duplicate_cols["Accum_VGPR"] = [
+                col for col in df.columns if col.startswith("Accum_VGPR")
+            ]
         for key, cols in duplicate_cols.items():
             _df = df[cols]
             if not test_df_column_equality(_df):
-                msg = (
-                    "WARNING: Detected differing {} values while joining pmc_perf.csv".format(
-                        key
-                    )
+                msg = "WARNING: Detected differing {} values while joining pmc_perf.csv".format(
+                    key
                 )
                 logging.warning(msg + "\n")
             else:
@@ -157,7 +181,7 @@
             if not any(
                 k.startswith(check)
                 for check in [
-                    # rocprofv2 headers 
+                    # rocprofv2 headers
                     "GPU_ID_",
                     "Grid_Size_",
                     "Workgroup_Size_",
@@ -236,31 +260,42 @@
         else:
             return df
 
-    #----------------------------------------------------
+    # ----------------------------------------------------
     # Required methods to be implemented by child classes
-    #----------------------------------------------------
+    # ----------------------------------------------------
     @abstractmethod
     def pre_processing(self):
-        """Perform any pre-processing steps prior to profiling.
-        """
+        """Perform any pre-processing steps prior to profiling."""
         logging.debug("[profiling] pre-processing using %s profiler" % self.__profiler)
-    
+
         # verify soc compatibility
         if self.__profiler not in self._soc.get_compatible_profilers():
-            error("%s is not enabled in %s. Available profilers include: %s" % (self._soc.get_soc_name(), self.__profiler, self._soc.get_compatible_profilers()))
+            error(
+                "%s is not enabled in %s. Available profilers include: %s"
+                % (
+                    self._soc.get_soc_name(),
+                    self.__profiler,
+                    self._soc.get_compatible_profilers(),
+                )
+            )
         # verify not accessing parent directories
         if ".." in str(self.__args.path):
             error("Access denied. Cannot access parent directories in path (i.e. ../)")
-        
+
         # verify correct formatting for application binary
         self.__args.remaining = self.__args.remaining[1:]
         if self.__args.remaining:
             if not os.path.isfile(self.__args.remaining[0]):
-                error("Your command %s doesn't point to a executable. Please verify." % self.__args.remaining[0])
+                error(
+                    "Your command %s doesn't point to an executable. Please verify." 
+ % self.__args.remaining[0] + ) self.__args.remaining = " ".join(self.__args.remaining) else: - error("Profiling command required. Pass application executable after -- at the end of options.\n\t\ti.e. omniperf profile -n vcopy -- ./vcopy 1048576 256") - + error( + "Profiling command required. Pass application executable after -- at the end of options.\n\t\ti.e. omniperf profile -n vcopy -- ./vcopy 1048576 256" + ) + # verify name meets MongoDB length requirements and no illegal chars if len(self.__args.name) > 35: error("-n/--name exceeds 35 character limit. Try again.") @@ -268,11 +303,12 @@ def pre_processing(self): error("'-' and '.' are not permitted in -n/--name") @abstractmethod - def run_profiling(self, version:str, prog:str): - """Run profiling. - """ - logging.debug("[profiling] performing profiling using %s profiler" % self.__profiler) - + def run_profiling(self, version: str, prog: str): + """Run profiling.""" + logging.debug( + "[profiling] performing profiling using %s profiler" % self.__profiler + ) + # log basic info logging.info(str(prog) + " ver: " + str(version)) logging.info("Path: " + str(os.path.abspath(self.__args.path))) @@ -283,7 +319,7 @@ def run_profiling(self, version:str, prog:str): if self.__args.ipblocks == None: logging.info("IP Blocks: All") else: - logging.info("IP Blocks: "+ str(self.__args.ipblocks)) + logging.info("IP Blocks: " + str(self.__args.ipblocks)) if self.__args.kernel_verbose > 5: logging.info("KernelName verbose: DISABLED") else: @@ -298,7 +334,10 @@ def run_profiling(self, version:str, prog:str): "sed", "-i", "-r", - "s%^(kernel:).*%" + "kernel: " + ",".join(self.__args.kernel) + "%g", + "s%^(kernel:).*%" + + "kernel: " + + ",".join(self.__args.kernel) + + "%g", fname, ] ) @@ -315,7 +354,10 @@ def run_profiling(self, version:str, prog:str): "sed", "-i", "-r", - "s%^(range:).*%" + "range: " + " ".join(self.__args.dispatch) + "%g", + "s%^(range:).*%" + + "range: " + + " ".join(self.__args.dispatch) + + "%g", fname, ] ) @@ -325,42 +367,44 @@ def run_profiling(self, version:str, prog:str): else: logging.debug(output) logging.info("\nCurrent input file: %s" % fname) - + # Fetch any SoC/profiler specific profiling options options = self._soc.get_profiler_options() options += self.get_profiler_options(fname) if self.__profiler == "rocprofv1" or self.__profiler == "rocprofv2": run_prof( - fname=fname, - # workload_dir=self.get_args().path, - # perfmon_dir=self.__perfmon_dir, + fname=fname, + # workload_dir=self.get_args().path, + # perfmon_dir=self.__perfmon_dir, # cmd=self.__args.remaining, # target=self.__args.target, profiler_options=options, target=self.__args.target, - workload_dir=self.get_args().path + workload_dir=self.get_args().path, ) elif self.__profiler == "rocscope": run_rocscope(self.__args, fname) else: - #TODO: Finish logic + # TODO: Finish logic error("profiler not supported") @abstractmethod def post_processing(self): - """Perform any post-processing steps prior to profiling. 
- """ - logging.debug("[profiling] performing post-processing using %s profiler" % self.__profiler) + """Perform any post-processing steps prior to profiling.""" + logging.debug( + "[profiling] performing post-processing using %s profiler" % self.__profiler + ) gen_sysinfo( - workload_name=self.__args.name, - workload_dir=self.get_args().path, - ip_blocks=self.__args.ipblocks, - app_cmd=self.__args.remaining, - skip_roof=self.__args.no_roof, + workload_name=self.__args.name, + workload_dir=self.get_args().path, + ip_blocks=self.__args.ipblocks, + app_cmd=self.__args.remaining, + skip_roof=self.__args.no_roof, roof_only=self.__args.roof_only, ) + def test_df_column_equality(df): return df.eq(df.iloc[:, 0], axis=0).all(1).all() diff --git a/src/omniperf_profile/profiler_rocprof_v1.py b/src/omniperf_profile/profiler_rocprof_v1.py index dd21fe5e0..bcc40b6a7 100644 --- a/src/omniperf_profile/profiler_rocprof_v1.py +++ b/src/omniperf_profile/profiler_rocprof_v1.py @@ -31,39 +31,42 @@ class rocprof_v1_profiler(OmniProfiler_Base): - def __init__(self,profiling_args,profiler_mode,soc): - super().__init__(profiling_args,profiler_mode,soc) - self.ready_to_profile = (self.get_args().roof_only and not os.path.isfile(os.path.join(self.get_args().path, "pmc_perf.csv")) - or not self.get_args().roof_only) + def __init__(self, profiling_args, profiler_mode, soc): + super().__init__(profiling_args, profiler_mode, soc) + self.ready_to_profile = ( + self.get_args().roof_only + and not os.path.isfile(os.path.join(self.get_args().path, "pmc_perf.csv")) + or not self.get_args().roof_only + ) def get_profiler_options(self, fname): fbase = os.path.splitext(os.path.basename(fname))[0] app_cmd = self.get_args().remaining args = [ # v1 requires request for timestamps - "--timestamp", "on", + "--timestamp", + "on", # v1 requires csv extension - "-o", self.get_args().path + "/" + fbase + ".csv", + "-o", + self.get_args().path + "/" + fbase + ".csv", # v1 does require quotes on app cmd '"' + app_cmd + '"', ] return args - #----------------------- + # ----------------------- # Required child methods - #----------------------- + # ----------------------- @demarcate def pre_processing(self): - """Perform any pre-processing steps prior to profiling. - """ + """Perform any pre-processing steps prior to profiling.""" super().pre_processing() if self.ready_to_profile: self.pmc_perf_split() @demarcate - def run_profiling(self, version:str, prog:str): - """Run profiling. - """ + def run_profiling(self, version: str, prog: str): + """Run profiling.""" if self.ready_to_profile: if self.get_args().roof_only: logging.info("[roofline] Generating pmc_perf.csv") @@ -71,11 +74,10 @@ def run_profiling(self, version:str, prog:str): super().run_profiling(version, prog) else: logging.info("[roofline] Detected existing pmc_perf.csv") - + @demarcate def post_processing(self): - """Perform any post-processing steps prior to profiling. 
- """ + """Perform any post-processing steps prior to profiling.""" super().post_processing() if self.ready_to_profile: diff --git a/src/omniperf_profile/profiler_rocprof_v2.py b/src/omniperf_profile/profiler_rocprof_v2.py index ffd234911..a833cca82 100644 --- a/src/omniperf_profile/profiler_rocprof_v2.py +++ b/src/omniperf_profile/profiler_rocprof_v2.py @@ -28,39 +28,44 @@ from utils.utils import demarcate from utils.kernel_name_shortener import kernel_name_shortener + class rocprof_v2_profiler(OmniProfiler_Base): - def __init__(self,profiling_args,profiler_mode,soc): - super().__init__(profiling_args,profiler_mode,soc) - self.ready_to_profile = (self.get_args().roof_only and not os.path.isfile(os.path.join(self.get_args().path, "pmc_perf.csv")) - or not self.get_args().roof_only) + def __init__(self, profiling_args, profiler_mode, soc): + super().__init__(profiling_args, profiler_mode, soc) + self.ready_to_profile = ( + self.get_args().roof_only + and not os.path.isfile(os.path.join(self.get_args().path, "pmc_perf.csv")) + or not self.get_args().roof_only + ) def get_profiler_options(self, fname): fbase = os.path.splitext(os.path.basename(fname))[0] app_cmd = self.get_args().remaining args = [ # v2 requires output directory argument - "-d", self.get_args().path + "/" + "out", + "-d", + self.get_args().path + "/" + "out", # v2 does not require csv extension - "-o", fbase, + "-o", + fbase, # v2 doen not require quotes on cmd - app_cmd + app_cmd, ] return args - #----------------------- + + # ----------------------- # Required child methods - #----------------------- + # ----------------------- @demarcate def pre_processing(self): - """Perform any pre-processing steps prior to profiling. - """ + """Perform any pre-processing steps prior to profiling.""" super().pre_processing() if self.ready_to_profile: self.pmc_perf_split() @demarcate def run_profiling(self, version, prog): - """Run profiling. - """ + """Run profiling.""" if self.ready_to_profile: if self.get_args().roof_only: logging.info("[roofline] Generating pmc_perf.csv") @@ -70,13 +75,11 @@ def run_profiling(self, version, prog): @demarcate def post_processing(self): - """Perform any post-processing steps prior to profiling. - """ + """Perform any post-processing steps prior to profiling.""" super().post_processing() if self.ready_to_profile: - # Pass headers to join on + # Pass headers to join on self.join_prof() # Demangle and overwrite original KernelNames kernel_name_shortener(self.get_args().path, self.get_args().kernel_verbose) - diff --git a/src/omniperf_profile/profiler_rocscope.py b/src/omniperf_profile/profiler_rocscope.py index cc5a90a24..fc0121a5a 100644 --- a/src/omniperf_profile/profiler_rocscope.py +++ b/src/omniperf_profile/profiler_rocscope.py @@ -26,26 +26,30 @@ from omniperf_profile.profiler_base import OmniProfiler_Base from utils.utils import demarcate + class rocscope_profiler(OmniProfiler_Base): - def __init__(self,profiling_args,profiler_mode,soc): - super().__init__(profiling_args,profiler_mode,soc) + def __init__(self, profiling_args, profiler_mode, soc): + super().__init__(profiling_args, profiler_mode, soc) - #----------------------- + # ----------------------- # Required child methods - #----------------------- + # ----------------------- @demarcate def pre_processing(self): - """Perform any pre-processing steps prior to profiling. 
-        """
-        self.__profiler="rocscope"
+        """Perform any pre-processing steps prior to profiling."""
+        self.__profiler = "rocscope"
         logging.debug("[profiling] pre-processing using %s profiler" % self.__profiler)
 
+
     @demarcate
     def run_profiling(self, version, prog):
-        """Run profiling.
-        """
-        logging.debug("[profiling] performing profiling using %s profiler" % self.__profiler)
+        """Run profiling."""
+        logging.debug(
+            "[profiling] performing profiling using %s profiler" % self.__profiler
+        )
 
+
     @demarcate
     def post_processing(self):
-        """Perform any post-processing steps prior to profiling.
-        """
-        logging.debug("[profiling] performing post-processing using %s profiler" % self.__profiler)
+        """Perform any post-processing steps prior to profiling."""
+        logging.debug(
+            "[profiling] performing post-processing using %s profiler" % self.__profiler
+        )
diff --git a/src/omniperf_soc/soc_base.py b/src/omniperf_soc/soc_base.py
index 586068a1a..a684998cc 100644
--- a/src/omniperf_soc/soc_base.py
+++ b/src/omniperf_soc/soc_base.py
@@ -33,59 +33,74 @@
 from utils.utils import demarcate
 from pathlib import Path
 
-class OmniSoC_Base():
-    def __init__(self,args):
+
+class OmniSoC_Base:
+    def __init__(self, args):
         self.__args = args
-        self.__name = None # SoC name
+        self.__name = None  # SoC name
         self.__perfmon_dir = None
-        self.__perfmon_config = {} # Per IP block max number of simulutaneous counters. GFX IP Blocks
-        self.__soc_params = {} # SoC specifications
-        self.__compatible_profilers = [] # Store profilers compatible with SoC
+        self.__perfmon_config = (
+            {}
+        )  # Per IP block max number of simultaneous counters. GFX IP Blocks
+        self.__soc_params = {}  # SoC specifications
+        self.__compatible_profilers = []  # Store profilers compatible with SoC
         if self.__args.path == os.path.join(os.getcwd(), "workloads"):
-            self.__workload_dir = os.path.join(self.__args.path, self.__args.name, self.__args.target)
+            self.__workload_dir = os.path.join(
+                self.__args.path, self.__args.name, self.__args.target
+            )
         else:
             self.__workload_dir = self.__args.path
-    
+
     def __hash__(self):
         return hash(self.__name)
+
     def __eq__(self, other):
         if not isinstance(other, type(self)):
             return NotImplemented
         return self.__name == other.get_soc()
 
     def set_perfmon_dir(self, path: str):
         self.__perfmon_dir = path
+
     def set_perfmon_config(self, config: dict):
         self.__perfmon_config = config
+
     def set_soc_param(self, param: dict):
         self.__soc_params = param
+
     def get_workload_perfmon_dir(self):
         return str(Path(self.__perfmon_dir).parent.absolute())
+
     def get_soc_param(self):
         return self.__soc_params
+
     def set_soc_name(self, soc: str):
         self.__name = soc
+
     def get_soc_name(self):
         return self.__name
+
     def get_args(self):
         return self.__args
+
     def set_compatible_profilers(self, profiler_names: list):
         self.__compatible_profilers = profiler_names
+
     def get_compatible_profilers(self):
         return self.__compatible_profilers
-    
+
     @demarcate
     def get_profiler_options(self):
-        """Fetch any SoC specific arguments required by the profiler
-        """
+        """Fetch any SoC specific arguments required by the profiler"""
         # assume no SoC specific options and return empty list by default
         return []
-    
+
     @demarcate
     def perfmon_filter(self, roofline_perfmon_only: bool):
-        """Filter default performance counter set based on user arguments
-        """
-        if roofline_perfmon_only and os.path.isfile(os.path.join(self.get_args().path, "pmc_perf.csv")):
+        """Filter default performance counter set based on user arguments"""
+        if roofline_perfmon_only and 
os.path.isfile( + os.path.join(self.get_args().path, "pmc_perf.csv") + ): return workload_perfmon_dir = self.__workload_dir + "/perfmon" @@ -99,7 +114,9 @@ def perfmon_filter(self, roofline_perfmon_only: bool): if not roofline_perfmon_only: ref_pmc_files_list = glob.glob(self.__perfmon_dir + "/" + "pmc_*perf*.txt") - ref_pmc_files_list += glob.glob(self.__perfmon_dir + "/" + self.__name + "/pmc_*_perf*.txt") + ref_pmc_files_list += glob.glob( + self.__perfmon_dir + "/" + self.__name + "/pmc_*_perf*.txt" + ) # Perfmon list filtering if self.__args.ipblocks != None: @@ -124,38 +141,34 @@ def perfmon_filter(self, roofline_perfmon_only: bool): ref_pmc_files_list = glob.glob(self.__perfmon_dir + "/" + "pmc_roof_perf.txt") pmc_files_list = ref_pmc_files_list - # Coalesce and writeback workload specific perfmon - pmc_list = perfmon_coalesce(pmc_files_list, self.__perfmon_config, self.__workload_dir) + pmc_list = perfmon_coalesce( + pmc_files_list, self.__perfmon_config, self.__workload_dir + ) perfmon_emit(pmc_list, self.__perfmon_config, self.__workload_dir) - #---------------------------------------------------- + # ---------------------------------------------------- # Required methods to be implemented by child classes - #---------------------------------------------------- + # ---------------------------------------------------- @abstractmethod def profiling_setup(self): - """Perform any SoC-specific setup prior to profiling. - """ + """Perform any SoC-specific setup prior to profiling.""" logging.debug("[profiling] perform SoC profiling setup for %s" % self.__name) - @abstractmethod def post_profiling(self): - """Perform any SoC-specific post profiling activities. - """ + """Perform any SoC-specific post profiling activities.""" logging.debug("[profiling] perform SoC post processing for %s" % self.__name) @abstractmethod def analysis_setup(self): - """Perform any SoC-specific setup prior to analysis. - """ + """Perform any SoC-specific setup prior to analysis.""" logging.debug("[analysis] perform SoC analysis setup for %s" % self.__name) - + @demarcate def perfmon_coalesce(pmc_files_list, perfmon_config, workload_dir): - """Sort and bucket all related performance counters to minimize required application passes - """ + """Sort and bucket all related performance counters to minimize required application passes""" workload_perfmon_dir = workload_dir + "/perfmon" # match pattern for pmc counters @@ -216,9 +229,15 @@ def perfmon_coalesce(pmc_files_list, perfmon_config, workload_dir): return pmc_list + @demarcate def update_pmc_bucket( - counters, save_file, perfmon_config, pmc_list=None, stext=None, workload_perfmon_dir=None + counters, + save_file, + perfmon_config, + pmc_list=None, + stext=None, + workload_perfmon_dir=None, ): # Verify inputs. 
# If save_file is True, we're being called internally, from perfmon_coalesce @@ -303,6 +322,7 @@ def update_pmc_bucket( pmc_list["TCC2"][str(ch)].sort() return pmc_list + @demarcate def perfmon_emit(pmc_list, perfmon_config, workload_dir=None): # Calculate the minimum number of iteration to save the pmc counters @@ -363,13 +383,13 @@ def perfmon_emit(pmc_list, perfmon_config, workload_dir=None): for iter in range(niter): # Prefix line = "pmc: " - + N = perfmon_config["TCC"] # TCC per-channel counters tcc_counters = [] for ch in range(perfmon_config["TCC_channels"]): tcc_counters += pmc_list["TCC2"][str(ch)][tcc2_index * N : tcc2_index * N + N] - + tcc2_index += 1 # TCC2 aggregated counters @@ -387,4 +407,4 @@ def perfmon_emit(pmc_list, perfmon_config, workload_dir=None): fd.write("kernel:\n") fd.close() else: - return batches \ No newline at end of file + return batches diff --git a/src/omniperf_soc/soc_gfx906.py b/src/omniperf_soc/soc_gfx906.py index 048397425..d31d4e82b 100644 --- a/src/omniperf_soc/soc_gfx906.py +++ b/src/omniperf_soc/soc_gfx906.py @@ -37,14 +37,22 @@ "L2Banks": 16, "LDSBanks": 32, "Freq": 1725, - "mclk": 1000 + "mclk": 1000, } -class gfx906_soc (OmniSoC_Base): - def __init__(self,args): + +class gfx906_soc(OmniSoC_Base): + def __init__(self, args): super().__init__(args) self.set_soc_name("gfx906") - self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", self.get_soc_name())) + self.set_perfmon_dir( + os.path.join( + str(config.omniperf_home), + "omniperf_soc", + "profile_configs", + self.get_soc_name(), + ) + ) self.set_compatible_profilers(["rocprofv1", "rocscope"]) # Per IP block max number of simultaneous counters. GFX IP Blocks self.set_perfmon_config( @@ -64,13 +72,12 @@ def __init__(self,args): ) self.set_soc_param(SOC_PARAM) - #----------------------- + # ----------------------- # Required child methods - #----------------------- + # ----------------------- @demarcate def profiling_setup(self): - """Perform any SoC-specific setup prior to profiling. - """ + """Perform any SoC-specific setup prior to profiling.""" super().profiling_setup() if self.get_args().roof_only: error("%s does not support roofline analysis" % self.get_soc_name()) @@ -79,13 +86,10 @@ def profiling_setup(self): @demarcate def post_profiling(self): - """Perform any SoC-specific post profiling activities. - """ + """Perform any SoC-specific post profiling activities.""" super().post_profiling() @demarcate def analysis_setup(self): - """Perform any SoC-specific setup prior to analysis. - """ + """Perform any SoC-specific setup prior to analysis.""" super().analysis_setup() - diff --git a/src/omniperf_soc/soc_gfx908.py b/src/omniperf_soc/soc_gfx908.py index d7da55c76..b80898fb6 100644 --- a/src/omniperf_soc/soc_gfx908.py +++ b/src/omniperf_soc/soc_gfx908.py @@ -37,14 +37,22 @@ "L2Banks": 32, "LDSBanks": 32, "Freq": 1502, - "mclk": 1200 + "mclk": 1200, } -class gfx908_soc (OmniSoC_Base): - def __init__(self,args): + +class gfx908_soc(OmniSoC_Base): + def __init__(self, args): super().__init__(args) self.set_soc_name("gfx908") - self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", self.get_soc_name())) + self.set_perfmon_dir( + os.path.join( + str(config.omniperf_home), + "omniperf_soc", + "profile_configs", + self.get_soc_name(), + ) + ) self.set_compatible_profilers(["rocprofv1", "rocscope"]) # Per IP block max number of simultaneous counters. 
GFX IP Blocks self.set_perfmon_config( @@ -69,13 +77,12 @@ def get_profiler_options(self): # Mi100 requires a custom xml config return ["-m", self.get_workload_perfmon_dir() + "/" + "metrics.xml"] - #----------------------- + # ----------------------- # Required child methods - #----------------------- + # ----------------------- @demarcate def profiling_setup(self): - """Perform any SoC-specific setup prior to profiling. - """ + """Perform any SoC-specific setup prior to profiling.""" super().profiling_setup() if self.get_args().roof_only: error("%s does not support roofline analysis" % self.get_soc_name()) @@ -84,12 +91,10 @@ def profiling_setup(self): @demarcate def post_profiling(self): - """Perform any SoC-specific post profiling activities. - """ + """Perform any SoC-specific post profiling activities.""" super().post_profiling() @demarcate def analysis_setup(self): - """Perform any SoC-specific setup prior to analysis. - """ - super().analysis_setup() \ No newline at end of file + """Perform any SoC-specific setup prior to analysis.""" + super().analysis_setup() diff --git a/src/omniperf_soc/soc_gfx90a.py b/src/omniperf_soc/soc_gfx90a.py index ea13d9e66..498fb613e 100644 --- a/src/omniperf_soc/soc_gfx90a.py +++ b/src/omniperf_soc/soc_gfx90a.py @@ -39,17 +39,32 @@ "L2Banks": 32, "LDSBanks": 32, "Freq": 1700, - "mclk": 1600 + "mclk": 1600, } -class gfx90a_soc (OmniSoC_Base): - def __init__(self,args): + +class gfx90a_soc(OmniSoC_Base): + def __init__(self, args): super().__init__(args) self.set_soc_name("gfx90a") - if hasattr(self.get_args(), 'roof_only') and self.get_args().roof_only: - self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", "roofline")) + if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only: + self.set_perfmon_dir( + os.path.join( + str(config.omniperf_home), + "omniperf_soc", + "profile_configs", + "roofline", + ) + ) else: - self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", self.get_soc_name())) + self.set_perfmon_dir( + os.path.join( + str(config.omniperf_home), + "omniperf_soc", + "profile_configs", + self.get_soc_name(), + ) + ) self.set_compatible_profilers(["rocprofv1", "rocscope"]) # Per IP block max number of simultaneous counters. GFX IP Blocks self.set_perfmon_config( @@ -64,46 +79,40 @@ def __init__(self,args): "SPI": 2, "GRBM": 2, "GDS": 4, - "TCC_channels": 32 + "TCC_channels": 32, } ) self.set_soc_param(SOC_PARAM) self.roofline_obj = Roofline(args) - #----------------------- + # ----------------------- # Required child methods - #----------------------- + # ----------------------- @demarcate def profiling_setup(self): - """Perform any SoC-specific setup prior to profiling. - """ + """Perform any SoC-specific setup prior to profiling.""" super().profiling_setup() # Performance counter filtering self.perfmon_filter(self.get_args().roof_only) - @demarcate def post_profiling(self): - """Perform any SoC-specific post profiling activities. 
- """ + """Perform any SoC-specific post profiling activities.""" super().post_profiling() if not self.get_args().no_roof: - logging.info("[roofline] Checking for roofline.csv in " + str(self.get_args().path)) + logging.info( + "[roofline] Checking for roofline.csv in " + str(self.get_args().path) + ) if not os.path.isfile(os.path.join(self.get_args().path, "roofline.csv")): mibench(self.get_args()) self.roofline_obj.post_processing() else: logging.info("[roofline] Skipping roofline") - @demarcate def analysis_setup(self, roofline_parameters=None): - """Perform any SoC-specific setup prior to analysis. - """ + """Perform any SoC-specific setup prior to analysis.""" super().analysis_setup() # configure roofline for analysis if roofline_parameters: self.roofline_obj = Roofline(self.get_args(), roofline_parameters) - - - diff --git a/src/omniperf_soc/soc_gfx940.py b/src/omniperf_soc/soc_gfx940.py index 504f14a52..38970a400 100644 --- a/src/omniperf_soc/soc_gfx940.py +++ b/src/omniperf_soc/soc_gfx940.py @@ -39,18 +39,30 @@ "L2Banks": 16, "LDSBanks": 32, "Freq": 1950, - "mclk": 1300 + "mclk": 1300, } -class gfx940_soc (OmniSoC_Base): - def __init__(self,args): + +class gfx940_soc(OmniSoC_Base): + def __init__(self, args): super().__init__(args) self.set_soc_name("gfx940") - if hasattr(self.get_args(), 'roof_only') and self.get_args().roof_only: - self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", "roofline")) + if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only: + self.set_perfmon_dir( + os.path.join( + str(config.omniperf_home), + "omniperf_soc", + "profile_configs", + "roofline", + ) + ) else: # NB: We're using generalized Mi300 perfmon configs - self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", "gfx940")) + self.set_perfmon_dir( + os.path.join( + str(config.omniperf_home), "omniperf_soc", "profile_configs", "gfx940" + ) + ) self.set_compatible_profilers(["rocprofv2"]) # Per IP block max number of simultaneous counters. GFX IP Blocks self.set_perfmon_config( @@ -65,28 +77,25 @@ def __init__(self,args): "SPI": 2, "GRBM": 2, "GDS": 4, - "TCC_channels": 32 + "TCC_channels": 32, } ) self.set_soc_param(SOC_PARAM) self.roofline_obj = Roofline(args) - #----------------------- + # ----------------------- # Required child methods - #----------------------- + # ----------------------- @demarcate def profiling_setup(self): - """Perform any SoC-specific setup prior to profiling. - """ + """Perform any SoC-specific setup prior to profiling.""" super().profiling_setup() # Performance counter filtering self.perfmon_filter(self.get_args().roof_only) - @demarcate def post_profiling(self): - """Perform any SoC-specific post profiling activities. - """ + """Perform any SoC-specific post profiling activities.""" super().post_profiling() logging.info("[roofline] Roofline temporarily disabled in Mi300") @@ -98,16 +107,11 @@ def post_profiling(self): # else: # logging.info("[roofline] Skipping roofline") - @demarcate def analysis_setup(self, roofline_parameters=None): - """Perform any SoC-specific setup prior to analysis. 
- """ + """Perform any SoC-specific setup prior to analysis.""" super().analysis_setup() logging.info("[roofline] Roofline temporarily disabled in Mi300") # configure roofline for analysis # if roofline_parameters: # self.roofline_obj = Roofline(self.get_args(), roofline_parameters) - - - diff --git a/src/omniperf_soc/soc_gfx941.py b/src/omniperf_soc/soc_gfx941.py index 6b8865b82..f796a004d 100644 --- a/src/omniperf_soc/soc_gfx941.py +++ b/src/omniperf_soc/soc_gfx941.py @@ -39,18 +39,30 @@ "L2Banks": 16, "LDSBanks": 32, "Freq": 1950, - "mclk": 1300 + "mclk": 1300, } -class gfx941_soc (OmniSoC_Base): - def __init__(self,args): + +class gfx941_soc(OmniSoC_Base): + def __init__(self, args): super().__init__(args) self.set_soc_name("gfx941") - if hasattr(self.get_args(), 'roof_only') and self.get_args().roof_only: - self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", "roofline")) + if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only: + self.set_perfmon_dir( + os.path.join( + str(config.omniperf_home), + "omniperf_soc", + "profile_configs", + "roofline", + ) + ) else: # NB: We're using generalized Mi300 perfmon configs - self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", "gfx940")) + self.set_perfmon_dir( + os.path.join( + str(config.omniperf_home), "omniperf_soc", "profile_configs", "gfx940" + ) + ) self.set_compatible_profilers(["rocprofv2"]) # Per IP block max number of simultaneous counters. GFX IP Blocks self.set_perfmon_config( @@ -65,28 +77,25 @@ def __init__(self,args): "SPI": 2, "GRBM": 2, "GDS": 4, - "TCC_channels": 32 + "TCC_channels": 32, } ) self.set_soc_param(SOC_PARAM) self.roofline_obj = Roofline(args) - #----------------------- + # ----------------------- # Required child methods - #----------------------- + # ----------------------- @demarcate def profiling_setup(self): - """Perform any SoC-specific setup prior to profiling. - """ + """Perform any SoC-specific setup prior to profiling.""" super().profiling_setup() # Performance counter filtering self.perfmon_filter(self.get_args().roof_only) - @demarcate def post_profiling(self): - """Perform any SoC-specific post profiling activities. - """ + """Perform any SoC-specific post profiling activities.""" super().post_profiling() logging.info("[roofline] Roofline temporarily disabled in Mi300") @@ -98,16 +107,11 @@ def post_profiling(self): # else: # logging.info("[roofline] Skipping roofline") - @demarcate def analysis_setup(self, roofline_parameters=None): - """Perform any SoC-specific setup prior to analysis. 
- """ + """Perform any SoC-specific setup prior to analysis.""" super().analysis_setup() logging.info("[roofline] Roofline temporarily disabled in Mi300") # configure roofline for analysis # if roofline_parameters: # self.roofline_obj = Roofline(self.get_args(), roofline_parameters) - - - diff --git a/src/omniperf_soc/soc_gfx942.py b/src/omniperf_soc/soc_gfx942.py index 064dd9792..4135d337c 100644 --- a/src/omniperf_soc/soc_gfx942.py +++ b/src/omniperf_soc/soc_gfx942.py @@ -39,18 +39,30 @@ "L2Banks": 16, "LDSBanks": 32, "Freq": 1950, - "mclk": 1300 + "mclk": 1300, } -class gfx942_soc (OmniSoC_Base): - def __init__(self,args): + +class gfx942_soc(OmniSoC_Base): + def __init__(self, args): super().__init__(args) self.set_soc_name("gfx942") - if hasattr(self.get_args(), 'roof_only') and self.get_args().roof_only: - self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", "roofline")) + if hasattr(self.get_args(), "roof_only") and self.get_args().roof_only: + self.set_perfmon_dir( + os.path.join( + str(config.omniperf_home), + "omniperf_soc", + "profile_configs", + "roofline", + ) + ) else: # NB: We're using generalized Mi300 perfmon configs - self.set_perfmon_dir(os.path.join(str(config.omniperf_home), "omniperf_soc", "profile_configs", "gfx940")) + self.set_perfmon_dir( + os.path.join( + str(config.omniperf_home), "omniperf_soc", "profile_configs", "gfx940" + ) + ) self.set_compatible_profilers(["rocprofv2"]) # Per IP block max number of simultaneous counters. GFX IP Blocks self.set_perfmon_config( @@ -65,28 +77,25 @@ def __init__(self,args): "SPI": 2, "GRBM": 2, "GDS": 4, - "TCC_channels": 32 + "TCC_channels": 32, } ) self.set_soc_param(SOC_PARAM) self.roofline_obj = Roofline(args) - #----------------------- + # ----------------------- # Required child methods - #----------------------- + # ----------------------- @demarcate def profiling_setup(self): - """Perform any SoC-specific setup prior to profiling. - """ + """Perform any SoC-specific setup prior to profiling.""" super().profiling_setup() # Performance counter filtering self.perfmon_filter(self.get_args().roof_only) - @demarcate def post_profiling(self): - """Perform any SoC-specific post profiling activities. - """ + """Perform any SoC-specific post profiling activities.""" super().post_profiling() logging.info("[roofline] Roofline temporarily disabled in Mi300") @@ -98,16 +107,11 @@ def post_profiling(self): # else: # logging.info("[roofline] Skipping roofline") - @demarcate def analysis_setup(self, roofline_parameters=None): - """Perform any SoC-specific setup prior to analysis. 
- """ + """Perform any SoC-specific setup prior to analysis.""" super().analysis_setup() logging.info("[roofline] Roofline temporarily disabled in Mi300") # configure roofline for analysis # if roofline_parameters: # self.roofline_obj = Roofline(self.get_args(), roofline_parameters) - - - diff --git a/src/roofline.py b/src/roofline.py index 051bf51ed..eebda645b 100644 --- a/src/roofline.py +++ b/src/roofline.py @@ -36,69 +36,73 @@ SYMBOLS = [0, 1, 2, 3, 4, 5, 13, 17, 18, 20] + class Roofline: def __init__(self, args, run_parameters=None): self.__args = args - self.__run_parameters = run_parameters if run_parameters else { - 'path_to_dir': self.__args.path, - 'device_id': 0, - 'sort_type': 'kernels', - 'mem_level': 'ALL', - 'include_kernel_names': False, - 'is_standalone': False - } + self.__run_parameters = ( + run_parameters + if run_parameters + else { + "path_to_dir": self.__args.path, + "device_id": 0, + "sort_type": "kernels", + "mem_level": "ALL", + "include_kernel_names": False, + "is_standalone": False, + } + ) self.__ai_data = None self.__ceiling_data = None self.__figure = go.Figure() - if not isinstance(self.__run_parameters['path_to_dir'], list): + if not isinstance(self.__run_parameters["path_to_dir"], list): self.roof_setup() # Set roofline run parameters from args - if hasattr(self.__args, 'roof_only') and self.__args.roof_only == True: - self.__run_parameters['is_standalone'] = True - if hasattr(self.__args, 'kernel_names') and self.__args.kernel_names == True: - self.__run_parameters['include_kernel_names'] = True - if hasattr(self.__args, 'mem_level') and self.__args.mem_level != "ALL": - self.__run_parameters['mem_level'] = self.__args.mem_level - if hasattr(self.__args, 'sort') and self.__args.sort != "ALL": - self.__run_parameters['sort_type'] = self.__args.sort + if hasattr(self.__args, "roof_only") and self.__args.roof_only == True: + self.__run_parameters["is_standalone"] = True + if hasattr(self.__args, "kernel_names") and self.__args.kernel_names == True: + self.__run_parameters["include_kernel_names"] = True + if hasattr(self.__args, "mem_level") and self.__args.mem_level != "ALL": + self.__run_parameters["mem_level"] = self.__args.mem_level + if hasattr(self.__args, "sort") and self.__args.sort != "ALL": + self.__run_parameters["sort_type"] = self.__args.sort self.validate_parameters() def validate_parameters(self): - if self.__run_parameters['include_kernel_names'] and (not self.__run_parameters['is_standalone']): + if self.__run_parameters["include_kernel_names"] and ( + not self.__run_parameters["is_standalone"] + ): error("--roof-only is required for --kernel-names") def roof_setup(self): # set default workload path if not specified - if self.__run_parameters['path_to_dir'] == os.path.join(os.getcwd(), 'workloads'): - self.__run_parameters['path_to_dir'] = os.path.join(self.__run_parameters['path_to_dir'], self.__args.name, self.__args.target) + if self.__run_parameters["path_to_dir"] == os.path.join(os.getcwd(), "workloads"): + self.__run_parameters["path_to_dir"] = os.path.join( + self.__run_parameters["path_to_dir"], self.__args.name, self.__args.target + ) # create new directory for roofline if it doesn't exist - if not os.path.isdir(self.__run_parameters['path_to_dir']): - os.makedirs(self.__run_parameters['path_to_dir']) + if not os.path.isdir(self.__run_parameters["path_to_dir"]): + os.makedirs(self.__run_parameters["path_to_dir"]) @demarcate def empirical_roofline( self, ret_df, ): - """Generate a set of empirical roofline plots given a directory 
containing required profiling and benchmarking data - """ + """Generate a set of empirical roofline plots given a directory containing required profiling and benchmarking data""" # Create arithmetic intensity data that will populate the roofline model - logging.debug("[roofline] Path: %s" % self.__run_parameters['path_to_dir']) - self.__ai_data = calc_ai(self.__run_parameters['sort_type'], ret_df) - + logging.debug("[roofline] Path: %s" % self.__run_parameters["path_to_dir"]) + self.__ai_data = calc_ai(self.__run_parameters["sort_type"], ret_df) + logging.debug("[roofline] AI at each mem level:") for i in self.__ai_data: logging.debug("%s -> %s" % (i, self.__ai_data[i])) logging.debug("\n") # Generate a roofline figure for each data type - fp32_fig = self.generate_plot( - dtype="FP32" - ) - fp16_fig = self.generate_plot( - dtype="FP16" - ) + fp32_fig = self.generate_plot(dtype="FP32") + fp16_fig = self.generate_plot(dtype="FP16") ml_combo_fig = self.generate_plot( dtype="I8", fig=fp16_fig, @@ -125,24 +129,36 @@ def empirical_roofline( self.__figure.update_xaxes(dtick=1) # Output will be different depending on interaction type: # Save PDFs if we're in "standalone roofline" mode, otherwise return HTML to be used in GUI output - if self.__run_parameters['is_standalone']: - dev_id = str(self.__run_parameters['device_id']) + if self.__run_parameters["is_standalone"]: + dev_id = str(self.__run_parameters["device_id"]) - fp32_fig.write_image(self.__run_parameters['path_to_dir'] + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id)) + fp32_fig.write_image( + self.__run_parameters["path_to_dir"] + + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id) + ) ml_combo_fig.write_image( - self.__run_parameters['path_to_dir'] + "/empirRoof_gpu-{}_int8_fp16.pdf".format(dev_id) + self.__run_parameters["path_to_dir"] + + "/empirRoof_gpu-{}_int8_fp16.pdf".format(dev_id) ) # only save a legend if kernel_names option is toggled - if self.__run_parameters['include_kernel_names']: - self.__figure.write_image(self.__run_parameters['path_to_dir'] + "/kernelName_legend.pdf") + if self.__run_parameters["include_kernel_names"]: + self.__figure.write_image( + self.__run_parameters["path_to_dir"] + "/kernelName_legend.pdf" + ) time.sleep(1) # Re-save to remove loading MathJax pop up - fp32_fig.write_image(self.__run_parameters['path_to_dir'] + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id)) + fp32_fig.write_image( + self.__run_parameters["path_to_dir"] + + "/empirRoof_gpu-{}_fp32.pdf".format(dev_id) + ) ml_combo_fig.write_image( - self.__run_parameters['path_to_dir'] + "/empirRoof_gpu-{}_int8_fp16.pdf".format(dev_id) + self.__run_parameters["path_to_dir"] + + "/empirRoof_gpu-{}_int8_fp16.pdf".format(dev_id) ) - if self.__run_parameters['include_kernel_names']: - self.__figure.write_image(self.__run_parameters['path_to_dir'] + "/kernelName_legend.pdf") + if self.__run_parameters["include_kernel_names"]: + self.__figure.write_image( + self.__run_parameters["path_to_dir"] + "/kernelName_legend.pdf" + ) logging.info("[roofline] Empirical Roofline PDFs saved!") else: return html.Section( @@ -173,17 +189,13 @@ def empirical_roofline( ) ], ) - - + @demarcate - def generate_plot( - self, dtype, fig=None - ) -> go.Figure(): - """Create graph object from ai_data (coordinate points) and ceiling_data (peak FLOP and BW) data. 
- """ + def generate_plot(self, dtype, fig=None) -> go.Figure(): + """Create graph object from ai_data (coordinate points) and ceiling_data (peak FLOP and BW) data.""" if fig is None: fig = go.Figure() - plot_mode = "lines+text" if self.__run_parameters['is_standalone'] else "lines" + plot_mode = "lines+text" if self.__run_parameters["is_standalone"] else "lines" self.__ceiling_data = constuct_roof( roofline_parameters=self.__run_parameters, dtype=dtype, @@ -193,10 +205,10 @@ def generate_plot( ####################### # Plot ceilings ####################### - if self.__run_parameters['mem_level'] == "ALL": + if self.__run_parameters["mem_level"] == "ALL": cache_hierarchy = ["HBM", "L2", "L1", "LDS"] else: - cache_hierarchy = self.__run_parameters['mem_level'] + cache_hierarchy = self.__run_parameters["mem_level"] # Plot peak BW ceiling(s) for cache_level in cache_hierarchy: @@ -208,10 +220,14 @@ def generate_plot( mode=plot_mode, hovertemplate="%{text}", text=[ - "{} GB/s".format(to_int(self.__ceiling_data[cache_level.lower()][2])), + "{} GB/s".format( + to_int(self.__ceiling_data[cache_level.lower()][2]) + ), None - if self.__run_parameters['is_standalone'] - else "{} GB/s".format(to_int(self.__ceiling_data[cache_level.lower()][2])), + if self.__run_parameters["is_standalone"] + else "{} GB/s".format( + to_int(self.__ceiling_data[cache_level.lower()][2]) + ), ], textposition="top right", ) @@ -228,7 +244,7 @@ def generate_plot( hovertemplate="%{text}", text=[ None - if self.__run_parameters['is_standalone'] + if self.__run_parameters["is_standalone"] else "{} GFLOP/s".format(to_int(self.__ceiling_data["valu"][2])), "{} GFLOP/s".format(to_int(self.__ceiling_data["valu"][2])), ], @@ -250,7 +266,7 @@ def generate_plot( hovertemplate="%{text}", text=[ None - if self.__run_parameters['is_standalone'] + if self.__run_parameters["is_standalone"] else "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])), "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])), ], @@ -269,7 +285,9 @@ def generate_plot( name="ai_l1", mode="markers", marker={"color": "#00CC96"}, - marker_symbol=SYMBOLS if self.__run_parameters['include_kernel_names'] else None, + marker_symbol=SYMBOLS + if self.__run_parameters["include_kernel_names"] + else None, ) ) fig.add_trace( @@ -279,7 +297,9 @@ def generate_plot( name="ai_l2", mode="markers", marker={"color": "#EF553B"}, - marker_symbol=SYMBOLS if self.__run_parameters['include_kernel_names'] else None, + marker_symbol=SYMBOLS + if self.__run_parameters["include_kernel_names"] + else None, ) ) fig.add_trace( @@ -289,7 +309,9 @@ def generate_plot( name="ai_hbm", mode="markers", marker={"color": "#636EFA"}, - marker_symbol=SYMBOLS if self.__run_parameters['include_kernel_names'] else None, + marker_symbol=SYMBOLS + if self.__run_parameters["include_kernel_names"] + else None, ) ) @@ -311,78 +333,78 @@ def standalone_roofline(self): from collections import OrderedDict # Change vL1D to a interpretable str, if required - if "vL1D" in self.__run_parameters['mem_level']: - self.__run_parameters['mem_level'].remove("vL1D") - self.__run_parameters['mem_level'].append("L1") + if "vL1D" in self.__run_parameters["mem_level"]: + self.__run_parameters["mem_level"].remove("vL1D") + self.__run_parameters["mem_level"].append("L1") - app_path = os.path.join(self.__run_parameters['path_to_dir'], "pmc_perf.csv") + app_path = os.path.join(self.__run_parameters["path_to_dir"], "pmc_perf.csv") roofline_exists = os.path.isfile(app_path) if not roofline_exists: logging.error("[roofline] 
Error: {} does not exist".format(app_path)) sys.exit(1) t_df = OrderedDict() t_df["pmc_perf"] = pd.read_csv(app_path) - self.empirical_roofline( - ret_df=t_df - ) + self.empirical_roofline(ret_df=t_df) # Main methods @abstractmethod def pre_processing(self): if self.__args.roof_only: # check for sysinfo - logging.info("[roofline] Checking for sysinfo.csv in " + str(self.__args.path)) + logging.info( + "[roofline] Checking for sysinfo.csv in " + str(self.__args.path) + ) sysinfo_path = os.path.join(self.__args.path, "sysinfo.csv") if not os.path.isfile(sysinfo_path): logging.info("[roofline] sysinfo.csv not found. Generating...") gen_sysinfo( - workload_name=self.__args.name, - workload_dir=self.__workload_dir, - ip_blocks=self.__args.ipblocks, - app_cmd=self.__args.remaining, - skip_roof=self.__args.no_roof, - roof_only=self.__args.roof_only + workload_name=self.__args.name, + workload_dir=self.__workload_dir, + ip_blocks=self.__args.ipblocks, + app_cmd=self.__args.remaining, + skip_roof=self.__args.no_roof, + roof_only=self.__args.roof_only, ) @abstractmethod def profile(self): if self.__args.roof_only: # check for roofline benchmark - logging.info("[roofline] Checking for roofline.csv in " + str(self.__args.path)) + logging.info( + "[roofline] Checking for roofline.csv in " + str(self.__args.path) + ) roof_path = os.path.join(self.__args.path, "roofline.csv") if not os.path.isfile(roof_path): mibench(self.__args) # check for profiling data - logging.info("[roofline] Checking for pmc_perf.csv in " + str(self.__args.path)) + logging.info( + "[roofline] Checking for pmc_perf.csv in " + str(self.__args.path) + ) app_path = os.path.join(self.__args.path, "pmc_perf.csv") if not os.path.isfile(app_path): logging.info("[roofline] pmc_perf.csv not found. Generating...") if not self.__args.remaining: - error("An is required to run.\nomniperf profile -n test -- ") - #TODO: Add an equivelent of characterize_app() to run profiling directly out of this module - + error( + "An is required to run.\nomniperf profile -n test -- " + ) + # TODO: Add an equivelent of characterize_app() to run profiling directly out of this module + elif self.__args.no_roof: logging.info("[roofline] Skipping roofline.") else: mibench(self.__args) - #NB: Currently the post_prossesing() method is the only one being used by omniperf, - # we include pre_processing() and profile() methods for those who wish to borrow the roofline module + # NB: Currently the post_prossesing() method is the only one being used by omniperf, + # we include pre_processing() and profile() methods for those who wish to borrow the roofline module @abstractmethod def post_processing(self): - if self.__run_parameters['is_standalone']: + if self.__run_parameters["is_standalone"]: self.standalone_roofline() - def to_int(a): if str(type(a)) == "": return np.nan else: return int(a) - - - - - diff --git a/src/utils/db_connector.py b/src/utils/db_connector.py index bf013164d..e8b4e70ad 100644 --- a/src/utils/db_connector.py +++ b/src/utils/db_connector.py @@ -34,6 +34,7 @@ MAX_SERVER_SEL_DELAY = 5000 # 5 sec connection timeout + class DatabaseConnector: def __init__(self, args): self.args = args @@ -45,17 +46,22 @@ def __init__(self, args): "port": str(self.args.port), "team": self.args.team, "workload": self.args.workload, - "db": None + "db": None, } - self.interaction_type: str = None #set to 'import' or 'remove' based on user arguments + self.interaction_type: str = ( + None # set to 'import' or 'remove' based on user arguments + ) self.client: MongoClient = 
None + @demarcate def prep_import(self, profile_and_export=False): if profile_and_export: - self.connection_info['workload'] = os.path.join(self.connection_info['workload'], self.args.target) + self.connection_info["workload"] = os.path.join( + self.connection_info["workload"], self.args.target + ) # Extract SoC and workload name from sysinfo.csv - sys_info = os.path.join(self.connection_info['workload'], "sysinfo.csv") + sys_info = os.path.join(self.connection_info["workload"], "sysinfo.csv") if os.path.isfile(sys_info): sys_info = pd.read_csv(sys_info) soc = sys_info["name"][0] @@ -63,7 +69,9 @@ def prep_import(self, profile_and_export=False): else: error("[database] Unable to parse SoC and/or workload name from sysinfo.csv") - self.connection_info["db"] = "omniperf_" + str(self.args.team) + "_" + str(name) + "_" + str(soc) + self.connection_info["db"] = ( + "omniperf_" + str(self.args.team) + "_" + str(name) + "_" + str(soc) + ) @demarcate def db_import(self): @@ -72,7 +80,11 @@ def db_import(self): file = "blank" for file in tqdm(os.listdir(self.connection_info["workload"])): if file.endswith(".csv"): - logging.info("[database] Uploading: %s" % self.connection_info["workload"] + "/" + file) + logging.info( + "[database] Uploading: %s" % self.connection_info["workload"] + + "/" + + file + ) try: fileName = file[0 : file.find(".")] cmd = ( @@ -101,69 +113,85 @@ @demarcate def db_remove(self): - db_to_remove = self.client[self.connection_info['workload']] + db_to_remove = self.client[self.connection_info["workload"]] # check the collection names on the database col_list = db_to_remove.list_collection_names() self.client.drop_database(db_to_remove) db = self.client["workload_names"] col = db["names"] - col.delete_many({"name": self.connection_info['workload']}) - - logging.info("[database] Successfully removed %s" % self.connection_info['workload']) + col.delete_many({"name": self.connection_info["workload"]}) + logging.info( + "[database] Successfully removed %s" % self.connection_info["workload"] + ) @abstractmethod def pre_processing(self): - """Perform any pre-processing steps prior to database conncetion. - """ + """Perform any pre-processing steps prior to database connection.""" logging.debug("[database] pre-processing database connection") if not self.args.remove and not self.args.upload: error("Either -i/--import or -r/--remove is required in database mode") - self.interaction_type = 'import' if self.args.upload else 'remove' + self.interaction_type = "import" if self.args.upload else "remove" # Detect interaction type - if self.interaction_type == 'remove': + if self.interaction_type == "remove": logging.debug("[database] validating arguments for --remove workflow") is_full_workload_name = self.args.workload.count("_") >= 3 if not is_full_workload_name: - error("-w/--workload is not valid. Please use full workload name as seen in GUI when removing (i.e. omniperf_asw_vcopy_mi200)") + error( + "-w/--workload is not valid. Please use full workload name as seen in GUI when removing (i.e. omniperf_asw_vcopy_mi200)" + ) - if self.connection_info['host'] == None or self.connection_info['username'] == None: - error("-H/--host and -u/--username are required when interaction type is set to %s" % self.interaction_type) - if self.connection_info['workload'] == "admin" or self.connection_info['workload'] == "local": - error("Cannot remove %s. Try again." 
% self.connection_info['workload']) + if ( + self.connection_info["host"] == None + or self.connection_info["username"] == None + ): + error( + "-H/--host and -u/--username are required when interaction type is set to %s" + % self.interaction_type + ) + if ( + self.connection_info["workload"] == "admin" + or self.connection_info["workload"] == "local" + ): + error("Cannot remove %s. Try again." % self.connection_info["workload"]) else: logging.debug("[database] validating arguments for --import workflow") if ( - self.connection_info['host'] == None - or self.connection_info['team'] == None - or self.connection_info['username'] == None - or self.connection_info['workload'] == None + self.connection_info["host"] == None + or self.connection_info["team"] == None + or self.connection_info["username"] == None + or self.connection_info["workload"] == None ): - error("-H/--host, -w/--workload, -u/--username, and -t/--team are all required when interaction type is set to %s" % self.interaction_type) + error( + "-H/--host, -w/--workload, -u/--username, and -t/--team are all required when interaction type is set to %s" + % self.interaction_type + ) - if os.path.isdir(os.path.abspath(self.connection_info['workload'])): - is_workload_empty(self.connection_info['workload']) + if os.path.isdir(os.path.abspath(self.connection_info["workload"])): + is_workload_empty(self.connection_info["workload"]) else: error("--workload is invalid. Please pass path to a valid directory.") if len(self.args.team) > 13: error("--team exceeds 13 character limit. Try again.") - + # format path properly - self.connection_info['workload'] = os.path.abspath(self.connection_info['workload']) + self.connection_info["workload"] = os.path.abspath( + self.connection_info["workload"] + ) # Detect password - if self.connection_info['password'] == "": + if self.connection_info["password"] == "": try: - self.connection_info['password'] = getpass.getpass() + self.connection_info["password"] = getpass.getpass() except Exception as e: error("[database] PASSWORD ERROR %s" % e) else: logging.info("[database] Password received") else: - password = self.connection_info['password'] + password = self.connection_info["password"] # Establish client connection connection_str = ( @@ -177,11 +205,10 @@ def pre_processing(self): + self.connection_info["port"] + "/?authSource=admin" ) - self.client = MongoClient(connection_str, serverSelectionTimeoutMS=MAX_SERVER_SEL_DELAY) + self.client = MongoClient( + connection_str, serverSelectionTimeoutMS=MAX_SERVER_SEL_DELAY + ) try: self.client.server_info() except: error("[database] Unable to connect to the DB server.") - - - \ No newline at end of file diff --git a/src/utils/file_io.py b/src/utils/file_io.py index 1059bc433..5c54874a6 100644 --- a/src/utils/file_io.py +++ b/src/utils/file_io.py @@ -50,11 +50,12 @@ "id": 1, "title": "Dispatch List", "data source": [{"raw_csv_table": {"id": 2, "source": "pmc_dispatch_info.csv"}}], - } + }, } time_units = {"s": 10**9, "ms": 10**6, "us": 10**3, "ns": 1} + def load_sys_info(f): """ Load sys running info from csv file to a df. 
@@ -231,4 +232,4 @@ def is_single_panel_config(root_dir, supported_archs): return False else: logging.error("Found multiple panel config sets but incomplete for all archs!") - sys.exit(1) \ No newline at end of file + sys.exit(1) diff --git a/src/utils/gui.py b/src/utils/gui.py index d3e2b11c6..bc1b3d8e9 100644 --- a/src/utils/gui.py +++ b/src/utils/gui.py @@ -34,7 +34,8 @@ "mode.chained_assignment", None ) # ignore SettingWithCopyWarning pandas warning -IS_DARK = True #TODO: Remove hardcoded in favor of class property +IS_DARK = True # TODO: Remove hardcoded in favor of class property + ################## # HELPER FUNCTIONS @@ -168,31 +169,31 @@ def build_bar_chart(display_df, table_config, barchart_elements, norm_filt): ) # L2 Cache per channel # elif table_config["id"] in barchart_elements["l2_cache_per_chan"]: - # nested_bar = {} - # channels = [] - # for colName, colData in display_df.items(): - # if colName == "Channel": - # channels = list(colData.values) - # else: - # display_df[colName] = [ - # x.astype(float) if x != "" and x != None else float(0) - # for x in display_df[colName] - # ] - # nested_bar[colName] = list(display_df[colName]) - # for group, metric in nested_bar.items(): - # d_figs.append( - # px.bar( - # title=group[0 : group.rfind("(")], - # x=channels, - # y=metric, - # labels={ - # "x": "Channel", - # "y": group[group.rfind("(") + 1 : len(group) - 1].replace( - # "per", norm_filt - # ), - # }, - # ).update_yaxes(rangemode="nonnegative") - # ) + # nested_bar = {} + # channels = [] + # for colName, colData in display_df.items(): + # if colName == "Channel": + # channels = list(colData.values) + # else: + # display_df[colName] = [ + # x.astype(float) if x != "" and x != None else float(0) + # for x in display_df[colName] + # ] + # nested_bar[colName] = list(display_df[colName]) + # for group, metric in nested_bar.items(): + # d_figs.append( + # px.bar( + # title=group[0 : group.rfind("(")], + # x=channels, + # y=metric, + # labels={ + # "x": "Channel", + # "y": group[group.rfind("(") + 1 : len(group) - 1].replace( + # "per", norm_filt + # ), + # }, + # ).update_yaxes(rangemode="nonnegative") + # ) # Speed-of-light bar chart elif table_config["id"] in barchart_elements["sol"]: @@ -376,4 +377,4 @@ def build_table_chart( # print("DATA: \n", display_df.to_dict('records')) d_figs.append(d_t) return d_figs - # print(d_t.columns) \ No newline at end of file + # print(d_t.columns) diff --git a/src/utils/gui_components/memchart.py b/src/utils/gui_components/memchart.py index e8f553169..cdc655340 100644 --- a/src/utils/gui_components/memchart.py +++ b/src/utils/gui_components/memchart.py @@ -52,9 +52,9 @@ def insert_chart_data(mem_data, base_data): return G( className="data", children=[ - # ---------------------------------------- - # Instr Buff Block - #TODO: double check wave_occupancy + # ---------------------------------------- + # Instr Buff Block + # TODO: double check wave_occupancy Text( x="52", y="313", @@ -73,8 +73,8 @@ def insert_chart_data(mem_data, base_data): fontWeight="bold", children=memchart_values["Wave Life"], ), - # ---------------------------------------- - # Instr Dispatch Block + # ---------------------------------------- + # Instr Dispatch Block Text( x="386", y="46", @@ -139,8 +139,8 @@ def insert_chart_data(mem_data, base_data): fontSize="12px", children=memchart_values["BR"], ), - # ---------------------------------------- - # Exec Block + # ---------------------------------------- + # Exec Block Text( x="480", y="99", @@ -198,8 +198,8 @@ def 
insert_chart_data(mem_data, base_data): fontSize="12px", children=memchart_values["Workgroups"], ), - # ---------------------------------------- - # LDS Block + # ---------------------------------------- + # LDS Block Text( x="723", y="78", @@ -224,8 +224,8 @@ def insert_chart_data(mem_data, base_data): fontSize="12px", children=memchart_values["LDS Latency"], ), - # ---------------------------------------- - # Vector L1 Cache Block + # ---------------------------------------- + # Vector L1 Cache Block Text( x="708", y="204", @@ -306,8 +306,8 @@ def insert_chart_data(mem_data, base_data): fontSize="12px", children=memchart_values["VL1_L2 Atomic"], ), - # ---------------------------------------- - # Scalar L1D Cache Block + # ---------------------------------------- + # Scalar L1D Cache Block Text( x="709", y="384", @@ -356,8 +356,8 @@ def insert_chart_data(mem_data, base_data): fontSize="12px", children=memchart_values["VL1D_L2 Atomic"], ), - # ---------------------------------------- - # Instr L1 Cache Block + # ---------------------------------------- + # Instr L1 Cache Block Text( x="492", y="498", @@ -390,8 +390,8 @@ def insert_chart_data(mem_data, base_data): fontSize="12px", children=memchart_values["IL1_L2 Rd"], ), - # ---------------------------------------- - # L2 Cache Block(inside) + # ---------------------------------------- + # L2 Cache Block(inside) Text( x="1145", y="213", @@ -440,8 +440,8 @@ def insert_chart_data(mem_data, base_data): fontSize="12px", children=memchart_values["L2 Wr Lat"], ), - # ---------------------------------------- - # Fabric Block + # ---------------------------------------- + # Fabric Block Text( x="1317", y="243", diff --git a/src/utils/kernel_name_shortener.py b/src/utils/kernel_name_shortener.py index 9f07c3594..820330d1c 100644 --- a/src/utils/kernel_name_shortener.py +++ b/src/utils/kernel_name_shortener.py @@ -25,7 +25,7 @@ import os import sys import logging -import glob +import glob import re import subprocess import pandas as pd @@ -34,6 +34,7 @@ cache = dict() + # Note: shortener is now dependent on a rocprof install with llvm def kernel_name_shortener(workload_dir, level): def shorten_file(df, level): @@ -134,6 +135,8 @@ def shorten_file(df, level): modified_df = shorten_file(orig_df, level) modified_df.to_csv(fpath, index=False) except pd.errors.EmptyDataError: - logging.debug("[profiling] Skipping shortening on empty csv: %s" % str(fpath)) + logging.debug( + "[profiling] Skipping shortening on empty csv: %s" % str(fpath) + ) - logging.info("[profiling] Kernel_Name shortening complete.") \ No newline at end of file + logging.info("[profiling] Kernel_Name shortening complete.") diff --git a/src/utils/mem_chart.py b/src/utils/mem_chart.py index 686f45e25..8a4dcf1f8 100644 --- a/src/utils/mem_chart.py +++ b/src/utils/mem_chart.py @@ -1039,4 +1039,4 @@ def plot_mem_chart(arch, normal_unit, metric_dict): arch = "" normal_unit = "per_kernel" - print(plot_mem_chart(arch, normal_unit, metric_dict)) \ No newline at end of file + print(plot_mem_chart(arch, normal_unit, metric_dict)) diff --git a/src/utils/parser.py b/src/utils/parser.py index 23f502406..165415658 100644 --- a/src/utils/parser.py +++ b/src/utils/parser.py @@ -174,6 +174,7 @@ def to_round(a, b): else: return round(a, b) + def to_quantile(a, b): if a is None: return None @@ -182,6 +183,7 @@ def to_quantile(a, b): else: raise Exception("to_quantile: unsupported type.") + def to_mod(a, b): if isinstance(a, pd.core.series.Series): return a.mod(b) @@ -402,6 +404,7 @@ def 
gen_counter_list(formula): return visited, counters + def calc_builtin_var(var, sys_info): """ Calculate build-in variable based on sys_info: @@ -414,6 +417,7 @@ def calc_builtin_var(var, sys_info): print("Don't support", var) sys.exit(1) + def build_dfs(archConfigs, filter_metrics, sys_info): """ - Build dataframe for each type of data source within each panel. @@ -447,7 +451,6 @@ def build_dfs(archConfigs, filter_metrics, sys_info): type == "metric_table" and "metric" in data_config and "placeholder_range" in data_config["metric"] - ): # print(data_config["metric"]) new_metrics = {} @@ -475,16 +478,14 @@ def build_dfs(archConfigs, filter_metrics, sys_info): data_config["metric"] = new_metrics # print(data_config) # print(data_config["metric"]) - + for panel_id, panel in archConfigs.panel_configs.items(): for data_source in panel["data source"]: for type, data_config in data_source.items(): if type == "metric_table": headers = ["Metric_ID"] data_source_idx = str(data_config["id"] // 100) - if (data_source_idx != 0 or - data_source_idx in filter_metrics - ): + if data_source_idx != 0 or data_source_idx in filter_metrics: metric_list[data_source_idx] = panel["title"] if ( "cli_style" in data_config @@ -506,9 +507,9 @@ def build_dfs(archConfigs, filter_metrics, sys_info): headers.append("coll_level") if "tips" in data_config["header"].keys(): headers.append(data_config["header"]["tips"]) - + df = pd.DataFrame(columns=headers) - + i = 0 for key, entries in data_config["metric"].items(): data_source_idx = ( @@ -532,7 +533,7 @@ def build_dfs(archConfigs, filter_metrics, sys_info): ): values.append(metric_idx) values.append(key) - + metric_list[data_source_idx] = data_config["title"] if ( @@ -687,9 +688,9 @@ def eval_metric(dfs, dfs_type, sys_info, soc_spec, raw_pmc_df, debug): ammolite__numWavesPerCU = sys_info.maxWavesPerCU # todo: check do we still need it ammolite__numSQC = sys_info.numSQC ammolite__L2Banks = sys_info.L2Banks - ammolite__LDSBanks = ( - soc_spec['LDSBanks'] - ) # todo: eventually switch this over to sys_info. its a new spec so trying not to break compatibility + ammolite__LDSBanks = soc_spec[ + "LDSBanks" + ] # todo: eventually switch this over to sys_info. its a new spec so trying not to break compatibility ammolite__freq = sys_info.cur_sclk # todo: check do we still need it ammolite__mclk = sys_info.cur_mclk ammolite__sclk = sys_info.sclk @@ -910,7 +911,9 @@ def load_kernel_top(workload, dir): if file.exists(): tmp[id] = pd.read_csv(file) else: - logging.info("Warning: Issue loading top kernels. Check pmc_kernel_top.csv") + logging.info( + "Warning: Issue loading top kernels. Check pmc_kernel_top.csv" + ) elif "from_csv_columnwise" in df.columns: # NB: # Another way might be doing transpose in tty like metric_table. @@ -923,7 +926,9 @@ def load_kernel_top(workload, dir): # so tty could detect them and show them correctly in comparison. tmp[id].columns = ["Info"] else: - logging.info("Warning: Issue loading top kernels. Check pmc_kernel_top.csv") + logging.info( + "Warning: Issue loading top kernels. 
Check pmc_kernel_top.csv" + ) workload.dfs.update(tmp) @@ -957,6 +962,7 @@ def build_comparable_columns(time_unit): return comparable_columns + def correct_sys_info(df, specs_correction): """ Correct system spec items manually @@ -1012,5 +1018,3 @@ def correct_sys_info(df, specs_correction): df[name_map[k]] = v return df - - diff --git a/src/utils/roofline_calc.py b/src/utils/roofline_calc.py index a1cc8a9b0..2c3cf44de 100644 --- a/src/utils/roofline_calc.py +++ b/src/utils/roofline_calc.py @@ -96,35 +96,36 @@ def get_color(catagory): # Plot BW at each cache level # ------------------------------------------------------------------------------------- def calc_ceilings(roofline_parameters, dtype, benchmark_data): - """Given benchmarking data, calculate ceilings (or peak performance) for empirical roofline - """ + """Given benchmarking data, calculate ceilings (or peak performance) for empirical roofline""" # TODO: This is where filtering by memory level will need to occur for standalone graphPoints = {"hbm": [], "l2": [], "l1": [], "lds": [], "valu": [], "mfma": []} - if roofline_parameters['mem_level'] == "ALL": + if roofline_parameters["mem_level"] == "ALL": cacheHierarchy = ["HBM", "L2", "L1", "LDS"] else: - cacheHierarchy = roofline_parameters['mem_level'] + cacheHierarchy = roofline_parameters["mem_level"] x1 = y1 = x2 = y2 = -1 x1_mfma = y1_mfma = x2_mfma = y2_mfma = -1 target_precision = dtype[2:] if dtype != "FP16" and dtype != "I8": - peakOps = float( - benchmark_data[dtype + "Flops"][roofline_parameters['device_id']] - ) + peakOps = float(benchmark_data[dtype + "Flops"][roofline_parameters["device_id"]]) for i in range(0, len(cacheHierarchy)): - # Plot BW line + # Plot BW line logging.debug("[roofline] Current cache level is %s" % cacheHierarchy[i]) curr_bw = cacheHierarchy[i] + "Bw" - peakBw = float(benchmark_data[curr_bw][roofline_parameters['device_id']]) + peakBw = float(benchmark_data[curr_bw][roofline_parameters["device_id"]]) if dtype == "I8": - peakMFMA = float(benchmark_data["MFMAI8Ops"][roofline_parameters['device_id']]) + peakMFMA = float( + benchmark_data["MFMAI8Ops"][roofline_parameters["device_id"]] + ) else: peakMFMA = float( - benchmark_data["MFMAF{}Flops".format(target_precision)][roofline_parameters['device_id']] + benchmark_data["MFMAF{}Flops".format(target_precision)][ + roofline_parameters["device_id"] + ] ) x1 = float(XMIN) @@ -173,7 +174,9 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data): if x2_mfma < x0_mfma: x0_mfma = x2_mfma - logging.debug("MFMA ROOF [{}, {}], [{},{}]".format(x0_mfma, XMAX, peakMFMA, peakMFMA)) + logging.debug( + "MFMA ROOF [{}, {}], [{},{}]".format(x0_mfma, XMAX, peakMFMA, peakMFMA) + ) graphPoints["mfma"].append([x0_mfma, XMAX]) graphPoints["mfma"].append([peakMFMA, peakMFMA]) graphPoints["mfma"].append(peakMFMA) @@ -186,8 +189,7 @@ def calc_ceilings(roofline_parameters, dtype, benchmark_data): # ------------------------------------------------------------------------------------- # Calculate relevant metrics for ai calculation def calc_ai(sort_type, ret_df): - """Given counter data, calculate arithmetic intensity for each kernel in the application. - """ + """Given counter data, calculate arithmetic intensity for each kernel in the application.""" df = ret_df["pmc_perf"] # Sort by top kernels or top dispatches? 
df = df.sort_values(by=["Kernel_Name"]) @@ -261,7 +263,11 @@ def calc_ai(sort_type, ret_df): + (df["SQ_INSTS_VALU_MFMA_MOPS_F64"][idx] * 512) ) except KeyError: - logging.debug("[roofline] {}: Skipped total_flops at index {}".format(kernelName[:35], idx)) + logging.debug( + "[roofline] {}: Skipped total_flops at index {}".format( + kernelName[:35], idx + ) + ) pass try: valu_flops += ( @@ -288,7 +294,9 @@ def calc_ai(sort_type, ret_df): ) ) except KeyError: - logging.debug("{}: Skipped valu_flops at index {}".format(kernelName[:35], idx)) + logging.debug( + "{}: Skipped valu_flops at index {}".format(kernelName[:35], idx) + ) pass try: @@ -298,7 +306,9 @@ def calc_ai(sort_type, ret_df): mfma_flops_f64 += df["SQ_INSTS_VALU_MFMA_MOPS_F64"][idx] * 512 mfma_iops_i8 += df["SQ_INSTS_VALU_MFMA_MOPS_I8"][idx] * 512 except KeyError: - logging.debug("[roofline] {}: Skipped mfma ops at index {}".format(kernelName[:35], idx)) + logging.debug( + "[roofline] {}: Skipped mfma ops at index {}".format(kernelName[:35], idx) + ) pass try: @@ -308,13 +318,19 @@ def calc_ai(sort_type, ret_df): * L2_BANKS ) # L2_BANKS = 32 (since assuming mi200) except KeyError: - logging.debug("[roofline] {}: Skipped lds_data at index {}".format(kernelName[:35], idx)) + logging.debug( + "[roofline] {}: Skipped lds_data at index {}".format(kernelName[:35], idx) + ) pass try: L1cache_data += df["TCP_TOTAL_CACHE_ACCESSES_sum"][idx] * 64 except KeyError: - logging.debug("[roofline] {}: Skipped L1cache_data at index {}".format(kernelName[:35], idx)) + logging.debug( + "[roofline] {}: Skipped L1cache_data at index {}".format( + kernelName[:35], idx + ) + ) pass try: @@ -325,7 +341,11 @@ def calc_ai(sort_type, ret_df): + df["TCP_TCC_READ_REQ_sum"][idx] * 64 ) except KeyError: - logging.debug("[roofline] {}: Skipped L2cache_data at index {}".format(kernelName[:35], idx)) + logging.debug( + "[roofline] {}: Skipped L2cache_data at index {}".format( + kernelName[:35], idx + ) + ) pass try: hbm_data += ( @@ -335,7 +355,9 @@ def calc_ai(sort_type, ret_df): + ((df["TCC_EA_WRREQ_sum"][idx] - df["TCC_EA_WRREQ_64B_sum"][idx]) * 32) ) except KeyError: - logging.debug("[roofline] {}: Skipped hbm_data at index {}".format(kernelName[:35], idx)) + logging.debug( + "[roofline] {}: Skipped hbm_data at index {}".format(kernelName[:35], idx) + ) pass totalDuration += df["End_Timestamp"][idx] - df["Start_Timestamp"][idx] diff --git a/src/utils/specs.py b/src/utils/specs.py index 8e26f7fe2..f89c54726 100644 --- a/src/utils/specs.py +++ b/src/utils/specs.py @@ -37,6 +37,7 @@ from textwrap import dedent from utils.utils import error, get_hbm_stack_num + @dataclass class MachineSpecs: hostname: str @@ -135,7 +136,7 @@ def gpuinfo(): # we get the max mclk from rocm-smi --showmclkrange rocm_smi_mclk = run(["rocm-smi", "--showmclkrange"], exit_on_error=True) - gpu_info['max_mclk'] = search(r'(\d+)Mhz\s*$', rocm_smi_mclk) + gpu_info["max_mclk"] = search(r"(\d+)Mhz\s*$", rocm_smi_mclk) # Fixme: find better way to differentiate cards, GPU vs APU, etc. 
rocminfo_full = run(["rocminfo"]) @@ -151,81 +152,85 @@ def gpuinfo(): if not gpu_arch in SUPPORTED_ARCHS.keys(): return gpu_info - gpu_info['L1'], gpu_info['L1'] = "", "" + gpu_info["L1"], gpu_info["L1"] = "", "" for idx2, linetext in enumerate(rocminfo[idx1 + 1 :]): key = search(r"^\s*L1:\s+ ([a-zA-Z0-9]+)\s*", linetext) if key != None: - gpu_info['L1'] = key + gpu_info["L1"] = key continue key = search(r"^\s*L2:\s+ ([a-zA-Z0-9]+)\s*", linetext) if key != None: - gpu_info['L2'] = key + gpu_info["L2"] = key continue key = search(r"^\s*Max Clock Freq\. \(MHz\):\s+([0-9]+)", linetext) if key != None: - gpu_info['max_sclk'] = key + gpu_info["max_sclk"] = key continue key = search(r"^\s*Compute Unit:\s+ ([a-zA-Z0-9]+)\s*", linetext) if key != None: - gpu_info['num_CU'] = key + gpu_info["num_CU"] = key continue key = search(r"^\s*SIMDs per CU:\s+ ([a-zA-Z0-9]+)\s*", linetext) if key != None: - gpu_info['num_SIMD'] = key + gpu_info["num_SIMD"] = key continue key = search(r"^\s*Shader Engines:\s+ ([a-zA-Z0-9]+)\s*", linetext) if key != None: - gpu_info['num_SE'] = key + gpu_info["num_SE"] = key continue key = search(r"^\s*Wavefront Size:\s+ ([a-zA-Z0-9]+)\s*", linetext) if key != None: - gpu_info['wave_size'] = key + gpu_info["wave_size"] = key continue key = search(r"^\s*Workgroup Max Size:\s+ ([a-zA-Z0-9]+)\s*", linetext) if key != None: - gpu_info['grp_size'] = key + gpu_info["grp_size"] = key continue key = search(r"^\s*Max Waves Per CU:\s+ ([a-zA-Z0-9]+)\s*", linetext) if key != None: - gpu_info['max_waves_per_cu'] = key + gpu_info["max_waves_per_cu"] = key break try: - soc_module = importlib.import_module('omniperf_soc.soc_'+gpu_arch) + soc_module = importlib.import_module("omniperf_soc.soc_" + gpu_arch) except ModuleNotFoundError as e: - error("Arch %s marked as supported, but couldn't find class implementation %s." % (gpu_arch, e)) - + error( + "Arch %s marked as supported, but couldn't find class implementation %s." + % (gpu_arch, e) + ) + # load arch specific info try: gpu_name = list(SUPPORTED_ARCHS[gpu_arch].keys())[0].upper() - gpu_info['L2Banks'] = str(soc_module.SOC_PARAM['L2Banks']) - gpu_info['numSQC'] = str(soc_module.SOC_PARAM['numSQC']) - gpu_info['LDSBanks'] = str(soc_module.SOC_PARAM['LDSBanks']) - gpu_info['numPipes'] = str(soc_module.SOC_PARAM['numPipes']) + gpu_info["L2Banks"] = str(soc_module.SOC_PARAM["L2Banks"]) + gpu_info["numSQC"] = str(soc_module.SOC_PARAM["numSQC"]) + gpu_info["LDSBanks"] = str(soc_module.SOC_PARAM["LDSBanks"]) + gpu_info["numPipes"] = str(soc_module.SOC_PARAM["numPipes"]) except KeyError as e: - error("Incomplete class definition for %s. Expected a field for %s in SOC_PARAM." % (gpu_arch, e))\ - + error( + "Incomplete class definition for %s. Expected a field for %s in SOC_PARAM." 
+ % (gpu_arch, e) + ) # specify gpu name for gfx942 hardware if gpu_name == "MI300": gpu_name = list(SUPPORTED_ARCHS[gpu_arch].values())[0][0] - if (gpu_info['gpu_arch'] == "gfx942") and ("MI300A" in rocminfo_full): + if (gpu_info["gpu_arch"] == "gfx942") and ("MI300A" in rocminfo_full): gpu_name = "MI300A_A1" if (gpu_arch == "gfx942") and ("MI300A" not in rocminfo_full): gpu_name = "MI300X_A1" - - gpu_info['gpu_name'] = gpu_name - gpu_info['gpu_arch'] = gpu_arch - gpu_info['compute_partition'] = "" - gpu_info['memory_partition'] = "" + gpu_info["gpu_name"] = gpu_name + gpu_info["gpu_arch"] = gpu_arch + gpu_info["compute_partition"] = "" + gpu_info["memory_partition"] = "" # verify all fields are filled for key, value in gpu_info.items(): @@ -235,7 +240,7 @@ def gpuinfo(): return gpu_info -def run(cmd,exit_on_error=False): +def run(cmd, exit_on_error=False): p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if exit_on_error: @@ -255,24 +260,18 @@ def search(pattern, string): return m.group(1) return None + def total_l2_banks(archname, L2Banks, memory_partition): # Fixme: support all supported partitioning mode # Fixme: "name" is a bad name! totalL2Banks = L2Banks - if ( - archname.lower() == "mi300a_a0" - or archname.lower() == "mi300a_a1" - ): - totalL2Banks = L2Banks * get_hbm_stack_num( - archname, memory_partition) - elif ( - archname.lower() == "mi300x_a0" - or archname.lower() == "mi300x_a1" - ): - totalL2Banks = L2Banks * get_hbm_stack_num( - archname, memory_partition) + if archname.lower() == "mi300a_a0" or archname.lower() == "mi300a_a1": + totalL2Banks = L2Banks * get_hbm_stack_num(archname, memory_partition) + elif archname.lower() == "mi300x_a0" or archname.lower() == "mi300x_a1": + totalL2Banks = L2Banks * get_hbm_stack_num(archname, memory_partition) return totalL2Banks + def get_machine_specs(devicenum): cpuinfo = path("/proc/cpuinfo").read_text() meminfo = path("/proc/meminfo").read_text() @@ -339,8 +338,8 @@ def get_machine_specs(devicenum): # these are just max's now, because the parsing was broken and this was inconsistent # with how we use the clocks elsewhere (all max, all the time) - cur_sclk = gpu_info['max_sclk'] - cur_mclk = gpu_info['max_mclk'] + cur_sclk = gpu_info["max_sclk"] + cur_mclk = gpu_info["max_mclk"] # FIXME with device vbios = search(r"VBIOS version: (.*?)$", run(["rocm-smi", "-v"], exit_on_error=True)) @@ -358,15 +357,16 @@ def get_machine_specs(devicenum): memory_partition = "NA" totalL2Banks = total_l2_banks( - gpu_info['gpu_name'], int(gpu_info['L2Banks']), memory_partition) + gpu_info["gpu_name"], int(gpu_info["L2Banks"]), memory_partition + ) hbmchannels = totalL2Banks if ( - gpu_info['gpu_name'].lower() == "mi300a_a0" - or gpu_info['gpu_name'].lower() == "mi300a_a1" + gpu_info["gpu_name"].lower() == "mi300a_a0" + or gpu_info["gpu_name"].lower() == "mi300a_a1" ) and memory_partition.lower() == "nps1": # we have an extra 32 channels for the CCD hbmchannels += 32 - hbmBW = str(int(gpu_info['max_mclk']) / 1000 * 32 * hbmchannels) + hbmBW = str(int(gpu_info["max_mclk"]) / 1000 * 32 * hbmchannels) totalL2Banks = str(totalL2Banks) return MachineSpecs( @@ -377,26 +377,26 @@ def get_machine_specs(devicenum): ram, distro, rocm_version, - gpu_info['gpu_name'], - gpu_info['gpu_arch'], + gpu_info["gpu_name"], + gpu_info["gpu_arch"], vbios, - gpu_info['L1'], - gpu_info['L2'], - gpu_info['num_CU'], - gpu_info['num_SIMD'], - gpu_info['num_SE'], - gpu_info['wave_size'], - gpu_info['grp_size'], - gpu_info['max_sclk'], - 
gpu_info['max_mclk'], + gpu_info["L1"], + gpu_info["L2"], + gpu_info["num_CU"], + gpu_info["num_SIMD"], + gpu_info["num_SE"], + gpu_info["wave_size"], + gpu_info["grp_size"], + gpu_info["max_sclk"], + gpu_info["max_mclk"], cur_sclk, cur_mclk, - gpu_info['max_waves_per_cu'], - gpu_info['L2Banks'], + gpu_info["max_waves_per_cu"], + gpu_info["L2Banks"], totalL2Banks, - gpu_info['LDSBanks'], - gpu_info['numSQC'], - gpu_info['numPipes'], + gpu_info["LDSBanks"], + gpu_info["numSQC"], + gpu_info["numPipes"], hbmBW, compute_partition, memory_partition, diff --git a/src/utils/tty.py b/src/utils/tty.py index 19b49a982..4870ef7e6 100644 --- a/src/utils/tty.py +++ b/src/utils/tty.py @@ -201,9 +201,9 @@ def show_all(args, runs, archConfigs, output): ) # Only show top N kernels (as specified in --max-kernel-num) in "Top Stats" section - if( - type == "raw_csv_table" - and (table_config["source"] == "pmc_kernel_top.csv" or table_config["source"] == "pmc_dispatch_info.csv") + if type == "raw_csv_table" and ( + table_config["source"] == "pmc_kernel_top.csv" + or table_config["source"] == "pmc_dispatch_info.csv" ): df = df.head(args.max_stat_num) # NB: @@ -251,7 +251,9 @@ def show_kernel_stats(args, runs, archConfigs, output): # sorted when load_table_data. if table_config["id"] == 1: print("\n" + "-" * 80, file=output) - print("Detected Kernels (sorted decending by duration)", file=output) + print( + "Detected Kernels (sorted descending by duration)", file=output + ) df = pd.concat([df, single_df["Kernel_Name"]], axis=1) if table_config["id"] == 2: @@ -268,4 +270,3 @@ ), file=output, ) - diff --git a/tests/test_profile_general.py b/tests/test_profile_general.py index cb9aa1cd2..fc71c8012 100644 --- a/tests/test_profile_general.py +++ b/tests/test_profile_general.py @@ -358,7 +358,10 @@ def baseline_compare_metric(test_name, workload_dir, args=[]): ) isValid = ( - (abs(absolute_diff) <= METRIC_THRESHOLDS[metric_idx]["absolute"]) + ( + abs(absolute_diff) + <= METRIC_THRESHOLDS[metric_idx]["absolute"] + ) if (threshold_type == "absolute") else ( abs(relative_diff) From 3d666c511d71e4553242815ad2a7efaeeb5e9dcf Mon Sep 17 00:00:00 2001 From: Karl W Schulz Date: Fri, 16 Feb 2024 15:41:31 -0600 Subject: [PATCH 2/3] more code formatting updates Signed-off-by: Karl W Schulz --- src/omniperf_analyze/analysis_webui.py | 121 ++++++++++++------------- src/utils/gui_components/header.py | 13 +-- tests/test_utils.py | 2 +- 3 files changed, 61 insertions(+), 75 deletions(-) diff --git a/src/omniperf_analyze/analysis_webui.py b/src/omniperf_analyze/analysis_webui.py index 9f174ca94..5192e7c88 100644 --- a/src/omniperf_analyze/analysis_webui.py +++ b/src/omniperf_analyze/analysis_webui.py @@ -44,7 +44,7 @@ def __init__(self, args, supported_archs): self.app = dash.Dash(__name__, external_stylesheets=[dbc.themes.CYBORG]) self.dest_dir = os.path.abspath(args.path[0][0]) self.arch = None - + self.__hidden_sections = ["Memory Chart", "Roofline"] self.__hidden_columns = ["Tips", "coll_level"] # define different types of bar charts @@ -52,18 +52,13 @@ def __init__(self, args, supported_archs): "instr_mix": [1001, 1002], "multi_bar": [1604, 1704], "sol": [1101, 1201, 1301, 1401, 1601, 1701], - #"l2_cache_per_chan": [1802, 1803] + # "l2_cache_per_chan": [1802, 1803] } # define any elements which will have full width self.__full_width_elements = {1801} - @demarcate - def build_layout( - self, - input_filters, - arch_configs - ): + def build_layout(self, input_filters, 
arch_configs): """ Build gui layout """ @@ -78,7 +73,7 @@ def build_layout( kernel_top_df = base_data.dfs[1] for kernel_id in base_data.filter_kernel_ids: filt_kernel_names.append(kernel_top_df.loc[kernel_id, "Kernel_Name"]) - + self.app.layout.children = html.Div( children=[ dbc.Spinner( @@ -107,10 +102,12 @@ def generate_from_filter( ): logging.debug("[analysis] gui normalization is %s" % norm_filt) - base_data = self.initalize_runs() # Re-initalizes everything + base_data = self.initalize_runs() # Re-initalizes everything panel_configs = copy.deepcopy(arch_configs.panel_configs) # Generate original raw df - base_data[base_run].raw_pmc = file_io.create_df_pmc(self.dest_dir, self.get_args().verbose) + base_data[base_run].raw_pmc = file_io.create_df_pmc( + self.dest_dir, self.get_args().verbose + ) logging.debug("[analysis] gui dispatch filter is %s" % disp_filt) logging.debug("[analysis] gui kernel filter is %s" % kernel_filter) logging.debug("[analysis] gui gpu filter is %s" % gcd_filter) @@ -145,8 +142,8 @@ def generate_from_filter( panel_configs = temp # All filtering will occur here parser.load_table_data( - workload=base_data[base_run], - dir=self.dest_dir, + workload=base_data[base_run], + dir=self.dest_dir, is_gui=True, debug=self.get_args().debug, verbose=self.get_args().verbose, @@ -166,21 +163,26 @@ def generate_from_filter( # update roofline for visualization in GUI self.get_socs()[self.arch].analysis_setup( roofline_parameters={ - 'path_to_dir': self.dest_dir, - 'device_id': 0, - 'sort_type': 'kernels', - 'mem_level': 'ALL', - 'include_kernel_names': False, - 'is_standalone': False + "path_to_dir": self.dest_dir, + "device_id": 0, + "sort_type": "kernels", + "mem_level": "ALL", + "include_kernel_names": False, + "is_standalone": False, } ) roof_obj = self.get_socs()[self.arch].roofline_obj div_children.append( roof_obj.empirical_roofline( - ret_df=parser.apply_filters(workload=base_data[base_run], dir=self.dest_dir, is_gui=True, debug=self.get_args().debug) + ret_df=parser.apply_filters( + workload=base_data[base_run], + dir=self.dest_dir, + is_gui=True, + debug=self.get_args().debug, + ) ) ) - + # Iterate over each section as defined in panel configs for panel_id, panel in panel_configs.items(): title = str(panel_id // 100) + ". " + panel["title"] @@ -202,7 +204,7 @@ def generate_from_filter( # The sys info table need to add index back if t_type == "raw_csv_table" and "Info" in original_df.keys(): original_df.reset_index(inplace=True) - + content = determine_chart_type( original_df=original_df, table_config=table_config, @@ -210,7 +212,7 @@ def generate_from_filter( barchart_elements=self.__barchart_elements, norm_filt=norm_filt, comparable_columns=comparable_columns, - decimal=self.get_args().decimal + decimal=self.get_args().decimal, ) # Update content for this section @@ -237,7 +239,9 @@ def generate_from_filter( children=title, style={"color": "white"}, ), - html.Div(className="float-container", children=html_section), + html.Div( + className="float-container", children=html_section + ), ], ) ) @@ -254,17 +258,15 @@ def generate_from_filter( ], ) ) - - return div_children + return div_children - #----------------------- + # ----------------------- # Required child methods - #----------------------- + # ----------------------- @demarcate def pre_processing(self): - """Perform any pre-processing steps prior to analysis. 
- """ + """Perform any pre-processing steps prior to analysis.""" super().pre_processing() if len(self._runs) == 1: args = self.get_args() @@ -282,16 +284,16 @@ def pre_processing(self): # create the loaded kernel stats parser.load_kernel_top(self._runs[self.dest_dir], self.dest_dir) # set architecture - self.arch = self._runs[self.dest_dir].sys_info.iloc[0]['gpu_soc'] - + self.arch = self._runs[self.dest_dir].sys_info.iloc[0]["gpu_soc"] + else: - self.error("Multiple runs not yet supported in GUI. Retry without --gui flag.") + self.error( + "Multiple runs not yet supported in GUI. Retry without --gui flag." + ) - @demarcate def run_analysis(self): - """Run CLI analysis. - """ + """Run CLI analysis.""" super().run_analysis() args = self.get_args() input_filters = { @@ -301,27 +303,29 @@ def run_analysis(self): "normalization": args.normal_unit, "top_n": args.max_stat_num, } - + self.build_layout( input_filters, self._arch_configs[self.arch], ) if args.random_port: - self.app.run_server(debug=False, host="0.0.0.0", port=random.randint(1024, 49151)) + self.app.run_server( + debug=False, host="0.0.0.0", port=random.randint(1024, 49151) + ) else: self.app.run_server(debug=False, host="0.0.0.0", port=args.gui) @demarcate def determine_chart_type( - original_df, - table_config, - hidden_columns, - barchart_elements, - norm_filt, - comparable_columns, - decimal - ): + original_df, + table_config, + hidden_columns, + barchart_elements, + norm_filt, + comparable_columns, + decimal, +): content = [] display_columns = original_df.columns.values.tolist().copy() @@ -333,13 +337,12 @@ def determine_chart_type( # Determine chart type: # a) Barchart - if table_config["id"] in [ - x for i in barchart_elements.values() for x in i - ]: + if table_config["id"] in [x for i in barchart_elements.values() for x in i]: d_figs = build_bar_chart(display_df, table_config, barchart_elements, norm_filt) # Smaller formatting if barchart yeilds several graphs if ( - len(d_figs) > 2 + len(d_figs) + > 2 # and not table_config["id"] # in barchart_elements["l2_cache_per_chan"] ): @@ -348,24 +351,14 @@ def determine_chart_type( temp_obj.append( html.Div( className="float-child", - children=[ - dcc.Graph( - figure=fig, style={"margin": "2%"} - ) - ], + children=[dcc.Graph(figure=fig, style={"margin": "2%"})], ) ) - content.append( - html.Div( - className="float-container", children=temp_obj - ) - ) + content.append(html.Div(className="float-container", children=temp_obj)) # Normal formatting if < 2 graphs else: for fig in d_figs: - content.append( - dcc.Graph(figure=fig, style={"margin": "2%"}) - ) + content.append(dcc.Graph(figure=fig, style={"margin": "2%"})) # B) Tablechart else: d_figs = build_table_chart( @@ -397,4 +390,4 @@ def determine_chart_type( style={"color": "white"}, ), ) - return content \ No newline at end of file + return content diff --git a/src/utils/gui_components/header.py b/src/utils/gui_components/header.py index cd1bef5e1..137a7f290 100644 --- a/src/utils/gui_components/header.py +++ b/src/utils/gui_components/header.py @@ -40,19 +40,14 @@ def list_unique(orig_list, is_numeric): def create_span(input): - return { - "label": html.Span(str(input), title=str(input)), - "value": str(input) - } + return {"label": html.Span(str(input), title=str(input)), "value": str(input)} def get_header(raw_pmc, input_filters, kernel_names): kernel_names = list( map( str, - raw_pmc[ - schema.pmc_perf_file_prefix - ]["Kernel_Name"], + raw_pmc[schema.pmc_perf_file_prefix]["Kernel_Name"], ) ) kernel_names = [x.strip() for x 
in kernel_names] @@ -286,9 +281,7 @@ def get_header(raw_pmc, input_filters, kernel_names): ), id="kernel-filt", multi=True, - value=input_filters[ - "kernel" - ], + value=input_filters["kernel"], optionHeight=150, placeholder="ALL", style={ diff --git a/tests/test_utils.py b/tests/test_utils.py index 78bdda632..b79dbed0b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -79,7 +79,7 @@ def clean_output_dir(cleanup, output_dir): return -def check_csv_files(output_dir,num_devices, num_kernels) : +def check_csv_files(output_dir, num_devices, num_kernels): """Check profiling output csv files for expected number of entries (based on kernel invocations) Args: From 1739a4bd9143cc6124f496390bab678b9774ea72 Mon Sep 17 00:00:00 2001 From: Karl W Schulz Date: Thu, 22 Feb 2024 15:40:02 -0600 Subject: [PATCH 3/3] more code formatting updates Signed-off-by: Karl W Schulz --- src/roofline.py | 46 ++++++++------- src/utils/gui.py | 94 ++++++++++++++++-------------- src/utils/roofline_calc.py | 90 +++++++++++------------------ src/utils/tty.py | 20 ++++--- src/utils/utils.py | 113 ++++++++++++++++++++++--------------- 5 files changed, 189 insertions(+), 174 deletions(-) diff --git a/src/roofline.py b/src/roofline.py index eebda645b..e0424e861 100644 --- a/src/roofline.py +++ b/src/roofline.py @@ -223,10 +223,12 @@ def generate_plot(self, dtype, fig=None) -> go.Figure(): "{} GB/s".format( to_int(self.__ceiling_data[cache_level.lower()][2]) ), - None - if self.__run_parameters["is_standalone"] - else "{} GB/s".format( - to_int(self.__ceiling_data[cache_level.lower()][2]) + ( + None + if self.__run_parameters["is_standalone"] + else "{} GB/s".format( + to_int(self.__ceiling_data[cache_level.lower()][2]) + ) ), ], textposition="top right", @@ -243,9 +245,13 @@ def generate_plot(self, dtype, fig=None) -> go.Figure(): mode=plot_mode, hovertemplate="%{text}", text=[ - None - if self.__run_parameters["is_standalone"] - else "{} GFLOP/s".format(to_int(self.__ceiling_data["valu"][2])), + ( + None + if self.__run_parameters["is_standalone"] + else "{} GFLOP/s".format( + to_int(self.__ceiling_data["valu"][2]) + ) + ), "{} GFLOP/s".format(to_int(self.__ceiling_data["valu"][2])), ], textposition="top left", @@ -265,9 +271,11 @@ def generate_plot(self, dtype, fig=None) -> go.Figure(): mode=plot_mode, hovertemplate="%{text}", text=[ - None - if self.__run_parameters["is_standalone"] - else "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])), + ( + None + if self.__run_parameters["is_standalone"] + else "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])) + ), "{} GFLOP/s".format(to_int(self.__ceiling_data["mfma"][2])), ], textposition=pos, @@ -285,9 +293,9 @@ def generate_plot(self, dtype, fig=None) -> go.Figure(): name="ai_l1", mode="markers", marker={"color": "#00CC96"}, - marker_symbol=SYMBOLS - if self.__run_parameters["include_kernel_names"] - else None, + marker_symbol=( + SYMBOLS if self.__run_parameters["include_kernel_names"] else None + ), ) ) fig.add_trace( @@ -297,9 +305,9 @@ def generate_plot(self, dtype, fig=None) -> go.Figure(): name="ai_l2", mode="markers", marker={"color": "#EF553B"}, - marker_symbol=SYMBOLS - if self.__run_parameters["include_kernel_names"] - else None, + marker_symbol=( + SYMBOLS if self.__run_parameters["include_kernel_names"] else None + ), ) ) fig.add_trace( @@ -309,9 +317,9 @@ def generate_plot(self, dtype, fig=None) -> go.Figure(): name="ai_hbm", mode="markers", marker={"color": "#636EFA"}, - marker_symbol=SYMBOLS - if 
self.__run_parameters["include_kernel_names"] - else None, + marker_symbol=( + SYMBOLS if self.__run_parameters["include_kernel_names"] else None + ), ) ) diff --git a/src/utils/gui.py b/src/utils/gui.py index bc1b3d8e9..78244b6a5 100644 --- a/src/utils/gui.py +++ b/src/utils/gui.py @@ -298,9 +298,11 @@ def build_table_chart( [ { column: { - "value": str(row["Tips"]) - if column == display_columns[0] and row["Tips"] - else "", + "value": ( + str(row["Tips"]) + if column == display_columns[0] and row["Tips"] + else "" + ), "type": "markdown", } for column, value in row.items() @@ -325,52 +327,58 @@ def build_table_chart( # style cell style_cell={"maxWidth": "500px"}, # display style - style_header={ - "backgroundColor": "rgb(30, 30, 30)", - "color": "white", - "fontWeight": "bold", - } - if IS_DARK - else {}, - style_data={ - "backgroundColor": "rgb(50, 50, 50)", - "color": "white", - "whiteSpace": "normal", - "height": "auto", - } - if IS_DARK - else {}, - style_data_conditional=[ - {"if": {"row_index": "odd"}, "backgroundColor": "rgb(60, 60, 60)"}, + style_header=( { - "if": {"column_id": "PoP", "filter_query": "{PoP} > 50"}, - "backgroundColor": "#ffa90a", + "backgroundColor": "rgb(30, 30, 30)", "color": "white", - }, + "fontWeight": "bold", + } + if IS_DARK + else {} + ), + style_data=( { - "if": {"column_id": "PoP", "filter_query": "{PoP} > 80"}, - "backgroundColor": "#ff120a", + "backgroundColor": "rgb(50, 50, 50)", "color": "white", - }, - { - "if": { - "column_id": "Avg", - "filter_query": "{Unit} = Pct && {Avg} > 50", + "whiteSpace": "normal", + "height": "auto", + } + if IS_DARK + else {} + ), + style_data_conditional=( + [ + {"if": {"row_index": "odd"}, "backgroundColor": "rgb(60, 60, 60)"}, + { + "if": {"column_id": "PoP", "filter_query": "{PoP} > 50"}, + "backgroundColor": "#ffa90a", + "color": "white", }, - "backgroundColor": "#ffa90a", - "color": "white", - }, - { - "if": { - "column_id": "Avg", - "filter_query": "{Unit} = Pct && {Avg} > 80", + { + "if": {"column_id": "PoP", "filter_query": "{PoP} > 80"}, + "backgroundColor": "#ff120a", + "color": "white", }, - "backgroundColor": "#ff120a", - "color": "white", - }, - ] - if IS_DARK - else [], + { + "if": { + "column_id": "Avg", + "filter_query": "{Unit} = Pct && {Avg} > 50", + }, + "backgroundColor": "#ffa90a", + "color": "white", + }, + { + "if": { + "column_id": "Avg", + "filter_query": "{Unit} = Pct && {Avg} > 80", + }, + "backgroundColor": "#ff120a", + "color": "white", + }, + ] + if IS_DARK + else [] + ), # the df to display data=display_df.to_dict("records"), ) diff --git a/src/utils/roofline_calc.py b/src/utils/roofline_calc.py index 2c3cf44de..e8367d118 100644 --- a/src/utils/roofline_calc.py +++ b/src/utils/roofline_calc.py @@ -195,21 +195,11 @@ def calc_ai(sort_type, ret_df): df = df.sort_values(by=["Kernel_Name"]) df = df.reset_index(drop=True) - total_flops = ( - valu_flops - ) = ( - mfma_flops_bf16 - ) = ( - mfma_flops_f16 - ) = ( - mfma_iops_i8 - ) = ( + total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = ( mfma_flops_f32 - ) = ( - mfma_flops_f64 - ) = ( - lds_data - ) = L1cache_data = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0 + ) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = calls = ( + totalDuration + ) = avgDuration = 0.0 kernelName = "" @@ -390,23 +380,11 @@ def calc_ai(sort_type, ret_df): kernelName, idx, calls ) ) - total_flops = ( - valu_flops - ) = ( - mfma_flops_bf16 - ) = ( - mfma_flops_f16 - ) = ( - mfma_iops_i8 - ) = ( + 
total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = ( mfma_flops_f32 - ) = ( - mfma_flops_f64 - ) = ( - lds_data - ) = ( - L1cache_data - ) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0 + ) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = ( + calls + ) = totalDuration = avgDuration = 0.0 if sort_type == "dispatches": myList.append( @@ -428,23 +406,11 @@ def calc_ai(sort_type, ret_df): avgDuration, ) ) - total_flops = ( - valu_flops - ) = ( - mfma_flops_bf16 - ) = ( - mfma_flops_f16 - ) = ( - mfma_iops_i8 - ) = ( + total_flops = valu_flops = mfma_flops_bf16 = mfma_flops_f16 = mfma_iops_i8 = ( mfma_flops_f32 - ) = ( - mfma_flops_f64 - ) = ( - lds_data - ) = ( - L1cache_data - ) = L2cache_data = hbm_data = calls = totalDuration = avgDuration = 0.0 + ) = mfma_flops_f64 = lds_data = L1cache_data = L2cache_data = hbm_data = ( + calls + ) = totalDuration = avgDuration = 0.0 myList.sort(key=lambda x: x.totalDuration, reverse=True) @@ -456,24 +422,32 @@ def calc_ai(sort_type, ret_df): # Create list of top 5 intensities while i < TOP_N and i != len(myList): kernelNames.append(myList[i].KernelName) - intensities["ai_l1"].append( - myList[i].total_flops / myList[i].L1cache_data - ) if myList[i].L1cache_data else intensities["ai_l1"].append(0) + ( + intensities["ai_l1"].append(myList[i].total_flops / myList[i].L1cache_data) + if myList[i].L1cache_data + else intensities["ai_l1"].append(0) + ) # print("cur_ai_L1", myList[i].total_flops/myList[i].L1cache_data) if myList[i].L1cache_data else print("null") # print() - intensities["ai_l2"].append( - myList[i].total_flops / myList[i].L2cache_data - ) if myList[i].L2cache_data else intensities["ai_l2"].append(0) + ( + intensities["ai_l2"].append(myList[i].total_flops / myList[i].L2cache_data) + if myList[i].L2cache_data + else intensities["ai_l2"].append(0) + ) # print("cur_ai_L2", myList[i].total_flops/myList[i].L2cache_data) if myList[i].L2cache_data else print("null") # print() - intensities["ai_hbm"].append( - myList[i].total_flops / myList[i].hbm_data - ) if myList[i].hbm_data else intensities["ai_hbm"].append(0) + ( + intensities["ai_hbm"].append(myList[i].total_flops / myList[i].hbm_data) + if myList[i].hbm_data + else intensities["ai_hbm"].append(0) + ) # print("cur_ai_hbm", myList[i].total_flops/myList[i].hbm_data) if myList[i].hbm_data else print("null") # print() - curr_perf.append(myList[i].total_flops / myList[i].avgDuration) if myList[ - i - ].avgDuration else curr_perf.append(0) + ( + curr_perf.append(myList[i].total_flops / myList[i].avgDuration) + if myList[i].avgDuration + else curr_perf.append(0) + ) # print("cur_perf", myList[i].total_flops/myList[i].avgDuration) if myList[i].avgDuration else print("null") i += 1 diff --git a/src/utils/tty.py b/src/utils/tty.py index 4870ef7e6..5d1f68a70 100644 --- a/src/utils/tty.py +++ b/src/utils/tty.py @@ -170,9 +170,11 @@ def show_all(args, runs, archConfigs, output): else: cur_df_copy = copy.deepcopy(cur_df) cur_df_copy[header] = [ - round(float(x), args.decimal) - if x != "" - else x + ( + round(float(x), args.decimal) + if x != "" + else x + ) for x in base_df[header] ] df = pd.concat([df, cur_df_copy[header]], axis=1) @@ -214,11 +216,13 @@ def show_all(args, runs, archConfigs, output): # fash for now. 
ss += ( tabulate( - df.transpose() - if type != "raw_csv_table" - and "columnwise" in table_config - and table_config["columnwise"] == True - else df, + ( + df.transpose() + if type != "raw_csv_table" + and "columnwise" in table_config + and table_config["columnwise"] == True + else df + ), headers="keys", tablefmt="fancy_grid", floatfmt="." + str(args.decimal) + "f", diff --git a/src/utils/utils.py b/src/utils/utils.py index 0b7e91bea..8e4bed595 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -39,26 +39,30 @@ rocprof_cmd = "" + def demarcate(function): def wrap_function(*args, **kwargs): logging.trace("----- [entering function] -> %s()" % (function.__qualname__)) result = function(*args, **kwargs) logging.trace("----- [exiting function] -> %s()" % function.__qualname__) return result + return wrap_function + def error(message): logging.error("") logging.error("[ERROR]: " + message) logging.error("") sys.exit(1) + def trace_logger(message, *args, **kwargs): logging.log(logging.TRACE, message, *args, **kwargs) + def get_version(omniperf_home) -> dict: - """Return Omniperf versioning info - """ + """Return Omniperf versioning info""" # symantic version info version = os.path.join(omniperf_home.parent, "VERSION") try: @@ -96,9 +100,9 @@ def get_version(omniperf_home) -> dict: versionData = {"version": VER, "sha": SHA, "mode": MODE} return versionData + def get_version_display(version, sha, mode): - """Pretty print versioning info - """ + """Pretty print versioning info""" buf = io.StringIO() print("-" * 40, file=buf) print("Omniperf version: %s (%s)" % (version, mode), file=buf) @@ -106,30 +110,36 @@ def get_version_display(version, sha, mode): print("-" * 40, file=buf) return buf.getvalue() + def detect_rocprof(): - """Detect loaded rocprof version. Resolve path and set cmd globally. - """ + """Detect loaded rocprof version. Resolve path and set cmd globally.""" global rocprof_cmd # detect rocprof if not "ROCPROF" in os.environ.keys(): rocprof_cmd = "rocprof" else: rocprof_cmd = os.environ["ROCPROF"] - + # resolve rocprof path rocprof_path = shutil.which(rocprof_cmd) if not rocprof_path: rocprof_cmd = "rocprof" - logging.warning("Warning: Unable to resolve path to %s binary. Reverting to default." % rocprof_cmd) + logging.warning( + "Warning: Unable to resolve path to %s binary. Reverting to default." + % rocprof_cmd + ) rocprof_path = shutil.which(rocprof_cmd) if not rocprof_path: - error("Please verify installation or set ROCPROF environment variable with full path.") + error( + "Please verify installation or set ROCPROF environment variable with full path." + ) else: # Resolve any sym links in file path rocprof_path = os.path.realpath(rocprof_path.rstrip("\n")) logging.info("ROC Profiler: " + str(rocprof_path)) - return rocprof_cmd #TODO: Do we still need to return this? It's not being used in the function call + return rocprof_cmd # TODO: Do we still need to return this? 
It's not being used in the function call + def capture_subprocess_output(subprocess_args, new_env=None): # Start subprocess @@ -180,7 +190,7 @@ def handle_output(stream, mask): return_code = process.wait() selector.close() - success = (return_code == 0) + success = return_code == 0 # Store buffered output output = buf.getvalue() @@ -188,22 +198,26 @@ def handle_output(stream, mask): return (success, output) + def run_prof(fname, profiler_options, target, workload_dir): fbase = os.path.splitext(os.path.basename(fname))[0] m_specs = specs.get_machine_specs(0) - + logging.debug("pmc file: %s" % str(os.path.basename(fname))) # standard rocprof options - default_options = [ - "-i", fname - ] + default_options = ["-i", fname] options = default_options + profiler_options # set required env var for mi300 new_env = None - if (target.lower() == "mi300x_a0" or target.lower() == "mi300x_a1" or target.lower() == "mi300a_a0" or target.lower() == "mi300a_a1") and ( + if ( + target.lower() == "mi300x_a0" + or target.lower() == "mi300x_a1" + or target.lower() == "mi300a_a0" + or target.lower() == "mi300a_a1" + ) and ( os.path.basename(fname) == "pmc_perf_13.txt" or os.path.basename(fname) == "pmc_perf_14.txt" or os.path.basename(fname) == "pmc_perf_15.txt" @@ -215,13 +229,9 @@ def run_prof(fname, profiler_options, target, workload_dir): # profile the app if new_env: - success, output = capture_subprocess_output( - [ rocprof_cmd ] + options, new_env - ) + success, output = capture_subprocess_output([rocprof_cmd] + options, new_env) else: - success, output = capture_subprocess_output( - [ rocprof_cmd ] + options - ) + success, output = capture_subprocess_output([rocprof_cmd] + options) if not success: error(output) @@ -230,9 +240,7 @@ def run_prof(fname, profiler_options, target, workload_dir): # flatten tcc for applicable mi300 input f = path(workload_dir + "/out/pmc_1/results_" + fbase + ".csv") hbm_stack_num = get_hbm_stack_num(target, m_specs.memory_partition) - df = flatten_tcc_info_across_hbm_stacks( - f, hbm_stack_num, int(m_specs.L2Banks) - ) + df = flatten_tcc_info_across_hbm_stacks(f, hbm_stack_num, int(m_specs.L2Banks)) df.to_csv(f, index=False) if os.path.exists(workload_dir + "/out"): @@ -270,10 +278,11 @@ def run_prof(fname, profiler_options, target, workload_dir): df = pd.read_csv(workload_dir + "/" + fbase + ".csv") df.rename(columns=output_headers, inplace=True) df.to_csv(workload_dir + "/" + fbase + ".csv", index=False) - + # write rocprof output to logging logging.info(output) + def replace_timestamps(workload_dir): df_stamps = pd.read_csv(workload_dir + "/timestamps.csv") if "Start_Timestamp" in df_stamps.columns and "End_Timestamp" in df_stamps.columns: @@ -286,9 +295,12 @@ def replace_timestamps(workload_dir): df_pmc_perf["End_Timestamp"] = df_stamps["End_Timestamp"] df_pmc_perf.to_csv(fname, index=False) else: - warning = "WARNING: Incomplete profiling data detected. Unable to update timestamps." + warning = ( + "WARNING: Incomplete profiling data detected. Unable to update timestamps." 
+ ) logging.warning(warning + "\n") + def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof_only): # Record system information mspec = specs.get_machine_specs(0) @@ -367,6 +379,7 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof sysinfo.write(",".join(param)) sysinfo.close() + def detect_roofline(): mspec = specs.get_machine_specs(0) rocm_ver = mspec.rocm_version[:1] @@ -389,8 +402,9 @@ def detect_roofline(): # Must be a valid RHEL machine distro = "platform:el8" elif ( - (type(sles_distro) == str and len(sles_distro) >= 3) and # confirm string and len - sles_distro[:2] == "15" and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3 + (type(sles_distro) == str and len(sles_distro) >= 3) + and sles_distro[:2] == "15" # confirm string and len + and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3 ): # Must be a valid SLES machine # Use SP3 binary for all forward compatible service pack versions @@ -399,12 +413,15 @@ def detect_roofline(): # Must be a valid Ubuntu machine distro = ubuntu_distro else: - logging.error("ROOFLINE ERROR: Cannot find a valid binary for your operating system") + logging.error( + "ROOFLINE ERROR: Cannot find a valid binary for your operating system" + ) sys.exit(1) target_binary = {"rocm_ver": rocm_ver, "distro": distro} return target_binary + def run_rocscope(args, fname): # profile the app if args.use_rocscope == True: @@ -417,23 +434,21 @@ def run_rocscope(args, fname): args.path, "-n", args.name, - "-t", + "-t", fname, "--", ] for i in args.remaining.split(): rs_cmd.append(i) logging.info(rs_cmd) - success, output = capture_subprocess_output( - rs_cmd - ) + success, output = capture_subprocess_output(rs_cmd) if not success: logging.error(result.stderr.decode("ascii")) sys.exit(1) + def mibench(args): - """Run roofline microbenchmark to generate peak BW and FLOP measurements. - """ + """Run roofline microbenchmark to generate peak BW and FLOP measurements.""" logging.info("[roofline] No roofline data found. Generating...") distro_map = {"platform:el8": "rhel8", "15.3": "sle15sp3", "20.04": "ubuntu20_04"} @@ -454,7 +469,9 @@ def mibench(args): # Distro is valid but cant find rocm ver if not os.path.exists(path_to_binary): - logging.error("ROOFLINE ERROR: Unable to locate expected binary (%s)." % path_to_binary) + logging.error( + "ROOFLINE ERROR: Unable to locate expected binary (%s)." % path_to_binary + ) sys.exit(1) subprocess.run( @@ -465,9 +482,10 @@ def mibench(args): "-d", str(args.device), ], - check=True + check=True, ) + def flatten_tcc_info_across_hbm_stacks(file, stack_num, tcc_channel_per_stack): """ Flatten TCC per channel counters across all HBM stacks in used. @@ -532,6 +550,7 @@ def flatten_tcc_info_across_hbm_stacks(file, stack_num, tcc_channel_per_stack): return df + def get_hbm_stack_num(gpu_name, memory_partition): """ Get total HBM stack numbers based on memory partition for MI300. 
@@ -564,15 +583,15 @@ def get_hbm_stack_num(gpu_name, memory_partition): else: # Fixme: add proper numbers for other archs return -1 - + + def get_submodules(package_name): - """List all submodules for a target package - """ + """List all submodules for a target package""" import importlib import pkgutil submodules = [] - + # walk all submodules in target package package = importlib.import_module(package_name) for _, name, _ in pkgutil.walk_packages(package.__path__): @@ -583,15 +602,17 @@ def get_submodules(package_name): return submodules + def is_workload_empty(path): - """Peek workload directory to verify valid profiling output - """ + """Peek workload directory to verify valid profiling output""" pmc_perf_path = path + "/pmc_perf.csv" if os.path.isfile(pmc_perf_path): temp_df = pd.read_csv(pmc_perf_path) if temp_df.dropna().empty: - error("[profiling] Error. Found empty cells in %s.\nProfiling data could be corrupt." % pmc_perf_path) + error( + "[profiling] Error. Found empty cells in %s.\nProfiling data could be corrupt." + % pmc_perf_path + ) else: error("[profiling] Error. Cannot find pmc_perf.csv in %s" % path) -
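Note: the demarcate() decorator and trace_logger() helper reformatted above call logging.trace() and logging.log(logging.TRACE, ...), neither of which exists in Python's stdlib logging module, so Omniperf must register a custom TRACE level elsewhere in the codebase. The registration is not part of this patch; the snippet below is only a minimal sketch of the conventional setup, and the level value and helper name are illustrative assumptions, not Omniperf code:

    import logging

    # Assumption: pick any value below DEBUG (10) for the custom level
    logging.TRACE = logging.DEBUG - 5
    logging.addLevelName(logging.TRACE, "TRACE")

    def _trace(self, message, *args, **kwargs):
        # Emit only when TRACE is enabled for this logger
        if self.isEnabledFor(logging.TRACE):
            self._log(logging.TRACE, message, args, **kwargs)

    # Expose Logger.trace() plus a module-level wrapper mirroring
    # logging.info()/logging.debug(), which demarcate() relies on
    logging.Logger.trace = _trace
    logging.trace = lambda msg, *args, **kwargs: logging.getLogger().trace(
        msg, *args, **kwargs
    )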
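For reference, the capture_subprocess_output() helper touched above returns a (success, output) tuple, which callers such as run_prof() and run_rocscope() unpack. A short usage sketch under that assumption (the command shown is an arbitrary example, not one Omniperf issues):

    # Example only: run a command through the helper and branch on the
    # boolean status returned alongside the combined stdout/stderr text.
    success, output = capture_subprocess_output(["echo", "profiling done"])
    if not success:
        error(output)      # error() logs the message and exits non-zero
    logging.info(output)   # on success, forward the captured text to the log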