From af64f3cce93a081a8a8568ecee1a6d30126b0d40 Mon Sep 17 00:00:00 2001
From: zjgarvey
Date: Mon, 7 Oct 2024 18:43:35 -0500
Subject: [PATCH] add some tools for generating all dumps and scraping for metadata

---
 common_tools/get_dumps.py         | 40 ++++++++++++++++
 common_tools/instruction_count.py | 77 +++++++++++++++++++++++++++----
 2 files changed, 107 insertions(+), 10 deletions(-)
 create mode 100644 common_tools/get_dumps.py

diff --git a/common_tools/get_dumps.py b/common_tools/get_dumps.py
new file mode 100644
index 0000000..8977895
--- /dev/null
+++ b/common_tools/get_dumps.py
@@ -0,0 +1,40 @@
+import os
+from pathlib import Path
+import argparse
+
+
+def compile_dumps(mlir_dir: str):
+    dumps_dir = Path(mlir_dir).parent.joinpath("dumps")
+    i_o_paths = []
+    for root, _, files in os.walk(mlir_dir):
+        for file in files:
+            if not file.endswith(".mlir") and not file.endswith(".mlirbc"):
+                continue
+            f_path = Path(root).joinpath(file)
+            d_path = str(dumps_dir.joinpath(f_path.stem))
+            os.makedirs(d_path, exist_ok=True)
+            i_o_paths.append((str(f_path), d_path))
+    num_jobs = len(i_o_paths)
+    for job, (f_path, d_path) in enumerate(i_o_paths):
+        print(f"Compiling {job + 1} of {num_jobs}...", end="\r")
+        script = f"iree-compile --iree-hal-target-backends=rocm --iree-hip-target=gfx942 {f_path} -o {os.path.join(d_path, 'gemm.vmfb')} --iree-hal-dump-executable-files-to={d_path}"
+        os.system(script)
+    print(
+        f"All jobs completed. Check for dumps in {Path(dumps_dir).absolute()}"
+        + 20 * " ",
+        end="\n",
+    )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Generates IREE compilation executable dumps for all mlir files in a directory."
+    )
+    parser.add_argument(
+        "dir",
+        help="The directory to scan for mlir files.",
+        type=str,
+        default=None,
+    )
+    args = parser.parse_args()
+    compile_dumps(args.dir)
diff --git a/common_tools/instruction_count.py b/common_tools/instruction_count.py
index 99db184..fe857d4 100644
--- a/common_tools/instruction_count.py
+++ b/common_tools/instruction_count.py
@@ -1,44 +1,101 @@
 import os
 import argparse
 import csv
+from typing import Dict
+
 
 def count_instr_in_file(file_path):
-    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
+    with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
         for idx, line in enumerate(file):
             if "s_endpgm" in line:
-                return idx
+                return idx
     return -1
 
+
+def get_metadata_dict(file_path, keys) -> Dict:
+    metadata = dict()
+    start = None
+    end = None
+    with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
+        for idx, line in enumerate(file):
+            if ".end_amdgpu_metadata" in line:
+                end = idx
+            elif ".amdgpu_metadata" in line:
+                start = idx
+            elif start is not None and end is None:
+                f_line = line.lstrip(" -.").rstrip(" \n")
+                key_end = f_line.find(":")
+                key = f_line[0:key_end]
+                if key not in keys:
+                    continue
+                value = f_line[key_end + 1 :].lstrip(" ")
+                if key not in metadata.keys():
+                    metadata[key] = value
+                else:
+                    if isinstance(metadata[key], list):
+                        metadata[key].append(value)
+                    else:
+                        metadata[key] = [metadata[key], value]
+    return metadata
+
+
 def search_directory(directory):
     """Search for .rocmasm files and count their lines."""
     results = []
     for root, _, files in os.walk(directory):
         for file in files:
-            if file.endswith('.rocmasm'):
+            if file.endswith(".rocmasm"):
                 file_path = os.path.join(root, file)
                 line_count = count_instr_in_file(file_path)
                 results.append((file_path, line_count))
     return results
 
-def write_results_to_csv(results, output_file):
+
+def write_results_to_csv(results, output_file, metadata_items):
     """Write the results to a CSV file."""
     # Sort results by line count (second item in tuple)
     results.sort(key=lambda x: x[1], reverse=True)
-    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
+    with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
         csv_writer = csv.writer(csvfile)
-        csv_writer.writerow(['Filename', 'Instruction Count'])
+        headers = ["Filename", "Instruction Count"]
+        for item in metadata_items:
+            headers.append(item)
+        csv_writer.writerow(headers)
         csv_writer.writerows(results)
 
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Data collection tool targeting HSA dumps.")
-    parser.add_argument("dir", help="The directory from which to scan for ISA file dumps (.rocmasm).", type=str, default=None)
+    default_metadata = ["agpr_count", "vgpr_count", "vgpr_spill_count"]
+    parser = argparse.ArgumentParser(
+        description="Data collection tool targeting HSA dumps."
+    )
+    parser.add_argument(
+        "dir",
+        help="The directory to scan for ISA file dumps (.rocmasm).",
+        type=str,
+        default=None,
+    )
+    parser.add_argument(
+        "--metadata",
+        nargs="*",
+        default=default_metadata,
+        help="Manually specify which metadata items to extract from ISA files.",
+    )
    args = parser.parse_args()
-    output_file = 'rocmasm_instr_counts.csv'
+    output_file = "rocmasm_data.csv"
     results = search_directory(args.dir)
+    for i, r in enumerate(results):
+        assert len(r) == 2
+        f = r[0]
+        metadata_dict = get_metadata_dict(f, args.metadata)
+        r = [f, r[1]]
+        for d in args.metadata:
+            r.append(metadata_dict.get(d, ""))
+        results[i] = tuple(r)
     if results:
-        write_results_to_csv(results, output_file)
+        write_results_to_csv(results, output_file, args.metadata)
         print(f"Results written to {output_file}\n")
     else:
         print("No .rocmasm files found.\n")
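
Usage sketch (reviewer note, not part of the patch): as written, get_dumps.py writes per-kernel dumps into a "dumps" directory that is a sibling of the input directory, and instruction_count.py then scans that tree for .rocmasm files. The minimal Python driver below assumes it is run from the repository root, that iree-compile is on PATH, and that ./gemms is a hypothetical directory of .mlir/.mlirbc inputs; adjust paths to your setup.

    # Hypothetical end-to-end driver; the ./gemms input path is illustrative.
    import subprocess
    import sys

    # Compile every .mlir/.mlirbc under ./gemms; dumps land in ./dumps/<stem>/.
    subprocess.run(
        [sys.executable, "common_tools/get_dumps.py", "./gemms"], check=True
    )

    # Scrape instruction counts and register metadata into rocmasm_data.csv.
    subprocess.run(
        [
            sys.executable,
            "common_tools/instruction_count.py",
            "./dumps",
            "--metadata",
            "agpr_count",
            "vgpr_count",
            "vgpr_spill_count",
        ],
        check=True,
    )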