From af64f3cce93a081a8a8568ecee1a6d30126b0d40 Mon Sep 17 00:00:00 2001
From: zjgarvey
Date: Mon, 7 Oct 2024 18:43:35 -0500
Subject: [PATCH] add some tools for generating all dumps and scraping for metadata

---
 common_tools/get_dumps.py         | 40 ++++++++++++++++
 common_tools/instruction_count.py | 77 +++++++++++++++++++++++++++----
 2 files changed, 107 insertions(+), 10 deletions(-)
 create mode 100644 common_tools/get_dumps.py

diff --git a/common_tools/get_dumps.py b/common_tools/get_dumps.py
new file mode 100644
index 0000000..8977895
--- /dev/null
+++ b/common_tools/get_dumps.py
@@ -0,0 +1,40 @@
+import os
+from pathlib import Path
+import argparse
+
+
+def compile_dumps(mlir_dir: str):
+    dumps_dir = Path(mlir_dir).parent.joinpath("dumps")
+    i_o_paths = []
+    for root, _, files in os.walk(mlir_dir):
+        for file in files:
+            if not file.endswith(".mlir") and not file.endswith(".mlirbc"):
+                continue
+            f_path = Path(root).joinpath(file)
+            d_path = str(dumps_dir.joinpath(f_path.stem))
+            os.makedirs(d_path, exist_ok=True)
+            i_o_paths.append((str(f_path), d_path))
+    num_jobs = len(i_o_paths)
+    for job, (f_path, d_path) in enumerate(i_o_paths):
+        print(f"Compiling {job + 1} of {num_jobs}...", end="\r")
+        script = f"iree-compile --iree-hal-target-backends=rocm --iree-hip-target=gfx942 {f_path} -o {os.path.join(d_path, 'gemm.vmfb')} --iree-hal-dump-executable-files-to={d_path}"
+        os.system(script)
+    print(
+        f"All jobs completed. Check for dumps in {Path(dumps_dir).absolute()}"
+        + 20 * " ",
+        end="\n",
+    )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Generates IREE compilation executable dumps for all mlir files in a directory."
+    )
+    parser.add_argument(
+        "dir",
+        help="The directory to scan for mlir files.",
+        type=str,
+        default=None,
+    )
+    args = parser.parse_args()
+    compile_dumps(args.dir)
diff --git a/common_tools/instruction_count.py b/common_tools/instruction_count.py
index 99db184..fe857d4 100644
--- a/common_tools/instruction_count.py
+++ b/common_tools/instruction_count.py
@@ -1,44 +1,101 @@
 import os
 import argparse
 import csv
+from typing import Dict
+
 
 def count_instr_in_file(file_path):
-    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
+    with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
         for idx, line in enumerate(file):
             if "s_endpgm" in line:
-                return idx
+                return idx
     return -1
 
+
+def get_metadata_dict(file_path, keys) -> Dict:
+    metadata = dict()
+    start = None
+    end = None
+    with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
+        for idx, line in enumerate(file):
+            if ".end_amdgpu_metadata" in line:
+                end = idx
+            elif ".amdgpu_metadata" in line:
+                start = idx
+            elif start is not None and end is None:
+                f_line = line.lstrip(" -.").rstrip(" \n")
+                key_end = f_line.find(":")
+                key = f_line[0:key_end]
+                if key not in keys:
+                    continue
+                value = f_line[key_end + 1 :].lstrip(" ")
+                if key not in metadata.keys():
+                    metadata[key] = value
+                else:
+                    if isinstance(metadata[key], list):
+                        metadata[key].append(value)
+                    else:
+                        metadata[key] = [metadata[key], value]
+    return metadata
+
+
 def search_directory(directory):
     """Search for .rocmasm files and count their lines."""
     results = []
     for root, _, files in os.walk(directory):
         for file in files:
-            if file.endswith('.rocmasm'):
+            if file.endswith(".rocmasm"):
                 file_path = os.path.join(root, file)
                 line_count = count_instr_in_file(file_path)
                 results.append((file_path, line_count))
     return results
 
-def write_results_to_csv(results, output_file):
+
+def write_results_to_csv(results, output_file, metadata_items):
     """Write the results to a CSV file."""
     # Sort results by line count (second item in tuple)
     results.sort(key=lambda x: x[1], reverse=True)
-    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
+    with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
         csv_writer = csv.writer(csvfile)
-        csv_writer.writerow(['Filename', 'Instruction Count'])
+        headers = ["Filename", "Instruction Count"]
+        for item in metadata_items:
+            headers.append(item)
+        csv_writer.writerow(headers)
         csv_writer.writerows(results)
 
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Data collection tool targeting HSA dumps.")
-    parser.add_argument("dir", help="The directory from which to scan for ISA file dumps (.rocmasm).", type=str, default=None)
+    default_metadata = ["agpr_count", "vgpr_count", "vgpr_spill_count"]
+    parser = argparse.ArgumentParser(
+        description="Data collection tool targeting HSA dumps."
+    )
+    parser.add_argument(
+        "dir",
+        help="The directory to scan for ISA file dumps (.rocmasm).",
+        type=str,
+        default=None,
+    )
+    parser.add_argument(
+        "--metadata",
+        nargs="*",
+        default=default_metadata,
+        help="Manually specify which metadata items to extract from ISA files.",
+    )
    args = parser.parse_args()
-    output_file = 'rocmasm_instr_counts.csv'
+    output_file = "rocmasm_data.csv"
     results = search_directory(args.dir)
+    for i, r in enumerate(results):
+        assert len(r) == 2
+        f = r[0]
+        metadata_dict = get_metadata_dict(f, args.metadata)
+        r = [f, r[1]]
+        for d in args.metadata:
+            r.append(metadata_dict.get(d, ""))
+        results[i] = tuple(r)
     if results:
-        write_results_to_csv(results, output_file)
+        write_results_to_csv(results, output_file, args.metadata)
         print(f"Results written to {output_file}\n")
     else:
         print("No .rocmasm files found.\n")
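
Usage sketch (reviewer note, not part of the patch): as written, get_dumps.py writes per-kernel dumps into a "dumps" directory that is a sibling of the input directory, and instruction_count.py then scans that tree for .rocmasm files. The minimal Python driver below assumes it is run from the repository root, that iree-compile is on PATH, and that ./gemms is a hypothetical directory of .mlir/.mlirbc inputs; adjust paths to your setup.

    # Hypothetical end-to-end driver; the ./gemms input path is illustrative.
    import subprocess
    import sys

    # Compile every .mlir/.mlirbc under ./gemms; dumps land in ./dumps/<stem>/.
    subprocess.run(
        [sys.executable, "common_tools/get_dumps.py", "./gemms"], check=True
    )

    # Scrape instruction counts and register metadata into rocmasm_data.csv.
    subprocess.run(
        [
            sys.executable,
            "common_tools/instruction_count.py",
            "./dumps",
            "--metadata",
            "agpr_count",
            "vgpr_count",
            "vgpr_spill_count",
        ],
        check=True,
    )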