-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c9209aa
commit b5fb8c0
Showing
1 changed file
with
193 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. | ||
# See file LICENSE for terms. | ||
|
||
import sqlite3 | ||
import pandas as pd | ||
import numpy as np | ||
import subprocess | ||
import argparse | ||
import pathlib | ||
import os | ||
|
||
|
||
class Analyzer: | ||
def __init__(self, args: argparse.Namespace): | ||
self.nsys_report_path = args.nsys_report_path | ||
|
||
self.sql_path = None | ||
if args.sql_path is None: | ||
report_basename_no_ext = pathlib.Path(self.nsys_report_path).stem | ||
self.sql_path = os.getcwd() + os.sep + report_basename_no_ext + ".sqlite" | ||
else: | ||
self.sql_path = args.sql_path | ||
|
||
self.nsys_binary = None | ||
if args.nsys_binary is None: | ||
self.nsys_binary = "nsys" | ||
else: | ||
self.nsys_binary = args.nsys_binary | ||
|
||
def _export_report_to_sqlite(self): | ||
full_cmd_str = ( | ||
f"{self.nsys_binary} export --type=sqlite --lazy=false " | ||
+ f"--force-overwrite=true --output={self.sql_path} {self.nsys_report_path} " | ||
+ "--tables=StringIds,NVTX_EVENTS" | ||
) | ||
full_cmd_list = full_cmd_str.split() | ||
print(f"Command: {full_cmd_str}") | ||
subprocess.run(full_cmd_list) | ||
|
||
def _initialize_bins(self): | ||
"""Create bins ranging from 0 B to 512 PiB""" | ||
|
||
tmp = np.logspace(start=0, stop=59, num=60, base=2, | ||
dtype=np.float64) # 2^0 2^1 ... 2^59 | ||
self.bin_full = np.insert(tmp, 0, 0.0) # 0 2^0 2^1 ... 2^59 | ||
self.bin_full_in_MiB = self.bin_full / 1024.0 / 1024.0 | ||
|
||
def _sql_query(self, filter_string: str) -> pd.DataFrame: | ||
"""Perform SQL query. | ||
The SQLite schema in nsys is not forward compatible, and may change completely in a new release. | ||
Refer to https://docs.nvidia.com/nsight-systems/UserGuide/index.html?highlight=schema#sqlite-schema-reference | ||
:param filter_string: NVTX annotation string serving as a filter for the query. | ||
:type filter_string: str | ||
:return: Pandas dataframe containing the SQL query result. | ||
:rtype: pd.DataFrame | ||
""" | ||
|
||
sql_expr = ( | ||
"WITH io_string AS ( " | ||
+ " SELECT * " | ||
+ " FROM " | ||
+ " StringIds " | ||
+ " WHERE " | ||
+ " value LIKE '%%{}%%' ".format(filter_string) | ||
+ "), " | ||
+ "io_marker AS ( " | ||
+ " SELECT " | ||
+ " start AS startTimeInNs, " | ||
+ " int64Value AS ioSize, " | ||
+ " value AS nvtxAnnotation " | ||
+ " FROM NVTX_EVENTS " | ||
+ " CROSS JOIN io_string " | ||
+ " WHERE textId = io_string.id " | ||
+ " ORDER BY start " | ||
+ ") " | ||
+ "SELECT * " | ||
+ "FROM io_marker;" | ||
) | ||
|
||
df = pd.read_sql(sql_expr, self.db_connection) | ||
if df.empty: | ||
print( | ||
f'Warning: SQL result is empty for filter string "{filter_string}"') | ||
return df | ||
|
||
def _generate_hist(self, df: pd.DataFrame) -> tuple[np.ndarray, np.ndarray]: | ||
my_series = df["ioSize"] | ||
|
||
# Determine the appropriate bins for the histogram | ||
idx_upperbound = -1 | ||
max_v = np.amax(my_series) | ||
for idx in range(len(self.bin_full_in_MiB)): | ||
if self.bin_full_in_MiB[idx] >= max_v: | ||
idx_upperbound = idx | ||
break | ||
|
||
tight_bin_edges = self.bin_full_in_MiB[0: (idx_upperbound + 1)] | ||
if max_v > self.bin_full_in_MiB[-1]: | ||
tight_bin_edges.append(max_v) | ||
return np.histogram(my_series, tight_bin_edges) | ||
|
||
def _get_compact_filesize(self, file_size_inB: np.float64) -> str: | ||
KiB = 1024.0 | ||
MiB = 1024.0 * KiB | ||
GiB = 1024.0 * MiB | ||
TiB = 1024.0 * GiB | ||
PiB = 1024.0 * TiB | ||
EiB = 1024.0 * PiB | ||
|
||
if file_size_inB >= 0 and file_size_inB < KiB: | ||
return f"{int(file_size_inB)} B" | ||
elif file_size_inB >= KiB and file_size_inB < MiB: | ||
return f"{int(file_size_inB / KiB)} KiB" | ||
elif file_size_inB >= MiB and file_size_inB < GiB: | ||
return f"{int(file_size_inB / MiB)} MiB" | ||
elif file_size_inB >= GiB and file_size_inB < TiB: | ||
return f"{int(file_size_inB / GiB)} GiB" | ||
elif file_size_inB >= TiB and file_size_inB < PiB: | ||
return f"{int(file_size_inB / TiB)} TiB" | ||
elif file_size_inB >= PiB and file_size_inB < EiB: | ||
return f"{int(file_size_inB / PiB)} PiB" | ||
else: | ||
raise Exception("Invalid value for file_size.") | ||
|
||
def _print(self, title, hist, bin_edges): | ||
print(f"\n{title}") | ||
print(" Bins ...... Count") | ||
for idx in range(len(hist)): | ||
symbol = ")" | ||
if idx == len(hist) - 1: | ||
symbol = "]" | ||
|
||
print( | ||
" [{:>8}, {:>8}{} ...... {}".format( | ||
self._get_compact_filesize(bin_edges[idx]), | ||
self._get_compact_filesize(bin_edges[idx + 1]), | ||
symbol, | ||
hist[idx], | ||
) | ||
) | ||
|
||
def _process(self, filter_string: str): | ||
df = self._sql_query(filter_string) | ||
if df.empty: | ||
return | ||
|
||
hist, bin_edges = self._generate_hist(df) | ||
self._print(filter_string, hist, bin_edges) | ||
|
||
def run(self): | ||
self._initialize_bins() | ||
|
||
self._export_report_to_sqlite() | ||
self.db_connection = sqlite3.connect(self.sql_path) | ||
|
||
filter_string_list = [ | ||
"FileHandle::pread()", | ||
"FileHandle::pwrite()", | ||
"posix_device_read()", | ||
"posix_device_write()", | ||
"posix_host_read()", | ||
"posix_host_write()", | ||
"cufileRead()", | ||
"cufileWrite()", | ||
"RemoteHandle::read()", | ||
"RemoteHandle::pread()", | ||
] | ||
|
||
for filter_string in filter_string_list: | ||
self._process(filter_string) | ||
|
||
|
||
if __name__ == "__main__": | ||
parser = argparse.ArgumentParser( | ||
prog="kvikio_stat", description="Generate I/O size histogram from Nsight System report" | ||
) | ||
parser.add_argument("--nsys-report-path", required=True, | ||
help="The path of the Nsight System report.", type=str) | ||
parser.add_argument( | ||
"--sql-path", | ||
help="The path of the SQL database exported from the Nsight System report. " | ||
+ "If unspecified, the current working directory is used to store the SQL database, " | ||
+ "and the file name is derived from the Nsight System report.", | ||
type=str, | ||
) | ||
parser.add_argument( | ||
"--nsys-binary", help='The path of the Nsight System CLI program. If unspecified, "nsys" is used.', type=str | ||
) | ||
args = parser.parse_args() | ||
|
||
az = Analyzer(args) | ||
az.run() |