optimize memory view using lttb sample (#776)
Summary:
Enhancement for Issue #760.

Hey guys, I've optimized the speed of the memory view using LTTB sampling (Largest-Triangle-Three-Buckets), which downsamples time-series-like data while retaining the overall shape and variability of the data.

I've tested this with a 2 GB PyTorch profiler trace: the memory view page no longer crashes, and the scaling operation is smooth and quite acceptable to me.
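
For illustration, here is a minimal usage sketch (not part of the commit), assuming the plugin is importable as `torch_tb_profiler` and using made-up curve data in the same {device: [[time, allocated, reserved], ...]} shape the memory view builds:

    from torch_tb_profiler.utils import lttb_sample

    # Toy curve: 100,000 [time, allocated, reserved] points for one made-up device.
    memory_curves = {
        'GPU0': [[t, float(t % 97), float(t % 131)] for t in range(100000)],
    }

    # Downsample each device's curve to at most n_out points (default n_out=10240).
    sampled = lttb_sample(memory_curves)
    print(len(sampled['GPU0']))  # 10240; the first and last points are kept as-is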

Pull Request resolved: #776

Reviewed By: chaekit

Differential Revision: D47850048

Pulled By: aaronenyeshi

fbshipit-source-id: 4d32666f972c7f1b5d18817f69c3266bcb619d92
lh-ycx authored and facebook-github-bot committed Jul 31, 2023
1 parent 465ff4c commit 170d45a
Showing 2 changed files with 77 additions and 1 deletion.
4 changes: 3 additions & 1 deletion tb_plugin/torch_tb_profiler/run.py
@@ -9,7 +9,7 @@
from .profiler.memory_parser import MemoryMetrics, MemoryRecord, MemorySnapshot
from .profiler.module_op import Stats
from .profiler.node import OperatorNode
from .utils import Canonicalizer, DisplayRounder
from .utils import Canonicalizer, DisplayRounder, lttb_sample

logger = utils.get_logger()

@@ -294,6 +294,8 @@ def patch_curves_for_step_plot(curves: Dict[str, List]):
                default_device = dev
                break

        curves = lttb_sample(curves)

        return {
            'metadata': {
                'default_device': default_device,
74 changes: 74 additions & 0 deletions tb_plugin/torch_tb_profiler/utils.py
@@ -7,6 +7,7 @@
import time
from contextlib import contextmanager
from math import pow
import numpy as np

from . import consts

@@ -120,3 +121,76 @@ def timing(description: str, force: bool = False) -> None:
        logger.info(f'{description}: {elapsed_time}')
    else:
        yield


def _areas_of_triangles(a, bs, c):
    """Calculate areas of triangles from duples of vertex coordinates.

    Uses implicit numpy broadcasting along first axis of ``bs``.

    Returns
    -------
    numpy.array
        Array of areas of shape (len(bs),)
    """
    bs_minus_a = bs - a
    a_minus_bs = a - bs
    return 0.5 * abs(
        (a[0] - c[0]) * (bs_minus_a[:, 1]) - (a_minus_bs[:, 0]) * (c[1] - a[1])
    )


def lttb_sample(memory_curves, n_out=10240):
    """Sample ``memory_curves`` down to ``n_out`` points using the LTTB algorithm.

    Parameters
    ----------
    memory_curves : dict(str, list(list(time, allocated, reserved)))
        A dict keyed by device (cpu, gpu0, gpu1, ...); each value is a
        list of [time, allocated, reserved] points.
    n_out : int
        Number of data points to downsample to.

    Returns
    -------
    Sampled memory_curves with at most n_out points per device.
    """
    sampled_memory_curves = {}
    for key in memory_curves:
        data = memory_curves[key]
        length = len(data)
        if n_out >= length:
            sampled_memory_curves[key] = memory_curves[key]
            continue

        # Split data into bins
        n_bins = n_out - 2
        data = np.array(data)
        data_bins = np.array_split(data[1 : length - 1], n_bins)

        # Prepare output array
        # First and last points are the same as in the input.
        out = np.zeros((n_out, 3))
        out[0] = data[0]
        out[len(out) - 1] = data[length - 1]

        # Note that we only need to perform LTTB on (time, allocated).
        # Largest Triangle Three Buckets (LTTB):
        # In each bin, find the point that makes the largest triangle
        # with the point saved in the previous bin
        # and the centroid of the points in the next bin.
        for i in range(len(data_bins)):
            this_bin = data_bins[i]
            if i < n_bins - 1:
                next_bin = data_bins[i + 1]
            else:
                next_bin = data[len(data) - 1 :]
            a = out[i]
            bs = this_bin
            c = next_bin.mean(axis=0)
            areas = _areas_of_triangles(a, bs, c)
            out[i + 1] = bs[np.argmax(areas)]

        sampled_memory_curves[key] = out.tolist()
    return sampled_memory_curves
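
For reference, a small illustrative check (not part of the commit) that the vectorized `_areas_of_triangles` helper agrees with the ordinary shoelace triangle-area formula; the points below are made up, and only the first two coordinates (time, allocated) of each point enter the area:

    import numpy as np

    from torch_tb_profiler.utils import _areas_of_triangles  # assumed import path

    a = np.array([0.0, 0.0])                  # point selected in the previous bin
    bs = np.array([[1.0, 0.0], [3.0, 5.0]])   # candidate points in the current bin
    c = np.array([0.0, 1.0])                  # centroid of the next bin

    def shoelace(a, b, c):
        # 0.5 * |x_a (y_b - y_c) + x_b (y_c - y_a) + x_c (y_a - y_b)|
        return 0.5 * abs(a[0] * (b[1] - c[1]) + b[0] * (c[1] - a[1]) + c[0] * (a[1] - b[1]))

    print(_areas_of_triangles(a, bs, c))    # [0.5 1.5]
    print([shoelace(a, b, c) for b in bs])  # [0.5, 1.5]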
