Commit: safeguarding all custom CUDA and C++ routines via the _cuda_extension_available flag (#54)

* safeguarding all custom CUDA and C++ routines via the _cuda_extension_available flag

* bumping up version number
bonevbs authored Oct 1, 2024
1 parent 4fea88b commit 663bea1
Showing 6 changed files with 32 additions and 25 deletions.
Changelog.md (1 change: 1 addition & 0 deletions)

@@ -7,6 +7,7 @@
 * Added resampling modules for convenience
 * Changing behavior of distributed SHT to use `dim=-3` as channel dimension
 * Fixing SHT unittests to test SHT and ISHT individually, rather than the roundtrip
+* Changing the way custom CUDA extensions are handled
 
 ### v0.7.1

README.md (6 changes: 3 additions & 3 deletions)

@@ -75,16 +75,16 @@ torch-harmonics has been used to implement a variety of differentiable PDE solve


 ## Installation
-Download directly from PyPI:
+A simple installation can be directly done from PyPI:
 
 ```bash
 pip install torch-harmonics
 ```
-If you would like to enforce the compilation of CUDA extensions for the discrete-continuous convolutions, you can do so by setting the `FORCE_CUDA_EXTENSION` flag. You may also want to set appropriate architectures with the `TORCH_CUDA_ARCH_LIST` flag.
+If you are planning to use spherical convolutions, we recommend building the corresponding custom CUDA kernels. To enforce this, you can set the `FORCE_CUDA_EXTENSION` flag. You may also want to set appropriate architectures with the `TORCH_CUDA_ARCH_LIST` flag. Finally, make sure to disable build isolation via the `--no-build-isolation` flag to ensure that the custom kernels are built with the existing torch installation.
 ```bash
 export FORCE_CUDA_EXTENSION=1
 export TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX"
-pip install torch-harmonics
+pip install --no-build-isolation torch-harmonics
 ```
 :warning: Please note that the custom CUDA extensions currently only support CUDA architectures >= 7.0.

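Together with the pyproject.toml change below (torch is removed from the build requirements), the `--no-build-isolation` flag lets pip compile the kernels against the torch installation that is already present. A quick way to verify that the extension actually got built is sketched here; the module name `disco_cuda_extension` is taken from the imports touched in this commit, and the check itself is only a sketch, not an official torch-harmonics API.

```python
# Minimal post-install check (a sketch; module name taken from this commit's diff).
try:
    import disco_cuda_extension  # compiled CUDA kernels for the DISCO convolutions
    print("custom CUDA extension available")
except ImportError:
    print("custom CUDA extension not built; the pure PyTorch code path will be used")
```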
pyproject.toml (2 changes: 1 addition & 1 deletion)

@@ -1,5 +1,5 @@
 [build-system]
-requires = [ "setuptools", "setuptools-scm", "torch>=2.4.0"]
+requires = [ "setuptools", "setuptools-scm"]
 build-backend = "setuptools.build_meta"
 
 [project]
torch_harmonics/__init__.py (2 changes: 1 addition & 1 deletion)

@@ -29,7 +29,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 
-__version__ = "0.7.1"
+__version__ = "0.7.2"
 
 from .sht import RealSHT, InverseRealSHT, RealVectorSHT, InverseRealVectorSHT
 from .convolution import DiscreteContinuousConvS2, DiscreteContinuousConvTransposeS2
torch_harmonics/convolution.py (24 changes: 14 additions & 10 deletions)

@@ -44,12 +44,10 @@
 from torch_harmonics._disco_convolution import _disco_s2_contraction_torch, _disco_s2_transpose_contraction_torch
 from torch_harmonics._disco_convolution import _disco_s2_contraction_cuda, _disco_s2_transpose_contraction_cuda
 
-# import custom C++/CUDA extensions
-from disco_helpers import preprocess_psi
-
+# import custom C++/CUDA extensions if available
 try:
+    from disco_helpers import preprocess_psi
     import disco_cuda_extension
-
     _cuda_extension_available = True
 except ImportError as err:
     disco_cuda_extension = None
@@ -377,10 +375,13 @@ def __init__(
         row_idx = idx[1, ...].contiguous()
         col_idx = idx[2, ...].contiguous()
         vals = vals.contiguous()
-        roff_idx = preprocess_psi(self.kernel_size, out_shape[0], ker_idx, row_idx, col_idx, vals).contiguous()
 
-        # preprocessed data-structure for GPU kernel
-        self.register_buffer("psi_roff_idx", roff_idx, persistent=False)
+        if _cuda_extension_available:
+            # preprocessed data-structure for GPU kernel
+            roff_idx = preprocess_psi(self.kernel_size, out_shape[0], ker_idx, row_idx, col_idx, vals).contiguous()
+            self.register_buffer("psi_roff_idx", roff_idx, persistent=False)
+
+        # save all datastructures
         self.register_buffer("psi_ker_idx", ker_idx, persistent=False)
         self.register_buffer("psi_row_idx", row_idx, persistent=False)
         self.register_buffer("psi_col_idx", col_idx, persistent=False)
@@ -468,10 +469,13 @@ def __init__(
         row_idx = idx[1, ...].contiguous()
         col_idx = idx[2, ...].contiguous()
         vals = vals.contiguous()
-        roff_idx = preprocess_psi(self.kernel_size, in_shape[0], ker_idx, row_idx, col_idx, vals).contiguous()
 
-        # preprocessed data-structure for GPU kernel
-        self.register_buffer("psi_roff_idx", roff_idx, persistent=False)
+        if _cuda_extension_available:
+            # preprocessed data-structure for GPU kernel
+            roff_idx = preprocess_psi(self.kernel_size, in_shape[0], ker_idx, row_idx, col_idx, vals).contiguous()
+            self.register_buffer("psi_roff_idx", roff_idx, persistent=False)
+
+        # save all datastructures
         self.register_buffer("psi_ker_idx", ker_idx, persistent=False)
         self.register_buffer("psi_row_idx", row_idx, persistent=False)
         self.register_buffer("psi_col_idx", col_idx, persistent=False)
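The hunks above implement the pattern named in the commit title: the custom C++/CUDA helpers are imported inside a try/except, and every code path that depends on them is gated on the `_cuda_extension_available` flag. Below is a minimal, self-contained sketch of that guard-and-dispatch pattern; `my_cuda_ext` and `fused_op` are hypothetical names used purely for illustration and are not part of torch-harmonics.

```python
import torch

# Guarded import: if the compiled extension was never built, degrade gracefully
# instead of failing at import time.
try:
    import my_cuda_ext  # hypothetical compiled extension module
    _cuda_extension_available = True
except ImportError:
    my_cuda_ext = None
    _cuda_extension_available = False


def fused_op(x: torch.Tensor) -> torch.Tensor:
    """Use the custom kernel only when it was built and the input lives on the GPU."""
    if _cuda_extension_available and x.is_cuda:
        return my_cuda_ext.fused_op(x)  # hypothetical entry point into the extension
    # pure PyTorch fallback keeps CPU-only and extension-less installs working
    return torch.relu(x)  # stand-in for the equivalent torch implementation
```

In this commit the same idea is applied at module construction time: the `preprocess_psi` data structure needed by the GPU kernel is only built and registered as a buffer when `_cuda_extension_available` is true.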
torch_harmonics/distributed/distributed_convolution.py (22 changes: 12 additions & 10 deletions)

@@ -58,12 +58,10 @@
 from torch_harmonics.distributed import polar_group_rank, azimuth_group_rank
 from torch_harmonics.distributed import compute_split_shapes, split_tensor_along_dim
 
-# import custom C++/CUDA extensions
-from disco_helpers import preprocess_psi
-
+# import custom C++/CUDA extensions if available
 try:
+    from disco_helpers import preprocess_psi
     import disco_cuda_extension
-
     _cuda_extension_available = True
 except ImportError as err:
     disco_cuda_extension = None
@@ -240,10 +238,12 @@ def __init__(
         row_idx = idx[1, ...].contiguous()
         col_idx = idx[2, ...].contiguous()
         vals = vals.contiguous()
-        roff_idx = preprocess_psi(self.kernel_size, self.nlat_out_local, ker_idx, row_idx, col_idx, vals).contiguous()
 
-        # preprocessed data-structure for GPU kernel
-        self.register_buffer("psi_roff_idx", roff_idx, persistent=False)
+        if _cuda_extension_available:
+            # preprocessed data-structure for GPU kernel
+            roff_idx = preprocess_psi(self.kernel_size, self.nlat_out_local, ker_idx, row_idx, col_idx, vals).contiguous()
+            self.register_buffer("psi_roff_idx", roff_idx, persistent=False)
+
         self.register_buffer("psi_ker_idx", ker_idx, persistent=False)
         self.register_buffer("psi_row_idx", row_idx, persistent=False)
         self.register_buffer("psi_col_idx", col_idx, persistent=False)
@@ -370,10 +370,12 @@ def __init__(
         row_idx = idx[1, ...].contiguous()
         col_idx = idx[2, ...].contiguous()
         vals = vals.contiguous()
-        roff_idx = preprocess_psi(self.kernel_size, self.nlat_in_local, ker_idx, row_idx, col_idx, vals).contiguous()
 
-        # preprocessed data-structure for GPU kernel
-        self.register_buffer("psi_roff_idx", roff_idx, persistent=False)
+        if _cuda_extension_available:
+            # preprocessed data-structure for GPU kernel
+            roff_idx = preprocess_psi(self.kernel_size, self.nlat_in_local, ker_idx, row_idx, col_idx, vals).contiguous()
+            self.register_buffer("psi_roff_idx", roff_idx, persistent=False)
+
         self.register_buffer("psi_ker_idx", ker_idx, persistent=False)
         self.register_buffer("psi_row_idx", row_idx, persistent=False)
         self.register_buffer("psi_col_idx", col_idx, persistent=False)