Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Features/1563 Add Dynamic Mode Decomposition (DMD) #1639

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
2953117
Merge pull request #1538 from helmholtz-analytics/features/1455-Add_d…
mrfh92 Jun 21, 2024
60049bd
Merge branch 'features/1455-Add_decomposition-module_and_PCA-interfac…
Jul 8, 2024
b4198ea
Merge branch 'features/1448-random_arrays_of_arbitrary_size' into fea…
Jul 8, 2024
b5019e9
Merge branch 'main' into features/ESAPCA
Jul 8, 2024
9c99157
first draft of rSVD... still buggy and to be completed
Jul 8, 2024
00cc278
finalized rSVD implementation
Aug 12, 2024
4ece637
Merge branch 'main' into features/1457-Add_randomized_SVD
mrfh92 Aug 12, 2024
6bda77b
redo tests
Aug 12, 2024
9a452c6
added tests for catching wrong inputs in rSVD
Aug 12, 2024
6948010
integrated rSVD in PCA interface
Aug 12, 2024
5158b26
-..
Aug 12, 2024
e5dc9f4
Merge branch 'main' into features/1457-Add_randomized_SVD
mrfh92 Aug 12, 2024
5b8ecdb
coverage for last line
Aug 13, 2024
be0b468
started working on draft for DMD
Aug 22, 2024
35e6743
Merge branch 'main' into features/1563-Add_DMD
Sep 2, 2024
f4b588e
started working on structure of DMD class
Sep 2, 2024
486165c
...
Sep 2, 2024
39d14d6
Merge branch 'features/1457-Add_randomized_SVD' into features/1563-Ad…
Sep 2, 2024
64e278c
added first draft of DMD
Sep 2, 2024
1a4eabe
worked on tests for DMD and also on predict_next
Sep 5, 2024
d72c375
completed tests
Sep 6, 2024
ebc17f8
Merge branch 'main' into features/1563-Add_DMD
Sep 6, 2024
32c7812
Merge branch 'main' into features/1563-Add_DMD
Oct 8, 2024
92118cd
implemented batched prediction for DMD, modified some tests
Oct 9, 2024
f9fef53
added some exception tests
Oct 10, 2024
8b85afe
further work on DMD
Oct 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions ausprobieren.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
for debugging will be removed later
"""

import heat as ht
import torch
import heat.decomposition as htd

r = 6
A_red = ht.array(
[
[0.0, -1.0, 0.0, 0.0, 0.0, 0.0],
[1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 1.5, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.5, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, -1.5, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, -0.5],
],
split=None,
dtype=ht.float32,
)
x0_red = ht.random.randn(r, 1, split=None)
m, n = 25 * ht.MPI_WORLD.size, 15
X = ht.hstack(
[(ht.array(torch.linalg.matrix_power(A_red.larray, i) @ x0_red.larray)) for i in range(n)]
)
U = ht.random.randn(m, r, split=0)
U, _ = ht.linalg.qr(U)
X = U @ X


dmd = htd.DMD(svd_solver="full", svd_rank=r)
dmd.fit(X)
print(dmd.rom_basis_.shape, dmd.rom_basis_.split)
dmd.rom_basis_.resplit_(None)

# check prediction of next states
Y = dmd.predict_next(X)
print(ht.allclose(Y[:, : n - 1], X[:, 1:], atol=1e-4, rtol=1e-4))

# check batch prediction
Y = dmd.predict(X[:, 0].resplit_(None), n)
print(Y.shape)
print(X - Y)
128 changes: 106 additions & 22 deletions heat/core/linalg/svdtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ..dndarray import DNDarray
from .. import factories
from .. import types
from ..linalg import matmul, vector_norm
from ..linalg import matmul, vector_norm, qr, svd
from ..indexing import where
from ..random import randn

Expand All @@ -21,7 +21,21 @@
from math import log, ceil, floor, sqrt


__all__ = ["hsvd_rank", "hsvd_rtol", "hsvd"]
__all__ = ["hsvd_rank", "hsvd_rtol", "hsvd", "rsvd"]


def _check_SVD_input(A):
    """
    Validate that `A` is a suitable input for the SVD routines of this module.

    Parameters
    ----------
    A : DNDarray
        candidate input array; must be a 2D DNDarray of dtype float32 or float64.

    Raises
    ------
    TypeError
        If `A` is not a DNDarray, or its dtype is neither float32 nor float64.
    ValueError
        If `A` is not 2-dimensional.
    """
    if not isinstance(A, DNDarray):
        raise TypeError(f"Argument needs to be a DNDarray but is {type(A)}.")
    if A.ndim != 2:
        raise ValueError("A needs to be a 2D matrix")
    # membership test via `in` uses ==, equivalent to the two original comparisons;
    # f-string used for consistency with the TypeError message above
    if A.dtype not in (types.float32, types.float64):
        raise TypeError(
            f"Argument needs to be a DNDarray with datatype float32 or float64, but data type is {A.dtype}."
        )
    return None


#######################################################################################
Expand Down Expand Up @@ -85,16 +99,7 @@ def hsvd_rank(
[1] Iwen, Ong. A distributed and incremental SVD algorithm for agglomerative data analysis on large networks. SIAM J. Matrix Anal. Appl., 37(4), 2016.
[2] Himpe, Leibner, Rave. Hierarchical approximate proper orthogonal decomposition. SIAM J. Sci. Comput., 40 (5), 2018.
"""
if not isinstance(A, DNDarray):
raise TypeError(f"Argument needs to be a DNDarray but is {type(A)}.")
if not A.ndim == 2:
raise ValueError("A needs to be a 2D matrix")
if not A.dtype == types.float32 and not A.dtype == types.float64:
raise TypeError(
"Argument needs to be a DNDarray with datatype float32 or float64, but data type is {}.".format(
A.dtype
)
)
_check_SVD_input(A) # check if A is suitable input
A_local_size = max(A.lshape_map[:, 1])

if maxmergedim is not None and maxmergedim < 2 * (maxrank + safetyshift) + 1:
Expand Down Expand Up @@ -197,16 +202,7 @@ def hsvd_rtol(
[1] Iwen, Ong. A distributed and incremental SVD algorithm for agglomerative data analysis on large networks. SIAM J. Matrix Anal. Appl., 37(4), 2016.
[2] Himpe, Leibner, Rave. Hierarchical approximate proper orthogonal decomposition. SIAM J. Sci. Comput., 40 (5), 2018.
"""
if not isinstance(A, DNDarray):
raise TypeError(f"Argument needs to be a DNDarray but is {type(A)}.")
if not A.ndim == 2:
raise ValueError("A needs to be a 2D matrix")
if not A.dtype == types.float32 and not A.dtype == types.float64:
raise TypeError(
"Argument needs to be a DNDarray with datatype float32 or float64, but data type is {}.".format(
A.dtype
)
)
_check_SVD_input(A) # check if A is suitable input
A_local_size = max(A.lshape_map[:, 1])

if maxmergedim is not None and maxrank is None:
Expand Down Expand Up @@ -529,3 +525,91 @@ def compute_local_truncated_svd(
sigma_loc = torch.zeros(1, dtype=U_loc.dtype, device=U_loc.device)
U_loc = torch.zeros(U_loc.shape[0], 1, dtype=U_loc.dtype, device=U_loc.device)
return U_loc, sigma_loc, err_squared_loc


##############################################################################################
# Randomized SVD
##############################################################################################


def rsvd(
    A: DNDarray,
    rank: int,
    n_oversamples: int = 10,
    power_iter: int = 0,
    qr_procs_to_merge: int = 2,
) -> Union[Tuple[DNDarray, DNDarray, DNDarray], Tuple[DNDarray, DNDarray]]:
    """
    Randomized SVD (rSVD) with prescribed truncation rank `rank`.
    If A = U diag(sigma) V^T is the true SVD of A, this routine computes an approximation for U[:,:rank] (and sigma[:rank], V[:,:rank]).

    The accuracy of this approximation depends on the structure of A ("low-rank" is best) and appropriate choice of parameters.

    Parameters
    ----------
    A : DNDarray
        2D-array (float32/64) of which the rSVD has to be computed.
    rank : int
        truncation rank. (This parameter corresponds to `n_components` in sci-kit learn's TruncatedSVD.)
    n_oversamples : int, optional
        number of oversamples. The default is 10.
    power_iter : int, optional
        number of power iterations. The default is 0.
    qr_procs_to_merge : int, optional
        number of processes to merge at each step of QR decomposition in the power iteration (if power_iter > 0). The default is 2. See the corresponding remarks for `heat.linalg.qr` for more details.

    Notes
    ------
    Memory requirements: the SVD computation of a matrix of size (rank + n_oversamples) x A.shape[1] must fit into the memory of a single process.
    The implementation follows Algorithm 4.4 (randomized range finder) and Algorithm 5.1 (direct SVD) in [1].

    References
    -----------
    [1] Halko, N., Martinsson, P. G., & Tropp, J. A. (2011). Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions. SIAM review, 53(2), 217-288.
    """
    _check_SVD_input(A)  # check if A is suitable input
    if not isinstance(rank, int):
        raise TypeError(f"rank must be an integer, but is {type(rank)}.")
    if rank < 1:
        raise ValueError(f"rank must be positive, but is {rank}.")
    if not isinstance(n_oversamples, int):
        raise TypeError(
            f"if provided, n_oversamples must be an integer, but is {type(n_oversamples)}."
        )
    if n_oversamples < 0:
        raise ValueError(f"n_oversamples must be non-negative, but is {n_oversamples}.")
    if not isinstance(power_iter, int):
        raise TypeError(f"if provided, power_iter must be an integer, but is {type(power_iter)}.")
    if power_iter < 0:
        raise ValueError(f"power_iter must be non-negative, but is {power_iter}.")
    # NOTE(review): there is no check that rank + n_oversamples <= min(A.shape);
    # presumably qr/svd raise downstream if violated — TODO confirm

    ell = rank + n_oversamples
    q = power_iter

    # random Gaussian test matrix; split chosen so that A @ Omega is well-distributed
    splitOmega = 1 if A.split == 0 else 0
    Omega = randn(A.shape[1], ell, dtype=A.dtype, device=A.device, split=splitOmega)

    # compute an orthonormal basis Q of the (approximate) range of A (Alg. 4.4 in [1])
    Y = matmul(A, Omega)
    Q, _ = qr(Y, procs_to_merge=qr_procs_to_merge)

    # power iterations to sharpen the spectral decay (optional, q may be 0)
    for _ in range(q):
        Y = matmul(A.T, Q)
        Q, _ = qr(Y, procs_to_merge=qr_procs_to_merge)
        Y = matmul(A, Q)
        Q, _ = qr(Y, procs_to_merge=qr_procs_to_merge)

    # direct SVD of the projected matrix (Alg. 5.1 in [1])
    B = matmul(Q.T, A)
    B.resplit_(
        None
    )  # B will be of size ell x A.shape[1] and thus small enough to fit into memory of a single process
    U, sigma, V = svd.svd(B)  # actually just torch svd as input is not split anymore
    # lift the left factor back to the original space and truncate all factors to `rank`
    U = matmul(Q, U)[:, :rank]
    U.balance_()
    S = sigma[:rank]
    V = V[:, :rank]
    V.balance_()
    return U, S, V
55 changes: 55 additions & 0 deletions heat/core/linalg/tests/test_svdtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,58 @@ def test_hsvd_rank_part2(self):
self.assertTrue(U_orth_err <= dtype_tol)
self.assertTrue(V_orth_err <= dtype_tol)
self.assertTrue(true_rel_err <= dtype_tol)


class TestRSVD(TestCase):
    def test_rsvd(self):
        # sweep dtypes, splits, ranks and algorithm parameters; for each
        # combination verify output shapes, nonnegativity of the singular
        # values, and orthonormality of the factors U and V
        for dt in (ht.float32, ht.float64):
            tol = 1e-4 if dt == ht.float32 else 1e-10
            for split in (0, 1, None):
                X = ht.random.randn(200, 200, dtype=dt, split=split)
                for rk in (ht.MPI_WORLD.size, 10):
                    for oversamples in (5, 10):
                        for iters in (0, 1, 2, 3):
                            U, S, V = ht.linalg.rsvd(
                                X, rk, n_oversamples=oversamples, power_iter=iters
                            )
                            # shapes of the truncated factors
                            self.assertEqual(U.shape, (X.shape[0], rk))
                            self.assertEqual(S.shape, (rk,))
                            self.assertEqual(V.shape, (X.shape[1], rk))
                            # singular values are nonnegative
                            self.assertTrue(ht.all(S >= 0))
                            # U has orthonormal columns
                            id_U = ht.eye(rk, dtype=U.dtype, split=U.split)
                            self.assertTrue(ht.allclose(U.T @ U, id_U, rtol=tol, atol=tol))
                            # V has orthonormal columns
                            id_V = ht.eye(rk, dtype=V.dtype, split=V.split)
                            self.assertTrue(ht.allclose(V.T @ V, id_V, rtol=tol, atol=tol))

    def test_rsvd_catch_wrong_inputs(self):
        # invalid arguments must be rejected with the appropriate exception
        X = ht.random.randn(10, 10)
        with self.assertRaises(TypeError):
            ht.linalg.rsvd(X, "a")  # rank of wrong type
        with self.assertRaises(ValueError):
            ht.linalg.rsvd(X, 0)  # rank not positive
        with self.assertRaises(TypeError):
            ht.linalg.rsvd(X, 10, n_oversamples="a")  # n_oversamples of wrong type
        with self.assertRaises(ValueError):
            ht.linalg.rsvd(X, 10, n_oversamples=-1)  # n_oversamples negative
        with self.assertRaises(TypeError):
            ht.linalg.rsvd(X, 10, power_iter="a")  # power_iter of wrong type
        with self.assertRaises(ValueError):
            ht.linalg.rsvd(X, 10, power_iter=-1)  # power_iter negative
5 changes: 2 additions & 3 deletions heat/core/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ def __counter_sequence(
c_0 = (__counter & (max_count << 64)) >> 64
c_1 = __counter & max_count
total_elements = torch.prod(torch.tensor(shape))
if total_elements.item() > 2 * max_count:
raise ValueError(f"Shape is to big with {total_elements} elements")
# if total_elements.item() > 2 * max_count:
# raise ValueError(f"Shape is to big with {total_elements} elements")

if split is None:
values = total_elements.item() // 2 + total_elements.item() % 2
Expand Down Expand Up @@ -619,7 +619,6 @@ def randint(
x_0, x_1 = __threefry32(x_0, x_1, seed=__seed)
else: # torch.int64
x_0, x_1 = __threefry64(x_0, x_1, seed=__seed)

# stack the resulting sequence and normalize to given range
values = torch.stack([x_0, x_1], dim=1).flatten()[lslice].reshape(lshape)
# ATTENTION: this is biased and known, bias-free rejection sampling is difficult to do in parallel
Expand Down
2 changes: 1 addition & 1 deletion heat/core/tests/test_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ def test_rand(self):
self.assertTrue(ht.equal(a, b))

# Too big arrays cant be created
with self.assertRaises(ValueError):
with self.assertRaises(RuntimeError):
ht.random.randn(0x7FFFFFFFFFFFFFFF)
with self.assertRaises(ValueError):
ht.random.rand(3, 2, -2, 5, split=1)
Expand Down
1 change: 1 addition & 0 deletions heat/decomposition/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
"""

from .pca import *
from .dmd import *
Loading