From 5b730a8c3be0dd1a2dc43bcdd9bb7506b041f05d Mon Sep 17 00:00:00 2001
From: jeffry1829
Date: Wed, 26 Jul 2023 12:05:17 +0800
Subject: [PATCH] do contiguous automatically before (i)Add/(i)Sub

---
 src/linalg/Add.cpp  | 18 ++++++++++++++----
 src/linalg/Sub.cpp  | 18 ++++++++++++++----
 src/linalg/iSub.cpp | 28 ++++++++++++++++++++++++----
 3 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/src/linalg/Add.cpp b/src/linalg/Add.cpp
index d2889e0c2..ece70aa2d 100644
--- a/src/linalg/Add.cpp
+++ b/src/linalg/Add.cpp
@@ -62,10 +62,20 @@ namespace cytnx {
                                                          Rt._impl->invmapper(), 0);
       } else {
 #ifdef UNI_GPU
-        cytnx_error_msg(true,
-                        "[Add][on GPU/CUDA] error two tensors must be contiguous. Call "
-                        "Contiguous_() or Contiguous() first%s",
-                        "\n");
+        // cytnx_error_msg(true,
+        //                 "[Add][on GPU/CUDA] error two tensors must be contiguous. Call "
+        //                 "Contiguous_() or Contiguous() first%s",
+        //                 "\n");
+        cytnx_warning_msg(
+          true,
+          "[Add][on GPU/CUDA] error two tensors must be contiguous. Call Contiguous_() or "
+          "Contiguous() first. Automatically did it.%s",
+          "\n");
+        Tensor _Tl = Lt.contiguous(), _Tr = Rt.contiguous();
+        checkCudaErrors(cudaSetDevice(Rt.device()));
+        linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()](
+          out._impl->storage()._impl, _Tl._impl->storage()._impl, _Tr._impl->storage()._impl,
+          out._impl->storage()._impl->size(), {}, {}, {}, 0);
 #else
         cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s",
                         "\n");
diff --git a/src/linalg/Sub.cpp b/src/linalg/Sub.cpp
index bb814a53e..42ee69ff1 100644
--- a/src/linalg/Sub.cpp
+++ b/src/linalg/Sub.cpp
@@ -55,10 +55,20 @@ namespace cytnx {
                                                          Rt._impl->invmapper(), 2);
       } else {
 #ifdef UNI_GPU
-        cytnx_error_msg(true,
-                        "[Sub][on GPU/CUDA] error two tensors must be contiguous. Call "
-                        "Contiguous_() or Contiguous() first%s",
-                        "\n");
+        // cytnx_error_msg(true,
+        //                 "[Sub][on GPU/CUDA] error two tensors must be contiguous. Call "
+        //                 "Contiguous_() or Contiguous() first%s",
+        //                 "\n");
+        cytnx_warning_msg(
+          true,
+          "[Sub][on GPU/CUDA] error two tensors must be contiguous. Call Contiguous_() or "
+          "Contiguous() first. Automatically did it.%s",
+          "\n");
+        Tensor _Tl = Lt.contiguous(), _Tr = Rt.contiguous();
+        checkCudaErrors(cudaSetDevice(Rt.device()));
+        cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()](
+          out._impl->storage()._impl, _Tl._impl->storage()._impl, _Tr._impl->storage()._impl,
+          out._impl->storage()._impl->size(), {}, {}, {}, 2);
 #else
         cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s",
                         "\n");
diff --git a/src/linalg/iSub.cpp b/src/linalg/iSub.cpp
index 58af932e8..e807afd7a 100644
--- a/src/linalg/iSub.cpp
+++ b/src/linalg/iSub.cpp
@@ -61,10 +61,30 @@ namespace cytnx {
                                                          Rt._impl->invmapper(), 2);
       } else {
 #ifdef UNI_GPU
-        cytnx_error_msg(true,
-                        "[iSub][on GPU/CUDA] error two tensors must be contiguous. Call "
-                        "Contiguous_() or Contiguous() first%s",
-                        "\n");
+        // cytnx_error_msg(true,
+        //                 "[iSub][on GPU/CUDA] error two tensors must be contiguous. Call "
+        //                 "Contiguous_() or Contiguous() first%s",
+        //                 "\n");
+        cytnx_warning_msg(
+          true,
+          "[iSub][on GPU/CUDA] error two tensors must be contiguous. Call Contiguous_() or "
+          "Contiguous() first. Automatically did it.%s",
+          "\n");
+
+        Lt.contiguous_();
+        R.contiguous_();
+        checkCudaErrors(cudaSetDevice(Rt.device()));
+        Tensor tmpo;
+        if (Lt.dtype() <= Rt.dtype())
+          tmpo = Lt;
+        else
+          tmpo = Lt.clone();
+        linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()](
+          tmpo._impl->storage()._impl, Lt._impl->storage()._impl, R._impl->storage()._impl,
+          Lt._impl->storage()._impl->size(), {}, {}, {}, 2);
+        // cytnx_error_msg(true, "[Developing] iAdd for GPU%s", "\n");
+
+        if (Lt.dtype() > Rt.dtype()) Lt = tmpo;
 #else
         cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s",