do contiguous automatically before (i)Add/(i)Sub
jeffry1829 committed Jul 26, 2023
1 parent c9717a5 commit 5b730a8
Showing 3 changed files with 52 additions and 12 deletions.
18 changes: 14 additions & 4 deletions src/linalg/Add.cpp
@@ -62,10 +62,20 @@ namespace cytnx {
Rt._impl->invmapper(), 0);
} else {
#ifdef UNI_GPU
cytnx_error_msg(true,
"[Add][on GPU/CUDA] error two tensors must be contiguous. Call "
"Contiguous_() or Contiguous() first%s",
"\n");
// cytnx_error_msg(true,
// "[Add][on GPU/CUDA] error two tensors must be contiguous. Call "
// "Contiguous_() or Contiguous() first%s",
// "\n");
cytnx_warning_msg(
true,
"[Add][on GPU/CUDA] error two tensors must be contiguous. Call Contiguous_() or "
"Contiguous() first. Automatically did it.%s",
"\n");
Tensor _Tl = Lt.contiguous(), _Tr = Rt.contiguous();
checkCudaErrors(cudaSetDevice(Rt.device()));
linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()](
out._impl->storage()._impl, _Tl._impl->storage()._impl, _Tr._impl->storage()._impl,
out._impl->storage()._impl->size(), {}, {}, {}, 0);
#else
cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s",
"\n");
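Before this change, hitting the GPU branch with non-contiguous operands aborted through cytnx_error_msg; the hunk above downgrades that to cytnx_warning_msg, makes contiguous copies of both operands (_Tl, _Tr), and feeds those to the cuAri_ii kernel table (the trailing 0 selects the addition kernel; Sub.cpp below applies the identical pattern with opcode 2). A rough call-site sketch of what this enables follows. It is not part of the commit, and the header name, the zeros() factory, permute(), and the Type/Device accessors are assumptions about the public cytnx API:

#include "cytnx.hpp"
#include <iostream>

int main() {
  using namespace cytnx;
  // Two GPU tensors; permute() returns a non-contiguous view of A.
  Tensor A = zeros({3, 4}, Type.Double, Device.cuda);
  Tensor B = zeros({4, 3}, Type.Double, Device.cuda);
  Tensor Av = A.permute({1, 0});            // shape {4, 3}, non-contiguous
  std::cout << Av.is_contiguous() << "\n";  // prints 0

  // Previously this aborted on GPU; now it only warns, and contiguous
  // copies are made internally before the CUDA kernel is dispatched.
  Tensor C = linalg::Add(Av, B);
  return 0;
}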
18 changes: 14 additions & 4 deletions src/linalg/Sub.cpp
@@ -55,10 +55,20 @@ namespace cytnx {
Rt._impl->invmapper(), 2);
} else {
#ifdef UNI_GPU
cytnx_error_msg(true,
"[Sub][on GPU/CUDA] error two tensors must be contiguous. Call "
"Contiguous_() or Contiguous() first%s",
"\n");
// cytnx_error_msg(true,
// "[Sub][on GPU/CUDA] error two tensors must be contiguous. Call "
// "Contiguous_() or Contiguous() first%s",
// "\n");
cytnx_warning_msg(
true,
"[Sub][on GPU/CUDA] error two tensors must be contiguous. Call Contiguous_() or "
"Contiguous() first. Automatically did it.%s",
"\n");
Tensor _Tl = Lt.contiguous(), _Tr = Rt.contiguous();
checkCudaErrors(cudaSetDevice(Rt.device()));
cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()](
out._impl->storage()._impl, _Tl._impl->storage()._impl, _Tr._impl->storage()._impl,
out._impl->storage()._impl->size(), {}, {}, {}, 2);
#else
cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s",
"\n");
28 changes: 24 additions & 4 deletions src/linalg/iSub.cpp
@@ -61,10 +61,30 @@ namespace cytnx {
Rt._impl->invmapper(), 2);
} else {
#ifdef UNI_GPU
cytnx_error_msg(true,
"[iSub][on GPU/CUDA] error two tensors must be contiguous. Call "
"Contiguous_() or Contiguous() first%s",
"\n");
// cytnx_error_msg(true,
// "[iSub][on GPU/CUDA] error two tensors must be contiguous. Call "
// "Contiguous_() or Contiguous() first%s",
// "\n");
cytnx_warning_msg(
true,
"[iSub][on GPU/CUDA] error two tensors must be contiguous. Call Contiguous_() or "
"Contiguous() first. Automatically did it.%s",
"\n");

Lt.contiguous_();
R.contiguous_();
checkCudaErrors(cudaSetDevice(Rt.device()));
Tensor tmpo;
if (Lt.dtype() <= Rt.dtype())
tmpo = Lt;
else
tmpo = Lt.clone();
linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()](
tmpo._impl->storage()._impl, Lt._impl->storage()._impl, R._impl->storage()._impl,
Lt._impl->storage()._impl->size(), {}, {}, {}, 2);
// cytnx_error_msg(true, "[Developing] iAdd for GPU%s", "\n");

if (Lt.dtype() > Rt.dtype()) Lt = tmpo;

#else
cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s",
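The in-place variant needs extra care because the result is written back into Lt: judging from the hunk, when Lt.dtype() <= Rt.dtype() the output tensor (tmpo) aliases Lt and the kernel writes straight into it, while in the opposite case the kernel writes into a clone and Lt is rebound to that clone at the end (if (Lt.dtype() > Rt.dtype()) Lt = tmpo;). Note also that here the operands are made contiguous in place (Lt.contiguous_(), R.contiguous_()) rather than copied. A rough call-site sketch follows; it is not part of the commit, it assumes the public cytnx API (cytnx.hpp, ones(), permute(), the Type/Device accessors, and that linalg::iSub is callable directly rather than only through operator-=), and it assumes cytnx's Type ordering in which lower dtype() codes rank higher, which is what the Lt.dtype() <= Rt.dtype() test appears to rely on:

#include "cytnx.hpp"

int main() {
  using namespace cytnx;
  // A non-contiguous GPU operand: permute() only remaps indices, so
  // Lv.is_contiguous() is false and the GPU branch in the hunk is taken.
  Tensor A  = ones({3, 4}, Type.Double, Device.cuda);
  Tensor Lv = A.permute({1, 0});  // shape {4, 3}, non-contiguous
  Tensor R  = ones({4, 3}, Type.Float, Device.cuda);

  // Lt.dtype() <= Rt.dtype() here (Double ranks above Float), so the kernel
  // writes the result straight into Lv's storage. Before this commit the
  // call aborted; now it warns and makes the operands contiguous in place.
  linalg::iSub(Lv, R);  // in-place: Lv = Lv - R

  // With the dtypes reversed, the hunk writes into the clone tmpo instead
  // and rebinds Lt to it afterwards.
  return 0;
}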
