From 5b730a8c3be0dd1a2dc43bcdd9bb7506b041f05d Mon Sep 17 00:00:00 2001
From: jeffry1829
Date: Wed, 26 Jul 2023 12:05:17 +0800
Subject: [PATCH] do contiguous automatically before (i)Add/(i)Sub

---
 src/linalg/Add.cpp  | 18 ++++++++++++++----
 src/linalg/Sub.cpp  | 18 ++++++++++++++----
 src/linalg/iSub.cpp | 28 ++++++++++++++++++++++++----
 3 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/src/linalg/Add.cpp b/src/linalg/Add.cpp
index d2889e0c2..ece70aa2d 100644
--- a/src/linalg/Add.cpp
+++ b/src/linalg/Add.cpp
@@ -62,10 +62,20 @@ namespace cytnx {
                                                          Rt._impl->invmapper(), 0);
       } else {
 #ifdef UNI_GPU
-        cytnx_error_msg(true,
-                        "[Add][on GPU/CUDA] error two tensors must be contiguous. Call "
-                        "Contiguous_() or Contiguous() first%s",
-                        "\n");
+        // cytnx_error_msg(true,
+        //                 "[Add][on GPU/CUDA] error two tensors must be contiguous. Call "
+        //                 "Contiguous_() or Contiguous() first%s",
+        //                 "\n");
+        cytnx_warning_msg(
+          true,
+          "[Add][on GPU/CUDA] error two tensors must be contiguous. Call Contiguous_() or "
+          "Contiguous() first. Automatically did it.%s",
+          "\n");
+        Tensor _Tl = Lt.contiguous(), _Tr = Rt.contiguous();
+        checkCudaErrors(cudaSetDevice(Rt.device()));
+        linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()](
+          out._impl->storage()._impl, _Tl._impl->storage()._impl, _Tr._impl->storage()._impl,
+          out._impl->storage()._impl->size(), {}, {}, {}, 0);
 #else
         cytnx_error_msg(true, "[Add] fatal error, the tensor is on GPU without CUDA support.%s",
                         "\n");
diff --git a/src/linalg/Sub.cpp b/src/linalg/Sub.cpp
index bb814a53e..42ee69ff1 100644
--- a/src/linalg/Sub.cpp
+++ b/src/linalg/Sub.cpp
@@ -55,10 +55,20 @@ namespace cytnx {
                                                          Rt._impl->invmapper(), 2);
       } else {
 #ifdef UNI_GPU
-        cytnx_error_msg(true,
-                        "[Sub][on GPU/CUDA] error two tensors must be contiguous. Call "
-                        "Contiguous_() or Contiguous() first%s",
-                        "\n");
+        // cytnx_error_msg(true,
+        //                 "[Sub][on GPU/CUDA] error two tensors must be contiguous. Call "
+        //                 "Contiguous_() or Contiguous() first%s",
+        //                 "\n");
+        cytnx_warning_msg(
+          true,
+          "[Sub][on GPU/CUDA] error two tensors must be contiguous. Call Contiguous_() or "
+          "Contiguous() first. Automatically did it.%s",
+          "\n");
+        Tensor _Tl = Lt.contiguous(), _Tr = Rt.contiguous();
+        checkCudaErrors(cudaSetDevice(Rt.device()));
+        cytnx::linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()](
+          out._impl->storage()._impl, _Tl._impl->storage()._impl, _Tr._impl->storage()._impl,
+          out._impl->storage()._impl->size(), {}, {}, {}, 2);
 #else
         cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s",
                         "\n");
diff --git a/src/linalg/iSub.cpp b/src/linalg/iSub.cpp
index 58af932e8..e807afd7a 100644
--- a/src/linalg/iSub.cpp
+++ b/src/linalg/iSub.cpp
@@ -61,10 +61,30 @@ namespace cytnx {
                                                          Rt._impl->invmapper(), 2);
       } else {
 #ifdef UNI_GPU
-        cytnx_error_msg(true,
-                        "[iSub][on GPU/CUDA] error two tensors must be contiguous. Call "
-                        "Contiguous_() or Contiguous() first%s",
-                        "\n");
+        // cytnx_error_msg(true,
+        //                 "[iSub][on GPU/CUDA] error two tensors must be contiguous. Call "
+        //                 "Contiguous_() or Contiguous() first%s",
+        //                 "\n");
+        cytnx_warning_msg(
+          true,
+          "[iSub][on GPU/CUDA] error two tensors must be contiguous. Call Contiguous_() or "
+          "Contiguous() first. Automatically did it.%s",
+          "\n");
+
+        Lt.contiguous_();
+        R.contiguous_();
+        checkCudaErrors(cudaSetDevice(Rt.device()));
+        Tensor tmpo;
+        if (Lt.dtype() <= Rt.dtype())
+          tmpo = Lt;
+        else
+          tmpo = Lt.clone();
+        linalg_internal::lii.cuAri_ii[Lt.dtype()][Rt.dtype()](
+          tmpo._impl->storage()._impl, Lt._impl->storage()._impl, R._impl->storage()._impl,
+          Lt._impl->storage()._impl->size(), {}, {}, {}, 2);
+        // cytnx_error_msg(true, "[Developing] iAdd for GPU%s", "\n");
+
+        if (Lt.dtype() > Rt.dtype()) Lt = tmpo;
 #else
         cytnx_error_msg(true, "[Sub] fatal error, the tensor is on GPU without CUDA support.%s",