From 266d173669bdf297b7035dac75fcd12ef03dc4d7 Mon Sep 17 00:00:00 2001 From: HanatoK Date: Tue, 8 Oct 2024 14:25:26 -0500 Subject: [PATCH] Determine dl, dq or ds by compile-time template options --- src/colvar_rotation_derivative.h | 41 +++++++++++++++++--------------- src/colvaratoms.cpp | 2 +- src/colvarcomp_distances.cpp | 4 ++-- src/colvarcomp_rotations.cpp | 16 ++++++------- 4 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/colvar_rotation_derivative.h b/src/colvar_rotation_derivative.h index ff409d06c..17bbd0b85 100644 --- a/src/colvar_rotation_derivative.h +++ b/src/colvar_rotation_derivative.h @@ -9,7 +9,7 @@ template ::value, bool>::type = true> inline void read_atom_coord( size_t ia, const std::vector& pos, - cvm::real* x, cvm::real* y, cvm::real* z) { + cvm::real* __restrict__ x, cvm::real* __restrict__ y, cvm::real* __restrict__ z) { *x = pos[ia].x; *y = pos[ia].y; *z = pos[ia].z; @@ -18,7 +18,7 @@ inline void read_atom_coord( template ::value, bool>::type = true> inline void read_atom_coord( size_t ia, const std::vector& pos, - cvm::real* x, cvm::real* y, cvm::real* z) { + cvm::real* __restrict__ x, cvm::real* __restrict__ y, cvm::real* __restrict__ z) { *x = pos[ia].pos.x; *y = pos[ia].pos.y; *z = pos[ia].pos.z; @@ -327,12 +327,13 @@ struct rotation_derivative { * @param[out] dq0_out The output of derivative of Q * @param[out] ds_out The output of derivative of overlap matrix S */ + template void calc_derivative_impl( const cvm::rvector (&ds)[4][4], - cvm::rvector* const dl0_out, - cvm::vector1d* const dq0_out, - cvm::matrix2d* const ds_out) const { - if (ds_out != nullptr) { + cvm::rvector* __restrict__ const dl0_out, + cvm::vector1d* __restrict__ const dq0_out, + cvm::matrix2d* __restrict__ const ds_out) const { + if (use_ds) { // this code path is for debug_gradients, so not necessary to unroll the loop *ds_out = cvm::matrix2d(4, 4); for (int i = 0; i < 4; ++i) { @@ -341,7 +342,7 @@ struct rotation_derivative { } } } - if (dl0_out != nullptr) { + if (use_dl) { /* manually loop unrolling of the following loop: dl0_1.reset(); for (size_t i = 0; i < 4; i++) { @@ -367,7 +368,7 @@ struct rotation_derivative { tmp_Q0Q0[3][2] * ds[3][2] + tmp_Q0Q0[3][3] * ds[3][3]; } - if (dq0_out != nullptr) { + if (use_dq) { // we can skip this check if a fixed-size array is used if (dq0_out->size() != 4) dq0_out->resize(4); /* manually loop unrolling of the following loop: @@ -462,11 +463,12 @@ struct rotation_derivative { * @param[out] ds_1_out The output of derivative of overlap matrix S with * respect to ia-th atom of group 1 */ + template void calc_derivative_wrt_group1( - size_t ia, cvm::rvector* const dl0_1_out = nullptr, - cvm::vector1d* const dq0_1_out = nullptr, - cvm::matrix2d* const ds_1_out = nullptr) const { - if (dl0_1_out == nullptr && dq0_1_out == nullptr) return; + size_t ia, cvm::rvector* __restrict__ const dl0_1_out = nullptr, + cvm::vector1d* __restrict__ const dq0_1_out = nullptr, + cvm::matrix2d* __restrict__ const ds_1_out = nullptr) const { + // if (dl0_1_out == nullptr && dq0_1_out == nullptr) return; cvm::real a2x, a2y, a2z; // we can get rid of the helper function read_atom_coord if C++17 (constexpr) is available read_atom_coord(ia, m_pos2, &a2x, &a2y, &a2z); @@ -475,7 +477,7 @@ struct rotation_derivative { {{ 0.0, a2z, -a2y}, { a2x, -a2y, -a2z}, { a2y, a2x, 0.0}, { a2z, 0.0, a2x}}, {{-a2z, 0.0, a2x}, { a2y, a2x, 0.0}, {-a2x, a2y, -a2z}, { 0.0, a2z, a2y}}, {{ a2y, -a2x, 0.0}, { a2z, 0.0, a2x}, { 0.0, a2z, a2y}, {-a2x, -a2y, a2z}}}; - calc_derivative_impl(ds_1, dl0_1_out, dq0_1_out, ds_1_out); + calc_derivative_impl(ds_1, dl0_1_out, dq0_1_out, ds_1_out); } /*! @brief Calculate the derivatives of S, the leading eigenvalue L and * the leading eigenvector Q with respect to `m_pos2` @@ -487,11 +489,12 @@ struct rotation_derivative { * @param[out] ds_2_out The output of derivative of overlap matrix S with * respect to ia-th atom of group 2 */ + template void calc_derivative_wrt_group2( - size_t ia, cvm::rvector* const dl0_2_out = nullptr, - cvm::vector1d* const dq0_2_out = nullptr, - cvm::matrix2d* const ds_2_out = nullptr) const { - if (dl0_2_out == nullptr && dq0_2_out == nullptr) return; + size_t ia, cvm::rvector* __restrict__ const dl0_2_out = nullptr, + cvm::vector1d* __restrict__ const dq0_2_out = nullptr, + cvm::matrix2d* __restrict__ const ds_2_out = nullptr) const { + // if (dl0_2_out == nullptr && dq0_2_out == nullptr) return; cvm::real a1x, a1y, a1z; // we can get rid of the helper function read_atom_coord if C++17 (constexpr) is available read_atom_coord(ia, m_pos1, &a1x, &a1y, &a1z); @@ -500,7 +503,7 @@ struct rotation_derivative { {{ 0.0, -a1z, a1y}, { a1x, -a1y, -a1z}, { a1y, a1x, 0.0}, { a1z, 0.0, a1x}}, {{ a1z, 0.0, -a1x}, { a1y, a1x, 0.0}, {-a1x, a1y, -a1z}, { 0.0, a1z, a1y}}, {{-a1y, a1x, 0.0}, { a1z, 0.0, a1x}, { 0.0, a1z, a1y}, {-a1x, -a1y, a1z}}}; - calc_derivative_impl(ds_2, dl0_2_out, dq0_2_out, ds_2_out); + calc_derivative_impl(ds_2, dl0_2_out, dq0_2_out, ds_2_out); } }; @@ -564,7 +567,7 @@ void debug_gradients( // cvm::real const &a1x = pos1[ia].x; // cvm::real const &a1y = pos1[ia].y; // cvm::real const &a1z = pos1[ia].z; - deriv.calc_derivative_wrt_group2(ia, &dl0_2, &dq0_2, &ds_2); + deriv.template calc_derivative_wrt_group2(ia, &dl0_2, &dq0_2, &ds_2); // make an infitesimal move along each cartesian coordinate of // this atom, and solve again the eigenvector problem for (size_t comp = 0; comp < 3; comp++) { diff --git a/src/colvaratoms.cpp b/src/colvaratoms.cpp index 092bc607f..5e1ca2f44 100644 --- a/src/colvaratoms.cpp +++ b/src/colvaratoms.cpp @@ -1283,7 +1283,7 @@ void cvm::atom_group::calc_fit_forces_impl( fitting_force_grad += atom_grad; } if (B_ag_rotate) { - rot_deriv->calc_derivative_wrt_group1(j, nullptr, &dq0_1); + rot_deriv->calc_derivative_wrt_group1(j, nullptr, &dq0_1); // multiply by {\partial q}/\partial\vec{x}_j and add it to the fit gradients fitting_force_grad += sum_dxdq[0] * dq0_1[0] + sum_dxdq[1] * dq0_1[1] + diff --git a/src/colvarcomp_distances.cpp b/src/colvarcomp_distances.cpp index 1c7267f1e..6de68264c 100644 --- a/src/colvarcomp_distances.cpp +++ b/src/colvarcomp_distances.cpp @@ -1003,7 +1003,7 @@ void colvar::rmsd::calc_Jacobian_derivative() for (size_t ia = 0; ia < atoms->size(); ia++) { // Gradient of optimal quaternion wrt current Cartesian position - atoms->rot_deriv->calc_derivative_wrt_group1(ia, nullptr, &dq); + atoms->rot_deriv->calc_derivative_wrt_group1(ia, nullptr, &dq); g11 = 2.0 * (atoms->rot.q)[1]*dq[1]; g22 = 2.0 * (atoms->rot.q)[2]*dq[2]; @@ -1302,7 +1302,7 @@ void colvar::eigenvector::calc_Jacobian_derivative() // Gradient of optimal quaternion wrt current Cartesian position // trick: d(R^-1)/dx = d(R^t)/dx = (dR/dx)^t // we can just transpose the derivatives of the direct matrix - atoms->rot_deriv->calc_derivative_wrt_group1(ia, nullptr, &dq_1); + atoms->rot_deriv->calc_derivative_wrt_group1(ia, nullptr, &dq_1); g11 = 2.0 * quat0[1]*dq_1[1]; g22 = 2.0 * quat0[2]*dq_1[2]; diff --git a/src/colvarcomp_rotations.cpp b/src/colvarcomp_rotations.cpp index 12dd90e33..766a0870d 100644 --- a/src/colvarcomp_rotations.cpp +++ b/src/colvarcomp_rotations.cpp @@ -139,7 +139,7 @@ void colvar::orientation::apply_force(colvarvalue const &force) cvm::vector1d dq0_2; auto ag_force = atoms->get_group_force_object(); for (size_t ia = 0; ia < atoms->size(); ia++) { - rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); + rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); const auto f_ia = FQ[0] * dq0_2[0] + FQ[1] * dq0_2[1] + FQ[2] * dq0_2[2] + @@ -208,7 +208,7 @@ void colvar::orientation_angle::calc_gradients() rot_deriv_impl->prepare_derivative(rotation_derivative_dldq::use_dq); cvm::vector1d dq0_2; for (size_t ia = 0; ia < atoms->size(); ia++) { - rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); + rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); (*atoms)[ia].grad = (dxdq0 * dq0_2[0]); } } @@ -268,7 +268,7 @@ void colvar::orientation_proj::calc_gradients() rot_deriv_impl->prepare_derivative(rotation_derivative_dldq::use_dq); cvm::vector1d dq0_2; for (size_t ia = 0; ia < atoms->size(); ia++) { - rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); + rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); (*atoms)[ia].grad = (dxdq0 * dq0_2[0]); } } @@ -317,7 +317,7 @@ void colvar::tilt::calc_gradients() cvm::vector1d dq0_2; for (size_t ia = 0; ia < atoms->size(); ia++) { (*atoms)[ia].grad = cvm::rvector(0.0, 0.0, 0.0); - rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); + rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); for (size_t iq = 0; iq < 4; iq++) { (*atoms)[ia].grad += (dxdq[iq] * dq0_2[iq]); } @@ -354,7 +354,7 @@ void colvar::spin_angle::calc_gradients() cvm::vector1d dq0_2; for (size_t ia = 0; ia < atoms->size(); ia++) { (*atoms)[ia].grad = cvm::rvector(0.0, 0.0, 0.0); - rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); + rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); for (size_t iq = 0; iq < 4; iq++) { (*atoms)[ia].grad += (dxdq[iq] * dq0_2[iq]); } @@ -402,7 +402,7 @@ void colvar::euler_phi::calc_gradients() rot_deriv_impl->prepare_derivative(rotation_derivative_dldq::use_dq); cvm::vector1d dq0_2; for (size_t ia = 0; ia < atoms->size(); ia++) { - rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); + rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); (*atoms)[ia].grad = (dxdq0 * dq0_2[0]) + (dxdq1 * dq0_2[1]) + (dxdq2 * dq0_2[2]) + @@ -451,7 +451,7 @@ void colvar::euler_psi::calc_gradients() rot_deriv_impl->prepare_derivative(rotation_derivative_dldq::use_dq); cvm::vector1d dq0_2; for (size_t ia = 0; ia < atoms->size(); ia++) { - rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); + rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); (*atoms)[ia].grad = (dxdq0 * dq0_2[0]) + (dxdq1 * dq0_2[1]) + (dxdq2 * dq0_2[2]) + @@ -498,7 +498,7 @@ void colvar::euler_theta::calc_gradients() rot_deriv_impl->prepare_derivative(rotation_derivative_dldq::use_dq); cvm::vector1d dq0_2; for (size_t ia = 0; ia < atoms->size(); ia++) { - rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); + rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2); (*atoms)[ia].grad = (dxdq0 * dq0_2[0]) + (dxdq1 * dq0_2[1]) + (dxdq2 * dq0_2[2]) +