diff --git a/qiskit_aer/backends/wrappers/aer_circuit_binding.hpp b/qiskit_aer/backends/wrappers/aer_circuit_binding.hpp
index c67918485e..943f579693 100644
--- a/qiskit_aer/backends/wrappers/aer_circuit_binding.hpp
+++ b/qiskit_aer/backends/wrappers/aer_circuit_binding.hpp
@@ -143,7 +143,7 @@ void bind_aer_circuit(MODULE m) {
        << ", num_registers=" << circ.num_registers;

     ss << ", ops={";
-    for (auto i = 0; i < circ.ops.size(); ++i)
+    for (uint_t i = 0; i < circ.ops.size(); ++i)
       if (i == 0)
         ss << circ.ops[i];
       else
diff --git a/qiskit_aer/backends/wrappers/aer_state_binding.hpp b/qiskit_aer/backends/wrappers/aer_state_binding.hpp
index 45dd55bcbf..c3dd880bf4 100644
--- a/qiskit_aer/backends/wrappers/aer_state_binding.hpp
+++ b/qiskit_aer/backends/wrappers/aer_state_binding.hpp
@@ -130,8 +130,8 @@ void bind_aer_state(MODULE m) {
         size_t mat_len = (1UL << qubits.size());
         auto ptr = values.unchecked<2>();
         cmatrix_t mat(mat_len, mat_len);
-        for (auto i = 0; i < mat_len; ++i)
-          for (auto j = 0; j < mat_len; ++j)
+        for (uint_t i = 0; i < mat_len; ++i)
+          for (uint_t j = 0; j < mat_len; ++j)
             mat(i, j) = ptr(i, j);
         state.apply_unitary(qubits, mat);
       });
@@ -144,10 +144,10 @@ void bind_aer_state(MODULE m) {
         size_t mat_size = (1UL << control_qubits.size());
         auto ptr = values.unchecked<3>();
         std::vector<cmatrix_t> mats;
-        for (auto i = 0; i < mat_size; ++i) {
+        for (uint_t i = 0; i < mat_size; ++i) {
           cmatrix_t mat(mat_len, mat_len);
-          for (auto j = 0; j < mat_len; ++j)
-            for (auto k = 0; k < mat_len; ++k)
+          for (uint_t j = 0; j < mat_len; ++j)
+            for (uint_t k = 0; k < mat_len; ++k)
               mat(j, k) = ptr(i, j, k);
           mats.push_back(mat);
         }
diff --git a/src/controllers/aer_controller.hpp b/src/controllers/aer_controller.hpp
index 998d663d6c..b714ca4c33 100755
--- a/src/controllers/aer_controller.hpp
+++ b/src/controllers/aer_controller.hpp
@@ -414,7 +414,7 @@ size_t Controller::get_system_memory_mb() {
 size_t Controller::get_gpu_memory_mb() {
   size_t total_physical_memory = 0;
 #ifdef AER_THRUST_GPU
-  for (int_t iDev = 0; iDev < target_gpus_.size(); iDev++) {
+  for (uint_t iDev = 0; iDev < target_gpus_.size(); iDev++) {
     size_t freeMem, totalMem;
     cudaSetDevice(target_gpus_[iDev]);
     cudaMemGetInfo(&freeMem, &totalMem);
@@ -515,7 +515,7 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
   uint_t result_size;
   reg_t result_offset(circuits.size());
   result_size = 0;
-  for (int_t i = 0; i < circuits.size(); i++) {
+  for (uint_t i = 0; i < circuits.size(); i++) {
     result_offset[i] = result_size;
     result_size += circuits[i]->num_bind_params;
   }
@@ -532,11 +532,11 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
   // set parallelization for experiments
   try {
     uint_t res_pos = 0;
-    for (int i = 0; i < circuits.size(); i++) {
+    for (uint_t i = 0; i < circuits.size(); i++) {
       executors[i] = make_circuit_executor(methods[i]);
       required_memory_mb_list[i] =
           executors[i]->required_memory_mb(config, *circuits[i], noise_model);
-      for (int j = 0; j < circuits[i]->num_bind_params; j++) {
+      for (uint_t j = 0; j < circuits[i]->num_bind_params; j++) {
         result.results[res_pos++].metadata.add(required_memory_mb_list[i],
                                                "required_memory_mb");
       }
@@ -588,9 +588,9 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
     reg_t seeds(result_size);
     reg_t avg_seeds(result_size);
     int_t iseed = 0;
-    for (int_t i = 0; i < circuits.size(); i++) {
+    for (uint_t i = 0; i < circuits.size(); i++) {
       if (circuits[i]->num_bind_params > 1) {
-        for (int_t j = 0; i < circuits[i]->num_bind_params; i++)
+        for (uint_t j = 0; i < circuits[i]->num_bind_params; i++)
           seeds[iseed++] = circuits[i]->seed_for_params[j];
       } else
         seeds[iseed++] = circuits[i]->seed;
@@ -598,9 +598,9 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
     MPI_Allreduce(seeds.data(), avg_seeds.data(), result_size, MPI_UINT64_T,
                   MPI_SUM, MPI_COMM_WORLD);
     iseed = 0;
-    for (int_t i = 0; i < circuits.size(); i++) {
+    for (uint_t i = 0; i < circuits.size(); i++) {
       if (circuits[i]->num_bind_params > 1) {
-        for (int_t j = 0; i < circuits[i]->num_bind_params; i++)
+        for (uint_t j = 0; i < circuits[i]->num_bind_params; i++)
          circuits[i]->seed_for_params[j] =
              avg_seeds[iseed++] / num_processes_;
       } else
@@ -626,7 +626,7 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,

   bool all_failed = true;
   result.status = Result::Status::completed;
-  for (int i = 0; i < result.results.size(); ++i) {
+  for (uint_t i = 0; i < result.results.size(); ++i) {
     auto &experiment = result.results[i];
     if (experiment.status == ExperimentResult::Status::completed) {
       all_failed = false;
diff --git a/src/controllers/controller_execute.hpp b/src/controllers/controller_execute.hpp
index f3128a7739..88d9c460f1 100644
--- a/src/controllers/controller_execute.hpp
+++ b/src/controllers/controller_execute.hpp
@@ -118,13 +118,13 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
         param_circ->global_phase_for_params.resize(num_params);
         for (size_t j = 0; j < num_params; j++)
           param_circ->global_phase_for_params[j] = params.second[j];
-      } else if (instr_pos >= num_instr) {
+      } else if ((uint_t)instr_pos >= num_instr) {
         throw std::invalid_argument(
             R"(Invalid parameterized qobj: instruction position out of range)");
       }
       auto &op = param_circ->ops[instr_pos];
       if (!op.has_bind_params) {
-        if (param_pos >= op.params.size()) {
+        if ((uint_t)param_pos >= op.params.size()) {
           throw std::invalid_argument(
               R"(Invalid parameterized qobj: instruction param position out of range)");
         }
@@ -160,7 +160,7 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
         // negative position is for global phase
         circ->global_phase_angle = params.second[j];
       } else {
-        if (instr_pos >= num_instr) {
+        if ((uint_t)instr_pos >= num_instr) {
           std::cout << "Invalid parameterization: instruction position "
                        "out of range: "
                     << instr_pos << std::endl;
@@ -168,7 +168,7 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
             R"(Invalid parameterization: instruction position out of range)");
       }
       auto &op = param_circ->ops[instr_pos];
-      if (param_pos >= op.params.size()) {
+      if ((uint_t)param_pos >= op.params.size()) {
         throw std::invalid_argument(
             R"(Invalid parameterization: instruction param position out of range)");
       }
@@ -215,7 +215,7 @@ Result controller_execute(std::vector<std::shared_ptr<Circuit>> &input_circs,
   for (auto &circ : circs) {
     circ->seed = seed + seed_shift;
     circ->seed_for_params.resize(circ->num_bind_params);
-    for (int_t i = 0; i < circ->num_bind_params; i++) {
+    for (uint_t i = 0; i < circ->num_bind_params; i++) {
       circ->seed_for_params[i] = seed + seed_shift;
       seed_shift += 2113;
     }
diff --git a/src/controllers/state_controller.hpp b/src/controllers/state_controller.hpp
index 62d316b6e8..028806e822 100644
--- a/src/controllers/state_controller.hpp
+++ b/src/controllers/state_controller.hpp
@@ -630,7 +630,7 @@ void AerState::set_seed(int_t seed) {
 reg_t AerState::allocate_qubits(uint_t num_qubits) {
   assert_not_initialized();
   reg_t ret;
-  for (auto i = 0; i < num_qubits; ++i)
+  for (uint_t i = 0; i < num_qubits; ++i)
     ret.push_back(num_of_qubits_++);
   return ret;
 };
@@ -816,7 +816,7 @@ reg_t AerState::initialize_statevector(uint_t num_of_qubits, complex_t *data,

   reg_t ret;
   ret.reserve(num_of_qubits);
-  for (auto i = 0; i < num_of_qubits; ++i)
+  for (uint_t i = 0; i < num_of_qubits; ++i)
     ret.push_back(i);
   return ret;
 };
@@ -861,7 +861,7 @@ reg_t AerState::initialize_density_matrix(uint_t num_of_qubits, complex_t *data,

   reg_t ret;
   ret.reserve(num_of_qubits);
-  for (auto i = 0; i < num_of_qubits; ++i)
+  for (uint_t i = 0; i < num_of_qubits; ++i)
     ret.push_back(i);
   return ret;
 };
@@ -892,7 +892,7 @@ AER::Vector<complex_t> AerState::move_to_vector() {
     throw std::runtime_error("move_to_vector() supports only statevector or "
                              "matrix_product_state or density_matrix methods");
   }
-  for (auto i = 0; i < num_of_qubits_; ++i)
+  for (uint_t i = 0; i < num_of_qubits_; ++i)
     op.qubits.push_back(i);
   op.string_params.push_back("s");
   op.save_type = Operations::DataSubType::single;
@@ -907,7 +907,7 @@ AER::Vector<complex_t> AerState::move_to_vector() {
             .value()["s"]
             .value());
     clear();
-    return std::move(vec);
+    return vec;
   } else if (method_ == Method::density_matrix) {
     auto mat =
         std::move(static_cast<DataMap<AverageData, matrix<complex_t>, 1>>(
@@ -917,7 +917,7 @@ AER::Vector<complex_t> AerState::move_to_vector() {
     auto vec = Vector<complex_t>::move_from_buffer(
         mat.GetColumns() * mat.GetRows(), mat.move_to_buffer());
     clear();
-    return std::move(vec);
+    return vec;
   } else {
     throw std::runtime_error("move_to_vector() supports only statevector or "
                              "matrix_product_state or density_matrix methods");
@@ -941,7 +941,7 @@ matrix<complex_t> AerState::move_to_matrix() {
     throw std::runtime_error("move_to_matrix() supports only statevector or "
                              "matrix_product_state or density_matrix methods");
   }
-  for (auto i = 0; i < num_of_qubits_; ++i)
+  for (uint_t i = 0; i < num_of_qubits_; ++i)
     op.qubits.push_back(i);
   op.string_params.push_back("s");
   op.save_type = Operations::DataSubType::single;
@@ -966,7 +966,7 @@ matrix<complex_t> AerState::move_to_matrix() {
             .value())["s"]
             .value());
     clear();
-    return std::move(mat);
+    return mat;
   } else {
     throw std::runtime_error("move_to_matrix() supports only statevector or "
                              "matrix_product_state or density_matrix methods");
diff --git a/src/framework/json.hpp b/src/framework/json.hpp
index 172f4eb1ee..f400641c50 100644
--- a/src/framework/json.hpp
+++ b/src/framework/json.hpp
@@ -263,7 +263,7 @@ void std::from_json(const json_t &js,
 template <typename T>
 void std::to_json(json_t &js, const AER::Vector<std::complex<T>> &vec) {
   std::vector<std::vector<T>> out;
-  for (int64_t i = 0; i < vec.size(); ++i) {
+  for (size_t i = 0; i < vec.size(); ++i) {
     auto &z = vec[i];
     out.push_back(std::vector<T>{real(z), imag(z)});
   }
diff --git a/src/framework/linalg/vector.hpp b/src/framework/linalg/vector.hpp
index 21cd0c9a7e..f8ee5bde84 100644
--- a/src/framework/linalg/vector.hpp
+++ b/src/framework/linalg/vector.hpp
@@ -35,7 +35,8 @@ T *malloc_data(size_t size) {
   // Data allocated here may need to be properly aligned to be compliant with
   // AVX2.
   void *data = nullptr;
-  posix_memalign(&data, 64, sizeof(T) * size);
+  if (posix_memalign(&data, 64, sizeof(T) * size) != 0)
+    throw std::runtime_error("Cannot allocate memory by posix_memalign");
   return reinterpret_cast<T *>(data);
 #else
   return reinterpret_cast<T *>(malloc(sizeof(T) * size));
diff --git a/src/framework/operations.hpp b/src/framework/operations.hpp
index 317edfcead..dc4cbb5b06 100644
--- a/src/framework/operations.hpp
+++ b/src/framework/operations.hpp
@@ -52,11 +52,13 @@ enum class BinaryOp {
   GreaterEqual
 };

+bool isBoolBinaryOp(const BinaryOp binary_op);
 bool isBoolBinaryOp(const BinaryOp binary_op) {
   return binary_op != BinaryOp::BitAnd && binary_op != BinaryOp::BitOr &&
          binary_op != BinaryOp::BitXor;
 }

+uint_t truncate(const uint_t val, const size_t width);
 uint_t truncate(const uint_t val, const size_t width) {
   size_t shift = 64 - width;
   return (val << shift) >> shift;
@@ -68,8 +70,8 @@ enum class ValueType { Bool, Uint };

 class ScalarType {
 public:
-  ScalarType(const ValueType type_, const size_t width_)
-      : type(type_), width(width_) {}
+  ScalarType(const ValueType _type, const size_t width_)
+      : type(_type), width(width_) {}

 public:
   const ValueType type;
@@ -97,8 +99,8 @@ class Bool : public ScalarType {

 class CExpr {
 public:
-  CExpr(const CExprType expr_type_, const std::shared_ptr<ScalarType> type_)
-      : expr_type(expr_type_), type(type_) {}
+  CExpr(const CExprType _expr_type, const std::shared_ptr<ScalarType> _type)
+      : expr_type(_expr_type), type(_type) {}
   virtual bool eval_bool(const std::string &memory) { return false; };
   virtual uint_t eval_uint(const std::string &memory) { return 0ul; };
@@ -109,9 +111,9 @@ class CExpr {

 class CastExpr : public CExpr {
 public:
-  CastExpr(std::shared_ptr<ScalarType> type,
+  CastExpr(std::shared_ptr<ScalarType> _type,
            const std::shared_ptr<CExpr> operand_)
-      : CExpr(CExprType::Cast, type), operand(operand_) {}
+      : CExpr(CExprType::Cast, _type), operand(operand_) {}

   virtual bool eval_bool(const std::string &memory) {
     if (type->type != ValueType::Bool)
@@ -143,9 +145,9 @@ class CastExpr : public CExpr {

 class VarExpr : public CExpr {
 public:
-  VarExpr(std::shared_ptr<ScalarType> type,
-          const std::vector<uint_t> &cbit_idxs)
-      : CExpr(CExprType::Var, type), cbit_idxs(cbit_idxs) {}
+  VarExpr(std::shared_ptr<ScalarType> _type,
+          const std::vector<uint_t> &_cbit_idxs)
+      : CExpr(CExprType::Var, _type), cbit_idxs(_cbit_idxs) {}

   virtual bool eval_bool(const std::string &memory) {
     if (type->type != ValueType::Bool)
@@ -164,7 +166,6 @@ class VarExpr : public CExpr {
 private:
   uint_t eval_uint_(const std::string &memory) {
     uint_t val = 0ul;
-    const uint_t memory_size = memory.size();
     uint_t shift = 0;
     for (const uint_t cbit_idx : cbit_idxs) {
       if (memory.size() <= cbit_idx)
@@ -182,7 +183,8 @@ class VarExpr : public CExpr {

 class ValueExpr : public CExpr {
 public:
-  ValueExpr(std::shared_ptr<ScalarType> type) : CExpr(CExprType::Value, type) {}
+  ValueExpr(std::shared_ptr<ScalarType> _type)
+      : CExpr(CExprType::Value, _type) {}
 };

 class UintValue : public ValueExpr {
@@ -943,6 +945,11 @@ inline Op make_bfunc(const std::string &mask, const std::string &val,
   return op;
 }

+Op make_gate(const std::string &name, const reg_t &qubits,
+             const std::vector<complex_t> &params,
+             const std::vector<std::string> &string_params,
+             const int_t conditional, const std::shared_ptr<CExpr> expr,
+             const std::string &label);
 Op make_gate(const std::string &name, const reg_t &qubits,
              const std::vector<complex_t> &params,
              const std::vector<std::string> &string_params,
@@ -1313,12 +1320,12 @@ inline Op bind_parameter(const Op &src, const uint_t iparam,
   if (src.params.size() > 0) {
     uint_t stride = src.params.size() / num_params;
     op.params.resize(stride);
-    for (int_t i = 0; i < stride; i++)
+    for (uint_t i = 0; i < stride; i++)
       op.params[i] = src.params[iparam * stride + i];
   } else if (src.mats.size() > 0) {
     uint_t stride = src.mats.size() / num_params;
     op.mats.resize(stride);
-    for (int_t i = 0; i < stride; i++)
+    for (uint_t i = 0; i < stride; i++)
       op.mats[i] = src.mats[iparam * stride + i];
   }
   return op;
@@ -1528,6 +1535,7 @@ json_t op_to_json(const Op &op) {
   return ret;
 }

+void to_json(json_t &js, const OpType &type);
 void to_json(json_t &js, const OpType &type) {
   std::stringstream ss;
   ss << type;
diff --git a/src/framework/pybind_json.hpp b/src/framework/pybind_json.hpp
index 7ac889c3c2..108e1b34ec 100644
--- a/src/framework/pybind_json.hpp
+++ b/src/framework/pybind_json.hpp
@@ -32,6 +32,8 @@
 #include "misc/warnings.hpp"
 DISABLE_WARNING_PUSH
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+
 #include
 #include
 #include
@@ -40,6 +42,7 @@
 #include
 DISABLE_WARNING_POP
+#pragma GCC diagnostic warning "-Wfloat-equal"

 #include "framework/json.hpp"
@@ -293,7 +296,7 @@ void std::from_json(const json_t &js, py::object &o) {
     o = py::str(js.get<std::string>());
   } else if (js.is_array()) {
     std::vector<py::object> obj(js.size());
-    for (auto i = 0; i < js.size(); i++) {
+    for (size_t i = 0; i < js.size(); i++) {
       py::object tmp;
       from_json(js[i], tmp);
       obj[i] = tmp;
diff --git a/src/framework/qobj.hpp b/src/framework/qobj.hpp
index 01084fd20e..2a25f8cfe2 100644
--- a/src/framework/qobj.hpp
+++ b/src/framework/qobj.hpp
@@ -155,12 +155,12 @@ Qobj::Qobj(const inputdata_t &input) {
         // negative position is for global phase
         param_circuit->global_phase_angle = params.second[j];
       } else {
-        if (instr_pos >= num_instr) {
+        if ((uint_t)instr_pos >= num_instr) {
           throw std::invalid_argument(
               R"(Invalid parameterized qobj: instruction position out of range)");
         }
         auto &op = param_circuit->ops[instr_pos];
-        if (param_pos >= op.params.size()) {
+        if ((uint_t)param_pos >= op.params.size()) {
           throw std::invalid_argument(
               R"(Invalid parameterized qobj: instruction param position out of range)");
         }
diff --git a/src/framework/utils.hpp b/src/framework/utils.hpp
index 11a9fb28d0..8e750e336d 100644
--- a/src/framework/utils.hpp
+++ b/src/framework/utils.hpp
@@ -1270,7 +1270,7 @@ uint_t (*popcount)(uint_t) = is_avx2_supported() ? &_instrinsic_weight
 bool (*hamming_parity)(uint_t) = &_naive_parity;
 uint_t (*popcount)(uint_t) = &_naive_weight;
 #endif
-
+size_t get_system_memory_mb();
 size_t get_system_memory_mb() {
   size_t total_physical_memory = 0;
 #if defined(__linux__)
diff --git a/src/noise/noise_model.hpp b/src/noise/noise_model.hpp
index 834916b75b..23dbccc9bd 100644
--- a/src/noise/noise_model.hpp
+++ b/src/noise/noise_model.hpp
@@ -386,7 +386,7 @@ void NoiseModel::enable_superop_method(int num_threads) {
   exs.resize(std::max(num_threads, 1));
 #pragma omp parallel for if (num_threads > 1 && quantum_errors_.size() > 10)   \
     num_threads(num_threads)
-  for (int i = 0; i < quantum_errors_.size(); i++) {
+  for (int i = 0; i < (int_t)quantum_errors_.size(); i++) {
     try {
       quantum_errors_[i].compute_superoperator();
     } catch (...) {
@@ -406,7 +406,7 @@ void NoiseModel::enable_kraus_method(int num_threads) {
   exs.resize(std::max(num_threads, 1));
 #pragma omp parallel for if (num_threads > 1 && quantum_errors_.size() > 10)   \
     num_threads(num_threads)
-  for (int i = 0; i < quantum_errors_.size(); i++) {
+  for (int i = 0; i < (int_t)quantum_errors_.size(); i++) {
     try {
       quantum_errors_[i].compute_kraus();
     } catch (...) {
@@ -851,6 +851,8 @@ cmatrix_t NoiseModel::op2superop(const Operations::Op &op) const {
     case ParamGate::cu:
       return Linalg::SMatrix::cu(op.params[0], op.params[1], op.params[2],
                                  op.params[3]);
+    default:
+      break;
     }
   } else {
     // Check if we can convert this gate to a standard superoperator matrix
@@ -897,6 +899,8 @@ cmatrix_t NoiseModel::op2unitary(const Operations::Op &op) const {
       return Linalg::Matrix::rzx(op.params[0]);
     case ParamGate::cp:
       return Linalg::Matrix::cphase(op.params[0]);
+    default:
+      break;
     }
   } else {
     // Check if we can convert this gate to a standard superoperator matrix
diff --git a/src/simulators/batch_shots_executor.hpp b/src/simulators/batch_shots_executor.hpp
index 8f218d049b..612e5ed289 100644
--- a/src/simulators/batch_shots_executor.hpp
+++ b/src/simulators/batch_shots_executor.hpp
@@ -133,7 +133,7 @@ void BatchShotsExecutor<state_t>::set_parallelization(
   enable_batch_multi_shots_ = false;
   if (batched_shots_gpu_ && Base::sim_device_ != Device::CPU) {
     enable_batch_multi_shots_ = true;
-    if (circ.num_qubits > batched_shots_gpu_max_qubits_)
+    if (circ.num_qubits > (uint_t)batched_shots_gpu_max_qubits_)
       enable_batch_multi_shots_ = false;
     else if (circ.shots == 1 && circ.num_bind_params == 1)
       enable_batch_multi_shots_ = false;
@@ -156,8 +156,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
   }
   Noise::NoiseModel dummy_noise;
   state_t dummy_state;
-  int_t i;
-  int_t i_begin, n_shots;
+  uint_t i_begin, n_shots;

   Base::num_qubits_ = circ.num_qubits;
   Base::num_creg_memory_ = circ.num_memory;
@@ -195,7 +194,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
                                fusion_result);
     auto time_taken =
         std::chrono::duration<double>(myclock_t::now() - timer_start).count();
-    for (i = 0; i < circ.num_bind_params; i++) {
+    for (uint_t i = 0; i < circ.num_bind_params; i++) {
       ExperimentResult &result = *(result_it + i);
       result.metadata.copy(fusion_result.metadata);
       // Add batched multi-shots optimizaiton metadata
@@ -222,13 +221,13 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
   while (i_begin < Base::num_local_states_) {
     // loop for states can be stored in available memory
     n_shots = Base::num_local_states_ - i_begin;
-    n_shots = std::min(n_shots, (int_t)Base::num_max_shots_);
+    n_shots = std::min(n_shots, Base::num_max_shots_);

     // allocate shots
     this->allocate_states(n_shots, config);

     // Set state config
-    for (i = 0; i < n_shots; i++) {
+    for (uint_t i = 0; i < n_shots; i++) {
       Base::states_[i].set_parallelization(Base::parallel_state_update_);
     }
@@ -256,7 +255,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
     auto apply_ops_lambda = [this, circ, init_rng, first_meas, final_ops,
                              dummy_noise, &result_it](int_t i) {
       std::vector<RngEngine> rng(Base::num_states_in_group_[i]);
-      for (int_t j = 0; j < Base::num_states_in_group_[i]; j++) {
+      for (uint_t j = 0; j < Base::num_states_in_group_[i]; j++) {
         uint_t iparam =
             Base::global_state_index_ + Base::top_state_of_group_[i] + j;
         if (iparam == 0)
@@ -284,8 +283,8 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
   if (Base::num_process_per_experiment_ > 1) {
     Base::gather_creg_memory(Base::cregs_, Base::state_index_begin_);

-    for (i = 0; i < circ.num_bind_params; i++) {
-      for (int_t j = 0; j < circ.shots; j++) {
+    for (uint_t i = 0; i < circ.num_bind_params; i++) {
+      for (uint_t j = 0; j < circ.shots; j++) {
         (result_it + i)
             ->save_count_data(Base::cregs_[i * circ.shots + j],
                               Base::save_creg_memory_);
@@ -304,7 +303,7 @@ void BatchShotsExecutor<state_t>::run_circuit_with_sampling(
     }
     if (nDev > Base::num_groups_)
       nDev = Base::num_groups_;
-    for (i = 0; i < circ.num_bind_params; i++)
+    for (uint_t i = 0; i < circ.num_bind_params; i++)
       (result_it + i)
           ->metadata.add(nDev, "batched_shots_optimization_parallel_gpus");
   }
@@ -362,10 +361,9 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(

   Base::max_matrix_qubits_ = Base::get_max_matrix_qubits(circ_opt);

-  int_t i;
-  int_t i_begin, n_shots;
+  uint_t i_begin, n_shots;

-  for (i = 0; i < Base::num_bind_params_; i++) {
+  for (uint_t i = 0; i < Base::num_bind_params_; i++) {
     ExperimentResult &result = *(result_it + i);
     result.metadata.copy(fusion_result.metadata);
     // Add batched multi-shots optimizaiton metadata
@@ -382,13 +380,13 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
   while (i_begin < Base::num_local_states_) {
     // loop for states can be stored in available memory
     n_shots = Base::num_local_states_ - i_begin;
-    n_shots = std::min(n_shots, (int_t)Base::num_max_shots_);
+    n_shots = std::min(n_shots, Base::num_max_shots_);

     // allocate shots
     this->allocate_states(n_shots, config);

     // Set state config
-    for (i = 0; i < n_shots; i++) {
+    for (uint_t i = 0; i < n_shots; i++) {
       Base::states_[i].set_parallelization(Base::parallel_state_update_);
     }
@@ -418,7 +416,7 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
                              noise](int_t i) {
       par_results[i].resize(circ.num_bind_params);
       std::vector<RngEngine> rng(Base::num_states_in_group_[i]);
-      for (int_t j = 0; j < Base::num_states_in_group_[i]; j++) {
+      for (uint_t j = 0; j < Base::num_states_in_group_[i]; j++) {
         uint_t ishot =
             Base::global_state_index_ + Base::top_state_of_group_[i] + j;
         uint_t iparam = ishot / Base::num_shots_per_bind_param_;
@@ -441,13 +439,13 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
         Base::num_groups_, apply_ops_lambda, Base::num_groups_);

     for (auto &res : par_results) {
-      for (i = 0; i < Base::num_bind_params_; i++) {
+      for (uint_t i = 0; i < Base::num_bind_params_; i++) {
         (result_it + i)->combine(std::move(res[i]));
       }
     }

     // collect measured bits and copy memory
-    for (i = 0; i < n_shots; i++) {
+    for (uint_t i = 0; i < n_shots; i++) {
       if (Base::num_process_per_experiment_ > 1) {
         Base::states_[i].qreg().read_measured_data(
             Base::cregs_[Base::global_state_index_ + i_begin + i]);
@@ -469,7 +467,7 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
   if (Base::num_process_per_experiment_ > 1) {
     Base::gather_creg_memory(Base::cregs_, Base::state_index_begin_);

-    for (i = 0; i < circ_opt.shots; i++) {
+    for (uint_t i = 0; i < circ_opt.shots; i++) {
       uint_t iparam = i / Base::num_shots_per_bind_param_;
       (result_it + iparam)
           ->save_count_data(Base::cregs_[i], Base::save_creg_memory_);
@@ -487,7 +485,7 @@ void BatchShotsExecutor<state_t>::run_circuit_shots(
     }
     if (nDev > Base::num_groups_)
       nDev = Base::num_groups_;
-    for (i = 0; i < Base::num_bind_params_; i++)
+    for (uint_t i = 0; i < Base::num_bind_params_; i++)
       (result_it + i)
           ->metadata.add(nDev, "batched_shots_optimization_parallel_gpus");
   }
@@ -516,7 +514,7 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
         Base::states_[j].qreg().read_measured_data(Base::states_[j].creg());
         std::vector<Operations::Op> nops = noise.sample_noise_loc(
             *op, rng[j - Base::top_state_of_group_[i_group]]);
-        for (int_t k = 0; k < nops.size(); k++) {
+        for (uint_t k = 0; k < nops.size(); k++) {
           Base::states_[j].apply_op(
               nops[k], *result_it,
               rng[j - Base::top_state_of_group_[i_group]], false);
@@ -534,13 +532,13 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
       uint_t non_pauli_gate_count = 0;
       if (num_inner_threads > 1) {
 #pragma omp parallel for reduction(+: count_ops,non_pauli_gate_count) num_threads(num_inner_threads)
-        for (int_t j = 0; j < count; j++) {
+        for (int_t j = 0; j < (int_t)count; j++) {
           noise_ops[j] = noise.sample_noise_loc(*op, rng[j]);
           if (!(noise_ops[j].size() == 0 ||
                 (noise_ops[j].size() == 1 && noise_ops[j][0].name == "id"))) {
             count_ops++;
-            for (int_t k = 0; k < noise_ops[j].size(); k++) {
+            for (uint_t k = 0; k < noise_ops[j].size(); k++) {
               if (noise_ops[j][k].name != "id" && noise_ops[j][k].name != "x" &&
                   noise_ops[j][k].name != "y" && noise_ops[j][k].name != "z" &&
                   noise_ops[j][k].name != "pauli") {
@@ -551,13 +549,13 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
           }
         }
       } else {
-        for (int_t j = 0; j < count; j++) {
+        for (uint_t j = 0; j < count; j++) {
           noise_ops[j] = noise.sample_noise_loc(*op, rng[j]);
           if (!(noise_ops[j].size() == 0 ||
                 (noise_ops[j].size() == 1 && noise_ops[j][0].name == "id"))) {
             count_ops++;
-            for (int_t k = 0; k < noise_ops[j].size(); k++) {
+            for (uint_t k = 0; k < noise_ops[j].size(); k++) {
               if (noise_ops[j][k].name != "id" && noise_ops[j][k].name != "x" &&
                   noise_ops[j][k].name != "y" && noise_ops[j][k].name != "z" &&
                   noise_ops[j][k].name != "pauli") {
@@ -584,7 +582,7 @@ void BatchShotsExecutor<state_t>::apply_ops_batched_shots_for_group(
         continue;
       }
       // call apply_op for each state
-      for (int_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
+      for (uint_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
         uint_t is = Base::top_state_of_group_[i_group] + j;
         uint_t ip = (Base::global_state_index_ + is) /
                     Base::num_shots_per_bind_param_;
@@ -602,13 +600,13 @@ template <typename state_t>
 void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
     const int_t i_group, const std::vector<std::vector<Operations::Op>> &ops,
     ResultItr result_it, std::vector<RngEngine> &rng) {
-  int_t i, j, k, count, nop, pos = 0;
+  uint_t count;
   uint_t istate = Base::top_state_of_group_[i_group];
   count = ops.size();
   reg_t mask(count);
   std::vector<bool> finished(count, false);
-  for (i = 0; i < count; i++) {
+  for (uint_t i = 0; i < count; i++) {
     int_t cond_reg = -1;

     if (finished[i])
@@ -620,7 +618,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
     mask[i] = 1;

     // find same ops to be exectuted in a batch
-    for (j = i + 1; j < count; j++) {
+    for (uint_t j = i + 1; j < count; j++) {
       if (finished[j]) {
         mask[j] = 0;
         continue;
@@ -638,7 +636,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
       }

       mask[j] = true;
-      for (k = 0; k < ops[i].size(); k++) {
+      for (uint_t k = 0; k < ops[i].size(); k++) {
         if (ops[i][k].conditional) {
           cond_reg = ops[i][k].conditional_reg;
         }
@@ -657,7 +655,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(
                                            cond_reg, mask);

     // batched execution on same ops
-    for (k = 0; k < ops[i].size(); k++) {
+    for (uint_t k = 0; k < ops[i].size(); k++) {
       Operations::Op cop = ops[i][k];

       // mark op conditional to mask shots
@@ -666,7 +664,7 @@ void BatchShotsExecutor<state_t>::apply_batched_noise_ops(

       if (!apply_batched_op(istate, cop, result_it, rng, false)) {
         // call apply_op for each state
-        for (int_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
+        for (uint_t j = 0; j < Base::num_states_in_group_[i_group]; j++) {
           uint_t is = Base::top_state_of_group_[i_group] + j;
           uint_t ip = (Base::global_state_index_ + is) /
                       Base::num_shots_per_bind_param_;
@@ -688,7 +686,7 @@ void BatchShotsExecutor<state_t>::apply_batched_expval(const int_t istate,
                                                        ResultItr result) {
   std::vector<double> val;
   bool variance = (op.type == Operations::OpType::save_expval_var);
-  for (int_t i = 0; i < op.expval_params.size(); i++) {
+  for (uint_t i = 0; i < op.expval_params.size(); i++) {
     std::complex<double> cprm;

     if (variance)
@@ -706,7 +704,7 @@ void BatchShotsExecutor<state_t>::apply_batched_expval(const int_t istate,
     return;

   if (variance) {
-    for (int_t i = 0; i < val.size() / 2; i++) {
+    for (uint_t i = 0; i < val.size() / 2; i++) {
       uint_t ip = (Base::global_state_index_ + istate + i) /
                   Base::num_shots_per_bind_param_;
@@ -719,7 +717,7 @@ void BatchShotsExecutor<state_t>::apply_batched_expval(const int_t istate,
                                 op.save_type);
     }
   } else {
-    for (int_t i = 0; i < val.size(); i++) {
+    for (uint_t i = 0; i < val.size(); i++) {
       uint_t ip = (Base::global_state_index_ + istate + i) /
                   Base::num_shots_per_bind_param_;
@@ -737,7 +735,7 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
     InputIterator first_meas, InputIterator last_meas, uint_t shots,
     uint_t i_group, ResultItr result, std::vector<RngEngine> &rng) {
   uint_t par_states = 1;
-  if (Base::max_parallel_threads_ >= Base::num_groups_ * 2) {
+  if ((uint_t)Base::max_parallel_threads_ >= Base::num_groups_ * 2) {
     par_states =
         std::min((uint_t)(Base::max_parallel_threads_ / Base::num_groups_),
                  Base::num_states_in_group_[i_group]);
@@ -798,7 +796,7 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
       state_end = Base::num_states_in_group_[i_group] * (i + 1) / par_states;

       for (; i_state < state_end; i_state++) {
-        for (int_t j = 0; j < shots; j++)
+        for (uint_t j = 0; j < shots; j++)
           rnd_shots[i_state * shots + j] =
               rng[i_state].rand(0, 1) + (double)i_state;
       }
@@ -830,14 +828,14 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
       uint_t is = Base::top_state_of_group_[i_group] + i_state;
       uint_t ip = (Base::global_state_index_ + is);

-      for (int_t i = 0; i < shots; i++) {
+      for (uint_t i = 0; i < shots; i++) {
         ClassicalRegister creg;
         creg.initialize(num_memory, num_registers);
         reg_t all_samples(meas_qubits.size());

         uint_t val = allbit_samples[i_state * shots + i] & mask;
         reg_t allbit_sample = Utils::int2reg(val, 2, Base::num_qubits_);
-        for (int_t mq = 0; mq < meas_qubits.size(); mq++) {
+        for (uint_t mq = 0; mq < meas_qubits.size(); mq++) {
           all_samples[mq] = allbit_sample[meas_qubits[mq]];
         }
@@ -870,7 +868,7 @@ void BatchShotsExecutor<state_t>::batched_measure_sampler(
   auto time_taken =
       std::chrono::duration<double>(myclock_t::now() - timer_start).count();

-  for (int_t i_state = 0; i_state < Base::num_states_in_group_[i_group];
+  for (uint_t i_state = 0; i_state < Base::num_states_in_group_[i_group];
        i_state++) {
     uint_t ip = Base::global_state_index_ +
                 Base::top_state_of_group_[i_group] + i_state;
diff --git a/src/simulators/chunk_utils.hpp b/src/simulators/chunk_utils.hpp
index 3277e2c0fd..fa52c6282f 100644
--- a/src/simulators/chunk_utils.hpp
+++ b/src/simulators/chunk_utils.hpp
@@ -22,13 +22,22 @@
 namespace AER {
 namespace Chunk {

+void get_qubits_inout(const int chunk_qubits, const reg_t &qubits,
+                      reg_t &qubits_in, reg_t &qubits_out);
+void get_inout_ctrl_qubits(const Operations::Op &op, const uint_t num_qubits,
+                           reg_t &qubits_in, reg_t &qubits_out);
+Operations::Op correct_gate_op_in_chunk(const Operations::Op &op,
+                                        reg_t &qubits_in);
+void block_diagonal_matrix(const uint_t gid, const uint_t chunk_bits,
+                           reg_t &qubits, cvector_t &diag);
+
 void get_qubits_inout(const int chunk_qubits, const reg_t &qubits,
                       reg_t &qubits_in, reg_t &qubits_out) {
-  int_t i;
+  uint_t i;
   qubits_in.clear();
   qubits_out.clear();
   for (i = 0; i < qubits.size(); i++) {
-    if (qubits[i] < chunk_qubits) { // in chunk
+    if (qubits[i] < (uint_t)chunk_qubits) { // in chunk
       qubits_in.push_back(qubits[i]);
     } else {
       qubits_out.push_back(qubits[i]);
@@ -40,7 +49,7 @@ void get_inout_ctrl_qubits(const Operations::Op &op, const uint_t num_qubits,
                            reg_t &qubits_in, reg_t &qubits_out) {
   if (op.type == Operations::OpType::gate &&
       (op.name[0] == 'c' || op.name.find("mc") == 0)) {
-    for (int i = 0; i < op.qubits.size(); i++) {
+    for (uint_t i = 0; i < op.qubits.size(); i++) {
       if (op.qubits[i] < num_qubits)
         qubits_in.push_back(op.qubits[i]);
       else
diff --git a/src/simulators/circuit_executor.hpp b/src/simulators/circuit_executor.hpp
index 75bd1e1d3a..e49eef13f2 100644
--- a/src/simulators/circuit_executor.hpp
+++ b/src/simulators/circuit_executor.hpp
@@ -114,7 +114,7 @@ class Executor : public Base {
   uint_t distributed_group_;    // group id of distribution
   int_t distributed_proc_bits_; // distributed_procs_=2^distributed_proc_bits_
                                 // (if nprocs != power of 2, set -1)
-  int num_process_per_experiment_ = 1;
+  uint_t num_process_per_experiment_ = 1;

 #ifdef AER_MPI
   // communicator group to simulate a circuit (for multi-experiments)
@@ -215,6 +215,20 @@ class Executor : public Base {
   void gather_creg_memory(std::vector<ClassicalRegister> &cregs,
                           reg_t &shot_index);
 #endif
+
+  // Sample n-measurement outcomes without applying the measure operation
+  // to the system state
+  virtual std::vector<reg_t> sample_measure(const reg_t &qubits, uint_t shots,
+                                            RngEngine &rng) const {
+    std::vector<reg_t> ret;
+    return ret;
+  };
+  virtual std::vector<reg_t> sample_measure(state_t &state,
+                                            const reg_t &qubits, uint_t shots,
+                                            std::vector<RngEngine> &rng) const {
+    // this is for single rng, impement in sub-class for multi-shots case
+    return state.sample_measure(qubits, shots, rng[0]);
+  }
 };

 template <class state_t>
@@ -437,7 +451,6 @@ void Executor<state_t>::set_parallelization(const Config &config,
   distributed_group_ = myrank_ / distributed_procs_;

   distributed_proc_bits_ = 0;
-  int proc_bits = 0;
   uint_t p = distributed_procs_;
   while (p > 1) {
     if ((p & 1) != 0) { // procs is not power of 2
@@ -518,11 +531,11 @@ void Executor<state_t>::set_parallelization(const Config &config,
       // Parallel shots is > 1
       // Limit parallel shots by available memory and number of shots
       // And assign the remaining threads to state update
-      int circ_memory_mb =
-          required_memory_mb(config, circ, noise) / num_process_per_experiment_;
+      int circ_memory_mb = (int)(required_memory_mb(config, circ, noise) /
+                                 num_process_per_experiment_);
       size_t mem_size =
           (sim_device_ == Device::GPU) ? max_gpu_memory_mb_ : max_memory_mb_;
-      if (mem_size < circ_memory_mb)
+      if (mem_size < (size_t)circ_memory_mb)
         throw std::runtime_error(
             "a circuit requires more memory than max_memory_mb.");
       // If circ memory is 0, set it to 1 so that we don't divide by zero
@@ -561,7 +574,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
   rng.set_seed(circ.seed);

   // Output data container
-  for (int_t i = 0; i < circ.num_bind_params; i++) {
+  for (uint_t i = 0; i < circ.num_bind_params; i++) {
     ExperimentResult &result = *(result_it + i);
     result.set_config(config);
     result.metadata.add(method_names_.at(method), "method");
@@ -602,7 +615,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
     // Ideal circuit
     if (noise.is_ideal()) {
       opt_circ = circ;
-      for (int_t i = 0; i < circ.num_bind_params; i++) {
+      for (uint_t i = 0; i < circ.num_bind_params; i++) {
        ExperimentResult &result = *(result_it + i);
        result.metadata.add("ideal", "noise");
      }
@@ -610,7 +623,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
     // Readout error only
     else if (noise.has_quantum_errors() == false) {
       opt_circ = noise.sample_noise(circ, rng);
-      for (int_t i = 0; i < circ.num_bind_params; i++) {
+      for (uint_t i = 0; i < circ.num_bind_params; i++) {
        ExperimentResult &result = *(result_it + i);
        result.metadata.add("readout", "noise");
      }
@@ -621,7 +634,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
       // Sample noise using SuperOp method
       opt_circ =
           noise.sample_noise(circ, rng, Noise::NoiseModel::Method::superop);
-      for (int_t i = 0; i < circ.num_bind_params; i++) {
+      for (uint_t i = 0; i < circ.num_bind_params; i++) {
        ExperimentResult &result = *(result_it + i);
        result.metadata.add("superop", "noise");
      }
@@ -631,7 +644,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
              noise.opset().contains(Operations::OpType::superop)) {
       opt_circ =
           noise.sample_noise(circ, rng, Noise::NoiseModel::Method::kraus);
-      for (int_t i = 0; i < circ.num_bind_params; i++) {
+      for (uint_t i = 0; i < circ.num_bind_params; i++) {
        ExperimentResult &result = *(result_it + i);
        result.metadata.add("kraus", "noise");
      }
@@ -639,7 +652,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
     // General circuit noise sampling
     else {
       noise_sampling = true;
-      for (int_t i = 0; i < circ.num_bind_params; i++) {
+      for (uint_t i = 0; i < circ.num_bind_params; i++) {
        ExperimentResult &result = *(result_it + i);
        result.metadata.add("circuit", "noise");
      }
@@ -658,7 +671,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
         run_circuit_shots(opt_circ, noise, config, rng, result_it, false);
       }
     }
-    for (int_t i = 0; i < circ.num_bind_params; i++) {
+    for (uint_t i = 0; i < circ.num_bind_params; i++) {
       ExperimentResult &result = *(result_it + i);
       // Report success
       result.status = ExperimentResult::Status::completed;
@@ -692,7 +705,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
     auto timer_stop = myclock_t::now(); // stop timer
     double time_taken =
         std::chrono::duration<double>(timer_stop - timer_start).count();
-    for (int_t i = 0; i < circ.num_bind_params; i++) {
+    for (uint_t i = 0; i < circ.num_bind_params; i++) {
       ExperimentResult &result = *(result_it + i);
       result.time_taken = time_taken;
       // save time also to metadata to pick time in primitive result
@@ -701,7 +714,7 @@ void Executor<state_t>::run_circuit(Circuit &circ,
   }
   // If an exception occurs during execution, catch it and pass it to the output
   catch (std::exception &e) {
-    for (int_t i = 0; i < circ.num_bind_params; i++) {
+    for (uint_t i = 0; i < circ.num_bind_params; i++) {
       ExperimentResult &result = *(result_it + i);
       result.status = ExperimentResult::Status::error;
       result.message = e.what();
@@ -816,30 +829,26 @@ void Executor<state_t>::run_circuit_shots(
   std::vector<ClassicalRegister> cregs;
   reg_t shot_begin(distributed_procs_);
   reg_t shot_end(distributed_procs_);
-  for (int_t i = 0; i < distributed_procs_; i++) {
+  for (uint_t i = 0; i < distributed_procs_; i++) {
     shot_begin[i] = num_shots * i / distributed_procs_;
     shot_end[i] = num_shots * (i + 1) / distributed_procs_;
   }
   uint_t num_local_shots =
       shot_end[distributed_rank_] - shot_begin[distributed_rank_];

-  int max_matrix_qubits;
-  auto fusion_pass = transpile_fusion(circ.opset(), config);
+  int max_matrix_qubits = 1;
   if (!sample_noise) {
     Noise::NoiseModel dummy_noise;
     state_t dummy_state;
-    auto fusion_pass = transpile_fusion(circ.opset(), config);
     ExperimentResult fusion_result;
+    auto fusion_pass = transpile_fusion(circ.opset(), config);
     fusion_pass.optimize_circuit(circ, dummy_noise, dummy_state.opset(),
                                  fusion_result);
-    for (int_t i = 0; i < circ.num_bind_params; i++) {
+    for (uint_t i = 0; i < circ.num_bind_params; i++) {
       ExperimentResult &result = *(result_it + i);
       result.metadata.copy(fusion_result.metadata);
     }
     max_matrix_qubits = get_max_matrix_qubits(circ);
-  } else {
-    max_matrix_qubits = get_max_matrix_qubits(circ);
-    max_matrix_qubits = std::max(max_matrix_qubits, (int)fusion_pass.max_qubit);
   }

   num_bind_params_ = circ.num_bind_params;
@@ -857,9 +866,9 @@ void Executor<state_t>::run_circuit_shots(
                      init_rng, max_matrix_qubits, num_local_shots](int_t i) {
     state_t state;
-    uint_t i_shot, shot_end;
+    uint_t i_shot, e_shot;
     i_shot = num_local_shots * i / par_shots;
-    shot_end = num_local_shots * (i + 1) / par_shots;
+    e_shot = num_local_shots * (i + 1) / par_shots;

     auto fusion_pass = transpile_fusion(circ.opset(), config);
@@ -871,7 +880,7 @@ void Executor<state_t>::run_circuit_shots(
     state.set_distribution(this->num_process_per_experiment_);
     state.set_num_global_qubits(circ.num_qubits);

-    for (; i_shot < shot_end; i_shot++) {
+    for (; i_shot < e_shot; i_shot++) {
       RngEngine rng;
       uint_t shot_index = shot_begin[distributed_rank_] + i_shot;
       uint_t iparam = shot_index / circ.shots;
@@ -892,7 +901,9 @@ void Executor<state_t>::run_circuit_shots(
         circ_opt = noise.sample_noise(circ, rng);
         fusion_pass.optimize_circuit(circ_opt, dummy_noise, state.opset(),
                                      result);
-        state.set_max_matrix_qubits(get_max_matrix_qubits(circ_opt));
+        int max_bits = get_max_matrix_qubits(circ_opt);
+        state.set_max_matrix_qubits(
+            std::max(max_bits, (int)fusion_pass.max_qubit));
       } else
         state.set_max_matrix_qubits(max_matrix_qubits);
@@ -947,11 +958,11 @@ void Executor<state_t>::run_circuit_shots(
     num_shots = circ.shots * circ.num_bind_params;
     auto save_cregs = [this, &par_results, par_shots, num_shots, circ,
                        cregs](int_t i) {
-      uint_t i_shot, shot_end;
+      uint_t i_shot, e_shot;
       i_shot = num_shots * i / par_shots;
-      shot_end = num_shots * (i + 1) / par_shots;
+      e_shot = num_shots * (i + 1) / par_shots;

-      for (; i_shot < shot_end; i_shot++) {
+      for (; i_shot < e_shot; i_shot++) {
         uint_t ip = i_shot / circ.shots;
         par_results[i][ip].save_count_data(cregs[i_shot], save_creg_memory_);
       }
@@ -962,12 +973,12 @@ void Executor<state_t>::run_circuit_shots(
 #endif

   for (auto &res : par_results) {
-    for (int_t i = 0; i < circ.num_bind_params; i++) {
+    for (uint_t i = 0; i < circ.num_bind_params; i++) {
       (result_it + i)->combine(std::move(res[i]));
     }
   }

   if (sim_device_ == Device::GPU) {
-    for (int_t i = 0; i < circ.num_bind_params; i++) {
+    for (uint_t i = 0; i < circ.num_bind_params; i++) {
 #ifdef AER_CUSTATEVEC
       (result_it + i)->metadata.add(cuStateVec_enable_, "cuStateVec_enable");
 #endif
@@ -1292,7 +1303,7 @@ int_t Executor<state_t>::get_matrix_bits(const Operations::Op &op) const {
 template <class state_t>
 int_t Executor<state_t>::get_max_matrix_qubits(const Circuit &circ) const {
   int_t max_bits = 0;
-  int_t i;
+  uint_t i;

   if (sim_device_ != Device::CPU) { // Only applicable for GPU (and Thrust)
     for (i = 0; i < circ.ops.size(); i++) {
@@ -1315,7 +1326,6 @@ bool Executor<state_t>::has_statevector_ops(const Circuit &circ) const {
 template <class state_t>
 void Executor<state_t>::gather_creg_memory(
     std::vector<ClassicalRegister> &cregs, reg_t &shot_index) {
-  int_t i, j;
   uint_t n64, i64, ibit, num_local_shots;

   if (distributed_procs_ == 0)
@@ -1337,9 +1347,9 @@ void Executor<state_t>::gather_creg_memory(

   reg_t bin_memory(n64 * num_local_shots, 0);
   // compress memory string to binary
-#pragma omp parallel for private(i, j, i64, ibit)
-  for (i = 0; i < num_local_shots; i++) {
-    for (j = 0; j < size; j++) {
+#pragma omp parallel for private(i64, ibit)
+  for (int_t i = 0; i < (int_t)num_local_shots; i++) {
+    for (int_t j = 0; j < size; j++) {
       i64 = j >> 6;
       ibit = j & 63;
       if (cregs[shot_index[distributed_rank_] + i].creg_memory()[j] == '1') {
@@ -1352,21 +1362,22 @@ void Executor<state_t>::gather_creg_memory(
   std::vector<int> recv_counts(distributed_procs_);
   std::vector<int> recv_offset(distributed_procs_);

-  for (i = 0; i < distributed_procs_ - 1; i++) {
+  for (uint_t i = 0; i < distributed_procs_ - 1; i++) {
     recv_offset[i] = shot_index[i];
     recv_counts[i] = shot_index[i + 1] - shot_index[i];
   }
   recv_offset[distributed_procs_ - 1] = shot_index[distributed_procs_ - 1];
-  recv_counts[i] = cregs.size() - shot_index[distributed_procs_ - 1];
+  recv_counts[distributed_procs_ - 1] =
+      cregs.size() - shot_index[distributed_procs_ - 1];

   MPI_Allgatherv(&bin_memory[0], n64 * num_local_shots, MPI_UINT64_T, &recv[0],
                  &recv_counts[0], &recv_offset[0], MPI_UINT64_T,
                  distributed_comm_);

   // store gathered memory
-#pragma omp parallel for private(i, j, i64, ibit)
-  for (i = 0; i < cregs.size(); i++) {
-    for (j = 0; j < size; j++) {
+#pragma omp parallel for private(i64, ibit)
+  for (int_t i = 0; i < (int_t)cregs.size(); i++) {
+    for (int_t j = 0; j < size; j++) {
       i64 = j >> 6;
       ibit = j & 63;
       if (((recv[i * n64 + i64] >> ibit) & 1) == 1)
diff --git a/src/simulators/density_matrix/densitymatrix.hpp b/src/simulators/density_matrix/densitymatrix.hpp
index cdbc6c8336..206d458fcb 100755
--- a/src/simulators/density_matrix/densitymatrix.hpp
+++ b/src/simulators/density_matrix/densitymatrix.hpp
@@ -242,13 +242,13 @@ void DensityMatrix<data_t>::initialize_from_vector(list_t &&vec) {

 template <typename data_t>
 void DensityMatrix<data_t>::transpose() {
-  const size_t rows = BaseMatrix::num_rows();
+  const int_t rows = BaseMatrix::num_rows();
 #pragma omp parallel for if (BaseVector::num_qubits_ >                         \
                              BaseVector::omp_threshold_ &&                     \
                              BaseVector::omp_threads_ > 1)                     \
     num_threads(BaseVector::omp_threads_)
   for (int_t i = 0; i < rows; i++) {
-    for (int_t j = i + 1; j < rows; j++) {
+    for (uint_t j = i + 1; j < rows; j++) {
       const uint_t pos_a = i * rows + j;
       const uint_t pos_b = j * rows + i;
       const auto tmp = BaseVector::data_[pos_a];
@@ -483,7 +483,7 @@ DensityMatrix<data_t>::expval_pauli(const reg_t &qubits,
   auto lambda = [&](const int_t i, double &val_re, double &val_im) -> void {
     (void)val_im; // unused
     auto idx_vec = ((i << 1) & mask_u) | (i & mask_l);
-    auto idx_mat = idx_vec ^ x_mask + nrows * idx_vec;
+    auto idx_mat = (idx_vec ^ x_mask) + nrows * idx_vec;
     // Since rho is hermitian rho[i, j] + rho[j, i] = 2 real(rho[i, j])
     auto val = 2 * std::real(phase * BaseVector::data_[idx_mat]);
     if (z_mask && (AER::Utils::popcount(idx_vec & z_mask) & 1)) {
@@ -511,7 +511,7 @@ double DensityMatrix<data_t>::expval_pauli_non_diagonal_chunk(
   auto lambda = [&](const int_t i, double &val_re, double &val_im) -> void {
     (void)val_im; // unused

-    auto idx_mat = i ^ x_mask + nrows * i;
+    auto idx_mat = (i ^ x_mask) + nrows * i;
     auto val = std::real(phase * BaseVector::data_[idx_mat]);
     if (z_mask && (AER::Utils::popcount(i & z_mask) & 1)) {
       val = -val;
diff --git a/src/simulators/density_matrix/densitymatrix_executor.hpp b/src/simulators/density_matrix/densitymatrix_executor.hpp
index 08708bf8ff..96429ed804 100644
--- a/src/simulators/density_matrix/densitymatrix_executor.hpp
+++ b/src/simulators/density_matrix/densitymatrix_executor.hpp
@@ -40,6 +40,7 @@ class Executor : public CircuitExecutor::ParallelStateExecutor<state_t>,
   using Base = CircuitExecutor::MultiStateExecutor<state_t>;
   using BasePar = CircuitExecutor::ParallelStateExecutor<state_t>;
   using BaseBatch = CircuitExecutor::BatchShotsExecutor<state_t>;
+  using Base::sample_measure;

 protected:
 public:
@@ -203,14 +204,14 @@ class Executor : public CircuitExecutor::ParallelStateExecutor<state_t>,
 //-------------------------------------------------------------------------
 template <class state_t>
 void Executor<state_t>::initialize_qreg(uint_t num_qubits) {
-  for (int_t i = 0; i < Base::states_.size(); i++) {
+  for (uint_t i = 0; i < Base::states_.size(); i++) {
     Base::states_[i].qreg().set_num_qubits(BasePar::chunk_bits_);
   }

   if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
 #pragma omp parallel for
-    for (int_t ig = 0; ig < Base::num_groups_; ig++) {
-      for (int_t iChunk = Base::top_state_of_group_[ig];
+    for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
+      for (uint_t iChunk = Base::top_state_of_group_[ig];
            iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
         if (Base::global_state_index_ + iChunk == 0) {
           Base::states_[iChunk].qreg().initialize();
@@ -220,7 +221,7 @@ void Executor<state_t>::initialize_qreg(uint_t num_qubits) {
       }
     }
   } else {
-    for (int_t i = 0; i < Base::states_.size(); i++) {
+    for (uint_t i = 0; i < Base::states_.size(); i++) {
       if (Base::global_state_index_ + i == 0) {
         Base::states_[i].qreg().initialize();
       } else {
@@ -236,11 +237,10 @@ void Executor<state_t>::initialize_from_vector(const list_t &vec) {
   if ((1ull << (Base::num_qubits_ * 2)) == vec.size()) {
     BasePar::initialize_from_vector(vec);
   } else if ((1ull << (Base::num_qubits_ * 2)) == vec.size() * vec.size()) {
-    int_t iChunk;
     if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
 #pragma omp parallel for
-      for (int_t ig = 0; ig < Base::num_groups_; ig++) {
-        for (int_t iChunk = Base::top_state_of_group_[ig];
+      for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
+        for (uint_t iChunk = Base::top_state_of_group_[ig];
              iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
           uint_t irow_chunk = ((iChunk + Base::global_state_index_) >>
                                ((Base::num_qubits_ - BasePar::chunk_bits_)))
@@ -251,7 +251,7 @@ void Executor<state_t>::initialize_from_vector(const list_t &vec) {
                               << (BasePar::chunk_bits_);

           // copy part of state for this chunk
-          uint_t i, row, col;
+          uint_t i;
           list_t vec1(1ull << BasePar::chunk_bits_);
           list_t vec2(1ull << BasePar::chunk_bits_);
@@ -264,7 +264,7 @@ void Executor<state_t>::initialize_from_vector(const list_t &vec) {
         }
       }
     } else {
-      for (iChunk = 0; iChunk < Base::states_.size(); iChunk++) {
+      for (uint_t iChunk = 0; iChunk < Base::states_.size(); iChunk++) {
         uint_t irow_chunk = ((iChunk + Base::global_state_index_) >>
                              ((Base::num_qubits_ - BasePar::chunk_bits_)))
                             << (BasePar::chunk_bits_);
@@ -274,7 +274,7 @@ void Executor<state_t>::initialize_from_vector(const list_t &vec) {
                             << (BasePar::chunk_bits_);

         // copy part of state for this chunk
-        uint_t i, row, col;
+        uint_t i;
         list_t vec1(1ull << BasePar::chunk_bits_);
         list_t vec2(1ull << BasePar::chunk_bits_);
@@ -515,12 +515,12 @@ void Executor<state_t>::apply_save_amplitudes_sq(const Operations::Op &op,
     throw std::invalid_argument(
         "Invalid save_amplitudes_sq instructions (empty params).");
   }
-  const int_t size = op.int_params.size();
+  const uint_t size = op.int_params.size();
   rvector_t amps_sq(size);

   int_t iChunk;
 #pragma omp parallel for if (BasePar::chunk_omp_parallel_) private(iChunk)
-  for (iChunk = 0; iChunk < Base::states_.size(); iChunk++) {
+  for (iChunk = 0; iChunk < (int_t)Base::states_.size(); iChunk++) {
     uint_t irow, icol;
     irow = (Base::global_state_index_ + iChunk) >>
            ((Base::num_qubits_ - BasePar::chunk_bits_));
@@ -529,7 +529,7 @@ void Executor<state_t>::apply_save_amplitudes_sq(const Operations::Op &op,
     if (irow != icol)
       continue;

-    for (int_t i = 0; i < size; ++i) {
+    for (uint_t i = 0; i < size; ++i) {
       uint_t idx = BasePar::mapped_index(op.int_params[i]);
       if (idx >= (irow << BasePar::chunk_bits_) &&
           idx < ((irow + 1) << BasePar::chunk_bits_))
@@ -691,7 +691,7 @@ cmatrix_t Executor<state_t>::reduced_density_matrix(const reg_t &qubits,
   if (qubits.empty()) {
     reduced_state = cmatrix_t(1, 1);
     std::complex<double> sum = 0.0;
-    for (int_t i = 0; i < Base::states_.size(); i++) {
+    for (uint_t i = 0; i < Base::states_.size(); i++) {
       sum += Base::states_[i].qreg().trace();
     }
 #ifdef AER_MPI
@@ -719,7 +719,7 @@ template <class state_t>
 cmatrix_t
 Executor<state_t>::reduced_density_matrix_helper(const reg_t &qubits,
                                                  const reg_t &qubits_sorted) {
-  int_t iChunk;
+  uint_t iChunk;
   uint_t size = 1ull << (BasePar::chunk_bits_ * 2);
   uint_t mask = (1ull << (BasePar::chunk_bits_)) - 1;
   uint_t num_threads = Base::states_[0].qreg().get_omp_threads();
@@ -753,12 +753,12 @@ Executor<state_t>::reduced_density_matrix_helper(const reg_t &qubits,
         BasePar::recv_data(tmp.data(), size, 0, iChunk);
 #endif
 #pragma omp parallel for if (num_threads > 1) num_threads(num_threads)
-      for (i = 0; i < size; i++) {
+      for (i = 0; i < (int_t)size; i++) {
         uint_t irow = (i >> (BasePar::chunk_bits_)) + irow_chunk;
         uint_t icol = (i & mask) + icol_chunk;
         uint_t irow_out = 0;
         uint_t icol_out = 0;
-        int j;
+        uint_t j;
         for (j = 0; j < qubits.size(); j++) {
           if ((irow >> qubits[j]) & 1) {
             irow &= ~(1ull << qubits[j]);
@@ -803,7 +803,7 @@ void Executor<state_t>::apply_save_density_matrix(
                                             final_op);

   std::vector<bool> copied(Base::num_bind_params_, false);
-  for (int_t i = 0; i < root.num_shots(); i++) {
+  for (uint_t i = 0; i < root.num_shots(); i++) {
     uint_t ip = root.param_index(i);
     if (!copied[ip]) {
       (result + ip)
@@ -843,7 +843,7 @@ void Executor<state_t>::apply_save_state(CircuitExecutor::Branch &root,
   std::vector<bool> copied(Base::num_bind_params_, false);
   if (final_op) {
     auto state = Base::states_[root.state_index()].move_to_matrix();
-    for (int_t i = 0; i < root.num_shots(); i++) {
+    for (uint_t i = 0; i < root.num_shots(); i++) {
       uint_t ip = root.param_index(i);
       if (!copied[ip]) {
         (result + ip)
@@ -855,7 +855,7 @@ void Executor<state_t>::apply_save_state(CircuitExecutor::Branch &root,
   } else {
     auto state = Base::states_[root.state_index()].copy_to_matrix();

-    for (int_t i = 0; i < root.num_shots(); i++) {
+    for (uint_t i = 0; i < root.num_shots(); i++) {
       uint_t ip = root.param_index(i);
       if (!copied[ip]) {
         (result + ip)
@@ -878,7 +878,7 @@ void Executor<state_t>::apply_save_probs(CircuitExecutor::Branch &root,
   std::vector<bool> copied(Base::num_bind_params_, false);
   if (op.type == Operations::OpType::save_probs_ket) {
     // Convert to ket dict
-    for (int_t i = 0; i < root.num_shots(); i++) {
+    for (uint_t i = 0; i < root.num_shots(); i++) {
       uint_t ip = root.param_index(i);
       if (!copied[ip]) {
         (result + ip)
@@ -890,7 +890,7 @@ void Executor<state_t>::apply_save_probs(CircuitExecutor::Branch &root,
       }
     }
   } else {
-    for (int_t i = 0; i < root.num_shots(); i++) {
+    for (uint_t i = 0; i < root.num_shots(); i++) {
       uint_t ip = root.param_index(i);
       if (!copied[ip]) {
         (result + ip)
@@ -918,7 +918,7 @@ void Executor<state_t>::apply_save_amplitudes(CircuitExecutor::Branch &root,
         Base::states_[root.state_index()].qreg().probability(op.int_params[i]);
   }
   std::vector<bool> copied(Base::num_bind_params_, false);
-  for (int_t i = 0; i < root.num_shots(); i++) {
+  for (uint_t i = 0; i < root.num_shots(); i++) {
     uint_t ip = root.param_index(i);
     if (!copied[ip]) {
       (result + ip)
@@ -951,7 +951,7 @@ template <class state_t>
 rvector_t Executor<state_t>::measure_probs(const reg_t &qubits) const {
   uint_t dim = 1ull << qubits.size();
   rvector_t sum(dim, 0.0);
-  int_t i, j, k;
+  uint_t i, j, k;
   reg_t qubits_in_chunk;
   reg_t qubits_out_chunk;
@@ -965,7 +965,7 @@ rvector_t Executor<state_t>::measure_probs(const reg_t &qubits) const {

   if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
 #pragma omp parallel for private(i, j, k)
-    for (int_t ig = 0; ig < Base::num_groups_; ig++) {
+    for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
       for (i = Base::top_state_of_group_[ig];
            i < Base::top_state_of_group_[ig + 1]; i++) {
         uint_t irow, icol;
@@ -1084,14 +1084,14 @@ template <class state_t>
 void Executor<state_t>::apply_reset(const reg_t &qubits) {
   if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
 #pragma omp parallel for
-    for (int_t ig = 0; ig < Base::num_groups_; ig++) {
-      for (int_t iChunk = Base::top_state_of_group_[ig];
+    for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
+      for (uint_t iChunk = Base::top_state_of_group_[ig];
            iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) {
         Base::states_[iChunk].qreg().apply_reset(qubits);
       }
     }
   } else {
-    for (int_t i = 0; i < Base::states_.size(); i++)
+    for (uint_t i = 0; i < Base::states_.size(); i++)
       Base::states_[i].qreg().apply_reset(qubits);
   }
 }
@@ -1120,13 +1120,13 @@ void Executor<state_t>::measure_reset_update(const reg_t &qubits,
     mdiag[meas_state] = 1. / std::sqrt(meas_prob);
     if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
 #pragma omp parallel for
-      for (int_t ig = 0; ig < Base::num_groups_; ig++) {
-        for (int_t i = Base::top_state_of_group_[ig];
+      for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
+        for (uint_t i = Base::top_state_of_group_[ig];
             i < Base::top_state_of_group_[ig + 1]; i++)
          Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
      }
    } else {
-      for (int_t i = 0; i < Base::states_.size(); i++)
+      for (uint_t i = 0; i < Base::states_.size(); i++)
        Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
    }
@@ -1135,13 +1135,13 @@ void Executor<state_t>::measure_reset_update(const reg_t &qubits,
     if (qubits[0] < BasePar::chunk_bits_) {
       if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
 #pragma omp parallel for
-        for (int_t ig = 0; ig < Base::num_groups_; ig++) {
-          for (int_t i = Base::top_state_of_group_[ig];
+        for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
+          for (uint_t i = Base::top_state_of_group_[ig];
               i < Base::top_state_of_group_[ig + 1]; i++)
            Base::states_[i].qreg().apply_x(qubits[0]);
        }
      } else {
-        for (int_t i = 0; i < Base::states_.size(); i++)
+        for (uint_t i = 0; i < Base::states_.size(); i++)
          Base::states_[i].qreg().apply_x(qubits[0]);
      }
    } else {
@@ -1158,13 +1158,13 @@ void Executor<state_t>::measure_reset_update(const reg_t &qubits,
       mdiag[meas_state] = 1. / std::sqrt(meas_prob);
       if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
 #pragma omp parallel for
-        for (int_t ig = 0; ig < Base::num_groups_; ig++) {
-          for (int_t i = Base::top_state_of_group_[ig];
+        for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
+          for (uint_t i = Base::top_state_of_group_[ig];
               i < Base::top_state_of_group_[ig + 1]; i++)
            Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
        }
      } else {
-        for (int_t i = 0; i < Base::states_.size(); i++)
+        for (uint_t i = 0; i < Base::states_.size(); i++)
          Base::states_[i].qreg().apply_diagonal_unitary_matrix(qubits, mdiag);
      }
@@ -1183,7 +1183,7 @@ void Executor<state_t>::measure_reset_update(const reg_t &qubits,
       reg_t qubits_in_chunk;
       reg_t qubits_out_chunk;

-      for (int_t i = 0; i < qubits.size(); i++) {
+      for (uint_t i = 0; i < qubits.size(); i++) {
         if (qubits[i] < BasePar::chunk_bits_) {
           qubits_in_chunk.push_back(qubits[i]);
         } else {
@@ -1193,18 +1193,18 @@ void Executor<state_t>::measure_reset_update(const reg_t &qubits,
       if (qubits_in_chunk.size() > 0) { // in chunk exchange
         if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
 #pragma omp parallel for
-          for (int_t ig = 0; ig < Base::num_groups_; ig++) {
-            for (int_t i = Base::top_state_of_group_[ig];
+          for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
+            for (uint_t i = Base::top_state_of_group_[ig];
                 i < Base::top_state_of_group_[ig + 1]; i++)
              Base::states_[i].qreg().apply_unitary_matrix(qubits, perm);
          }
        } else {
-          for (int_t i = 0; i < Base::states_.size(); i++)
+          for (uint_t i = 0; i < Base::states_.size(); i++)
            Base::states_[i].qreg().apply_unitary_matrix(qubits, perm);
        }
      }
      if (qubits_out_chunk.size() > 0) { // out of chunk exchange
-        for (int_t i = 0; i < qubits_out_chunk.size(); i++) {
+        for (uint_t i = 0; i < qubits_out_chunk.size(); i++) {
          BasePar::apply_chunk_x(qubits_out_chunk[i]);
          BasePar::apply_chunk_x(qubits_out_chunk[i] +
                                 (Base::num_qubits_ - BasePar::chunk_bits_));
@@ -1225,13 +1225,13 @@ std::vector<reg_t> Executor<state_t>::sample_measure(const reg_t &qubits,
     rnds.push_back(rng.rand(0, 1));
   reg_t allbit_samples(shots, 0);

-  int_t i, j;
+  uint_t i, j;
   std::vector<double> chunkSum(Base::states_.size() + 1, 0);
   double sum, localSum;

   // calculate per chunk sum
   if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) {
 #pragma omp parallel for private(i)
-    for (int_t ig = 0; ig < Base::num_groups_; ig++) {
+    for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) {
       for (i = Base::top_state_of_group_[ig];
            i < Base::top_state_of_group_[ig + 1]; i++) {
         uint_t irow, icol;
@@ -1348,7 +1348,7 @@ Executor<state_t>::sample_measure_with_prob(CircuitExecutor::Branch &root,
   uint_t nshots = root.num_shots();
   reg_t shot_branch(nshots);

-  for (int_t i = 0; i < nshots; i++) {
+  for (uint_t i = 0; i < nshots; i++) {
     shot_branch[i] = root.rng_shots()[i].rand_int(probs);
   }
@@ -1382,11 +1382,11 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
       root.branches()[i]->add_op_after_branch(op);

       if (final_state >= 0 && final_state != i) {
-        Operations::Op op;
-        op.type = OpType::gate;
-        op.name = "x";
-        op.qubits = qubits;
-        root.branches()[i]->add_op_after_branch(op);
+        Operations::Op op2;
+        op2.type = OpType::gate;
+        op2.name = "x";
+        op2.qubits = qubits;
+        root.branches()[i]->add_op_after_branch(op2);
       }
     }
   }
@@ -1394,7 +1394,7 @@ void Executor<state_t>::measure_reset_update(CircuitExecutor::Branch &root,
   else {
     // Diagonal matrix for projecting and renormalizing to measurement outcome
     const size_t dim = 1ULL << qubits.size();
-    for (int_t i = 0; i < dim; i++) {
+    for (uint_t i = 0; i < dim; i++) {
dim; i++) { cvector_t mdiag(dim, 0.); mdiag[i] = 1. / std::sqrt(meas_probs[i]); @@ -1404,20 +1404,20 @@ void Executor::measure_reset_update(CircuitExecutor::Branch &root, op.params = mdiag; root.branches()[i]->add_op_after_branch(op); - if (final_state >= 0 && final_state != i) { + if (final_state >= 0 && final_state != (int_t)i) { // build vectorized permutation matrix cvector_t perm(dim * dim, 0.); perm[final_state * dim + i] = 1.; perm[i * dim + final_state] = 1.; for (size_t j = 0; j < dim; j++) { - if (j != final_state && j != i) + if ((int_t)j != final_state && j != i) perm[j * dim + j] = 1.; } - Operations::Op op; - op.type = OpType::matrix; - op.qubits = qubits; - op.mats.push_back(Utils::devectorize_matrix(perm)); - root.branches()[i]->add_op_after_branch(op); + Operations::Op op2; + op2.type = OpType::matrix; + op2.qubits = qubits; + op2.mats.push_back(Utils::devectorize_matrix(perm)); + root.branches()[i]->add_op_after_branch(op2); } } } @@ -1430,41 +1430,23 @@ void Executor::apply_measure(CircuitExecutor::Branch &root, rvector_t probs = sample_measure_with_prob(root, qubits); // save result to cregs - for (int_t i = 0; i < probs.size(); i++) { + for (uint_t i = 0; i < probs.size(); i++) { const reg_t outcome = Utils::int2reg(i, 2, qubits.size()); root.branches()[i]->creg().store_measure(outcome, cmemory, cregister); } measure_reset_update(root, qubits, -1, probs); } -/* -template -void Executor::apply_reset(CircuitExecutor::Branch& root, const -reg_t &qubits) -{ - rvector_t probs = sample_measure_with_prob(root, qubits); - - measure_reset_update(root, qubits, 0, probs); -} -*/ template std::vector Executor::sample_measure(state_t &state, const reg_t &qubits, uint_t shots, std::vector &rng) const { - int_t i, j; + uint_t i; std::vector rnds; rnds.reserve(shots); - /* - double norm = std::real( state.qreg().trace() ); - std::cout << " trace = " << norm<::apply_kraus(const reg_t &qubits, const std::vector &kmats) { if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t iChunk = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) { Base::states_[iChunk].qreg().apply_superop_matrix( qubits, Utils::vectorize_matrix(Utils::kraus_superop(kmats))); } } } else { - for (int_t i = 0; i < Base::states_.size(); i++) + for (uint_t i = 0; i < Base::states_.size(); i++) Base::states_[i].qreg().apply_superop_matrix( qubits, Utils::vectorize_matrix(Utils::kraus_superop(kmats))); } @@ -1549,7 +1531,7 @@ template void Executor::apply_multi_chunk_swap(const reg_t &qubits) { reg_t qubits_density; - for (int_t i = 0; i < qubits.size(); i += 2) { + for (uint_t i = 0; i < qubits.size(); i += 2) { uint_t q0, q1; q0 = qubits[i * 2]; q1 = qubits[i * 2 + 1]; diff --git a/src/simulators/density_matrix/densitymatrix_state.hpp b/src/simulators/density_matrix/densitymatrix_state.hpp index 9041adc1dd..91637166e2 100644 --- a/src/simulators/density_matrix/densitymatrix_state.hpp +++ b/src/simulators/density_matrix/densitymatrix_state.hpp @@ -362,7 +362,6 @@ void State::initialize_qreg(uint_t num_qubits, densmat_t &&state) { template void State::initialize_omp() { - uint_t i; BaseState::qreg_.set_omp_threshold(omp_qubit_threshold_); if (BaseState::threads_ > 0) BaseState::qreg_.set_omp_threads( @@ -404,7 +403,6 @@ void State::set_config(const Config &config) { // Set 
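The op to op2 renames above are worth calling out: the inner Operations::Op declaration shadowed the op already in scope, which trips -Wshadow and makes it easy for a later edit to touch the wrong object. A stripped-down illustration of the hazard and the fix; the struct is a stand-in, not Aer's Operations::Op:

struct Op { int type = 0; };

void build_branch_ops(bool needs_flip) {
  Op op;          // projector op, enqueued for every branch
  op.type = 1;
  if (needs_flip) {
    Op op2;       // was also declared `op`, shadowing the outer object;
    op2.type = 2; // the rename keeps the two ops visibly distinct
  }
}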
threshold for truncating snapshots json_chop_threshold_ = config.chop_threshold; - uint_t i; BaseState::qreg_.set_json_chop_threshold(json_chop_threshold_); // Set OMP threshold for state update functions @@ -650,7 +648,7 @@ void State::apply_gate(const Operations::Op &op) { } if (qubits_out.size() > 0) { uint_t mask = 0; - for (int i = 0; i < qubits_out.size(); i++) { + for (uint_t i = 0; i < qubits_out.size(); i++) { mask |= (1ull << (qubits_out[i] - BaseState::qreg_.num_qubits())); } if ((BaseState::qreg_.chunk_index() & mask) != mask) { @@ -670,7 +668,7 @@ void State::apply_gate(const Operations::Op &op) { else if (ctrl_chunk) apply_gate_statevector(new_op); else { - for (int i = 0; i < new_op.qubits.size(); i++) + for (uint_t i = 0; i < new_op.qubits.size(); i++) new_op.qubits[i] += BaseState::qreg_.num_qubits(); apply_gate_statevector(new_op); } @@ -861,7 +859,7 @@ void State::apply_diagonal_unitary_matrix(const reg_t &qubits, if (qubits_in.size() == qubits.size()) { BaseState::qreg_.apply_diagonal_unitary_matrix(qubits, diag); } else { - for (int_t i = 0; i < qubits.size(); i++) { + for (uint_t i = 0; i < qubits.size(); i++) { if (qubits[i] >= BaseState::qreg_.num_qubits()) qubits_row[i] = qubits[i] + BaseState::num_global_qubits_ - BaseState::qreg_.num_qubits(); @@ -871,7 +869,7 @@ void State::apply_diagonal_unitary_matrix(const reg_t &qubits, diag_row); reg_t qubits_chunk(qubits_in.size() * 2); - for (int_t i = 0; i < qubits_in.size(); i++) { + for (uint_t i = 0; i < qubits_in.size(); i++) { qubits_chunk[i] = qubits_in[i]; qubits_chunk[i + qubits_in.size()] = qubits_in[i] + BaseState::qreg_.num_qubits(); diff --git a/src/simulators/density_matrix/densitymatrix_thrust.hpp b/src/simulators/density_matrix/densitymatrix_thrust.hpp index 7cbce3cd45..6de9b78aa5 100755 --- a/src/simulators/density_matrix/densitymatrix_thrust.hpp +++ b/src/simulators/density_matrix/densitymatrix_thrust.hpp @@ -38,6 +38,7 @@ class DensityMatrixThrust : public UnitaryMatrixThrust { // Parent class aliases using BaseVector = QubitVectorThrust; using BaseMatrix = UnitaryMatrixThrust; + using BaseVector::probabilities; //----------------------------------------------------------------------- // Constructors and Destructor @@ -449,9 +450,9 @@ class DensityDiagMatMult2x2 : public Chunk::GateFuncBase { template class DensityDiagMatMultNxN : public Chunk::GateFuncBase { protected: - int nqubits_; - int total_bits_; - int chunk_bits_; + uint_t nqubits_; + uint_t total_bits_; + uint_t chunk_bits_; public: DensityDiagMatMultNxN(const reg_t &qb, int total, int chunk) { @@ -541,7 +542,7 @@ class DensityMCX : public Chunk::GateFuncBase { offset_ = 1ull << qubits[qubits.size() - 1]; offset_sp_ = 1ull << (qubits[qubits.size() - 1] + chunk_qubits_); cmask_ = 0; - for (int i = 0; i < qubits.size() - 1; i++) + for (uint_t i = 0; i < qubits.size() - 1; i++) cmask_ |= (1ull << qubits[i]); enable_batch_ = batch; } @@ -629,7 +630,7 @@ class DensityMCY : public Chunk::GateFuncBase { offset_ = 1ull << qubits[qubits.size() - 1]; offset_sp_ = 1ull << (qubits[qubits.size() - 1] + chunk_qubits_); cmask_ = 0; - for (int i = 0; i < qubits.size() - 1; i++) + for (uint_t i = 0; i < qubits.size() - 1; i++) cmask_ |= (1ull << qubits[i]); enable_batch_ = batch; } @@ -1081,7 +1082,7 @@ class expval_pauli_XYZ_func_dm : public Chunk::GateFuncBase { vec = this->data_; idx_vec = ((i << 1) & mask_u_) | (i & mask_l_); - idx_mat = idx_vec ^ x_mask_ + rows_ * idx_vec; + idx_mat = (idx_vec ^ x_mask_) + rows_ * idx_vec; q0 = vec[idx_mat]; q0 = 2 * 
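The added using BaseVector::probabilities; in DensityMatrixThrust above is not cosmetic: in C++, declaring any member function named probabilities in a derived class hides every base-class overload of that name, whatever the signatures, and the using-declaration re-exposes the hidden overload set. A self-contained demonstration of the rule, with hypothetical classes:

#include <vector>

struct Base {
  std::vector<double> probabilities() const { return {1.0}; }
};

struct Derived : Base {
  using Base::probabilities; // without this line, the overload below hides
                             // Base::probabilities() entirely
  std::vector<double> probabilities(int qubit) const {
    (void)qubit;
    return {0.5, 0.5};
  }
};

int main() {
  Derived d;
  return d.probabilities().size() + d.probabilities(0).size() == 3 ? 0 : 1;
}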
phase_ * q0; @@ -1158,7 +1159,7 @@ class expval_pauli_XYZ_func_dm_non_diagonal vec = this->data_; - idx_mat = i ^ x_mask_ + rows_ * i; + idx_mat = (i ^ x_mask_) + rows_ * i; q0 = vec[idx_mat]; q0 = phase_ * q0; @@ -1353,7 +1354,7 @@ template void DensityMatrixThrust::apply_batched_measure( const reg_t &qubits, std::vector &rng, const reg_t &cmemory, const reg_t &cregs) { - const int_t DIM = 1 << qubits.size(); + const uint_t DIM = 1 << qubits.size(); uint_t i, count = 1; if (BaseVector::enable_batch_) { if (BaseVector::chunk_.pos() != 0) { @@ -1503,7 +1504,7 @@ void DensityMatrixThrust::apply_reset(const reg_t &qubits) { auto qubits_sorted = qubits; std::sort(qubits_sorted.begin(), qubits_sorted.end()); - for (int_t i = 0; i < qubits.size(); i++) { + for (uint_t i = 0; i < qubits.size(); i++) { qubits_sorted.push_back(qubits[i]); } BaseVector::chunk_.StoreUintParams(qubits_sorted); diff --git a/src/simulators/extended_stabilizer/chlib/chstabilizer.hpp b/src/simulators/extended_stabilizer/chlib/chstabilizer.hpp index 21f15e2c27..1d4e27fd39 100644 --- a/src/simulators/extended_stabilizer/chlib/chstabilizer.hpp +++ b/src/simulators/extended_stabilizer/chlib/chstabilizer.hpp @@ -426,8 +426,8 @@ scalar_t StabilizerState::ProposeFlip(unsigned flip_pos) { scalar_t amp; amp.e = 2 * Q.e; - amp.p = -1 * - (AER::Utils::popcount(v)); // each Hadamard gate contributes 1/sqrt(2) + // each Hadamard gate contributes 1/sqrt(2) + amp.p = -1 * (int)(AER::Utils::popcount(v)); bool isNonZero = true; for (unsigned q = 0; q < n; q++) { diff --git a/src/simulators/matrix_product_state/matrix_product_state.hpp b/src/simulators/matrix_product_state/matrix_product_state.hpp index 68f79f1f99..b1ae10c90f 100644 --- a/src/simulators/matrix_product_state/matrix_product_state.hpp +++ b/src/simulators/matrix_product_state/matrix_product_state.hpp @@ -743,7 +743,7 @@ void State::apply_measure(const reg_t &qubits, const reg_t &cmemory, const reg_t &cregister, RngEngine &rng) { rvector_t rands; rands.reserve(qubits.size()); - for (int_t i = 0; i < qubits.size(); ++i) + for (uint_t i = 0; i < qubits.size(); ++i) rands.push_back(rng.rand(0., 1.)); reg_t outcome = qreg_.apply_measure(qubits, rands); creg().store_measure(outcome, cmemory, cregister); @@ -777,10 +777,10 @@ State::sample_measure_using_apply_measure(const reg_t &qubits, uint_t shots, all_samples.resize(shots); std::vector rnds_list; rnds_list.reserve(shots); - for (int_t i = 0; i < shots; ++i) { + for (uint_t i = 0; i < shots; ++i) { rvector_t rands; rands.reserve(qubits.size()); - for (int_t j = 0; j < qubits.size(); ++j) + for (uint_t j = 0; j < qubits.size(); ++j) rands.push_back(rng.rand(0., 1.)); rnds_list.push_back(rands); } diff --git a/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp b/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp index 600b29207d..d243cc8ba6 100644 --- a/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp +++ b/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp @@ -53,7 +53,7 @@ void MPSSizeEstimator::initialize(uint_t nq) { qubit_map_.resize(nq); qubit_order_.resize(nq); - for (int_t i = 0; i < nq; i++) { + for (uint_t i = 0; i < nq; i++) { tensor_size_[i].first = 1; tensor_size_[i].second = 1; @@ -66,7 +66,7 @@ void MPSSizeEstimator::initialize(uint_t nq) { uint_t MPSSizeEstimator::estimate(const std::vector &ops) { uint_t n = ops.size(); - for (int_t i = 0; i < n; i++) { + for (uint_t i = 0; i < n; i++) { switch (ops[i].type) { case 
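The two idx_mat changes above are genuine bug fixes rather than cleanups: + binds tighter than ^ in C++, so idx ^ x_mask + rows * idx parsed as idx ^ (x_mask + rows * idx), not as the intended XOR followed by the row offset. A tiny check of the two parses, with values chosen so they differ:

#include <cassert>
#include <cstdint>

int main() {
  std::uint64_t idx = 5, x_mask = 3, rows = 4;
  std::uint64_t intended = (idx ^ x_mask) + rows * idx;  // (5 ^ 3) + 20 = 26
  std::uint64_t as_parsed = idx ^ (x_mask + rows * idx); // 5 ^ 23 = 18
  assert(intended != as_parsed);
  return 0;
}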
Operations::OpType::gate: case Operations::OpType::matrix: @@ -79,7 +79,7 @@ uint_t MPSSizeEstimator::estimate(const std::vector &ops) { } } uint_t max_bond = 0; - for (int_t i = 0; i < num_qubits_ - 1; i++) { + for (uint_t i = 0; i < num_qubits_ - 1; i++) { if (max_bond < bond_dimensions_[i]) max_bond = bond_dimensions_[i]; } @@ -89,16 +89,16 @@ uint_t MPSSizeEstimator::estimate(const std::vector &ops) { void MPSSizeEstimator::apply_qubits(const reg_t &qubits) { reg_t sorted(qubits.size()); - for (int_t i = 0; i < qubits.size(); i++) { + for (uint_t i = 0; i < qubits.size(); i++) { sorted[i] = qubit_map_[qubits[i]]; } std::sort(sorted.begin(), sorted.end()); - for (int_t i = 1; i < qubits.size(); i++) { + for (uint_t i = 1; i < qubits.size(); i++) { reorder_qubit(sorted[i - 1], sorted[i]); } - for (int_t i = 0; i < qubits.size() - 1; i++) { + for (uint_t i = 0; i < qubits.size() - 1; i++) { update(sorted[i]); } } diff --git a/src/simulators/multi_state_executor.hpp b/src/simulators/multi_state_executor.hpp index be578c0da3..a420e9e9d3 100644 --- a/src/simulators/multi_state_executor.hpp +++ b/src/simulators/multi_state_executor.hpp @@ -139,14 +139,6 @@ class MultiStateExecutor : public Executor { void measure_sampler(InputIterator first_meas, InputIterator last_meas, Branch &branch, ResultItr result_it); - // sampling measure - virtual std::vector sample_measure(state_t &state, const reg_t &qubits, - uint_t shots, - std::vector &rng) const { - // this is for single rng, impement in sub-class for multi-shots case - return state.sample_measure(qubits, shots, rng[0]); - } - void apply_save_expval(Branch &root, const Operations::Op &op, ResultItr result); }; @@ -192,7 +184,7 @@ void MultiStateExecutor::set_distribution(uint_t num_states) { state_index_begin_.resize(Base::distributed_procs_); state_index_end_.resize(Base::distributed_procs_); - for (int_t i = 0; i < Base::distributed_procs_; i++) { + for (uint_t i = 0; i < Base::distributed_procs_; i++) { state_index_begin_[i] = num_global_states_ * i / Base::distributed_procs_; state_index_end_[i] = num_global_states_ * (i + 1) / Base::distributed_procs_; @@ -212,7 +204,7 @@ void MultiStateExecutor::set_parallelization( template bool MultiStateExecutor::allocate_states(uint_t num_shots, const Config &config) { - int_t i; + uint_t i; bool ret = true; states_.resize(num_shots); @@ -281,12 +273,11 @@ void MultiStateExecutor::run_circuit_shots( Noise::NoiseModel dummy_noise; state_t dummy_state; - RngEngine dummy_rng; - dummy_rng.set_seed(circ.seed); // this is not used actually Circuit circ_opt; if (sample_noise) { RngEngine dummy_rng; + dummy_rng.set_seed(circ.seed); circ_opt = noise.sample_noise(circ, dummy_rng, Noise::NoiseModel::Method::circuit, true); auto fusion_pass = Base::transpile_fusion(circ_opt.opset(), config); @@ -385,12 +376,12 @@ void MultiStateExecutor::run_circuit_shots( #endif for (auto &res : par_results) { - for (int_t i = 0; i < Base::num_bind_params_; i++) { + for (uint_t i = 0; i < Base::num_bind_params_; i++) { (result_it + i)->combine(std::move(res[i])); } } - for (int_t i = 0; i < Base::num_bind_params_; i++) { + for (uint_t i = 0; i < Base::num_bind_params_; i++) { (result_it + i)->metadata.add(true, "shot_branching_enabled"); (result_it + i) ->metadata.add(sample_noise, "runtime_noise_sampling_enabled"); @@ -413,7 +404,7 @@ void MultiStateExecutor::run_circuit_with_shot_branching( bool can_sample = false; OpItr measure_seq = last; OpItr it = last - 1; - int_t num_measure = 0; + uint_t num_measure = 0; if 
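One caveat on conversions like the MPSSizeEstimator loops above: with a uint_t counter, a bound written as qubits.size() - 1 or num_qubits_ - 1 wraps to a huge value whenever the quantity is zero, where the old signed form simply skipped the loop. The call sites here presumably guarantee non-empty inputs, but the wrap-proof spelling is worth noting as a sketch:

#include <cstdint>
#include <vector>

using uint_t = std::uint64_t;

// `i + 1 < v.size()` never underflows, even for an empty vector, whereas
// `i < v.size() - 1` would loop (nearly) forever when size() == 0.
uint_t count_adjacent_pairs(const std::vector<uint_t> &v) {
  uint_t pairs = 0;
  for (uint_t i = 0; i + 1 < v.size(); ++i)
    ++pairs;
  return pairs;
}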
(shot_branching_sampling_enable_) { do { @@ -445,14 +436,14 @@ void MultiStateExecutor::run_circuit_with_shot_branching( if (Base::num_bind_params_ > 1) { if (par_shots > 1) { #pragma omp parallel for num_threads(par_shots) - for (int_t i = 0; i < nshots; i++) { + for (int_t i = 0; i < (int_t)nshots; i++) { uint_t gid = global_state_index_ + ishot + i; uint_t ip = gid / Base::num_shots_per_bind_param_; shots_storage[i].set_seed(circ.seed_for_params[ip] + (gid % Base::num_shots_per_bind_param_)); } } else { - for (int_t i = 0; i < nshots; i++) { + for (uint_t i = 0; i < nshots; i++) { uint_t gid = global_state_index_ + ishot + i; uint_t ip = gid / Base::num_shots_per_bind_param_; shots_storage[i].set_seed(circ.seed_for_params[ip] + @@ -466,10 +457,10 @@ void MultiStateExecutor::run_circuit_with_shot_branching( shots_storage[0].set_seed(circ.seed + global_state_index_ + ishot); if (par_shots > 1) { #pragma omp parallel for num_threads(par_shots) - for (int_t i = 1; i < nshots; i++) + for (int_t i = 1; i < (int_t)nshots; i++) shots_storage[i].set_seed(circ.seed + global_state_index_ + ishot + i); } else { - for (int_t i = 1; i < nshots; i++) + for (uint_t i = 1; i < nshots; i++) shots_storage[i].set_seed(circ.seed + global_state_index_ + ishot + i); } } @@ -498,7 +489,7 @@ void MultiStateExecutor::run_circuit_with_shot_branching( uint_t num_active_states = 1; // set branches - for (int_t i = 0; i < waiting_branches.size(); i++) { + for (uint_t i = 0; i < waiting_branches.size(); i++) { if (i > num_states) break; uint_t sid = top_state + i; @@ -547,9 +538,9 @@ void MultiStateExecutor::run_circuit_with_shot_branching( if (branches[istate]->num_branches() > 0) { // if there are additional ops remaining, queue them on new // branches - for (int_t k = iadd + 1; + for (uint_t k = iadd + 1; k < branches[istate]->additional_ops().size(); k++) { - for (int_t l = 0; l < branches[istate]->num_branches(); + for (uint_t l = 0; l < branches[istate]->num_branches(); l++) branches[istate]->branches()[l]->add_op_after_branch( branches[istate]->additional_ops()[k]); @@ -631,10 +622,10 @@ void MultiStateExecutor::run_circuit_with_shot_branching( // repeat until new branch is available if (nbranch > 0) { uint_t num_states_prev = branches.size(); - for (int_t i = 0; i < num_states_prev; i++) { + for (uint_t i = 0; i < num_states_prev; i++) { // add new branches if (branches[i]->num_branches() > 0) { - for (int_t j = 0; j < branches[i]->num_branches(); j++) { + for (uint_t j = 0; j < branches[i]->num_branches(); j++) { if (branches[i]->branches()[j]->num_shots() > 0) { // add new branched state uint_t pos = branches.size(); @@ -680,7 +671,7 @@ void MultiStateExecutor::run_circuit_with_shot_branching( // check if there are remaining ops num_active_states = 0; - for (int_t i = 0; i < branches.size(); i++) { + for (uint_t i = 0; i < branches.size(); i++) { if (branches[i]->op_iterator() != measure_seq || branches[i]->additional_ops().size() > 0) num_active_states++; @@ -707,7 +698,7 @@ void MultiStateExecutor::run_circuit_with_shot_branching( Utils::apply_omp_parallel_for(can_parallel, 0, par_shots, sampling_measure_func, par_shots); - for (int_t i = 0; i < Base::num_bind_params_; i++) + for (uint_t i = 0; i < Base::num_bind_params_; i++) (result_it + i)->metadata.add(true, "shot_branching_sampling_enabled"); } else { // save cregs to result @@ -718,7 +709,7 @@ void MultiStateExecutor::run_circuit_with_shot_branching( for (; istate < state_end; istate++) { if (Base::num_process_per_experiment_ > 1) { - for (int_t j = 
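The seeding loops above give every shot a deterministic seed that is independent of how shots are distributed over threads and processes: a shot's global index selects its bind-parameter block, and the offset within the block is added to that parameter's base seed. A simplified sketch of the mapping, with illustrative names:

#include <cstdint>
#include <vector>

using uint_t = std::uint64_t;

// Seed for global shot `gid` when each bound parameter set owns
// `shots_per_param` consecutive shots and carries its own base seed.
uint_t shot_seed(const std::vector<uint_t> &seed_for_params,
                 uint_t shots_per_param, uint_t gid) {
  uint_t ip = gid / shots_per_param;     // which parameter binding
  uint_t offset = gid % shots_per_param; // shot index within that binding
  return seed_for_params[ip] + offset;
}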
0; j < branches[istate]->num_shots(); j++) { + for (uint_t j = 0; j < branches[istate]->num_shots(); j++) { uint_t idx = branches[istate]->rng_shots()[j].initial_seed(); uint_t ip = branches[istate]->param_index(j); idx += ip * Base::num_shots_per_bind_param_; @@ -728,13 +719,13 @@ void MultiStateExecutor::run_circuit_with_shot_branching( } else { std::string memory_hex = states_[branches[istate]->state_index()].creg().memory_hex(); - for (int_t j = 0; j < branches[istate]->num_shots(); j++) { + for (uint_t j = 0; j < branches[istate]->num_shots(); j++) { uint_t ip = branches[istate]->param_index(j); par_results[i][ip].data.add_accum(static_cast(1ULL), "counts", memory_hex); } if (Base::save_creg_memory_) { - for (int_t j = 0; j < branches[istate]->num_shots(); j++) { + for (uint_t j = 0; j < branches[istate]->num_shots(); j++) { uint_t ip = branches[istate]->param_index(j); par_results[i][ip].data.add_list(memory_hex, "memory"); } @@ -748,14 +739,14 @@ void MultiStateExecutor::run_circuit_with_shot_branching( } // clear - for (int_t i = 0; i < branches.size(); i++) { + for (uint_t i = 0; i < branches.size(); i++) { branches[i].reset(); } branches.clear(); } for (auto &res : par_results) { - for (int_t i = 0; i < Base::num_bind_params_; i++) { + for (uint_t i = 0; i < Base::num_bind_params_; i++) { (result_it + i)->combine(std::move(res[i])); } } @@ -777,7 +768,7 @@ void MultiStateExecutor::apply_runtime_parameterization( root.branch_shots_by_params(); // add binded op after branch - for (int_t i = 0; i < nparams; i++) { + for (uint_t i = 0; i < nparams; i++) { uint_t ip = root.branches()[i]->param_index(0); Operations::Op bind_op = Operations::bind_parameter(op, ip, Base::num_bind_params_); @@ -799,7 +790,7 @@ void MultiStateExecutor::measure_sampler(InputIterator first_meas, // Check if meas_circ is empty, and if so return initial creg if (first_meas == last_meas) { if (Base::num_process_per_experiment_ > 1) { - for (int_t i = 0; i < shots; i++) { + for (uint_t i = 0; i < shots; i++) { uint_t idx = branch.rng_shots()[i].initial_seed(); uint_t ip = branch.param_index(i); idx += ip * Base::num_shots_per_bind_param_; @@ -807,7 +798,7 @@ void MultiStateExecutor::measure_sampler(InputIterator first_meas, cregs_[idx] = state.creg(); } } else { - for (int_t i = 0; i < shots; i++) { + for (uint_t i = 0; i < shots; i++) { uint_t ip = branch.param_index(i); (result + ip)->save_count_data(state.creg(), Base::save_creg_memory_); } @@ -837,7 +828,7 @@ void MultiStateExecutor::measure_sampler(InputIterator first_meas, // Generate the samples std::vector all_samples; - all_samples = sample_measure(state, meas_qubits, shots, rng); + all_samples = this->sample_measure(state, meas_qubits, shots, rng); // Make qubit map of position in vector of measured qubits std::unordered_map qubit_map; @@ -859,10 +850,6 @@ void MultiStateExecutor::measure_sampler(InputIterator first_meas, } // Process samples - uint_t num_memory = - (memory_map.empty()) ? 0ULL : 1 + memory_map.rbegin()->first; - uint_t num_registers = - (register_map.empty()) ? 
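The switch from sample_measure(...) to this->sample_measure(...) above is forced by the removal of the in-class virtual: inside a class template, unqualified names are not looked up in dependent base classes, so once the only in-scope declaration is gone the call must be qualified with this-> (or Base::) to defer lookup to instantiation time. A minimal reproduction of the rule:

template <class T>
struct Base {
  int sample_measure() const { return 42; }
};

template <class T>
struct Executor : Base<T> {
  int run() const {
    // return sample_measure();    // error: dependent base is not searched
    return this->sample_measure(); // OK: lookup happens at instantiation
  }
};

int main() { return Executor<int>{}.run() == 42 ? 0 : 1; }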
0ULL : 1 + register_map.rbegin()->first; for (int_t i = all_samples.size() - 1; i >= 0; i--) { ClassicalRegister creg = state.creg(); @@ -929,7 +916,7 @@ void MultiStateExecutor::apply_save_expval(Branch &root, std::vector expval_var(2); expval_var[0] = expval; // mean expval_var[1] = sq_expval - expval * expval; // variance - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); if (!copied[ip]) { (result + ip) @@ -940,7 +927,7 @@ void MultiStateExecutor::apply_save_expval(Branch &root, } } } else { - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); if (!copied[ip]) { (result + ip) diff --git a/src/simulators/parallel_state_executor.hpp b/src/simulators/parallel_state_executor.hpp index 85121689a9..7cb26bc735 100644 --- a/src/simulators/parallel_state_executor.hpp +++ b/src/simulators/parallel_state_executor.hpp @@ -136,14 +136,6 @@ class ParallelStateExecutor : public virtual MultiStateExecutor { // Apply a save expectation value instruction void apply_save_expval(const Operations::Op &op, ExperimentResult &result); - // Sample n-measurement outcomes without applying the measure operation - // to the system state - virtual std::vector sample_measure(const reg_t &qubits, uint_t shots, - RngEngine &rng) const { - std::vector ret; - return ret; - }; - // swap between chunks virtual void apply_chunk_swap(const reg_t &qubits); @@ -270,7 +262,7 @@ ParallelStateExecutor::transpile_cache_blocking( template bool ParallelStateExecutor::allocate(uint_t num_qubits, const Config &config) { - int_t i; + uint_t i; Base::num_qubits_ = num_qubits; chunk_bits_ = cache_block_qubit_; @@ -312,9 +304,9 @@ bool ParallelStateExecutor::allocate(uint_t num_qubits, template bool ParallelStateExecutor::allocate_states(uint_t num_states, const Config &config) { - int_t i; + uint_t i; bool init_states = true; - uint_t num_states_allocated; + uint_t num_states_allocated = num_states; // deallocate qregs before reallocation if (Base::states_.size() > 0) { if (Base::states_.size() == num_states) @@ -532,7 +524,7 @@ void ParallelStateExecutor::run_circuit_shots( result.metadata.copy(fusion_result.metadata); } - for (int_t ishot = 0; ishot < circ.shots; ishot++) { + for (uint_t ishot = 0; ishot < circ.shots; ishot++) { RngEngine rng; if (iparam == 0 && ishot == 0) rng = init_rng; @@ -616,7 +608,7 @@ void ParallelStateExecutor::measure_sampler(InputIterator first_meas, // Generate the samples auto timer_start = myclock_t::now(); - auto all_samples = sample_measure(meas_qubits, shots, rng); + auto all_samples = this->sample_measure(meas_qubits, shots, rng); auto time_taken = std::chrono::duration(myclock_t::now() - timer_start).count(); result.metadata.add(time_taken, "sample_measure_time"); @@ -792,11 +784,11 @@ void ParallelStateExecutor::apply_ops_chunks( uint_t iOpBegin = iOp + 1; if (Base::num_groups_ > 1 && chunk_omp_parallel_) { #pragma omp parallel for num_threads(Base::num_groups_) - for (int_t ig = 0; ig < Base::num_groups_; ig++) + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) apply_cache_blocking_ops(ig, first + iOpBegin, first + iOpEnd, result, rng, iparam); } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) + for (uint_t ig = 0; ig < Base::num_groups_; ig++) apply_cache_blocking_ops(ig, first + iOpBegin, first + iOpEnd, result, rng, iparam); } @@ -810,11 +802,11 @@ void ParallelStateExecutor::apply_ops_chunks( final_ops && nOp == iOp + 
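Initializing num_states_allocated at its declaration above closes a path where the value could be read before any branch assigned it, which GCC reports as -Wmaybe-uninitialized and which is undefined behavior if actually reached. The general shape of the fix, as a hedged sketch with invented names:

#include <cstdint>

using uint_t = std::uint64_t;

uint_t allocate_states(uint_t requested, bool reuse_existing) {
  // Start from the common-case value so every path to the return statement
  // reads a defined number, even if no branch below fires.
  uint_t allocated = requested;
  if (reuse_existing)
    allocated = 0; // nothing new to allocate
  return allocated;
}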
1)) { if (Base::num_groups_ > 1 && chunk_omp_parallel_) { #pragma omp parallel for num_threads(Base::num_groups_) - for (int_t ig = 0; ig < Base::num_groups_; ig++) + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) apply_cache_blocking_ops(ig, bind_op.cbegin(), bind_op.cend(), result, rng, iparam); } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) + for (uint_t ig = 0; ig < Base::num_groups_; ig++) apply_cache_blocking_ops(ig, bind_op.cbegin(), bind_op.cend(), result, rng, iparam); } @@ -824,11 +816,11 @@ void ParallelStateExecutor::apply_ops_chunks( final_ops && nOp == iOp + 1)) { if (Base::num_groups_ > 1 && chunk_omp_parallel_) { #pragma omp parallel for num_threads(Base::num_groups_) - for (int_t ig = 0; ig < Base::num_groups_; ig++) + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) apply_cache_blocking_ops(ig, first + iOp, first + iOp + 1, result, rng, iparam); } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) + for (uint_t ig = 0; ig < Base::num_groups_; ig++) apply_cache_blocking_ops(ig, first + iOp, first + iOp + 1, result, rng, iparam); } @@ -843,10 +835,10 @@ void ParallelStateExecutor::apply_ops_chunks( if (Base::num_groups_ > 1 && chunk_omp_parallel_) { #pragma omp parallel for num_threads(Base::num_groups_) - for (int_t ig = 0; ig < Base::num_groups_; ig++) + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) Base::states_[Base::top_state_of_group_[ig]].qreg().synchronize(); } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) + for (uint_t ig = 0; ig < Base::num_groups_; ig++) Base::states_[Base::top_state_of_group_[ig]].qreg().synchronize(); } @@ -881,7 +873,7 @@ void ParallelStateExecutor::apply_cache_blocking_ops( const int_t iGroup, InputIterator first, InputIterator last, ExperimentResult &result, RngEngine &rng, uint_t iparam) { // for each chunk in group - for (int_t iChunk = Base::top_state_of_group_[iGroup]; + for (uint_t iChunk = Base::top_state_of_group_[iGroup]; iChunk < Base::top_state_of_group_[iGroup + 1]; iChunk++) { // fecth chunk in cache if (Base::states_[iChunk].qreg().fetch_chunk()) { @@ -901,15 +893,15 @@ void ParallelStateExecutor::apply_cache_blocking_ops( template template void ParallelStateExecutor::initialize_from_vector(const list_t &vec) { - int_t iChunk; + uint_t iChunk; if (chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for private(iChunk) - for (int_t ig = 0; ig < Base::num_groups_; ig++) { + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { for (iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) { list_t tmp(1ull << (chunk_bits_ * qubit_scale())); - for (int_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) { + for (uint_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) { tmp[i] = vec[((Base::global_state_index_ + iChunk) << (chunk_bits_ * qubit_scale())) + i]; @@ -920,7 +912,7 @@ void ParallelStateExecutor::initialize_from_vector(const list_t &vec) { } else { for (iChunk = 0; iChunk < Base::num_local_states_; iChunk++) { list_t tmp(1ull << (chunk_bits_ * qubit_scale())); - for (int_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) { + for (uint_t i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) { tmp[i] = vec[((Base::global_state_index_ + iChunk) << (chunk_bits_ * qubit_scale())) + i]; @@ -933,10 +925,10 @@ void ParallelStateExecutor::initialize_from_vector(const list_t &vec) { template template void ParallelStateExecutor::initialize_from_matrix(const list_t &mat) { - int_t iChunk; + 
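For reference, the initialize_from_vector indexing above treats the global state as contiguous blocks of 2^(chunk_bits * qubit_scale) amplitudes, so local chunk c on a process whose first chunk is global_state_index_ starts at (global_state_index_ + c) << (chunk_bits * qubit_scale). A compact sketch of the slice copy, with illustrative names and the scale factor folded into chunk_bits:

#include <cstdint>
#include <vector>

using uint_t = std::uint64_t;

// Copy the amplitudes belonging to local chunk `ichunk` out of the global
// vector; each chunk owns 2^chunk_bits consecutive entries.
std::vector<double> chunk_slice(const std::vector<double> &global,
                                uint_t global_index, uint_t ichunk,
                                uint_t chunk_bits) {
  const uint_t size = 1ull << chunk_bits;
  const uint_t base = (global_index + ichunk) << chunk_bits;
  return std::vector<double>(global.begin() + base,
                             global.begin() + base + size);
}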
uint_t iChunk; if (chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for private(iChunk) - for (int_t ig = 0; ig < Base::num_groups_; ig++) { + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { for (iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) { list_t tmp(1ull << (chunk_bits_), 1ull << (chunk_bits_)); @@ -949,7 +941,7 @@ void ParallelStateExecutor::initialize_from_matrix(const list_t &mat) { << (chunk_bits_); // copy part of state for this chunk - uint_t i, row, col; + uint_t i; for (i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) { uint_t icol = i & ((1ull << chunk_bits_) - 1); uint_t irow = i >> chunk_bits_; @@ -970,7 +962,7 @@ void ParallelStateExecutor::initialize_from_matrix(const list_t &mat) { << (chunk_bits_); // copy part of state for this chunk - uint_t i, row, col; + uint_t i; for (i = 0; i < (1ull << (chunk_bits_ * qubit_scale())); i++) { uint_t icol = i & ((1ull << chunk_bits_) - 1); uint_t irow = i >> chunk_bits_; @@ -985,7 +977,7 @@ void ParallelStateExecutor::initialize_from_matrix(const list_t &mat) { template auto ParallelStateExecutor::apply_to_matrix(bool copy) { // this function is used to collect states over chunks - int_t iChunk; + uint_t iChunk; uint_t size = 1ull << (chunk_bits_ * qubit_scale()); uint_t mask = (1ull << (chunk_bits_)) - 1; uint_t num_threads = Base::states_[0].qreg().get_omp_threads(); @@ -1024,7 +1016,7 @@ auto ParallelStateExecutor::apply_to_matrix(bool copy) { recv_data(tmp.data(), size, 0, iChunk); #endif #pragma omp parallel for if (num_threads > 1) num_threads(num_threads) - for (i = 0; i < size; i++) { + for (i = 0; i < (int_t)size; i++) { uint_t irow = i >> (chunk_bits_); uint_t icol = i & mask; uint_t idx = @@ -1093,9 +1085,7 @@ void ParallelStateExecutor::apply_save_expval( template void ParallelStateExecutor::apply_chunk_swap(const reg_t &qubits) { - uint_t nLarge = 1; uint_t q0, q1; - int_t iChunk; q0 = qubits[qubits.size() - 2]; q1 = qubits[qubits.size() - 1]; @@ -1112,14 +1102,14 @@ void ParallelStateExecutor::apply_chunk_swap(const reg_t &qubits) { // inside chunk if (chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for num_threads(Base::num_groups_) - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t iChunk = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) Base::states_[iChunk].qreg().apply_mcswap(qubits); } } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t iChunk = Base::top_state_of_group_[ig]; + for (uint_t ig = 0; ig < Base::num_groups_; ig++) { + for (uint_t iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) Base::states_[iChunk].qreg().apply_mcswap(qubits); } @@ -1139,7 +1129,7 @@ void ParallelStateExecutor::apply_chunk_swap(const reg_t &qubits) { // processes // is needed auto apply_chunk_swap_1qubit = [this, mask1, qubits](int_t iGroup) { - for (int_t ic = Base::top_state_of_group_[iGroup]; + for (uint_t ic = Base::top_state_of_group_[iGroup]; ic < Base::top_state_of_group_[iGroup + 1]; ic++) { uint_t baseChunk; baseChunk = ic & (~mask1); @@ -1150,7 +1140,7 @@ void ParallelStateExecutor::apply_chunk_swap(const reg_t &qubits) { }; auto apply_chunk_swap_2qubits = [this, mask0, mask1, qubits](int_t iGroup) { - for (int_t ic = Base::top_state_of_group_[iGroup]; + for (uint_t ic = 
Base::top_state_of_group_[iGroup]; ic < Base::top_state_of_group_[iGroup + 1]; ic++) { uint_t baseChunk; baseChunk = ic & (~(mask0 | mask1)); @@ -1172,7 +1162,8 @@ void ParallelStateExecutor::apply_chunk_swap(const reg_t &qubits) { } #ifdef AER_MPI else { - int_t iPair; + uint_t nLarge = 1; + uint_t iPair; uint_t nPair; uint_t baseChunk, iChunk1, iChunk2; @@ -1343,14 +1334,14 @@ void ParallelStateExecutor::apply_multi_chunk_swap( // swap inside chunks to prepare for all-to-all shuffle if (chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t iChunk = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps); } } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t iChunk = Base::top_state_of_group_[ig]; + for (uint_t ig = 0; ig < Base::num_groups_; ig++) { + for (uint_t iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps); } @@ -1366,7 +1357,7 @@ void ParallelStateExecutor::apply_multi_chunk_swap( for (uint_t i = 0; i < nchunk; i++) { chunk_offset[i] = 0; - for (uint_t k = 0; k < nswap; k++) { + for (int_t k = 0; k < nswap; k++) { if (((i >> k) & 1) != 0) chunk_offset[i] += (1ull << chunk_shuffle_qubits[k]); } @@ -1381,7 +1372,7 @@ void ParallelStateExecutor::apply_multi_chunk_swap( uint_t i1, i2, k, ii, t; baseChunk = 0; ii = iPair; - for (k = 0; k < nswap; k++) { + for (k = 0; k < (uint_t)nswap; k++) { t = ii & ((1ull << chunk_shuffle_qubits_sorted[k]) - 1); baseChunk += t; ii = (ii - t) << 1; @@ -1395,7 +1386,6 @@ void ParallelStateExecutor::apply_multi_chunk_swap( // all-to-all // send data for (uint_t iswap = 1; iswap < nchunk; iswap++) { - uint_t sizeRecv, sizeSend; uint_t num_local_swap = 0; for (i1 = 0; i1 < nchunk; i1++) { i2 = i1 ^ iswap; @@ -1412,6 +1402,7 @@ void ParallelStateExecutor::apply_multi_chunk_swap( continue; // swap while data is exchanged between processes } #ifdef AER_MPI + uint_t sizeRecv, sizeSend; uint_t offset1 = i1 << (chunk_bits_ * qubit_scale() - nswap); uint_t offset2 = i2 << (chunk_bits_ * qubit_scale() - nswap); uint_t iChunk1 = @@ -1419,7 +1410,7 @@ void ParallelStateExecutor::apply_multi_chunk_swap( uint_t iChunk2 = baseChunk + chunk_offset[i2] - Base::global_state_index_; - int_t tid = (iPair << nswap) + iswap; + uint_t tid = (iPair << nswap) + iswap; if (iProc1 == Base::distributed_rank_) { auto pRecv = Base::states_[iChunk1].qreg().recv_buffer(sizeRecv); @@ -1499,14 +1490,14 @@ void ParallelStateExecutor::apply_multi_chunk_swap( // restore qubits order if (chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t iChunk = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps); } } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t iChunk = Base::top_state_of_group_[ig]; + for (uint_t ig = 0; ig < Base::num_groups_; ig++) { + for (uint_t iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) 
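Moving declarations such as sizeRecv/sizeSend, nLarge, and iPair inside the #ifdef AER_MPI region above keeps non-MPI builds free of -Wunused-variable warnings: the variables only have a consumer when the MPI exchange path is compiled in. Schematically:

#include <cstdint>

void exchange_chunk(std::uint64_t pair_index) {
#ifdef AER_MPI
  // Declared only here, so a single-process build never sees them.
  std::uint64_t size_recv = 0, size_send = 0;
  // ... look up send/recv buffers and post the MPI transfers ...
  (void)size_recv;
  (void)size_send;
#endif
  (void)pair_index; // used by both configurations in the real code
}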
Base::states_[iChunk].qreg().apply_multi_swaps(local_swaps); } @@ -1515,13 +1506,10 @@ void ParallelStateExecutor::apply_multi_chunk_swap( template void ParallelStateExecutor::apply_chunk_x(const uint_t qubit) { - int_t iChunk; - uint_t nLarge = 1; - if (qubit < chunk_bits_ * qubit_scale()) { auto apply_mcx = [this, qubit](int_t ig) { reg_t qubits(1, qubit); - for (int_t iChunk = Base::top_state_of_group_[ig]; + for (uint_t iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) Base::states_[iChunk].qreg().apply_mcx(qubits); }; @@ -1529,9 +1517,7 @@ void ParallelStateExecutor::apply_chunk_x(const uint_t qubit) { (chunk_omp_parallel_ && Base::num_groups_ > 1), 0, Base::num_groups_, apply_mcx); } else { // exchange over chunks - int_t iPair; uint_t nPair, mask; - uint_t baseChunk, iChunk1, iChunk2; reg_t qubits(2); qubits[0] = qubit; qubits[1] = qubit; @@ -1547,7 +1533,7 @@ void ParallelStateExecutor::apply_chunk_x(const uint_t qubit) { nPair = Base::num_local_states_ >> 1; auto apply_chunk_swap = [this, mask, qubits](int_t iGroup) { - for (int_t ic = Base::top_state_of_group_[iGroup]; + for (uint_t ic = Base::top_state_of_group_[iGroup]; ic < Base::top_state_of_group_[iGroup + 1]; ic++) { uint_t pairChunk; pairChunk = ic ^ mask; @@ -1562,6 +1548,9 @@ void ParallelStateExecutor::apply_chunk_x(const uint_t qubit) { } #ifdef AER_MPI else { + uint_t iPair; + uint_t baseChunk, iChunk1, iChunk2; + // chunk scheduler that supports any number of processes uint_t nu[3]; uint_t ub[3]; @@ -1570,7 +1559,6 @@ void ParallelStateExecutor::apply_chunk_x(const uint_t qubit) { uint_t iLocalChunk, iRemoteChunk, iProc; int i; - nLarge = 1; nu[0] = 1ull << (qubit - chunk_bits_ * qubit_scale()); ub[0] = 0; iu[0] = 0; @@ -1864,8 +1852,8 @@ void ParallelStateExecutor::gather_state( AER::Vector> &state) { #ifdef AER_MPI if (Base::distributed_procs_ > 1) { - uint_t size, local_size, global_size, offset; - int i; + uint_t global_size; + uint_t i; std::vector recv_counts(Base::distributed_procs_); std::vector recv_offset(Base::distributed_procs_); diff --git a/src/simulators/shot_branching.hpp b/src/simulators/shot_branching.hpp index e9d1eb5811..0d81f707a4 100644 --- a/src/simulators/shot_branching.hpp +++ b/src/simulators/shot_branching.hpp @@ -83,7 +83,7 @@ class Branch { void set_shots(std::vector &shots) { shots_ = shots; } void initialize_shots(const uint_t nshots, const uint_t seed) { shots_.resize(nshots); - for (int_t i = 0; i < nshots; i++) { + for (uint_t i = 0; i < nshots; i++) { shots_[i].set_seed(seed + i); } } @@ -151,7 +151,7 @@ class Branch { if (param_index_.size() == 1) { return param_index_[0]; } - for (int_t i = 0; i < param_index_.size(); i++) { + for (uint_t i = 0; i < param_index_.size(); i++) { if (param_shots_[i] > ishot) { return param_index_[i]; } @@ -174,13 +174,13 @@ void Branch::branch_shots(reg_t &shots, int_t nbranch) { if (param_index_.size() > 1) { branches_[i]->param_index_ = param_index_; branches_[i]->param_shots_.resize(param_index_.size()); - for (int_t j = 0; j < param_index_.size(); j++) + for (uint_t j = 0; j < param_index_.size(); j++) branches_[i]->param_shots_[j] = 0; } } uint_t pos = 0; - for (int_t i = 0; i < shots.size(); i++) { + for (uint_t i = 0; i < shots.size(); i++) { branches_[shots[i]]->shots_.push_back(shots_[i]); if (param_index_.size() > 1) { @@ -193,19 +193,19 @@ void Branch::branch_shots(reg_t &shots, int_t nbranch) { // set parameter indices if (param_index_.size() > 1) { for (int_t i = 0; i < nbranch; i++) { - uint_t 
pos = 0; - while (pos < branches_[i]->param_index_.size()) { - if (branches_[i]->param_shots_[pos] == 0) { + uint_t ppos = 0; + while (ppos < branches_[i]->param_index_.size()) { + if (branches_[i]->param_shots_[ppos] == 0) { branches_[i]->param_index_.erase(branches_[i]->param_index_.begin() + - pos); + ppos); branches_[i]->param_shots_.erase(branches_[i]->param_index_.begin() + - pos); + ppos); } else { - if (pos > 0) { - branches_[i]->param_shots_[pos] += - branches_[i]->param_shots_[pos - 1]; + if (ppos > 0) { + branches_[i]->param_shots_[ppos] += + branches_[i]->param_shots_[ppos - 1]; } - pos++; + ppos++; } } } @@ -218,27 +218,27 @@ void Branch::branch_shots(reg_t &shots, int_t nbranch) { void Branch::branch_shots_by_params(void) { branches_.resize(param_index_.size()); - for (int_t i = 0; i < param_index_.size(); i++) { + for (uint_t i = 0; i < param_index_.size(); i++) { branches_[i] = std::make_shared(); branches_[i]->creg_ = creg_; branches_[i]->iter_ = iter_; branches_[i]->flow_marks_ = flow_marks_; } uint_t pos = 0; - for (int_t i = 0; i < shots_.size(); i++) { + for (uint_t i = 0; i < shots_.size(); i++) { if (i >= param_shots_[pos]) pos++; branches_[pos]->shots_.push_back(shots_[i]); } - for (int_t i = 0; i < param_index_.size(); i++) { + for (uint_t i = 0; i < param_index_.size(); i++) { branches_[i]->set_param_index(param_index_[i], 0); } } void Branch::advance_iterator(void) { iter_++; - for (int_t i = 0; i < branches_.size(); i++) { + for (uint_t i = 0; i < branches_.size(); i++) { branches_[i]->iter_++; } } @@ -253,24 +253,24 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg, reg_t shot_map(nshots); std::vector> noises; - for (int_t i = 0; i < nshots; i++) { + for (uint_t i = 0; i < nshots; i++) { std::vector noise_ops = noise.sample_noise_loc(op, shots_[i]); // search same noise ops int_t pos = -1; - for (int_t j = 0; j < noises.size(); j++) { + for (uint_t j = 0; j < noises.size(); j++) { if (noise_ops.size() != noises[j].size()) continue; bool same = true; - for (int_t k = 0; k < noise_ops.size(); k++) { + for (uint_t k = 0; k < noise_ops.size(); k++) { if (noise_ops[k].type != noises[j][k].type || noise_ops[k].name != noises[j][k].name) same = false; else if (noise_ops[k].qubits.size() != noises[j][k].qubits.size()) same = false; else { - for (int_t l = 0; l < noise_ops[k].qubits.size(); l++) { + for (uint_t l = 0; l < noise_ops[k].qubits.size(); l++) { if (noise_ops[k].qubits[l] != noises[j][k].qubits[l]) { same = false; break; @@ -286,7 +286,7 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg, } else if (noise_ops[k].params.size() != noises[j][k].params.size()) same = false; else { - for (int_t l = 0; l < noise_ops[k].params.size(); l++) { + for (uint_t l = 0; l < noise_ops[k].params.size(); l++) { if (noise_ops[k].params[l] != noises[j][k].params[l]) { same = false; break; @@ -298,12 +298,12 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg, if (noise_ops[k].mats.size() != noises[j][k].mats.size()) same = false; else { - for (int_t l = 0; l < noise_ops[k].mats.size(); l++) { + for (uint_t l = 0; l < noise_ops[k].mats.size(); l++) { if (noise_ops[k].mats[l].size() != noises[j][k].mats[l].size()) { same = false; break; } - for (int_t m = 0; m < noise_ops[k].mats[l].size(); m++) { + for (uint_t m = 0; m < noise_ops[k].mats[l].size(); m++) { if (noise_ops[k].mats[l][m] != noises[j][k].mats[l][m]) { same = false; break; @@ -333,7 +333,7 @@ bool Branch::apply_runtime_noise_sampling(const 
ClassicalRegister &creg, creg_ = creg; branch_shots(shot_map, noises.size()); - for (int_t i = 0; i < noises.size(); i++) { + for (uint_t i = 0; i < noises.size(); i++) { branches_[i]->copy_ops_after_branch(noises[i]); } @@ -342,7 +342,7 @@ bool Branch::apply_runtime_noise_sampling(const ClassicalRegister &creg, void Branch::remove_empty_branches(void) { int_t istart = 0; - for (int_t j = 0; j < branches_.size(); j++) { + for (uint_t j = 0; j < branches_.size(); j++) { if (branches_[j]->num_shots() > 0) { // copy shots to the root shots_ = branches_[j]->rng_shots(); @@ -359,7 +359,7 @@ void Branch::remove_empty_branches(void) { std::vector> new_branches; - for (int_t j = istart; j < branches_.size(); j++) { + for (uint_t j = istart; j < branches_.size(); j++) { if (branches_[j]->num_shots() > 0) new_branches.push_back(branches_[j]); else @@ -370,7 +370,7 @@ void Branch::remove_empty_branches(void) { void Branch::reset_branch(void) { // reset random seeds - for (int_t i = 0; i < shots_.size(); i++) { + for (uint_t i = 0; i < shots_.size(); i++) { shots_[i].set_seed(shots_[i].initial_seed()); } additional_ops_.clear(); @@ -390,7 +390,7 @@ void Branch::set_param_index(uint_t ishot, uint_t nshots_per_param) { param_shots_.clear(); param_index_.push_back(ishot / nshots_per_param); - for (int_t i = 1; i < shots_.size(); i++) { + for (uint_t i = 1; i < shots_.size(); i++) { uint_t ip = (ishot + i) / nshots_per_param; if (ip != param_index_[pos]) { param_shots_.push_back(i); diff --git a/src/simulators/stabilizer/clifford.hpp b/src/simulators/stabilizer/clifford.hpp index 1de95089ce..568413d810 100644 --- a/src/simulators/stabilizer/clifford.hpp +++ b/src/simulators/stabilizer/clifford.hpp @@ -431,7 +431,7 @@ bool Clifford::measure_and_update(const uint64_t qubit, auto measure_non_determinisitic_func = [this, rS, row, qubit](AER::int_t i) { uint64_t row_mask = ~0ull; - if ((row >> destabilizer_phases_.BLOCK_BITS) == i) + if ((row >> destabilizer_phases_.BLOCK_BITS) == (uint_t)i) row_mask ^= (1ull << (row & destabilizer_phases_.BLOCK_MASK)); uint64_t d_mask = row_mask & destabilizer_table_[qubit].X(i); diff --git a/src/simulators/stabilizer/stabilizer_state.hpp b/src/simulators/stabilizer/stabilizer_state.hpp index 9078237d62..1a2df3410e 100644 --- a/src/simulators/stabilizer/stabilizer_state.hpp +++ b/src/simulators/stabilizer/stabilizer_state.hpp @@ -255,7 +255,7 @@ void State::set_config(const Config &config) { } bool State::validate_parameters(const std::vector &ops) const { - for (int_t i = 0; i < ops.size(); i++) { + for (uint_t i = 0; i < ops.size(); i++) { if (ops[i].type == OpType::gate) { // check parameter of R gates if (ops[i].name == "rx" || ops[i].name == "ry" || ops[i].name == "rz") { @@ -639,7 +639,7 @@ template void State::get_probabilities_auxiliary(const reg_t &qubits, std::string outcome, double outcome_prob, T &probs) { - uint_t qubit_for_branching = -1; + int_t qubit_for_branching = -1; for (uint_t i = 0; i < qubits.size(); ++i) { uint_t qubit = qubits[qubits.size() - i - 1]; if (outcome[i] == 'X') { @@ -690,7 +690,7 @@ void State::get_probability_helper(const reg_t &qubits, const std::string &outcome, std::string &outcome_carry, double &prob_carry) { - uint_t qubit_for_branching = -1; + int_t qubit_for_branching = -1; for (uint_t i = 0; i < qubits.size(); ++i) { uint_t qubit = qubits[qubits.size() - i - 1]; if (outcome_carry[i] == 'X') { diff --git a/src/simulators/statevector/chunk/chunk_container.hpp b/src/simulators/statevector/chunk/chunk_container.hpp index 
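The qubit_for_branching change above is another correctness fix in the same vein: the sentinel -1 stored in a uint_t silently becomes 2^64 - 1, so the holder moves to a signed int_t and comparisons against unsigned qubit indices get an explicit cast. A minimal sketch of the sentinel idiom:

#include <cstdint>

using int_t = std::int64_t;
using uint_t = std::uint64_t;

// -1 means "no branching qubit selected yet"; a signed holder keeps the
// sentinel test meaningful, and the cast localizes the unsigned comparison.
bool is_branching_qubit(int_t qubit_for_branching, uint_t qubit) {
  if (qubit_for_branching < 0) // would always be false for an unsigned holder
    return false;
  return (uint_t)qubit_for_branching == qubit;
}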
b249b12c95..50900d3bf8 100644 --- a/src/simulators/statevector/chunk/chunk_container.hpp +++ b/src/simulators/statevector/chunk/chunk_container.hpp @@ -395,7 +395,7 @@ void ChunkContainer::UnmapBuffer(Chunk &buf) { template void ChunkContainer::unmap_all(void) { - int_t i; + uint_t i; for (i = 0; i < chunks_map_.size(); i++) chunks_map_[i] = false; num_chunk_mapped_ = 0; @@ -804,14 +804,8 @@ void ChunkContainer::ExecuteSum2(double *pSum, Function func, #endif } -void host_func_launcher(void *pParam) { - HostFuncBase *func = reinterpret_cast(pParam); - func->execute(); -} - template void ChunkContainer::allocate_chunks(void) { - uint_t i; chunks_map_.resize(num_chunks_, false); reduced_queue_begin_.resize(num_chunks_, 0); @@ -855,7 +849,7 @@ void ChunkContainer::apply_matrix( #else if (N <= 10) { #endif - int i; + uint_t i; for (i = 0; i < N; i++) { qubits_sorted.push_back(qubits[i]); } @@ -918,7 +912,7 @@ void ChunkContainer::apply_batched_matrix( } else { auto qubits_sorted = qubits; std::sort(qubits_sorted.begin(), qubits_sorted.end()); - for (int i = 0; i < N; i++) { + for (uint_t i = 0; i < N; i++) { qubits_sorted.push_back(qubits[i]); } StoreUintParams(qubits_sorted, iChunk); @@ -971,8 +965,8 @@ void ChunkContainer::apply_phase(const uint_t iChunk, const int_t control_bits, const std::complex phase, const uint_t gid, const uint_t count) { - Execute(phase_func(qubits, *(thrust::complex *)&phase), - iChunk, gid, count); + thrust::complex p(phase); + Execute(phase_func(qubits, p), iChunk, gid, count); } template @@ -989,8 +983,8 @@ void ChunkContainer::apply_multi_swaps(const uint_t iChunk, const uint_t gid, const uint_t count) { // max 5 swaps can be applied at once using GPU's shared memory - for (int_t i = 0; i < qubits.size(); i += 10) { - int_t n = 10; + for (uint_t i = 0; i < qubits.size(); i += 10) { + uint_t n = 10; if (i + n > qubits.size()) n = qubits.size() - i; @@ -1009,7 +1003,6 @@ void ChunkContainer::apply_permutation( const uint_t iChunk, const reg_t &qubits, const std::vector> &pairs, const uint_t gid, const uint_t count) { - const size_t N = qubits.size(); auto qubits_sorted = qubits; std::sort(qubits_sorted.begin(), qubits_sorted.end()); @@ -1080,7 +1073,7 @@ void ChunkContainer::probabilities(std::vector &probs, template double ChunkContainer::norm(uint_t iChunk, uint_t count) const { - double ret; + double ret = 0.0; ExecuteSum(&ret, norm_func(), iChunk, count); return ret; @@ -1089,7 +1082,7 @@ double ChunkContainer::norm(uint_t iChunk, uint_t count) const { template double ChunkContainer::trace(uint_t iChunk, uint_t row, uint_t count) const { - double ret; + double ret = 0.0; ExecuteSum(&ret, trace_func(row), iChunk, count); return ret; @@ -1108,7 +1101,7 @@ double ChunkContainer::expval_matrix(const uint_t iChunk, else { auto qubits_sorted = qubits; std::sort(qubits_sorted.begin(), qubits_sorted.end()); - for (int_t i = 0; i < N; i++) { + for (uint_t i = 0; i < N; i++) { qubits_sorted.push_back(qubits[i]); } @@ -1166,7 +1159,6 @@ void ChunkContainer::batched_expval_pauli( count, first); return; } - double ret; // specialize x_max == 0 if (x_mask == 0) { ExecuteSum2(nullptr, diff --git a/src/simulators/statevector/chunk/chunk_manager.hpp b/src/simulators/statevector/chunk/chunk_manager.hpp index cb3c7ebbb9..2e304515c6 100644 --- a/src/simulators/statevector/chunk/chunk_manager.hpp +++ b/src/simulators/statevector/chunk/chunk_manager.hpp @@ -35,11 +35,11 @@ class ChunkManager { std::vector>> chunks_; // chunk containers for each device and host - int num_devices_; 
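The apply_phase change above replaces a cast of the incoming std::complex through a thrust::complex pointer with an explicit conversion into a named temporary. The pointer cast was only valid while the two layouts and precisions happened to match; constructing the target type performs a genuine value conversion, including double to float narrowing when the data type is single precision. The shape of the fix, using std::complex on both sides so the sketch stays self-contained:

#include <complex>

// Stand-in for converting a host std::complex<double> into a (possibly
// lower-precision) device complex type without pointer punning.
template <class data_t>
std::complex<data_t> to_device_complex(const std::complex<double> &phase) {
  return std::complex<data_t>(phase.real(), phase.imag());
}

int main() {
  auto p = to_device_complex<float>(std::complex<double>(0.0, 1.0));
  return p.imag() == 1.0f ? 0 : 1;
}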
// number of devices - int num_places_; // number of places (devices + host) + uint_t num_devices_; // number of devices + uint_t num_places_; // number of places (devices + host) - int chunk_bits_; // number of qubits of chunk - int num_qubits_; // number of global qubits + uint_t chunk_bits_; // number of qubits of chunk + uint_t num_qubits_; // number of global qubits uint_t num_chunks_; // number of chunks on this process uint_t chunk_index_; // global chunk index for the first chunk @@ -105,7 +105,6 @@ class ChunkManager { template ChunkManager::ChunkManager() { - int i, j; num_places_ = 1; chunk_bits_ = 0; num_chunks_ = 0; @@ -126,7 +125,9 @@ ChunkManager::ChunkManager() { #else #ifdef AER_THRUST_GPU - if (cudaGetDeviceCount(&num_devices_) == cudaSuccess) { + int ndev; + if (cudaGetDeviceCount(&ndev) == cudaSuccess) { + num_devices_ = ndev; num_places_ = num_devices_; } else { cudaGetLastError(); @@ -168,19 +169,21 @@ uint_t ChunkManager::Allocate(int chunk_bits, int nqubits, bool density_mat, reg_t &gpus, bool enable_cuStatevec) { uint_t num_buffers; - int iDev; + uint_t iDev; uint_t is, ie, nc; - int i; + uint_t i; char *str; - bool multi_gpu = false; - bool hybrid = false; + bool hybrid = false; +#ifdef AER_THRUST_GPU + bool multi_gpu = false; //--- for test str = getenv("AER_MULTI_GPU"); if (str) { multi_gpu = true; num_places_ = num_devices_; } +#endif str = getenv("AER_HYBRID"); if (str) { hybrid = true; @@ -192,8 +195,10 @@ uint_t ChunkManager::Allocate(int chunk_bits, int nqubits, target_gpus_ = gpus; if (target_gpus_.size() > 0) { num_devices_ = target_gpus_.size(); +#ifdef AER_THRUST_GPU if (num_devices_ > 1) multi_gpu = true; +#endif } else { target_gpus_.resize(num_devices_); for (iDev = 0; iDev < num_devices_; iDev++) { @@ -203,7 +208,7 @@ uint_t ChunkManager::Allocate(int chunk_bits, int nqubits, chunk_index_ = chunk_index; - if (num_qubits_ != nqubits || chunk_bits_ != chunk_bits || + if (num_qubits_ != (uint_t)nqubits || chunk_bits_ != (uint_t)chunk_bits || nchunks > num_chunks_) { // free previous allocation Free(); @@ -224,7 +229,6 @@ uint_t ChunkManager::Allocate(int chunk_bits, int nqubits, multi_shots_ = true; #ifdef AER_THRUST_CPU - multi_gpu = false; num_places_ = 1; #else if (chunk_distribution_enable_) { @@ -260,7 +264,9 @@ uint_t ChunkManager::Allocate(int chunk_bits, int nqubits, #endif } else { // single chunk num_buffers = 0; +#ifdef AER_THRUST_GPU multi_gpu = false; +#endif num_places_ = 1; num_chunks_ = nchunks; multi_shots_ = false; @@ -346,7 +352,7 @@ uint_t ChunkManager::Allocate(int chunk_bits, int nqubits, } } if (chunks_allocated < num_chunks_) { - int nplaces_add = num_places_; + uint_t nplaces_add = num_places_; if ((num_chunks_ - chunks_allocated) < nplaces_add) nplaces_add = (num_chunks_ - chunks_allocated); // rest of chunks are stored on host @@ -391,7 +397,7 @@ uint_t ChunkManager::Allocate(int chunk_bits, int nqubits, template void ChunkManager::Free(void) { - int i; + uint_t i; for (i = 0; i < chunks_.size(); i++) { chunks_[i]->Deallocate(); @@ -408,7 +414,7 @@ void ChunkManager::Free(void) { template bool ChunkManager::MapChunk(Chunk &chunk, int iplace) { - int i; + uint_t i; for (i = 0; i < num_places_; i++) { if (chunks_[(iplace + i) % num_places_]->MapChunk(chunk)) { @@ -422,7 +428,7 @@ bool ChunkManager::MapChunk(Chunk &chunk, int iplace) { template bool ChunkManager::MapBufferChunk(Chunk &out, int idev) { if (idev < 0) { - int i; + uint_t i; for (i = 0; i < num_devices_; i++) { if (chunks_[i]->MapBufferChunk(out)) break; diff --git 
a/src/simulators/statevector/chunk/cuStateVec_chunk_container.hpp b/src/simulators/statevector/chunk/cuStateVec_chunk_container.hpp index e72d72003d..4baad583da 100644 --- a/src/simulators/statevector/chunk/cuStateVec_chunk_container.hpp +++ b/src/simulators/statevector/chunk/cuStateVec_chunk_container.hpp @@ -377,9 +377,6 @@ void cuStateVecChunkContainer::apply_diagonal_matrix( qubits32[i] = qubits[i]; int32_t *pQubits = &qubits32[control_bits]; - int32_t *pControl = nullptr; - if (control_bits > 0) - pControl = &qubits32[0]; uint_t bits; uint_t nc; @@ -686,7 +683,6 @@ void cuStateVecChunkContainer::apply_rotation( const uint_t iChunk, const reg_t &qubits, const Rotation r, const double theta, const uint_t gid, const uint_t count) { custatevecPauli_t pauli[2]; - int nPauli = 1; BaseContainer::set_device(); @@ -705,25 +701,21 @@ void cuStateVecChunkContainer::apply_rotation( case Rotation::xx: pauli[0] = CUSTATEVEC_PAULI_X; pauli[1] = CUSTATEVEC_PAULI_X; - nPauli = 2; control_bits--; break; case Rotation::yy: pauli[0] = CUSTATEVEC_PAULI_Y; pauli[1] = CUSTATEVEC_PAULI_Y; - nPauli = 2; control_bits--; break; case Rotation::zz: pauli[0] = CUSTATEVEC_PAULI_Z; pauli[1] = CUSTATEVEC_PAULI_Z; - nPauli = 2; control_bits--; break; case Rotation::zx: pauli[0] = CUSTATEVEC_PAULI_Z; pauli[1] = CUSTATEVEC_PAULI_X; - nPauli = 2; control_bits--; break; default: @@ -911,7 +903,7 @@ double cuStateVecChunkContainer::expval_pauli( const custatevecPauli_t *pauliOperatorsArray[] = {pauliOps}; const int32_t *basisBitsArray[] = {qubits32}; double ret[1]; - const uint32_t nBasisBitsArray[] = {qubits.size()}; + const uint32_t nBasisBitsArray[] = {(uint32_t)qubits.size()}; custatevecStatus_t err; err = custatevecComputeExpectationsOnPauliBasis( diff --git a/src/simulators/statevector/chunk/device_chunk_container.hpp b/src/simulators/statevector/chunk/device_chunk_container.hpp index 6ae28ae79d..d78f0ebd01 100644 --- a/src/simulators/statevector/chunk/device_chunk_container.hpp +++ b/src/simulators/statevector/chunk/device_chunk_container.hpp @@ -220,7 +220,7 @@ class DeviceChunkContainer : public ChunkContainer { void allocate_creg(uint_t num_mem, uint_t num_reg); int measured_cbit(uint_t iChunk, int qubit) { uint_t n64, i64, ibit; - if (qubit >= this->num_creg_bits_) + if ((uint_t)qubit >= this->num_creg_bits_) return -1; n64 = (this->num_creg_bits_ + 63) >> 6; i64 = qubit >> 6; @@ -324,7 +324,6 @@ uint_t DeviceChunkContainer::Allocate(int idev, int chunk_bits, bool density_matrix) { uint_t nc = chunks; uint_t i; - int mat_bits; this->chunk_bits_ = chunk_bits; this->num_qubits_ = num_qubits; @@ -359,13 +358,10 @@ uint_t DeviceChunkContainer::Allocate(int idev, int chunk_bits, if (multi_shots) { // mult-shot parallelization for small qubits multi_shots_ = true; - mat_bits = AER_DEFAULT_MATRIX_BITS; nc = chunks; num_matrices_ = chunks; } else { multi_shots_ = false; - - mat_bits = AER_DEFAULT_MATRIX_BITS; num_matrices_ = 1; nc = chunks; } @@ -519,7 +515,7 @@ void DeviceChunkContainer::calculate_matrix_buffer_size(int bits, if (shots > AER_MAX_SAMPLING_SHOTS) shots = AER_MAX_SAMPLING_SHOTS; uint_t b = this->matrix_bits_; - while ((1ull << (b * 2)) < shots) { + while ((1ull << (b * 2)) < (uint_t)shots) { b++; } this->matrix_bits_ = b; @@ -545,7 +541,7 @@ void DeviceChunkContainer::calculate_matrix_buffer_size(int bits, } params_buffer_size_ = size; - if (shots > 1 && params_buffer_size_ < shots) { + if (shots > 1 && params_buffer_size_ < (uint_t)shots) { params_buffer_size_ = shots; } } @@ -553,10 +549,9 @@ void 
DeviceChunkContainer::calculate_matrix_buffer_size(int bits, template void DeviceChunkContainer::ResizeMatrixBuffers(int bits, int max_shots) { - uint_t size; uint_t n = num_matrices_ + this->num_buffers_; - if (bits != this->matrix_bits_) { + if ((uint_t)bits != this->matrix_bits_) { calculate_matrix_buffer_size(bits, max_shots); } @@ -941,7 +936,7 @@ void DeviceChunkContainer::set_blocked_qubits(uint_t iChunk, auto qubits_sorted = qubits; std::sort(qubits_sorted.begin(), qubits_sorted.end()); - int i; + uint_t i; for (i = 0; i < qubits.size(); i++) { blocked_qubits_holder_[iBlock * QV_MAX_REGISTERS + i] = qubits_sorted[i]; } @@ -1010,8 +1005,7 @@ void DeviceChunkContainer::queue_blocked_gate( } cvector_t mat(4, 0.0); - int i; - uint_t idx, idxParam, iBlock; + uint_t iBlock; if (iChunk >= this->num_chunks_) { // for buffer chunks iBlock = num_matrices_ + iChunk - this->num_chunks_; } else { @@ -1028,7 +1022,7 @@ void DeviceChunkContainer::queue_blocked_gate( params.mask_ = mask; params.gate_ = gate; params.qubit_ = 0; - for (i = 0; i < num_blocked_qubits_[iBlock]; i++) { + for (uint_t i = 0; i < num_blocked_qubits_[iBlock]; i++) { if (blocked_qubits_holder_[iBlock * QV_MAX_REGISTERS + i] == qubit) { params.qubit_ = i; break; @@ -1408,8 +1402,8 @@ void DeviceChunkContainer::copy_reduce_buffer(std::vector &ret, count * reduce_buffer_size_, tmp.begin()); #endif - for (int_t i = 0; i < count; i++) { - for (int_t j = 0; j < num_val; j++) + for (uint_t i = 0; i < count; i++) { + for (uint_t j = 0; j < num_val; j++) ret[i * num_val + j] = tmp[i * reduce_buffer_size_ + j]; } } diff --git a/src/simulators/statevector/chunk/host_chunk_container.hpp b/src/simulators/statevector/chunk/host_chunk_container.hpp index e901086d45..696ad6478d 100644 --- a/src/simulators/statevector/chunk/host_chunk_container.hpp +++ b/src/simulators/statevector/chunk/host_chunk_container.hpp @@ -121,7 +121,6 @@ uint_t HostChunkContainer::Allocate(int idev, int chunk_bits, int matrix_bit, int max_shots, bool density_matrix) { uint_t nc = chunks; - uint_t i; ChunkContainer::chunk_bits_ = chunk_bits; ChunkContainer::num_qubits_ = num_qubits; diff --git a/src/simulators/statevector/chunk/thrust_kernels.hpp b/src/simulators/statevector/chunk/thrust_kernels.hpp index 70f9c36134..1d08973f7a 100644 --- a/src/simulators/statevector/chunk/thrust_kernels.hpp +++ b/src/simulators/statevector/chunk/thrust_kernels.hpp @@ -69,7 +69,10 @@ class GateFuncBase { public: GateFuncBase() { data_ = NULL; + matrix_ = NULL; + params_ = NULL; base_index_ = 0; + chunk_bits_ = 0; cregs_ = NULL; num_creg_bits_ = 0; conditional_bit_ = -1; @@ -147,7 +150,7 @@ class GateFuncBase { template class GateFuncWithCache : public GateFuncBase { protected: - int nqubits_; + uint_t nqubits_; public: GateFuncWithCache(uint_t nq) { nqubits_ = nq; } @@ -210,7 +213,7 @@ class GateFuncWithCache : public GateFuncBase { template class GateFuncSumWithCache : public GateFuncBase { protected: - int nqubits_; + uint_t nqubits_; public: GateFuncSumWithCache(uint_t nq) { nqubits_ = nq; } @@ -276,7 +279,7 @@ class strided_range { : public thrust::unary_function { difference_type stride; - stride_functor(difference_type stride) : stride(stride) {} + stride_functor(difference_type _stride) : stride(_stride) {} __host__ __device__ difference_type operator()(const difference_type &i) const { @@ -301,8 +304,8 @@ class strided_range { typedef PermutationIterator iterator; // construct strided_range for the range [first,last) - strided_range(Iterator first, Iterator last, 
difference_type stride) - : first(first), last(last), stride(stride) {} + strided_range(Iterator _first, Iterator _last, difference_type _stride) + : first(_first), last(_last), stride(_stride) {} iterator begin(void) const { return PermutationIterator( @@ -409,7 +412,7 @@ class initialize_component_1qubit_func : public GateFuncBase { template class initialize_component_func : public GateFuncBase { protected: - int nqubits; + uint_t nqubits; uint_t offset; uint_t mat_pos; uint_t mat_num; @@ -825,7 +828,7 @@ class MatrixMult16x16 : public GateFuncBase { int qubits_count(void) { return 4; } __host__ __device__ void operator()(const uint_t &i) const { - uint_t i0, i1, i2, i3, i4, offset, f0, f1, f2; + uint_t i0, i1, i2, i3, i4, offset; thrust::complex *vec; thrust::complex q0, q1, q2, q3, q4, q5, q6, q7; thrust::complex q8, q9, q10, q11, q12, q13, q14, q15; @@ -865,9 +868,6 @@ class MatrixMult16x16 : public GateFuncBase { q15 = vec[i0 + offset3 + offset2 + offset1 + offset0]; offset = 0; - f0 = 0; - f1 = 0; - f2 = 0; for (j = 0; j < 16; j++) { r = pMat[0 + j] * q0; r += pMat[16 + j] * q1; @@ -936,9 +936,9 @@ class MatrixMultNxN : public GateFuncWithCache { template class MatrixMultNxN_LU : public GateFuncBase { protected: - int nqubits; + uint_t nqubits; uint_t matSize; - int nswap; + uint_t nswap; public: MatrixMultNxN_LU(const cvector_t &mat, const reg_t &qb, @@ -978,7 +978,7 @@ class MatrixMultNxN_LU : public GateFuncBase { params[nqubits + i] = j; } - if (dmax != 0) { + if (dmax > 0) { c0 = matLU[(i << nqubits) + params[nqubits + i]]; for (j = i + 1; j < matSize; j++) { @@ -1211,7 +1211,7 @@ class BatchedMatrixMult2x2 : public GateFuncBase { public: BatchedMatrixMult2x2(const reg_t &qubits, uint_t imat, uint_t nshots_per_mat) { - int i; + uint_t i; nqubits_ = qubits.size(); offset_ = 1ull << qubits[nqubits_ - 1]; @@ -1402,7 +1402,7 @@ class DiagonalMult4x4 : public GateFuncBase { template class DiagonalMultNxN : public GateFuncBase { protected: - int nqubits; + uint_t nqubits; public: DiagonalMultNxN(const reg_t &qb) { nqubits = qb.size(); } @@ -1504,7 +1504,7 @@ class BatchedDiagonalMatrixMult2x2 : public GateFuncBase { public: BatchedDiagonalMatrixMult2x2(const reg_t &qubits, uint_t imat, uint_t nshots_per_mat) { - int i; + uint_t i; nqubits_ = qubits.size(); mask_ = (1ull << qubits[nqubits_ - 1]); @@ -1557,7 +1557,6 @@ class BatchedDiagonalMatrixMultNxN : public GateFuncBase { public: BatchedDiagonalMatrixMultNxN(const uint_t nq, uint_t imat, uint_t nshots_per_mat) { - int i; nqubits_ = nq; matrix_begin_ = imat; @@ -1894,9 +1893,8 @@ class CSwapChunk_func : public GateFuncBase { CSwapChunk_func(const reg_t &qubits, uint_t block_bits, thrust::complex *pVec0, thrust::complex *pVec1, bool wb) { - int i; - int nqubits; - int qubit_t; + uint_t nqubits; + uint_t qubit_t; nqubits = qubits.size(); if (qubits[nqubits - 2] < qubits[nqubits - 1]) { @@ -2078,10 +2076,8 @@ class NormMatrixMultNxN : public GateFuncSumWithCache { thrust::complex q, r; thrust::complex m; uint_t mat_size, irow; - thrust::complex *vec; thrust::complex *pMat; - vec = this->data_; pMat = this->matrix_; mat_size = 1ull << this->nqubits_; @@ -2492,7 +2488,7 @@ class batched_expval_I_func : public GateFuncBase { operator()(const uint_t &i) const { thrust::complex q; thrust::complex *vec; - double d, dv; + double d, dv = 0.0; vec = this->data_; q = vec[i]; @@ -2529,7 +2525,7 @@ class batched_expval_pauli_Z_func : public GateFuncBase { operator()(const uint_t &i) const { thrust::complex *vec; thrust::complex q0; - double d, 
dv; + double d, dv = 0.0; vec = this->data_; @@ -2585,7 +2581,7 @@ class batched_expval_pauli_XYZ_func : public GateFuncBase { thrust::complex q1; thrust::complex q0p; thrust::complex q1p; - double d0, d1, ret, ret_v; + double d0, d1, ret, ret_v = 0.0; uint_t idx0, idx1; vec = this->data_; diff --git a/src/simulators/statevector/qubitvector.hpp b/src/simulators/statevector/qubitvector.hpp index 4039c7c5f3..94753f409b 100755 --- a/src/simulators/statevector/qubitvector.hpp +++ b/src/simulators/statevector/qubitvector.hpp @@ -955,7 +955,9 @@ void QubitVector::allocate_mem(size_t data_size) { if (data_ == nullptr) { #if !defined(_WIN64) && !defined(_WIN32) void *data = nullptr; - posix_memalign(&data, 64, sizeof(std::complex) * data_size); + if (posix_memalign(&data, 64, sizeof(std::complex) * data_size) != + 0) + throw std::runtime_error("Cannot allocate memory by posix_memalign"); data_ = reinterpret_cast *>(data); #else data_ = reinterpret_cast *>( @@ -969,7 +971,8 @@ void QubitVector::allocate_checkpoint(size_t data_size) { free_checkpoint(); #if !defined(_WIN64) && !defined(_WIN32) void *data = nullptr; - posix_memalign(&data, 64, sizeof(std::complex) * data_size); + if (posix_memalign(&data, 64, sizeof(std::complex) * data_size) != 0) + throw std::runtime_error("Cannot allocate memory by posix_memalign"); checkpoint_ = reinterpret_cast *>(data); #else checkpoint_ = reinterpret_cast *>( @@ -1765,13 +1768,13 @@ void QubitVector::apply_chunk_swap(const reg_t &qubits, if (write_back) { #pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \ num_threads(omp_threads_) - for (int_t k = 0; k < data_size_; ++k) { + for (int_t k = 0; k < (int_t)data_size_; ++k) { std::swap(data_[k], src.data_[k]); } } else { #pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \ num_threads(omp_threads_) - for (int_t k = 0; k < data_size_; ++k) { + for (int_t k = 0; k < (int_t)data_size_; ++k) { data_[k] = src.data_[k]; } } @@ -1803,7 +1806,7 @@ void QubitVector::apply_chunk_swap(const reg_t &qubits, if (q0 >= num_qubits_) { // exchange whole of chunk each other #pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \ num_threads(omp_threads_) - for (int_t k = 0; k < data_size_; ++k) { + for (int_t k = 0; k < (int_t)data_size_; ++k) { data_[k] = recv_buffer_[k]; } } else { @@ -1824,13 +1827,13 @@ void QubitVector::apply_chunk_swap(QubitVector &src, if (src.chunk_index_ == chunk_index_) { #pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \ num_threads(omp_threads_) - for (int_t k = 0; k < size; ++k) { + for (int_t k = 0; k < (int_t)size; ++k) { data_[dest_offset + k] = src.recv_buffer_[src_offset + k]; } } else { #pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \ num_threads(omp_threads_) - for (int_t k = 0; k < size; ++k) { + for (int_t k = 0; k < (int_t)size; ++k) { std::swap(data_[dest_offset + k], src.data_[src_offset + k]); } } @@ -1838,8 +1841,8 @@ void QubitVector::apply_chunk_swap(QubitVector &src, template void QubitVector::apply_multi_swaps(const reg_t &qubits) { - for (int_t i = 0; i < qubits.size(); i += 10) { - int_t n = 10; + for (uint_t i = 0; i < qubits.size(); i += 10) { + uint_t n = 10; if (i + n > qubits.size()) n = qubits.size() - i; @@ -1850,17 +1853,17 @@ void QubitVector::apply_multi_swaps(const reg_t &qubits) { auto lambda = [&](const indexes_t &inds) -> void { cvector_t cache(size); - for (int_t i = 0; i < size; i++) - cache[i] = data_[inds[i]]; + for 
(uint_t ii = 0; ii < size; ii++) + cache[ii] = data_[inds[ii]]; - for (int_t i = 0; i < size; i++) { - uint_t pos = i; - for (int_t j = 0; j < nq; j += 2) { + for (uint_t ii = 0; ii < size; ii++) { + uint_t pos = ii; + for (uint_t j = 0; j < nq; j += 2) { if ((((pos >> j) & 1) ^ ((pos >> (j + 1)) & 1)) != 0) { pos ^= ((1ull << j) | (1ull << (j + 1))); } } - data_[inds[i]] = cache[pos]; + data_[inds[ii]] = cache[pos]; } }; apply_lambda(lambda, qubits_swap); diff --git a/src/simulators/statevector/qubitvector_thrust.hpp b/src/simulators/statevector/qubitvector_thrust.hpp index 272ae5e39b..36bb5a9837 100644 --- a/src/simulators/statevector/qubitvector_thrust.hpp +++ b/src/simulators/statevector/qubitvector_thrust.hpp @@ -819,17 +819,17 @@ void QubitVectorThrust::initialize_component( std::sort(qubits_sorted.begin(), qubits_sorted.end()); auto qubits_param = qubits; - int i; + uint_t i; for (i = 0; i < qubits.size(); i++) qubits_param.push_back(qubits_sorted[i]); - int nbit = chunk_.container()->matrix_bits(); + uint_t nbit = chunk_.container()->matrix_bits(); if (nbit > qubits.size()) nbit = qubits.size(); uint_t dim = 1ull << qubits.size(); uint_t sub_dim = 1ull << nbit; - for (uint_t i = 0; i < dim; i += sub_dim) { + for (i = 0; i < dim; i += sub_dim) { cvector_t state(sub_dim); for (uint_t j = 0; j < sub_dim; j++) state[j] = state0[dim - sub_dim - i + j]; @@ -872,7 +872,7 @@ uint_t QubitVectorThrust::chunk_setup(int chunk_bits, int num_qubits, if (chunk_manager_->chunk_bits() == chunk_bits && chunk_manager_->num_qubits() == num_qubits) { - bool mapped = chunk_manager_->MapChunk(chunk_, 0); + chunk_manager_->MapChunk(chunk_, 0); chunk_.set_chunk_index(chunk_index_); return num_local_chunks; } @@ -903,8 +903,8 @@ uint_t QubitVectorThrust::chunk_setup(int chunk_bits, int num_qubits, recv_chunk_.unmap(); // mapping/setting chunk - bool mapped = chunk_manager_->MapChunk(chunk_, 0); chunk_.set_chunk_index(chunk_index_); + chunk_manager_->MapChunk(chunk_, 0); return num_chunks_allocated; } @@ -932,7 +932,7 @@ QubitVectorThrust::chunk_setup(const QubitVectorThrust &base, // mapping/setting chunk chunk_manager_ = base.chunk_manager_; - bool mapped = chunk_manager_->MapChunk(chunk_, 0); + chunk_manager_->MapChunk(chunk_, 0); return 0; } @@ -1260,7 +1260,7 @@ void QubitVectorThrust::initialize_from_vector(const list_t &statevec) { int_t i; #pragma omp parallel for if (num_qubits_ > omp_threshold_ && omp_threads_ > 1) \ num_threads(omp_threads_) - for (i = 0; i < data_size_; i++) { + for (i = 0; i < (int_t)data_size_; i++) { tmp[i] = statevec[i]; } initialize_from_data(&tmp[0], tmp.size()); @@ -1322,7 +1322,7 @@ void QubitVectorThrust::initialize_creg( if (chunk_.pos() == 0) { chunk_.container()->allocate_creg(num_cmem_bits_, num_creg_bits_); - int_t i; + uint_t i; for (i = 0; i < num_register; i++) { if (register_hex[register_hex.size() - 1 - i] == '0') { store_cregister(i, 0); @@ -1528,7 +1528,6 @@ void QubitVectorThrust::apply_multiplexer( for (const auto &q : control_qubits) { qubits.push_back(q); } - size_t N = qubits.size(); cvector_t matMP(DIM * DIM, 0.0); uint_t b, i, j; @@ -1627,7 +1626,7 @@ void QubitVectorThrust::apply_mcx(const reg_t &qubits) { return; if (register_blocking_) { - int i; + uint_t i; uint_t mask = 0; for (i = 0; i < qubits.size() - 1; i++) { mask |= (1ull << qubits[i]); @@ -1645,7 +1644,7 @@ void QubitVectorThrust::apply_mcy(const reg_t &qubits) { return; if (register_blocking_) { - int i; + uint_t i; uint_t mask = 0; for (i = 0; i < qubits.size() - 1; i++) { mask |= (1ull 
<< qubits[i]); @@ -1678,7 +1677,7 @@ template void QubitVectorThrust::apply_chunk_swap(const reg_t &qubits, QubitVectorThrust &src, bool write_back) { - int q0, q1, t; + uint_t q0, q1, t; q0 = qubits[0]; q1 = qubits[1]; @@ -1759,7 +1758,7 @@ void QubitVectorThrust::apply_chunk_swap(const reg_t &qubits, template void QubitVectorThrust::apply_chunk_swap(const reg_t &qubits, uint_t remote_chunk_index) { - int q0, q1, t; + uint_t q0, q1, t; q0 = qubits[qubits.size() - 2]; q1 = qubits[qubits.size() - 1]; @@ -1840,7 +1839,7 @@ void QubitVectorThrust::apply_mcphase( return; if (register_blocking_) { - int i; + uint_t i; uint_t mask = 0; for (i = 0; i < qubits.size() - 1; i++) { mask |= (1ull << qubits[i]); @@ -1875,7 +1874,7 @@ void QubitVectorThrust::apply_mcu(const reg_t &qubits, return; } else { if (register_blocking_) { - int i; + uint_t i; uint_t mask = 0; for (i = 0; i < qubits.size() - 1; i++) { mask |= (1ull << qubits[i]); @@ -1897,7 +1896,7 @@ void QubitVectorThrust::apply_mcu(const reg_t &qubits, return; } else { if (register_blocking_) { - int i; + uint_t i; uint_t mask = 0; for (i = 0; i < qubits.size() - 1; i++) { mask |= (1ull << qubits[i]); @@ -2252,7 +2251,7 @@ template void QubitVectorThrust::apply_batched_measure( const reg_t &qubits, std::vector &rng, const reg_t &cmemory, const reg_t &cregs) { - const int_t DIM = 1 << qubits.size(); + const uint_t DIM = 1 << qubits.size(); uint_t i, count = 1; if (enable_batch_) { if (chunk_.pos() != 0) { @@ -2386,7 +2385,7 @@ class reset_func : public Chunk::GateFuncBase { template void QubitVectorThrust::apply_batched_reset( const reg_t &qubits, std::vector &rng) { - const int_t DIM = 1 << qubits.size(); + const uint_t DIM = 1 << qubits.size(); uint_t i, count = 1; if (enable_batch_) { if (chunk_.pos() != 0) { @@ -2547,7 +2546,6 @@ class set_batched_creg_func : public Chunk::GateFuncBase { uint_t *mask; uint_t val = 1; n64 = (this->num_creg_bits_ + 63) >> 6; - int j; mask = this->params_; @@ -2686,7 +2684,7 @@ void QubitVectorThrust::batched_expval_pauli( std::vector &val, const reg_t &qubits, const std::string &pauli, bool variance, std::complex param, bool last, const complex_t initial_phase) const { - uint_t i, count = 1; + uint_t count = 1; if (enable_batch_) { if (chunk_.pos() != 0) { return; // first chunk execute all in batch @@ -2898,12 +2896,11 @@ void QubitVectorThrust::apply_batched_pauli_ops( } uint_t count = ops.size(); int num_inner_threads = omp_get_max_threads() / num_threads_per_group_; - int_t i; reg_t params(4 * count); auto count_paulis = [this, ¶ms, ops](int_t i) { - int_t j; + uint_t j; uint_t x_max = 0; uint_t num_y = 0; uint_t x_mask = 0; @@ -2975,7 +2972,6 @@ class MatrixMult2x2_conditional : public Chunk::GateFuncBase { thrust::complex q0, q1; thrust::complex *vec0; thrust::complex *vec1; - double p, p0, p1, rnd; uint_t iChunk = i >> this->chunk_bits_; double scale = @@ -3012,7 +3008,7 @@ class MatrixMultNxN_conditional : public Chunk::GateFuncWithCache { __host__ __device__ void run_with_cache(uint_t _tid, uint_t _idx, thrust::complex *_cache) const { - uint_t j, threadID; + uint_t j; thrust::complex q, r; thrust::complex m; uint_t mat_size, irow; @@ -3066,7 +3062,6 @@ class check_kraus_probability_func : public Chunk::GateFuncBase { __host__ __device__ void operator()(const uint_t &i) const { uint_t iChunk = i; double p0, p1, rnd; - bool mult = false; p0 = reduce_[iChunk * reduce_buf_size_]; probs_[iChunk + QV_RESET_CURRENT_PROB * prob_buf_size_] = p0; @@ -3103,7 +3098,6 @@ void 
QubitVectorThrust::apply_batched_kraus( std::vector &rng) { const size_t N = qubits.size(); uint_t i, count; - double ret; count = chunk_.container()->num_chunks(); @@ -3266,7 +3260,7 @@ void QubitVectorThrust::apply_bfunc(const Operations::Op &op) { return; // first chunk execute all in batch reg_t params; - int_t i, n64, n, iparam; + uint_t i, n64, n, iparam; // registers to be updated for (i = 0; i < op.registers.size(); i++) @@ -3377,7 +3371,7 @@ void QubitVectorThrust::apply_roerror(const Operations::Op &op, reg_t params; std::vector probs; - int_t i, j, offset; + uint_t i, offset; for (i = 0; i < op.memory.size(); i++) params.push_back(op.memory[i]); diff --git a/src/simulators/statevector/qv_avx2.cpp b/src/simulators/statevector/qv_avx2.cpp index 4d92ff8283..4053c34f97 100644 --- a/src/simulators/statevector/qv_avx2.cpp +++ b/src/simulators/statevector/qv_avx2.cpp @@ -43,7 +43,7 @@ namespace { /** Remember we cannot use STL (or memcpy) **/ template void copy(T dest, const U orig, size_t size) { - for (auto i = 0; i < size; ++i) + for (size_t i = 0; i < size; ++i) dest[i] = orig[i]; } @@ -1114,7 +1114,8 @@ Avx apply_diagonal_matrix_avx( #endif #if !defined(_WIN64) && !defined(_WIN32) void *data = nullptr; - posix_memalign(&data, 64, sizeof(std::complex) * 2); + if (posix_memalign(&data, 64, sizeof(std::complex) * 2) != 0) + throw std::runtime_error("Cannot allocate memory by posix_memalign"); auto double_tmp = reinterpret_cast *>(data); #else auto double_tmp = reinterpret_cast *>( @@ -1122,7 +1123,7 @@ Avx apply_diagonal_matrix_avx( #endif size_t q0_mask_ = 0; - for (int i = 0; i < qregs_size; ++i) { + for (size_t i = 0; i < qregs_size; ++i) { if (qregs[i] == 0) { q0_mask_ = 1UL << i; break; @@ -1135,9 +1136,9 @@ Avx apply_diagonal_matrix_avx( #pragma omp for for (int64_t k = 0; k < END; k += 1) { - const auto base = k << (batch + 1); - const auto until = base + (1UL << (batch + 1)); - for (auto i = base; i < until; i += 2) { + const int64_t base = k << (batch + 1); + const int64_t until = base + (1UL << (batch + 1)); + for (int64_t i = base; i < until; i += 2) { auto tgt_qv_data = _mm256_load(reinterpret_cast(&(qv_data[i]))); auto input_data = _load_diagonal_input(input_vec, double_tmp, i, qregs, @@ -1171,7 +1172,8 @@ Avx apply_diagonal_matrix_avx(float *qv_data_, const uint64_t data_size, { #if !defined(_WIN64) && !defined(_WIN32) void *data = nullptr; - posix_memalign(&data, 64, sizeof(std::complex) * 4); + if (posix_memalign(&data, 64, sizeof(std::complex) * 4) != 0) + throw std::runtime_error("Cannot allocate memory by posix_memalign"); auto float_tmp = reinterpret_cast *>(data); #else auto float_tmp = reinterpret_cast *>( @@ -1199,9 +1201,9 @@ Avx apply_diagonal_matrix_avx(float *qv_data_, const uint64_t data_size, #pragma omp for for (int64_t k = 0; k < END; k += 1) { - const auto base = k << (batch + 2); - const auto until = base + (1UL << (batch + 2)); - for (auto i = base; i < until; i += 4) { + const int64_t base = k << (batch + 2); + const int64_t until = base + (1UL << (batch + 2)); + for (int64_t i = base; i < until; i += 4) { m256_t tgt_qv_data = _mm256_load(reinterpret_cast(&(qv_data[i]))); auto input_data = _load_diagonal_input(input_vec, float_tmp, i, qregs, diff --git a/src/simulators/statevector/statevector_executor.hpp b/src/simulators/statevector/statevector_executor.hpp index 27cdf4a3ae..5301035660 100644 --- a/src/simulators/statevector/statevector_executor.hpp +++ b/src/simulators/statevector/statevector_executor.hpp @@ -41,6 +41,7 @@ class Executor : public 
CircuitExecutor::ParallelStateExecutor, using Base = CircuitExecutor::MultiStateExecutor; using BasePar = CircuitExecutor::ParallelStateExecutor; using BaseBatch = CircuitExecutor::BatchShotsExecutor; + using Base::sample_measure; protected: public: @@ -434,7 +435,7 @@ bool Executor::apply_branching_op(CircuitExecutor::Branch &root, template void Executor::initialize_qreg(uint_t num_qubits) { - int_t i; + uint_t i; for (i = 0; i < Base::states_.size(); i++) { Base::states_[i].qreg().set_num_qubits(BasePar::chunk_bits_); @@ -442,8 +443,8 @@ void Executor::initialize_qreg(uint_t num_qubits) { if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t iChunk = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) { if (Base::global_state_index_ + iChunk == 0 || this->num_qubits_ == this->chunk_bits_) { @@ -482,7 +483,7 @@ auto Executor::move_to_vector(void) { state.resize(Base::num_local_states_ << BasePar::chunk_bits_); #pragma omp parallel for if (BasePar::chunk_omp_parallel_) private(iChunk) - for (iChunk = 1; iChunk < Base::states_.size(); iChunk++) { + for (iChunk = 1; iChunk < (int_t)Base::states_.size(); iChunk++) { auto tmp = Base::states_[iChunk].qreg().move_to_vector(); uint_t j, offset = iChunk << BasePar::chunk_bits_; for (j = 0; j < tmp.size(); j++) { @@ -511,7 +512,7 @@ auto Executor::copy_to_vector(void) { state.resize(Base::num_local_states_ << BasePar::chunk_bits_); #pragma omp parallel for if (BasePar::chunk_omp_parallel_) private(iChunk) - for (iChunk = 1; iChunk < Base::states_.size(); iChunk++) { + for (iChunk = 1; iChunk < (int_t)Base::states_.size(); iChunk++) { auto tmp = Base::states_[iChunk].qreg().copy_to_vector(); uint_t j, offset = iChunk << BasePar::chunk_bits_; for (j = 0; j < tmp.size(); j++) { @@ -553,12 +554,12 @@ double Executor::expval_pauli(const reg_t &qubits, reg_t qubits_out_chunk; std::string pauli_in_chunk; std::string pauli_out_chunk; - int_t i, n; + uint_t n; double expval(0.); // get inner/outer chunk pauli string n = pauli.size(); - for (i = 0; i < n; i++) { + for (uint_t i = 0; i < n; i++) { if (qubits[i] < BasePar::chunk_bits_) { qubits_in_chunk.push_back(qubits[i]); pauli_in_chunk.push_back(pauli[n - i - 1]); @@ -583,18 +584,18 @@ double Executor::expval_pauli(const reg_t &qubits, if (x_mask != 0) { // pairing state is out of chunk bool on_same_process = true; #ifdef AER_MPI - int proc_bits = 0; + uint_t proc_bits = 0; uint_t procs = Base::distributed_procs_; while (procs > 1) { if ((procs & 1) != 0) { - proc_bits = -1; + proc_bits = 0; break; } proc_bits++; procs >>= 1; } - if (x_mask & (~((1ull << (Base::num_qubits_ - proc_bits)) - 1)) != - 0) { // data exchange between processes is required + if ((x_mask & (~((1ull << (Base::num_qubits_ - proc_bits)) - 1))) != + 0) { // data exchange between processes is required on_same_process = false; } #endif @@ -609,8 +610,8 @@ double Executor::expval_pauli(const reg_t &qubits, auto apply_expval_pauli_chunk = [this, x_mask, z_mask, x_max, mask_u, mask_l, qubits_in_chunk, pauli_in_chunk, phase](int_t iGroup) { - double expval = 0.0; - for (int_t iChunk = Base::top_state_of_group_[iGroup]; + double expval_t = 0.0; + for (uint_t iChunk = Base::top_state_of_group_[iGroup]; iChunk < Base::top_state_of_group_[iGroup + 1]; iChunk++) { uint_t pair_chunk = iChunk ^ x_mask; if 
(iChunk < pair_chunk) { @@ -618,20 +619,20 @@ double Executor::expval_pauli(const reg_t &qubits, z_count = AER::Utils::popcount(iChunk & z_mask); z_count_pair = AER::Utils::popcount(pair_chunk & z_mask); - expval += Base::states_[iChunk - Base::global_state_index_] - .qreg() - .expval_pauli(qubits_in_chunk, pauli_in_chunk, - Base::states_[pair_chunk].qreg(), - z_count, z_count_pair, phase); + expval_t += Base::states_[iChunk - Base::global_state_index_] + .qreg() + .expval_pauli(qubits_in_chunk, pauli_in_chunk, + Base::states_[pair_chunk].qreg(), + z_count, z_count_pair, phase); } } - return expval; + return expval_t; }; expval += Utils::apply_omp_parallel_for_reduction( (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1), 0, Base::num_global_states_ / 2, apply_expval_pauli_chunk); } else { - for (int_t i = 0; i < Base::num_global_states_ / 2; i++) { + for (uint_t i = 0; i < Base::num_global_states_ / 2; i++) { uint_t iChunk = ((i << 1) & mask_u) | (i & mask_l); uint_t pair_chunk = iChunk ^ x_mask; uint_t iProc = BasePar::get_process_by_chunk(pair_chunk); @@ -675,9 +676,9 @@ double Executor::expval_pauli(const reg_t &qubits, z_mask >>= BasePar::chunk_bits_; if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for reduction(+ : expval) - for (int_t ig = 0; ig < Base::num_groups_; ig++) { + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { double e_tmp = 0.0; - for (int_t iChunk = Base::top_state_of_group_[ig]; + for (uint_t iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) { double sign = 1.0; if (z_mask && (AER::Utils::popcount( @@ -690,7 +691,7 @@ double Executor::expval_pauli(const reg_t &qubits, expval += e_tmp; } } else { - for (i = 0; i < Base::states_.size(); i++) { + for (uint_t i = 0; i < Base::states_.size(); i++) { double sign = 1.0; if (z_mask && (AER::Utils::popcount((i + Base::global_state_index_) & z_mask) & @@ -704,15 +705,15 @@ double Executor::expval_pauli(const reg_t &qubits, } else { // all bits are inside chunk if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for reduction(+ : expval) - for (int_t ig = 0; ig < Base::num_groups_; ig++) { + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { double e_tmp = 0.0; - for (int_t iChunk = Base::top_state_of_group_[ig]; + for (uint_t iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) e_tmp += Base::states_[iChunk].qreg().expval_pauli(qubits, pauli); expval += e_tmp; } } else { - for (i = 0; i < Base::states_.size(); i++) + for (uint_t i = 0; i < Base::states_.size(); i++) expval += Base::states_[i].qreg().expval_pauli(qubits, pauli); } } @@ -777,10 +778,10 @@ void Executor::apply_save_density_matrix(const Operations::Op &op, double sum = 0.0; if (BasePar::chunk_omp_parallel_) { #pragma omp parallel for reduction(+ : sum) - for (int_t i = 0; i < Base::states_.size(); i++) + for (int_t i = 0; i < (int_t)Base::states_.size(); i++) sum += Base::states_[i].qreg().norm(); } else { - for (int_t i = 0; i < Base::states_.size(); i++) + for (uint_t i = 0; i < Base::states_.size(); i++) sum += Base::states_[i].qreg().norm(); } #ifdef AER_MPI @@ -906,7 +907,7 @@ template rvector_t Executor::measure_probs(const reg_t &qubits) const { uint_t dim = 1ull << qubits.size(); rvector_t sum(dim, 0.0); - int_t i, j, k; + uint_t i, j, k; reg_t qubits_in_chunk; reg_t qubits_out_chunk; @@ -916,8 +917,8 @@ rvector_t Executor::measure_probs(const reg_t &qubits) const { if 
(qubits_in_chunk.size() > 0) { if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for private(i, j, k) - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t i = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (i = Base::top_state_of_group_[ig]; i < Base::top_state_of_group_[ig + 1]; i++) { auto chunkSum = Base::states_[i].qreg().probabilities(qubits_in_chunk); @@ -983,8 +984,8 @@ rvector_t Executor::measure_probs(const reg_t &qubits) const { } else { // there is no bit in chunk if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for private(i, j, k) - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t i = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (i = Base::top_state_of_group_[ig]; i < Base::top_state_of_group_[ig + 1]; i++) { auto nr = std::real(Base::states_[i].qreg().norm()); int idx = 0; @@ -1002,7 +1003,7 @@ rvector_t Executor::measure_probs(const reg_t &qubits) const { } else { for (i = 0; i < Base::states_.size(); i++) { auto nr = std::real(Base::states_[i].qreg().norm()); - int idx = 0; + uint_t idx = 0; for (k = 0; k < qubits_out_chunk.size(); k++) { if ((((i + Base::global_state_index_) << (BasePar::chunk_bits_)) >> qubits_out_chunk[k]) & @@ -1058,14 +1059,14 @@ void Executor::measure_reset_update(const std::vector &qubits, if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) Base::states_[ic].apply_diagonal_matrix(qubits, mdiag); } } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) Base::states_[ic].apply_diagonal_matrix(qubits, mdiag); } @@ -1085,14 +1086,14 @@ void Executor::measure_reset_update(const std::vector &qubits, if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) Base::states_[ic].apply_diagonal_matrix(qubits, mdiag); } } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) Base::states_[ic].apply_diagonal_matrix(qubits, mdiag); } @@ -1120,20 +1121,20 @@ void Executor::measure_reset_update(const std::vector &qubits, // apply permutation to swap state if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) Base::states_[ic].qreg().apply_matrix(qubits, perm); } } else { - for (int_t ig = 0; ig < 
Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) Base::states_[ic].qreg().apply_matrix(qubits, perm); } } } else { - for (int_t i = 0; i < qubits.size(); i++) { + for (int_t i = 0; i < (int_t)qubits.size(); i++) { if (((final_state >> i) & 1) != ((meas_state >> i) & 1)) { BasePar::apply_chunk_x(qubits[i]); } @@ -1147,7 +1148,7 @@ template std::vector Executor::sample_measure(const reg_t &qubits, uint_t shots, RngEngine &rng) const { - int_t i, j; + uint_t i, j; // Generate flat register for storing std::vector rnds; rnds.reserve(shots); @@ -1162,8 +1163,8 @@ std::vector Executor::sample_measure(const reg_t &qubits, // calculate per chunk sum if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) { bool batched = Base::states_[ic].qreg().enable_batch( true); // return sum of all chunks in group @@ -1172,8 +1173,8 @@ std::vector Executor::sample_measure(const reg_t &qubits, } } } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (uint_t ig = 0; ig < Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) { bool batched = Base::states_[ic].qreg().enable_batch( true); // return sum of all chunks in group @@ -1271,9 +1272,9 @@ void Executor::apply_initialize(const reg_t &qubits, auto apply_global_phase = [&tmp, ¶ms_in, global_phase](int_t i) { tmp[i] = params_in[i] * global_phase; }; - Utils::apply_omp_parallel_for((qubits.size() > Base::omp_qubit_threshold_), - 0, params_in.size(), apply_global_phase, - Base::parallel_state_update_); + Utils::apply_omp_parallel_for( + (qubits.size() > (uint_t)Base::omp_qubit_threshold_), 0, + params_in.size(), apply_global_phase, Base::parallel_state_update_); } const cvector_t ¶ms = tmp.empty() ? 
params_in : tmp; if (qubits.size() == Base::num_qubits_) { @@ -1296,13 +1297,13 @@ void Executor::apply_initialize(const reg_t &qubits, if (qubits_out_chunk.size() == 0) { // no qubits outside of chunk if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t i = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t i = Base::top_state_of_group_[ig]; i < Base::top_state_of_group_[ig + 1]; i++) Base::states_[i].qreg().initialize_component(qubits, params); } } else { - for (int_t i = 0; i < Base::states_.size(); i++) + for (uint_t i = 0; i < Base::states_.size(); i++) Base::states_[i].qreg().initialize_component(qubits, params); } } else { @@ -1311,16 +1312,16 @@ void Executor::apply_initialize(const reg_t &qubits, // scatter inside chunks const size_t dim = 1ULL << qubits_in_chunk.size(); cvector_t perm(dim * dim, 0.); - for (int_t i = 0; i < dim; i++) { + for (uint_t i = 0; i < dim; i++) { perm[i] = 1.0; } if (BasePar::chunk_omp_parallel_) { #pragma omp parallel for - for (int_t i = 0; i < Base::states_.size(); i++) + for (int_t i = 0; i < (int_t)Base::states_.size(); i++) Base::states_[i].qreg().apply_matrix(qubits_in_chunk, perm); } else { - for (int_t i = 0; i < Base::states_.size(); i++) + for (uint_t i = 0; i < Base::states_.size(); i++) Base::states_[i].qreg().apply_matrix(qubits_in_chunk, perm); } } @@ -1329,8 +1330,9 @@ void Executor::apply_initialize(const reg_t &qubits, auto sorted_qubits_out = qubits_out_chunk; std::sort(sorted_qubits_out.begin(), sorted_qubits_out.end()); - for (int_t i = 0; i < (1ull << (Base::num_qubits_ - BasePar::chunk_bits_ - - qubits_out_chunk.size())); + for (uint_t i = 0; + i < (1ull << (Base::num_qubits_ - BasePar::chunk_bits_ - + qubits_out_chunk.size())); i++) { uint_t baseChunk = 0; uint_t j, ii, t; @@ -1344,7 +1346,7 @@ void Executor::apply_initialize(const reg_t &qubits, baseChunk >>= BasePar::chunk_bits_; for (j = 1; j < (1ull << qubits_out_chunk.size()); j++) { - int_t ic = baseChunk; + uint_t ic = baseChunk; for (t = 0; t < qubits_out_chunk.size(); t++) { if ((j >> t) & 1) ic += (1ull << (qubits_out_chunk[t] - BasePar::chunk_bits_)); @@ -1385,13 +1387,13 @@ void Executor::apply_initialize(const reg_t &qubits, // initialize by params if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t i = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t i = Base::top_state_of_group_[ig]; i < Base::top_state_of_group_[ig + 1]; i++) Base::states_[i].qreg().apply_diagonal_matrix(qubits, params); } } else { - for (int_t i = 0; i < Base::states_.size(); i++) + for (uint_t i = 0; i < Base::states_.size(); i++) Base::states_[i].qreg().apply_diagonal_matrix(qubits, params); } } @@ -1402,7 +1404,7 @@ void Executor::initialize_from_vector(const cvector_t ¶ms) { uint_t local_offset = Base::global_state_index_ << BasePar::chunk_bits_; #pragma omp parallel for if (BasePar::chunk_omp_parallel_) - for (int_t i = 0; i < Base::states_.size(); i++) { + for (int_t i = 0; i < (int_t)Base::states_.size(); i++) { // copy part of state for this chunk cvector_t tmp(1ull << BasePar::chunk_bits_); std::copy(params.begin() + local_offset + (i << BasePar::chunk_bits_), @@ -1443,13 +1445,13 @@ void Executor::apply_kraus(const reg_t &qubits, p = 0.0; if (BasePar::chunk_omp_parallel_ && 
Base::num_groups_ > 1) { #pragma omp parallel for reduction(+ : p) - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t i = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t i = Base::top_state_of_group_[ig]; i < Base::top_state_of_group_[ig + 1]; i++) p += Base::states_[i].qreg().norm(qubits, vmat); } } else { - for (int_t i = 0; i < Base::states_.size(); i++) + for (uint_t i = 0; i < Base::states_.size(); i++) p += Base::states_[i].qreg().norm(qubits, vmat); } @@ -1465,14 +1467,14 @@ void Executor::apply_kraus(const reg_t &qubits, // apply Kraus projection operator if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) Base::states_[ic].qreg().apply_matrix(qubits, vmat); } } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (uint_t ig = 0; ig < Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) Base::states_[ic].qreg().apply_matrix(qubits, vmat); } @@ -1489,14 +1491,14 @@ void Executor::apply_kraus(const reg_t &qubits, auto vmat = Utils::vectorize_matrix(renorm * kmats.back()); if (BasePar::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) Base::states_[ic].qreg().apply_matrix(qubits, vmat); } } else { - for (int_t ig = 0; ig < Base::num_groups_; ig++) { - for (int_t ic = Base::top_state_of_group_[ig]; + for (uint_t ig = 0; ig < Base::num_groups_; ig++) { + for (uint_t ic = Base::top_state_of_group_[ig]; ic < Base::top_state_of_group_[ig + 1]; ic++) Base::states_[ic].qreg().apply_matrix(qubits, vmat); } @@ -1513,7 +1515,7 @@ Executor::sample_measure_with_prob(CircuitExecutor::Branch &root, uint_t nshots = root.num_shots(); reg_t shot_branch(nshots); - for (int_t i = 0; i < nshots; i++) { + for (uint_t i = 0; i < nshots; i++) { shot_branch[i] = root.rng_shots()[i].rand_int(probs); } @@ -1547,11 +1549,11 @@ void Executor::measure_reset_update(CircuitExecutor::Branch &root, root.branches()[i]->add_op_after_branch(op); if (final_state >= 0 && final_state != i) { - Operations::Op op; - op.type = OpType::gate; - op.name = "mcx"; - op.qubits = qubits; - root.branches()[i]->add_op_after_branch(op); + Operations::Op op2; + op2.type = OpType::gate; + op2.name = "mcx"; + op2.qubits = qubits; + root.branches()[i]->add_op_after_branch(op2); } } } @@ -1559,7 +1561,7 @@ void Executor::measure_reset_update(CircuitExecutor::Branch &root, else { // Diagonal matrix for projecting and renormalizing to measurement outcome const size_t dim = 1ULL << qubits.size(); - for (int_t i = 0; i < dim; i++) { + for (uint_t i = 0; i < dim; i++) { cvector_t mdiag(dim, 0.); mdiag[i] = 1. 
/ std::sqrt(meas_probs[i]); @@ -1569,20 +1571,20 @@ void Executor::measure_reset_update(CircuitExecutor::Branch &root, op.params = mdiag; root.branches()[i]->add_op_after_branch(op); - if (final_state >= 0 && final_state != i) { + if (final_state >= 0 && final_state != (int_t)i) { // build vectorized permutation matrix cvector_t perm(dim * dim, 0.); perm[final_state * dim + i] = 1.; perm[i * dim + final_state] = 1.; - for (size_t j = 0; j < dim; j++) { - if (j != final_state && j != i) + for (uint_t j = 0; j < dim; j++) { + if ((int_t)j != final_state && j != i) perm[j * dim + j] = 1.; } - Operations::Op op; - op.type = OpType::matrix; - op.qubits = qubits; - op.mats.push_back(Utils::devectorize_matrix(perm)); - root.branches()[i]->add_op_after_branch(op); + Operations::Op op2; + op2.type = OpType::matrix; + op2.qubits = qubits; + op2.mats.push_back(Utils::devectorize_matrix(perm)); + root.branches()[i]->add_op_after_branch(op2); } } } @@ -1595,7 +1597,7 @@ void Executor::apply_measure(CircuitExecutor::Branch &root, rvector_t probs = sample_measure_with_prob(root, qubits); // save result to cregs - for (int_t i = 0; i < probs.size(); i++) { + for (uint_t i = 0; i < probs.size(); i++) { const reg_t outcome = Utils::int2reg(i, 2, qubits.size()); root.branches()[i]->creg().store_measure(outcome, cmemory, cregister); } @@ -1624,9 +1626,9 @@ void Executor::apply_initialize(CircuitExecutor::Branch &root, auto apply_global_phase = [&tmp, params_in, global_phase](int_t i) { tmp[i] = params_in[i] * global_phase; }; - Utils::apply_omp_parallel_for((qubits.size() > Base::omp_qubit_threshold_), - 0, params_in.size(), apply_global_phase, - Base::parallel_state_update_); + Utils::apply_omp_parallel_for( + (qubits.size() > (uint_t)Base::omp_qubit_threshold_), 0, + params_in.size(), apply_global_phase, Base::parallel_state_update_); } const cvector_t ¶ms = tmp.empty() ? params_in : tmp; if (qubits.size() == Base::num_qubits_) { @@ -1648,7 +1650,7 @@ void Executor::apply_initialize(CircuitExecutor::Branch &root, op.name = "initialize"; op.qubits = qubits; op.params = params; - for (int_t i = 0; i < root.num_branches(); i++) { + for (uint_t i = 0; i < root.num_branches(); i++) { root.branches()[i]->add_op_after_branch(op); } return; // initialization will be done in next call because of shot @@ -1672,10 +1674,8 @@ void Executor::apply_kraus(CircuitExecutor::Branch &root, // So we only compute probabilities for the first N-1 kraus operators // and infer the probability of the last one from 1 - sum of the previous - double r; double accum = 0.; double p; - bool complete = false; reg_t shot_branch; uint_t nshots; @@ -1685,7 +1685,7 @@ void Executor::apply_kraus(CircuitExecutor::Branch &root, nshots = root.num_shots(); shot_branch.resize(nshots); rshots.resize(nshots); - for (int_t i = 0; i < nshots; i++) { + for (uint_t i = 0; i < nshots; i++) { shot_branch[i] = kmats.size() - 1; rshots[i] = root.rng_shots()[i].rand(0., 1.); } @@ -1701,7 +1701,7 @@ void Executor::apply_kraus(CircuitExecutor::Branch &root, // check if we need to apply this operator pmats[j] = p; - for (int_t i = 0; i < nshots; i++) { + for (uint_t i = 0; i < nshots; i++) { if (shot_branch[i] >= kmats.size() - 1) { if (accum > rshots[i]) { shot_branch[i] = j; @@ -1710,23 +1710,21 @@ void Executor::apply_kraus(CircuitExecutor::Branch &root, } } if (nshots_multiplied >= nshots) { - complete = true; break; } } - // check if we haven't applied a kraus operator yet pmats[pmats.size() - 1] = 1. 
- accum; root.creg() = Base::states_[root.state_index()].creg(); root.branch_shots(shot_branch, kmats.size()); - for (int_t i = 0; i < kmats.size(); i++) { + for (uint_t i = 0; i < kmats.size(); i++) { Operations::Op op; op.type = OpType::matrix; op.qubits = qubits; op.mats.push_back(kmats[i]); p = 1 / std::sqrt(pmats[i]); - for (int_t j = 0; j < op.mats[0].size(); j++) + for (uint_t j = 0; j < op.mats[0].size(); j++) op.mats[0][j] *= p; root.branches()[i]->add_op_after_branch(op); } @@ -1748,7 +1746,7 @@ void Executor::apply_save_density_matrix(CircuitExecutor::Branch &root, } std::vector copied(Base::num_bind_params_, false); - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); if (!copied[ip]) { (result + ip) @@ -1771,7 +1769,7 @@ void Executor::apply_save_probs(CircuitExecutor::Branch &root, std::vector copied(Base::num_bind_params_, false); if (op.type == Operations::OpType::save_probs_ket) { // Convert to ket dict - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); if (!copied[ip]) { (result + ip) @@ -1783,7 +1781,7 @@ void Executor::apply_save_probs(CircuitExecutor::Branch &root, } } } else { - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); if (!copied[ip]) { (result + ip) @@ -1810,7 +1808,7 @@ void Executor::apply_save_statevector(CircuitExecutor::Branch &root, if (last_op) { const auto v = Base::states_[root.state_index()].move_to_vector(); - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); (result + ip) ->save_data_pershot(Base::states_[root.state_index()].creg(), key, v, @@ -1818,7 +1816,7 @@ void Executor::apply_save_statevector(CircuitExecutor::Branch &root, } } else { const auto v = Base::states_[root.state_index()].copy_to_vector(); - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); (result + ip) ->save_data_pershot(Base::states_[root.state_index()].creg(), key, v, @@ -1841,7 +1839,7 @@ void Executor::apply_save_statevector_dict( for (auto const &it : state_ket) { result_state_ket[it.first] = it.second; } - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); (result + ip) ->save_data_pershot( @@ -1866,7 +1864,7 @@ void Executor::apply_save_amplitudes(CircuitExecutor::Branch &root, amps[i] = Base::states_[root.state_index()].qreg().get_state(op.int_params[i]); } - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); (result + ip) ->save_data_pershot( @@ -1880,7 +1878,7 @@ void Executor::apply_save_amplitudes(CircuitExecutor::Branch &root, op.int_params[i]); } std::vector copied(Base::num_bind_params_, false); - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); if (!copied[ip]) { (result + ip) @@ -1898,7 +1896,7 @@ std::vector Executor::sample_measure(state_t &state, const reg_t &qubits, uint_t shots, std::vector &rng) const { - int_t i, j; + uint_t i; std::vector rnds; rnds.reserve(shots); diff --git a/src/simulators/statevector/statevector_state.hpp b/src/simulators/statevector/statevector_state.hpp index 00b1e1711e..8408290b3d 100755 --- 
a/src/simulators/statevector/statevector_state.hpp +++ b/src/simulators/statevector/statevector_state.hpp @@ -402,7 +402,6 @@ const stringmap_t State::gateset_( template void State::initialize_qreg(uint_t num_qubits) { - int_t i; initialize_omp(); BaseState::qreg_.set_num_qubits(num_qubits); @@ -426,8 +425,6 @@ void State::initialize_statevector(uint_t num_qubits, template void State::initialize_omp() { - uint_t i; - BaseState::qreg_.set_omp_threshold(omp_qubit_threshold_); if (BaseState::threads_ > 0) // set allowed OMP threads in qubitvector BaseState::qreg_.set_omp_threads(BaseState::threads_); @@ -701,7 +698,7 @@ cmatrix_t State::vec2density(const reg_t &qubits, const T &vec) { cmatrix_t densmat(DIM, DIM); if ((N == BaseState::qreg_.num_qubits()) && (qubits == qubits_sorted)) { const int_t mask = QV::MASKS[N]; -#pragma omp parallel for if (2 * N > omp_qubit_threshold_ && \ +#pragma omp parallel for if (2 * N > (size_t)omp_qubit_threshold_ && \ BaseState::threads_ > 1) \ num_threads(BaseState::threads_) for (int_t rowcol = 0; rowcol < int_t(DIM * DIM); ++rowcol) { @@ -750,7 +747,7 @@ void State::apply_gate(const Operations::Op &op) { } if (qubits_out.size() > 0) { uint_t mask = 0; - for (int i = 0; i < qubits_out.size(); i++) { + for (uint_t i = 0; i < qubits_out.size(); i++) { mask |= (1ull << (qubits_out[i] - BaseState::qreg_.num_qubits())); } if ((BaseState::qreg_.chunk_index() & mask) == mask) { @@ -1026,7 +1023,7 @@ template std::vector State::sample_measure(const reg_t &qubits, uint_t shots, RngEngine &rng) { - int_t i, j; + uint_t i; // Generate flat register for storing std::vector rnds; rnds.reserve(shots); @@ -1066,9 +1063,9 @@ void State::apply_initialize(const reg_t &qubits, auto apply_global_phase = [&tmp, ¶ms_in, this](int_t i) { tmp[i] = params_in[i] * BaseState::global_phase_; }; - Utils::apply_omp_parallel_for((qubits.size() > omp_qubit_threshold_), 0, - params_in.size(), apply_global_phase, - BaseState::threads_); + Utils::apply_omp_parallel_for( + (qubits.size() > (uint_t)omp_qubit_threshold_), 0, params_in.size(), + apply_global_phase, BaseState::threads_); } const cvector_t ¶ms = tmp.empty() ? 
params_in : tmp; if (qubits.size() == BaseState::qreg_.num_qubits()) { diff --git a/src/simulators/statevector/transformer.hpp b/src/simulators/statevector/transformer.hpp index b9a5e36f10..93591849ea 100644 --- a/src/simulators/statevector/transformer.hpp +++ b/src/simulators/statevector/transformer.hpp @@ -244,9 +244,9 @@ void Transformer::apply_diagonal_matrix( auto func = [&](const areg_t<2> &inds, const cvector_t &_diag) -> void { for (int_t i = 0; i < 2; ++i) { - const int_t k = inds[i]; + const uint_t k = inds[i]; int_t iv = 0; - for (int_t j = 0; j < N; j++) + for (uint_t j = 0; j < N; j++) if ((k & (1ULL << qubits[j])) != 0) iv += (1ULL << j); if (_diag[iv] != (data_t)1.0) diff --git a/src/simulators/tensor_network/tensor.hpp b/src/simulators/tensor_network/tensor.hpp index daa233b517..635930cb44 100644 --- a/src/simulators/tensor_network/tensor.hpp +++ b/src/simulators/tensor_network/tensor.hpp @@ -177,7 +177,7 @@ void Tensor::set_conj(const reg_t &qubits, std::vector> &mat) { set(qubits, mat); - for (int i = 0; i < tensor_.size(); i++) + for (uint_t i = 0; i < tensor_.size(); i++) tensor_[i] = std::conj(tensor_[i]); sp_tensor_ = true; } diff --git a/src/simulators/tensor_network/tensor_net.hpp b/src/simulators/tensor_network/tensor_net.hpp index f4f9434382..32b7d52c0e 100644 --- a/src/simulators/tensor_network/tensor_net.hpp +++ b/src/simulators/tensor_network/tensor_net.hpp @@ -374,7 +374,7 @@ template TensorNet::TensorNet(const TensorNet &obj) {} template TensorNet::~TensorNet() { - int i; + uint_t i; for (i = 0; i < tensors_.size(); i++) { tensors_[i].reset(); } @@ -417,7 +417,7 @@ void TensorNet::buffer_statevector(void) const { std::vector extents_out(num_qubits_); // output tensor - for (int_t i = 0; i < num_qubits_; i++) { + for (uint_t i = 0; i < num_qubits_; i++) { modes_out[i] = modes_qubits_[i]; extents_out[i] = 2; } @@ -464,9 +464,9 @@ TensorNet::reduced_density_matrix(const reg_t &qubits) { uint_t nqubits = qubits.size(); // connect qubits not to be reduced - for (int_t i = 0; i < num_qubits_; i++) { + for (uint_t i = 0; i < num_qubits_; i++) { bool check = false; - for (int_t j = 0; j < qubits.size(); j++) { + for (uint_t j = 0; j < qubits.size(); j++) { if (i == qubits[j]) { check = true; break; @@ -491,7 +491,7 @@ TensorNet::reduced_density_matrix(const reg_t &qubits) { std::vector> trace; // output tensor - for (int_t i = 0; i < nqubits; i++) { + for (uint_t i = 0; i < nqubits; i++) { modes_out[i] = modes_qubits_[qubits[i]]; modes_out[i + nqubits] = modes_qubits_sp_[qubits[i]]; extents_out[i] = 2; @@ -505,9 +505,9 @@ TensorNet::reduced_density_matrix(const reg_t &qubits) { delete contractor; // recover connectted qubits - for (int_t i = 0; i < num_qubits_; i++) { + for (uint_t i = 0; i < num_qubits_; i++) { bool check = false; - for (int_t j = 0; j < qubits.size(); j++) { + for (uint_t j = 0; j < qubits.size(); j++) { if (i == qubits[j]) { check = true; break; @@ -538,7 +538,7 @@ void TensorNet::initialize_component(const reg_t &qubits, statevector_.clear(); // invalidate statevector buffer cvector_t state(state0.size()); - for (int_t i = 0; i < state0.size(); i++) + for (uint_t i = 0; i < state0.size(); i++) state[i] = (std::complex)state0[i]; tensors_.push_back(std::make_shared>()); @@ -547,7 +547,7 @@ void TensorNet::initialize_component(const reg_t &qubits, tensors_.push_back(std::make_shared>()); tensors_[last + 1]->set_conj(qubits, state); - for (int i = 0; i < qubits.size(); i++) { + for (uint_t i = 0; i < qubits.size(); i++) { modes_qubits_[qubits[i]] = 
mode_index_; tensors_[last]->modes()[i] = mode_index_++; qubits_[qubits[i]] = tensors_[last]; @@ -584,7 +584,7 @@ void TensorNet::add_tensor(const reg_t &qubits, tensors_.push_back(std::make_shared>()); uint_t last = tensors_.size() - 1; tensors_[last]->set(qubits, mat); - for (int i = 0; i < qubits.size(); i++) { + for (uint_t i = 0; i < qubits.size(); i++) { tensors_[last]->modes()[i] = modes_qubits_[qubits[i]]; modes_qubits_[qubits[i]] = mode_index_; tensors_[last]->modes()[qubits.size() + i] = mode_index_++; @@ -594,7 +594,7 @@ void TensorNet::add_tensor(const reg_t &qubits, tensors_.push_back(std::make_shared>()); last++; tensors_[last]->set_conj(qubits, mat); - for (int i = 0; i < qubits.size(); i++) { + for (uint_t i = 0; i < qubits.size(); i++) { tensors_[last]->modes()[i] = modes_qubits_sp_[qubits[i]]; modes_qubits_sp_[qubits[i]] = mode_index_; tensors_[last]->modes()[qubits.size() + i] = mode_index_++; @@ -614,13 +614,13 @@ void TensorNet::add_superop_tensor( uint_t last = tensors_.size() - 1; tensors_[last]->set(qubits, mat); - for (int i = 0; i < size; i++) { + for (uint_t i = 0; i < size; i++) { tensors_[last]->modes()[i] = modes_qubits_[qubits[i]]; modes_qubits_[qubits[i]] = mode_index_; tensors_[last]->modes()[size * 2 + i] = mode_index_++; qubits_[qubits[i]] = tensors_[last]; } - for (int i = 0; i < size; i++) { + for (uint_t i = 0; i < size; i++) { tensors_[last]->modes()[size + i] = modes_qubits_sp_[qubits[i]]; modes_qubits_sp_[qubits[i]] = mode_index_; tensors_[last]->modes()[size * 3 + i] = mode_index_++; @@ -636,7 +636,7 @@ void TensorNet::add_superop_tensor( template void TensorNet::initialize() { - int i; + uint_t i; if (statevector_.size() > 0) statevector_.clear(); // invalidate statevector buffer @@ -658,7 +658,7 @@ void TensorNet::initialize() { for (i = 0; i < num_qubits_; i++) { tensors_.push_back(std::make_shared>()); uint_t last = tensors_.size() - 1; - tensors_[last]->set({i}, init); + tensors_[last]->set({(int)i}, init); modes_qubits_[i] = mode_index_; tensors_[last]->modes()[0] = mode_index_++; @@ -667,7 +667,7 @@ void TensorNet::initialize() { for (i = 0; i < num_qubits_; i++) { // for super qubits tensors_.push_back(std::make_shared>()); uint_t last = tensors_.size() - 1; - tensors_[last]->set({i}, init); + tensors_[last]->set({(int)i}, init); modes_qubits_sp_[i] = mode_index_; tensors_[last]->modes()[0] = mode_index_++; @@ -700,19 +700,19 @@ void TensorNet::initialize(const TensorNet &obj) { template void TensorNet::initialize_from_matrix(const cmatrix_t &matrix0) { cvector_t matrix(matrix0.size()); - for (int_t i = 0; i < matrix0.size(); i++) + for (uint_t i = 0; i < matrix0.size(); i++) matrix[i] = (std::complex)matrix0[i]; tensors_.push_back(std::make_shared>()); uint_t last = tensors_.size() - 1; tensors_[last]->set(num_qubits_, matrix); - for (int i = 0; i < num_qubits_; i++) { + for (uint_t i = 0; i < num_qubits_; i++) { modes_qubits_[i] = mode_index_++; tensors_[last]->modes()[i] = modes_qubits_[i]; qubits_[i] = tensors_[last]; } - for (int i = 0; i < num_qubits_; i++) { + for (uint_t i = 0; i < num_qubits_; i++) { modes_qubits_sp_[i] = mode_index_++; tensors_[last]->modes()[i + num_qubits_] = modes_qubits_sp_[i]; qubits_sp_[i] = tensors_[last]; @@ -772,7 +772,6 @@ void TensorNet::apply_multiplexer(const reg_t &control_qubits, for (const auto &q : control_qubits) { qubits.push_back(q); } - size_t N = qubits.size(); cvector_t matMP(DIM * DIM, 0.0); uint_t b, i, j; @@ -794,11 +793,10 @@ template void TensorNet::apply_diagonal_matrix(const reg_t 
&qubits, const cvector_t &diag) { cvector_t mat(diag.size() * diag.size(), 0.0); - for (int_t i = 0; i < diag.size(); i++) { + for (uint_t i = 0; i < diag.size(); i++) { mat[i * (diag.size() + 1)] = diag[i]; } - Tensor *t = new Tensor; add_tensor(qubits, mat); } @@ -806,7 +804,7 @@ template void TensorNet::apply_diagonal_superop_matrix( const reg_t &qubits, const cvector_t &diag) { cvector_t mat(diag.size() * diag.size(), 0.0); - for (int_t i = 0; i < diag.size(); i++) { + for (uint_t i = 0; i < diag.size(); i++) { mat[i * (diag.size() + 1)] = diag[i]; } add_superop_tensor(qubits, mat); @@ -833,7 +831,7 @@ void TensorNet::apply_mcx(const reg_t &qubits) { reg_t qubits_t; qubits_t.push_back(qubits[qubits.size() - 1]); - for (int i = 0; i < qubits.size() - 1; i++) + for (uint_t i = 0; i < qubits.size() - 1; i++) qubits_t.push_back(qubits[i]); add_tensor(qubits_t, mat); @@ -850,7 +848,7 @@ void TensorNet::apply_mcy(const reg_t &qubits) { reg_t qubits_t; qubits_t.push_back(qubits[qubits.size() - 1]); - for (int i = 0; i < qubits.size() - 1; i++) + for (uint_t i = 0; i < qubits.size() - 1; i++) qubits_t.push_back(qubits[i]); add_tensor(qubits_t, mat); @@ -869,7 +867,7 @@ void TensorNet::apply_mcswap(const reg_t &qubits) { reg_t qubits_t; qubits_t.push_back(qubits[qubits.size() - 2]); qubits_t.push_back(qubits[qubits.size() - 1]); - for (int i = 0; i < qubits.size() - 2; i++) + for (uint_t i = 0; i < qubits.size() - 2; i++) qubits_t.push_back(qubits[i]); add_tensor(qubits_t, mat); @@ -886,7 +884,7 @@ void TensorNet::apply_mcphase(const reg_t &qubits, reg_t qubits_t; qubits_t.push_back(qubits[qubits.size() - 1]); - for (int i = 0; i < qubits.size() - 1; i++) + for (uint_t i = 0; i < qubits.size() - 1; i++) qubits_t.push_back(qubits[i]); add_tensor(qubits_t, mat); @@ -907,7 +905,7 @@ void TensorNet::apply_mcu(const reg_t &qubits, reg_t qubits_t; qubits_t.push_back(qubits[qubits.size() - 1]); - for (int i = 0; i < qubits.size() - 1; i++) + for (uint_t i = 0; i < qubits.size() - 1; i++) qubits_t.push_back(qubits[i]); add_tensor(qubits_t, matR); @@ -951,7 +949,7 @@ void TensorNet::apply_rotation(const reg_t &qubits, const Rotation r, template double TensorNet::norm() const { // connect qubits not used for trace - for (int_t i = 1; i < num_qubits_; i++) { + for (uint_t i = 1; i < num_qubits_; i++) { for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) { if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) { qubits_sp_[i]->modes()[j] = modes_qubits_[i]; @@ -980,7 +978,7 @@ double TensorNet::norm() const { delete contractor; // restore connected qubits - for (int_t i = 1; i < num_qubits_; i++) { + for (uint_t i = 1; i < num_qubits_; i++) { for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) { if (qubits_sp_[i]->modes()[j] == modes_qubits_[i]) { qubits_sp_[i]->modes()[j] = modes_qubits_sp_[i]; @@ -1002,26 +1000,26 @@ double TensorNet::norm(const reg_t &qubits, // additional matrix std::vector> mat_t(mat.size()); - for (int_t i = 0; i < mat.size(); i++) + for (uint_t i = 0; i < mat.size(); i++) mat_t[i] = mat[i]; mat_tensors[0] = std::make_shared>(); mat_tensors[0]->set(qubits, mat_t); - for (int i = 0; i < qubits.size(); i++) { + for (uint_t i = 0; i < qubits.size(); i++) { mat_tensors[0]->modes()[i] = tmp_modes[qubits[i]]; tmp_modes[qubits[i]] = tmp_index; mat_tensors[0]->modes()[qubits.size() + i] = tmp_index++; } mat_tensors[1] = std::make_shared>(); mat_tensors[1]->set_conj(qubits, mat_t); - for (int i = 0; i < qubits.size(); i++) { + for (uint_t i = 0; i < qubits.size(); i++) { mat_tensors[1]->modes()[i] 
= tmp_modes_sp[qubits[i]]; tmp_modes_sp[qubits[i]] = tmp_index; mat_tensors[1]->modes()[qubits.size() + i] = tmp_index++; } // connect qubits not used for trace - for (int_t i = 0; i < num_qubits_; i++) { + for (uint_t i = 0; i < num_qubits_; i++) { if (i != qubits[0]) { for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) { if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) { @@ -1054,7 +1052,7 @@ double TensorNet::norm(const reg_t &qubits, delete contractor; // restore connected qubits - for (int_t i = 1; i < num_qubits_; i++) { + for (uint_t i = 1; i < num_qubits_; i++) { if (i != qubits[0]) { for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) { if (qubits_sp_[i]->modes()[j] == tmp_modes[i]) { @@ -1085,7 +1083,7 @@ double TensorNet::probability(const uint_t outcome) const { template std::vector TensorNet::probabilities() const { reg_t qubits(num_qubits_); - for (int_t i = 0; i < num_qubits_; i++) + for (uint_t i = 0; i < num_qubits_; i++) qubits[i] = i; return probabilities(qubits); } @@ -1099,9 +1097,9 @@ TensorNet::probabilities(const reg_t &qubits) const { std::vector extents_out(nqubits * 2); std::vector> trace; // connect qubits not to be measured - for (int_t i = 0; i < num_qubits_; i++) { + for (uint_t i = 0; i < num_qubits_; i++) { bool check = false; - for (int_t j = 0; j < qubits.size(); j++) { + for (uint_t j = 0; j < qubits.size(); j++) { if (i == qubits[j]) { check = true; break; @@ -1122,7 +1120,7 @@ TensorNet::probabilities(const reg_t &qubits) const { contractor->set_network(tensors_); // output tensor - for (int_t i = 0; i < nqubits; i++) { + for (uint_t i = 0; i < nqubits; i++) { modes_out[i] = modes_qubits_[qubits[i]]; modes_out[i + nqubits] = modes_qubits_sp_[qubits[i]]; extents_out[i] = 2; @@ -1147,9 +1145,9 @@ TensorNet::probabilities(const reg_t &qubits) const { delete contractor; // recover connected qubits - for (int_t i = 0; i < num_qubits_; i++) { + for (uint_t i = 0; i < num_qubits_; i++) { bool check = false; - for (int_t j = 0; j < qubits.size(); j++) { + for (uint_t j = 0; j < qubits.size(); j++) { if (i == qubits[j]) { check = true; break; @@ -1201,7 +1199,7 @@ void TensorNet::sample_measure_branch(std::vector &samples, const reg_t &input_shot_index, const reg_t &input_measured_probs, const uint_t pos_measured) const { - const int_t SHOTS = rnds.size(); + const uint_t SHOTS = rnds.size(); /*--------------------------------------------------------------------------- | cccccccccccc | oooooooooooooo | ************** | xxxxxxxxxxxxxx | @@ -1233,7 +1231,7 @@ void TensorNet::sample_measure_branch(std::vector &samples, // output tensor std::vector modes_out(nqubits * 2); std::vector extents_out(nqubits * 2); - for (int_t i = 0; i < nqubits; i++) { + for (uint_t i = 0; i < nqubits; i++) { modes_out[i] = modes_qubits_[pos_measured - nqubits + i]; modes_out[i + nqubits] = modes_qubits_sp_[pos_measured - nqubits + i]; extents_out[i] = 2; @@ -1245,7 +1243,7 @@ void TensorNet::sample_measure_branch(std::vector &samples, // connect qubits not to be measured if (pos_measured - nqubits > 0) { - for (int_t i = 0; i < pos_measured - nqubits; i++) { + for (uint_t i = 0; i < pos_measured - nqubits; i++) { for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) { if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) { qubits_sp_[i]->modes()[j] = modes_qubits_[i]; @@ -1266,7 +1264,7 @@ void TensorNet::sample_measure_branch(std::vector &samples, shots[0] = rnds; shot_index[0] = input_shot_index; } else { - for (int_t i = 0; i < SHOTS; i++) { + for (uint_t i = 0; i < SHOTS; i++) { 
shots[input_sample_index[i]].push_back(rnds[i]); shot_index[input_sample_index[i]].push_back(input_shot_index[i]); } @@ -1276,7 +1274,7 @@ void TensorNet::sample_measure_branch(std::vector &samples, std::vector>> measured_tensors; if (measured_qubits > 0) { measured_tensors.resize(measured_qubits * 2); - for (int_t i = 0; i < measured_qubits; i++) { + for (uint_t i = 0; i < measured_qubits; i++) { std::vector> prob(2, 0.0); prob[input_measured_probs[pos_measured + i]] = 1.0; measured_tensors[i * 2] = std::make_shared>(); @@ -1293,11 +1291,11 @@ void TensorNet::sample_measure_branch(std::vector &samples, // 1st loop, sampling each branch before traversing branches to reuse tensor // network - for (int_t ib = 0; ib < num_branches; ib++) { + for (uint_t ib = 0; ib < num_branches; ib++) { if (shots[ib].size() > 0) { if (nqubits_branch > 0) { // tensors for measured probabilities - for (int_t i = 0; i < nqubits_branch; i++) { + for (uint_t i = 0; i < nqubits_branch; i++) { std::vector> prob(2, 0.0); if (((ib >> i) & 1) == 0) prob[0] = 1.0; @@ -1317,7 +1315,7 @@ void TensorNet::sample_measure_branch(std::vector &samples, // recover connected qubits if (pos_measured - nqubits > 0) { - for (int_t i = 0; i < pos_measured - nqubits; i++) { + for (uint_t i = 0; i < pos_measured - nqubits; i++) { for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) { if (qubits_sp_[i]->modes()[j] == modes_qubits_[i]) { qubits_sp_[i]->modes()[j] = modes_qubits_sp_[i]; @@ -1326,16 +1324,16 @@ void TensorNet::sample_measure_branch(std::vector &samples, } } } - for (int_t i = 0; i < measured_tensors.size(); i++) + for (uint_t i = 0; i < measured_tensors.size(); i++) measured_tensors[i].reset(); delete contractor; // 2nd loop traverse branches if (pos_measured - nqubits > 0) { - for (int_t ib = 0; ib < num_branches; ib++) { + for (uint_t ib = 0; ib < num_branches; ib++) { if (shots[ib].size() > 0) { reg_t measured_probs = input_measured_probs; - for (int_t i = 0; i < nqubits_branch; i++) + for (uint_t i = 0; i < nqubits_branch; i++) measured_probs[pos_measured + i] = ((ib >> i) & 1); sample_measure_branch(samples, shots[ib], sample_index[ib], @@ -1345,15 +1343,15 @@ } } else { // save samples - for (int_t ib = 0; ib < num_branches; ib++) { + for (uint_t ib = 0; ib < num_branches; ib++) { if (shots[ib].size() > 0) { reg_t sample = input_measured_probs; - for (int_t i = 0; i < nqubits_branch; i++) + for (uint_t i = 0; i < nqubits_branch; i++) sample[pos_measured + i] = ((ib >> i) & 1); - for (int_t i = 0; i < shots[ib].size(); i++) { + for (uint_t i = 0; i < shots[ib].size(); i++) { uint_t shot_id = shot_index[ib][i]; samples[shot_id] = sample; - for (int_t j = 0; j < nqubits; j++) { + for (uint_t j = 0; j < nqubits; j++) { samples[shot_id][j] = ((sample_index[ib][i] >> j) & 1); } } @@ -1385,7 +1383,7 @@ double TensorNet::expval_pauli(const reg_t &qubits, mat_phase[3] = initial_phase; // add Pauli ops to qubits - for (int_t i = 0; i < size; i++) { + for (uint_t i = 0; i < size; i++) { cvector_t mat(4, 0.0); switch (pauli[size - 1 - i]) { @@ -1421,7 +1419,7 @@ } // connect qubits not used for trace - for (int_t i = 0; i < num_qubits_; i++) { + for (uint_t i = 0; i < num_qubits_; i++) { if (i != qubits[0]) { for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) { if (qubits_sp_[i]->modes()[j] == modes_qubits_sp_[i]) { @@ -1454,7 +1452,7 @@ double TensorNet::expval_pauli(const reg_t &qubits, delete contractor; //
restore connected qubits - for (int_t i = 0; i < num_qubits_; i++) { + for (uint_t i = 0; i < num_qubits_; i++) { if (i != qubits[0]) { for (int_t j = 0; j < qubits_sp_[i]->rank(); j++) { if (qubits_sp_[i]->modes()[j] == tmp_modes[i]) { @@ -1465,7 +1463,7 @@ double TensorNet::expval_pauli(const reg_t &qubits, } } - for (int_t i = 0; i < pauli_tensors.size(); i++) { + for (uint_t i = 0; i < pauli_tensors.size(); i++) { pauli_tensors[i].reset(); } diff --git a/src/simulators/tensor_network/tensor_net_contractor_cuTensorNet.hpp b/src/simulators/tensor_network/tensor_net_contractor_cuTensorNet.hpp index cc69b93e38..33abbf76cf 100644 --- a/src/simulators/tensor_network/tensor_net_contractor_cuTensorNet.hpp +++ b/src/simulators/tensor_network/tensor_net_contractor_cuTensorNet.hpp @@ -84,6 +84,7 @@ class RawTensorData { uint_t tensor_size_; uint_t additional_tensor_size_; uint_t out_size_; + uint_t work_size_limit_; uint_t work_size_; uint_t sampling_buffer_size_; @@ -484,6 +485,12 @@ uint_t RawTensorData::optimize_contraction(void) { cutensornetStatus_t err; cudaSetDevice(device_id_); + size_t freeMem, totalMem; + int nid = omp_get_num_threads(); + + HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeMem, &totalMem)); + work_size_limit_ = (freeMem / nid) * 0.9; + /******************************* * Find "optimal" contraction order and slicing *******************************/ @@ -510,7 +517,7 @@ uint_t RawTensorData::optimize_contraction(void) { cutensornetGetErrorString(err)); err = cutensornetContractionOptimize(hTensorNet_, tn_desc_, optimizer_config_, - work_size_, optimizer_info_); + work_size_limit_, optimizer_info_); if (err != CUTENSORNET_STATUS_SUCCESS) assert_error("cutensornetContractionOptimize", cutensornetGetErrorString(err)); @@ -540,27 +547,26 @@ void RawTensorData::create_contraction_plan(bool use_autotune) { assert_error("cutensornetCreateWorkspaceDescriptor", cutensornetGetErrorString(err)); - uint64_t requiredWorkspaceSize = 0; - err = cutensornetWorkspaceComputeSizes(hTensorNet_, tn_desc_, optimizer_info_, - work_desc_); + int64_t requiredWorkspaceSize = 0; + err = cutensornetWorkspaceComputeContractionSizes( + hTensorNet_, tn_desc_, optimizer_info_, work_desc_); if (err != CUTENSORNET_STATUS_SUCCESS) assert_error("cutensornetWorkspaceComputeSizes", cutensornetGetErrorString(err)); - err = cutensornetWorkspaceGetSize( + err = cutensornetWorkspaceGetMemorySize( hTensorNet_, work_desc_, CUTENSORNET_WORKSIZE_PREF_MIN, - CUTENSORNET_MEMSPACE_DEVICE, &requiredWorkspaceSize); + CUTENSORNET_MEMSPACE_DEVICE, CUTENSORNET_WORKSPACE_SCRATCH, + &requiredWorkspaceSize); if (err != CUTENSORNET_STATUS_SUCCESS) assert_error("cutensornetWorkspaceGetSize", cutensornetGetErrorString(err)); - if (work_size_ < requiredWorkspaceSize) { - throw std::runtime_error("ERROR : TensorNet::contractor required memory " - "size for workspace is not enough"); - } + allocate_work(requiredWorkspaceSize); - err = cutensornetWorkspaceSet( + err = cutensornetWorkspaceSetMemory( hTensorNet_, work_desc_, CUTENSORNET_MEMSPACE_DEVICE, - thrust::raw_pointer_cast(dev_work_.data()), work_size_); + CUTENSORNET_WORKSPACE_SCRATCH, thrust::raw_pointer_cast(dev_work_.data()), + work_size_); if (err != CUTENSORNET_STATUS_SUCCESS) assert_error("cutensornetWorkspaceSet", cutensornetGetErrorString(err)); @@ -967,8 +973,6 @@ void TensorNetContractor_cuTensorNet::allocate_additional_tensors( template void TensorNetContractor_cuTensorNet::set_additional_tensors( const std::vector>> &tensors) { - uint_t size = 0; - remove_additional_tensors(); 
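Note on the workspace changes above: optimize_contraction() now computes a soft budget (work_size_limit_, 90% of the free device memory divided across the OpenMP threads sharing the GPU) and passes it to cutensornetContractionOptimize so the path finder can slice the contraction to fit, while create_contraction_plan() queries the scratch size the plan actually requires and allocates exactly that, instead of preallocating the whole budget per device and throwing when a fixed buffer turned out too small. A minimal sketch of the sizing heuristic, assuming CUDA and OpenMP; compute_work_size_limit is a hypothetical helper, not an Aer or cuTensorNet API:

#include <cuda_runtime.h>
#include <omp.h>
#include <cstddef>

// 90% of the currently free device memory, split evenly across the OpenMP
// threads sharing this GPU; mirrors the work_size_limit_ computation above.
std::size_t compute_work_size_limit(int device_id) {
  std::size_t free_mem = 0, total_mem = 0;
  cudaSetDevice(device_id);
  cudaMemGetInfo(&free_mem, &total_mem);
  const int num_threads = omp_get_num_threads(); // 1 outside a parallel region
  return static_cast<std::size_t>((free_mem / num_threads) * 0.9);
}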
num_additional_tensors_ = tensors.size(); @@ -1021,10 +1025,6 @@ void TensorNetContractor_cuTensorNet::set_output( template void TensorNetContractor_cuTensorNet::setup_contraction( bool use_autotune) { - int nid = omp_get_num_threads(); - cutensornetStatus_t err; - size_t freeMem, totalMem; - uint_t work_size; // for MPI distribution #ifdef AER_MPI @@ -1032,14 +1032,6 @@ void TensorNetContractor_cuTensorNet::setup_contraction( MPI_Comm_rank(MPI_COMM_WORLD, &myrank_); #endif - // allocate work buffer on GPU - if (!tensor_data_[0].work_allocated()) { - cudaSetDevice(target_gpus_[0]); - HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeMem, &totalMem)); - work_size = (freeMem / nid) * 0.9; - tensor_data_[0].allocate_work(work_size); - } - num_devices_used_ = 1; // setup first device @@ -1060,12 +1052,6 @@ void TensorNetContractor_cuTensorNet::setup_contraction( if (ns > 0) { // setup for the device - if (!tensor_data_[i].work_allocated()) { - cudaSetDevice(target_gpus_[i]); - HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeMem, &totalMem)); - work_size = (freeMem / nid) * 0.9; - tensor_data_[i].allocate_work(work_size); - } tensor_data_[i].copy_tensors_from_device( tensor_data_[0]); // copy data from the first device tensor_data_[i].create_contraction_descriptor( diff --git a/src/simulators/tensor_network/tensor_net_executor.hpp b/src/simulators/tensor_network/tensor_net_executor.hpp index 971dcd02c9..53d24faf96 100644 --- a/src/simulators/tensor_network/tensor_net_executor.hpp +++ b/src/simulators/tensor_network/tensor_net_executor.hpp @@ -37,6 +37,7 @@ using ResultItr = std::vector::iterator; template class Executor : public CircuitExecutor::MultiStateExecutor { using Base = CircuitExecutor::MultiStateExecutor; + using Base::sample_measure; protected: public: @@ -148,7 +149,7 @@ Executor::sample_measure_with_prob(CircuitExecutor::Branch &root, uint_t nshots = root.num_shots(); reg_t shot_branch(nshots); - for (int_t i = 0; i < nshots; i++) { + for (uint_t i = 0; i < nshots; i++) { shot_branch[i] = root.rng_shots()[i].rand_int(probs); } @@ -182,11 +183,11 @@ void Executor::measure_reset_update(CircuitExecutor::Branch &root, root.branches()[i]->add_op_after_branch(op); if (final_state >= 0 && final_state != i) { - Operations::Op op; - op.type = OpType::gate; - op.name = "mcx"; - op.qubits = qubits; - root.branches()[i]->add_op_after_branch(op); + Operations::Op op2; + op2.type = OpType::gate; + op2.name = "mcx"; + op2.qubits = qubits; + root.branches()[i]->add_op_after_branch(op2); } } } @@ -194,7 +195,7 @@ void Executor::measure_reset_update(CircuitExecutor::Branch &root, else { // Diagonal matrix for projecting and renormalizing to measurement outcome const size_t dim = 1ULL << qubits.size(); - for (int_t i = 0; i < dim; i++) { + for (uint_t i = 0; i < dim; i++) { cvector_t mdiag(dim, 0.); mdiag[i] = 1. 
/ std::sqrt(meas_probs[i]); @@ -204,20 +205,20 @@ void Executor::measure_reset_update(CircuitExecutor::Branch &root, op.params = mdiag; root.branches()[i]->add_op_after_branch(op); - if (final_state >= 0 && final_state != i) { + if (final_state >= 0 && final_state != (int_t)i) { // build vectorized permutation matrix cvector_t perm(dim * dim, 0.); perm[final_state * dim + i] = 1.; perm[i * dim + final_state] = 1.; for (size_t j = 0; j < dim; j++) { - if (j != final_state && j != i) + if (j != (size_t)final_state && j != i) perm[j * dim + j] = 1.; } - Operations::Op op; - op.type = OpType::matrix; - op.qubits = qubits; - op.mats.push_back(Utils::devectorize_matrix(perm)); - root.branches()[i]->add_op_after_branch(op); + Operations::Op op2; + op2.type = OpType::matrix; + op2.qubits = qubits; + op2.mats.push_back(Utils::devectorize_matrix(perm)); + root.branches()[i]->add_op_after_branch(op2); } } } @@ -230,7 +231,7 @@ void Executor::apply_measure(CircuitExecutor::Branch &root, rvector_t probs = sample_measure_with_prob(root, qubits); // save result to cregs - for (int_t i = 0; i < probs.size(); i++) { + for (uint_t i = 0; i < probs.size(); i++) { const reg_t outcome = Utils::int2reg(i, 2, qubits.size()); root.branches()[i]->creg().store_measure(outcome, cmemory, cregister); } @@ -259,9 +260,9 @@ void Executor::apply_initialize(CircuitExecutor::Branch &root, auto apply_global_phase = [&tmp, params_in, global_phase](int_t i) { tmp[i] = params_in[i] * global_phase; }; - Utils::apply_omp_parallel_for((qubits.size() > Base::omp_qubit_threshold_), - 0, params_in.size(), apply_global_phase, - Base::parallel_state_update_); + Utils::apply_omp_parallel_for( + (qubits.size() > (uint_t)Base::omp_qubit_threshold_), 0, + params_in.size(), apply_global_phase, Base::parallel_state_update_); } const cvector_t ¶ms = tmp.empty() ? 
params_in : tmp; if (qubits.size() == Base::num_qubits_) { @@ -283,7 +284,7 @@ void Executor::apply_initialize(CircuitExecutor::Branch &root, op.name = "initialize"; op.qubits = qubits; op.params = params; - for (int_t i = 0; i < root.num_branches(); i++) { + for (uint_t i = 0; i < root.num_branches(); i++) { root.branches()[i]->add_op_after_branch(op); } return; // initialization will be done in next call because of shot @@ -307,10 +308,8 @@ void Executor::apply_kraus(CircuitExecutor::Branch &root, // So we only compute probabilities for the first N-1 kraus operators // and infer the probability of the last one from 1 - sum of the previous - double r; double accum = 0.; double p; - bool complete = false; reg_t shot_branch; uint_t nshots; @@ -320,7 +319,7 @@ void Executor::apply_kraus(CircuitExecutor::Branch &root, nshots = root.num_shots(); shot_branch.resize(nshots); rshots.resize(nshots); - for (int_t i = 0; i < nshots; i++) { + for (uint_t i = 0; i < nshots; i++) { shot_branch[i] = kmats.size() - 1; rshots[i] = root.rng_shots()[i].rand(0., 1.); } @@ -336,7 +335,7 @@ void Executor::apply_kraus(CircuitExecutor::Branch &root, // check if we need to apply this operator pmats[j] = p; - for (int_t i = 0; i < nshots; i++) { + for (uint_t i = 0; i < nshots; i++) { if (shot_branch[i] >= kmats.size() - 1) { if (accum > rshots[i]) { shot_branch[i] = j; @@ -345,7 +344,6 @@ void Executor::apply_kraus(CircuitExecutor::Branch &root, } } if (nshots_multiplied >= nshots) { - complete = true; break; } } @@ -355,13 +353,13 @@ void Executor::apply_kraus(CircuitExecutor::Branch &root, root.creg() = Base::states_[root.state_index()].creg(); root.branch_shots(shot_branch, kmats.size()); - for (int_t i = 0; i < kmats.size(); i++) { + for (uint_t i = 0; i < kmats.size(); i++) { Operations::Op op; op.type = OpType::matrix; op.qubits = qubits; op.mats.push_back(kmats[i]); p = 1 / std::sqrt(pmats[i]); - for (int_t j = 0; j < op.mats[0].size(); j++) + for (uint_t j = 0; j < op.mats[0].size(); j++) op.mats[0][j] *= p; root.branches()[i]->add_op_after_branch(op); } @@ -385,7 +383,7 @@ void Executor::apply_save_density_matrix(CircuitExecutor::Branch &root, } std::vector copied(Base::num_bind_params_, false); - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); if (!copied[ip]) { (result + ip) @@ -408,7 +406,7 @@ void Executor::apply_save_probs(CircuitExecutor::Branch &root, std::vector copied(Base::num_bind_params_, false); if (op.type == Operations::OpType::save_probs_ket) { // Convert to ket dict - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); if (!copied[ip]) { (result + ip) @@ -420,7 +418,7 @@ void Executor::apply_save_probs(CircuitExecutor::Branch &root, } } } else { - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); if (!copied[ip]) { (result + ip) @@ -447,7 +445,7 @@ void Executor::apply_save_statevector(CircuitExecutor::Branch &root, if (last_op) { const auto v = Base::states_[root.state_index()].move_to_vector(); - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); (result + ip) ->save_data_pershot(Base::states_[root.state_index()].creg(), key, v, @@ -455,7 +453,7 @@ void Executor::apply_save_statevector(CircuitExecutor::Branch &root, } } else { const auto v = 
Base::states_[root.state_index()].copy_to_vector(); - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); (result + ip) ->save_data_pershot(Base::states_[root.state_index()].creg(), key, v, @@ -478,7 +476,7 @@ void Executor::apply_save_statevector_dict( for (auto const &it : state_ket) { result_state_ket[it.first] = it.second; } - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); (result + ip) ->save_data_pershot( @@ -496,14 +494,14 @@ void Executor::apply_save_amplitudes(CircuitExecutor::Branch &root, throw std::invalid_argument( "Invalid save_amplitudes instructions (empty params)."); } - const int_t size = op.int_params.size(); + const uint_t size = op.int_params.size(); if (op.type == Operations::OpType::save_amps) { Vector amps(size, false); - for (int_t i = 0; i < size; ++i) { + for (uint_t i = 0; i < size; ++i) { amps[i] = Base::states_[root.state_index()].qreg().get_state(op.int_params[i]); } - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); (result + ip) ->save_data_pershot( @@ -512,12 +510,12 @@ void Executor::apply_save_amplitudes(CircuitExecutor::Branch &root, } } else { rvector_t amps_sq(size, 0); - for (int_t i = 0; i < size; ++i) { + for (uint_t i = 0; i < size; ++i) { amps_sq[i] = Base::states_[root.state_index()].qreg().probability( op.int_params[i]); } std::vector copied(Base::num_bind_params_, false); - for (int_t i = 0; i < root.num_shots(); i++) { + for (uint_t i = 0; i < root.num_shots(); i++) { uint_t ip = root.param_index(i); if (!copied[ip]) { (result + ip) @@ -539,23 +537,23 @@ Executor::sample_measure(state_t &state, const reg_t &qubits, std::vector rnds; rnds.reserve(shots); - for (i = 0; i < shots; ++i) + for (i = 0; i < (int_t)shots; ++i) rnds.push_back(rng[i].rand(0, 1)); std::vector samples = state.qreg().sample_measure(rnds); std::vector ret(shots); if (omp_get_num_threads() > 1) { - for (i = 0; i < shots; ++i) { + for (i = 0; i < (int_t)shots; ++i) { ret[i].resize(qubits.size()); - for (j = 0; j < qubits.size(); j++) + for (j = 0; j < (int_t)qubits.size(); j++) ret[i][j] = samples[i][qubits[j]]; } } else { #pragma omp parallel for private(j) - for (i = 0; i < shots; ++i) { + for (i = 0; i < (int_t)shots; ++i) { ret[i].resize(qubits.size()); - for (j = 0; j < qubits.size(); j++) + for (j = 0; j < (int_t)qubits.size(); j++) ret[i][j] = samples[i][qubits[j]]; } } diff --git a/src/simulators/tensor_network/tensor_net_state.hpp b/src/simulators/tensor_network/tensor_net_state.hpp index 7ac73ad22e..ef0bbf3a10 100644 --- a/src/simulators/tensor_network/tensor_net_state.hpp +++ b/src/simulators/tensor_network/tensor_net_state.hpp @@ -899,27 +899,26 @@ template std::vector State::sample_measure(const reg_t &qubits, uint_t shots, RngEngine &rng) { - int_t i, j; // Generate flat register for storing std::vector rnds(shots); - for (i = 0; i < shots; ++i) + for (uint_t i = 0; i < shots; ++i) rnds[i] = rng.rand(0, 1); std::vector samples = BaseState::qreg_.sample_measure(rnds); std::vector ret(shots); if (omp_get_num_threads() > 1) { - for (i = 0; i < shots; ++i) { + for (uint_t i = 0; i < shots; ++i) { ret[i].resize(qubits.size()); - for (j = 0; j < qubits.size(); j++) + for (uint_t j = 0; j < qubits.size(); j++) ret[i][j] = samples[i][qubits[j]]; } } else { -#pragma omp parallel for private(j) - for (i = 0; i < shots; ++i) { 
+#pragma omp parallel for + for (int_t i = 0; i < (int_t)shots; ++i) { ret[i].resize(qubits.size()); - for (j = 0; j < qubits.size(); j++) + for (uint_t j = 0; j < qubits.size(); j++) ret[i][j] = samples[i][qubits[j]]; } } @@ -963,7 +962,7 @@ void State::initialize_from_vector( BaseState::qreg_.initialize(); reg_t qubits(BaseState::qreg_.num_qubits()); - for (int_t i = 0; i < BaseState::qreg_.num_qubits(); i++) + for (uint_t i = 0; i < BaseState::qreg_.num_qubits(); i++) qubits[i] = i; BaseState::qreg_.initialize_component(qubits, params); } diff --git a/src/simulators/unitary/unitary_executor.hpp b/src/simulators/unitary/unitary_executor.hpp index 3066e0d619..3cc2414668 100644 --- a/src/simulators/unitary/unitary_executor.hpp +++ b/src/simulators/unitary/unitary_executor.hpp @@ -84,14 +84,14 @@ void Executor::set_config(const Config &config) { template void Executor::initialize_qreg(uint_t num_qubits) { - int_t iChunk; + uint_t iChunk; for (iChunk = 0; iChunk < Base::states_.size(); iChunk++) { Base::states_[iChunk].qreg().set_num_qubits(Base::chunk_bits_); } if (Base::chunk_omp_parallel_ && Base::num_groups_ > 1) { #pragma omp parallel for private(iChunk) - for (int_t ig = 0; ig < Base::num_groups_; ig++) { + for (int_t ig = 0; ig < (int_t)Base::num_groups_; ig++) { for (iChunk = Base::top_state_of_group_[ig]; iChunk < Base::top_state_of_group_[ig + 1]; iChunk++) { uint_t irow, icol; diff --git a/src/simulators/unitary/unitary_state.hpp b/src/simulators/unitary/unitary_state.hpp index 8f33e43ce1..8fbad7d4a1 100755 --- a/src/simulators/unitary/unitary_state.hpp +++ b/src/simulators/unitary/unitary_state.hpp @@ -369,7 +369,6 @@ void State::initialize_qreg(uint_t num_qubits, template void State::initialize_omp() { - uint_t i; BaseState::qreg_.set_omp_threshold(omp_qubit_threshold_); if (BaseState::threads_ > 0) BaseState::qreg_.set_omp_threads( @@ -414,7 +413,7 @@ void State::apply_gate(const Operations::Op &op) { } if (qubits_out.size() > 0) { uint_t mask = 0; - for (int i = 0; i < qubits_out.size(); i++) { + for (uint_t i = 0; i < qubits_out.size(); i++) { mask |= (1ull << (qubits_out[i] - BaseState::qreg_.num_qubits())); } if ((BaseState::qreg_.chunk_index() & mask) == mask) { diff --git a/src/simulators/unitary/unitarymatrix.hpp b/src/simulators/unitary/unitarymatrix.hpp index f406091662..7fd6170d57 100644 --- a/src/simulators/unitary/unitarymatrix.hpp +++ b/src/simulators/unitary/unitarymatrix.hpp @@ -237,7 +237,6 @@ void UnitaryMatrix::initialize() { // Zero the underlying vector BaseVector::zero(); // Set to be identity matrix - const int_t nrows = rows_; // end for k loop auto initialize_proc = [this](int_t i) { BaseVector::data_[i * (rows_ + 1)] = 1.0; }; @@ -261,7 +260,7 @@ void UnitaryMatrix::initialize_from_matrix( ")."); } auto initialize_proc = [this, &mat](int_t row) { - for (int_t col = 0; col < rows_; ++col) { + for (uint_t col = 0; col < rows_; ++col) { BaseVector::data_[row + rows_ * col] = mat(row, col); } }; diff --git a/src/simulators/unitary/unitarymatrix_thrust.hpp b/src/simulators/unitary/unitarymatrix_thrust.hpp index 8687f8e2c7..58a15f79c8 100755 --- a/src/simulators/unitary/unitarymatrix_thrust.hpp +++ b/src/simulators/unitary/unitarymatrix_thrust.hpp @@ -212,13 +212,11 @@ UnitaryMatrixThrust::copy_to_matrix() const { cvector_t qreg = BaseVector::vector(); - int_t i; - uint_t irow, icol; -#pragma omp parallel for private( \ - i, irow, icol) if (BaseVector::num_qubits_ > BaseVector::omp_threshold_ && \ - BaseVector::omp_threads_ > 1) \ +#pragma omp parallel for if 
(BaseVector::num_qubits_ > \ + BaseVector::omp_threshold_ && \ + BaseVector::omp_threads_ > 1) \ num_threads(BaseVector::omp_threads_) - for (i = 0; i < csize; i++) { + for (int_t i = 0; i < (int_t)csize; i++) { ret[i] = qreg[i]; } return ret; diff --git a/src/transpile/batch_converter.hpp b/src/transpile/batch_converter.hpp index 40e1b65537..85766780bb 100644 --- a/src/transpile/batch_converter.hpp +++ b/src/transpile/batch_converter.hpp @@ -100,7 +100,7 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise, const opset_t &allowed_opset, ExperimentResult &result) const { // convert operations for batch shots execution - for (int_t i = 0; i < circ.ops.size(); i++) { + for (uint_t i = 0; i < circ.ops.size(); i++) { if (circ.ops[i].has_bind_params) { if (circ.ops[i].type == Operations::OpType::gate) { gate_to_matrix(circ.ops[i], circ.num_bind_params); @@ -108,8 +108,8 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise, // convert matrix to cvector_t in params uint_t matrix_size = circ.ops[i].mats[0].size(); circ.ops[i].params.resize(matrix_size * circ.num_bind_params); - for (int_t j = 0; j < circ.num_bind_params; j++) { - for (int_t k = 0; k < matrix_size; k++) + for (uint_t j = 0; j < circ.num_bind_params; j++) { + for (uint_t k = 0; k < matrix_size; k++) circ.ops[i].params[j * matrix_size + k] = circ.ops[i].mats[j][k]; } circ.ops[i].mats.clear(); @@ -120,7 +120,7 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise, // convert global phase to diagonal matrix if (circ.global_phase_for_params.size() == circ.num_bind_params) { bool has_global_phase = false; - for (int_t j = 0; j < circ.num_bind_params; j++) { + for (uint_t j = 0; j < circ.num_bind_params; j++) { if (!Linalg::almost_equal(circ.global_phase_for_params[j], 0.0)) { has_global_phase = true; break; @@ -132,7 +132,7 @@ void BatchConverter::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise, phase_op.type = Operations::OpType::diagonal_matrix; phase_op.has_bind_params = true; phase_op.params.resize(2 * circ.num_bind_params); - for (int_t j = 0; j < circ.num_bind_params; j++) { + for (uint_t j = 0; j < circ.num_bind_params; j++) { auto t = std::exp(complex_t(0.0, circ.global_phase_for_params[j])); phase_op.params[j * 2] = t; phase_op.params[j * 2 + 1] = t; @@ -173,64 +173,64 @@ void BatchConverter::gate_to_matrix(Operations::Op &op, auto store_matrix = [&matrix_array, matrix_size](int_t iparam, cvector_t mat) { - for (int_t j = 0; j < matrix_size; j++) + for (uint_t j = 0; j < matrix_size; j++) matrix_array[iparam * matrix_size + j] = mat[j]; }; switch (it->second) { case ParamGates::mcr: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::r(op.params[i * 2], op.params[i * 2 + 1])); break; case ParamGates::mcrx: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::rx(std::real(op.params[i]))); break; case ParamGates::mcry: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::ry(std::real(op.params[i]))); break; case ParamGates::mcrz: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::rz_diag(std::real(op.params[i]))); break; case ParamGates::rxx: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::rxx(std::real(op.params[i]))); 
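For context on the gate_to_matrix() hunks here: when an op carries runtime-bound parameters, one explicit gate matrix is materialized per parameter set and written into a flat array at offset iparam * matrix_size, which is the layout the batched executor consumes. A minimal sketch of that pattern for an RX rotation, standard library only; rx_matrix and bind_rx_params are illustrative stand-ins for Linalg::VMatrix::rx and the store_matrix lambda, not Aer API:

#include <cmath>
#include <complex>
#include <cstddef>
#include <vector>

using complex_t = std::complex<double>;
using cvector_t = std::vector<complex_t>;

// Vectorized 2x2 matrix of RX(theta); the matrix is symmetric, so row- and
// column-major layouts coincide.
cvector_t rx_matrix(double theta) {
  const complex_t c(std::cos(theta / 2.), 0.);
  const complex_t s(0., -std::sin(theta / 2.));
  return {c, s, s, c};
}

// One matrix per bound parameter set, flattened back to back, mirroring
// the store_matrix() lambda above.
cvector_t bind_rx_params(const std::vector<double> &thetas) {
  const std::size_t matrix_size = 4; // 2x2 gate
  cvector_t matrix_array(thetas.size() * matrix_size);
  for (std::size_t i = 0; i < thetas.size(); i++) {
    const cvector_t mat = rx_matrix(thetas[i]);
    for (std::size_t j = 0; j < matrix_size; j++)
      matrix_array[i * matrix_size + j] = mat[j];
  }
  return matrix_array;
}

This matches the layout BatchConverter::optimize_circuit produces when it flattens op.mats into op.params (params[j * matrix_size + k] = mats[j][k]).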
break; case ParamGates::ryy: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::ryy(std::real(op.params[i]))); break; case ParamGates::rzz: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::rzz_diag(std::real(op.params[i]))); break; case ParamGates::rzx: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::rzx(std::real(op.params[i]))); break; case ParamGates::mcu3: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::u3(std::real(op.params[i * 3]), std::real(op.params[i * 3 + 1]), std::real(op.params[i * 3 + 2]))); break; case ParamGates::mcu: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::u4(std::real(op.params[i * 4]), std::real(op.params[i * 4 + 1]), std::real(op.params[i * 4 + 2]), std::real(op.params[i * 4 + 3]))); break; case ParamGates::mcu2: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::u2(std::real(op.params[i * 2]), std::real(op.params[i * 2 + 1]))); break; case ParamGates::mcp: - for (int_t i = 0; i < num_params; i++) + for (uint_t i = 0; i < num_params; i++) store_matrix(i, Linalg::VMatrix::phase_diag(std::real(op.params[i]))); break; default: diff --git a/src/transpile/cacheblocking.hpp b/src/transpile/cacheblocking.hpp index 35d72908c0..9989264347 100644 --- a/src/transpile/cacheblocking.hpp +++ b/src/transpile/cacheblocking.hpp @@ -68,16 +68,16 @@ class CacheBlocking : public CircuitOptimization { void set_num_processes(int np) { num_processes_ = np; } protected: - mutable int block_bits_; // qubits less than this will be blocked - mutable int qubits_; + mutable uint_t block_bits_; // qubits less than this will be blocked + mutable uint_t qubits_; mutable reg_t qubitMap_; mutable reg_t qubitSwapped_; mutable bool blocking_enabled_; mutable bool sample_measure_ = false; mutable bool restore_qubit_map_ = false; - int memory_blocking_bits_ = 0; + uint_t memory_blocking_bits_ = 0; bool density_matrix_ = false; - int num_processes_ = 1; + uint_t num_processes_ = 1; bool block_circuit(Circuit &circ, bool doSwap) const; @@ -150,7 +150,6 @@ void CacheBlocking::set_blocking(int bits, size_t min_memory, uint_t n_place, size_t complex_size, bool is_matrix) { int chunk_bits = bits; uint_t scale = is_matrix ? 
2 : 1; - size_t size; // get largest possible chunk bits while ((complex_size << (scale * chunk_bits)) > min_memory) { @@ -215,7 +214,7 @@ void CacheBlocking::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise, // loop over operations to find max number of parameters for cross-qubits // operations - int_t max_params = 1; + uint_t max_params = 1; for (uint_t i = 0; i < circ.ops.size(); i++) { if (is_blockable_operation(circ.ops[i]) && is_cross_qubits_op(circ.ops[i])) { @@ -302,7 +301,7 @@ void CacheBlocking::define_blocked_qubits(std::vector &ops, reg_t &blockedQubits, bool crossQubitOnly) const { uint_t i, j, iq; - int nq, nb; + uint_t nq; bool exist; for (i = 0; i < ops.size(); i++) { if (blockedQubits.size() >= block_bits_) @@ -384,7 +383,7 @@ bool CacheBlocking::can_reorder( } bool CacheBlocking::block_circuit(Circuit &circ, bool doSwap) const { - uint_t i, n; + uint_t n; std::vector out; std::vector queue; std::vector queue_next; @@ -523,11 +522,8 @@ uint_t CacheBlocking::add_ops(std::vector &ops, std::vector &queue, bool doSwap, bool first, bool crossQubitOnly) const { uint_t i, j, iq; - - int nqubitUsed = 0; reg_t blockedQubits; - int nq; - bool exist; + uint_t nq; uint_t pos_begin, num_gates_added; bool end_block_inserted; @@ -807,7 +803,7 @@ bool CacheBlocking::split_pauli(const Operations::Op &op, reg_t qubits_out_chunk; std::string pauli_in_chunk; std::string pauli_out_chunk; - int_t i, j, n; + uint_t i, j, n; bool inside; // get inner/outer chunk pauli string @@ -857,7 +853,7 @@ bool CacheBlocking::split_op(const Operations::Op &op, std::vector &queue) const { reg_t qubits_in_chunk; reg_t qubits_out_chunk; - int_t i, j, n; + uint_t i, j, n; bool inside; n = op.qubits.size(); diff --git a/src/transpile/fusion.hpp b/src/transpile/fusion.hpp index 3d5e64fde6..af8ec073ae 100644 --- a/src/transpile/fusion.hpp +++ b/src/transpile/fusion.hpp @@ -67,7 +67,7 @@ class FusionMethod { } } else { // loop for runtime parameter binding - for (int_t p = 0; p < num_params_; p++) { + for (uint_t p = 0; p < num_params_; p++) { std::vector ops; ops.reserve(fusioned_ops.size()); for (auto &op : fusioned_ops) { @@ -449,18 +449,18 @@ bool NQubitFusion::aggregate_operations(oplist_t &ops, std::vector>> targets; bool fused = false; - for (uint_t op_idx = fusion_start; op_idx < fusion_end; ++op_idx) { + for (int op_idx = fusion_start; op_idx < fusion_end; ++op_idx) { // skip operations to be ignored if (!method.can_apply(ops[op_idx], max_fused_qubits) || ops[op_idx].type == optype_t::nop) continue; // 1. 
find a N-qubit operation - if (ops[op_idx].qubits.size() != N) + if (ops[op_idx].qubits.size() != N) { continue; + } - std::vector fusing_op_idxs = {op_idx}; - + std::vector fusing_op_idxs = {(uint_t)op_idx}; std::vector fusing_qubits; fusing_qubits.insert(fusing_qubits.end(), ops[op_idx].qubits.begin(), ops[op_idx].qubits.end()); @@ -895,14 +895,14 @@ void Fusion::optimize_circuit(Circuit &circ, Noise::NoiseModel &noise, if (parallelization_ > 1) { #pragma omp parallel for num_threads(parallelization_) - for (int_t i = 0; i < parallelization_; i++) { + for (int_t i = 0; i < (int_t)parallelization_; i++) { int_t start = unit * i; int_t end = std::min(start + unit, (int_t)circ.ops.size()); optimize_circuit(circ, noise, allowed_opset, start, end, fuser, method); } } else { - for (int_t i = 0; i < parallelization_; i++) { + for (uint_t i = 0; i < parallelization_; i++) { int_t start = unit * i; int_t end = std::min(start + unit, (int_t)circ.ops.size()); optimize_circuit(circ, noise, allowed_opset, start, end, fuser,
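Taken together, these hunks enforce one loop convention: container indices are uint_t, so comparisons against size() never mix signedness and -Wsign-compare stays quiet, and only OpenMP work-sharing loops keep a signed int_t index, because some compilers (MSVC's OpenMP 2.0 support in particular) require a signed induction variable; there the unsigned bound is cast instead, as in i < (int_t)shots. An illustrative sketch of the two idioms, not Aer code, assuming int_t/uint_t mirror Aer's 64-bit typedefs:

#include <cstdint>
#include <vector>

using int_t = std::int64_t;
using uint_t = std::uint64_t;

void scale_and_shift(std::vector<double> &v) {
  // Serial loop: unsigned index, so i < v.size() compares unsigned with
  // unsigned and raises no -Wsign-compare warning.
  for (uint_t i = 0; i < v.size(); i++)
    v[i] += 1.0;

  // OpenMP loop: signed induction variable for portability; the unsigned
  // bound is cast at the comparison instead.
#pragma omp parallel for
  for (int_t i = 0; i < (int_t)v.size(); i++)
    v[i] *= 2.0;
}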