Qiskit · hhorii · Feb 20, 2024 · Feb 14, 2024
diff --git a/qiskit_aer/backends/aer_simulator.py b/qiskit_aer/backends/aer_simulator.py
@@ -237,10 +237,11 @@ class AerSimulator(AerBackend):
       experiment execution (Default: 0).
 
     * ``max_memory_mb`` (int): Sets the maximum size of memory
-      to store a state vector. If a state vector needs more, an error
-      is thrown. In general, a state vector of n-qubits uses 2^n complex
-      values (16 Bytes). If set to 0, the maximum will be automatically
-      set to the system memory size (Default: 0).
+      to store quantum states. If the quantum states need more, an error
+      is thrown unless -1 is set. In general, a state vector of n qubits
+      uses 2^n complex values (16 Bytes each).
+      If set to 0, the maximum will be automatically set to
+      the system memory size (Default: 0).
 
     * ``cuStateVec_enable`` (bool): This option enables accelerating by
       cuStateVec library of cuQuantum from NVIDIA, that has highly optimized

diff --git a/qiskit_aer/backends/wrappers/aer_controller_binding.hpp b/qiskit_aer/backends/wrappers/aer_controller_binding.hpp
@@ -139,7 +139,7 @@ void bind_aer_controller(MODULE m) {
   aer_config.def_property(
       "max_memory_mb",
       [](const Config &config) { return config.max_memory_mb.val; },
-      [](Config &config, uint_t val) { config.max_memory_mb.value(val); });
+      [](Config &config, int_t val) { config.max_memory_mb.value(val); });
   aer_config.def_readwrite("fusion_enable", &Config::fusion_enable);
   aer_config.def_readwrite("fusion_verbose", &Config::fusion_verbose);
   aer_config.def_property(

diff --git a/releasenotes/notes/fix_mps_required_memory_estimation-75a9bb739701046c.yaml b/releasenotes/notes/fix_mps_required_memory_estimation-75a9bb739701046c.yaml
@@ -0,0 +1,11 @@
+---
+upgrade:
+  - |
+    Added an option to skip checking the required memory against the
+    available system memory when ``max_memory_mb=-1`` is set.
+fixes:
+  - |
+    Fixed the required memory estimation for the MPS method. The required
+    memory was overestimated because every 2-qubit gate was counted as
+    increasing it, but only rxx, ryy and rzx gates (when theta is not a
+    multiple of pi) and unitary gates actually increase the memory.
diff --git a/src/controllers/aer_controller.hpp b/src/controllers/aer_controller.hpp
@@ -178,6 +178,7 @@ class Controller {
   int max_parallel_experiments_ = 1;
   size_t max_memory_mb_ = 0;
   size_t max_gpu_memory_mb_ = 0;
+  bool check_required_memory_ = true;
 
   // use explicit parallelization
   bool explicit_parallelization_ = false;
@@ -228,9 +229,14 @@ void Controller::set_config(const Config &config) {
 
   // Load configurations for parallelization
 
-  if (config.max_memory_mb.has_value())
-    max_memory_mb_ = config.max_memory_mb.value();
-  else
+  if (config.max_memory_mb.has_value()) {
+    int_t mem = config.max_memory_mb.value();
+    if (mem < 0) {
+      check_required_memory_ = false;
+      max_memory_mb_ = get_system_memory_mb();
+    } else
+      max_memory_mb_ = (size_t)mem;
+  } else
     max_memory_mb_ = get_system_memory_mb();
 
   // for debugging
@@ -390,9 +396,13 @@ void Controller::set_parallelization_experiments(
     ++parallel_experiments;
   }
 
-  if (parallel_experiments <= 0)
-    throw std::runtime_error(
-        "a circuit requires more memory than max_memory_mb.");
+  if (parallel_experiments <= 0) {
+    if (check_required_memory_)
+      throw std::runtime_error(
+          "a circuit requires more memory than max_memory_mb.");
+    else
+      parallel_experiments = 1;
+  }
   parallel_experiments_ = std::min<int>(
       {parallel_experiments, max_experiments, max_parallel_threads_,
        static_cast<int>(required_memory_mb_list.size())});
@@ -549,6 +559,8 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
     result.metadata.add(parallel_experiments_, "parallel_experiments");
     result.metadata.add(max_memory_mb_, "max_memory_mb");
     result.metadata.add(max_gpu_memory_mb_, "max_gpu_memory_mb");
+    if (!check_required_memory_)
+      result.metadata.add(true, "ignore_required_memory_error");
 
 #ifdef _OPENMP
     result.metadata.add(true, "omp_enabled");

diff --git a/src/framework/config.hpp b/src/framework/config.hpp
@@ -81,7 +81,7 @@ struct Config {
   optional<uint_t> max_parallel_threads;
   optional<uint_t> max_parallel_experiments;
   optional<uint_t> max_parallel_shots;
-  optional<uint_t> max_memory_mb;
+  optional<int_t> max_memory_mb;
   bool fusion_enable = true;
   bool fusion_verbose = false;
   optional<uint_t> fusion_max_qubit;

diff --git a/src/simulators/circuit_executor.hpp b/src/simulators/circuit_executor.hpp
@@ -92,6 +92,7 @@ class Executor : public Base {
   size_t min_gpu_memory_mb_; // minimum size per GPU
   int num_gpus_;             // max number of GPU per process
   reg_t target_gpus_;        // GPUs to be used
+  bool check_required_memory_;
 
   // use explicit parallelization
   bool explicit_parallelization_;
@@ -234,6 +235,7 @@ class Executor : public Base {
 template <class state_t>
 Executor<state_t>::Executor() {
   max_memory_mb_ = 0;
+  check_required_memory_ = true;
   max_gpu_memory_mb_ = 0;
   max_parallel_threads_ = 0;
   max_parallel_shots_ = 0;
@@ -290,8 +292,15 @@ void Executor<state_t>::set_config(const Config &config) {
 
   // Load configurations for parallelization
 
-  if (config.max_memory_mb.has_value())
-    max_memory_mb_ = config.max_memory_mb.value();
+  if (config.max_memory_mb.has_value()) {
+    int_t mem = config.max_memory_mb.value();
+    if (mem < 0) {
+      check_required_memory_ = false;
+      max_memory_mb_ = 0;
+    } else {
+      max_memory_mb_ = (size_t)mem;
+    }
+  }
 
   // for debugging
   if (config._parallel_shots.has_value()) {
@@ -1149,7 +1158,7 @@ bool Executor<state_t>::validate_state(const Config &config,
 
   // Validate memory requirements
   bool memory_valid = true;
-  if (max_memory_mb_ > 0) {
+  if (max_memory_mb_ > 0 && check_required_memory_) {
     size_t required_mb = state.required_memory_mb(circ.num_qubits, circ.ops) /
                          num_process_per_experiment_;
     size_t mem_size = (sim_device_ == Device::GPU)

diff --git a/src/simulators/matrix_product_state/matrix_product_state.hpp b/src/simulators/matrix_product_state/matrix_product_state.hpp
@@ -328,7 +328,7 @@ size_t State::required_memory_mb(uint_t num_qubits,
                                  const std::vector<Operations::Op> &ops) const {
   if (num_qubits > 1) {
     MPSSizeEstimator est(num_qubits);
-    uint_t size = est.estimate(ops);
+    uint_t size = est.estimate(ops, gateset_);
     return (size >> 20);
   }
   return 0;

diff --git a/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp b/src/simulators/matrix_product_state/matrix_product_state_size_estimator.hpp
@@ -36,7 +36,8 @@ class MPSSizeEstimator {
 
   void initialize(uint_t nq);
 
-  uint_t estimate(const std::vector<Operations::Op> &ops);
+  uint_t estimate(const std::vector<Operations::Op> &ops,
+                  const stringmap_t<Gates> &gateset);
 
 protected:
   void apply_qubits(const reg_t &qubits);
@@ -64,13 +65,30 @@ void MPSSizeEstimator::initialize(uint_t nq) {
   }
 }
 
-uint_t MPSSizeEstimator::estimate(const std::vector<Operations::Op> &ops) {
+uint_t MPSSizeEstimator::estimate(const std::vector<Operations::Op> &ops,
+                                  const stringmap_t<Gates> &gateset) {
   uint_t n = ops.size();
   for (uint_t i = 0; i < n; i++) {
+    double pi2, pi2_int;
     switch (ops[i].type) {
     case Operations::OpType::gate:
+      if (ops[i].qubits.size() > 1) {
+        auto it = gateset.find(ops[i].name);
+        switch (it->second) {
+        case Gates::rxx:
+        case Gates::ryy:
+        case Gates::rzx:
+          pi2 = std::real(ops[i].params[0]) / M_PI;
+          pi2_int = (double)std::round(pi2);
+          if (!AER::Linalg::almost_equal(pi2, pi2_int))
+            apply_qubits(ops[i].qubits);
+          break;
+        default:
+          break;
+        }
+      }
+      break;
     case Operations::OpType::matrix:
-    case Operations::OpType::diagonal_matrix:
       if (ops[i].qubits.size() > 1)
         apply_qubits(ops[i].qubits);
       break;