Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix required memory estimation for MPS #2059

Merged
merged 1 commit into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions qiskit_aer/backends/aer_simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,11 @@ class AerSimulator(AerBackend):
experiment execution (Default: 0).

* ``max_memory_mb`` (int): Sets the maximum size of memory
to store a state vector. If a state vector needs more, an error
is thrown. In general, a state vector of n-qubits uses 2^n complex
values (16 Bytes). If set to 0, the maximum will be automatically
set to the system memory size (Default: 0).
to store quantum states. If the quantum states need more memory than
this, an error is thrown, unless ``max_memory_mb`` is set to -1, which
disables the check. In general, a state vector of n-qubits
uses 2^n complex values (16 Bytes each).
If set to 0, the maximum will be automatically set to
the system memory size (Default: 0).

* ``cuStateVec_enable`` (bool): This option enables accelerating by
cuStateVec library of cuQuantum from NVIDIA, that has highly optimized
Expand Down
2 changes: 1 addition & 1 deletion qiskit_aer/backends/wrappers/aer_controller_binding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ void bind_aer_controller(MODULE m) {
aer_config.def_property(
"max_memory_mb",
[](const Config &config) { return config.max_memory_mb.val; },
[](Config &config, uint_t val) { config.max_memory_mb.value(val); });
[](Config &config, int_t val) { config.max_memory_mb.value(val); });
aer_config.def_readwrite("fusion_enable", &Config::fusion_enable);
aer_config.def_readwrite("fusion_verbose", &Config::fusion_verbose);
aer_config.def_property(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
upgrade:
- |
Added an option to skip checking the required memory against the
available system memory when `max_memory_mb=-1` is set.
fixes:
- |
Fixed the required memory size estimation for the MPS method. The
required memory was wrongly estimated because every 2-qubit gate was
treated as increasing the required memory, whereas only rxx, ryy and rzx
gates (when theta is not a multiple of pi/2) and unitary gates increase it.
24 changes: 18 additions & 6 deletions src/controllers/aer_controller.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ class Controller {
int max_parallel_experiments_ = 1;
size_t max_memory_mb_ = 0;
size_t max_gpu_memory_mb_ = 0;
bool check_required_memory_ = true;

// use explicit parallelization
bool explicit_parallelization_ = false;
Expand Down Expand Up @@ -228,9 +229,14 @@ void Controller::set_config(const Config &config) {

// Load configurations for parallelization

if (config.max_memory_mb.has_value())
max_memory_mb_ = config.max_memory_mb.value();
else
if (config.max_memory_mb.has_value()) {
int_t mem = config.max_memory_mb.value();
if (mem < 0) {
check_required_memory_ = false;
max_memory_mb_ = get_system_memory_mb();
} else
max_memory_mb_ = (size_t)mem;
} else
max_memory_mb_ = get_system_memory_mb();

// for debugging
Expand Down Expand Up @@ -390,9 +396,13 @@ void Controller::set_parallelization_experiments(
++parallel_experiments;
}

if (parallel_experiments <= 0)
throw std::runtime_error(
"a circuit requires more memory than max_memory_mb.");
if (parallel_experiments <= 0) {
if (check_required_memory_)
throw std::runtime_error(
"a circuit requires more memory than max_memory_mb.");
else
parallel_experiments = 1;
}
parallel_experiments_ = std::min<int>(
{parallel_experiments, max_experiments, max_parallel_threads_,
static_cast<int>(required_memory_mb_list.size())});
Expand Down Expand Up @@ -549,6 +559,8 @@ Result Controller::execute(std::vector<std::shared_ptr<Circuit>> &circuits,
result.metadata.add(parallel_experiments_, "parallel_experiments");
result.metadata.add(max_memory_mb_, "max_memory_mb");
result.metadata.add(max_gpu_memory_mb_, "max_gpu_memory_mb");
if (!check_required_memory_)
result.metadata.add(true, "ignore_required_memory_error");

#ifdef _OPENMP
result.metadata.add(true, "omp_enabled");
Expand Down
2 changes: 1 addition & 1 deletion src/framework/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ struct Config {
optional<uint_t> max_parallel_threads;
optional<uint_t> max_parallel_experiments;
optional<uint_t> max_parallel_shots;
optional<uint_t> max_memory_mb;
optional<int_t> max_memory_mb;
bool fusion_enable = true;
bool fusion_verbose = false;
optional<uint_t> fusion_max_qubit;
Expand Down
15 changes: 12 additions & 3 deletions src/simulators/circuit_executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class Executor : public Base {
size_t min_gpu_memory_mb_; // minimum size per GPU
int num_gpus_; // max number of GPU per process
reg_t target_gpus_; // GPUs to be used
bool check_required_memory_;

// use explicit parallelization
bool explicit_parallelization_;
Expand Down Expand Up @@ -234,6 +235,7 @@ class Executor : public Base {
template <class state_t>
Executor<state_t>::Executor() {
max_memory_mb_ = 0;
check_required_memory_ = true;
max_gpu_memory_mb_ = 0;
max_parallel_threads_ = 0;
max_parallel_shots_ = 0;
Expand Down Expand Up @@ -290,8 +292,15 @@ void Executor<state_t>::set_config(const Config &config) {

// Load configurations for parallelization

if (config.max_memory_mb.has_value())
max_memory_mb_ = config.max_memory_mb.value();
if (config.max_memory_mb.has_value()) {
int_t mem = config.max_memory_mb.value();
if (mem < 0) {
check_required_memory_ = false;
max_memory_mb_ = 0;
} else {
max_memory_mb_ = (size_t)mem;
}
}

// for debugging
if (config._parallel_shots.has_value()) {
Expand Down Expand Up @@ -1149,7 +1158,7 @@ bool Executor<state_t>::validate_state(const Config &config,

// Validate memory requirements
bool memory_valid = true;
if (max_memory_mb_ > 0) {
if (max_memory_mb_ > 0 && check_required_memory_) {
size_t required_mb = state.required_memory_mb(circ.num_qubits, circ.ops) /
num_process_per_experiment_;
size_t mem_size = (sim_device_ == Device::GPU)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ size_t State::required_memory_mb(uint_t num_qubits,
const std::vector<Operations::Op> &ops) const {
if (num_qubits > 1) {
MPSSizeEstimator est(num_qubits);
uint_t size = est.estimate(ops);
uint_t size = est.estimate(ops, gateset_);
return (size >> 20);
}
return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ class MPSSizeEstimator {

void initialize(uint_t nq);

uint_t estimate(const std::vector<Operations::Op> &ops);
uint_t estimate(const std::vector<Operations::Op> &ops,
const stringmap_t<Gates> &gateset);

protected:
void apply_qubits(const reg_t &qubits);
Expand Down Expand Up @@ -64,13 +65,30 @@ void MPSSizeEstimator::initialize(uint_t nq) {
}
}

uint_t MPSSizeEstimator::estimate(const std::vector<Operations::Op> &ops) {
uint_t MPSSizeEstimator::estimate(const std::vector<Operations::Op> &ops,
const stringmap_t<Gates> &gateset) {
uint_t n = ops.size();
for (uint_t i = 0; i < n; i++) {
double pi2, pi2_int;
switch (ops[i].type) {
case Operations::OpType::gate:
if (ops[i].qubits.size() > 1) {
auto it = gateset.find(ops[i].name);
switch (it->second) {
case Gates::rxx:
case Gates::ryy:
case Gates::rzx:
pi2 = std::real(ops[i].params[0]) / M_PI;
pi2_int = (double)std::round(pi2);
if (!AER::Linalg::almost_equal(pi2, pi2_int))
apply_qubits(ops[i].qubits);
break;
default:
break;
}
}
break;
case Operations::OpType::matrix:
case Operations::OpType::diagonal_matrix:
if (ops[i].qubits.size() > 1)
apply_qubits(ops[i].qubits);
break;
Expand Down
Loading