Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support time range and only combining some blocks in combine_h5 #6306

Merged
merged 4 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/Domain/Creators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ spectre_target_sources(
PRIVATE
AlignedLattice.cpp
BinaryCompactObject.cpp
BlockGroups.cpp
Cylinder.cpp
CylindricalBinaryCompactObject.cpp
Disk.cpp
Expand All @@ -30,7 +29,6 @@ spectre_target_headers(
HEADERS
AlignedLattice.hpp
BinaryCompactObject.hpp
BlockGroups.hpp
Cylinder.hpp
CylindricalBinaryCompactObject.hpp
Disk.hpp
Expand Down
1 change: 0 additions & 1 deletion src/Domain/FunctionsOfTime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,4 @@ target_link_libraries(
Utilities
PRIVATE
ErrorHandling
H5
)
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Distributed under the MIT License.
// See LICENSE.txt for details.

#include "Domain/Creators/BlockGroups.hpp"
#include "Domain/Structure/BlockGroups.hpp"

#include <exception>
#include <string>
Expand Down
2 changes: 2 additions & 0 deletions src/Domain/Structure/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ add_spectre_library(${LIBRARY})
spectre_target_sources(
${LIBRARY}
PRIVATE
BlockGroups.cpp
BlockNeighbor.cpp
ChildSize.cpp
CreateInitialMesh.cpp
Expand All @@ -30,6 +31,7 @@ spectre_target_headers(
${LIBRARY}
INCLUDE_DIRECTORY ${CMAKE_SOURCE_DIR}/src
HEADERS
BlockGroups.hpp
BlockId.hpp
BlockNeighbor.hpp
ChildSize.hpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@

#include "DataStructures/DataBox/DataBox.hpp"
#include "DataStructures/DataBox/Tag.hpp"
#include "Domain/Creators/BlockGroups.hpp"
#include "Domain/Creators/DomainCreator.hpp"
#include "Domain/Creators/ExpandOverBlocks.hpp"
#include "Domain/Creators/OptionTags.hpp"
#include "Domain/Creators/Tags/Domain.hpp"
#include "Domain/Structure/BlockGroups.hpp"
#include "Domain/Structure/ElementId.hpp"
#include "Domain/Tags.hpp"
#include "Elliptic/Tags.hpp"
Expand Down
2 changes: 1 addition & 1 deletion src/Evolution/DgSubcell/SubcellOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
#include <utility>
#include <vector>

#include "Domain/Creators/BlockGroups.hpp"
#include "Domain/Creators/DomainCreator.hpp"
#include "Domain/Structure/BlockGroups.hpp"
#include "Options/Options.hpp"
#include "Utilities/Algorithm.hpp"
#include "Utilities/ErrorHandling/Error.hpp"
Expand Down
1 change: 1 addition & 0 deletions src/IO/H5/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ target_link_libraries(
PUBLIC
Boost::boost
DataStructures
Domain
DomainStructure
ErrorHandling
HDF5::HDF5
Expand Down
155 changes: 143 additions & 12 deletions src/IO/H5/CombineH5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,51 @@
#include <cstddef>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <string>
#include <vector>

#include "DataStructures/DataVector.hpp"
#include "Domain/Domain.hpp"
#include "Domain/Structure/BlockGroups.hpp"
#include "IO/H5/AccessType.hpp"
#include "IO/H5/CheckH5PropertiesMatch.hpp"
#include "IO/H5/File.hpp"
#include "IO/H5/SourceArchive.hpp"
#include "IO/H5/TensorData.hpp"
#include "IO/H5/VolumeData.hpp"
#include "Parallel/Printf/Printf.hpp"
#include "Utilities/Algorithm.hpp"
#include "Utilities/FileSystem.hpp"
#include "Utilities/MakeString.hpp"
#include "Utilities/Serialization/Serialize.hpp"
#include "Utilities/StdHelpers.hpp"

namespace {
// Returns all the observation_ids stored in the volume files. Assumes all
// volume files have the same observation ids
std::vector<size_t> get_observation_ids(
std::vector<std::pair<size_t, double>> get_observation_ids(
const std::vector<std::string>& file_names,
const std::string& subfile_name) {
const h5::H5File<h5::AccessType::ReadOnly> initial_file(file_names[0], false);
const auto& initial_volume_file =
initial_file.get<h5::VolumeData>(subfile_name);
return initial_volume_file.list_observation_ids();
const std::vector<size_t> observation_ids =
initial_volume_file.list_observation_ids();
std::vector<std::pair<size_t, double>> observation_ids_and_values(
observation_ids.size());
for (size_t i = 0; i < observation_ids.size(); ++i) {
observation_ids_and_values[i] = std::pair{
observation_ids[i],
initial_volume_file.get_observation_value(observation_ids[i])};
}
// Sort by the observation value
alg::sort(observation_ids_and_values,
[](const std::pair<size_t, double>& id_and_value_a,
const std::pair<size_t, double>& id_and_value_b) {
return id_and_value_a.second < id_and_value_b.second;
});
return observation_ids_and_values;
}

// Returns total number of elements for an observation id across all volume data
Expand All @@ -49,19 +69,89 @@ size_t get_number_of_elements(const std::vector<std::string>& input_filenames,
}
return total_elements;
}

std::optional<std::unordered_set<size_t>> get_block_numbers_to_use(
const std::string& file_name, const std::string& subfile_name,
const size_t observation_id,
const std::optional<std::vector<std::string>>& blocks_to_combine) {
if (not blocks_to_combine.has_value() or blocks_to_combine.value().empty()) {
return std::nullopt;
}

const h5::H5File<h5::AccessType::ReadOnly> original_file(file_name, false);
const auto& volume_file = original_file.get<h5::VolumeData>(subfile_name);

const auto dim = volume_file.get_dimension();
auto serialized_domain = volume_file.get_domain(observation_id);
if (not serialized_domain.has_value()) {
ERROR("Could not read the domain the from file "
<< file_name << " and subfile " << subfile_name
<< ". This means we cannot filter based on block names. You can "
"still combine the files but will need to use all blocks.");
}
std::unordered_set<std::string> block_names_to_combine{};
std::vector<std::string> block_names_in_domain{};
switch (dim) {
case 1: {
const auto domain =
deserialize<Domain<1>>(serialized_domain.value().data());
block_names_in_domain = domain.block_names();
block_names_to_combine = domain::expand_block_groups_to_block_names(
blocks_to_combine.value(), domain.block_names(),
domain.block_groups());
break;
}
case 2: {
const auto domain =
deserialize<Domain<2>>(serialized_domain.value().data());
block_names_in_domain = domain.block_names();
block_names_to_combine = domain::expand_block_groups_to_block_names(
blocks_to_combine.value(), domain.block_names(),
domain.block_groups());
break;
}
case 3: {
const auto domain =
deserialize<Domain<3>>(serialized_domain.value().data());
block_names_in_domain = domain.block_names();
block_names_to_combine = domain::expand_block_groups_to_block_names(
blocks_to_combine.value(), domain.block_names(),
domain.block_groups());
break;
}
default:
ERROR("Only can handle 1, 2, or 3d domains not " << dim);
};

std::unordered_set<size_t> blocks_to_use{};
for (const std::string& block_to_combine : block_names_to_combine) {
auto location_it = alg::find(block_names_in_domain, block_to_combine);
if (location_it == block_names_in_domain.end()) {
ERROR("Block name " << block_to_combine << " not found.");
}
blocks_to_use.insert(static_cast<size_t>(
std::distance(block_names_in_domain.begin(), location_it)));
}

return blocks_to_use;
}
} // namespace

namespace h5 {

void combine_h5(const std::vector<std::string>& file_names,
const std::string& subfile_name, const std::string& output,
const bool check_src) {
void combine_h5(
const std::vector<std::string>& file_names, const std::string& subfile_name,
const std::string& output, const std::optional<double> start_value,
const std::optional<double> stop_value,
const std::optional<std::vector<std::string>>& blocks_to_combine,
const bool check_src) {
// Parses for and stores all input files to be looped over
Parallel::printf("Processing files:\n%s\n",
std::string{MakeString{} << file_names}.c_str());

// Checks that volume data was generated with identical versions of SpECTRE
if (check_src) {
if (!h5::check_src_files_match(file_names)) {
if (not h5::check_src_files_match(file_names)) {
ERROR(
"One or more of your files were found to have differing src.tar.gz "
"files, meaning that they may be from differing versions of "
Expand All @@ -70,7 +160,7 @@ void combine_h5(const std::vector<std::string>& file_names,
}

// Checks that volume data files contain the same observation ids
if (!h5::check_observation_ids_match(file_names, subfile_name)) {
if (not h5::check_observation_ids_match(file_names, subfile_name)) {
ERROR(
"One or more of your files were found to have differing observation "
"ids, meaning they may be from different runs of your SpECTRE "
Expand All @@ -88,12 +178,30 @@ void combine_h5(const std::vector<std::string>& file_names,
} // End of scope for H5 file

// Obtains list of observation ids to loop over
const std::vector<size_t> observation_ids =
const std::vector<std::pair<size_t, double>> observation_ids_and_values =
get_observation_ids(file_names, subfile_name);

if (observation_ids_and_values.empty()) {
ERROR("No observation IDs found in subfile" << subfile_name);
}

const std::optional<std::unordered_set<size_t>> blocks_to_use =
get_block_numbers_to_use(file_names[0], subfile_name,
observation_ids_and_values[0].first,
blocks_to_combine);

// Loops over observation ids to write volume data by observation id
for (size_t obs_index = 0; obs_index < observation_ids.size(); ++obs_index) {
const size_t obs_id = observation_ids[obs_index];
for (size_t obs_index = 0; obs_index < observation_ids_and_values.size();
++obs_index) {
const double obs_value = observation_ids_and_values[obs_index].second;
if (obs_value > stop_value.value_or(std::numeric_limits<double>::max()) or
obs_value <
start_value.value_or(std::numeric_limits<double>::lowest())) {
Parallel::printf("Skipping observation value %1.6e\n", obs_value);
continue;
}

const size_t obs_id = observation_ids_and_values[obs_index].first;
// Pre-calculates size of vector to store element data and allocates
// corresponding memory
const size_t vector_dim =
Expand All @@ -117,11 +225,12 @@ void combine_h5(const std::vector<std::string>& file_names,
if (not printed) {
Parallel::printf(
"Processing obsevation ID %lo (%lo/%lo) with value %1.14e\n",
obs_id, obs_index, observation_ids.size(), obs_val);
obs_id, obs_index, observation_ids_and_values.size(), obs_val);
printed = true;
}
Parallel::printf(" Processing file: %s\n", file_name.c_str());

const auto dim = original_volume_file.get_dimension();
serialized_domain = original_volume_file.get_domain(obs_id);
serialized_functions_of_time =
original_volume_file.get_functions_of_time(obs_id);
Expand All @@ -133,10 +242,32 @@ void combine_h5(const std::vector<std::string>& file_names,
obs_val * (1.0 + 4.0 * std::numeric_limits<double>::epsilon()),
std::nullopt)[0]));

auto end_it = data_by_element.end();

if (blocks_to_use.has_value()) {
end_it = alg::remove_if(
data_by_element,
[&blocks_to_use, &dim](const ElementVolumeData& element) -> bool {
switch (dim) {
case 1:
return not blocks_to_use->contains(
ElementId<1>{element.element_name}.block_id());
case 2:
return not blocks_to_use->contains(
ElementId<2>{element.element_name}.block_id());
case 3:
return not blocks_to_use->contains(
ElementId<3>{element.element_name}.block_id());
default:
ERROR("Only can handle 1, 2, or 3d domains but got " << dim);
};
});
}

// Append vector to total vector of element data for this `obs_id`
element_data.insert(element_data.end(),
std::make_move_iterator(data_by_element.begin()),
std::make_move_iterator(data_by_element.end()));
std::make_move_iterator(end_it));
data_by_element.clear();
original_file.close_current_object();
}
Expand Down
15 changes: 13 additions & 2 deletions src/IO/H5/CombineH5.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,24 @@

#pragma once

#include <optional>
#include <string>
#include <vector>

namespace h5 {

/*!
 * \brief Combine a volume subfile across different HDF5 files.
 *
 * The volume data stored in subfile `subfile_name` of each file in
 * `file_names` is combined one observation at a time, in order of increasing
 * observation value. `output` names the destination of the combined data.
 *
 * The arguments `start_value` and `stop_value` restrict which observations are
 * combined: observations with a value below `start_value` or above
 * `stop_value` are skipped. A bound that is `std::nullopt` is not applied.
 *
 * The argument `blocks_to_combine` can list block names and block groups that
 * should be combined. We ignore other blocks when combining the HDF5
 * files. This provides a way to filter volume data for easier visualization.
 * If it is `std::nullopt` or empty, all blocks are combined. Filtering
 * requires that the domain can be read from the volume data.
 *
 * If `check_src` is `true`, it is an error if the `src.tar.gz` files stored in
 * the input files differ.
 */
void combine_h5(const std::vector<std::string>& file_names,
                const std::string& subfile_name, const std::string& output,
                std::optional<double> start_value = std::nullopt,
                std::optional<double> stop_value = std::nullopt,
                const std::optional<std::vector<std::string>>&
                    blocks_to_combine = std::nullopt,
                bool check_src = true);

} // namespace h5
1 change: 1 addition & 0 deletions src/IO/H5/Python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ spectre_python_link_libraries(
${LIBRARY}
PRIVATE
Boost::boost
DomainCreators
DataStructures
H5
pybind11::module
Expand Down
8 changes: 7 additions & 1 deletion src/IO/H5/Python/CombineH5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,20 @@
#include <pybind11/stl.h>
#include <string>

#include "Domain/Creators/RegisterDerivedWithCharm.hpp"
#include "Domain/Creators/TimeDependence/RegisterDerivedWithCharm.hpp"
#include "IO/H5/CombineH5.hpp"

namespace py = pybind11;

namespace py_bindings {
// Adds the `combine_h5` function to the Python module `m`.
void bind_h5combine(py::module& m) {
  // NOTE(review): presumably these registrations are needed so the domain
  // stored in the volume data can be deserialized when filtering by block
  // names -- confirm.
  domain::creators::register_derived_with_charm();
  domain::creators::time_dependence::register_derived_with_charm();
  // Wrapper for combining h5 files. The keyword names match the C++ parameter
  // names and are valid Python identifiers so they can be passed by keyword.
  // The defaults mirror the C++ declaration of `h5::combine_h5`; `None`
  // converts to an empty `std::optional`.
  m.def("combine_h5", &h5::combine_h5, py::arg("file_names"),
        py::arg("subfile_name"), py::arg("output"),
        py::arg("start_value") = py::none(),
        py::arg("stop_value") = py::none(),
        py::arg("blocks_to_combine") = py::none(),
        py::arg("check_src") = true);
}
} // namespace py_bindings
Loading
Loading