New variables #199

Open · wants to merge 13 commits into base: main
1 change: 1 addition & 0 deletions AUTHORS.rst
@@ -17,3 +17,4 @@ Contributors
 * Sébastien Biner <biner.sebastien@hydroquebec.com> `@sbiner <https://github.com/sbiner>`_
 * David Huard <huard.david@ouranos.ca> `@huard <https://github.com/huard>`_
 * Gabriel Rondeau-Genesse <rondeau-genesse.gabriel@ouranos.ca> `@RondeauG <https://github.com/RondeauG>`_
+* Aslı Beşe <bese.asli@ouranos.ca> `@aslibese <https://github.com/aslibese>`_
112 changes: 61 additions & 51 deletions src/miranda/convert/_data_corrections.py
@@ -342,6 +342,14 @@ def _preprocess_correct(d: xr.Dataset, *, ops: list[partial]) -> xr.Dataset:
     return ds


+def _correct_standard_names(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
+    key = "_corrected_standard_name"
+    for var, val in _iter_entry_key(d, m, "variables", key, p):
+        if val:
+            d[var].attrs["standard_name"] = val
+    return d
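(Review note, not part of the diff: a minimal sketch of the mapping shape `_correct_standard_names` appears to assume. The entry below is hypothetical; real entries live in the project's JSON data mappings and their exact nesting may differ.)

# Hypothetical mapping entry: a "_corrected_standard_name" value attached to a
# variable, resolved per project by _iter_entry_key.
m = {"variables": {"tas": {"_corrected_standard_name": "air_temperature"}}}
# After the correction pass, d["tas"].attrs["standard_name"] == "air_temperature".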


 def _correct_units_names(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
     key = "_corrected_units"
     for var, val in _iter_entry_key(d, m, "variables", key, p):
@@ -450,53 +458,55 @@ def _transform(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
     return d_out


-def _offset_time(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
-    key = "_offset_time"
-    d_out = xr.Dataset(coords=d.coords, attrs=d.attrs)
-    converted = []
-    offset, offset_meaning = None, None
-
-    time_freq = dict()
-    expected_period = _get_section_entry_key(
-        m, "dimensions", "time", "_ensure_correct_time", p
-    )
-    if isinstance(expected_period, str):
-        time_freq["expected_period"] = expected_period
-
-    for vv, offs in _iter_entry_key(d, m, "dimensions", key, p):
-        if offs:
-            # Offset time by value of one time-step
-            if offset is None and offset_meaning is None:
-                try:
-                    offset, offset_meaning = get_time_frequency(d, **time_freq)
-                except TypeError:
-                    logging.error(
-                        "Unable to parse the time frequency. Verify data integrity before retrying."
-                    )
-                    raise
-
-            msg = f"Offsetting data for `{vv}` by `{offset[0]} {offset_meaning}(s)`."
-
-            logging.info(msg)
-            with xr.set_options(keep_attrs=True):
-                out = d[vv]
-                out["time"] = out.time - np.timedelta64(offset[0], offset[1])
-                d_out[vv] = out
-            converted.append(vv)
-            prev_history = d.attrs.get("history", "")
-            history = f"Offset variable `{vv}` values by `{offset[0]} {offset_meaning}(s). {prev_history}"
-            d_out.attrs.update(dict(history=history))
-        elif offs is False:
-            msg = f"No time offsetting needed for `{vv}` in `{p}` (Explicitly set to False)."
-
-            logging.info(msg)
-            continue
-
-    # Copy unconverted variables
-    for vv in d.data_vars:
-        if vv not in converted:
-            d_out[vv] = d[vv]
-    return d_out
+# TODO: Determine if this function is still needed
+
+# def _offset_time(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
+#     key = "_offset_time"
+#     d_out = xr.Dataset(coords=d.coords, attrs=d.attrs)
+#     converted = []
+#     offset, offset_meaning = None, None
+#
+#     time_freq = dict()
+#     expected_period = _get_section_entry_key(
+#         m, "dimensions", "time", "_ensure_correct_time", p
+#     )
+#     if isinstance(expected_period, str):
+#         time_freq["expected_period"] = expected_period
+#
+#     for vv, offs in _iter_entry_key(d, m, "dimensions", key, p):
+#         if offs:
+#             # Offset time by value of one time-step
+#             if offset is None and offset_meaning is None:
+#                 try:
+#                     offset, offset_meaning = get_time_frequency(d, **time_freq)
+#                 except TypeError:
+#                     logging.error(
+#                         "Unable to parse the time frequency. Verify data integrity before retrying."
+#                     )
+#                     raise
+#
+#             msg = f"Offsetting data for `{vv}` by `{offset[0]} {offset_meaning}(s)`."
+#
+#             logging.info(msg)
+#             with xr.set_options(keep_attrs=True):
+#                 out = d[vv]
+#                 out["time"] = out.time - np.timedelta64(offset[0], offset[1])
+#                 d_out[vv] = out
+#             converted.append(vv)
+#             prev_history = d.attrs.get("history", "")
+#             history = f"Offset variable `{vv}` values by `{offset[0]} {offset_meaning}(s). {prev_history}"
+#             d_out.attrs.update(dict(history=history))
+#         elif offs is False:
+#             msg = f"No time offsetting needed for `{vv}` in `{p}` (Explicitly set to False)."
+#
+#             logging.info(msg)
+#             continue
+#
+#     # Copy unconverted variables
+#     for vv in d.data_vars:
+#         if vv not in converted:
+#             d_out[vv] = d[vv]
+#     return d_out


 def _invert_sign(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
@@ -536,7 +546,7 @@ def _units_cf_conversion(d: xr.Dataset, m: dict) -> xr.Dataset:
     for vv, unit in _iter_entry_key(d, m, "variables", "units", None):
         if unit:
             with xr.set_options(keep_attrs=True):
-                d[vv] = units.convert_units_to(d[vv], unit, context="hydro")
+                d[vv] = units.convert_units_to(d[vv], unit)
             prev_history = d.attrs.get("history", "")
             history = f"Converted variable `{vv}` to CF-compliant units (`{unit}`). {prev_history}"
             d.attrs.update(dict(history=history))
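(Review note: this hunk drops the "hydro" pint context from xclim's `convert_units_to`. That context is what permits hydrology-specific conversions across dimensionalities, e.g. precipitation mass flux to a length rate via water density, so such conversions will now raise. Illustration only; assumes `units` is `xclim.core.units` and `pr` carries units of "kg m-2 s-1":)

# units.convert_units_to(pr, "mm d-1")                   # fails: incompatible dimensionality
# units.convert_units_to(pr, "mm d-1", context="hydro")  # succeeds via the hydro context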
@@ -888,17 +898,17 @@ def dataset_corrections(ds: xr.Dataset, project: str) -> xr.Dataset:
     metadata_definition = load_json_data_mappings(project)

     ds = _correct_units_names(ds, project, metadata_definition)
+    ds = _correct_standard_names(ds, project, metadata_definition)
     ds = _transform(ds, project, metadata_definition)
     ds = _invert_sign(ds, project, metadata_definition)
     ds = _units_cf_conversion(ds, metadata_definition)
     ds = _clip_values(ds, project, metadata_definition)

     ds = dims_conversion(ds, project, metadata_definition)
     ds = _ensure_correct_time(ds, project, metadata_definition)
-    ds = _offset_time(ds, project, metadata_definition)

+    # TODO validate this is needed
+    # ds = _offset_time(ds, project, metadata_definition)
     ds = variable_conversion(ds, project, metadata_definition)

     ds = metadata_conversion(ds, project, metadata_definition)

     ds.attrs["history"] = (
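(Review note: a hedged usage sketch of the reworked correction pipeline. The project key and file name below are assumptions; valid project names come from `load_json_data_mappings`.)

import xarray as xr
from miranda.convert._data_corrections import dataset_corrections

ds = xr.open_dataset("input.nc")  # hypothetical input file
ds = dataset_corrections(ds, project="era5")  # "era5" is an assumed project key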