From 1383c016ed1ebc3731b09150eaf54e6ef773417c Mon Sep 17 00:00:00 2001
From: bbm <brian.maranville@nist.gov>
Date: Fri, 17 Feb 2023 12:56:17 -0500
Subject: [PATCH 01/11] store data in OrsoDataset with column as first index
 (and allow it to be a list), to enable multidimensional 'columns' in the
 future

---
 orsopy/fileio/orso.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/orsopy/fileio/orso.py b/orsopy/fileio/orso.py
index 6ddf21fe..8fae4f09 100644
--- a/orsopy/fileio/orso.py
+++ b/orsopy/fileio/orso.py
@@ -163,11 +163,14 @@ class OrsoDataset:
     """
 
     info: Orso
-    data: Any
+    data: Union[np.ndarray, Sequence[np.ndarray], Sequence[Sequence]]
 
     def __post_init__(self):
-        if self.data.shape[1] != len(self.info.columns):
+        if len(self.data) != len(self.info.columns):
             raise ValueError("Data has to have the same number of columns as header")
+        column_lengths = set(len(c) for c in self.data)
+        if len(column_lengths) > 1:
+            raise ValueError("Columns must all have the same length in first dimension")
 
     def header(self) -> str:
         """
@@ -249,13 +252,13 @@ def save_orso(
 
         ds1 = datasets[0]
         header += ds1.header()
-        np.savetxt(f, ds1.data, header=header, fmt="%-22.16e")
+        np.savetxt(f, np.asarray(ds1.data).T, header=header, fmt="%-22.16e")
 
         for dsi in datasets[1:]:
             # write an optional spacer string between dataset e.g. \n
             f.write(data_separator)
             hi = ds1.diff_header(dsi)
-            np.savetxt(f, dsi.data, header=hi, fmt="%-22.16e")
+            np.savetxt(f, np.asarray(dsi.data).T, header=hi, fmt="%-22.16e")
 
 
 def load_orso(fname: Union[TextIO, str]) -> List[OrsoDataset]:

From 709a585914d60ccb5e2db8ede1ce1866569f5b35 Mon Sep 17 00:00:00 2001
From: bbm <brian.maranville@nist.gov>
Date: Fri, 17 Feb 2023 12:59:09 -0500
Subject: [PATCH 02/11] store data in OrsoDataset with column as first index
 (and allow it to be a list), to enable multidimensional 'columns' in the
 future

---
 orsopy/fileio/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/orsopy/fileio/base.py b/orsopy/fileio/base.py
index 8e3db140..eb6cb6c9 100644
--- a/orsopy/fileio/base.py
+++ b/orsopy/fileio/base.py
@@ -798,7 +798,7 @@ def _read_header_data(file: Union[TextIO, str], validate: bool = False) -> Tuple
                 # numerical array  and start collecting the numbers for this
                 # dataset
                 _d = np.array([np.fromstring(v, dtype=float, sep=" ") for v in _ds_lines])
-                data.append(_d)
+                data.append(_d.T)
                 _ds_lines = []
 
                 # append '---' to signify the start of a new yaml document
@@ -811,7 +811,7 @@ def _read_header_data(file: Union[TextIO, str], validate: bool = False) -> Tuple
 
         # append the last numerical array
         _d = np.array([np.fromstring(v, dtype=float, sep=" ") for v in _ds_lines])
-        data.append(_d)
+        data.append(_d.T)
 
         yml = "".join(header)
 

From e71b2076f45722777313704065740e89694f27a7 Mon Sep 17 00:00:00 2001
From: bbm <brian.maranville@nist.gov>
Date: Fri, 17 Feb 2023 13:15:18 -0500
Subject: [PATCH 03/11] update tests to reflect column-first ordering of
 datasets

---
 tests/test_fileio/test_orso.py   | 14 +++++++-------
 tests/test_fileio/test_schema.py |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/test_fileio/test_orso.py b/tests/test_fileio/test_orso.py
index 5199c998..8c23e646 100644
--- a/tests/test_fileio/test_orso.py
+++ b/tests/test_fileio/test_orso.py
@@ -104,8 +104,8 @@ def test_write_read(self):
         # test write and read of multiple datasets
         info = fileio.Orso.empty()
         info2 = fileio.Orso.empty()
-        data = np.zeros((100, 3))
-        data[:] = np.arange(100.0)[:, None]
+        data = np.zeros((3, 100))
+        data[:] = np.arange(100.0)[None, :]
 
         info.columns = [
             fileio.Column("Qz", "1/angstrom"),
@@ -177,14 +177,14 @@ def test_unique_dataset(self):
         info2.data_set = 0
         info2.columns = [Column("stuff")] * 4
 
-        ds = OrsoDataset(info, np.empty((2, 4)))
-        ds2 = OrsoDataset(info2, np.empty((2, 4)))
+        ds = OrsoDataset(info, np.empty((4, 2)))
+        ds2 = OrsoDataset(info2, np.empty((4, 2)))
 
         with pytest.raises(ValueError):
             fileio.save_orso([ds, ds2], "test_data_set.ort")
 
         with pytest.raises(ValueError):
-            OrsoDataset(info, np.empty((2, 5)))
+            OrsoDataset(info, np.empty((5, 2)))
 
     def test_user_data(self):
         # test write and read of userdata
@@ -195,8 +195,8 @@ def test_user_data(self):
             fileio.ErrorColumn("R"),
         ]
 
-        data = np.zeros((100, 3))
-        data[:] = np.arange(100.0)[:, None]
+        data = np.zeros((3, 100))
+        data[:] = np.arange(100.0)[None, :]
         dct = {"ci": "1", "foo": ["bar", 1, 2, 3.5]}
         info.user_data = dct
         ds = fileio.OrsoDataset(info, data)
diff --git a/tests/test_fileio/test_schema.py b/tests/test_fileio/test_schema.py
index e485b851..d9808eaf 100644
--- a/tests/test_fileio/test_schema.py
+++ b/tests/test_fileio/test_schema.py
@@ -17,7 +17,7 @@ def test_example_ort(self):
             schema = json.load(f)
 
         dct_list, data, version = _read_header_data(os.path.join("tests", "test_example.ort"), validate=True)
-        assert data[0].shape == (2, 4)
+        assert data[0].shape == (4, 2)
         assert version == "0.1"
 
         # d contains datetime.datetime objects, which would fail the
@@ -33,4 +33,4 @@ def test_example_ort(self):
         assert len(dct_list) == 2
         assert dct_list[1]["data_set"] == "spin_down"
         assert data[1].shape == (4, 4)
-        np.testing.assert_allclose(data[1][2:], data[0])
+        np.testing.assert_allclose(data[1][:, 2:], data[0])

From a63bfb907b8ba879102f338e173856e0d1024aa4 Mon Sep 17 00:00:00 2001
From: bbm <brian.maranville@nist.gov>
Date: Fri, 17 Feb 2023 13:17:16 -0500
Subject: [PATCH 04/11] add save_nexus and load_nexus

---
 orsopy/fileio/base.py |  82 ++++++++++++++++++++++++++++++++-
 orsopy/fileio/orso.py | 104 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 180 insertions(+), 6 deletions(-)

diff --git a/orsopy/fileio/base.py b/orsopy/fileio/base.py
index eb6cb6c9..0c21c3b3 100644
--- a/orsopy/fileio/base.py
+++ b/orsopy/fileio/base.py
@@ -35,6 +35,8 @@ def _noop(self, *args, **kw):
     pass
 
 
+JSON_MIMETYPE = "application/json"
+
 yaml.emitter.Emitter.process_tag = _noop
 
 # make sure that datetime strings get loaded as str not datetime instances
@@ -82,7 +84,11 @@ def _custom_init_fn(fieldsarg, frozen, has_post_init, self_name, globals):
     )
 
 
+ORSO_DATACLASSES = dict()
+
+
 def orsodataclass(cls: type):
+    ORSO_DATACLASSES[cls.__name__] = cls
     attrs = cls.__dict__
     bases = cls.__bases__
     if "__annotations__" in attrs and len([k for k in attrs["__annotations__"].keys() if not k.startswith("_")]) > 0:
@@ -275,7 +281,8 @@ def _resolve_type(hint: type, item: Any) -> Any:
                     return item
                 else:
                     warnings.warn(
-                        f"Has to be one of {get_args(hint)} got {item}", RuntimeWarning,
+                        f"Has to be one of {get_args(hint)} got {item}",
+                        RuntimeWarning,
                     )
                     return str(item)
         return None
@@ -376,6 +383,57 @@ def yaml_representer_compact(self, dumper: yaml.Dumper):
         output = self._to_object_dict()
         return dumper.represent_mapping(dumper.DEFAULT_MAPPING_TAG, output, flow_style=True)
 
+    def to_nexus(self, root=None, name=None, data=None):
+        """
+        Produces an HDF5 representation of the Header object, removing
+        any optional attributes with the value :code:`None`.
+
+        :return: HDF5 object
+        """
+        classname = self.__class__.__name__
+        import h5py
+
+        assert isinstance(root, h5py.Group)
+        group = root.create_group(classname if name is None else name)
+        group.attrs["ORSO_class"] = classname
+
+        for child_name, value in self.__dict__.items():
+            if child_name.startswith("_") or (value is None and child_name in self._orso_optionals):
+                continue
+
+            if value.__class__ in ORSO_DATACLASSES.values():
+                value.to_nexus(root=group, name=child_name)
+            elif isinstance(value, (list, tuple)):
+                child_group = group.create_group(child_name)
+                child_group.attrs["list"] = 1
+                for index, item in enumerate(value):
+                    # use the 'name' attribute of children if it exists, else index:
+                    sub_name = getattr(item, "name", str(index))
+                    if item.__class__ in ORSO_DATACLASSES.values():
+                        item_out = item.to_nexus(root=child_group, name=sub_name)
+                    else:
+                        t_value = nexus_value_converter(item)
+                        if any(isinstance(t_value, t) for t in (str, float, int, bool, np.ndarray)):
+                            item_out = child_group.create_dataset(sub_name, data=t_value)
+                        else:
+                            item_out = child_group.create_dataset(
+                                sub_name, data=json.dumps(_todict(value), default=nexus_value_converter)
+                            )
+                            item_out.attrs["mimetype"] = JSON_MIMETYPE
+                    item_out.attrs["sequence_index"] = index
+            else:
+                # here _todict converts objects that aren't derived from Header
+                # and therefore don't have to_dict methods.
+                t_value = nexus_value_converter(value)
+                if any(isinstance(t_value, t) for t in (str, float, int, bool, np.ndarray)):
+                    group.create_dataset(child_name, data=t_value)
+                else:
+                    dset = group.create_dataset(
+                        child_name, data=json.dumps(_todict(value), default=nexus_value_converter)
+                    )
+                    dset.attrs["mimetype"] = JSON_MIMETYPE
+        return group
+
     @staticmethod
     def _check_unit(unit: str):
         """
@@ -431,6 +489,8 @@ def represent_data(self, data):
         elif isinstance(data, datetime.datetime):
             value = data.isoformat("T")
             return super().represent_scalar("tag:yaml.org,2002:timestamp", value)
+        elif isinstance(data, np.floating):
+            return super().represent_data(float(data))
         else:
             return super().represent_data(data)
 
@@ -924,7 +984,7 @@ def _todict(obj: Any, classkey: Any = None) -> dict:
     """
     if isinstance(obj, dict):
         data = {}
-        for (k, v) in obj.items():
+        for k, v in obj.items():
             data[k] = _todict(v, classkey)
         return data
     elif isinstance(obj, Enum):
@@ -949,6 +1009,24 @@ def _todict(obj: Any, classkey: Any = None) -> dict:
         return obj
 
 
+def json_datetime_trap(obj):
+    if isinstance(obj, datetime.datetime):
+        return obj.isoformat()
+    return obj
+
+
+def enum_trap(obj):
+    if isinstance(obj, Enum):
+        return obj.value
+    return obj
+
+
+def nexus_value_converter(obj):
+    for converter in (json_datetime_trap, enum_trap):
+        obj = converter(obj)
+    return obj
+
+
 def _nested_update(d: dict, u: dict) -> dict:
     """
     Nested dictionary update.
diff --git a/orsopy/fileio/orso.py b/orsopy/fileio/orso.py
index 8fae4f09..615f1cef 100644
--- a/orsopy/fileio/orso.py
+++ b/orsopy/fileio/orso.py
@@ -2,14 +2,14 @@
 Implementation of the top level class for the ORSO header.
 """
 
-from dataclasses import dataclass
-from typing import Any, List, Optional, TextIO, Union
+from dataclasses import dataclass, fields
+from typing import BinaryIO, List, Optional, Sequence, TextIO, Union
 
 import numpy as np
 import yaml
 
-from .base import (Column, ErrorColumn, Header, _dict_diff, _nested_update, _possibly_open_file, _read_header_data,
-                   orsodataclass)
+from .base import (JSON_MIMETYPE, ORSO_DATACLASSES, Column, ErrorColumn, Header, _dict_diff, _nested_update,
+                   _possibly_open_file, _read_header_data, orsodataclass)
 from .data_source import DataSource
 from .reduction import Reduction
 
@@ -213,6 +213,9 @@ def __eq__(self, other: "OrsoDataset"):
         return self.info == other.info and (self.data == other.data).all()
 
 
+ORSO_DATACLASSES["OrsoDataset"] = OrsoDataset
+
+
 def save_orso(
     datasets: List[OrsoDataset], fname: Union[TextIO, str], comment: Optional[str] = None, data_separator: str = ""
 ) -> None:
@@ -276,3 +279,96 @@ def load_orso(fname: Union[TextIO, str]) -> List[OrsoDataset]:
         od = OrsoDataset(o, data)
         ods.append(od)
     return ods
+
+
+def _from_nexus_group(group):
+    if group.attrs.get("list", None) is not None:
+        sort_list = [[v.attrs["sequence_index"], v] for v in group.values()]
+        return [_get_nexus_item(v) for _, v in sorted(sort_list)]
+    else:
+        dct = dict()
+        for name, value in group.items():
+            dct[name] = _get_nexus_item(value)
+
+        if "ORSO_class" in group.attrs:
+            cls = ORSO_DATACLASSES[group.attrs["ORSO_class"]]
+            cls_fields = set(field.name for field in fields(cls))
+            init_params = {name: value for name, value in dct.items() if name in cls_fields}
+            return cls(**init_params)
+        else:
+            return dct
+
+
+def _get_nexus_item(value):
+    import json
+
+    import h5py
+
+    if isinstance(value, h5py.Group):
+        return _from_nexus_group(value)
+    elif isinstance(value, h5py.Dataset):
+        v = value[()]
+        if value.attrs.get("mimetype", None) == JSON_MIMETYPE:
+            return json.loads(v)
+        elif hasattr(v, "decode"):
+            # it is a bytes object, should be string
+            return v.decode()
+        else:
+            return v
+
+
+def load_nexus(fname: Union[str, BinaryIO]) -> List[OrsoDataset]:
+    import h5py
+
+    f = h5py.File(fname, "r")
+    return [_from_nexus_group(g) for g in f.values() if g.attrs.get("ORSO_class", None) == "OrsoDataset"]
+
+
+def save_nexus(datasets: List[OrsoDataset], fname: Union[str, BinaryIO], comment: Optional[str] = None) -> BinaryIO:
+    import h5py
+
+    for idx, dataset in enumerate(datasets):
+        info = dataset.info
+        data_set = info.data_set
+        if data_set is None or (isinstance(data_set, str) and len(data_set) == 0):
+            # it's not set, or is zero length string
+            info.data_set = idx
+
+    dsets = [dataset.info.data_set for dataset in datasets]
+    if len(set(dsets)) != len(dsets):
+        raise ValueError("All `OrsoDataset.info.data_set` values must be unique")
+
+    with h5py.File(fname, mode="w") as f:
+        f.attrs["NX_class"] = "NXroot"
+        if comment is not None:
+            f.attrs["comment"] = comment
+
+        for dsi in datasets:
+            info = dsi.info
+            entry = f.create_group(info.data_set)
+            entry.attrs["ORSO_class"] = "OrsoDataset"
+            entry.attrs["NX_class"] = "NXentry"
+            entry.attrs["default"] = "plottable_data"
+            info.to_nexus(root=entry, name="info")
+            data_group = entry.create_group("data")
+            data_group.attrs["list"] = 1
+            plottable_data_group = entry.create_group("plottable_data")
+            plottable_data_group.attrs["NX_class"] = "NXdata"
+            plottable_data_group.attrs["list"] = 1
+            plottable_data_group.attrs["axes"] = [info.columns[0].name]
+            plottable_data_group.attrs["signal"] = info.columns[1].name
+            plottable_data_group.attrs[f"{info.columns[0].name}_indices"] = [0]
+            for column_index, column in enumerate(info.columns):
+                # dsi.data is column-first: its first dimension indexes the columns
+                # (the same layout load_orso returns), so dsi.data[column_index] is one column
+                col_data = data_group.create_dataset(column.name, data=dsi.data[column_index])
+                col_data.attrs["sequence_index"] = column_index
+                col_data.attrs["target"] = col_data.name
+                if isinstance(column, ErrorColumn):
+                    nexus_colname = column.error_of + "_errors"
+                else:
+                    nexus_colname = column.name
+                    if column.unit is not None:
+                        col_data.attrs["units"] = column.unit
+
+                plottable_data_group[nexus_colname] = h5py.SoftLink(col_data.name)

From 9e8f67c474128ce7ebe9171f250edf724105b371 Mon Sep 17 00:00:00 2001
From: Brian Benjamin Maranville <brian.maranville@nist.gov>
Date: Fri, 17 Feb 2023 13:58:08 -0500
Subject: [PATCH 05/11] drop py36, add py310 and py311 and update actions

---
 .github/workflows/pytest.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index f6dce06a..e5032c47 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -10,12 +10,12 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.8, 3.9]
+        python-version: [3.8, 3.9, '3.10', '3.11']
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
 
@@ -47,12 +47,12 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7]
+        python-version: [3.7]
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
 

From 2acb9ba169e52822a014f0e108d48934acb4a632 Mon Sep 17 00:00:00 2001
From: bbm <brian.maranville@nist.gov>
Date: Tue, 21 Feb 2023 14:28:35 -0500
Subject: [PATCH 06/11] formalize handling of dict and None types for NeXus

---
 orsopy/fileio/base.py | 17 +++++++++--------
 orsopy/fileio/orso.py |  4 +++-
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/orsopy/fileio/base.py b/orsopy/fileio/base.py
index 0c21c3b3..fb7a6070 100644
--- a/orsopy/fileio/base.py
+++ b/orsopy/fileio/base.py
@@ -415,10 +415,11 @@ def to_nexus(self, root=None, name=None, data=None):
                         t_value = nexus_value_converter(item)
                         if any(isinstance(t_value, t) for t in (str, float, int, bool, np.ndarray)):
                             item_out = child_group.create_dataset(sub_name, data=t_value)
-                        else:
-                            item_out = child_group.create_dataset(
-                                sub_name, data=json.dumps(_todict(value), default=nexus_value_converter)
-                            )
+                        elif t_value is None:
+                            # special handling for null datasets: no data
+                            item_out = child_group.create_dataset(sub_name, dtype="f")
+                        elif isinstance(t_value, dict):
+                            item_out = child_group.create_dataset(sub_name, data=json.dumps(t_value))
                             item_out.attrs["mimetype"] = JSON_MIMETYPE
                     item_out.attrs["sequence_index"] = index
             else:
@@ -427,10 +428,10 @@ def to_nexus(self, root=None, name=None, data=None):
                 t_value = nexus_value_converter(value)
                 if any(isinstance(t_value, t) for t in (str, float, int, bool, np.ndarray)):
                     group.create_dataset(child_name, data=t_value)
-                else:
-                    dset = group.create_dataset(
-                        child_name, data=json.dumps(_todict(value), default=nexus_value_converter)
-                    )
+                elif t_value is None:
+                    group.create_dataset(child_name, dtype="f")
+                elif isinstance(t_value, dict):
+                    dset = group.create_dataset(child_name, data=json.dumps(t_value))
                     dset.attrs["mimetype"] = JSON_MIMETYPE
         return group
 
diff --git a/orsopy/fileio/orso.py b/orsopy/fileio/orso.py
index 615f1cef..e7d25d91 100644
--- a/orsopy/fileio/orso.py
+++ b/orsopy/fileio/orso.py
@@ -308,7 +308,9 @@ def _get_nexus_item(value):
         return _from_nexus_group(value)
     elif isinstance(value, h5py.Dataset):
         v = value[()]
-        if value.attrs.get("mimetype", None) == JSON_MIMETYPE:
+        if isinstance(value, h5py.Empty):
+            return None
+        elif value.attrs.get("mimetype", None) == JSON_MIMETYPE:
             return json.loads(v)
         elif hasattr(v, "decode"):
             # it is a bytes object, should be string

From da5334c4fad175388442ee1b774b93c697585de4 Mon Sep 17 00:00:00 2001
From: bbm <brian.maranville@nist.gov>
Date: Tue, 21 Feb 2023 14:30:20 -0500
Subject: [PATCH 07/11] use '@sequence' instead of '@list' to signify an
 anonymous ordered array of children in a NeXus group

---
 orsopy/fileio/base.py | 2 +-
 orsopy/fileio/orso.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/orsopy/fileio/base.py b/orsopy/fileio/base.py
index fb7a6070..1ebe4468 100644
--- a/orsopy/fileio/base.py
+++ b/orsopy/fileio/base.py
@@ -405,7 +405,7 @@ def to_nexus(self, root=None, name=None, data=None):
                 value.to_nexus(root=group, name=child_name)
             elif isinstance(value, (list, tuple)):
                 child_group = group.create_group(child_name)
-                child_group.attrs["list"] = 1
+                child_group.attrs["sequence"] = 1
                 for index, item in enumerate(value):
                     # use the 'name' attribute of children if it exists, else index:
                     sub_name = getattr(item, "name", str(index))
diff --git a/orsopy/fileio/orso.py b/orsopy/fileio/orso.py
index e7d25d91..510c7a4b 100644
--- a/orsopy/fileio/orso.py
+++ b/orsopy/fileio/orso.py
@@ -282,7 +282,7 @@ def load_orso(fname: Union[TextIO, str]) -> List[OrsoDataset]:
 
 
 def _from_nexus_group(group):
-    if group.attrs.get("list", None) is not None:
+    if group.attrs.get("sequence", None) is not None:
         sort_list = [[v.attrs["sequence_index"], v] for v in group.values()]
         return [_get_nexus_item(v) for _, v in sorted(sort_list)]
     else:
@@ -353,10 +353,10 @@ def save_nexus(datasets: List[OrsoDataset], fname: Union[str, BinaryIO], comment
             entry.attrs["default"] = "plottable_data"
             info.to_nexus(root=entry, name="info")
             data_group = entry.create_group("data")
-            data_group.attrs["list"] = 1
-            plottable_data_group = entry.create_group("plottable_data")
+            data_group.attrs["sequence"] = 1
+            plottable_data_group = entry.create_group("plottable_data", track_order=True)
             plottable_data_group.attrs["NX_class"] = "NXdata"
-            plottable_data_group.attrs["list"] = 1
+            plottable_data_group.attrs["sequence"] = 1
             plottable_data_group.attrs["axes"] = [info.columns[0].name]
             plottable_data_group.attrs["signal"] = info.columns[1].name
             plottable_data_group.attrs[f"{info.columns[0].name}_indices"] = [0]

From bdc50f75e458b877a08afd6dfa5a2809fbf0ff7b Mon Sep 17 00:00:00 2001
From: bbm <brian.maranville@nist.gov>
Date: Tue, 21 Feb 2023 14:31:16 -0500
Subject: [PATCH 08/11] filter only the NXdata folder, let other non-canonical
 fields through the deserialization process

---
 orsopy/fileio/orso.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/orsopy/fileio/orso.py b/orsopy/fileio/orso.py
index 510c7a4b..a736a9a7 100644
--- a/orsopy/fileio/orso.py
+++ b/orsopy/fileio/orso.py
@@ -288,13 +288,14 @@ def _from_nexus_group(group):
     else:
         dct = dict()
         for name, value in group.items():
+            if value.attrs.get('NX_class', None) == 'NXdata':
+                # remove NXdata folder, which exists only for NeXus plotting
+                continue
             dct[name] = _get_nexus_item(value)
 
         if "ORSO_class" in group.attrs:
             cls = ORSO_DATACLASSES[group.attrs["ORSO_class"]]
-            cls_fields = set(field.name for field in fields(cls))
-            init_params = {name: value for name, value in dct.items() if name in cls_fields}
-            return cls(**init_params)
+            return cls(**dct)
         else:
             return dct
 

From 4500269f1bbe016aec7ac62bc6d25566bea37af7 Mon Sep 17 00:00:00 2001
From: bbm <brian.maranville@nist.gov>
Date: Tue, 21 Feb 2023 14:32:01 -0500
Subject: [PATCH 09/11] use hard links for plottable_data columns, so they can
 be portable when copying parts of HDF5 structures to other files

---
 orsopy/fileio/orso.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orsopy/fileio/orso.py b/orsopy/fileio/orso.py
index a736a9a7..6858f249 100644
--- a/orsopy/fileio/orso.py
+++ b/orsopy/fileio/orso.py
@@ -374,4 +374,4 @@ def save_nexus(datasets: List[OrsoDataset], fname: Union[str, BinaryIO], comment
                     if column.unit is not None:
                         col_data.attrs["units"] = column.unit
 
-                plottable_data_group[nexus_colname] = h5py.SoftLink(col_data.name)
+                plottable_data_group[nexus_colname] = col_data

From c8175adc6c309492f5b67acb607e133dc0e94836 Mon Sep 17 00:00:00 2001
From: bbm <brian.maranville@nist.gov>
Date: Tue, 21 Feb 2023 14:32:32 -0500
Subject: [PATCH 10/11] add warning if attribute cannot be converted to HDF5
 equivalent structure

---
 orsopy/fileio/base.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/orsopy/fileio/base.py b/orsopy/fileio/base.py
index 1ebe4468..db1563ae 100644
--- a/orsopy/fileio/base.py
+++ b/orsopy/fileio/base.py
@@ -84,6 +84,7 @@ def _custom_init_fn(fieldsarg, frozen, has_post_init, self_name, globals):
     )
 
 
+#register all ORSO classes here:
 ORSO_DATACLASSES = dict()
 
 
@@ -383,7 +384,7 @@ def yaml_representer_compact(self, dumper: yaml.Dumper):
         output = self._to_object_dict()
         return dumper.represent_mapping(dumper.DEFAULT_MAPPING_TAG, output, flow_style=True)
 
-    def to_nexus(self, root=None, name=None, data=None):
+    def to_nexus(self, root=None, name=None):
         """
         Produces an HDF5 representation of the Header object, removing
         any optional attributes with the value :code:`None`.
@@ -421,6 +422,11 @@ def to_nexus(self, root=None, name=None, data=None):
                         elif isinstance(t_value, dict):
                             item_out = child_group.create_dataset(sub_name, data=json.dumps(t_value))
                             item_out.attrs["mimetype"] = JSON_MIMETYPE
+                        else:
+                            import warnings
+                            # raise ValueError(f"unserializable attribute found: {child_name}[{index}] = {t_value}")
+                            warnings.warn(f"unserializable attribute found: {child_name}[{index}] = {t_value}")
+                            continue
                     item_out.attrs["sequence_index"] = index
             else:
                 # here _todict converts objects that aren't derived from Header
@@ -433,6 +439,10 @@ def to_nexus(self, root=None, name=None, data=None):
                 elif isinstance(t_value, dict):
                     dset = group.create_dataset(child_name, data=json.dumps(t_value))
                     dset.attrs["mimetype"] = JSON_MIMETYPE
+                else:
+                    import warnings
+                    warnings.warn(f"unserializable attribute found: {child_name} = {t_value}")
+                    # raise ValueError(f"unserializable attribute found: {child_name} = {t_value}")
         return group
 
     @staticmethod

From bfd7841f96a8d3bbdd56f4d28a2f3041a1e30bee Mon Sep 17 00:00:00 2001
From: bbm <brian.maranville@nist.gov>
Date: Tue, 21 Feb 2023 14:35:09 -0500
Subject: [PATCH 11/11] flake8

---
 orsopy/fileio/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orsopy/fileio/base.py b/orsopy/fileio/base.py
index db1563ae..92c24df1 100644
--- a/orsopy/fileio/base.py
+++ b/orsopy/fileio/base.py
@@ -84,7 +84,7 @@ def _custom_init_fn(fieldsarg, frozen, has_post_init, self_name, globals):
     )
 
 
-#register all ORSO classes here:
+# register all ORSO classes here:
 ORSO_DATACLASSES = dict()