# Source code for nxtomomill.utils.hdf5

# coding: utf-8

import contextlib

import h5py
import pint
import logging
from .pintutils import get_unit

try:
    import hdf5plugin  # noqa F401
except ImportError:
    pass
from silx.io.url import DataUrl
from silx.io.utils import open as open_hdf5

# Module-level logger; used by get_dataset_unit to report a missing unit attribute.
_logger = logging.getLogger(__name__)

# Names exported by `from ... import *`: only the two reader context managers
# (get_dataset_unit is defined in this module but not listed here).
__all__ = ["EntryReader", "DatasetReader"]


class _BaseReader(contextlib.AbstractContextManager):
    """
    Common base of the HDF5 reader context managers.

    The url is validated at construction time and the file handler is closed
    when the context is left. Subclasses implement `__enter__`: they store the
    opened file in `self._file_handler` and return the requested node.

    :param url: location of the node to read. Its scheme must be 'silx' or
                'h5py' and it must not define a data slice.
    :raises TypeError: if `url` is not a DataUrl.
    :raises ValueError: if the scheme is not handled or if a data slice is set.
    """

    def __init__(self, url: DataUrl):
        if not isinstance(url, DataUrl):
            raise TypeError(f"url should be an instance of DataUrl. Not {type(url)}")
        if url.scheme() not in ("silx", "h5py"):
            raise ValueError("Valid scheme are silx and h5py")
        if url.data_slice() is not None:
            raise ValueError(
                "Data slices are not managed. Data path should "
                "point to a bliss node (h5py.Group)"
            )
        self._url = url
        # set by the subclass `__enter__`; None as long as no file is opened
        self._file_handler = None

    def __exit__(self, *exc):
        """
        Close the file opened by `__enter__` (if any).

        Always returns None so an exception raised inside the `with` body is
        never suppressed, whatever the file handler `close` returns.
        """
        file_handler, self._file_handler = self._file_handler, None
        # the handler can be None if no file was opened (or it was already released)
        if file_handler is not None:
            file_handler.close()
        return None


class EntryReader(_BaseReader):
    """Context manager used to read a bliss node (h5py.Group)"""

    def __enter__(self):
        """
        Open the file of the url and return the group the data path points to.

        An empty (or missing) data path designates the root of the file.

        :raises KeyError: if the data path does not exist in the file.
        :raises ValueError: if the data path does not point to a h5py.Group.
        """
        self._file_handler = open_hdf5(filename=self._url.file_path())
        try:
            return self._get_entry()
        except BaseException:
            # `__exit__` is not called when `__enter__` raises: close the file
            # here, otherwise it would stay open.
            self._file_handler.close()
            self._file_handler = None
            raise

    def _get_entry(self):
        """Return the node targeted by the url from the already opened file."""
        data_path = self._url.data_path()
        # a url defined without any data path is handled like the empty one
        if data_path is None or data_path == "":
            entry = self._file_handler
        elif data_path not in self._file_handler:
            raise KeyError(
                f"data path '{self._url.data_path()}' doesn't exists from '{self._url.file_path()}'"
            )
        else:
            entry = self._file_handler[data_path]
        if not isinstance(entry, h5py.Group):
            raise ValueError("Data path should point to a bliss node (h5py.Group)")
        return entry
class DatasetReader(_BaseReader):
    """Context manager used to read a dataset (h5py.Dataset)"""

    def __enter__(self):
        """
        Open the file of the url and return the dataset the data path points to.

        Any error raised by the lookup of the data path is propagated.

        :raises ValueError: if the data path does not point to a h5py.Dataset.
        """
        self._file_handler = open_hdf5(filename=self._url.file_path())
        try:
            entry = self._file_handler[self._url.data_path()]
            if not isinstance(entry, h5py.Dataset):
                raise ValueError(
                    f"Data path ({self._url.path()}) should point to a dataset (h5py.Dataset)"
                )
        except BaseException:
            # `__exit__` is not called when `__enter__` raises: close the file
            # here, otherwise it would stay open.
            self._file_handler.close()
            self._file_handler = None
            raise
        return entry
def get_dataset_unit(
    dataset: h5py.Dataset, default: pint.Unit, from_dataset: str
) -> pint.Unit:
    """
    Return the pint Unit attached to a HDF5 dataset.

    The unit is read from the `unit` attribute of the dataset or, if missing,
    from the `units` attribute. When none of them is defined `default` is
    returned (and an info message is logged).

    :param dataset: dataset to read the unit from.
    :param default: unit returned when the dataset defines no unit. Also
                    forwarded to `get_unit`.
    :param from_dataset: information about the 'dataset' / metadata we are
                         trying to access. For logging purpose in case of failure.
    :return: `default` when no unit attribute exists, else the result of `get_unit`.
    :raises TypeError: if `dataset` is not a h5py.Dataset.
    """
    if not isinstance(dataset, h5py.Dataset):
        raise TypeError(
            f"dataset is expected to be an instance of {h5py.Dataset}. Got {type(dataset)}."
        )

    attributes = dataset.attrs
    # 'unit' has priority over 'units'
    for attr_name in ("unit", "units"):
        if attr_name in attributes:
            raw_unit = attributes[attr_name]
            break
    else:
        _logger.info(f"no unit found for {from_dataset}. Take default unit: {default}")
        return default

    # handle Diamond dataset: the attribute provides a `decode` method
    # (bytes-like value) and must be converted to str first
    if hasattr(raw_unit, "decode"):
        raw_unit = raw_unit.decode()
    return get_unit(unit=raw_unit, default=default, from_dataset=from_dataset)