Skip to content

Commit 678d4d3

Browse files
ENH: Replace custom _whosmat_hdf5 with pymatreader.whosmat (#13753)
1 parent 0e4e845 commit 678d4d3

3 files changed

Lines changed: 126 additions & 114 deletions

File tree

doc/changes/dev/13753.other.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improved internal functions for inspecting MATLAB v7.3 (HDF5) files in :func:`mne.io.read_raw_eeglab`, by `Bruno Aristimunha`_.

mne/fixes.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,123 @@ def sph_harm_y(n, m, theta, phi, *, diff_n=0):
778778
return special.sph_harm(m, n, phi, theta)
779779

780780

781+
###############################################################################
782+
# TODO VERSION: Can be removed once pymatreader >= 1.2.2 is the minimum
783+
# supported version.
784+
785+
786+
def _whosmat(fname):
    """Return the variable listing of a .mat file, MATLAB v7.3 (HDF5) included.

    Three readers are tried in order, and the first one that succeeds wins:

    1. ``pymatreader.whosmat``, when pymatreader can be imported and its
       version is at least 1.2.2;
    2. ``scipy.io.whosmat``, which handles non-HDF5 .mat files;
    3. ``_whosmat_hdf5``, the h5py-based reader, used when SciPy raises
       ``NotImplementedError``.

    Parameters
    ----------
    fname : path-like
        Path to the .mat file. It is converted with ``str`` before being
        handed to any reader.

    Returns
    -------
    variables
        Whatever the selected reader returns.
    """
    path = str(fname)

    # Step 1: a recent-enough pymatreader. A missing package, a missing
    # ``__version__`` or a missing ``whosmat`` attribute all mean "not usable".
    try:
        import pymatreader as pmr

        recent_enough = _compare_version(pmr.__version__, ">=", "1.2.2")
        if recent_enough:
            return pmr.whosmat(path)
    except (ImportError, AttributeError):
        pass

    # Step 2: SciPy (works for non-HDF5 .mat files).
    from scipy.io import whosmat as scipy_whosmat

    try:
        listing = scipy_whosmat(path)
    except NotImplementedError:
        # SciPy could not handle the file; the HDF5 reader below is the last
        # resort.
        pass
    else:
        return listing

    # Step 3: HDF5 file — use the custom h5py-based reader.
    return _whosmat_hdf5(path)
810+
811+
812+
def _whosmat_hdf5(fname):
    """List variables in a MATLAB v7.3 (HDF5) .mat file without loading data.

    This function provides similar functionality to :func:`scipy.io.whosmat` but
    for MATLAB v7.3 files stored in HDF5 format, which are not supported by SciPy.

    Parameters
    ----------
    fname : str
        Path to the MATLAB v7.3 (.mat) file.

    Returns
    -------
    variables : list of tuple
        A list of (name, shape, class) tuples for each variable in the file.
        The name is a string, shape is a tuple of ints, and class is a string
        indicating the MATLAB data type (e.g., 'double', 'int32', 'struct').
        The class is ``'unknown'`` when it cannot be inferred.
    """
    import h5py

    variables = []

    with h5py.File(str(fname), "r") as f:
        for name in f.keys():
            node = f[name]

            # Extract shape from HDF5 object
            # NOTE(review): dataset shapes are reported exactly as h5py exposes
            # them; whether the axis order matches MATLAB's ``size`` should be
            # confirmed against callers.
            if isinstance(node, h5py.Dataset):
                shape = tuple(int(x) for x in node.shape)
            else:
                shape = ()
                # Groups carry no intrinsic shape: probe a few attribute names,
                # stopping at the first one that converts cleanly.
                for attr_key in (
                    "MATLAB_shape",
                    "MATLAB_Size",
                    "MATLAB_size",
                    "dims",
                    "MATLAB_dims",
                ):
                    shp = node.attrs.get(attr_key)
                    if shp is not None:
                        try:
                            shape = tuple(int(x) for x in shp)
                            break
                        except Exception:
                            # Best effort: an unusable attribute is skipped
                            pass
                # Last resort: a child named "size" holding the dimensions
                if not shape and "size" in node:
                    try:
                        shape = tuple(int(x) for x in node["size"][()])
                    except Exception:
                        pass

            # Infer MATLAB class from HDF5 object
            mcls = node.attrs.get("MATLAB_class", "")
            if isinstance(mcls, bytes):
                # h5py returns fixed-length string attributes as bytes. Decode
                # so that the comparison below works and the returned class is
                # a str, as documented.
                mcls = mcls.decode("utf-8", errors="replace")
            mcls = mcls.lower()
            if mcls:
                matlab_class = "char" if mcls == "string" else mcls
            elif isinstance(node, h5py.Dataset):
                dt = node.dtype
                # Handle complex numbers stored as {real, imag} struct
                if getattr(dt, "names", None) and {"real", "imag"} <= set(dt.names):
                    matlab_class = (
                        "double" if dt["real"].base.itemsize == 8 else "single"
                    )
                # Map NumPy dtype to MATLAB class
                elif (kind := dt.kind) == "f":
                    matlab_class = "double" if dt.itemsize == 8 else "single"
                elif kind == "i":
                    matlab_class = f"int{8 * dt.itemsize}"
                elif kind == "u":
                    matlab_class = f"uint{8 * dt.itemsize}"
                elif kind == "b":
                    matlab_class = "logical"
                elif kind in ("S", "U", "O"):
                    matlab_class = "char"
                else:
                    matlab_class = "unknown"
            # Check for sparse matrix structure
            elif {"ir", "jc", "data"}.issubset(set(node.keys())):
                matlab_class = "sparse"
            else:
                matlab_class = "unknown"

            variables.append((name, shape, matlab_class))

    return variables
896+
897+
781898
###############################################################################
782899
# workaround: Numpy won't allow to read from file-like objects with numpy.fromfile,
783900
# we try to use numpy.fromfile, if a blob is used we use numpy.frombuffer to read

mne/io/eeglab/_eeglab.py

Lines changed: 8 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99
except ImportError: # scipy < 1.8
1010
from scipy.io.matlab.mio5 import MatlabFunction
1111
from scipy.io.matlab.mio5_params import MatlabOpaque
12-
from scipy.io import loadmat, whosmat
12+
from scipy.io import loadmat
1313

14-
from ...utils import _import_pymatreader_funcs, _soft_import, warn
14+
from ...fixes import _whosmat
15+
from ...utils import _import_pymatreader_funcs, warn
1516

1617

1718
def _todict_from_np_struct(data): # taken from pymatreader.utils
@@ -83,106 +84,6 @@ def _scipy_reader(file_name, variable_names=None, uint16_codec=None):
8384
return _check_for_scipy_mat_struct(mat_data)
8485

8586

86-
def _whosmat_hdf5(fname: str):
    """List variables in a MATLAB v7.3 (HDF5) .mat file without loading data.

    This function provides similar functionality to :func:`scipy.io.whosmat` but
    for MATLAB v7.3 files stored in HDF5 format, which are not supported by SciPy.

    Parameters
    ----------
    fname : str | PathLike
        Path to the MATLAB v7.3 (.mat) file.

    Returns
    -------
    variables : list of tuple
        A list of (name, shape, class) tuples for each variable in the file.
        The name is a string, shape is a tuple of ints, and class is a string
        indicating the MATLAB data type (e.g., 'double', 'int32', 'struct').
        The class is ``'unknown'`` when it cannot be inferred.

    Raises
    ------
    ModuleNotFoundError
        If h5py cannot be imported.

    Notes
    -----
    This function only works with MATLAB v7.3 (HDF5) files. For earlier versions,
    use :func:`scipy.io.whosmat` instead.

    See Also
    --------
    scipy.io.whosmat : List variables in classic MATLAB files.
    """
    h5py = _soft_import("h5py", purpose="MATLAB v7.3 I/O", strict=False)
    if h5py is False:
        raise ModuleNotFoundError(
            "h5py is required to inspect MATLAB v7.3 files preload=`False` "
            "Please install h5py to use this functionality."
        )

    variables = []

    with h5py.File(str(fname), "r") as f:
        for name in f.keys():
            node = f[name]

            # Extract shape from HDF5 object
            if isinstance(node, h5py.Dataset):
                shape = tuple(int(x) for x in node.shape)
            else:
                shape = ()
                # Groups carry no intrinsic shape: probe a few attribute names,
                # stopping at the first one that converts cleanly.
                for attr_key in (
                    "MATLAB_shape",
                    "MATLAB_Size",
                    "MATLAB_size",
                    "dims",
                    "MATLAB_dims",
                ):
                    shp = node.attrs.get(attr_key)
                    if shp is not None:
                        try:
                            shape = tuple(int(x) for x in shp)
                            break
                        except Exception:
                            # Best effort: an unusable attribute is skipped
                            pass
                # Last resort: a child named "size" holding the dimensions
                if not shape and "size" in node:
                    try:
                        shape = tuple(int(x) for x in node["size"][()])
                    except Exception:
                        pass

            # Infer MATLAB class from HDF5 object
            mcls = node.attrs.get("MATLAB_class", "")
            if isinstance(mcls, bytes):
                # h5py returns fixed-length string attributes as bytes. Decode
                # so that the comparison below works and the returned class is
                # a str, as documented.
                mcls = mcls.decode("utf-8", errors="replace")
            mcls = mcls.lower()
            if mcls:
                matlab_class = "char" if mcls == "string" else mcls
            elif isinstance(node, h5py.Dataset):
                dt = node.dtype
                # Handle complex numbers stored as {real, imag} struct
                if getattr(dt, "names", None) and {"real", "imag"} <= set(dt.names):
                    matlab_class = (
                        "double" if dt["real"].base.itemsize == 8 else "single"
                    )
                # Map NumPy dtype to MATLAB class
                elif (kind := dt.kind) == "f":
                    matlab_class = "double" if dt.itemsize == 8 else "single"
                elif kind == "i":
                    matlab_class = f"int{8 * dt.itemsize}"
                elif kind == "u":
                    matlab_class = f"uint{8 * dt.itemsize}"
                elif kind == "b":
                    matlab_class = "logical"
                elif kind in ("S", "U", "O"):
                    matlab_class = "char"
                else:
                    matlab_class = "unknown"
            # Check for sparse matrix structure
            elif {"ir", "jc", "data"}.issubset(set(node.keys())):
                matlab_class = "sparse"
            else:
                matlab_class = "unknown"

            variables.append((name, shape, matlab_class))

    return variables
184-
185-
18687
def _readmat(fname, uint16_codec=None, *, preload=False):
18788
try:
18889
read_mat = _import_pymatreader_funcs("EEGLAB I/O")
@@ -220,18 +121,11 @@ def _readmat(fname, uint16_codec=None, *, preload=False):
220121
# checking the variables in the .set file
221122
# to decide how to handle 'data' variable
222123
try:
223-
variables = whosmat(str(fname))
224-
except NotImplementedError:
225-
try:
226-
variables = _whosmat_hdf5(str(fname))
227-
except ModuleNotFoundError:
228-
warn(
229-
"pymatreader is required to preload=`False` for "
230-
"Matlab files v7.3 files with HDF5 support. "
231-
"Setting preload=True."
232-
)
233-
preload = True
234-
return read_mat(fname, uint16_codec=uint16_codec)
124+
variables = _whosmat(str(fname))
125+
except Exception:
126+
warn("Could not inspect .set file variables. Setting preload=True.")
127+
preload = True
128+
return read_mat(fname, uint16_codec=uint16_codec)
235129

236130
is_possible_not_loaded = False
237131

0 commit comments

Comments
 (0)