Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ sphinx-autodoc-typehints==1.11.1
pandas
einops
transformers>=4.53.0
mlflow>=2.12.2
mlflow>=2.12.2,<3.13
clearml>=1.10.0rc0
tensorboardX
imagecodecs; platform_system == "Linux" or platform_system == "Darwin"
Expand Down
26 changes: 23 additions & 3 deletions monai/data/image_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1231,17 +1231,26 @@ class NumpyReader(ImageReader):
npz_keys: if loading npz file, only load the specified keys, if None, load all the items.
stack the loaded items together to construct a new first dimension.
channel_dim: if not None, explicitly specify the channel dim, otherwise, treat the array as no channel.
allow_pickle: if True, allows loading pickled contents from NPY/NPZ files. Note that the default value of False
prevents the risk of remote code execution, set this to True only for loading known trusted data. If this
argument is False and pickled data is loaded, a ValueError will be raised.
kwargs: additional args for `numpy.load` API except `allow_pickle`. more details about available args:
https://numpy.org/doc/stable/reference/generated/numpy.load.html

"""

def __init__(self, npz_keys: KeysCollection | None = None, channel_dim: str | int | None = None, **kwargs):
def __init__(
self,
npz_keys: KeysCollection | None = None,
channel_dim: str | int | None = None,
allow_pickle: bool = False,
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default change from the implicit allow_pickle=True (hardcoded in the previous np.load call) to allow_pickle=False is a breaking change for any caller who loads .npy/.npz files containing Python objects. The PR template marks this as non-breaking, and the commit message does not include a migration note. This will silently raise ValueError for existing users — they need to be told to pass allow_pickle=True explicitly, and CHANGELOG.MD, I think, would be worth mentioning in the release notes.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added an exception raise that will note this has been changed. I think it's very unlikely anyone is loading pickled data this way but the exception doesn't hurt to add. Since this wasn't an existing argument to the class I couldn't rightly add a deprecation decorator.

**kwargs,
):
super().__init__()
if npz_keys is not None:
npz_keys = ensure_tuple(npz_keys)
self.npz_keys = npz_keys
self.channel_dim = float("nan") if channel_dim == "no_channel" else channel_dim
self.allow_pickle = allow_pickle
self.kwargs = kwargs

def verify_suffix(self, filename: Sequence[PathLike] | PathLike) -> bool:
Expand All @@ -1267,14 +1276,25 @@ def read(self, data: Sequence[PathLike] | PathLike, **kwargs):
More details about available args:
https://numpy.org/doc/stable/reference/generated/numpy.load.html

Raises:
ValueError: when `self.allow_pickle` is False but loaded data contains pickled objects.
"""
img_: list[Nifti1Image] = []

filenames: Sequence[PathLike] = ensure_tuple(data)
kwargs_ = self.kwargs.copy()
kwargs_.update(kwargs)
for name in filenames:
img = np.load(name, allow_pickle=True, **kwargs_)
try:
img = np.load(name, allow_pickle=self.allow_pickle, **kwargs_)
except ValueError as e:
# if a ValueError is raised, this is likely about pickle loading so raise an exception about this
raise ValueError(
"MONAI default value for argument `allow_pickle` of `np.load` changed to `False`, "
"explicitly pass `allow_pickle=True` as a constructor argument to NumpyReader "
"to enable pickle loading."
) from e

if Path(name).name.endswith(".npz"):
# load expected items from NPZ file
npz_keys = list(img.keys()) if self.npz_keys is None else self.npz_keys
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pandas
requests
einops
transformers>=4.53.0
mlflow>=2.12.2
mlflow>=2.12.2,<3.13
clearml>=1.10.0rc0
matplotlib>=3.6.3
tensorboardX
Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ all =
pandas
einops
transformers>=4.53.0
mlflow>=2.12.2
mlflow>=2.12.2,<3.13
clearml>=1.10.0rc0
matplotlib>=3.6.3
tensorboardX
Expand Down Expand Up @@ -135,7 +135,7 @@ einops =
transformers =
transformers>=4.36.0, <4.41.0; python_version <= '3.10'
mlflow =
mlflow>=2.12.2
mlflow>=2.12.2,<3.13
matplotlib =
matplotlib>=3.6.3
clearml =
Expand Down
11 changes: 11 additions & 0 deletions tests/data/test_numpy_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,13 @@ def test_npy_pickle(self):
np.save(filepath, test_data, allow_pickle=True)

reader = NumpyReader()

with self.assertRaises(ValueError):
reader.get_data(reader.read(filepath))

reader = NumpyReader(allow_pickle=True)
result = reader.get_data(reader.read(filepath))[0].item()

np.testing.assert_allclose(result["test"].shape, test_data["test"].shape)
np.testing.assert_allclose(result["test"], test_data["test"])

Expand All @@ -92,6 +98,11 @@ def test_kwargs(self):
np.save(filepath, test_data, allow_pickle=True)

reader = NumpyReader(mmap_mode="r")

with self.assertRaises(ValueError):
reader.get_data(reader.read(filepath, mmap_mode=None))

reader = NumpyReader(mmap_mode="r", allow_pickle=True)
result = reader.get_data(reader.read(filepath, mmap_mode=None))[0].item()
np.testing.assert_allclose(result["test"].shape, test_data["test"].shape)

Expand Down
Loading