From c03462a2036bef6f22149b68699057e512da3b78 Mon Sep 17 00:00:00 2001 From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Date: Wed, 27 May 2026 11:42:51 +0100 Subject: [PATCH 1/6] Adding allow_pickle argument Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> --- monai/data/image_reader.py | 10 +++++++--- tests/data/test_numpy_reader.py | 11 +++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 078b62db01..03d83784d1 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -1231,17 +1231,21 @@ class NumpyReader(ImageReader): npz_keys: if loading npz file, only load the specified keys, if None, load all the items. stack the loaded items together to construct a new first dimension. channel_dim: if not None, explicitly specify the channel dim, otherwise, treat the array as no channel. + allow_pickle: if True, allows loading pickled contents from NPY/NPZ files. Note that the default value of False + prevents the risk of remote code execution, set this to True only for loading known trusted data. kwargs: additional args for `numpy.load` API except `allow_pickle`. more details about available args: https://numpy.org/doc/stable/reference/generated/numpy.load.html - """ - def __init__(self, npz_keys: KeysCollection | None = None, channel_dim: str | int | None = None, **kwargs): + def __init__( + self, npz_keys: KeysCollection | None = None, channel_dim: str | int | None = None, allow_pickle=False, **kwargs + ): super().__init__() if npz_keys is not None: npz_keys = ensure_tuple(npz_keys) self.npz_keys = npz_keys self.channel_dim = float("nan") if channel_dim == "no_channel" else channel_dim + self.allow_pickle = allow_pickle self.kwargs = kwargs def verify_suffix(self, filename: Sequence[PathLike] | PathLike) -> bool: @@ -1274,7 +1278,7 @@ def read(self, data: Sequence[PathLike] | PathLike, **kwargs): kwargs_ = self.kwargs.copy() kwargs_.update(kwargs) for name in filenames: - img = np.load(name, allow_pickle=True, **kwargs_) + img = np.load(name, allow_pickle=self.allow_pickle, **kwargs_) if Path(name).name.endswith(".npz"): # load expected items from NPZ file npz_keys = list(img.keys()) if self.npz_keys is None else self.npz_keys diff --git a/tests/data/test_numpy_reader.py b/tests/data/test_numpy_reader.py index c427778c67..ec951004b3 100644 --- a/tests/data/test_numpy_reader.py +++ b/tests/data/test_numpy_reader.py @@ -81,7 +81,13 @@ def test_npy_pickle(self): np.save(filepath, test_data, allow_pickle=True) reader = NumpyReader() + + with self.assertRaises(ValueError): + reader.get_data(reader.read(filepath)) + + reader = NumpyReader(allow_pickle=True) result = reader.get_data(reader.read(filepath))[0].item() + np.testing.assert_allclose(result["test"].shape, test_data["test"].shape) np.testing.assert_allclose(result["test"], test_data["test"]) @@ -92,6 +98,11 @@ def test_kwargs(self): np.save(filepath, test_data, allow_pickle=True) reader = NumpyReader(mmap_mode="r") + + with self.assertRaises(ValueError): + reader.get_data(reader.read(filepath, mmap_mode=None)) + + reader = NumpyReader(mmap_mode="r", allow_pickle=True) result = reader.get_data(reader.read(filepath, mmap_mode=None))[0].item() np.testing.assert_allclose(result["test"].shape, test_data["test"].shape) From 4788503eb157e5680c297269da087e4be5dbf1db Mon Sep 17 00:00:00 2001 From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Date: Wed, 27 May 2026 11:58:32 +0100 Subject: [PATCH 2/6] Type update Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> --- monai/data/image_reader.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 03d83784d1..70a082915c 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -1232,13 +1232,18 @@ class NumpyReader(ImageReader): stack the loaded items together to construct a new first dimension. channel_dim: if not None, explicitly specify the channel dim, otherwise, treat the array as no channel. allow_pickle: if True, allows loading pickled contents from NPY/NPZ files. Note that the default value of False - prevents the risk of remote code execution, set this to True only for loading known trusted data. + prevents the risk of remote code execution, set this to True only for loading known trusted data. If this + argument is False and pickled data is loaded, a ValueError will be raised. kwargs: additional args for `numpy.load` API except `allow_pickle`. more details about available args: https://numpy.org/doc/stable/reference/generated/numpy.load.html """ def __init__( - self, npz_keys: KeysCollection | None = None, channel_dim: str | int | None = None, allow_pickle=False, **kwargs + self, + npz_keys: KeysCollection | None = None, + channel_dim: str | int | None = None, + allow_pickle: bool = False, + **kwargs, ): super().__init__() if npz_keys is not None: @@ -1271,6 +1276,8 @@ def read(self, data: Sequence[PathLike] | PathLike, **kwargs): More details about available args: https://numpy.org/doc/stable/reference/generated/numpy.load.html + Raises: + ValueError: when `self.allow_pickle` is False but loaded data contains pickled objects. """ img_: list[Nifti1Image] = [] From 690d22eefba77d0c459d14518df1fe9f6e5c1ef3 Mon Sep 17 00:00:00 2001 From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Date: Mon, 1 Jun 2026 19:55:47 +0100 Subject: [PATCH 3/6] Tighter mlflow version to get around exception issue with file system backend Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> --- docs/requirements.txt | 2 +- requirements-dev.txt | 2 +- setup.cfg | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 18537b10b7..01dc9989a3 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -21,7 +21,7 @@ sphinx-autodoc-typehints==1.11.1 pandas einops transformers>=4.53.0 -mlflow>=2.12.2 +mlflow>=2.12.2,<2.13 clearml>=1.10.0rc0 tensorboardX imagecodecs; platform_system == "Linux" or platform_system == "Darwin" diff --git a/requirements-dev.txt b/requirements-dev.txt index eb4429cce7..d74f0fe1b3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -34,7 +34,7 @@ pandas requests einops transformers>=4.53.0 -mlflow>=2.12.2 +mlflow>=2.12.2,<2.13 clearml>=1.10.0rc0 matplotlib>=3.6.3 tensorboardX diff --git a/setup.cfg b/setup.cfg index 724d1eceb3..87f163e085 100644 --- a/setup.cfg +++ b/setup.cfg @@ -71,7 +71,7 @@ all = pandas einops transformers>=4.53.0 - mlflow>=2.12.2 + mlflow>=2.12.2,<2.13 clearml>=1.10.0rc0 matplotlib>=3.6.3 tensorboardX @@ -135,7 +135,7 @@ einops = transformers = transformers>=4.36.0, <4.41.0; python_version <= '3.10' mlflow = - mlflow>=2.12.2 + mlflow>=2.12.2,<2.13 matplotlib = matplotlib>=3.6.3 clearml = From 6cad5d98f02dec79485c1bd13f76c803c307cfbc Mon Sep 17 00:00:00 2001 From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Date: Mon, 1 Jun 2026 19:56:57 +0100 Subject: [PATCH 4/6] Tighter mlflow version to get around exception issue with file system backend Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> --- docs/requirements.txt | 2 +- requirements-dev.txt | 2 +- setup.cfg | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 01dc9989a3..3027d40164 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -21,7 +21,7 @@ sphinx-autodoc-typehints==1.11.1 pandas einops transformers>=4.53.0 -mlflow>=2.12.2,<2.13 +mlflow>=2.12.2,<3.13 clearml>=1.10.0rc0 tensorboardX imagecodecs; platform_system == "Linux" or platform_system == "Darwin" diff --git a/requirements-dev.txt b/requirements-dev.txt index d74f0fe1b3..08fcdc2b0e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -34,7 +34,7 @@ pandas requests einops transformers>=4.53.0 -mlflow>=2.12.2,<2.13 +mlflow>=2.12.2,<3.13 clearml>=1.10.0rc0 matplotlib>=3.6.3 tensorboardX diff --git a/setup.cfg b/setup.cfg index 87f163e085..d987141d0b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -71,7 +71,7 @@ all = pandas einops transformers>=4.53.0 - mlflow>=2.12.2,<2.13 + mlflow>=2.12.2,<3.13 clearml>=1.10.0rc0 matplotlib>=3.6.3 tensorboardX @@ -135,7 +135,7 @@ einops = transformers = transformers>=4.36.0, <4.41.0; python_version <= '3.10' mlflow = - mlflow>=2.12.2,<2.13 + mlflow>=2.12.2,<3.13 matplotlib = matplotlib>=3.6.3 clearml = From 8268b17ddb3ae3a2314889a24a6581341059682f Mon Sep 17 00:00:00 2001 From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Date: Mon, 1 Jun 2026 21:57:06 +0100 Subject: [PATCH 5/6] Add exception message about pickle loading. Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> --- monai/data/image_reader.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 70a082915c..a85eb95c20 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -1285,7 +1285,16 @@ def read(self, data: Sequence[PathLike] | PathLike, **kwargs): kwargs_ = self.kwargs.copy() kwargs_.update(kwargs) for name in filenames: - img = np.load(name, allow_pickle=self.allow_pickle, **kwargs_) + try: + img = np.load(name, allow_pickle=self.allow_pickle, **kwargs_) + except ValueError as e: + # if a ValueError is raised, this is likely about pickle loading so raise an exception about this + raise ValueError( + "MONAI default value for argument `allow_pickle` of `np.load` changed to `False`, " + "explicitly pass `allow_pickle=True` as a constructor argument to NumpyReader " + "to enable pickle loading." + ) from e + if Path(name).name.endswith(".npz"): # load expected items from NPZ file npz_keys = list(img.keys()) if self.npz_keys is None else self.npz_keys From 2e4ba5d04bd0ccf0b06867a66029559b9810c812 Mon Sep 17 00:00:00 2001 From: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Date: Mon, 1 Jun 2026 22:31:20 +0100 Subject: [PATCH 6/6] Wake up Deepsource Signed-off-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com>