diff --git a/docs/requirements.txt b/docs/requirements.txt index 18537b10b7..3027d40164 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -21,7 +21,7 @@ sphinx-autodoc-typehints==1.11.1 pandas einops transformers>=4.53.0 -mlflow>=2.12.2 +mlflow>=2.12.2,<3.13 clearml>=1.10.0rc0 tensorboardX imagecodecs; platform_system == "Linux" or platform_system == "Darwin" diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index 078b62db01..a85eb95c20 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -1231,17 +1231,26 @@ class NumpyReader(ImageReader): npz_keys: if loading npz file, only load the specified keys, if None, load all the items. stack the loaded items together to construct a new first dimension. channel_dim: if not None, explicitly specify the channel dim, otherwise, treat the array as no channel. + allow_pickle: if True, allows loading pickled contents from NPY/NPZ files. Note that the default value of False + prevents the risk of remote code execution; set this to True only for loading known trusted data. If this + argument is False and pickled data is loaded, a ValueError will be raised. kwargs: additional args for `numpy.load` API except `allow_pickle`. 
more details about available args: https://numpy.org/doc/stable/reference/generated/numpy.load.html - """ - def __init__(self, npz_keys: KeysCollection | None = None, channel_dim: str | int | None = None, **kwargs): + def __init__( + self, + npz_keys: KeysCollection | None = None, + channel_dim: str | int | None = None, + allow_pickle: bool = False, + **kwargs, + ): super().__init__() if npz_keys is not None: npz_keys = ensure_tuple(npz_keys) self.npz_keys = npz_keys self.channel_dim = float("nan") if channel_dim == "no_channel" else channel_dim + self.allow_pickle = allow_pickle self.kwargs = kwargs def verify_suffix(self, filename: Sequence[PathLike] | PathLike) -> bool: @@ -1267,6 +1276,8 @@ def read(self, data: Sequence[PathLike] | PathLike, **kwargs): More details about available args: https://numpy.org/doc/stable/reference/generated/numpy.load.html + Raises: + ValueError: when `self.allow_pickle` is False but loaded data contains pickled objects. """ img_: list[Nifti1Image] = [] @@ -1274,7 +1285,16 @@ def read(self, data: Sequence[PathLike] | PathLike, **kwargs): kwargs_ = self.kwargs.copy() kwargs_.update(kwargs) for name in filenames: - img = np.load(name, allow_pickle=True, **kwargs_) + try: + img = np.load(name, allow_pickle=self.allow_pickle, **kwargs_) + except ValueError as e: + # any ValueError from np.load is assumed to be a pickle rejection and re-raised with a hint + raise ValueError( + "MONAI default value for argument `allow_pickle` of `np.load` changed to `False`, " + "explicitly pass `allow_pickle=True` as a constructor argument to NumpyReader " + "to enable pickle loading." 
+ ) from e + if Path(name).name.endswith(".npz"): # load expected items from NPZ file npz_keys = list(img.keys()) if self.npz_keys is None else self.npz_keys diff --git a/requirements-dev.txt b/requirements-dev.txt index eb4429cce7..08fcdc2b0e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -34,7 +34,7 @@ pandas requests einops transformers>=4.53.0 -mlflow>=2.12.2 +mlflow>=2.12.2,<3.13 clearml>=1.10.0rc0 matplotlib>=3.6.3 tensorboardX diff --git a/setup.cfg b/setup.cfg index 724d1eceb3..d987141d0b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -71,7 +71,7 @@ all = pandas einops transformers>=4.53.0 - mlflow>=2.12.2 + mlflow>=2.12.2,<3.13 clearml>=1.10.0rc0 matplotlib>=3.6.3 tensorboardX @@ -135,7 +135,7 @@ einops = transformers = transformers>=4.36.0, <4.41.0; python_version <= '3.10' mlflow = - mlflow>=2.12.2 + mlflow>=2.12.2,<3.13 matplotlib = matplotlib>=3.6.3 clearml = diff --git a/tests/data/test_numpy_reader.py b/tests/data/test_numpy_reader.py index c427778c67..ec951004b3 100644 --- a/tests/data/test_numpy_reader.py +++ b/tests/data/test_numpy_reader.py @@ -81,7 +81,13 @@ def test_npy_pickle(self): np.save(filepath, test_data, allow_pickle=True) reader = NumpyReader() + + with self.assertRaises(ValueError): + reader.get_data(reader.read(filepath)) + + reader = NumpyReader(allow_pickle=True) result = reader.get_data(reader.read(filepath))[0].item() + np.testing.assert_allclose(result["test"].shape, test_data["test"].shape) np.testing.assert_allclose(result["test"], test_data["test"]) @@ -92,6 +98,11 @@ def test_kwargs(self): np.save(filepath, test_data, allow_pickle=True) reader = NumpyReader(mmap_mode="r") + + with self.assertRaises(ValueError): + reader.get_data(reader.read(filepath, mmap_mode=None)) + + reader = NumpyReader(mmap_mode="r", allow_pickle=True) result = reader.get_data(reader.read(filepath, mmap_mode=None))[0].item() np.testing.assert_allclose(result["test"].shape, test_data["test"].shape)