|
18 | 18 | from dask.dataframe import DataFrame as DaskDataFrame |
19 | 19 | from geopandas import GeoDataFrame |
20 | 20 | from numpy.random import default_rng |
| 21 | +from packaging.version import Version |
21 | 22 | from shapely.geometry import MultiPolygon, Point, Polygon |
22 | 23 | from shapely.io import to_ragged_array |
23 | 24 | from spatial_image import to_spatial_image |
@@ -311,7 +312,7 @@ def test_shapes_model(self, model: ShapesModel, path: Path) -> None: |
311 | 312 | @pytest.mark.parametrize("model", [PointsModel]) |
312 | 313 | @pytest.mark.parametrize("instance_key", [None, "cell_id"]) |
313 | 314 | @pytest.mark.parametrize("feature_key", [None, "target"]) |
314 | | - @pytest.mark.parametrize("typ", [np.ndarray, pd.DataFrame, dd.DataFrame]) |
| 315 | + @pytest.mark.parametrize("typ", [np.ndarray, pd.DataFrame, dd.DataFrame], ids=["numpy", "pandas", "dask"]) |
315 | 316 | @pytest.mark.parametrize("is_annotation", [True, False]) |
316 | 317 | @pytest.mark.parametrize("is_3d", [True, False]) |
317 | 318 | @pytest.mark.parametrize("coordinates", [None, {"x": "A", "y": "B", "z": "C"}]) |
@@ -937,12 +938,12 @@ def test_categories_on_partitioned_dataframe(sdata_blobs: SpatialData): |
937 | 938 | assert np.array_equal(df["genes"].to_numpy(), ddf_parsed["genes"].compute().to_numpy()) |
938 | 939 | assert set(df["genes"].cat.categories.tolist()) == set(ddf_parsed["genes"].compute().cat.categories.tolist()) |
939 | 940 |
|
940 | | - # two behavior to investigate later/report to dask (they originate in dask) |
941 | | - # TODO: df['genes'].cat.categories has dtype 'object', while ddf_parsed['genes'].compute().cat.categories has dtype |
942 | | - # 'string' |
943 | | - # this problem should disappear after pandas 3.0 is released |
944 | | - assert df["genes"].cat.categories.dtype == "object" |
| 941 | + if Version(pd.__version__) >= Version("3"): |
| 942 | + assert df["genes"].cat.categories.dtype == "string" |
| 943 | + else: |
| 944 | + assert df["genes"].cat.categories.dtype == "object" |
945 | 945 | assert ddf_parsed["genes"].compute().cat.categories.dtype == "string" |
946 | 946 |
|
| 947 | + # behavior to investigate later/report to dask |
947 | 948 | # TODO: the order of the categories is not preserved |
948 | 949 | assert df["genes"].cat.categories.tolist() != ddf_parsed["genes"].compute().cat.categories.tolist() |
0 commit comments