Skip to content

Commit 2de4c4a

Browse files
authored
Closes #5394 and #5395: pandas extension update: adding ndarray return to isna and isnull in the Categorical Extension Array (#5440)
This PR (Closes #5394 and #5395) adds an `ndarray` return to `isna` and `isnull` in the Categorical Extension Array. Co-authored-by: jaketrookman <jaketrookman@users.noreply.github.com>
1 parent df04b2a commit 2de4c4a

2 files changed

Lines changed: 75 additions & 7 deletions

File tree

arkouda/pandas/extension/_arkouda_categorical_array.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from pandas.api.extensions import ExtensionArray
1414
from pandas.core.dtypes.dtypes import ExtensionDtype
1515

16-
from arkouda.numpy.dtypes import bool_
1716
from arkouda.numpy.pdarrayclass import pdarray
1817

1918
from ._arkouda_array import ArkoudaArray
@@ -335,10 +334,41 @@ def astype(
335334
casted = data.astype(dtype)
336335
return ArkoudaExtensionArray._from_sequence(casted)
337336

338-
def isna(self):
339-
from arkouda.numpy.pdarraycreation import zeros
337+
def isna(self) -> np.ndarray:
    """
    Return a boolean mask indicating missing values.

    This implements the pandas ``ExtensionArray.isna`` contract and returns a
    NumPy ``ndarray[bool]`` of the same length as this categorical array.

    Returns
    -------
    np.ndarray
        Boolean mask where True indicates a missing value.

    Raises
    ------
    TypeError
        If the backing data is not an ``arkouda.Categorical``, or if
        comparing its codes against the missing-value sentinel or converting
        the result to a NumPy array fails for any reason.
    """
    # Imported locally rather than at module scope.
    from arkouda.categorical import Categorical

    data = self._data  # should be an arkouda.Categorical

    if not isinstance(data, Categorical):
        raise TypeError("ArkoudaCategorical.isna requires an arkouda.Categorical backend")

    # Missing values in ArkoudaCategorical are represented by code == -1
    try:
        return (data.codes == -1).to_ndarray()
    except Exception as e:
        # Any failure is reported as TypeError, with the original exception
        # chained so the underlying cause stays visible.
        raise TypeError(f"Unable to determine missing values: {e}") from e
368+
369+
def isnull(self) -> np.ndarray:
    """
    Alias for isna().

    Returns
    -------
    np.ndarray
        Whatever ``self.isna()`` returns, passed through unchanged.
    """
    return self.isna()
342372

343373
@property
344374
def dtype(self):
@@ -534,9 +564,6 @@ def describe(self, *args, **kwargs):
534564
def from_codes(cls, *args, **kwargs):
535565
raise NotImplementedError("from_codes is not yet implemented for ArkoudaCategorical.")
536566

537-
def isnull(self, *args, **kwargs):
538-
self._categorical_not_implemented("isnull")
539-
540567
def memory_usage(self, *args, **kwargs):
541568
self._categorical_not_implemented("memory_usage")
542569

tests/pandas/extension/arkouda_categorical_extension.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,47 @@ def test_take_categorical_scaling(self, prob_size):
144144
idx1 = ak.arange(prob_size, dtype=ak.int64) // 2
145145
assert_equivalent(arr.take(idx1)._data.to_strings(), s.take(idx1.to_ndarray()).to_numpy())
146146

147+
def test_categorical_isna_and_isnull(self):
    """Check that isna() and isnull() return matching NumPy boolean masks.

    Covers a categorical with no missing values, one whose codes contain the
    -1 sentinel, and an empty categorical.
    """
    from arkouda.pandas.extension import ArkoudaCategorical

    def categorical_with_missing():
        # Missing values must be injected after construction: build with
        # valid codes, then rewrite the codes to include -1.
        cat = ak.Categorical.from_codes(
            codes=ak.array([0, 0, 1, 1]), categories=ak.array(["x", "y"])
        )
        cat.codes = ak.array([0, -1, 1, -1])
        return cat

    cases = [
        # Case 1: no missing values
        (
            ak.Categorical(ak.array(["a", "b", "c"])),
            np.array([False, False, False]),
        ),
        # Case 2: some missing values
        (
            categorical_with_missing(),
            np.array([False, True, False, True]),
        ),
        # Case 3: empty categorical
        (
            ak.Categorical(ak.array([])),
            np.array([], dtype=bool),
        ),
    ]

    for cat, expected in cases:
        arr = ArkoudaCategorical(cat)

        # Both spellings must satisfy the same contract.
        for mask in (arr.isna(), arr.isnull()):
            assert isinstance(mask, np.ndarray)
            assert mask.dtype == bool
            assert np.array_equal(mask, expected)
187+
147188

148189
class TestArkoudaCategoricalAsType:
149190
def test_categorical_array_astype_category_stays_extension(

0 commit comments

Comments
 (0)