|
23 | 23 |
|
24 | 24 | from deepnote_toolkit.ocelots.filters import Filter |
25 | 25 | from deepnote_toolkit.ocelots.pandas.implementation import PandasImplementation |
| 26 | +from deepnote_toolkit.ocelots.polars.implementation_eager import ( |
| 27 | + PolarsEagerImplementation, |
| 28 | +) |
26 | 29 | from deepnote_toolkit.ocelots.pyspark.implementation import PysparkImplementation |
27 | 30 | from deepnote_toolkit.ocelots.types import ( |
28 | 31 | Column, |
|
32 | 35 | NativeOutputType, |
33 | 36 | PandasDF, |
34 | 37 | PandasOnSparkDF, |
| 38 | + PolarsEagerDF, |
35 | 39 | PysparkDF, |
36 | 40 | UnsupportedDataFrameException, |
37 | 41 | ) |
38 | 42 | from deepnote_toolkit.ocelots.utils import ( |
39 | 43 | is_pandas_dataframe, |
40 | 44 | is_pandas_on_spark_dataframe, |
| 45 | + is_polars_eager_dataframe, |
41 | 46 | is_pyspark_dataframe, |
42 | 47 | ) |
43 | 48 |
|
44 | | -Implementation = Union[PandasImplementation, PysparkImplementation] |
| 49 | +Implementation = Union[ |
| 50 | + PandasImplementation, PysparkImplementation, PolarsEagerImplementation |
| 51 | +] |
45 | 52 |
|
46 | 53 | T = TypeVar("T", bound=NativeOutputDF) |
47 | 54 | FromNativeT = TypeVar("FromNativeT", bound=NativeOutputDF) |
@@ -86,6 +93,7 @@ def is_supported(cls, df: Any) -> bool: |
86 | 93 | is_pandas_dataframe(df) |
87 | 94 | or is_pyspark_dataframe(df) |
88 | 95 | or is_pandas_on_spark_dataframe(df) |
| 96 | + or is_polars_eager_dataframe(df) |
89 | 97 | ) |
90 | 98 |
|
91 | 99 | # Special case for Pandas-on-Spark DFs, as they aren't wrapped directly, but converted |
@@ -117,6 +125,8 @@ def from_native(cls, df: NativeInputDF): |
117 | 125 | return cls(PandasImplementation(df)) |
118 | 126 | if is_pyspark_dataframe(df): |
119 | 127 | return cls(PysparkImplementation(df)) |
| 128 | + if is_polars_eager_dataframe(df): |
| 129 | + return cls(PolarsEagerImplementation(df)) |
120 | 130 | if is_pandas_on_spark_dataframe(df): |
121 | 131 | # NOTE: we accept Pandas-on-Spark dataframes, but we convert them into Spark and |
122 | 132 | # work like with it same as with normal Spark DF from that. |
@@ -144,18 +154,27 @@ def from_native(cls, df: NativeInputDF): |
144 | 154 | return cls(PysparkImplementation(df.to_spark())) |
145 | 155 |
|
146 | 156 | raise UnsupportedDataFrameException( |
147 | | - f"expected Pandas or PySpark dataframe, got {type(df)}" |
| 157 | + f"expected Pandas, PySpark, or Polars dataframe, got {type(df)}" |
148 | 158 | ) |
149 | 159 |
|
150 | 160 | @property |
151 | 161 | def native_type(self) -> NativeOutputType: |
152 | 162 | """Get the native type of the dataframe. |
153 | 163 |
|
154 | 164 | Returns: |
155 | | - NativeType: Either 'pandas' or 'pyspark' |
| 165 | + NativeOutputType: Either 'pandas', 'pyspark', or 'polars-eager' |
156 | 166 | """ |
157 | 167 | return self._implementation.name |
158 | 168 |
|
| 169 | + @property |
| 170 | + def lazy(self) -> bool: |
| 171 | + """Whether the underlying dataframe uses lazy evaluation. |
| 172 | +
|
| 173 | + Lazy dataframes (e.g. PySpark) defer computation until results are collected, |
| 174 | + while eager ones (e.g. pandas, Polars) evaluate immediately. |
| 175 | + """ |
| 176 | + return self._implementation.lazy |
| 177 | + |
159 | 178 | @property |
160 | 179 | def columns(self) -> Tuple[Column, ...]: |
161 | 180 | """Get the list of columns in the dataframe. |
@@ -425,3 +444,9 @@ def is_wrapped_pandas_dataframe(df: DataFrame) -> TypeGuard[DataFrame[PandasDF]] |
425 | 444 |
|
426 | 445 | def is_wrapped_pyspark_dataframe(df: DataFrame) -> TypeGuard[DataFrame[PysparkDF]]: |
427 | 446 | return df.native_type == "pyspark" |
| 447 | + |
| 448 | + |
| 449 | +def is_wrapped_polars_eager_dataframe( |
| 450 | + df: DataFrame, |
| 451 | +) -> TypeGuard[DataFrame[PolarsEagerDF]]: |
| 452 | + return df.native_type == "polars-eager" |
0 commit comments