1+ from abc import abstractmethod
12from typing import List , Literal , Tuple , Type , TypedDict , Dict as TDict , Any as TAny , Union
23from typing_extensions import override
34import six
45import warnings
5- import pandas as pd
66from traitlets import Unicode , Any , observe , Dict
77
88from buckaroo .pluggable_analysis_framework .col_analysis import ColAnalysis , SDType
9- from ..serialization_utils import pd_to_obj , sd_to_parquet_b64
9+ from ..serialization_utils import sd_to_parquet_b64
1010from buckaroo .pluggable_analysis_framework .utils import (filter_analysis )
11- from buckaroo .pluggable_analysis_framework .df_stats_v2 import DfStatsV2
1211from .autocleaning import SentinelAutocleaning
1312from .dataflow_extras import (exception_protect , Sampling )
1413from .styling_core import (
@@ -92,7 +91,7 @@ def __init__(self, raw_df):
9291
9392
9493
95- def _compute_sampled_df (self , raw_df : pd . DataFrame , sample_method : str ):
94+ def _compute_sampled_df (self , raw_df , sample_method ):
9695 if sample_method == "first" :
9796 return raw_df [:1 ]
9897 return raw_df
@@ -185,7 +184,7 @@ def processed_sd(self) -> SDType:
185184 return self .processed_result [1 ]
186185 return {}
187186
188- def _get_summary_sd (self , df : pd . DataFrame ) -> Tuple [SDType , TAny ]:
187+ def _get_summary_sd (self , df ) -> Tuple [SDType , TAny ]:
189188 analysis_klasses = self .analysis_klasses
190189 if analysis_klasses == "foo" :
191190 return {'some-col' : {'foo' :8 }}, {}
@@ -236,12 +235,15 @@ def _widget_config(self, change):
236235
237236class CustomizableDataflow (DataFlow ):
238237 """
239- This allows targetd extension and customization of DataFlow
238+ This allows targeted extension and customization of DataFlow.
239+
240+ This is an abstract base class — use PandasCustomizableDataflow or
241+ PolarsCustomizableDataflow for concrete implementations.
240242 """
241243 #analysis_klasses = [StylingAnalysis]
242244 analysis_klasses : List [Type [ColAnalysis ]] = [StylingAnalysis ]
243245 command_config = Dict ({}).tag (sync = True )
244- DFStatsClass = DfStatsV2
246+ DFStatsClass = None
245247 sampling_klass = Sampling
246248
247249 df_display_klasses : TDict [str , Type [StylingAnalysis ]] = {}
@@ -323,7 +325,8 @@ def setup_options_from_analysis(self):
323325 empty_df_display_args [kls .df_display_name ] = EMPTY_DF_DISPLAY_ARG
324326
325327
326- self .DFStatsClass .verify_analysis_objects (self .analysis_klasses )
328+ if self .DFStatsClass is not None :
329+ self .DFStatsClass .verify_analysis_objects (self .analysis_klasses )
327330
328331 self .post_processing_klasses = filter_analysis (self .analysis_klasses , "post_processing_method" )
329332
@@ -379,38 +382,20 @@ def run_code_generator(self, operations):
379382 self .ac_obj .run_code_generator (operations )
380383 ### end code interpreter block
381384
382- @override
383- def _compute_processed_result (self , cleaned_df :pd .DataFrame , post_processing_method :str ) -> Tuple [pd .DataFrame , SDType ]:
384- if post_processing_method == '' :
385- return (cleaned_df , {})
386- else :
387- post_analysis = self .post_processing_klasses [post_processing_method ]
388- try :
389- ret_df , sd = post_analysis .post_process_df (cleaned_df )
390- return (ret_df , sd )
391- except Exception as e :
392- return (self ._build_error_dataframe (e ), {})
385+ @abstractmethod
386+ def _compute_processed_result (self , cleaned_df , post_processing_method ):
387+ ...
393388
389+ @abstractmethod
394390 def _build_error_dataframe (self , e ):
395- return pd . DataFrame ({ 'err' : [ str ( e )]})
391+ ...
396392
397393
398394 ### start summary stats block
399395 #TAny closer to some error type
400- @override
401- def _get_summary_sd (self , processed_df :pd .DataFrame ) -> Tuple [SDType , TDict [str , TAny ]]:
402- stats = self .DFStatsClass (
403- processed_df ,
404- self .analysis_klasses ,
405- self .df_name , debug = self .debug )
406- sdf = stats .sdf
407- if stats .errs :
408- if self .debug :
409- raise Exception ("Error executing analysis" )
410- else :
411- return {}, stats .errs
412- else :
413- return sdf , {}
396+ @abstractmethod
397+ def _get_summary_sd (self , processed_df ) -> Tuple [SDType , TDict [str , TAny ]]:
398+ ...
414399
415400
416401 # ### end summary stats block
@@ -422,13 +407,16 @@ def _sd_to_jsondf(self, sd:SDType):
422407 """
423408 return sd_to_parquet_b64 (sd )
424409
425- def _df_to_obj (self , df :pd .DataFrame ) -> TDict [str , TAny ]:
426- return pd_to_obj (self .sampling_klass .serialize_sample (df ))
410+ @abstractmethod
411+ def _df_to_obj (self , df ) -> TDict [str , TAny ]:
412+ ...
427413
428414 def add_analysis (self , analysis_klass :Type [ColAnalysis ]) -> None :
429415 """
430416 same as get_summary_sd, call whatever to set summary_sd and trigger further comps
431417 """
418+ if self .DFStatsClass is None :
419+ return
432420
433421 stats = self .DFStatsClass (
434422 self .processed_df ,
0 commit comments