4141import geopandas
4242from shapely .geometry .multipolygon import MultiPolygon
4343from shapely .geometry import Polygon
44- from sklearn .cluster import KMeans
4544import sliderule
4645
4746###############################################################################
5150# create logger
5251logger = logging .getLogger (__name__ )
5352
# Import cluster support as an optional dependency: sliderule works without
# sklearn, but toregion()'s n_clusters > 1 path requires it, so record
# availability in a module flag checked at call time.
clustering_enabled = False
try:
    from sklearn.cluster import KMeans
    clustering_enabled = True
# Catch Exception (not a bare except) so SystemExit/KeyboardInterrupt raised
# during interpreter shutdown or a Ctrl-C are not silently swallowed; any
# import-time failure (ImportError, binary-incompatibility errors, etc.)
# still just disables clustering with a warning.
except Exception:
    logger.warning("Unable to import sklearn... clustering support disabled")

# profiling times for each major function, keyed by function name;
# populated by the individual API calls (e.g. toregion) via time.perf_counter()
profiles = {}
@@ -503,12 +510,8 @@ def __gdf2poly(gdf):
503510#
504511def __procoutputfile (parm , lon_key , lat_key ):
505512 if "open_on_complete" in parm ["output" ] and parm ["output" ]["open_on_complete" ]:
506- # Read Parquet File as DataFrame
507- df = geopandas .pd .read_parquet (parm ["output" ]["path" ])
508- # Build GeoDataFrame
509- gdf = __todataframe (df , lon_key = lon_key , lat_key = lat_key )
510- # Return Results
511- return gdf
513+ # Return GeoParquet File as GeoDataFrame
514+ return geopandas .read_parquet (parm ["output" ]["path" ])
512515 else :
513516 # Return Parquet Filename
514517 return parm ["output" ]["path" ]
@@ -804,7 +807,7 @@ def atl06p(parm, asset=DEFAULT_ASSET, version=DEFAULT_ICESAT2_SDP_VERSION, callb
804807 columns = {}
805808 elevation_records = []
806809 num_elevations = 0
807- field_dictionary = {} # [' field_name' ] = {"extent_id": [], field_name: []}
810+ field_dictionary = {} # [< field_name> ] = {"extent_id": [], < field_name> : []}
808811 if len (rsps ) > 0 :
809812 # Sort Records
810813 for rsp in rsps :
@@ -814,15 +817,23 @@ def atl06p(parm, asset=DEFAULT_ASSET, version=DEFAULT_ICESAT2_SDP_VERSION, callb
814817 elif 'extrec' == rsp ['__rectype' ]:
815818 field_name = parm ['atl03_geo_fields' ][rsp ['field_index' ]]
816819 if field_name not in field_dictionary :
817- field_dictionary [field_name ] = {" extent_id" : [], field_name : []}
820+ field_dictionary [field_name ] = {' extent_id' : [], field_name : []}
818821 # Parse Ancillary Data
819- data = __get_values (rsp ['data' ], rsp ['data_type ' ], len (rsp ['data' ]))
822+ data = __get_values (rsp ['data' ], rsp ['datatype ' ], len (rsp ['data' ]))
820823 # Add Left Pair Track Entry
821824 field_dictionary [field_name ]['extent_id' ] += rsp ['extent_id' ] | 0x2 ,
822825 field_dictionary [field_name ][field_name ] += data [LEFT_PAIR ],
823826 # Add Right Pair Track Entry
824827 field_dictionary [field_name ]['extent_id' ] += rsp ['extent_id' ] | 0x3 ,
825828 field_dictionary [field_name ][field_name ] += data [RIGHT_PAIR ],
829+ elif 'rsrec' == rsp ['__rectype' ]:
830+ for sample in rsp ["samples" ]:
831+ time_str = sliderule .gps2utc (sample ["time" ])
832+ field_name = parm ['samples' ][rsp ['raster_index' ]] + "-" + time_str .split (" " )[0 ].strip ()
833+ if field_name not in field_dictionary :
834+ field_dictionary [field_name ] = {'extent_id' : [], field_name : []}
835+ field_dictionary [field_name ]['extent_id' ] += rsp ['extent_id' ],
836+ field_dictionary [field_name ][field_name ] += sample ['value' ],
826837 # Build Elevation Columns
827838 if num_elevations > 0 :
828839 # Initialize Columns
@@ -967,23 +978,23 @@ def atl03sp(parm, asset=DEFAULT_ASSET, version=DEFAULT_ICESAT2_SDP_VERSION, call
967978 # Get Field Type
968979 field_name = parm ['atl03_geo_fields' ][rsp ['field_index' ]]
969980 if field_name not in extent_field_types :
970- extent_field_types [field_name ] = sliderule .basictypes [sliderule .codedtype2str [rsp ['data_type ' ]]]["nptype" ]
981+ extent_field_types [field_name ] = sliderule .basictypes [sliderule .codedtype2str [rsp ['datatype ' ]]]["nptype" ]
971982 # Initialize Extent Dictionary Entry
972983 if extent_id not in extent_dictionary :
973984 extent_dictionary [extent_id ] = {}
974985 # Save of Values per Extent ID per Field Name
975- data = __get_values (rsp ['data' ], rsp ['data_type ' ], len (rsp ['data' ]))
986+ data = __get_values (rsp ['data' ], rsp ['datatype ' ], len (rsp ['data' ]))
976987 extent_dictionary [extent_id ][field_name ] = data
977988 elif 'phrec' == rsp ['__rectype' ]:
978989 # Get Field Type
979990 field_name = parm ['atl03_ph_fields' ][rsp ['field_index' ]]
980991 if field_name not in photon_field_types :
981- photon_field_types [field_name ] = sliderule .basictypes [sliderule .codedtype2str [rsp ['data_type ' ]]]["nptype" ]
992+ photon_field_types [field_name ] = sliderule .basictypes [sliderule .codedtype2str [rsp ['datatype ' ]]]["nptype" ]
982993 # Initialize Extent Dictionary Entry
983994 if extent_id not in photon_dictionary :
984995 photon_dictionary [extent_id ] = {}
985996 # Save of Values per Extent ID per Field Name
986- data = __get_values (rsp ['data' ], rsp ['data_type ' ], len (rsp ['data' ]))
997+ data = __get_values (rsp ['data' ], rsp ['datatype ' ], len (rsp ['data' ]))
987998 photon_dictionary [extent_id ][field_name ] = data
988999 # Build Elevation Columns
9891000 if num_photons > 0 :
@@ -1331,22 +1342,25 @@ def toregion(source, tolerance=0.0, cellsize=0.01, n_clusters=1):
13311342 # generate clusters
13321343 clusters = []
13331344 if n_clusters > 1 :
1334- # pull out centroids of each geometry object
1335- if "CenLon" in gdf and "CenLat" in gdf :
1336- X = numpy .column_stack ((gdf ["CenLon" ], gdf ["CenLat" ]))
1345+ if clustering_enabled :
1346+ # pull out centroids of each geometry object
1347+ if "CenLon" in gdf and "CenLat" in gdf :
1348+ X = numpy .column_stack ((gdf ["CenLon" ], gdf ["CenLat" ]))
1349+ else :
1350+ s = gdf .centroid
1351+ X = numpy .column_stack ((s .x , s .y ))
1352+ # run k means clustering algorithm against polygons in gdf
1353+ kmeans = KMeans (n_clusters = n_clusters , init = 'k-means++' , random_state = 5 , max_iter = 400 )
1354+ y_kmeans = kmeans .fit_predict (X )
1355+ k = geopandas .pd .DataFrame (y_kmeans , columns = ['cluster' ])
1356+ gdf = gdf .join (k )
1357+ # build polygon for each cluster
1358+ for n in range (n_clusters ):
1359+ c_gdf = gdf [gdf ["cluster" ] == n ]
1360+ c_poly = __gdf2poly (c_gdf )
1361+ clusters .append (c_poly )
13371362 else :
1338- s = gdf .centroid
1339- X = numpy .column_stack ((s .x , s .y ))
1340- # run k means clustering algorithm against polygons in gdf
1341- kmeans = KMeans (n_clusters = n_clusters , init = 'k-means++' , random_state = 5 , max_iter = 400 )
1342- y_kmeans = kmeans .fit_predict (X )
1343- k = geopandas .pd .DataFrame (y_kmeans , columns = ['cluster' ])
1344- gdf = gdf .join (k )
1345- # build polygon for each cluster
1346- for n in range (n_clusters ):
1347- c_gdf = gdf [gdf ["cluster" ] == n ]
1348- c_poly = __gdf2poly (c_gdf )
1349- clusters .append (c_poly )
1363+ raise sliderule .FatalError ("Clustering support not enabled; unable to import sklearn package" )
13501364
13511365 # update timing profiles
13521366 profiles [toregion .__name__ ] = time .perf_counter () - tstart
0 commit comments