Skip to content

Commit 6ef50f0

Browse files
authored
Merge pull request #3778 from chrishalcrow/new-string-check-on-aggregate-sorting
Update sorting property dtype check on unit aggregation to allow for different string types
2 parents 526e308 + b08b81b commit 6ef50f0

6 files changed

Lines changed: 103 additions & 83 deletions

File tree

src/spikeinterface/core/tests/test_unitsaggregationsorting.py

Lines changed: 62 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,68 @@
11
import pytest
22
import numpy as np
33

4-
from spikeinterface.core import aggregate_units
4+
from spikeinterface.core import aggregate_units, generate_sorting
55

6-
from spikeinterface.core import NpzSortingExtractor
7-
from spikeinterface.core import create_sorting_npz
8-
from spikeinterface.core import generate_sorting
96

7+
def create_three_sortings(num_units):
8+
sorting1 = generate_sorting(seed=1205, num_units=num_units)
9+
sorting2 = generate_sorting(seed=1206, num_units=num_units)
10+
sorting3 = generate_sorting(seed=1207, num_units=num_units)
1011

11-
def test_unitsaggregationsorting(create_cache_folder):
12-
cache_folder = create_cache_folder
12+
return (sorting1, sorting2, sorting3)
1313

14-
num_seg = 2
15-
file_path = cache_folder / "test_BaseSorting.npz"
1614

17-
create_sorting_npz(num_seg, file_path)
15+
def test_unitsaggregationsorting_spiketrains():
16+
"""Aggregates three sortings, then checks that the number of units and spike trains are equal
17+
for pre-aggregated sorting and the aggregated sorting."""
1818

19-
sorting1 = NpzSortingExtractor(file_path)
20-
sorting2 = sorting1.clone()
21-
sorting3 = sorting1.clone()
22-
print(sorting1)
23-
num_units = len(sorting1.get_unit_ids())
19+
num_units = 5
20+
sorting1, sorting2, sorting3 = create_three_sortings(num_units=num_units)
2421

2522
# test num units
2623
sorting_agg = aggregate_units([sorting1, sorting2, sorting3])
27-
print(sorting_agg)
28-
assert len(sorting_agg.get_unit_ids()) == 3 * num_units
24+
unit_ids = sorting_agg.get_unit_ids()
25+
assert len(unit_ids) == 3 * num_units
2926

3027
# test spike trains
31-
unit_ids = sorting1.get_unit_ids()
32-
33-
for seg in range(num_seg):
34-
spiketrain1_1 = sorting1.get_unit_spike_train(unit_ids[1], segment_index=seg)
35-
spiketrains2_0 = sorting2.get_unit_spike_train(unit_ids[0], segment_index=seg)
36-
spiketrains3_2 = sorting3.get_unit_spike_train(unit_ids[2], segment_index=seg)
37-
assert np.allclose(spiketrain1_1, sorting_agg.get_unit_spike_train(unit_ids[1], segment_index=seg))
38-
assert np.allclose(spiketrains2_0, sorting_agg.get_unit_spike_train(num_units + unit_ids[0], segment_index=seg))
39-
assert np.allclose(
40-
spiketrains3_2, sorting_agg.get_unit_spike_train(2 * num_units + unit_ids[2], segment_index=seg)
28+
for segment_index in range(sorting1.get_num_segments()):
29+
30+
spiketrain1 = sorting1.get_unit_spike_train(unit_ids[1], segment_index=segment_index)
31+
assert np.all(spiketrain1 == sorting_agg.get_unit_spike_train(unit_ids[1], segment_index=segment_index))
32+
33+
spiketrain2 = sorting2.get_unit_spike_train(unit_ids[0], segment_index=segment_index)
34+
assert np.all(
35+
spiketrain2 == sorting_agg.get_unit_spike_train(unit_ids[0 + num_units], segment_index=segment_index)
36+
)
37+
38+
spiketrain3 = sorting3.get_unit_spike_train(unit_ids[2], segment_index=segment_index)
39+
assert np.all(
40+
spiketrain3 == sorting_agg.get_unit_spike_train(unit_ids[2 + num_units * 2], segment_index=segment_index)
4141
)
4242

4343
# test rename units
4444
renamed_unit_ids = [f"#Unit {i}" for i in range(3 * num_units)]
4545
sorting_agg_renamed = aggregate_units([sorting1, sorting2, sorting3], renamed_unit_ids=renamed_unit_ids)
4646
assert all(unit in renamed_unit_ids for unit in sorting_agg_renamed.get_unit_ids())
4747

48-
# test annotations
4948

50-
# matching annotation
49+
def test_unitsaggregationsorting_annotations():
50+
"""Aggregates a sorting and check if annotations were correctly propagated."""
51+
52+
num_units = 5
53+
sorting1, sorting2, sorting3 = create_three_sortings(num_units=num_units)
54+
55+
# Annotations the same, so can be propagated to aggregated sorting
5156
sorting1.annotate(organ="brain")
5257
sorting2.annotate(organ="brain")
5358
sorting3.annotate(organ="brain")
5459

55-
# not matching annotation
60+
# Annotations are not equal, so cannot be propagated to aggregated sorting
5661
sorting1.annotate(area="CA1")
5762
sorting2.annotate(area="CA2")
5863
sorting3.annotate(area="CA3")
5964

60-
# incomplete annotation
65+
# Annotations are not known for all sortings, so cannot be propagated to aggregated sorting
6166
sorting1.annotate(date="2022-10-13")
6267
sorting2.annotate(date="2022-10-13")
6368

@@ -66,31 +71,45 @@ def test_unitsaggregationsorting(create_cache_folder):
6671
assert "area" not in sorting_agg_prop.get_annotation_keys()
6772
assert "date" not in sorting_agg_prop.get_annotation_keys()
6873

69-
# test properties
7074

71-
# complete property
75+
def test_unitsaggregationsorting_properties():
76+
"""Aggregates a sorting and check if properties were correctly propagated."""
77+
78+
num_units = 5
79+
sorting1, sorting2, sorting3 = create_three_sortings(num_units=num_units)
80+
81+
# Can propagate property
7282
sorting1.set_property("brain_area", ["CA1"] * num_units)
7383
sorting2.set_property("brain_area", ["CA2"] * num_units)
7484
sorting3.set_property("brain_area", ["CA3"] * num_units)
7585

76-
# skip for inconsistency
77-
sorting1.set_property("template", np.zeros((num_units, 4, 30)))
78-
sorting1.set_property("template", np.zeros((num_units, 20, 50)))
79-
sorting1.set_property("template", np.zeros((num_units, 2, 10)))
80-
81-
# incomplete property (str can't be propagated)
82-
sorting1.set_property("quality", ["good"] * num_units)
83-
sorting2.set_property("quality", ["bad"] * num_units)
86+
# Can propagate, even though the dtype is different, since dtype.kind is the same
87+
sorting1.set_property("quality_string", ["good"] * num_units)
88+
sorting2.set_property("quality_string", ["bad"] * num_units)
89+
sorting3.set_property("quality_string", ["bad"] * num_units)
8490

85-
# incomplete property (object can be propagated)
91+
# Can propagate. Although we don't know the "rand" property for sorting3, we can
92+
# use the Extractor's `default_missing_property_values`
8693
sorting1.set_property("rand", np.random.rand(num_units))
8794
sorting2.set_property("rand", np.random.rand(num_units))
8895

96+
# Cannot propagate as arrays are different shapes for each sorting
97+
sorting1.set_property("template", np.zeros((num_units, 4, 30)))
98+
sorting2.set_property("template", np.zeros((num_units, 20, 50)))
99+
sorting3.set_property("template", np.zeros((num_units, 2, 10)))
100+
101+
# Cannot propagate as dtypes are different
102+
sorting1.set_property("quality_mixed", ["good"] * num_units)
103+
sorting2.set_property("quality_mixed", [1] * num_units)
104+
sorting3.set_property("quality_mixed", [2] * num_units)
105+
89106
sorting_agg_prop = aggregate_units([sorting1, sorting2, sorting3])
107+
90108
assert "brain_area" in sorting_agg_prop.get_property_keys()
91-
assert "quality" not in sorting_agg_prop.get_property_keys()
109+
assert "quality_string" in sorting_agg_prop.get_property_keys()
92110
assert "rand" in sorting_agg_prop.get_property_keys()
93-
print(sorting_agg_prop.get_property("brain_area"))
111+
assert "template" not in sorting_agg_prop.get_property_keys()
112+
assert "quality_mixed" not in sorting_agg_prop.get_property_keys()
94113

95114

96115
def test_unit_aggregation_preserve_ids():

src/spikeinterface/core/unitsaggregationsorting.py

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -77,43 +77,44 @@ def __init__(self, sorting_list, renamed_unit_ids=None):
7777
if np.all(annotations == annotations[0]):
7878
self.set_annotation(annotation_name, sorting_list[0].get_annotation(annotation_name))
7979

80-
property_keys = {}
81-
property_dict = {}
82-
deleted_keys = []
83-
for sort in sorting_list:
84-
for prop_name in sort.get_property_keys():
85-
if prop_name in deleted_keys:
86-
continue
87-
if prop_name in property_keys:
88-
if property_keys[prop_name] != sort.get_property(prop_name).dtype:
89-
print(f"Skipping property '{prop_name}: difference in dtype between sortings'")
90-
del property_keys[prop_name]
91-
deleted_keys.append(prop_name)
92-
else:
93-
property_keys[prop_name] = sort.get_property(prop_name).dtype
94-
for prop_name in property_keys:
95-
dtype = property_keys[prop_name]
96-
property_dict[prop_name] = np.array([], dtype=dtype)
80+
# Check if all the sortings have the same properties
81+
properties_set = set(np.concatenate([sorting.get_property_keys() for sorting in sorting_list]))
82+
for prop_name in properties_set:
9783

84+
dtypes_per_sorting = []
9885
for sort in sorting_list:
9986
if prop_name in sort.get_property_keys():
100-
values = sort.get_property(prop_name)
101-
else:
102-
if dtype.kind not in BaseExtractor.default_missing_property_values:
103-
del property_dict[prop_name]
87+
dtypes_per_sorting.append(sort.get_property(prop_name).dtype.kind)
88+
89+
if len(set(dtypes_per_sorting)) != 1:
90+
warnings.warn(
91+
f"Skipping property '{prop_name}'. Difference in dtype.kind between sortings: {dtypes_per_sorting}"
92+
)
93+
continue
94+
95+
all_property_values = []
96+
for sort in sorting_list:
97+
98+
# If one of the sortings doesn't have the property, use the default missing property value
99+
if prop_name not in sort.get_property_keys():
100+
try:
101+
values = np.full(
102+
sort.get_num_units(),
103+
BaseExtractor.default_missing_property_values[dtypes_per_sorting[0]],
104+
)
105+
except:
106+
warnings.warn(f"Skipping property '{prop_name}': cannot impute missing property values.")
104107
break
105-
values = np.full(
106-
sort.get_num_units(), BaseExtractor.default_missing_property_values[dtype.kind], dtype=dtype
107-
)
108-
109-
try:
110-
property_dict[prop_name] = np.concatenate((property_dict[prop_name], values))
111-
except Exception as e:
112-
print(f"Skipping property '{prop_name}' due to shape inconsistency")
113-
del property_dict[prop_name]
114-
break
115-
for prop_name, prop_values in property_dict.items():
116-
self.set_property(key=prop_name, values=prop_values)
108+
else:
109+
values = sort.get_property(prop_name)
110+
111+
all_property_values.append(values)
112+
113+
try:
114+
prop_values = np.concatenate(all_property_values)
115+
self.set_property(key=prop_name, values=prop_values)
116+
except Exception as ext:
117+
warnings.warn(f"Skipping property '{prop_name}' as numpy cannot concatenate. Numpy error: {ext}")
117118

118119
# add segments
119120
for i_seg in range(num_segments):

src/spikeinterface/postprocessing/template_metrics.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,10 @@ def _set_params(
153153
if delete_existing_metrics is False and tm_extension is not None:
154154

155155
existing_metric_names = tm_extension.params["metric_names"]
156-
existing_metric_names_propogated = [
156+
existing_metric_names_propagated = [
157157
metric_name for metric_name in existing_metric_names if metric_name not in metrics_to_compute
158158
]
159-
metric_names = metrics_to_compute + existing_metric_names_propogated
159+
metric_names = metrics_to_compute + existing_metric_names_propagated
160160

161161
params = dict(
162162
metric_names=metric_names,
@@ -328,7 +328,7 @@ def _run(self, verbose=False):
328328

329329
existing_metrics = []
330330

331-
# Check if we need to propogate any old metrics. If so, we'll do that.
331+
# Check if we need to propagate any old metrics. If so, we'll do that.
332332
# Otherwise, we'll avoid attempting to load an empty template_metrics.
333333
if set(self.params["metrics_to_compute"]) != set(self.params["metric_names"]):
334334

src/spikeinterface/postprocessing/tests/test_template_metrics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def test_compute_new_template_metrics(small_sorting_analyzer):
9898

9999
def test_metric_names_in_same_order(small_sorting_analyzer):
100100
"""
101-
Computes sepecified template metrics and checks order is propogated.
101+
Computes specified template metrics and checks order is propagated.
102102
"""
103103
specified_metric_names = ["peak_trough_ratio", "num_negative_peaks", "half_width"]
104104
small_sorting_analyzer.compute("template_metrics", metric_names=specified_metric_names)

src/spikeinterface/qualitymetrics/quality_metric_calculator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,10 @@ def _set_params(
108108
if delete_existing_metrics is False and qm_extension is not None:
109109

110110
existing_metric_names = qm_extension.params["metric_names"]
111-
existing_metric_names_propogated = [
111+
existing_metric_names_propagated = [
112112
metric_name for metric_name in existing_metric_names if metric_name not in metrics_to_compute
113113
]
114-
metric_names = metrics_to_compute + existing_metric_names_propogated
114+
metric_names = metrics_to_compute + existing_metric_names_propagated
115115

116116
params = dict(
117117
metric_names=metric_names,

src/spikeinterface/qualitymetrics/tests/test_metrics_functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def test_compute_new_quality_metrics(small_sorting_analyzer):
120120

121121
def test_metric_names_in_same_order(small_sorting_analyzer):
122122
"""
123-
Computes sepecified quality metrics and checks order is propogated.
123+
Computes specified quality metrics and checks order is propagated.
124124
"""
125125
specified_metric_names = ["firing_range", "snr", "amplitude_cutoff"]
126126
small_sorting_analyzer.compute("quality_metrics", metric_names=specified_metric_names)

0 commit comments

Comments
 (0)