|
9 | 9 | import pandas as pd |
10 | 10 | import xarray as xr |
11 | 11 |
|
12 | | -from imap_processing.hi.utils import parse_sensor_number |
| 12 | +from imap_processing.hi.utils import CoincidenceBitmap, parse_sensor_number |
13 | 13 |
|
14 | 14 | logger = logging.getLogger(__name__) |
15 | 15 |
|
@@ -808,3 +808,273 @@ def mark_overflow_packets( |
808 | 808 | f"Found {len(full_packet_indices)} full packet(s), " |
809 | 809 | f"dropped {len(mets_to_cull)} 8-spin period(s) due to overflow packets" |
810 | 810 | ) |
| 811 | + |
| 812 | + |
| 813 | +def _get_sweep_indices(esa_step: np.ndarray) -> np.ndarray: |
| 814 | + """ |
| 815 | + Assign sweep indices to each MET based on ESA step transitions. |
| 816 | +
|
| 817 | + A new sweep starts when ESA step transitions from high to low |
| 818 | + (e.g., 9 -> 1), detected using np.diff(). |
| 819 | +
|
| 820 | + Parameters |
| 821 | + ---------- |
| 822 | + esa_step : numpy.ndarray |
| 823 | + ESA step values for each MET (epoch dimension). |
| 824 | +
|
| 825 | + Returns |
| 826 | + ------- |
| 827 | + sweep_indices : numpy.ndarray |
| 828 | + Sweep index for each MET. First sweep is index 0. |
| 829 | + """ |
| 830 | + if len(esa_step) == 0: |
| 831 | + return np.array([], dtype=np.int32) |
| 832 | + |
| 833 | + # Find sweep boundaries where ESA step transitions from high to low |
| 834 | + esa_diff = np.diff(esa_step.astype(np.int32)) |
| 835 | + # Negative diff indicates high-to-low transition (e.g., 9 -> 1 = -8) |
| 836 | + sweep_boundaries = esa_diff < 0 |
| 837 | + |
| 838 | + # Create sweep indices using cumsum on boundaries |
| 839 | + # Prepend False so first MET is in sweep 0 |
| 840 | + sweep_indices = ( |
| 841 | + np.concatenate([[False], sweep_boundaries]).cumsum().astype(np.int32) |
| 842 | + ) |
| 843 | + |
| 844 | + return sweep_indices |
| 845 | + |
| 846 | + |
def _add_sweep_indices(l1b_de: xr.Dataset) -> xr.Dataset:
    """
    Attach an ``esa_sweep`` coordinate derived from ESA step transitions.

    Parameters
    ----------
    l1b_de : xarray.Dataset
        L1B Direct Event dataset.

    Returns
    -------
    xarray.Dataset
        Copy of the dataset with an ``esa_sweep`` coordinate on the
        epoch dimension.
    """
    # Derive one sweep label per epoch from the ESA step sequence, then
    # register it as a coordinate so downstream code can group by sweep.
    sweep_labels = _get_sweep_indices(l1b_de["esa_step"].values)
    return l1b_de.assign_coords({"esa_sweep": ("epoch", sweep_labels)})
| 863 | + |
| 864 | + |
def _compute_normalized_counts_per_sweep(
    l1b_de: xr.Dataset,
    tof_ab_limit_ns: int,
) -> xr.Dataset:
    """
    Compute normalized AB coincidence counts per ESA sweep and reshape dataset.

    This function:
    1. Computes normalized AB coincidence counts per sweep
    2. Removes all data associated with the event_met coordinate
    3. Reshapes the dataset so esa_sweep becomes a dimension (removing epoch)
    4. Returns the updated dataset with all epoch-based variables

    Parameters
    ----------
    l1b_de : xarray.Dataset
        L1B Direct Event dataset with esa_sweep coordinate on epoch dimension.
    tof_ab_limit_ns : int
        Maximum absolute value of tof_ab in nanoseconds.

    Returns
    -------
    xarray.Dataset
        Reshaped dataset with esa_sweep as a dimension containing:
        - normalized_count: normalized AB coincidence counts per sweep
        - All other variables from the input dataset (first value per sweep)

    Raises
    ------
    ValueError
        If the dataset lacks the esa_sweep coordinate (add it with
        _add_sweep_indices first).
    """
    if "esa_sweep" not in l1b_de.coords:
        raise ValueError("Dataset must have esa_sweep coordinate")

    # Filter to valid AB coincidences
    tof_ab = l1b_de["tof_ab"]
    coincidence_type = l1b_de["coincidence_type"]
    ccsds_index = l1b_de["ccsds_index"]

    # An event is a valid AB coincidence when its coincidence type matches
    # the "AB" bitmap value AND its |tof_ab| is within the ns limit.
    ab_coincidence_type = CoincidenceBitmap.detector_hit_str_to_int("AB")
    is_valid_ab = (coincidence_type == ab_coincidence_type) & (
        np.abs(tof_ab) <= tof_ab_limit_ns
    )

    # Map events to sweeps via ccsds_index -> esa_sweep
    # NOTE(review): assumes ccsds_index holds positional indices into the
    # epoch dimension — confirm against the L1B DE product definition.
    event_epoch_idx = ccsds_index.values
    event_sweep_idx = l1b_de["esa_sweep"].values[event_epoch_idx]

    # Count valid AB events per sweep
    n_sweeps = int(l1b_de["esa_sweep"].max().values) + 1
    counts_per_sweep = np.zeros(n_sweeps, dtype=np.int64)
    # np.add.at is used (rather than fancy-index +=) so repeated sweep
    # indices each contribute to the count.
    np.add.at(counts_per_sweep, event_sweep_idx[is_valid_ab.values], 1)

    # Normalize by number of unique ESA steps
    # NOTE(review): divides by zero if esa_step is empty — confirm callers
    # never pass an empty dataset.
    n_unique_esa_steps = len(np.unique(l1b_de["esa_step"].values))
    normalized_counts = counts_per_sweep / n_unique_esa_steps

    # Remove all variables that depend on event_met dimension
    ds = l1b_de.drop_dims("event_met", errors="ignore")

    # Set esa_sweep and esa_step as a multi-index on epoch dimension
    ds = ds.set_index(epoch=["esa_sweep", "esa_step"])

    # Drop duplicates, keeping first occurrence of each (esa_sweep, esa_step) pair
    # This handles cases where multiple packets have the same esa_sweep and esa_step
    ds = ds.drop_duplicates(dim="epoch", keep="first")

    # Unstack to make esa_sweep and esa_step into separate dimensions
    # This creates a 2D array with dimensions (esa_sweep, esa_step)
    # (missing (sweep, step) combinations become NaN-filled after unstack)
    ds_reshaped = ds.unstack("epoch")

    # Add normalized_count as a new variable
    # It only has esa_sweep dimension (no esa_step variation within a sweep)
    ds_reshaped["normalized_count"] = xr.DataArray(
        normalized_counts,
        dims=["esa_sweep"],
        coords={"esa_sweep": np.arange(n_sweeps)},
    )

    return ds_reshaped
| 941 | + |
| 942 | + |
def mark_statistical_filter_0(
    goodtimes_ds: xr.Dataset,
    l1b_de_datasets: list[xr.Dataset],
    current_index: int,
    threshold_factor: float = 1.5,
    tof_ab_limit_ns: int = 15,
    cull_code: int = CullCode.LOOSE,
    min_pointings: int = 4,
) -> None:
    """
    Apply Statistical Filter 0 to detect drastic penetrating background changes.

    Statistical Filter 0 from Algorithm Document Section 2.3.2.3 detects when
    the penetrating background rate has changed drastically, compromising
    background subtraction accuracy. For each ESA sweep across all input
    Pointings, it computes the normalized AB coincidence count (total count
    divided by number of ESA steps). It then marks ESA sweeps in the current
    Pointing where the normalized count exceeds 150% of the median.

    Parameters
    ----------
    goodtimes_ds : xarray.Dataset
        Goodtimes dataset for the current Pointing to update.
    l1b_de_datasets : list[xarray.Dataset]
        List of L1B DE datasets for surrounding Pointings. Typically includes
        current plus preceding and following Pointings
        (e.g., [P-3, P-2, P-1, P(current), P+1, P+2, P+3]).
    current_index : int
        Index of the current Pointing in l1b_de_datasets.
    threshold_factor : float, optional
        Multiplier for median comparison. Default is 1.5 (150% of median).
    tof_ab_limit_ns : int, optional
        Maximum |tof_ab| in nanoseconds for AB coincidences. Default is 15.
    cull_code : int, optional
        Cull code to use for marking bad times. Default is CullCode.LOOSE.
    min_pointings : int, optional
        Minimum number of Pointings required. Default is 4.

    Raises
    ------
    ValueError
        If current_index is out of range or if fewer than min_pointings
        datasets are provided.

    Notes
    -----
    This function modifies goodtimes_ds in place. Only ESA sweeps in the
    current Pointing where the normalized count exceeds `threshold_factor *
    median` are marked as bad. Other sweeps remain unaffected.

    Algorithm:
    1. For each complete ESA sweep across all Pointings, count AB coincidences
       where |tof_ab| <= 15ns and divide by number of ESA steps
    2. Calculate median of all normalized sweep counts
    3. For each sweep in current Pointing, mark all METs in that sweep as bad
       if normalized count > threshold_factor * median
    """
    logger.info("Running mark_statistical_filter_0 culling")

    # Validate current_index is in range
    if current_index < 0 or current_index >= len(l1b_de_datasets):
        raise ValueError(
            f"current_index {current_index} out of range for list of "
            f"length {len(l1b_de_datasets)}"
        )

    # Validate that we have the minimum number of datasets
    if len(l1b_de_datasets) < min_pointings:
        raise ValueError(
            f"At least {min_pointings} valid Pointings required, "
            f"got {len(l1b_de_datasets)}"
        )

    # Add esa_sweep coordinate, reshape, and compute normalized_count for each dataset
    all_normalized_counts: list[np.ndarray] = []
    reshaped_datasets: dict[int, xr.Dataset] = {}

    for i, l1b_de in enumerate(l1b_de_datasets):
        # Add esa_sweep coordinate
        l1b_de_with_sweep = _add_sweep_indices(l1b_de)

        # Compute normalized counts and reshape dataset. This removes epoch
        # dimension, adds esa_sweep dimension, and includes normalized_count.
        reshaped_ds = _compute_normalized_counts_per_sweep(
            l1b_de_with_sweep, tof_ab_limit_ns
        )

        # Store reshaped dataset and normalized counts
        reshaped_datasets[i] = reshaped_ds
        all_normalized_counts.append(reshaped_ds["normalized_count"].values)

        # Offset is relative to the current Pointing (0 = current,
        # negative = preceding, positive = following).
        offset = i - current_index
        logger.debug(
            f"Pointing {offset:+d}: "
            f"{len(reshaped_ds['normalized_count'])} complete ESA sweeps"
        )

    current_ds = reshaped_datasets[current_index]

    # Calculate median from all sweep counts (pooled across every Pointing,
    # not just the current one)
    all_counts = np.concatenate(all_normalized_counts)
    median_count = float(np.median(all_counts))
    threshold = median_count * threshold_factor

    logger.info(
        f"Statistical Filter 0: median={median_count:.2f}, "
        f"threshold={threshold:.2f} ({len(all_counts)} sweeps)"
    )

    # Find and mark bad sweeps in current dataset
    bad_sweep_mask = current_ds["normalized_count"] > threshold
    n_bad_sweeps = int(bad_sweep_mask.sum())

    # Get MET time ranges for bad sweeps using xarray boolean indexing
    # Select only the bad sweeps using the mask
    bad_sweeps_ds = current_ds.isel(esa_sweep=bad_sweep_mask)

    # For each bad sweep, mark the time range from first to last ccsds_met
    for sweep_idx in range(len(bad_sweeps_ds["esa_sweep"])):
        # Get all ccsds_met values for this sweep across all esa_steps
        sweep_mets = bad_sweeps_ds["ccsds_met"].isel(esa_sweep=sweep_idx).values

        # Get min and max MET values, ignoring NaNs (NaNs appear for
        # (sweep, esa_step) combinations missing after the unstack)
        # NOTE(review): nanmin/nanmax raise on an all-NaN sweep — confirm
        # only complete sweeps can be selected here.
        met_start: float = float(np.nanmin(sweep_mets))
        met_end: float = float(np.nanmax(sweep_mets))

        # Mark the entire time range for this sweep as bad
        goodtimes_ds.goodtimes.mark_bad_times(
            met=(met_start, met_end), bins=None, cull=cull_code
        )

    if n_bad_sweeps > 0:
        logger.info(
            f"Statistical Filter 0: Marked {n_bad_sweeps}/"
            f"{len(current_ds['normalized_count'])} ESA sweeps as bad"
        )
    else:
        logger.info("No bad ESA sweeps identified by Statistical Filter 0")
0 commit comments