Skip to content

Commit ce8bac4

Browse files
committed
fix cuda device mismatch
1 parent cb54822 commit ce8bac4

5 files changed

Lines changed: 81 additions & 69 deletions

File tree

src/graphnet/deployment/deployment_module.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,9 @@ def _resolve_prediction_columns(
112112
)
113113
else:
114114
# Only Take First Label
115-
resolved_prediction_columns.append(model.prediction_labels[0])
115+
resolved_prediction_columns.append(
116+
model.prediction_labels[0]
117+
)
116118
return resolved_prediction_columns
117119

118120
def _inference(self, data: Union[Data, Batch]) -> List[np.ndarray]:
Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
"""Deployment modules specific to IceCube."""
22

3-
from .inference_module import I3InferenceModule, I3ParticleInferenceModule, I3MultipleModelInferenceModule
3+
from .inference_module import (
4+
I3InferenceModule,
5+
I3ParticleInferenceModule,
6+
I3MultipleModelInferenceModule,
7+
)
48
from .cleaning_module import I3PulseCleanerModule
59
from .i3deployer import I3Deployer

src/graphnet/deployment/icecube/inference_module.py

Lines changed: 62 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -149,42 +149,10 @@ def __call__(self, frame: I3Frame) -> bool:
149149
return True
150150
# inference
151151
memory_watch = False
152-
if self._inference_speed_check is True:
153-
# create log file if it does not exist
154-
data_repr_start = time()
155152
try:
156-
if not self.multiple_models:
157-
data = self._create_data_representation(frame=frame).to(
158-
self._device
159-
)
160-
if self._inference_speed_check is True:
161-
data_repr_end = time()
162-
data_repr_time = data_repr_end - data_repr_start
163-
inference_start = time()
164-
predictions = self._apply_model(data=data)
165-
else:
166-
features = self._extract_feature_array_from_frame(frame=frame)
167-
if self._inference_speed_check is True:
168-
data_repr_end = time()
169-
data_repr_time = data_repr_end - data_repr_start
170-
inference_start = time()
171-
model_input_data = []
172-
for _, graph_definition in enumerate(self._graph_definitions):
173-
data = graph_definition(
174-
input_features=features[_],
175-
input_feature_names=self.features_list[_],
176-
)
177-
model_input_data.append(Batch.from_data_list([data]))
178-
179-
predictions = self._apply_model(data=model_input_data)
180-
181-
if self._inference_speed_check is True:
182-
inference_end = time()
183-
inference_time = inference_end - inference_start
184-
self._logger.info(
185-
f"Data representation time: {data_repr_time:.4f} s\n"
186-
f"Inference time: {inference_time:.4f} s\n"
187-
)
153+
predictions, data_repr_time, inference_time = (
154+
self._create_data_and_apply(frame=frame)
155+
)
188156

189157
except OutOfMemoryError:
190158
self.error(
@@ -195,22 +163,12 @@ def __call__(self, frame: I3Frame) -> bool:
195163
save_device = self._device
196164
self._device = "cpu"
197165
self.model.to(self._device)
198-
data = self._create_data_representation(frame=frame)
199-
if self._inference_speed_check is True:
200-
data_repr_end = time()
201-
data_repr_time = data_repr_end - data_repr_start
202-
inference_start = time()
203-
predictions = self._apply_model(data=data)
204-
if self._inference_speed_check is True:
205-
inference_end = time()
206-
inference_time = inference_end - inference_start
207-
self._logger.info(
208-
f"Data representation time: {data_repr_time:.4f} s\n"
209-
f"Inference time: {inference_time:.4f} s\n"
210-
)
211-
self._device = save_device
166+
167+
predictions, data_repr_time, inference_time = (
168+
self._create_data_and_apply(frame=frame)
169+
)
212170
memory_watch = True
213-
del data
171+
self._device = save_device
214172

215173
if self._inference_speed_check is True:
216174
write_start = time()
@@ -238,6 +196,49 @@ def __call__(self, frame: I3Frame) -> bool:
238196
self.model.to(self._device)
239197
return True
240198

199+
def _create_data_and_apply(self, frame: I3Frame) -> tuple:
200+
data_repr_time = -1
201+
inference_time = -1
202+
if self._inference_speed_check is True:
203+
            # start timing the data-representation step
204+
data_repr_start = time()
205+
if not self.multiple_models:
206+
data = self._create_data_representation(frame=frame).to(
207+
self._device
208+
)
209+
if self._inference_speed_check is True:
210+
data_repr_end = time()
211+
data_repr_time = data_repr_end - data_repr_start
212+
inference_start = time()
213+
predictions = self._apply_model(data=data)
214+
else:
215+
features = self._extract_feature_array_from_frame(frame=frame)
216+
if self._inference_speed_check is True:
217+
data_repr_end = time()
218+
data_repr_time = data_repr_end - data_repr_start
219+
inference_start = time()
220+
model_input_data = []
221+
for _, graph_definition in enumerate(self._graph_definitions):
222+
data = graph_definition(
223+
input_features=features[_],
224+
input_feature_names=self.features_list[_],
225+
)
226+
model_input_data.append(
227+
Batch.from_data_list([data.to(self._device)])
228+
)
229+
230+
predictions = self._apply_model(data=model_input_data)
231+
232+
if self._inference_speed_check is True:
233+
inference_end = time()
234+
inference_time = inference_end - inference_start
235+
self._logger.info(
236+
f"Data representation time: {data_repr_time:.4f} s\n"
237+
f"Inference time: {inference_time:.4f} s\n"
238+
)
239+
del data
240+
return predictions, data_repr_time, inference_time
241+
241242
def _check_dimensions(self, predictions: np.ndarray) -> int:
242243
if len(predictions.shape) > 1:
243244
dim = predictions.shape[1]
@@ -284,7 +285,7 @@ def _apply_model(self, data: Data) -> np.ndarray:
284285
"""Apply model to `Data` and case-handling."""
285286
if data is not None:
286287
predictions = self._inference(data)
287-
#print(predictions, type(predictions), type(predictions[0]))
288+
# print(predictions, type(predictions), type(predictions[0]))
288289
if isinstance(predictions, list):
289290
predictions = np.concatenate(
290291
[pred.flatten() for pred in predictions]
@@ -316,8 +317,8 @@ def _create_data_representation(self, frame: I3Frame) -> Data:
316317
data = self._graph_definition(
317318
input_features=input_features,
318319
input_feature_names=self._features,
319-
)
320-
return Batch.from_data_list([data.to(self._device)])
320+
).to(self._device)
321+
return Batch.from_data_list([data])
321322
else:
322323
return None
323324

@@ -421,10 +422,10 @@ def __init__(
421422
len(self._positions) == 3
422423
), "positions must be a list of 3 elements"
423424

424-
def _get_min_time(self, frame: I3Frame) -> float:
425+
def _get_min_time(self, frame: I3Frame, pulsemap: str) -> float:
425426
"""Get the minimum time of the first pulse in the frame."""
426427
min_time = np.inf
427-
doms = frame[self._pulsemap].apply(frame).values()
428+
doms = frame[pulsemap].apply(frame).values()
428429
        # search for the minimum time
429430
for dom in doms:
430431
if dom[0].time < min_time:
@@ -451,10 +452,11 @@ def _add_to_frame(self, frame, data):
451452

452453
if self._shift_time:
453454
# Shift time to be relative to the first pulse
454-
shift_time = (
455-
self._get_min_time(frame)
456-
- frame["CVStatistics"].min_pulse_time
457-
)
455+
shift_time = self._get_min_time(frame, self._pulsemap)
456+
if "CVStatistics" in frame:
457+
shift_time -= frame["CVStatistics"].min_pulse_time
458+
else:
459+
shift_time -= self._get_min_time(frame, "InIcePulses")
458460
particle.time = data[self._time].value + shift_time
459461
else:
460462
particle.time = data[self._time].value
@@ -479,6 +481,7 @@ def _add_to_frame(self, frame, data):
479481
super()._add_to_frame(frame=frame, data=data)
480482
return
481483

484+
482485
class I3MultipleModelInferenceModule(I3InferenceModule):
483486
"""I3InferenceModule for I3Particle data."""
484487

@@ -499,4 +502,3 @@ def _add_to_frame(self, frame, data):
499502

500503
i3_score_container = dataclasses.I3MapStringDouble(data)
501504
frame.Put(self._key_name, i3_score_container)
502-

src/graphnet/models/components/layers.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,12 @@ def forward(
6767
x = super().forward(x, edge_index)
6868

6969
# Recompute adjacency
70-
edge_index = knn_graph(
71-
x=x[:, self.features_subset],
72-
k=self.nb_neighbors,
73-
batch=batch,
74-
).to(self.device)
70+
with torch.cuda.device(x.device):
71+
edge_index = knn_graph(
72+
x=x[:, self.features_subset],
73+
k=self.nb_neighbors,
74+
batch=batch,
75+
).to(x.device)
7576

7677
return x, edge_index
7778

src/graphnet/models/gnn/dynedge.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,11 @@ def _global_pooling(self, x: Tensor, batch: LongTensor) -> Tensor:
254254
assert self._global_pooling_schemes
255255
pooled = []
256256
for pooling_scheme in self._global_pooling_schemes:
257-
pooling_fn = GLOBAL_POOLINGS[pooling_scheme]
258-
pooled_x = pooling_fn(x, index=batch, dim=0)
257+
with torch.cuda.device(
258+
x.device
259+
): # Ensure pooling is performed on the same device as x
260+
pooling_fn = GLOBAL_POOLINGS[pooling_scheme]
261+
pooled_x = pooling_fn(x, index=batch, dim=0)
259262
if isinstance(pooled_x, tuple) and len(pooled_x) == 2:
260263
# `scatter_{min,max}`, which return also an argument, vs.
261264
# `scatter_{mean,sum}`

0 commit comments

Comments
 (0)