Commit d854bc5

Working TRT wrappers for encoder and class head
Signed-off-by: Boris Fomitchev <bfomitchev@nvidia.com>
Parent: a1da5e2

7 files changed: 1017 additions & 47 deletions

scripts/export.bash

Lines changed: 2 additions & 2 deletions
@@ -1,3 +1,3 @@
-# python3 -m scripts.export --config_file 'configs/infer.yaml' - infer_everything --image_file 'example-1.nii.gz'
+python3 -m scripts.export --config_file 'configs/infer.yaml' - infer_everything --image_file 'example-1.nii.gz'
 
-python3 -m scripts.export --config_file 'configs/infer.yaml' - infer --image_file 'example-1.nii.gz' --label_prompt [1] --save_mask true
+# python3 -m scripts.export --config_file 'configs/infer.yaml' - infer --image_file 'example-1.nii.gz' --label_prompt [1] --save_mask true
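The `- infer_everything` syntax in these commands comes from Python Fire, which export.py uses as its entry point (`fire.Fire(InferClass)` at the bottom of the file): flags before the `-` separator go to the constructor, and the token after it names the method to call. A minimal sketch of that mapping with a stubbed-down class (the class and method names here are illustrative, not from this commit):

    # demo.py: `python3 demo.py --config_file cfg.yaml - infer --image_file img.nii.gz`
    # constructs Demo(config_file="cfg.yaml") and then calls .infer(image_file="img.nii.gz").
    import fire

    class Demo:
        def __init__(self, config_file="./configs/infer.yaml"):
            self.config_file = config_file

        def infer(self, image_file):
            print(f"infer({image_file}) with {self.config_file}")

    if __name__ == "__main__":
        fire.Fire(Demo)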

scripts/export.py

Lines changed: 26 additions & 2 deletions
@@ -31,6 +31,8 @@
 from .sliding_window import point_based_window_inferer, sliding_window_inference
 from .train import CONFIG
 from .utils.trans_utils import VistaPostTransform
+from .utils.trt_utils import ExportWrapper, TRTWrapper
+import time
 
 rearrange, _ = optional_import("einops", name="rearrange")
 sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
@@ -60,7 +62,6 @@ def infer_wrapper(inputs, model, **kwargs):
     outputs = model(input_images=inputs, **kwargs)
     return outputs.transpose(1, 0)
 
-
 class InferClass:
     def __init__(self, config_file="./configs/infer.yaml", **override):
         logging.basicConfig(stream=sys.stdout, level=logging.INFO)
@@ -73,7 +74,7 @@ def __init__(self, config_file="./configs/infer.yaml", **override):
         parser.update(pairs=_args)
 
         # We do not use AMP for export
-        self.amp = False  # parser.get_parsed_content("amp")
+        self.amp = parser.get_parsed_content("amp")
         input_channels = parser.get_parsed_content("input_channels")
         patch_size = parser.get_parsed_content("patch_size")
         self.patch_size = patch_size
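Re-enabling `self.amp` from the config matters downstream: an `amp` flag in this kind of inference code typically gates an autocast context around the model call. A hedged sketch of that pattern, since the actual call site is outside this diff (the model and helper here are stand-ins):

    import torch

    def run_model(model, x, amp: bool):
        # Hypothetical: gate mixed-precision inference on a config-driven flag,
        # mirroring how self.amp is usually consumed.
        with torch.cuda.amp.autocast(enabled=amp):
            return model(x)

    model = torch.nn.Linear(8, 8).cuda()
    x = torch.randn(2, 8, device="cuda")
    out = run_model(model, x, amp=True)  # matmul runs in fp16 under autocast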
@@ -129,6 +130,17 @@ def __init__(self, config_file="./configs/infer.yaml", **override):
         self.save_transforms = transforms.Compose(save_transforms)
         self.prev_mask = None
         self.batch_data = None
+
+        en_wrapper = ExportWrapper.wrap(self.model.image_encoder.encoder,
+                                        input_names=['x'], output_names=['x_out'])
+        self.model.image_encoder.encoder = TRTWrapper("Encoder", en_wrapper, use_cuda_graph=False)
+        # self.model.image_encoder.encoder.load_engine()
+
+        cls_wrapper = ExportWrapper.wrap(self.model.class_head,
+                                         input_names=['src', 'class_vector'], output_names=['masks', 'class_embedding'])
+        self.model.class_head = TRTWrapper("ClassHead", cls_wrapper, use_cuda_graph=False)
+        # self.model.class_head.load_engine()
+
         return
 
     def clear_cache(self):
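This hunk is the core of the commit: each submodule is wrapped once at init time, so the rest of the pipeline calls it unchanged. A generalized sketch of the pattern, assuming only the trt_utils API visible above (`ExportWrapper.wrap`, the `TRTWrapper` constructor, and the commented-out `load_engine`); the helper name is hypothetical:

    # Hypothetical helper generalizing the wrapping pattern above. Assumes
    # scripts/utils/trt_utils provides ExportWrapper.wrap(module, input_names, output_names),
    # TRTWrapper(name, wrapper, use_cuda_graph=...) and TRTWrapper.load_engine(),
    # exactly as used in this diff.
    from .utils.trt_utils import ExportWrapper, TRTWrapper

    def trt_wrap_attr(parent, attr, name, input_names, output_names, load=False):
        wrapped = ExportWrapper.wrap(getattr(parent, attr),
                                     input_names=input_names, output_names=output_names)
        trt = TRTWrapper(name, wrapped, use_cuda_graph=False)
        if load:
            trt.load_engine()  # attach a prebuilt engine, per the commented lines
        setattr(parent, attr, trt)

    # trt_wrap_attr(self.model.image_encoder, "encoder", "Encoder", ['x'], ['x_out'])
    # trt_wrap_attr(self.model, "class_head", "ClassHead",
    #               ['src', 'class_vector'], ['masks', 'class_embedding'])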
@@ -162,6 +174,7 @@ def infer(
         used together with prev_mask. If prev_mask is generated by N points, point_start should be N+1 to save
         time and avoid repeated inference. This is by default disabled.
         """
+        time00 = time.time()
         self.model.eval()
         if not isinstance(image_file, dict):
             image_file = {"image": image_file}
@@ -248,12 +261,15 @@ def infer(
                 finished = False
             if finished:
                 break
+        print(f"Infer Time: {time.time() - time00}")
+
         if not finished:
             raise RuntimeError("Infer not finished due to OOM.")
         return batch_data[0]["pred"]
 
     @torch.no_grad()
     def infer_everything(self, image_file, label_prompt=EVERYTHING_PROMPT, rank=0):
+        time00 = time.time()
         self.model.eval()
         device = f"cuda:{rank}"
         if not isinstance(image_file, dict):
@@ -295,6 +311,8 @@ def infer_everything(self, image_file, label_prompt=EVERYTHING_PROMPT, rank=0):
                 finished = False
             if finished:
                 break
+        print(f"InferEverything Time: {time.time() - time00}")
+
         if not finished:
             raise RuntimeError("Infer not finished due to OOM.")
 
@@ -317,5 +335,11 @@ def batch_infer_everything(self, datalist=str, basedir=str):
 
 
 if __name__ == "__main__":
+    try:
+        # import torch_onnx
+        # torch_onnx.patch_torch(error_report=True)
+        print("patch succeeded")
+    except Exception:
+        pass
     fire, _ = optional_import("fire")
     fire.Fire(InferClass)

scripts/utils/cast_utils.py

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from contextlib import nullcontext
+
+import torch
+
+
+def avoid_bfloat16_autocast_context():
+    """
+    If the current autocast context is bfloat16,
+    cast it to float32
+    """
+    if torch.is_autocast_enabled() and torch.get_autocast_gpu_dtype() == torch.bfloat16:
+        return torch.cuda.amp.autocast(dtype=torch.float32)
+    else:
+        return nullcontext()
+
+
+def avoid_float16_autocast_context():
+    """
+    If the current autocast context is float16, cast it to bfloat16
+    if available (unless we're in jit) or float32
+    """
+    if torch.is_autocast_enabled() and torch.get_autocast_gpu_dtype() == torch.float16:
+        if torch.jit.is_scripting() or torch.jit.is_tracing():
+            return torch.cuda.amp.autocast(dtype=torch.float32)
+        if torch.cuda.is_bf16_supported():
+            return torch.cuda.amp.autocast(dtype=torch.bfloat16)
+        else:
+            return torch.cuda.amp.autocast(dtype=torch.float32)
+    else:
+        return nullcontext()
+
+
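These helpers return context managers, so call sites can guard numerically sensitive regions without inspecting autocast state themselves. A hedged usage sketch (the LayerNorm is a stand-in; the import path follows this commit's layout):

    import torch
    from scripts.utils.cast_utils import avoid_float16_autocast_context

    layer = torch.nn.LayerNorm(16).cuda()
    x = torch.randn(4, 16, device="cuda")

    with torch.cuda.amp.autocast(dtype=torch.float16):
        with avoid_float16_autocast_context():
            # runs in bf16 (if supported) or fp32 instead of fp16
            y = layer(x)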
+def cast_tensor(x, from_dtype=torch.float16, to_dtype=torch.float32):
+    return x.to(dtype=to_dtype) if x.dtype == from_dtype else x
+
+
+def cast_all(x, from_dtype=torch.float16, to_dtype=torch.float32):
+    if isinstance(x, torch.Tensor):
+        return cast_tensor(x, from_dtype=from_dtype, to_dtype=to_dtype)
+    else:
+        if isinstance(x, dict):
+            new_dict = {}
+            for k in x.keys():
+                new_dict[k] = cast_all(x[k], from_dtype=from_dtype, to_dtype=to_dtype)
+            return new_dict
+        elif isinstance(x, tuple):
+            return tuple(cast_all(y, from_dtype=from_dtype, to_dtype=to_dtype) for y in x)
+
+
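`cast_all` recurses through dicts and tuples of tensors; note that in the version above any other type (a list, a plain float, None) falls through both branches and comes back as None, so callers are expected to pass only tensors, dicts, or tuples. A quick usage sketch:

    import torch
    from scripts.utils.cast_utils import cast_all  # path per this commit

    batch = {
        "image": torch.randn(2, 3, dtype=torch.float16),
        "meta": (torch.zeros(1, dtype=torch.float16), torch.ones(1, dtype=torch.float32)),
    }
    out = cast_all(batch, from_dtype=torch.float16, to_dtype=torch.float32)
    print(out["image"].dtype)    # torch.float32 (was float16)
    print(out["meta"][1].dtype)  # torch.float32 (already float32, left unchanged)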
+class CastToFloat(torch.nn.Module):
+    def __init__(self, mod):
+        super(CastToFloat, self).__init__()
+        self.mod = mod
+
+    def forward(self, x):
+        with torch.cuda.amp.autocast(enabled=False):
+            ret = self.mod.forward(x.to(torch.float32)).to(x.dtype)
+        return ret
+
+
+class CastToFloatAll(torch.nn.Module):
+    def __init__(self, mod):
+        super(CastToFloatAll, self).__init__()
+        self.mod = mod
+
+    def forward(self, *args):
+        from_dtype = args[0].dtype
+        with torch.cuda.amp.autocast(enabled=False):
+            ret = self.mod.forward(*cast_all(args, from_dtype=from_dtype, to_dtype=torch.float32))
+        return cast_all(ret, from_dtype=torch.float32, to_dtype=from_dtype)
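These wrappers are the usual trick for keeping precision-sensitive submodules in fp32 during mixed-precision inference or export: swap the module for a wrapped copy and everything upstream stays unchanged. A hedged example (the choice of LayerNorm is illustrative, not taken from this commit):

    import torch
    from scripts.utils.cast_utils import CastToFloat  # path per this commit

    model = torch.nn.Sequential(
        torch.nn.Linear(16, 16),
        torch.nn.LayerNorm(16),
    ).cuda()

    # Force the LayerNorm to run in fp32 even under fp16 autocast.
    model[1] = CastToFloat(model[1])

    x = torch.randn(2, 16, device="cuda")
    with torch.cuda.amp.autocast(dtype=torch.float16):
        y = model(x)  # LayerNorm computes in fp32, output cast back to fp16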
