leimao
diff --git a/‎README.md‎
Lines changed: 22 additions & 6 deletions b/‎README.md‎
Lines changed: 22 additions & 6 deletions
diff --git a/‎data/identity_neural_network.onnx‎
170 Bytes b/‎data/identity_neural_network.onnx‎
170 Bytes
diff --git a/‎docker/tensorrt.Dockerfile‎
Lines changed: 2 additions & 2 deletions b/‎docker/tensorrt.Dockerfile‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎python/README.md‎
Lines changed: 13 additions & 3 deletions b/‎python/README.md‎
Lines changed: 13 additions & 3 deletions
diff --git a/‎python/common.py‎
Lines changed: 31 additions & 17 deletions b/‎python/common.py‎
Lines changed: 31 additions & 17 deletions
diff --git a/‎python/main.py‎
Lines changed: 29 additions & 7 deletions b/‎python/main.py‎
Lines changed: 29 additions & 7 deletions
diff --git a/‎python/test_engine.py‎
Lines changed: 40 additions & 26 deletions b/‎python/test_engine.py‎
Lines changed: 40 additions & 26 deletions
@@ -13,15 +13,15 @@ The ONNX model we created is a simple identity neural network that consists of t
 To build the custom Docker image, please run the following command.
 
 ```bash
-$ docker build -f docker/tensorrt.Dockerfile --no-cache --tag=tensorrt:24.02 .
+$ docker build -f docker/tensorrt.Dockerfile --no-cache --tag=tensorrt:24.05 .
 ```
 
 ### Run Docker Container
 
 To run the custom Docker container, please run the following command.
 
 ```bash
-$ docker run -it --rm --gpus device=0 -v $(pwd):/mnt tensorrt:24.02
+$ docker run -it --rm --gpus device=0 -v $(pwd):/mnt tensorrt:24.05
 ```
 
 ### Build Application
@@ -33,7 +33,9 @@ $ cmake -B build
 $ cmake --build build --config Release --parallel
 ```
 
-Under the `build/src` directory, the custom plugin library will be saved as `libidentity_conv.so`, the engine builder will be saved as `build_engine`, and the engine runner will be saved as `run_engine`.
+Under the `build/src/plugins` directory, the custom plugin libraries will be saved in per-plugin subdirectories as `IdentityConvIPluginV2IOExt/libidentity_conv_iplugin_v2_io_ext.so` for `IPluginV2IOExt` and `IdentityConvIPluginV3/libidentity_conv_iplugin_v3.so` for `IPluginV3`, respectively. The `IPluginV2IOExt` plugin interface has been deprecated since TensorRT 10.0.0 and will be removed in the future. The `IPluginV3` plugin interface is the only recommended interface for custom plugin development.
+
+Under the `build/src/apps` directory, the engine builder will be saved as `build_engine`, and the engine runner will be saved as `run_engine`.
 
 ### Build ONNX Model
 
@@ -67,18 +69,32 @@ The ONNX model will be saved as `identity_neural_network.onnx` under the `data`
 
 To build the TensorRT engine from the ONNX model, please run the following command.
 
+#### Build Engine with IPluginV2IOExt
+
 ```bash
-$ ./build/src/build_engine
+$ ./build/src/apps/build_engine data/identity_neural_network.onnx build/src/plugins/IdentityConvIPluginV2IOExt/libidentity_conv_iplugin_v2_io_ext.so data/identity_neural_network_iplugin_v2_io_ext.engine
 ```
 
-The TensorRT engine will be saved as `identity_neural_network.engine` under the `data` directory.
+#### Build Engine with IPluginV3
+
+```bash
+$ ./build/src/apps/build_engine data/identity_neural_network.onnx build/src/plugins/IdentityConvIPluginV3/libidentity_conv_iplugin_v3.so data/identity_neural_network_iplugin_v3.engine
+```
 
 ### Run TensorRT Engine
 
 To run the TensorRT engine, please run the following command.
 
+#### Run Engine with IPluginV2IOExt
+
+```bash
+$ ./build/src/apps/run_engine build/src/plugins/IdentityConvIPluginV2IOExt/libidentity_conv_iplugin_v2_io_ext.so data/identity_neural_network_iplugin_v2_io_ext.engine
+```
+
+#### Run Engine with IPluginV3
+
 ```bash
-$ ./build/src/run_engine
+$ ./build/src/apps/run_engine build/src/plugins/IdentityConvIPluginV3/libidentity_conv_iplugin_v3.so data/identity_neural_network_iplugin_v3.engine
 ```
 
 If the custom plugin implementation and integration are correct, the output of the TensorRT engine should be the same as the input.
 
@@ -1,6 +1,6 @@
-FROM nvcr.io/nvidia/tensorrt:24.02-py3
+FROM nvcr.io/nvidia/tensorrt:24.05-py3
 
-ARG CMAKE_VERSION=3.28.0
+ARG CMAKE_VERSION=3.29.3
 ARG NUM_JOBS=8
 
 ENV DEBIAN_FRONTEND noninteractive
 
@@ -2,17 +2,27 @@
 
 ## Unit Test
 
+The unit tests assume that both the `IPluginV2IOExt` and `IPluginV3` plugins, and the engines that use each of them, have already been built.
+
 To run the unit test, please run the following command.
 
 ```bash
-python -m unittest test_plugin
-python -m unittest test_engine
+$ python -m unittest test_plugin
+$ python -m unittest test_engine
 ```
 
 ## Run TensorRT Engine
 
 To run the TensorRT engine, please run the following command.
 
+### IPluginV2IOExt
+
+```bash
+$ python main.py --engine_file_path ../data/identity_neural_network_iplugin_v2_io_ext.engine --plugin_lib_file_path ../build/src/plugins/IdentityConvIPluginV2IOExt/libidentity_conv_iplugin_v2_io_ext.so
+```
+
+### IPluginV3
+
 ```bash
-$ python main.py
+$ python main.py --engine_file_path ../data/identity_neural_network_iplugin_v3.engine --plugin_lib_file_path ../build/src/plugins/IdentityConvIPluginV3/libidentity_conv_iplugin_v3.so
 ```
@@ -1,5 +1,6 @@
 # Slightly modified from
 # https://github.com/NVIDIA/TensorRT/blob/c0c633cc629cc0705f0f69359f531a192e524c0f/samples/python/common.py
+# https://github.com/NVIDIA/TensorRT/blob/ccf119972b50299ba00d35d39f3938296e187f4e/samples/python/common_runtime.py
 
 #
 # SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
@@ -150,8 +151,6 @@ def allocate_buffers(engine: trt.ICudaEngine,
             raise ValueError(f"Binding {binding} has dynamic shape, " +\
                 "but no profile was specified.")
         size = trt.volume(shape)
-        if engine.has_implicit_batch_dimension:
-            size *= engine.max_batch_size
         dtype = np.dtype(trt.nptype(engine.get_tensor_dtype(binding)))
 
         # Allocate host and device buffers
@@ -219,23 +218,38 @@ def _do_inference_base(inputs, outputs, stream, execute_async):
     return [out.host for out in outputs]
 
 
-# This function is generalized for multiple inputs/outputs.
-# inputs and outputs are expected to be lists of HostDeviceMem objects.
-def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
-
-    def execute_async():
-        context.execute_async(batch_size=batch_size,
-                              bindings=bindings,
-                              stream_handle=stream)
-
-    return _do_inference_base(inputs, outputs, stream, execute_async)
+def _do_inference_base(inputs, outputs, stream, execute_async_func):
+    # Transfer input data to the GPU.
+    kind = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
+    [
+        cuda_call(
+            cudart.cudaMemcpyAsync(inp.device, inp.host, inp.nbytes, kind,
+                                   stream)) for inp in inputs
+    ]
+    # Run inference.
+    execute_async_func()
+    # Transfer predictions back from the GPU.
+    kind = cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
+    [
+        cuda_call(
+            cudart.cudaMemcpyAsync(out.host, out.device, out.nbytes, kind,
+                                   stream)) for out in outputs
+    ]
+    # Synchronize the stream
+    cuda_call(cudart.cudaStreamSynchronize(stream))
+    # Return only the host outputs.
+    return [out.host for out in outputs]
 
 
-# This function is generalized for multiple inputs/outputs for full dimension networks.
+# This function is generalized for multiple inputs/outputs.
 # inputs and outputs are expected to be lists of HostDeviceMem objects.
-def do_inference_v2(context, bindings, inputs, outputs, stream):
+def do_inference(context, engine, bindings, inputs, outputs, stream):
 
-    def execute_async():
-        context.execute_async_v2(bindings=bindings, stream_handle=stream)
+    def execute_async_func():
+        context.execute_async_v3(stream_handle=stream)
 
-    return _do_inference_base(inputs, outputs, stream, execute_async)
+    # Setup context tensor address.
+    num_io = engine.num_io_tensors
+    for i in range(num_io):
+        context.set_tensor_address(engine.get_tensor_name(i), bindings[i])
+    return _do_inference_base(inputs, outputs, stream, execute_async_func)
@@ -1,3 +1,4 @@
+import argparse
 import numpy as np
 
 import common
@@ -6,8 +7,26 @@
 
 def main():
 
-    engine_file_path = "../data/identity_neural_network.engine"
-    plugin_lib_file_path = "../build/src/libidentity_conv.so"
+    # Add an argparser to specify the engine file path and plugin library file path.
+    parser = argparse.ArgumentParser(
+        description="Run an engine with Identity Plugin.")
+    parser.add_argument(
+        "--engine_file_path",
+        type=str,
+        default="../data/identity_neural_network_iplugin_v3.engine",
+        help="Path to the engine file.",
+    )
+    parser.add_argument(
+        "--plugin_lib_file_path",
+        type=str,
+        default=
+        "../build/src/plugins/IdentityConvIPluginV3/libidentity_conv_iplugin_v3.so",
+        help="Path to the plugin library file.",
+    )
+
+    args = parser.parse_args()
+    engine_file_path = args.engine_file_path
+    plugin_lib_file_path = args.plugin_lib_file_path
 
     common_runtime.load_plugin_lib(plugin_lib_file_path)
     engine = common_runtime.load_engine(engine_file_path)
@@ -46,11 +65,14 @@ def main():
 
     # Execute the engine.
     context = engine.create_execution_context()
-    common.do_inference_v2(context,
-                           bindings=bindings,
-                           inputs=inputs,
-                           outputs=outputs,
-                           stream=stream)
+    common.do_inference(
+        context=context,
+        engine=engine,
+        inputs=inputs,
+        outputs=outputs,
+        bindings=bindings,
+        stream=stream,
+    )
 
     # Print output tensor data.
     for host_device_buffer in outputs:
 
@@ -5,40 +5,54 @@
 import common_runtime
 
 
-class TestMain(unittest.TestCase):
+def test_engine(engine_file_path: str, plugin_lib_file_path: str):
+
+    common_runtime.load_plugin_lib(plugin_lib_file_path=plugin_lib_file_path)
+    engine = common_runtime.load_engine(engine_file_path=engine_file_path)
+
+    inputs, outputs, bindings, stream = common.allocate_buffers(
+        engine=engine, profile_idx=None)
 
-    def test_engine(self):
+    for host_device_buffer in inputs:
+        data = np.random.uniform(low=-10.0,
+                                 high=10.0,
+                                 size=host_device_buffer.shape).astype(
+                                     host_device_buffer.dtype).flatten()
+        np.copyto(host_device_buffer.host, data)
 
-        engine_file_path = "../data/identity_neural_network.engine"
-        plugin_lib_file_path = "../build/src/libidentity_conv.so"
+    context = engine.create_execution_context()
+    common.do_inference(
+        context=context,
+        engine=engine,
+        inputs=inputs,
+        outputs=outputs,
+        bindings=bindings,
+        stream=stream,
+    )
 
-        common_runtime.load_plugin_lib(
-            plugin_lib_file_path=plugin_lib_file_path)
-        engine = common_runtime.load_engine(engine_file_path=engine_file_path)
+    for input_host_device_buffer, output_host_device_buffer in zip(
+            inputs, outputs):
+        np.testing.assert_equal(input_host_device_buffer.host,
+                                output_host_device_buffer.host)
 
-        inputs, outputs, bindings, stream = common.allocate_buffers(
-            engine=engine, profile_idx=None)
+    common.free_buffers(inputs=inputs, outputs=outputs, stream=stream)
+
+
+class TestMain(unittest.TestCase):
 
-        for host_device_buffer in inputs:
-            data = np.random.uniform(low=-10.0,
-                                     high=10.0,
-                                     size=host_device_buffer.shape).astype(
-                                         host_device_buffer.dtype).flatten()
-            np.copyto(host_device_buffer.host, data)
+    def test_engine_v2(self):
 
-        context = engine.create_execution_context()
-        common.do_inference_v2(context,
-                               bindings=bindings,
-                               inputs=inputs,
-                               outputs=outputs,
-                               stream=stream)
+        engine_file_path = "../data/identity_neural_network_iplugin_v2_io_ext.engine"
+        plugin_lib_file_path = "../build/src/plugins/IdentityConvIPluginV2IOExt/libidentity_conv_iplugin_v2_io_ext.so"
+        test_engine(engine_file_path=engine_file_path,
+                    plugin_lib_file_path=plugin_lib_file_path)
 
-        for input_host_device_buffer, output_host_device_buffer in zip(
-                inputs, outputs):
-            np.testing.assert_equal(input_host_device_buffer.host,
-                                    output_host_device_buffer.host)
+    def test_engine_v3(self):
 
-        common.free_buffers(inputs=inputs, outputs=outputs, stream=stream)
+        engine_file_path = "../data/identity_neural_network_iplugin_v3.engine"
+        plugin_lib_file_path = "../build/src/plugins/IdentityConvIPluginV3/libidentity_conv_iplugin_v3.so"
+        test_engine(engine_file_path=engine_file_path,
+                    plugin_lib_file_path=plugin_lib_file_path)
 
 
 if __name__ == "__main__":