
Commit 31ef627

Update

1 parent dec8343 commit 31ef627

10 files changed: 580 additions & 6 deletions


README.md

Lines changed: 2 additions & 2 deletions
````diff
@@ -13,15 +13,15 @@ The ONNX model we created is a simple identity neural network that consists of t
 To build the custom Docker image, please run the following command.
 
 ```bash
-$ docker build -f docker/tensorrt.Dockerfile --no-cache --tag=tensorrt:23.12 .
+$ docker build -f docker/tensorrt.Dockerfile --no-cache --tag=tensorrt:24.02 .
 ```
 
 ### Run Docker Container
 
 To run the custom Docker container, please run the following command.
 
 ```bash
-$ docker run -it --rm --gpus device=0 -v $(pwd):/mnt tensorrt:23.12
+$ docker run -it --rm --gpus device=0 -v $(pwd):/mnt tensorrt:24.02
 ```
 
 ### Build Application
````

docker/tensorrt.Dockerfile

Lines changed: 4 additions & 3 deletions
```diff
@@ -1,4 +1,4 @@
-FROM nvcr.io/nvidia/tensorrt:23.12-py3
+FROM nvcr.io/nvidia/tensorrt:24.02-py3
 
 ARG CMAKE_VERSION=3.28.0
 ARG NUM_JOBS=8
@@ -33,7 +33,8 @@ RUN cd /tmp && \
 RUN rm -rf /tmp/*
 
 RUN pip install --upgrade pip setuptools wheel
-RUN pip install numpy==1.26.3 \
+RUN pip install cuda-python==12.3.0 \
+    numpy==1.26.3 \
     onnx==1.15.0
 RUN pip install --extra-index-url https://pypi.ngc.nvidia.com onnx_graphsurgeon==0.3.27
-RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
+RUN pip install torch==2.3.0+cu121 --index-url https://download.pytorch.org/whl/cu121
```
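
With these pins, the image couples cuda-python 12.3.0 and a cu121 build of PyTorch 2.3.0 to the 24.02 TensorRT base. A quick sanity check inside the running container might look like the sketch below; the versions printed are assumptions based on the pins above, not output from this commit.

```python
# Sanity-check sketch; run inside the tensorrt:24.02 container.
import tensorrt as trt
import torch
from cuda import cudart

print("TensorRT:", trt.__version__)   # whatever the 24.02 base image bundles
print("PyTorch:", torch.__version__)  # expected 2.3.0+cu121 per the pin above
err, count = cudart.cudaGetDeviceCount()
assert err == cudart.cudaError_t.cudaSuccess and count > 0
print(f"{count} CUDA device(s) visible")
```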

python/.gitignore

Lines changed: 1 addition & 0 deletions
```
__pycache__
```

python/README.md

Lines changed: 10 additions & 0 deletions
````markdown
# TensorRT Python Inference

## Unit Test

To run the unit tests, please run the following commands.

```bash
python -m unittest test_plugin
python -m unittest test_engine
```
````
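
The test modules named above are part of this commit but not shown on this page. For orientation only, here is a hypothetical skeleton of what `test_engine` might check for the identity network; `run_identity_engine` is an assumed helper, not code from this commit.

```python
# Hypothetical skeleton; the real test_engine.py is not reproduced here.
import unittest

import numpy as np


class TestEngine(unittest.TestCase):

    def test_identity(self):
        x = np.random.rand(1, 3, 8, 8).astype(np.float32)
        y = run_identity_engine(x)  # assumed helper wrapping common.py utilities
        np.testing.assert_array_equal(y, x)  # identity: output equals input


if __name__ == "__main__":
    unittest.main()
```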

python/common.py

Lines changed: 317 additions & 0 deletions
```python
# Slightly modified from
# https://github.com/NVIDIA/TensorRT/blob/c0c633cc629cc0705f0f69359f531a192e524c0f/samples/python/common.py

#
# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import os
import ctypes
from typing import Optional, List

import numpy as np
import tensorrt as trt
from cuda import cuda, cudart

try:
    # FileNotFoundError does not exist on Python 2; fall back to IOError.
    FileNotFoundError
except NameError:
    FileNotFoundError = IOError

EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)


def check_cuda_err(err):
    if isinstance(err, cuda.CUresult):
        if err != cuda.CUresult.CUDA_SUCCESS:
            raise RuntimeError("Cuda Error: {}".format(err))
    elif isinstance(err, cudart.cudaError_t):
        if err != cudart.cudaError_t.cudaSuccess:
            raise RuntimeError("Cuda Runtime Error: {}".format(err))
    else:
        raise RuntimeError("Unknown error type: {}".format(err))


def cuda_call(call):
    err, res = call[0], call[1:]
    check_cuda_err(err)
    if len(res) == 1:
        res = res[0]
    return res


def GiB(val):
    return val * 1 << 30
```
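
For orientation, cuda-python calls return a tuple whose first element is a status code; `cuda_call` unpacks the remaining results and raises via `check_cuda_err` on failure. A minimal usage sketch, assuming the helpers above are in scope:

```python
# Minimal cuda_call usage sketch.
stream = cuda_call(cudart.cudaStreamCreate())   # unpacked handle, or raises
dev_ptr = cuda_call(cudart.cudaMalloc(GiB(1)))  # 1 GiB device allocation
cuda_call(cudart.cudaFree(dev_ptr))
cuda_call(cudart.cudaStreamDestroy(stream))
```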
common.py (continued):

```python
def add_help(description):
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    args, _ = parser.parse_known_args()


def find_sample_data(description="Runs a TensorRT Python sample",
                     subfolder="",
                     find_files=[],
                     err_msg=""):
    """
    Parses sample arguments.

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (List[str]): A list of filenames to find. Each filename will be replaced with an absolute path.
        err_msg (str): Message appended to the error raised when a file cannot be located.

    Returns:
        Tuple[List[str], List[str]]: The data directory paths and the absolute paths of the located files.
    """

    # Standard command-line arguments for all samples.
    kDEFAULT_DATA_ROOT = os.path.join(os.sep, "usr", "src", "tensorrt", "data")
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "-d",
        "--datadir",
        help="Location of the TensorRT sample data directory, and any additional data directories.",
        action="append",
        default=[kDEFAULT_DATA_ROOT],
    )
    args, _ = parser.parse_known_args()

    def get_data_path(data_dir):
        # If the subfolder exists, append it to the path, otherwise use the provided path as-is.
        data_path = os.path.join(data_dir, subfolder)
        if not os.path.exists(data_path):
            if data_dir != kDEFAULT_DATA_ROOT:
                print("WARNING: " + data_path + " does not exist. Trying " +
                      data_dir + " instead.")
            data_path = data_dir
        # Make sure the data directory exists.
        if not os.path.exists(data_path) and data_dir != kDEFAULT_DATA_ROOT:
            print(
                "WARNING: {:} does not exist. Please provide the correct data path with the -d option."
                .format(data_path))
        return data_path

    data_paths = [get_data_path(data_dir) for data_dir in args.datadir]
    return data_paths, locate_files(data_paths, find_files, err_msg)


def locate_files(data_paths, filenames, err_msg=""):
    """
    Locates the specified files in the specified data directories.
    If a file exists in multiple data directories, the first directory is used.

    Args:
        data_paths (List[str]): The data directories.
        filenames (List[str]): The names of the files to find.

    Returns:
        List[str]: The absolute paths of the files.

    Raises:
        FileNotFoundError if a file could not be located.
    """
    found_files = [None] * len(filenames)
    for data_path in data_paths:
        # Find all requested files.
        for index, (found, filename) in enumerate(zip(found_files, filenames)):
            if not found:
                file_path = os.path.abspath(os.path.join(data_path, filename))
                if os.path.exists(file_path):
                    found_files[index] = file_path

    # Check that all files were found.
    for f, filename in zip(found_files, filenames):
        if not f or not os.path.exists(f):
            raise FileNotFoundError(
                "Could not find {:}. Searched in data paths: {:}\n{:}".format(
                    filename, data_paths, err_msg))
    return found_files
```
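
A small sketch of `locate_files` on its own; the directories and file name here are illustrative assumptions:

```python
# Search two candidate directories for a model file; raises FileNotFoundError
# (with err_msg appended) if it is found in neither.
[onnx_path] = locate_files(["/mnt/models", "."], ["identity.onnx"],
                           err_msg="Generate the ONNX model first.")
```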
common.py (continued):

```python
class HostDeviceMem:
    """Pair of host and device memory, where the host memory is wrapped in a numpy array"""

    def __init__(self,
                 size: int,
                 dtype: np.dtype,
                 name: Optional[str] = None,
                 shape: Optional[trt.Dims] = None,
                 format: Optional[trt.TensorFormat] = None):
        nbytes = size * dtype.itemsize
        host_mem = cuda_call(cudart.cudaMallocHost(nbytes))
        pointer_type = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))

        self._host = np.ctypeslib.as_array(ctypes.cast(host_mem, pointer_type),
                                           (size, ))
        self._device = cuda_call(cudart.cudaMalloc(nbytes))
        self._nbytes = nbytes
        self.name = name
        self.shape = shape
        self.format = format
        self.dtype = dtype

    @property
    def host(self) -> np.ndarray:
        return self._host

    @host.setter
    def host(self, arr: np.ndarray):
        if arr.size > self.host.size:
            raise ValueError(
                f"Tried to fit an array of size {arr.size} into host memory of size {self.host.size}"
            )
        np.copyto(self.host[:arr.size], arr.flat, casting='safe')

    @property
    def device(self) -> int:
        return self._device

    @property
    def nbytes(self) -> int:
        return self._nbytes

    def __str__(self):
        return f"Host:\n{self.host}\nDevice:\n{self.device}\nSize:\n{self.nbytes}\n"

    def __repr__(self):
        return self.__str__()

    def free(self):
        cuda_call(cudart.cudaFree(self.device))
        cuda_call(cudart.cudaFreeHost(self.host.ctypes.data))
```
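
`HostDeviceMem` owns a pinned host buffer and a matching device allocation; its `host` setter copies an array into the pinned buffer after a size check. A brief sketch with an illustrative size (`memcpy_host_to_device` is defined further below):

```python
# Sketch: a 16-element float32 host/device pair.
mem = HostDeviceMem(16, np.dtype(np.float32))
mem.host = np.arange(16, dtype=np.float32)   # copy into pinned host memory
memcpy_host_to_device(mem.device, mem.host)  # stage onto the device
mem.free()                                   # release both allocations
```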
common.py (continued):

```python
# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
# If engine uses dynamic shapes, specify a profile to find the maximum input & output size.
def allocate_buffers(engine: trt.ICudaEngine,
                     profile_idx: Optional[int] = None):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda_call(cudart.cudaStreamCreate())
    tensor_names = [
        engine.get_tensor_name(i) for i in range(engine.num_io_tensors)
    ]
    for binding in tensor_names:
        # get_tensor_profile_shape returns (min_shape, optimal_shape, max_shape)
        # Pick out the max shape to allocate enough memory for the binding.
        format = engine.get_tensor_format(binding)
        shape = engine.get_tensor_shape(
            binding
        ) if profile_idx is None else engine.get_tensor_profile_shape(
            binding, profile_idx)[-1]
        shape_valid = np.all([s >= 0 for s in shape])
        if not shape_valid and profile_idx is None:
            raise ValueError(f"Binding {binding} has dynamic shape, "
                             "but no profile was specified.")
        size = trt.volume(shape)
        if engine.has_implicit_batch_dimension:
            size *= engine.max_batch_size
        dtype = np.dtype(trt.nptype(engine.get_tensor_dtype(binding)))

        # Allocate host and device buffers
        bindingMemory = HostDeviceMem(size,
                                      dtype,
                                      name=binding,
                                      shape=shape,
                                      format=format)

        # Append the device buffer to device bindings.
        bindings.append(int(bindingMemory.device))

        # Append to the appropriate list.
        if engine.get_tensor_mode(binding) == trt.TensorIOMode.INPUT:
            inputs.append(bindingMemory)
        else:
            outputs.append(bindingMemory)
    return inputs, outputs, bindings, stream


# Frees the resources allocated in allocate_buffers
def free_buffers(inputs: List[HostDeviceMem], outputs: List[HostDeviceMem],
                 stream: cudart.cudaStream_t):
    for mem in inputs + outputs:
        mem.free()
    cuda_call(cudart.cudaStreamDestroy(stream))


# Wrapper for cudaMemcpy which infers copy size and does error checking
def memcpy_host_to_device(device_ptr: int, host_arr: np.ndarray):
    nbytes = host_arr.size * host_arr.itemsize
    cuda_call(
        cudart.cudaMemcpy(device_ptr, host_arr, nbytes,
                          cudart.cudaMemcpyKind.cudaMemcpyHostToDevice))


# Wrapper for cudaMemcpy which infers copy size and does error checking
def memcpy_device_to_host(host_arr: np.ndarray, device_ptr: int):
    nbytes = host_arr.size * host_arr.itemsize
    cuda_call(
        cudart.cudaMemcpy(host_arr, device_ptr, nbytes,
                          cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost))


def _do_inference_base(inputs, outputs, stream, execute_async):
    # Transfer input data to the GPU.
    kind = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
    [
        cuda_call(
            cudart.cudaMemcpyAsync(inp.device, inp.host, inp.nbytes, kind,
                                   stream)) for inp in inputs
    ]
    # Run inference.
    execute_async()
    # Transfer predictions back from the GPU.
    kind = cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
    [
        cuda_call(
            cudart.cudaMemcpyAsync(out.host, out.device, out.nbytes, kind,
                                   stream)) for out in outputs
    ]
    # Synchronize the stream
    cuda_call(cudart.cudaStreamSynchronize(stream))
    # Return only the host outputs.
    return [out.host for out in outputs]


# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):

    def execute_async():
        context.execute_async(batch_size=batch_size,
                              bindings=bindings,
                              stream_handle=stream)

    return _do_inference_base(inputs, outputs, stream, execute_async)


# This function is generalized for multiple inputs/outputs for full dimension networks.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
def do_inference_v2(context, bindings, inputs, outputs, stream):

    def execute_async():
        context.execute_async_v2(bindings=bindings, stream_handle=stream)

    return _do_inference_base(inputs, outputs, stream, execute_async)
```
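
Putting the helpers together, a rough end-to-end sketch of the intended flow; the engine file name and input shape are assumptions, and `do_inference_v2` targets the pre-TensorRT-10 `execute_async_v2` API these helpers are written against:

```python
# End-to-end sketch (assumes an "identity.engine" file and the helpers above).
import numpy as np
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
with open("identity.engine", "rb") as f:
    engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
context = engine.create_execution_context()

inputs, outputs, bindings, stream = allocate_buffers(engine)
inputs[0].host = np.random.rand(1, 3, 8, 8).astype(np.float32)
[result] = do_inference_v2(context, bindings=bindings, inputs=inputs,
                           outputs=outputs, stream=stream)
print(result[:5])  # for the identity network this echoes the input
free_buffers(inputs, outputs, stream)
```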
