diff --git a/CHANGELOG.md b/CHANGELOG.md index a12ed8379..b0d16fb13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - implement NRI plugin reconnect mechanism - ensure the toolkit exits if NRI Plugin init fails - feat(nvcdi): Allow IPC sockets to not be discovered +- fix(cudacompat): Fix handling of CUDA compat on Orin ## v1.19.0 - Promote v1.19.0-rc.7 to v1.19.0 diff --git a/cmd/nvidia-cdi-hook/cudacompat/cuda-elf-header.go b/cmd/nvidia-cdi-hook/cudacompat/cuda-elf-header.go index d94652de8..50091a392 100644 --- a/cmd/nvidia-cdi-hook/cudacompat/cuda-elf-header.go +++ b/cmd/nvidia-cdi-hook/cudacompat/cuda-elf-header.go @@ -50,6 +50,9 @@ func (h elf32_Nhdr) sizeof() int { return 12 } +// GetCUDACompatElfHeader returns the elf header for the specified library. +// This should be equivalent to: +// readelf -p .note.cuda.fwd_compatibility {{.libraryPath}} func GetCUDACompatElfHeader(libraryPath string) (*compatElfHeader, error) { lib, err := elf.Open(libraryPath) if os.IsNotExist(err) { @@ -99,8 +102,13 @@ func alignUp[T uint32 | uint64, S uint64](size T, to S) int { return int((size + T(to) - 1) &^ (T(to) - 1)) } -func trim(data []byte, from int, len int) []byte { - return bytes.Trim(data[from:from+len], "\x00") +func trim(data []byte, from int, n int) []byte { + if len(data) == 0 { + return nil + } + from = min(len(data)-1, from) + to := min(len(data), from+n) + return bytes.Trim(data[from:to], "\x00") } func getCUDAFwdCompatibilitySection(lib *elf.File) *elf.Section { diff --git a/cmd/nvidia-cdi-hook/cudacompat/cuda-elf-header_test.go b/cmd/nvidia-cdi-hook/cudacompat/cuda-elf-header_test.go index c1c69e201..ac564bb29 100644 --- a/cmd/nvidia-cdi-hook/cudacompat/cuda-elf-header_test.go +++ b/cmd/nvidia-cdi-hook/cudacompat/cuda-elf-header_test.go @@ -33,12 +33,13 @@ func TestGetCUDACompatElfHeader(t *testing.T) { dataRoot := filepath.Join(moduleRoot, "testdata", "compat") testCases := []struct { - description string - filename string - expected *compatElfHeader + description string + filename string + expected *compatElfHeader + expectedError string }{ { - description: "wip", + description: "libcuda.so.575.57.08", filename: "libcuda.so.575.57.08", expected: &compatElfHeader{ Format: 1, @@ -48,7 +49,7 @@ func TestGetCUDACompatElfHeader(t *testing.T) { }, }, { - description: "wip", + description: "libcuda.so.590.44.01", filename: "libcuda.so.590.44.01", expected: &compatElfHeader{ Format: 1, @@ -57,6 +58,19 @@ func TestGetCUDACompatElfHeader(t *testing.T) { Device: []int{1, 2, 7, 8, 9, 10, 11, 12, 13, 14}, }, }, + { + description: "invalid json", + filename: "libcuda.invalid.json.so.99.88", + expectedError: "could not unmarshal JSON data", + }, + { + description: "orin-13.2.1", + filename: "libcuda.orin.13.2.1.so.1.1", + expected: &compatElfHeader{ + Format: 1, + CUDAVersion: "13.2", + }, + }, } for _, tc := range testCases { @@ -64,6 +78,11 @@ func TestGetCUDACompatElfHeader(t *testing.T) { libpath := filepath.Join(dataRoot, tc.filename) h, err := GetCUDACompatElfHeader(libpath) + if tc.expectedError != "" { + require.ErrorContains(t, err, tc.expectedError) + require.Nil(t, h) + return + } require.NoError(t, err) require.EqualValues(t, tc.expected, h) diff --git a/testdata/compat/libcuda.invalid.json.so.99.88 b/testdata/compat/libcuda.invalid.json.so.99.88 new file mode 100644 index 000000000..31e9c8901 Binary files /dev/null and b/testdata/compat/libcuda.invalid.json.so.99.88 differ diff --git a/testdata/compat/libcuda.orin.13.2.1.so.1.1 b/testdata/compat/libcuda.orin.13.2.1.so.1.1 new file mode 100644 index 000000000..fa49b5084 Binary files /dev/null and b/testdata/compat/libcuda.orin.13.2.1.so.1.1 differ diff --git a/testdata/compat/libcuda.so.575.57.08 b/testdata/compat/libcuda.so.575.57.08 index 5e2dcd471..8ee4daa0a 100644 Binary files a/testdata/compat/libcuda.so.575.57.08 and b/testdata/compat/libcuda.so.575.57.08 differ diff --git a/testdata/compat/libcuda.so.590.44.01 b/testdata/compat/libcuda.so.590.44.01 index d855974c8..ca5b7cef4 100644 Binary files a/testdata/compat/libcuda.so.590.44.01 and b/testdata/compat/libcuda.so.590.44.01 differ