Skip to content

Commit fde02d2

Browse files
authored
Merge pull request #286 from abergeron/dynamic_load
Add dynamic loading of libraries.
2 parents 307b8a6 + 0783e8e commit fde02d2

47 files changed

Lines changed: 1822 additions & 759 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

doc/installation.rst

Lines changed: 24 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,41 +5,40 @@ The library is routinely tested on OS X and linux and, less
55
frequently, on Windows. The OSes most frequently tested are:
66

77
- Debian 6
8-
- Ubuntu 14.04
9-
- Mac OS X 10.11
8+
- Ubuntu 16.04
9+
- macOS 10.12
1010
- Windows 7
1111

1212
It should also work on any decently recent OS not listed here. If you
1313
get an error during the build on your favorite OS, please report it
1414
and we will attempt to fix it.
1515

16-
Requirements
17-
------------
16+
Build Requirements
17+
------------------
1818

1919
- cmake >= 3.0 (cmake_).
2020
- a c99-compliant compiler (or MSVC if on windows).
21-
- (optional) CUDA >= 6.5 (cuda_).
22-
- (optional) NVIDIA NCCL (nccl_).
23-
- (optional) OpenCL runtime.
24-
- (optional) clBLAS (clblas_).
2521
- (optional) libcheck (check_) to run the C tests.
2622
- (optional) python (python_) for the python bindings.
2723
- (optional) mako (mako_) for development or running the python bindings.
2824
- (optional) Cython >= 0.21 (cython_) for the python bindings.
2925
- (optional) nosetests (nosetests_) to run the python tests.
3026

31-
.. note::
32-
If you have neither an OpenCL runtime or a CUDA runtime, the
33-
library might still build, but will be rather useless.
27+
Run Requirements
28+
----------------
3429

35-
.. note::
36-
We support CUDA GPUs with `compute capability 2.0 (Fermi)
37-
<https://developer.nvidia.com/cuda-gpus>`_ and up.
30+
No matter what was available at build time, this library comes with
31+
dynamic loaders for the following libraries. You don't need to have any
32+
of these available, but you won't be able to use the associated
33+
functionality.
3834

39-
.. note::
40-
In the case you want to build with collective operation support for CUDA,
41-
you will need CUDA GPUs with `compute capability 3.0 (Kepler)
42-
<https://developer.nvidia.com/cuda-gpus>`_ and up plus CUDA >= 7.
35+
* For CUDA:
36+
- CUDA (cuda_) version 7.0 or more, with the appropriate driver
37+
- (optional) NCCL (nccl_) for the collectives interface
38+
39+
* For OpenCL:
40+
- OpenCL version 1.1 or more
41+
- (optional) clBLAS (clblas_) or CLBlast (clblast_) for BLAS functionality
4342

4443
Download
4544
--------
@@ -125,18 +124,9 @@ can also reboot the machine to do that.
125124
Mac-specific instructions
126125
-------------------------
127126

128-
To get the compiler you need to install Xcode which is available for
129-
free from the App Store. Don't forget to install the command-line
130-
tools afterwards.
131-
132-
On Xcode 4.x these are installed by going to the download tab of the
133-
preferences window and selecting the "Command-line Tools" download.
134-
135-
If you have Xcode 5, ensure you update to 5.0.2 or later. Prior
136-
versions will not look in /usr/local for includes or libraries and
137-
this will cause a lot of errors. You can update by using the
138-
"Software Update..." function of the Apple menu or by running
139-
'xcode-select --install' on the command line.
127+
The only supported compiler is the clang version that comes with
128+
Xcode. Select the appropriate version of Xcode for your version of
129+
macOS.
140130

141131
It might be possible to use a version of gcc built using Homebrew or
142132
MacPorts, but this is untested and unsupported.
@@ -177,9 +167,8 @@ Running Tests
177167
everything is ok even if you intend on just using the C library.
178168

179169
To run the C tests, enter the build directory (the one where you ran
180-
cmake) and run 'make test'. It will run using the first OpenCL and
181-
the first CUDA device it finds skipping these if the corresponding
182-
backend wasn't built.
170+
cmake), select a target device by exporting DEVICE (or
171+
GPUARRAY_TEST_DEVICE) and run 'make test'.
183172

184173
If you get an error message similar to this one:
185174

@@ -215,6 +204,8 @@ you can confirm which device it is running on.
215204

216205
.. _clblas: https://github.com/clMathLibraries/clBLAS
217206

207+
.. _clblast: https://github.com/CNugteren/CLBlast
208+
218209
.. _cuda: https://developer.nvidia.com/category/zone/cuda-zone
219210

220211
.. _nccl: https://github.com/NVIDIA/nccl

src/CMakeLists.txt

Lines changed: 10 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,6 @@ if(CMAKE_COMPILER_IS_GNUCC)
55
add_definitions(-Wdeclaration-after-statement)
66
endif()
77

8-
find_package(CUDA)
9-
find_package(OpenCL)
10-
if(OpenCL_FOUND)
11-
find_package(clBLAS)
12-
if(NOT CLBLAS_FOUND)
13-
find_package(CLBlast)
14-
endif()
15-
endif()
16-
if(CUDA_FOUND)
17-
find_package(NCCL)
18-
endif()
19-
208
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
219

2210
add_custom_command(
@@ -60,6 +48,12 @@ gpuarray_kernel.c
6048
gpuarray_extension.c
6149
gpuarray_elemwise.c
6250
gpuarray_reduction.c
51+
gpuarray_buffer_cuda.c
52+
gpuarray_blas_cuda_cublas.c
53+
gpuarray_collectives_cuda_nccl.c
54+
gpuarray_buffer_opencl.c
55+
gpuarray_blas_opencl_clblas.c
56+
gpuarray_blas_opencl_clblast.c
6357
)
6458

6559
check_function_exists(strlcat HAVE_STRL)
@@ -77,93 +71,16 @@ if(NOT HAVE_MKSTEMP)
7771
list(APPEND _GPUARRAY_SRC gpuarray_mkstemp.c)
7872
endif()
7973

80-
if (CUDA_FOUND)
81-
if(NCCL_FOUND)
82-
if (CUDA_VERSION_MAJOR LESS 7)
83-
message( WARNING "This package requires CUDA 7.0 or more (building with NCCL). Found version ${CUDA_VERSION_STRING}")
84-
set(CUDA_FOUND 0)
85-
endif()
86-
else(NCCL_FOUND)
87-
if (CUDA_VERSION_MAJOR LESS 6 OR
88-
(CUDA_VERSION_MAJOR EQUAL 6 AND CUDA_VERSION_MINOR EQUAL 0))
89-
message( WARNING "This package requires CUDA 6.5 or more. Found version ${CUDA_VERSION_STRING}")
90-
set(CUDA_FOUND 0)
91-
endif()
92-
endif(NCCL_FOUND)
93-
endif()
94-
95-
if (CUDA_FOUND)
96-
if (APPLE)
97-
FIND_LIBRARY(CUDADRV_LIBRARY CUDA)
98-
FIND_PATH(CUDADRV_INCLUDE CUDA/cuda.h)
99-
# this is somewhat a hack, but otherwise cublas_v2.h isn't found
100-
set(CUDADRV_INCLUDE ${CUDADRV_INCLUDE} ${CUDA_TOOLKIT_INCLUDE})
101-
endif()
102-
if(NOT CUDADRV_LIBRARY)
103-
set(CUDADRV_LIBRARY ${CUDA_CUDA_LIBRARY})
104-
set(CUDADRV_INCLUDE ${CUDA_TOOLKIT_INCLUDE})
105-
endif()
106-
107-
find_cuda_helper_libs(nvrtc)
108-
109-
if(CUDA_nvrtc_LIBRARY)
110-
message(STATUS "Building with NVRTC")
111-
add_definitions(-DWITH_NVRTC)
112-
set(CUDADRV_LIBRARY ${CUDADRV_LIBRARY} ${CUDA_nvrtc_LIBRARY})
113-
else()
114-
add_definitions(-DNVCC_BIN=${CUDA_NVCC_EXECUTABLE})
115-
endif()
116-
117-
list(APPEND _GPUARRAY_SRC gpuarray_buffer_cuda.c)
118-
add_definitions(-DWITH_CUDA)
119-
include_directories(${CUDADRV_INCLUDE})
120-
121-
list(APPEND _GPUARRAY_SRC gpuarray_blas_cuda_cublas.c)
122-
add_definitions(-DWITH_CUDA_CUBLAS)
123-
124-
set(CMAKE_REQUIRED_LIBRARIES ${CUDA_CUBLAS_LIBRARIES})
125-
126-
check_function_exists(cublasSgemmEx CUBLAS_SGEMMEX)
127-
if (CUBLAS_SGEMMEX)
128-
add_definitions(-DHAVE_CUBLAS_SGEMMEX)
129-
endif()
130-
131-
if(NCCL_FOUND)
132-
message(STATUS "Building with NCCL")
133-
set(BUILD_WITH_COLLECTIVES 1 PARENT_SCOPE)
134-
add_definitions(-DWITH_CUDA_NCCL)
135-
list(APPEND _GPUARRAY_SRC gpuarray_collectives_cuda_nccl.c)
136-
include_directories(${NCCL_INCLUDE_DIR})
137-
endif()
138-
endif()
139-
140-
if(OpenCL_FOUND)
141-
list(APPEND _GPUARRAY_SRC gpuarray_buffer_opencl.c)
142-
add_definitions(-DWITH_OPENCL)
143-
include_directories(${OpenCL_INCLUDE_DIRS})
144-
145-
if(CLBLAS_FOUND)
146-
message(STATUS "Building with CLBLAS")
147-
list(APPEND _GPUARRAY_SRC gpuarray_blas_opencl_clblas.c)
148-
add_definitions(-DWITH_OPENCL_CLBLAS)
149-
include_directories(${CLBLAS_INCLUDE_DIRS})
150-
elseif(CLBLAS_FOUND)
151-
message(STATUS "Building with CLBLAST")
152-
list(APPEND _GPUARRAY_SRC gpuarray_blas_opencl_clblast.c)
153-
add_definitions(-DWITH_OPENCL_CLBLAST)
154-
include_directories(${CLBLAST_INCLUDE_DIRS})
155-
endif()
156-
endif()
157-
15874
configure_file(
15975
${CMAKE_CURRENT_SOURCE_DIR}/private_config.h.in
16076
${CMAKE_CURRENT_SOURCE_DIR}/private_config.h
16177
)
16278

16379
add_subdirectory(util)
80+
add_subdirectory(loaders)
16481

16582
set_rel(GPUARRAY_SRC ${_GPUARRAY_SRC})
166-
list(APPEND GPUARRAY_SRC ${UTIL_SRC})
83+
list(APPEND GPUARRAY_SRC ${UTIL_SRC} ${LOADERS_SRC})
16784

16885
add_library(gpuarray SHARED ${GPUARRAY_SRC})
16986
set_target_properties(gpuarray PROPERTIES
@@ -174,23 +91,8 @@ set_target_properties(gpuarray PROPERTIES
17491

17592
add_library(gpuarray-static STATIC ${GPUARRAY_SRC})
17693

177-
if(CUDA_FOUND)
178-
target_link_libraries(gpuarray ${CUDADRV_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})
179-
target_link_libraries(gpuarray-static ${CUDADRV_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})
180-
if (NCCL_FOUND)
181-
target_link_libraries(gpuarray ${NCCL_LIBRARY})
182-
target_link_libraries(gpuarray-static ${NCCL_LIBRARY})
183-
endif()
184-
endif()
185-
186-
if(OpenCL_FOUND)
187-
target_link_libraries(gpuarray ${OpenCL_LIBRARIES})
188-
target_link_libraries(gpuarray-static ${OpenCL_LIBRARIES})
189-
if (CLBLAS_FOUND)
190-
target_link_libraries(gpuarray ${CLBLAS_LIBRARIES})
191-
target_link_libraries(gpuarray-static ${CLBLAS_LIBRARIES})
192-
endif()
193-
endif()
94+
target_link_libraries(gpuarray ${CMAKE_DL_LIBS})
95+
target_link_libraries(gpuarray-static ${CMAKE_DL_LIBS})
19496

19597
set(headers
19698
gpuarray/array.h

src/cache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#ifndef CACHE_H
22
#define CACHE_H
33

4-
#include <stdint.h>
54
#include <stdlib.h>
5+
#include <gpuarray/config.h>
66
#include "private_config.h"
77

88
typedef void *cache_key_t;

src/gen_types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def add_type(name, C, sz):
7373
int16_t exp;
7474
uint16_t hi;
7575
uint32_t lo;
76-
};
76+
} s;
7777
uint128_t raw;
7878
} u;
7979
} ga_quad;

src/gpuarray/error.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ enum ga_error {
3535
GA_MISC_ERROR,
3636
GA_COMM_ERROR,
3737
GA_XLARGE_ERROR,
38+
GA_LOAD_ERROR,
3839
/* Add more error types if needed, but at the end */
3940
/* Don't forget to sync with Gpu_error() */
4041
};

src/gpuarray_array.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,12 @@
55
#include <assert.h>
66
#include <stdarg.h>
77
#include <stddef.h>
8-
#if _MSC_VER < 1600
9-
#include <stdint.h>
10-
#endif
118
#include <stdlib.h>
129
#include <string.h>
1310
#include <errno.h>
1411

1512
#include "private.h"
13+
#include "gpuarray/config.h"
1614
#include "gpuarray/array.h"
1715
#include "gpuarray/error.h"
1816
#include "gpuarray/kernel.h"

0 commit comments

Comments
 (0)