Skip to content

Commit 2632dfa

Browse files
committed
[wip] [skip ci] Test OpenCL backend with amgcl solver
Enable vexcl backend for amgcl solver, which makes it possible to use GPGPU (either CUDA or OpenCL) in order to accelerate solution. * CMake option AMGCL_GPGPU (default: off) controls whether to compile GPGPU support. * CMake option AMGCL_GPGPU_BACKEND (default: OpenCL) selects vexcl backend (OpenCL/CUDA) * New setting in linear solver parameters: `use_gpgpu` enables GPGPU at runtime. * Environment variable OCL_DEVICE may be used to select a particular compute device.
1 parent 5e0f0e8 commit 2632dfa

6 files changed

Lines changed: 484 additions & 149 deletions

File tree

applications/trilinos_application/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ set( KRATOS_TRILINOS_APPLICATION_SOURCES
5050
${CMAKE_CURRENT_SOURCE_DIR}/custom_factories/trilinos_linear_solver_factory.cpp
5151
${CMAKE_SOURCE_DIR}/applications/FluidDynamicsApplication/fluid_dynamics_application_variables.cpp #TODO: this should REALLY NOT BE HERE
5252
${CMAKE_CURRENT_SOURCE_DIR}/custom_utilities/mpi_normal_calculation_utilities.cpp;
53+
${CMAKE_CURRENT_SOURCE_DIR}/amgcl_mpi_solver_impl.cpp;
5354
)
5455

5556
## Kratos tests sources. Enabled by default
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
#ifndef KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H
2+
#define KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H
3+
4+
#include <boost/range/iterator_range.hpp>
5+
#include <boost/property_tree/ptree.hpp>
6+
7+
#include <amgcl/adapter/crs_tuple.hpp>
8+
#include <amgcl/adapter/epetra.hpp>
9+
#include <amgcl/adapter/ublas.hpp>
10+
#include <amgcl/adapter/zero_copy.hpp>
11+
#include <amgcl/adapter/block_matrix.hpp>
12+
#include <amgcl/backend/builtin.hpp>
13+
#include <amgcl/value_type/static_matrix.hpp>
14+
#include <amgcl/solver/runtime.hpp>
15+
16+
#include <amgcl/mpi/util.hpp>
17+
#include <amgcl/mpi/make_solver.hpp>
18+
#include <amgcl/mpi/preconditioner.hpp>
19+
20+
#ifdef AMGCL_GPGPU
21+
# include <amgcl/backend/vexcl.hpp>
22+
# include <amgcl/backend/vexcl_static_matrix.hpp>
23+
#endif
24+
25+
#include "Epetra_FECrsMatrix.h"
26+
#include "Epetra_FEVector.h"
27+
#include "trilinos_space.h"
28+
29+
namespace Kratos
30+
{
31+
32+
#ifdef AMGCL_GPGPU
33+
vex::Context& vexcl_context();
34+
35+
template <int TBlockSize>
36+
void register_vexcl_static_matrix_type();
37+
#endif
38+
39+
// Spacialization of AMGCLScalarSolve for distribued systems.
40+
template <class TSparseSpaceType>
41+
typename std::enable_if<TSparseSpaceType::IsDistributed(), void>::type
42+
AMGCLScalarSolve(
43+
typename TSparseSpaceType::MatrixType& rA,
44+
typename TSparseSpaceType::VectorType& rX,
45+
typename TSparseSpaceType::VectorType& rB,
46+
typename TSparseSpaceType::IndexType& rIterationNumber,
47+
double& rResidual,
48+
const boost::property_tree::ptree &amgclParams,
49+
int verbosity_level,
50+
bool use_gpgpu
51+
)
52+
{
53+
#ifdef AMGCL_GPGPU
54+
if (use_gpgpu && vexcl_context()) {
55+
auto &ctx = vexcl_context();
56+
57+
typedef amgcl::backend::vexcl<double> Backend;
58+
59+
typedef
60+
amgcl::mpi::make_solver<
61+
amgcl::runtime::mpi::preconditioner<Backend>,
62+
amgcl::runtime::solver::wrapper
63+
>
64+
Solver;
65+
66+
Backend::params bprm;
67+
bprm.q = ctx;
68+
69+
Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams, bprm);
70+
71+
std::size_t n = rA.NumMyRows();
72+
73+
vex::vector<double> b(ctx, n, rB.Values());
74+
vex::vector<double> x(ctx, n, rX.Values());
75+
76+
std::tie(rIterationNumber, rResidual) = solve(b, x);
77+
78+
vex::copy(x.begin(), x.end(), rX.Values());
79+
} else
80+
#endif
81+
{
82+
typedef amgcl::backend::builtin<double> Backend;
83+
84+
typedef
85+
amgcl::mpi::make_solver<
86+
amgcl::runtime::mpi::preconditioner<Backend>,
87+
amgcl::runtime::solver::wrapper
88+
>
89+
Solver;
90+
91+
Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams);
92+
93+
std::size_t n = rA.NumMyRows();
94+
95+
auto b_range = boost::make_iterator_range(rB.Values(), rB.Values() + n);
96+
auto x_range = boost::make_iterator_range(rX.Values(), rX.Values() + n);
97+
98+
std::tie(rIterationNumber, rResidual) = solve(b_range, x_range);
99+
}
100+
}
101+
102+
// Spacialization of AMGCLBlockSolve for distribued systems.
103+
template <int TBlockSize, class TSparseSpaceType>
104+
typename std::enable_if<TSparseSpaceType::IsDistributed(), void>::type
105+
AMGCLBlockSolve(
106+
typename TSparseSpaceType::MatrixType & rA,
107+
typename TSparseSpaceType::VectorType& rX,
108+
typename TSparseSpaceType::VectorType& rB,
109+
typename TSparseSpaceType::IndexType& rIterationNumber,
110+
double& rResidual,
111+
boost::property_tree::ptree amgclParams,
112+
int verbosity_level,
113+
bool use_gpgpu
114+
)
115+
{
116+
if(amgclParams.get<std::string>("precond.class") != "amg")
117+
amgclParams.erase("precond.coarsening");
118+
else
119+
amgclParams.put("precond.coarsening.aggr.block_size",1);
120+
121+
typedef amgcl::static_matrix<double, TBlockSize, TBlockSize> val_type;
122+
typedef amgcl::static_matrix<double, TBlockSize, 1> rhs_type;
123+
124+
std::size_t n = rA.RowMap().NumMyElements();
125+
std::size_t nb = n / TBlockSize;
126+
127+
#ifdef AMGCL_GPGPU
128+
if (use_gpgpu && vexcl_context()) {
129+
auto &ctx = vexcl_context();
130+
register_vexcl_static_matrix_type<TBlockSize>();
131+
132+
typedef amgcl::backend::vexcl<val_type> Backend;
133+
134+
typedef
135+
amgcl::mpi::make_solver<
136+
amgcl::runtime::mpi::preconditioner<Backend>,
137+
amgcl::runtime::solver::wrapper
138+
>
139+
Solver;
140+
141+
typename Backend::params bprm;
142+
bprm.q = ctx;
143+
144+
Solver solve(
145+
MPI_COMM_WORLD,
146+
amgcl::adapter::block_matrix<val_type>(amgcl::adapter::map(rA)),
147+
amgclParams, bprm
148+
);
149+
150+
auto b_begin = reinterpret_cast<const rhs_type*>(rB.Values());
151+
auto x_begin = reinterpret_cast<rhs_type*>(rX.Values());
152+
153+
vex::vector<rhs_type> x(ctx, nb, x_begin);
154+
vex::vector<rhs_type> b(ctx, nb, b_begin);
155+
156+
std::tie(rIterationNumber, rResidual) = solve(b, x);
157+
158+
vex::copy(x.begin(), x.end(), x_begin);
159+
} else
160+
#endif
161+
{
162+
typedef amgcl::backend::builtin<val_type> Backend;
163+
164+
typedef
165+
amgcl::mpi::make_solver<
166+
amgcl::runtime::mpi::preconditioner<Backend>,
167+
amgcl::runtime::solver::wrapper
168+
>
169+
Solver;
170+
171+
Solver solve(
172+
MPI_COMM_WORLD,
173+
amgcl::adapter::block_matrix<val_type>(amgcl::adapter::map(rA)),
174+
amgclParams
175+
);
176+
177+
auto b_begin = reinterpret_cast<const rhs_type*>(rB.Values());
178+
auto x_begin = reinterpret_cast<rhs_type*>(rX.Values());
179+
180+
auto b_range = boost::make_iterator_range(b_begin, b_begin + nb);
181+
auto x_range = boost::make_iterator_range(x_begin, x_begin + nb);
182+
183+
std::tie(rIterationNumber, rResidual) = solve(b_range, x_range);
184+
}
185+
}
186+
187+
// Exlplicit instantiations:
188+
template void AMGCLScalarSolve< TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector> >(
189+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::MatrixType& rA,
190+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rX,
191+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rB,
192+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::IndexType& rIterationNumber,
193+
double& rResidual,
194+
const boost::property_tree::ptree &amgclParams,
195+
int verbosity_level,
196+
bool use_gpgpu
197+
);
198+
199+
#define INSTANTIATE_BLOCK_SOLVER(B) \
200+
template void AMGCLBlockSolve<B, TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector> >( \
201+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::MatrixType& rA, \
202+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rX, \
203+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::VectorType& rB, \
204+
TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector>::IndexType& rIterationNumber, \
205+
double& rResidual, \
206+
boost::property_tree::ptree amgclParams, \
207+
int verbosity_level, \
208+
bool use_gpgpu \
209+
)
210+
211+
INSTANTIATE_BLOCK_SOLVER(2);
212+
INSTANTIATE_BLOCK_SOLVER(3);
213+
INSTANTIATE_BLOCK_SOLVER(4);
214+
215+
#undef INSTANTIATE_BLOCK_SOLVER
216+
217+
} // namespace Kratos
218+
219+
#endif

applications/trilinos_application/external_includes/amgcl_mpi_solve_functions.h

Lines changed: 7 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,7 @@
11
#ifndef KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H
22
#define KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H
33

4-
#include <boost/range/iterator_range.hpp>
5-
6-
#include <amgcl/adapter/crs_tuple.hpp>
7-
#include <amgcl/adapter/epetra.hpp>
8-
#include <amgcl/adapter/ublas.hpp>
9-
#include <amgcl/adapter/zero_copy.hpp>
10-
#include <amgcl/adapter/block_matrix.hpp>
11-
#include <amgcl/backend/builtin.hpp>
12-
#include <amgcl/value_type/static_matrix.hpp>
13-
#include <amgcl/solver/runtime.hpp>
14-
15-
#include <amgcl/mpi/util.hpp>
16-
#include <amgcl/mpi/make_solver.hpp>
17-
#include <amgcl/mpi/preconditioner.hpp>
4+
#include <boost/property_tree/ptree.hpp>
185

196
namespace Kratos
207
{
@@ -29,27 +16,9 @@ AMGCLScalarSolve(
2916
typename TSparseSpaceType::IndexType& rIterationNumber,
3017
double& rResidual,
3118
const boost::property_tree::ptree &amgclParams,
32-
int verbosity_level
33-
)
34-
{
35-
typedef amgcl::backend::builtin<double> Backend;
36-
37-
typedef
38-
amgcl::mpi::make_solver<
39-
amgcl::runtime::mpi::preconditioner<Backend>,
40-
amgcl::runtime::solver::wrapper
41-
>
42-
Solver;
43-
44-
Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams);
45-
46-
std::size_t n = rA.NumMyRows();
47-
48-
auto b_range = boost::make_iterator_range(rB.Values(), rB.Values() + n);
49-
auto x_range = boost::make_iterator_range(rX.Values(), rX.Values() + n);
50-
51-
std::tie(rIterationNumber, rResidual) = solve(b_range, x_range);
52-
}
19+
int verbosity_level,
20+
bool use_opencl
21+
);
5322

5423
// Spacialization of AMGCLBlockSolve for distribued systems.
5524
template <int TBlockSize, class TSparseSpaceType>
@@ -61,41 +30,9 @@ AMGCLBlockSolve(
6130
typename TSparseSpaceType::IndexType& rIterationNumber,
6231
double& rResidual,
6332
boost::property_tree::ptree amgclParams,
64-
int verbosity_level
65-
)
66-
{
67-
if(amgclParams.get<std::string>("precond.class") != "amg")
68-
amgclParams.erase("precond.coarsening");
69-
else
70-
amgclParams.put("precond.coarsening.aggr.block_size",1);
71-
72-
typedef amgcl::static_matrix<double, TBlockSize, TBlockSize> val_type;
73-
typedef amgcl::static_matrix<double, TBlockSize, 1> rhs_type;
74-
typedef amgcl::backend::builtin<val_type> Backend;
75-
76-
std::size_t n = rA.RowMap().NumMyElements();
77-
78-
typedef
79-
amgcl::mpi::make_solver<
80-
amgcl::runtime::mpi::preconditioner<Backend>,
81-
amgcl::runtime::solver::wrapper
82-
>
83-
Solver;
84-
85-
Solver solve(
86-
MPI_COMM_WORLD,
87-
amgcl::adapter::block_matrix<val_type>(amgcl::adapter::map(rA)),
88-
amgclParams
89-
);
90-
91-
auto b_begin = reinterpret_cast<const rhs_type*>(rB.Values());
92-
auto x_begin = reinterpret_cast<rhs_type*>(rX.Values());
93-
94-
auto b_range = boost::make_iterator_range(b_begin, b_begin + n / TBlockSize);
95-
auto x_range = boost::make_iterator_range(x_begin, x_begin + n / TBlockSize);
96-
97-
std::tie(rIterationNumber, rResidual) = solve(b_range, x_range);
98-
}
33+
int verbosity_level,
34+
bool use_opencl
35+
);
9936

10037
} // namespace Kratos
10138

kratos/CMakeLists.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ set( KRATOS_CORE_SOURCES
8888
${CMAKE_CURRENT_SOURCE_DIR}/elements/mesh_element.cpp;
8989
${CMAKE_CURRENT_SOURCE_DIR}/conditions/mesh_condition.cpp;
9090
${CMAKE_CURRENT_SOURCE_DIR}/processes/apply_periodic_boundary_condition_process.cpp;
91+
${CMAKE_CURRENT_SOURCE_DIR}/linear_solvers/amgcl_solver_impl.cpp;
9192
)
9293

9394
## Kratos python interface code
@@ -155,6 +156,24 @@ add_library(KratosCore SHARED ${KRATOS_CORE_SOURCES} ${KRATOS_CORE_TESTING_ENGIN
155156
target_link_libraries(KratosCore PUBLIC gidpost) #${Boost_LIBRARIES} ${PYTHON_LIBRARIES} gidpost )
156157
set_target_properties(KratosCore PROPERTIES COMPILE_DEFINITIONS "KRATOS_CORE=IMPORT,API")
157158

159+
option(AMGCL_GPGPU "Enable GPGPU backend for AMGCL linear solver" OFF)
160+
if (AMGCL_GPGPU)
161+
find_package(VexCL)
162+
163+
set(AMGCL_GPGPU_BACKEND "OpenCL" CACHE STRING "Select AMGCL GPGPU backend (OpenCL/CUDA)")
164+
set_property(CACHE AMGCL_GPGPU_BACKEND PROPERTY STRINGS "OpenCL" "CUDA")
165+
166+
if ("${AMGCL_GPGPU_BACKEND}" STREQUAL "OpenCL" AND TARGET VexCL::OpenCL)
167+
target_link_libraries(KratosCore PUBLIC VexCL::OpenCL)
168+
target_compile_definitions(KratosCore PUBLIC AMGCL_GPGPU)
169+
elseif ("${AMGCL_GPGPU_BACKEND}" STREQUAL "CUDA" AND TARGET VexCL::CUDA)
170+
target_link_libraries(KratosCore PUBLIC VexCL::CUDA)
171+
target_compile_definitions(KratosCore PUBLIC AMGCL_GPGPU)
172+
else()
173+
message(WARNING "AMGCL GPGPU backend not found")
174+
endif()
175+
endif()
176+
158177
## Define library Kratos which defines the basic python interface
159178
pybind11_add_module(Kratos MODULE THIN_LTO ${KRATOS_PYTHON_SOURCES})
160179
# add_library(Kratos SHARED ${KRATOS_PYTHON_SOURCES})

0 commit comments

Comments
 (0)