[wip] [skip ci] Test OpenCL backend with amgcl solver

ddemidov · ddemidov · commit 2632dfa5d6d5 · 2019-01-22T21:56:24.000+03:00
Enable the VexCL backend for the AMGCL solver, which makes it possible to use
GPGPU (either CUDA or OpenCL) to accelerate the solution.

* CMake option AMGCL_GPGPU (default: off) controls whether to compile
  GPGPU support.
* CMake option AMGCL_GPGPU_BACKEND (default: OpenCL) selects vexcl
  backend (OpenCL/CUDA)
* New setting in linear solver parameters: `use_gpgpu` enables GPGPU at
  runtime.
* Environment variable OCL_DEVICE may be used to select a particular
  compute device.
diff --git a/applications/trilinos_application/CMakeLists.txt b/applications/trilinos_application/CMakeLists.txt
@@ -50,6 +50,7 @@ set( KRATOS_TRILINOS_APPLICATION_SOURCES
 	${CMAKE_CURRENT_SOURCE_DIR}/custom_factories/trilinos_linear_solver_factory.cpp
     ${CMAKE_SOURCE_DIR}/applications/FluidDynamicsApplication/fluid_dynamics_application_variables.cpp  #TODO: this should REALLY NOT BE HERE
     ${CMAKE_CURRENT_SOURCE_DIR}/custom_utilities/mpi_normal_calculation_utilities.cpp;
+    ${CMAKE_CURRENT_SOURCE_DIR}/amgcl_mpi_solver_impl.cpp;
 )
 
 ## Kratos tests sources. Enabled by default
diff --git a/applications/trilinos_application/amgcl_mpi_solver_impl.cpp b/applications/trilinos_application/amgcl_mpi_solver_impl.cpp
@@ -0,0 +1,219 @@
+#ifndef KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H
+#define KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H
+
+#include <boost/range/iterator_range.hpp>
+#include <boost/property_tree/ptree.hpp>
+
+#include <amgcl/adapter/crs_tuple.hpp>
+#include <amgcl/adapter/epetra.hpp>
+#include <amgcl/adapter/ublas.hpp>
+#include <amgcl/adapter/zero_copy.hpp>
+#include <amgcl/adapter/block_matrix.hpp>
+#include <amgcl/backend/builtin.hpp>
+#include <amgcl/value_type/static_matrix.hpp>
+#include <amgcl/solver/runtime.hpp>
+
+#include <amgcl/mpi/util.hpp>
+#include <amgcl/mpi/make_solver.hpp>
+#include <amgcl/mpi/preconditioner.hpp>
+
+#ifdef AMGCL_GPGPU
+#  include <amgcl/backend/vexcl.hpp>
+#  include <amgcl/backend/vexcl_static_matrix.hpp>
+#endif
+
+#include "Epetra_FECrsMatrix.h"
+#include "Epetra_FEVector.h"
+#include "trilinos_space.h"
+
+namespace Kratos
+{
+
+#ifdef AMGCL_GPGPU
+vex::Context& vexcl_context();
+
+template <int TBlockSize>
+void register_vexcl_static_matrix_type();
+#endif
+
+// Specialization of AMGCLScalarSolve for distributed systems.
+//
+// Sets up an AMGCL MPI solver for rA from amgclParams and solves with rB as
+// the right-hand side. The solution ends up in rX, and the two values returned
+// by the solver are stored in rIterationNumber and rResidual.
+//
+// When compiled with AMGCL_GPGPU, and use_gpgpu is set, and vexcl_context()
+// evaluates to true, the VexCL backend is used and the result is copied back
+// into rX. Otherwise the builtin backend works directly on the memory of rB
+// and rX. verbosity_level is not used by this function.
+template <class TSparseSpaceType>
+typename std::enable_if<TSparseSpaceType::IsDistributed(), void>::type
+AMGCLScalarSolve(
+    typename TSparseSpaceType::MatrixType& rA,
+    typename TSparseSpaceType::VectorType& rX,
+    typename TSparseSpaceType::VectorType& rB,
+    typename TSparseSpaceType::IndexType& rIterationNumber,
+    double& rResidual,
+    const boost::property_tree::ptree &amgclParams,
+    int verbosity_level,
+    bool use_gpgpu
+    )
+{
+#ifdef AMGCL_GPGPU
+    if (use_gpgpu && vexcl_context()) {
+        using Backend = amgcl::backend::vexcl<double>;
+        using Solver = amgcl::mpi::make_solver<
+            amgcl::runtime::mpi::preconditioner<Backend>,
+            amgcl::runtime::solver::wrapper
+            >;
+
+        auto &gpu_context = vexcl_context();
+
+        // Hand the VexCL context over to the backend parameters.
+        Backend::params backend_params;
+        backend_params.q = gpu_context;
+
+        Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams, backend_params);
+
+        const std::size_t local_size = rA.NumMyRows();
+
+        // VexCL vectors initialized from the host data of rB and rX.
+        vex::vector<double> device_rhs(gpu_context, local_size, rB.Values());
+        vex::vector<double> device_sol(gpu_context, local_size, rX.Values());
+
+        std::tie(rIterationNumber, rResidual) = solve(device_rhs, device_sol);
+
+        // Copy the computed solution back into rX.
+        vex::copy(device_sol.begin(), device_sol.end(), rX.Values());
+    } else
+#endif
+    {
+        using Backend = amgcl::backend::builtin<double>;
+        using Solver = amgcl::mpi::make_solver<
+            amgcl::runtime::mpi::preconditioner<Backend>,
+            amgcl::runtime::solver::wrapper
+            >;
+
+        Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams);
+
+        const std::size_t local_size = rA.NumMyRows();
+
+        // Wrap the vector storage in ranges; no data is copied here, so the
+        // solver writes the solution straight into rX.
+        auto rhs_first = rB.Values();
+        auto sol_first = rX.Values();
+        auto rhs_range = boost::make_iterator_range(rhs_first, rhs_first + local_size);
+        auto sol_range = boost::make_iterator_range(sol_first, sol_first + local_size);
+
+        std::tie(rIterationNumber, rResidual) = solve(rhs_range, sol_range);
+    }
+}
+
+// Specialization of AMGCLBlockSolve for distributed systems.
+//
+// Solves the system with TBlockSize x TBlockSize static matrices as the value
+// type: rA is viewed through amgcl::adapter::block_matrix, and the storage of
+// rB and rX is reinterpreted as arrays of TBlockSize x 1 blocks. The solution
+// ends up in rX, and the two values returned by the solver are stored in
+// rIterationNumber and rResidual.
+//
+// amgclParams is taken by value because it is adjusted locally before the
+// solver is constructed. When compiled with AMGCL_GPGPU, and use_gpgpu is set,
+// and vexcl_context() evaluates to true, the VexCL backend is used; otherwise
+// the builtin backend is used. verbosity_level is not used by this function.
+template <int TBlockSize, class TSparseSpaceType>
+typename std::enable_if<TSparseSpaceType::IsDistributed(), void>::type
+AMGCLBlockSolve(
+    typename TSparseSpaceType::MatrixType & rA,
+    typename TSparseSpaceType::VectorType& rX,
+    typename TSparseSpaceType::VectorType& rB,
+    typename TSparseSpaceType::IndexType& rIterationNumber,
+    double& rResidual,
+    boost::property_tree::ptree amgclParams,
+    int verbosity_level,
+    bool use_gpgpu
+    )
+{
+    // "precond.class" is read without a default, so a missing key makes
+    // get<>() throw (unchanged from the original behavior).
+    if(amgclParams.get<std::string>("precond.class") != "amg") {
+        // ptree::erase(key) only matches direct children by their literal key
+        // and does not interpret '.' as a path separator, so
+        // erase("precond.coarsening") on the root removes nothing. Erase the
+        // "coarsening" subtree from its parent node instead. The "precond"
+        // child is known to exist because "precond.class" was just read.
+        amgclParams.get_child("precond").erase("coarsening");
+    } else {
+        amgclParams.put("precond.coarsening.aggr.block_size",1);
+    }
+
+    typedef amgcl::static_matrix<double, TBlockSize, TBlockSize> val_type;
+    typedef amgcl::static_matrix<double, TBlockSize, 1> rhs_type;
+
+    // Number of local scalar rows and the corresponding number of blocks
+    // (integer division).
+    std::size_t n = rA.RowMap().NumMyElements();
+    std::size_t nb = n / TBlockSize;
+
+#ifdef AMGCL_GPGPU
+    if (use_gpgpu && vexcl_context()) {
+        auto &ctx = vexcl_context();
+        register_vexcl_static_matrix_type<TBlockSize>();
+
+        typedef amgcl::backend::vexcl<val_type> Backend;
+
+        typedef
+            amgcl::mpi::make_solver<
+                amgcl::runtime::mpi::preconditioner<Backend>,
+                amgcl::runtime::solver::wrapper
+                >
+            Solver;
+
+        // Hand the VexCL context over to the backend parameters.
+        typename Backend::params bprm;
+        bprm.q = ctx;
+
+        Solver solve(
+                MPI_COMM_WORLD,
+                amgcl::adapter::block_matrix<val_type>(amgcl::adapter::map(rA)),
+                amgclParams, bprm
+                );
+
+        // View the scalar storage of rB and rX as arrays of blocks.
+        auto b_begin = reinterpret_cast<const rhs_type*>(rB.Values());
+        auto x_begin = reinterpret_cast<rhs_type*>(rX.Values());
+
+        // VexCL vectors initialized from the host data.
+        vex::vector<rhs_type> x(ctx, nb, x_begin);
+        vex::vector<rhs_type> b(ctx, nb, b_begin);
+
+        std::tie(rIterationNumber, rResidual) = solve(b, x);
+
+        // Copy the computed solution back into the storage of rX.
+        vex::copy(x.begin(), x.end(), x_begin);
+    } else
+#endif
+    {
+        typedef amgcl::backend::builtin<val_type> Backend;
+
+        typedef
+            amgcl::mpi::make_solver<
+                amgcl::runtime::mpi::preconditioner<Backend>,
+                amgcl::runtime::solver::wrapper
+                >
+            Solver;
+
+        Solver solve(
+                MPI_COMM_WORLD,
+                amgcl::adapter::block_matrix<val_type>(amgcl::adapter::map(rA)),
+                amgclParams
+                );
+
+        // View the scalar storage of rB and rX as arrays of blocks; the
+        // ranges alias that storage, so the solution is written into rX.
+        auto b_begin = reinterpret_cast<const rhs_type*>(rB.Values());
+        auto x_begin = reinterpret_cast<rhs_type*>(rX.Values());
+
+        auto b_range = boost::make_iterator_range(b_begin, b_begin + nb);
+        auto x_range = boost::make_iterator_range(x_begin, x_begin + nb);
+
+        std::tie(rIterationNumber, rResidual) = solve(b_range, x_range);
+    }
+}
+
+// Explicit instantiations:
+namespace {
+// Sparse space the distributed solve functions are instantiated for.
+typedef TrilinosSpace<Epetra_FECrsMatrix, Epetra_FEVector> AmgclEpetraSpaceType;
+} // namespace
+
+// Scalar solver.
+template void AMGCLScalarSolve<AmgclEpetraSpaceType>(
+    AmgclEpetraSpaceType::MatrixType&,
+    AmgclEpetraSpaceType::VectorType&,
+    AmgclEpetraSpaceType::VectorType&,
+    AmgclEpetraSpaceType::IndexType&,
+    double&,
+    const boost::property_tree::ptree&,
+    int,
+    bool
+    );
+
+// Block solver, instantiated once per supported block size B.
+#define INSTANTIATE_BLOCK_SOLVER(B)                      \
+template void AMGCLBlockSolve<B, AmgclEpetraSpaceType>(  \
+    AmgclEpetraSpaceType::MatrixType&,                   \
+    AmgclEpetraSpaceType::VectorType&,                   \
+    AmgclEpetraSpaceType::VectorType&,                   \
+    AmgclEpetraSpaceType::IndexType&,                    \
+    double&,                                             \
+    boost::property_tree::ptree,                         \
+    int,                                                 \
+    bool                                                 \
+    )
+
+INSTANTIATE_BLOCK_SOLVER(2);
+INSTANTIATE_BLOCK_SOLVER(3);
+INSTANTIATE_BLOCK_SOLVER(4);
+
+#undef INSTANTIATE_BLOCK_SOLVER
+
+} // namespace Kratos
+
+#endif
diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_solve_functions.h b/applications/trilinos_application/external_includes/amgcl_mpi_solve_functions.h
@@ -1,20 +1,7 @@
 #ifndef KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H
 #define KRATOS_AMGCL_MPI_SOLVE_FUNCTIONS_H
 
-#include <boost/range/iterator_range.hpp>
-
-#include <amgcl/adapter/crs_tuple.hpp>
-#include <amgcl/adapter/epetra.hpp>
-#include <amgcl/adapter/ublas.hpp>
-#include <amgcl/adapter/zero_copy.hpp>
-#include <amgcl/adapter/block_matrix.hpp>
-#include <amgcl/backend/builtin.hpp>
-#include <amgcl/value_type/static_matrix.hpp>
-#include <amgcl/solver/runtime.hpp>
-
-#include <amgcl/mpi/util.hpp>
-#include <amgcl/mpi/make_solver.hpp>
-#include <amgcl/mpi/preconditioner.hpp>
+#include <boost/property_tree/ptree.hpp>
 
 namespace Kratos
 {
@@ -29,27 +16,9 @@ AMGCLScalarSolve(
     typename TSparseSpaceType::IndexType& rIterationNumber,
     double& rResidual,
     const boost::property_tree::ptree &amgclParams,
-    int verbosity_level
-    )
-{
-    typedef amgcl::backend::builtin<double> Backend;
-
-    typedef
-        amgcl::mpi::make_solver<
-            amgcl::runtime::mpi::preconditioner<Backend>,
-            amgcl::runtime::solver::wrapper
-            >
-        Solver;
-
-    Solver solve(MPI_COMM_WORLD, amgcl::adapter::map(rA), amgclParams);
-
-    std::size_t n = rA.NumMyRows();
-
-    auto b_range = boost::make_iterator_range(rB.Values(), rB.Values() + n);
-    auto x_range = boost::make_iterator_range(rX.Values(), rX.Values() + n);
-
-    std::tie(rIterationNumber, rResidual) = solve(b_range, x_range);
-}
+    int verbosity_level,
+    bool use_opencl
+    );
 
 // Spacialization of AMGCLBlockSolve for distribued systems.
 template <int TBlockSize, class TSparseSpaceType>
@@ -61,41 +30,9 @@ AMGCLBlockSolve(
     typename TSparseSpaceType::IndexType& rIterationNumber,
     double& rResidual,
     boost::property_tree::ptree amgclParams,
-    int verbosity_level
-    )
-{
-    if(amgclParams.get<std::string>("precond.class") != "amg")
-        amgclParams.erase("precond.coarsening");
-    else
-        amgclParams.put("precond.coarsening.aggr.block_size",1);
-
-    typedef amgcl::static_matrix<double, TBlockSize, TBlockSize> val_type;
-    typedef amgcl::static_matrix<double, TBlockSize, 1> rhs_type;
-    typedef amgcl::backend::builtin<val_type> Backend;
-
-    std::size_t n = rA.RowMap().NumMyElements();
-
-    typedef
-        amgcl::mpi::make_solver<
-            amgcl::runtime::mpi::preconditioner<Backend>,
-            amgcl::runtime::solver::wrapper
-            >
-        Solver;
-
-    Solver solve(
-            MPI_COMM_WORLD,
-            amgcl::adapter::block_matrix<val_type>(amgcl::adapter::map(rA)),
-            amgclParams
-            );
-
-    auto b_begin = reinterpret_cast<const rhs_type*>(rB.Values());
-    auto x_begin = reinterpret_cast<rhs_type*>(rX.Values());
-
-    auto b_range = boost::make_iterator_range(b_begin, b_begin + n / TBlockSize);
-    auto x_range = boost::make_iterator_range(x_begin, x_begin + n / TBlockSize);
-
-    std::tie(rIterationNumber, rResidual) = solve(b_range, x_range);
-}
+    int verbosity_level,
+    bool use_opencl
+    );
 
 } // namespace Kratos
 
diff --git a/kratos/CMakeLists.txt b/kratos/CMakeLists.txt
@@ -88,6 +88,7 @@ set( KRATOS_CORE_SOURCES
     ${CMAKE_CURRENT_SOURCE_DIR}/elements/mesh_element.cpp;
     ${CMAKE_CURRENT_SOURCE_DIR}/conditions/mesh_condition.cpp;
     ${CMAKE_CURRENT_SOURCE_DIR}/processes/apply_periodic_boundary_condition_process.cpp;
+    ${CMAKE_CURRENT_SOURCE_DIR}/linear_solvers/amgcl_solver_impl.cpp;
 )
 
 ## Kratos python interface code
@@ -155,6 +156,24 @@ add_library(KratosCore SHARED ${KRATOS_CORE_SOURCES} ${KRATOS_CORE_TESTING_ENGIN
 target_link_libraries(KratosCore PUBLIC gidpost) #${Boost_LIBRARIES} ${PYTHON_LIBRARIES} gidpost )
 set_target_properties(KratosCore PROPERTIES COMPILE_DEFINITIONS "KRATOS_CORE=IMPORT,API")
 
+# Optional GPGPU acceleration of the AMGCL linear solver through VexCL.
+#   AMGCL_GPGPU         - ON/OFF switch (default: OFF).
+#   AMGCL_GPGPU_BACKEND - VexCL backend to link: OpenCL (default) or CUDA.
+# If the requested backend is unsupported or its imported target is missing,
+# a warning is issued and KratosCore is built without GPGPU support.
+option(AMGCL_GPGPU "Enable GPGPU backend for AMGCL linear solver" OFF)
+if (AMGCL_GPGPU)
+    find_package(VexCL)
+
+    set(AMGCL_GPGPU_BACKEND "OpenCL" CACHE STRING "Select AMGCL GPGPU backend (OpenCL/CUDA)")
+    set_property(CACHE AMGCL_GPGPU_BACKEND PROPERTY STRINGS "OpenCL" "CUDA")
+
+    if (NOT "${AMGCL_GPGPU_BACKEND}" MATCHES "^(OpenCL|CUDA)$")
+        # Only the two backends above are supported; do not link arbitrary
+        # VexCL::<name> targets.
+        message(WARNING "Unsupported AMGCL_GPGPU_BACKEND '${AMGCL_GPGPU_BACKEND}' (expected OpenCL or CUDA); AMGCL GPGPU support is disabled")
+    elseif (TARGET VexCL::${AMGCL_GPGPU_BACKEND})
+        # PUBLIC: the AMGCL_GPGPU definition and the VexCL usage requirements
+        # propagate to targets linking KratosCore.
+        target_link_libraries(KratosCore PUBLIC VexCL::${AMGCL_GPGPU_BACKEND})
+        target_compile_definitions(KratosCore PUBLIC AMGCL_GPGPU)
+    else()
+        message(WARNING "AMGCL GPGPU backend '${AMGCL_GPGPU_BACKEND}' not found (target VexCL::${AMGCL_GPGPU_BACKEND} is missing); AMGCL GPGPU support is disabled")
+    endif()
+endif()
+
 ## Define library Kratos which defines the basic python interface
 pybind11_add_module(Kratos MODULE THIN_LTO ${KRATOS_PYTHON_SOURCES})
 # add_library(Kratos SHARED ${KRATOS_PYTHON_SOURCES})
diff --git a/kratos/linear_solvers/amgcl_solver.h b/kratos/linear_solvers/amgcl_solver.h
diff --git a/kratos/linear_solvers/amgcl_solver_impl.cpp b/kratos/linear_solvers/amgcl_solver_impl.cpp

Original file line number	Diff line number	Diff line change
`@@ -50,6 +50,7 @@ set( KRATOS_TRILINOS_APPLICATION_SOURCES`
`50`	`50`	`${CMAKE_CURRENT_SOURCE_DIR}/custom_factories/trilinos_linear_solver_factory.cpp`
`51`	`51`	`${CMAKE_SOURCE_DIR}/applications/FluidDynamicsApplication/fluid_dynamics_application_variables.cpp #TODO: this should REALLY NOT BE HERE`
`52`	`52`	`${CMAKE_CURRENT_SOURCE_DIR}/custom_utilities/mpi_normal_calculation_utilities.cpp;`
	`53`	`+ ${CMAKE_CURRENT_SOURCE_DIR}/amgcl_mpi_solver_impl.cpp;`
`53`	`54`	`)`
`54`	`55`
`55`	`56`	`## Kratos tests sources. Enabled by default`