diff --git a/include/Definitions/geometry_helper.h b/include/Definitions/geometry_helper.h index 3fcb9934..29821c9e 100644 --- a/include/Definitions/geometry_helper.h +++ b/include/Definitions/geometry_helper.h @@ -24,9 +24,9 @@ inline void compute_jacobian_elements(const DomainGeometry& domain_geometry, dou /* which is represented by: */ /* [arr, 0.5*art] */ /* [0.5*atr, att] */ - arr = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_alpha / fabs(detDF); - att = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_alpha / fabs(detDF); - art = (-Jtt * Jtr - Jrt * Jrr) * coeff_alpha / fabs(detDF); + arr = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_alpha / std::fabs(detDF); + att = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_alpha / std::fabs(detDF); + art = (-Jtt * Jtr - Jrt * Jrr) * coeff_alpha / std::fabs(detDF); /* Note that the inverse Jacobian matrix DF^{-1} is: */ /* 1.0 / det(DF) * */ /* [Jtt, -Jrt] */ diff --git a/include/Definitions/global_definitions.h b/include/Definitions/global_definitions.h index 2f4f1bef..d90ae9a6 100644 --- a/include/Definitions/global_definitions.h +++ b/include/Definitions/global_definitions.h @@ -79,50 +79,6 @@ enum class BetaCoeff ALPHA_INVERSE = 1 }; -/* ---------------------------- */ -/* Mumps - Constant Definitions */ -/* ---------------------------- */ -#ifdef GMGPOLAR_USE_MUMPS - #include "dmumps_c.h" -/* Mumps inline functions s.t. indices match documentation */ -inline int& ICNTL(DMUMPS_STRUC_C& mumps_solver, int I) -{ - return mumps_solver.icntl[(I)-1]; -} -inline double& CNTL(DMUMPS_STRUC_C& mumps_solver, int I) -{ - return mumps_solver.cntl[(I)-1]; -} -inline int& INFOG(DMUMPS_STRUC_C& mumps_solver, int I) -{ - return mumps_solver.infog[(I)-1]; -} - -constexpr int USE_COMM_WORLD = -987654; -constexpr int PAR_NOT_PARALLEL = 0; -constexpr int PAR_PARALLEL = 1; - -constexpr int JOB_INIT = -1; -constexpr int JOB_END = -2; -constexpr int JOB_REMOVE_SAVED_DATA = -3; -constexpr int JOB_FREE_INTERNAL_DATA = -4; -constexpr int JOB_SUPPRESS_OOC_FILES = -200; - -constexpr int JOB_ANALYSIS_PHASE = 1; -constexpr int JOB_FACTORIZATION_PHASE = 2; -constexpr int JOB_COMPUTE_SOLUTION = 3; -constexpr int JOB_ANALYSIS_AND_FACTORIZATION = 4; -constexpr int JOB_FACTORIZATION_AND_SOLUTION = 5; -constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION = 6; -constexpr int JOB_SAVE_INTERNAL_DATA = 7; -constexpr int JOB_RESTORE_INTERNAL_DATA = 8; -constexpr int JOB_DISTRIBUTE_RHS = 9; - -constexpr int SYM_UNSYMMETRIC = 0; -constexpr int SYM_POSITIVE_DEFINITE = 1; -constexpr int SYM_GENERAL_SYMMETRIC = 2; -#endif - // --------------------------------------- // // Function-like macros for LIKWID markers // // --------------------------------------- // diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl index 9f952eb8..e103d68a 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl @@ -60,7 +60,7 @@ void DirectSolver_COO_MUMPS_Take::nodeBuildSolverMatrixTake( double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */ double center_value = - (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */ + (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */ - left_value /* Center: (Left) */ - right_value /* Center: (Right) */ - bottom_value /* Center: (Bottom) */ @@ -184,13 +184,13 @@ void DirectSolver_COO_MUMPS_Take::nodeBuildSolverMatrixTake( double bottom_value = -coeff3 * (att(center_index) + att(bottom_index)); /* Bottom */ double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */ - double center_value = - (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */ - - left_value /* Center: (Left) */ - - right_value /* Center: (Right) */ - - bottom_value /* Center: (Bottom) */ - - top_value /* Center: (Top) */ - ); + double center_value = (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * + std::fabs(detDF(center_index)) /* beta_{i,j} */ + - left_value /* Center: (Left) */ + - right_value /* Center: (Right) */ + - bottom_value /* Center: (Bottom) */ + - top_value /* Center: (Top) */ + ); double bottom_right_value = +0.25 * (art(right_index) + art(bottom_index)); /* Bottom Right */ double top_right_value = -0.25 * (art(right_index) + art(top_index)); /* Top Right */ @@ -276,7 +276,7 @@ void DirectSolver_COO_MUMPS_Take::nodeBuildSolverMatrixTake( double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */ double center_value = - (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */ + (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */ - left_value /* Center: (Left) */ - right_value /* Center: (Right) */ - bottom_value /* Center: (Bottom) */ @@ -383,7 +383,7 @@ void DirectSolver_COO_MUMPS_Take::nodeBuildSolverMatrixTake( double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */ double center_value = - (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */ + (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */ - left_value /* Center: (Left) */ - right_value /* Center: (Right) */ - bottom_value /* Center: (Bottom) */ diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl index 55cdb111..08893805 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl @@ -798,80 +798,63 @@ SparseMatrixCSR DirectSolver_CSR_LU_Give::buildSolverMat SparseMatrixCSR solver_matrix(n, n, nnz_per_row); - if (num_omp_threads == 1) { - /* Single-threaded execution */ - for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - buildSolverMatrixCircleSection(i_r, solver_matrix); - } - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - buildSolverMatrixRadialSection(i_theta, solver_matrix); - } - } - else { - /* Multi-threaded execution: For Loops */ - const int num_circle_tasks = grid.numberSmootherCircles(); - const int additional_radial_tasks = grid.ntheta() % 3; - const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; - + const int num_smoother_circles = grid.numberSmootherCircles(); + const int additional_radial_tasks = grid.ntheta() % 3; + const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; + + /* ---------------- */ + /* Circular section */ + /* ---------------- */ + // We parallelize the loop with step 3 to avoid data race conditions between adjacent circles. #pragma omp parallel num_threads(num_omp_threads) - { + { #pragma omp for - for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid.numberSmootherCircles() - circle_task - 1; - buildSolverMatrixCircleSection(i_r, solver_matrix); - } + for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) { + buildSolverMatrixCircleSection(i_r, solver_matrix); + } /* Implicit barrier */ #pragma omp for - for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid.numberSmootherCircles() - circle_task - 1; - buildSolverMatrixCircleSection(i_r, solver_matrix); - } -#pragma omp for nowait - for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid.numberSmootherCircles() - circle_task - 1; - buildSolverMatrixCircleSection(i_r, solver_matrix); - } + for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) { + buildSolverMatrixCircleSection(i_r, solver_matrix); + } /* Implicit barrier */ +#pragma omp for + for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) { + buildSolverMatrixCircleSection(i_r, solver_matrix); + } /* Implicit barrier */ + } + + /* ---------------- */ + /* Radial section */ + /* ---------------- */ + // We parallelize the loop with step 3 to avoid data race conditions between adjacent radial lines. + // Due to the periodicity in the angular direction, we can have at most 2 additional radial tasks + // that are handled serially before the parallel loops. + if (additional_radial_tasks > 0) { + const int i_theta = 0; + buildSolverMatrixRadialSection(i_theta, solver_matrix); + } + + if (additional_radial_tasks > 1) { + const int i_theta = 1; + buildSolverMatrixRadialSection(i_theta, solver_matrix); + } +#pragma omp parallel num_threads(num_omp_threads) + { #pragma omp for - for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 0) { - int i_theta = radial_task + additional_radial_tasks; - buildSolverMatrixRadialSection(i_theta, solver_matrix); - } - else { - if (additional_radial_tasks == 0) { - buildSolverMatrixRadialSection(0, solver_matrix); - } - else if (additional_radial_tasks >= 1) { - buildSolverMatrixRadialSection(0, solver_matrix); - buildSolverMatrixRadialSection(1, solver_matrix); - } - } - } + for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { + const int i_theta = radial_task + additional_radial_tasks; + buildSolverMatrixRadialSection(i_theta, solver_matrix); + } /* Implicit barrier */ #pragma omp for - for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 1) { - int i_theta = radial_task + additional_radial_tasks; - buildSolverMatrixRadialSection(i_theta, solver_matrix); - } - else { - if (additional_radial_tasks == 0) { - buildSolverMatrixRadialSection(1, solver_matrix); - } - else if (additional_radial_tasks == 1) { - buildSolverMatrixRadialSection(2, solver_matrix); - } - else if (additional_radial_tasks == 2) { - buildSolverMatrixRadialSection(2, solver_matrix); - buildSolverMatrixRadialSection(3, solver_matrix); - } - } - } + for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { + const int i_theta = radial_task + additional_radial_tasks; + buildSolverMatrixRadialSection(i_theta, solver_matrix); + } /* Implicit barrier */ #pragma omp for - for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { - int i_theta = radial_task + additional_radial_tasks; - buildSolverMatrixRadialSection(i_theta, solver_matrix); - } - } + for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { + const int i_theta = radial_task + additional_radial_tasks; + buildSolverMatrixRadialSection(i_theta, solver_matrix); + } /* Implicit barrier */ } return solver_matrix; diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h b/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h similarity index 90% rename from include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h rename to include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h index 2e902808..e1f3c926 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h +++ b/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h @@ -33,16 +33,6 @@ class DirectSolver_CSR_LU_Give : public DirectSolver -1, 0, -1, -1, -1, -1 }; - const Stencil stencil_next_inner_DB_ = { - 7, 4, 8, - 1, 0, 2, - 5, 3, 6 - }; - const Stencil stencil_next_outer_DB_ = { - 7, 4, 8, - 1, 0, 2, - 5, 3, 6 - }; // clang-format on SparseMatrixCSR buildSolverMatrix(); diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Give/matrixStencil.inl b/include/DirectSolver/DirectSolver-CSR-LU-Give/matrixStencil.inl index 8e65f71f..58418146 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Give/matrixStencil.inl +++ b/include/DirectSolver/DirectSolver-CSR-LU-Give/matrixStencil.inl @@ -9,13 +9,11 @@ int DirectSolver_CSR_LU_Give::getStencilSize(int global_index) c int i_r, i_theta; grid.multiIndex(global_index, i_r, i_theta); - const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; - const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9; - const int size_stencil_interior = 9; - const int size_stencil_next_outer_boundary = 9; - const int size_stencil_outer_boundary = 1; + const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; + const int size_stencil_interior = 9; + const int size_stencil_outer_boundary = 1; - if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) { + if ((i_r > 0 && i_r < grid.nr() - 1)) { return size_stencil_interior; } else if (i_r == 0 && !DirBC_Interior) { @@ -24,12 +22,7 @@ int DirectSolver_CSR_LU_Give::getStencilSize(int global_index) c else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) { return size_stencil_outer_boundary; } - else if (i_r == 1 && DirBC_Interior) { - return size_stencil_next_inner_boundary; - } - else if (i_r == grid.nr() - 2) { - return size_stencil_next_outer_boundary; - } + throw std::out_of_range("Invalid index for stencil"); } @@ -42,7 +35,7 @@ const Stencil& DirectSolver_CSR_LU_Give::getStencil(int i_r) con assert(0 <= i_r && i_r < grid.nr()); assert(grid.nr() >= 4); - if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) { + if ((i_r > 0 && i_r < grid.nr() - 1)) { return stencil_interior_; } else if (i_r == 0 && !DirBC_Interior) { @@ -51,12 +44,7 @@ const Stencil& DirectSolver_CSR_LU_Give::getStencil(int i_r) con else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) { return stencil_DB_; } - else if (i_r == 1 && DirBC_Interior) { - return stencil_next_inner_DB_; - } - else if (i_r == grid.nr() - 2) { - return stencil_next_outer_DB_; - } + throw std::out_of_range("Invalid index for stencil"); } @@ -66,15 +54,10 @@ int DirectSolver_CSR_LU_Give::getNonZeroCountSolverMatrix() cons const PolarGrid& grid = DirectSolver::grid_; const bool DirBC_Interior = DirectSolver::DirBC_Interior_; - const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; - const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9; - const int size_stencil_interior = 9; - const int size_stencil_next_outer_boundary = 9; - const int size_stencil_outer_boundary = 1; - - assert(grid.nr() >= 4); + const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; + const int size_stencil_interior = 9; + const int size_stencil_outer_boundary = 1; return grid.ntheta() * - (size_stencil_inner_boundary + size_stencil_next_inner_boundary + (grid.nr() - 4) * size_stencil_interior + - size_stencil_next_outer_boundary + size_stencil_outer_boundary); + (size_stencil_inner_boundary + (grid.nr() - 2) * size_stencil_interior + size_stencil_outer_boundary); } diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl index b9a31e36..3cd34717 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl +++ b/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl @@ -172,13 +172,13 @@ void DirectSolver_CSR_LU_Take::nodeBuildSolverMatrixTake( double bottom_value = -coeff3 * (att[center_index] + att[bottom_index]); /* Bottom */ double top_value = -coeff4 * (att[center_index] + att[top_index]); /* Top */ - double center_value = - (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF[center_index]) /* beta_{i,j} */ - - left_value /* Center: (Left) */ - - right_value /* Center: (Right) */ - - bottom_value /* Center: (Bottom) */ - - top_value /* Center: (Top) */ - ); + double center_value = (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * + std::fabs(detDF[center_index]) /* beta_{i,j} */ + - left_value /* Center: (Left) */ + - right_value /* Center: (Right) */ + - bottom_value /* Center: (Bottom) */ + - top_value /* Center: (Top) */ + ); double bottom_right_value = +0.25 * (art[right_index] + art[bottom_index]); /* Bottom Right */ double top_right_value = -0.25 * (art[right_index] + art[top_index]); /* Top Right */ diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h b/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h similarity index 89% rename from include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h rename to include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h index a387371c..248fe70b 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h +++ b/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h @@ -33,16 +33,6 @@ class DirectSolver_CSR_LU_Take : public DirectSolver -1, 0, -1, -1, -1, -1 }; - const Stencil stencil_next_inner_DB_ = { - 7, 4, 8, - 1, 0, 2, - 5, 3, 6 - }; - const Stencil stencil_next_outer_DB_ = { - 7, 4, 8, - 1, 0, 2, - 5, 3, 6 - }; // clang-format on SparseMatrixCSR buildSolverMatrix(); diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Take/matrixStencil.inl b/include/DirectSolver/DirectSolver-CSR-LU-Take/matrixStencil.inl index cb160569..90887972 100644 --- a/include/DirectSolver/DirectSolver-CSR-LU-Take/matrixStencil.inl +++ b/include/DirectSolver/DirectSolver-CSR-LU-Take/matrixStencil.inl @@ -9,13 +9,11 @@ int DirectSolver_CSR_LU_Take::getStencilSize(int global_index) c int i_r, i_theta; grid.multiIndex(global_index, i_r, i_theta); - const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; - const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9; - const int size_stencil_interior = 9; - const int size_stencil_next_outer_boundary = 9; - const int size_stencil_outer_boundary = 1; + const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; + const int size_stencil_interior = 9; + const int size_stencil_outer_boundary = 1; - if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) { + if ((i_r > 0 && i_r < grid.nr() - 1)) { return size_stencil_interior; } else if (i_r == 0 && !DirBC_Interior) { @@ -24,12 +22,7 @@ int DirectSolver_CSR_LU_Take::getStencilSize(int global_index) c else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) { return size_stencil_outer_boundary; } - else if (i_r == 1 && DirBC_Interior) { - return size_stencil_next_inner_boundary; - } - else if (i_r == grid.nr() - 2) { - return size_stencil_next_outer_boundary; - } + throw std::out_of_range("Invalid index for stencil"); } @@ -42,7 +35,7 @@ const Stencil& DirectSolver_CSR_LU_Take::getStencil(int i_r) con assert(0 <= i_r && i_r < grid.nr()); assert(grid.nr() >= 4); - if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) { + if ((i_r > 0 && i_r < grid.nr() - 1)) { return stencil_interior_; } else if (i_r == 0 && !DirBC_Interior) { @@ -51,12 +44,7 @@ const Stencil& DirectSolver_CSR_LU_Take::getStencil(int i_r) con else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) { return stencil_DB_; } - else if (i_r == 1 && DirBC_Interior) { - return stencil_next_inner_DB_; - } - else if (i_r == grid.nr() - 2) { - return stencil_next_outer_DB_; - } + throw std::out_of_range("Invalid index for stencil"); } @@ -66,15 +54,10 @@ int DirectSolver_CSR_LU_Take::getNonZeroCountSolverMatrix() cons const PolarGrid& grid = DirectSolver::grid_; const bool DirBC_Interior = DirectSolver::DirBC_Interior_; - const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; - const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9; - const int size_stencil_interior = 9; - const int size_stencil_next_outer_boundary = 9; - const int size_stencil_outer_boundary = 1; - - assert(grid.nr() >= 4); + const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7; + const int size_stencil_interior = 9; + const int size_stencil_outer_boundary = 1; return grid.ntheta() * - (size_stencil_inner_boundary + size_stencil_next_inner_boundary + (grid.nr() - 4) * size_stencil_interior + - size_stencil_next_outer_boundary + size_stencil_outer_boundary); + (size_stencil_inner_boundary + (grid.nr() - 2) * size_stencil_interior + size_stencil_outer_boundary); } diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl index 6ef9cbbb..3a845970 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl @@ -897,8 +897,11 @@ void ExtrapolatedSmootherGive::applyAscOrthoCircleSection(int i_ ConstVector x, ConstVector rhs, Vector temp) { + using extrapolated_smoother_give::nodeApplyAscOrthoCircleGive; + const PolarGrid& grid = ExtrapolatedSmoother::grid_; const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; assert(i_r >= 0 && i_r < grid.numberSmootherCircles() + 1); @@ -912,9 +915,8 @@ void ExtrapolatedSmootherGive::applyAscOrthoCircleSection(int i_ level_cache.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); // Apply Asc Ortho at the current node - extrapolated_smoother_give::nodeApplyAscOrthoCircleGive( - i_r, i_theta, grid, ExtrapolatedSmoother::DirBC_Interior_, smoother_color, x, rhs, temp, - arr, att, art, detDF, coeff_beta); + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid, DirBC_Interior, smoother_color, x, rhs, temp, arr, att, art, + detDF, coeff_beta); } } @@ -923,11 +925,15 @@ void ExtrapolatedSmootherGive::applyAscOrthoRadialSection(int i_ ConstVector x, ConstVector rhs, Vector temp) { + using extrapolated_smoother_give::nodeApplyAscOrthoRadialGive; + const PolarGrid& grid = ExtrapolatedSmoother::grid_; const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; const double theta = grid.theta(i_theta); + /* We need to obtain left contributions from the circular section for AscOrtho. */ /* !!! i_r = grid.numberSmootherCircles()-1 !!! */ for (int i_r = grid.numberSmootherCircles() - 1; i_r < grid.nr(); i_r++) { const double r = grid.radius(i_r); @@ -937,8 +943,7 @@ void ExtrapolatedSmootherGive::applyAscOrthoRadialSection(int i_ level_cache.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); // Apply Asc Ortho at the current node - extrapolated_smoother_give::nodeApplyAscOrthoRadialGive( - i_r, i_theta, grid, ExtrapolatedSmoother::DirBC_Interior_, smoother_color, x, rhs, temp, - arr, att, art, detDF, coeff_beta); + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid, DirBC_Interior, smoother_color, x, rhs, temp, arr, att, art, + detDF, coeff_beta); } } \ No newline at end of file diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.inl new file mode 100644 index 00000000..8c88946e --- /dev/null +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.inl @@ -0,0 +1,307 @@ +#include "../../../include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h" + +#include "../../../include/Definitions/geometry_helper.h" + +namespace extrapolated_smoother_give +{ + +#ifdef GMGPOLAR_USE_MUMPS +// When using the MUMPS solver, the matrix is assembled in COO format. +static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, + int column, double value) +{ + matrix.row_index(ptr + offset) = row; + matrix.col_index(ptr + offset) = column; + matrix.value(ptr + offset) += value; +} +#else +// When using the in-house solver, the matrix is stored in CSR format. +static inline void update_CSR_COO_MatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, + int column, double value) +{ + matrix.row_nz_index(row, offset) = column; + matrix.row_nz_entry(row, offset) += value; +} +#endif + +} // namespace extrapolated_smoother_give + +template +void ExtrapolatedSmootherGive::nodeBuildInteriorBoundarySolverMatrix_i_r_0( + int i_theta, const PolarGrid& grid, bool DirBC_Interior, InnerBoundaryMatrix& matrix, double arr, double att, + double art, double detDF, double coeff_beta) +{ + using extrapolated_smoother_give::update_CSR_COO_MatrixElement; + + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + int ptr, offset; + int row, column; + double value; + + const int i_r = 0; + + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + /* Fill result(i,j) */ + int center_index = i_theta; + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = getCircleAscIndex(i_r, i_theta); + + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + // h1 gets replaced with 2 * R0. + // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). + // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. + double h1 = 2.0 * grid.radius(0); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + (grid.ntheta() / 2)); + + const int center_index = i_theta; + const int left_index = i_theta_AcrossOrigin; + const int right_index = i_theta; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + const int center_nz_index = getCircleAscIndex(i_r, i_theta); + const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); + const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); + const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); + + int nz_index; + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + if (i_theta & 1) { + /* i_theta % 2 == 1 */ + /* -| x | o | x | */ + /* -| | | | */ + /* -| O | o | o | */ + /* -| | | | */ + /* -| x | o | x | */ + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* beta_{i,j} */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Left]; + column = left_index; + value = -coeff1 * arr; /* Left */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + /* Fill matrix row of (i-1,j) */ + /* From view the view of the across origin node, */ + /* the directions are roatated by 180 degrees in the stencil! */ + row = left_index; + ptr = left_nz_index; + + const Stencil& LeftStencil = CenterStencil; + + offset = LeftStencil[StencilPosition::Left]; + column = center_index; + value = -coeff1 * arr; /* Right -> Left*/ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = LeftStencil[StencilPosition::Center]; + column = left_index; + value = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* i_theta % 2 == 0 */ + /* -| o | o | o | */ + /* -| | | | */ + /* -| X | o | x | */ + /* -| | | | */ + /* -| o | o | o | */ + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + /* Fill matrix row of (i,j-1) */ + row = bottom_index; + ptr = bottom_nz_index; + + const Stencil& BottomStencil = CenterStencil; + + offset = BottomStencil[StencilPosition::Center]; + column = bottom_index; + value = +coeff3 * att; /* Center: (Top) */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + /* Fill matrix row of (i,j+1) */ + row = top_index; + ptr = top_nz_index; + + const Stencil& TopStencil = CenterStencil; + + offset = TopStencil[StencilPosition::Center]; + column = top_index; + value = +coeff4 * att; /* Center: (Bottom) */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + } + } +} + +template +void ExtrapolatedSmootherGive::nodeBuildInteriorBoundarySolverMatrix_i_r_1( + int i_theta, const PolarGrid& grid, bool DirBC_Interior, InnerBoundaryMatrix& matrix, double arr, double att, + double art, double detDF, double coeff_beta) +{ + using extrapolated_smoother_give::update_CSR_COO_MatrixElement; + + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + int ptr, offset; + int row, column; + double value; + + const int i_r = 1; + + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left_index = i_theta; + + /* -------------------------- */ + /* Cyclic Tridiagonal Section */ + /* i_r % 2 == 1 */ + if (i_r & 1) { + if (i_theta & 1) { + /* i_theta % 2 == 1 */ + /* | x | o | x | */ + /* | | | | */ + /* | o | O | o | */ + /* | | | | */ + /* | x | o | x | */ + + /* Fill matrix row of (i-1,j) */ + + /* Only in the case of AcrossOrigin */ + if (!DirBC_Interior) { + row = left_index; + ptr = getCircleAscIndex(i_r - 1, i_theta); + + const Stencil& LeftStencil = getStencil(i_r - 1, i_theta); + + offset = LeftStencil[StencilPosition::Center]; + column = left_index; + value = +coeff1 * arr; /* Center: (Right) */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + } + } + } +} + +template +typename ExtrapolatedSmootherGive::InnerBoundaryMatrix +ExtrapolatedSmootherGive::buildInteriorBoundarySolverMatrix() +{ + const PolarGrid& grid = ExtrapolatedSmoother::grid_; + const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmoother::num_omp_threads_; + + const int ntheta = grid.ntheta(); + + // The interior boundary matrix is symmetric due to the periodicity in the theta direction + // and the assumption that ntheta is even, which is required for the across-origin discretization. + // We store all non-zero entries of the matrix, both in COO format (for MUMPS) + // and in CSR format (for the in-house solver). If the COO matrix is marked as symmetric, + // the COO_Mumps_Solver optimizes the factorization by only using the upper triangular part of the matrix, + // which is extracted by the COO_Mumps_Solver internally. +#ifdef GMGPOLAR_USE_MUMPS + const int nnz = getNonZeroCountCircleAsc(0); + SparseMatrixCOO inner_boundary_solver_matrix(ntheta, ntheta, nnz); + inner_boundary_solver_matrix.is_symmetric(true); +#else + std::function nnz_per_row = [&](int i_theta) { + if (DirBC_Interior) + return 1; + else + return i_theta % 2 == 0 ? 1 : 2; + }; + SparseMatrixCSR inner_boundary_solver_matrix(ntheta, ntheta, nnz_per_row); +#endif + + { + const int i_r = 0; + const double r = grid.radius(i_r); + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + { + const int global_index = grid.index(i_r, i_theta); + const double theta = grid.theta(i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); + + nodeBuildInteriorBoundarySolverMatrix_i_r_0(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, + arr, att, art, detDF, coeff_beta); + } + } + } + + { + const int i_r = 1; + const double r = grid.radius(i_r); + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + { + const int global_index = grid.index(i_r, i_theta); + const double theta = grid.theta(i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); + + nodeBuildInteriorBoundarySolverMatrix_i_r_1(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, + arr, att, art, detDF, coeff_beta); + } + } + } + + return inner_boundary_solver_matrix; +} diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildTridiagonalAsc.inl similarity index 75% rename from include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.inl rename to include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildTridiagonalAsc.inl index f5310376..c12e50f4 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildTridiagonalAsc.inl @@ -1,9 +1,10 @@ -#pragma once +#include "../../../include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h" + +#include "../../../include/Definitions/geometry_helper.h" namespace extrapolated_smoother_give { -/* Tridiagonal matrices */ static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, int batch, int row, int column, double value) { @@ -15,34 +16,15 @@ static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, solver.cyclic_corner(batch) += value; } -/* Inner Boundary COO/CSR matrix */ -#ifdef GMGPOLAR_USE_MUMPS -static inline void updateCOOCSRMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_index(ptr + offset) = row; - matrix.col_index(ptr + offset) = col; - matrix.value(ptr + offset) += val; -} -#else -static inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_nz_index(row, offset) = col; - matrix.row_nz_entry(row, offset) += val; -} -#endif - } // namespace extrapolated_smoother_give template -void ExtrapolatedSmootherGive::nodeBuildAscGive( - int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, MatrixType& inner_boundary_circle_matrix, +void ExtrapolatedSmootherGive::nodeBuildTridiagonalSolverMatrices( + int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, BatchedTridiagonalSolver& circle_tridiagonal_solver, BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, double att, double art, double detDF, double coeff_beta) { - using extrapolated_smoother_give::updateCOOCSRMatrixElement; using extrapolated_smoother_give::updateMatrixElement; assert(i_r >= 0 && i_r < grid.nr()); @@ -55,8 +37,8 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive( assert(lengthSmootherRadial >= 3); int ptr, offset; - int row, column, col; - double value, val; + int row, column; + double value; /* ------------------------------------------ */ /* Node in the interior of the Circle Section */ /* ------------------------------------------ */ @@ -108,7 +90,7 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive( /* Fill matrix row of (i,j) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; @@ -157,21 +139,8 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive( /* | x | o | x | */ /* Fill matrix row of (i-1,j) */ - if (i_r == 1) { - /* Only in the case of AcrossOrigin */ - if (!DirBC_Interior) { - row = left_index; - ptr = getCircleAscIndex(i_r - 1, i_theta); - - const Stencil& LeftStencil = getStencil(i_r - 1, i_theta); - - offset = LeftStencil[StencilPosition::Center]; - col = left_index; - val = +coeff1 * arr; /* Center: (Right) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - } - } - else { + // The inner boundary circle line are is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix. + if (i_r > 1) { row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ @@ -302,7 +271,7 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive( /* Fill matrix row of (i,j) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; @@ -433,177 +402,38 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive( /* Circle Section: Node in the inner boundary */ /* ------------------------------------------ */ else if (i_r == 0) { - auto& right_solver = circle_tridiagonal_solver; - int right_batch = i_r + 1; - - /* ------------------------------------------------ */ - /* Case 1: Dirichlet boundary on the inner boundary */ - /* ------------------------------------------------ */ - if (DirBC_Interior) { - /* Fill result(i,j) */ - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff2 = 0.5 * (k1 + k2) / h2; - - int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - int center_index = i_theta; - int right_index = i_theta; - int bottom_index = i_theta_M1; - int top_index = i_theta_P1; - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = getCircleAscIndex(i_r, i_theta); - - const Stencil& CenterStencil = getStencil(i_r, i_theta); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Fill matrix row of (i+1,j) */ - row = right_index; - column = right_index; - value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_solver, right_batch, row, column, value); - } - else { - /* ------------------------------------------------------------- */ - /* Case 2: Across origin discretization on the interior boundary */ - /* ------------------------------------------------------------- */ - // h1 gets replaced with 2 * R0. - // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). - // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. - double h1 = 2.0 * grid.radius(0); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + (grid.ntheta() / 2)); - - const int center_index = i_theta; - const int left_index = i_theta_AcrossOrigin; - const int right_index = i_theta; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); - const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); - const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); - - int nz_index; - const Stencil& CenterStencil = getStencil(i_r, i_theta); - - if (i_theta & 1) { - /* i_theta % 2 == 1 */ - /* -| x | o | x | */ - /* -| | | | */ - /* -| O | o | o | */ - /* -| | | | */ - /* -| x | o | x | */ - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* beta_{i,j} */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Left]; - col = left_index; - val = -coeff1 * arr; /* Left */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Fill matrix row of (i-1,j) */ - /* From view the view of the across origin node, */ - /* the directions are roatated by 180 degrees in the stencil! */ - row = left_index; - ptr = left_nz_index; - - const Stencil& LeftStencil = CenterStencil; - - offset = LeftStencil[StencilPosition::Left]; - col = center_index; - val = -coeff1 * arr; /* Right -> Left*/ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = LeftStencil[StencilPosition::Center]; - col = left_index; - val = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Fill matrix row of (i+1,j) */ - row = right_index; - column = right_index; - value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_solver, right_batch, row, column, value); - } - else { - /* i_theta % 2 == 0 */ - /* -| o | o | o | */ - /* -| | | | */ - /* -| X | o | x | */ - /* -| | | | */ - /* -| o | o | o | */ - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Fill matrix row of (i,j-1) */ - row = bottom_index; - ptr = bottom_nz_index; - - const Stencil& BottomStencil = CenterStencil; + // The inner boundary circle line are is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix. + auto& center_solver = circle_tridiagonal_solver; + int center_batch = i_r; + auto& right_solver = circle_tridiagonal_solver; + int right_batch = i_r + 1; - offset = BottomStencil[StencilPosition::Center]; - col = bottom_index; - val = +coeff3 * att; /* Center: (Top) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); + /* Fill result(i,j) */ + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); - /* Fill matrix row of (i,j+1) */ - row = top_index; - ptr = top_nz_index; + double coeff2 = 0.5 * (k1 + k2) / h2; - const Stencil& TopStencil = CenterStencil; + int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - offset = TopStencil[StencilPosition::Center]; - col = top_index; - val = +coeff4 * att; /* Center: (Bottom) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); + int center_index = i_theta; + int right_index = i_theta; + int bottom_index = i_theta_M1; + int top_index = i_theta_P1; - /* Fill matrix row of (i+1,j) */ - row = right_index; - column = right_index; - value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_solver, right_batch, row, column, value); - } - } + /* Fill matrix row of (i,j) */ + row = center_index; + column = center_index; + value = 1.0; + updateMatrixElement(center_solver, center_batch, row, column, value); + + /* Fill matrix row of (i+1,j) */ + row = right_index; + column = right_index; + value = coeff2 * arr; /* Center: (Left) */ + updateMatrixElement(right_solver, right_batch, row, column, value); } /* ------------------------------------------- */ /* Circle Section: Node next to radial section */ @@ -1211,11 +1041,13 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive( } template -void ExtrapolatedSmootherGive::buildAscCircleSection(const int i_r) +void ExtrapolatedSmootherGive::buildTridiagonalCircleSection(int i_r) { const PolarGrid& grid = ExtrapolatedSmoother::grid_; const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; + // Access pattern is aligned with the memory layout of the grid data to maximize cache efficiency. const double r = grid.radius(i_r); for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { const int global_index = grid.index(i_r, i_theta); @@ -1225,18 +1057,19 @@ void ExtrapolatedSmootherGive::buildAscCircleSection(const int i level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildAscGive(i_r, i_theta, grid, ExtrapolatedSmoother::DirBC_Interior_, - inner_boundary_circle_matrix_, circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, - att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); } } template -void ExtrapolatedSmootherGive::buildAscRadialSection(const int i_theta) +void ExtrapolatedSmootherGive::buildTridiagonalRadialSection(int i_theta) { const PolarGrid& grid = ExtrapolatedSmoother::grid_; const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; + // Access pattern is aligned with the memory layout of the grid data to maximize cache efficiency. const double theta = grid.theta(i_theta); for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { const int global_index = grid.index(i_r, i_theta); @@ -1246,143 +1079,75 @@ void ExtrapolatedSmootherGive::buildAscRadialSection(const int i level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildAscGive(i_r, i_theta, grid, ExtrapolatedSmoother::DirBC_Interior_, - inner_boundary_circle_matrix_, circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, - att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); } } template -void ExtrapolatedSmootherGive::buildAscMatrices() +void ExtrapolatedSmootherGive::buildTridiagonalSolverMatrices() { - /* -------------------------------------- */ - /* Part 1: Allocate Asc Smoother matrices */ - /* -------------------------------------- */ - // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor. - // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead. - - const PolarGrid& grid = ExtrapolatedSmoother::grid_; - const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; - const int num_omp_threads = ExtrapolatedSmoother::num_omp_threads_; - -#ifdef GMGPOLAR_USE_MUMPS - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. - const int inner_i_r = 0; - const int inner_nnz = getNonZeroCountCircleAsc(inner_i_r); - const int num_circle_nodes = grid.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); - inner_boundary_circle_matrix_.is_symmetric(false); -#else - std::function nnz_per_row = [&](int i_theta) { - if (DirBC_Interior) - return 1; - else - return i_theta % 2 == 0 ? 1 : 2; - }; - const int num_circle_nodes = grid.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); - - for (int i = 0; i < inner_boundary_circle_matrix_.non_zero_size(); i++) { - inner_boundary_circle_matrix_.values_data()[i] = 0.0; - } - -#endif - - /* ---------------------------------- */ - /* Part 2: Fill Asc Smoother matrices */ - /* ---------------------------------- */ + const PolarGrid& grid = ExtrapolatedSmoother::grid_; + const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmoother::num_omp_threads_; - /* Multi-threaded execution: */ const int num_smoother_circles = grid.numberSmootherCircles(); const int additional_radial_tasks = grid.ntheta() % 3; const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; + /* ---------------- */ + /* Circular section */ + /* ---------------- */ + // We parallelize the loop with step 3 to avoid data race conditions between adjacent circles. #pragma omp parallel num_threads(num_omp_threads) { #pragma omp for for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); - } + buildTridiagonalCircleSection(i_r); + } /* Implicit barrier */ #pragma omp for for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); - } + buildTridiagonalCircleSection(i_r); + } /* Implicit barrier */ #pragma omp for for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); - } + buildTridiagonalCircleSection(i_r); + } /* Implicit barrier */ + } + + /* ---------------- */ + /* Radial section */ + /* ---------------- */ + // We parallelize the loop with step 3 to avoid data race conditions between adjacent radial lines. + // Due to the periodicity in the angular direction, we can have at most 2 additional radial tasks + // that are handled serially before the parallel loops. + if (additional_radial_tasks > 0) { + const int i_theta = 0; + buildTridiagonalRadialSection(i_theta); + } + + if (additional_radial_tasks > 1) { + const int i_theta = 1; + buildTridiagonalRadialSection(i_theta); + } +#pragma omp parallel num_threads(num_omp_threads) + { #pragma omp for for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 0) { - int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); - } - else { - if (additional_radial_tasks == 0) { - buildAscRadialSection(0); - } - else if (additional_radial_tasks >= 1) { - buildAscRadialSection(0); - buildAscRadialSection(1); - } - } - } + const int i_theta = radial_task + additional_radial_tasks; + buildTridiagonalRadialSection(i_theta); + } /* Implicit barrier */ #pragma omp for for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 1) { - int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); - } - else { - if (additional_radial_tasks == 0) { - buildAscRadialSection(1); - } - else if (additional_radial_tasks == 1) { - buildAscRadialSection(2); - } - else if (additional_radial_tasks == 2) { - buildAscRadialSection(2); - buildAscRadialSection(3); - } - } - } + const int i_theta = radial_task + additional_radial_tasks; + buildTridiagonalRadialSection(i_theta); + } /* Implicit barrier */ #pragma omp for for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { - int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); - } - } - - circle_tridiagonal_solver_.setup(); - radial_tridiagonal_solver_.setup(); - -#ifdef GMGPOLAR_USE_MUMPS - /* ------------------------------------------------------------------- */ - /* Part 3: Convert inner_boundary_circle_matrix_ to a symmetric matrix */ - /* ------------------------------------------------------------------- */ - - SparseMatrixCOO full_matrix = std::move(inner_boundary_circle_matrix_); - - const int nnz = full_matrix.non_zero_size(); - const int numRows = full_matrix.rows(); - const int numColumns = full_matrix.columns(); - const int symmetric_nnz = nnz - (nnz - numRows) / 2; - - inner_boundary_circle_matrix_ = SparseMatrixCOO(numRows, numColumns, symmetric_nnz); - inner_boundary_circle_matrix_.is_symmetric(true); - - int current_nz = 0; - for (int nz_index = 0; nz_index < full_matrix.non_zero_size(); nz_index++) { - int current_row = full_matrix.row_index(nz_index); - int current_col = full_matrix.col_index(nz_index); - if (current_row <= current_col) { - inner_boundary_circle_matrix_.row_index(current_nz) = current_row; - inner_boundary_circle_matrix_.col_index(current_nz) = current_col; - inner_boundary_circle_matrix_.value(current_nz) = std::move(full_matrix.value(nz_index)); - current_nz++; - } + const int i_theta = radial_task + additional_radial_tasks; + buildTridiagonalRadialSection(i_theta); + } /* Implicit barrier */ } -#endif } -// clang-format on diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h index df31ca82..54290bfe 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h @@ -60,9 +60,6 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother explicit ExtrapolatedSmootherGive(const PolarGrid& grid, const LevelCacheType& level_cache, bool DirBC_Interior, int num_omp_threads); - // If MUMPS is enabled, this cleans up the inner boundary solver. - ~ExtrapolatedSmootherGive() override; - // Performs one full coupled extrapolated smoothing sweep: // BC -> WC -> BR -> WR // Parallel implementation using OpenMP: @@ -71,40 +68,13 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother void extrapolatedSmoothing(Vector x, ConstVector rhs, Vector temp) override; private: - /* ------------------- */ - /* Tridiagonal solvers */ - /* ------------------- */ - - // Batched solver for cyclic-tridiagonal circle line A_sc matrices. - BatchedTridiagonalSolver circle_tridiagonal_solver_; - - // Batched solver for tridiagonal radial circle line A_sc matrices. - BatchedTridiagonalSolver radial_tridiagonal_solver_; - - // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because - // it potentially includes across-origin coupling. Therefore, it is assembled - // into a sparse matrix and solved using a general-purpose sparse solver. - // When using the MUMPS solver, the matrix is assembled in COO format. - // When using the in-house solver, the matrix is stored in CSR format. -#ifdef GMGPOLAR_USE_MUMPS - using MatrixType = SparseMatrixCOO; - DMUMPS_STRUC_C inner_boundary_mumps_solver_; -#else - using MatrixType = SparseMatrixCSR; - SparseLUSolver inner_boundary_lu_solver_; -#endif - // Sparse matrix for the non-tridiagonal inner boundary circle block. - MatrixType inner_boundary_circle_matrix_; - - // Note: - // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. - // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. - // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. - /* ------------------- */ /* Stencil definitions */ /* ------------------- */ + // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_, + // since the mumps solver will be build in the member initializer of the Smoother class. + // Stencils encode neighborhood connectivity for A_sc matrix assembly. // It is only used in the construction of COO/CSR matrices. // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. @@ -126,6 +96,48 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother }; // clang-format on + /* ------------------- */ + /* Tridiagonal solvers */ + /* ------------------- */ + + // Batched solver for cyclic-tridiagonal circle line A_sc matrices. + BatchedTridiagonalSolver circle_tridiagonal_solver_; + + // Batched solver for tridiagonal radial line A_sc matrices. + BatchedTridiagonalSolver radial_tridiagonal_solver_; + + // Note: + // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. + // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. + // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. + + /* ------------------------ */ + /* Interior boundary solver */ + /* ------------------------ */ + + // The inner circle matrix (i_r = 0) is NOT tridiagonal due to across-origin coupling. + // It is solved using a general-purpose sparse solver. + // - MUMPS: matrix assembled in COO format; solver owns the matrix internally. + // - In-house: matrix stored in CSR; solver does not own the matrix. + +#ifdef GMGPOLAR_USE_MUMPS + using InnerBoundaryMatrix = SparseMatrixCOO; + using InnerBoundarySolver = CooMumpsSolver; +#else + using InnerBoundaryMatrix = SparseMatrixCSR; + using InnerBoundarySolver = SparseLUSolver; + + // Stored only for the in-house solver (CSR). + InnerBoundaryMatrix inner_boundary_circle_matrix_; +#endif + + // Solver object (owns matrix if MUMPS, references if in-house solver). + InnerBoundarySolver inner_boundary_solver_; + + /* -------------- */ + /* Stencil access */ + /* -------------- */ + // Select correct stencil depending on the grid position. const Stencil& getStencil(int i_r, int i_theta) const; /* Only i_r = 0 implemented */ // Number of nonzero A_sc entries. @@ -137,19 +149,25 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother /* --------------- */ /* Matrix assembly */ /* --------------- */ - // Build all A_sc matrices for circle and radial smoothers. - void buildAscMatrices(); - // Build A_sc matrix block for a single circular line. - void buildAscCircleSection(int i_r); - // Build A_sc matrix block for a single radial line. - void buildAscRadialSection(int i_theta); - // Build A_sc for a specific node (i_r, i_theta) - void nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, double att, - double art, double detDF, double coeff_beta); + void buildTridiagonalSolverMatrices(); + void buildTridiagonalCircleSection(int i_r); + void buildTridiagonalRadialSection(int i_theta); + // Build the tridiagonal solver matrices for a specific node (i_r, i_theta) + void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, + double att, double art, double detDF, double coeff_beta); + + // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling. + InnerBoundaryMatrix buildInteriorBoundarySolverMatrix(); + // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. + void nodeBuildInteriorBoundarySolverMatrix_i_r_0(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + InnerBoundaryMatrix& matrix, double arr, double att, double art, + double detDF, double coeff_beta); + void nodeBuildInteriorBoundarySolverMatrix_i_r_1(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + InnerBoundaryMatrix& matrix, double arr, double att, double art, + double detDF, double coeff_beta); /* ---------------------- */ /* Orthogonal application */ @@ -157,9 +175,9 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) // where x = u_sc and rhs = f_sc - void applyAscOrthoCircleSection(int i_r, SmootherColor smoother_color, ConstVector x, + void applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp); - void applyAscOrthoRadialSection(int i_theta, SmootherColor smoother_color, ConstVector x, + void applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp); /* ----------------- */ @@ -178,21 +196,11 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother void solveWhiteCircleSection(Vector x, Vector temp); void solveBlackRadialSection(Vector x, Vector temp); void solveWhiteRadialSection(Vector x, Vector temp); - - /* ----------------------------------- */ - /* Initialize and destroy MUMPS solver */ - /* ----------------------------------- */ -#ifdef GMGPOLAR_USE_MUMPS - // Initialize sparse MUMPS solver with assembled COO matrix. - void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); - // Release MUMPS internal memory and MPI structures. - void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); -#endif }; #include "extrapolatedSmootherGive.inl" #include "smootherStencil.inl" -#include "buildAscMatrices.inl" +#include "buildInnerBoundaryAsc.inl" +#include "buildTridiagonalAsc.inl" #include "applyAscOrtho.inl" #include "solveAscSystem.inl" -#include "initializeMumps.inl" diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.inl index 734f163f..e55b51ce 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.inl @@ -7,21 +7,16 @@ ExtrapolatedSmootherGive::ExtrapolatedSmootherGive(const PolarGr : ExtrapolatedSmoother(grid, level_cache, DirBC_Interior, num_omp_threads) , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) -{ - buildAscMatrices(); #ifdef GMGPOLAR_USE_MUMPS - initializeMumpsSolver(inner_boundary_mumps_solver_, inner_boundary_circle_matrix_); + , inner_boundary_solver_(buildInteriorBoundarySolverMatrix()) #else - inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); + , inner_boundary_circle_matrix_(buildInteriorBoundarySolverMatrix()) + , inner_boundary_solver_(inner_boundary_circle_matrix_) #endif -} - -template -ExtrapolatedSmootherGive::~ExtrapolatedSmootherGive() { -#ifdef GMGPOLAR_USE_MUMPS - finalizeMumpsSolver(inner_boundary_mumps_solver_); -#endif + buildTridiagonalSolverMatrices(); + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); } // The smoothing solves linear systems of the form: @@ -65,7 +60,7 @@ void ExtrapolatedSmootherGive::extrapolatedSmoothing(Vector -void ExtrapolatedSmootherGive::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, - SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. */ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. - ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. - ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. - ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout << "Warning: ExtrapolatedSmoother inner boundary matrix is not positive definite: Negative pivots in " - "the factorization phase." - << std::endl; - } -} - -template -void ExtrapolatedSmootherGive::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.inl index e17e6278..11cb6d1a 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.inl @@ -22,19 +22,8 @@ void ExtrapolatedSmootherGive::solveBlackCircleSection(Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta())); + inner_boundary_solver_.solveInPlace(inner_boundary); } // Move updated values to x @@ -69,19 +58,8 @@ void ExtrapolatedSmootherGive::solveWhiteCircleSection(Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta())); + inner_boundary_solver_.solveInPlace(inner_boundary); } // Move updated values to x diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl index 7686504c..0d8ac1da 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl @@ -467,10 +467,10 @@ void ExtrapolatedSmootherTake::applyAscOrthoBlackCircleSection(C { using extrapolated_smoother_take::nodeApplyAscOrthoCircleTake; - const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; - const LevelCacheType& level_cache = ExtrapolatedSmootherTake::level_cache_; - const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; - const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; + const PolarGrid& grid = ExtrapolatedSmoother::grid_; + const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmoother::num_omp_threads_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); @@ -500,10 +500,10 @@ void ExtrapolatedSmootherTake::applyAscOrthoWhiteCircleSection(C { using extrapolated_smoother_take::nodeApplyAscOrthoCircleTake; - const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; - const LevelCacheType& level_cache = ExtrapolatedSmootherTake::level_cache_; - const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; - const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; + const PolarGrid& grid = ExtrapolatedSmoother::grid_; + const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmoother::num_omp_threads_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); @@ -533,10 +533,10 @@ void ExtrapolatedSmootherTake::applyAscOrthoBlackRadialSection(C { using extrapolated_smoother_take::nodeApplyAscOrthoRadialTake; - const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; - const LevelCacheType& level_cache = ExtrapolatedSmootherTake::level_cache_; - const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; - const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; + const PolarGrid& grid = ExtrapolatedSmoother::grid_; + const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmoother::num_omp_threads_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); @@ -563,10 +563,10 @@ void ExtrapolatedSmootherTake::applyAscOrthoWhiteRadialSection(C { using extrapolated_smoother_take::nodeApplyAscOrthoRadialTake; - const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; - const LevelCacheType& level_cache = ExtrapolatedSmootherTake::level_cache_; - const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; - const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; + const PolarGrid& grid = ExtrapolatedSmoother::grid_; + const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmoother::num_omp_threads_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl index 373a5c74..548fd9ed 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl @@ -109,10 +109,10 @@ void ExtrapolatedSmootherTake::nodeBuildInteriorBoundarySolverMa const int top = grid.index(i_r, i_theta_P1); const int right = grid.index(i_r + 1, i_theta); - const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + + const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - const double left_value = -coeff1 * (arr[center] + arr[left]); + const double left_value = -coeff1 * (arr[center] + arr[left]); /* Fill matrix row of (i,j) */ row = center_index; @@ -156,14 +156,20 @@ template typename ExtrapolatedSmootherTake::InnerBoundaryMatrix ExtrapolatedSmootherTake::buildInteriorBoundarySolverMatrix() { - const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; - const LevelCacheType& level_cache = ExtrapolatedSmootherTake::level_cache_; - const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; - const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; + const PolarGrid& grid = ExtrapolatedSmoother::grid_; + const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmoother::num_omp_threads_; const int i_r = 0; const int ntheta = grid.ntheta(); + // The interior boundary matrix is symmetric due to the periodicity in the theta direction + // and the assumption that ntheta is even, which is required for the across-origin discretization. + // We store all non-zero entries of the matrix, both in COO format (for MUMPS) + // and in CSR format (for the in-house solver). If the COO matrix is marked as symmetric, + // the COO_Mumps_Solver optimizes the factorization by only using the upper triangular part of the matrix, + // which is extracted by the COO_Mumps_Solver internally. #ifdef GMGPOLAR_USE_MUMPS const int nnz = getNonZeroCountCircleAsc(i_r); SparseMatrixCOO inner_boundary_solver_matrix(ntheta, ntheta, nnz); diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.inl index d487aa00..b4553470 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.inl +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.inl @@ -89,9 +89,9 @@ void ExtrapolatedSmootherTake::nodeBuildTridiagonalSolverMatrice /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); /* Bottom */ @@ -128,9 +128,9 @@ void ExtrapolatedSmootherTake::nodeBuildTridiagonalSolverMatrice /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); } else { @@ -232,9 +232,9 @@ void ExtrapolatedSmootherTake::nodeBuildTridiagonalSolverMatrice /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); /* Left */ @@ -276,9 +276,9 @@ void ExtrapolatedSmootherTake::nodeBuildTridiagonalSolverMatrice /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); } else { @@ -340,9 +340,9 @@ void ExtrapolatedSmootherTake::nodeBuildTridiagonalSolverMatrice /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); /* Right */ @@ -363,9 +363,9 @@ void ExtrapolatedSmootherTake::nodeBuildTridiagonalSolverMatrice /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); } else { @@ -429,9 +429,9 @@ void ExtrapolatedSmootherTake::nodeBuildTridiagonalSolverMatrice /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); /* Left */ @@ -457,9 +457,9 @@ void ExtrapolatedSmootherTake::nodeBuildTridiagonalSolverMatrice /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); } } @@ -518,10 +518,10 @@ void ExtrapolatedSmootherTake::nodeBuildTridiagonalSolverMatrice template void ExtrapolatedSmootherTake::buildTridiagonalSolverMatrices() { - const PolarGrid& grid = ExtrapolatedSmootherTake::grid_; - const LevelCacheType& level_cache = ExtrapolatedSmootherTake::level_cache_; - const bool DirBC_Interior = ExtrapolatedSmootherTake::DirBC_Interior_; - const int num_omp_threads = ExtrapolatedSmootherTake::num_omp_threads_; + const PolarGrid& grid = ExtrapolatedSmoother::grid_; + const LevelCacheType& level_cache = ExtrapolatedSmoother::level_cache_; + const bool DirBC_Interior = ExtrapolatedSmoother::DirBC_Interior_; + const int num_omp_threads = ExtrapolatedSmoother::num_omp_threads_; assert(level_cache.cacheDensityProfileCoefficients()); assert(level_cache.cacheDomainGeometry()); diff --git a/include/GMGPolar/setup.h b/include/GMGPolar/setup.h index 3fd729c7..e33d96b9 100644 --- a/include/GMGPolar/setup.h +++ b/include/GMGPolar/setup.h @@ -211,7 +211,7 @@ void GMGPolar::discretize_rhs_f( double k1 = grid.angularSpacing(i_theta - 1); double k2 = grid.angularSpacing(i_theta); const double detDF = detDF_cache[grid.index(i_r, i_theta)]; - rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF); + rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF); } else if (i_r == 0 && DirBC_Interior_) { rhs_f[grid.index(i_r, i_theta)] *= 1.0; @@ -236,7 +236,7 @@ void GMGPolar::discretize_rhs_f( double k1 = grid.angularSpacing(i_theta - 1); double k2 = grid.angularSpacing(i_theta); const double detDF = detDF_cache[grid.index(i_r, i_theta)]; - rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF); + rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF); } else if (i_r == 0 && DirBC_Interior_) { rhs_f[grid.index(i_r, i_theta)] *= 1.0; @@ -277,7 +277,7 @@ void GMGPolar::discretize_rhs_f( double Jtt = domain_geometry_.dFy_dt(r, theta); /* Compute the determinant of the Jacobian matrix */ double detDF = Jrr * Jtt - Jrt * Jtr; - rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF); + rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF); } else if (i_r == 0 && DirBC_Interior_) { rhs_f[grid.index(i_r, i_theta)] *= 1.0; @@ -312,7 +312,7 @@ void GMGPolar::discretize_rhs_f( double Jtt = domain_geometry_.dFy_dt(r, theta); /* Compute the determinant of the Jacobian matrix */ double detDF = Jrr * Jtt - Jrt * Jtr; - rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF); + rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF); } else if (i_r == 0 && DirBC_Interior_) { rhs_f[grid.index(i_r, i_theta)] *= 1.0; diff --git a/include/Level/level.inl b/include/Level/level.inl index b26a0e1f..288d6dbe 100644 --- a/include/Level/level.inl +++ b/include/Level/level.inl @@ -4,8 +4,8 @@ #include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h" #include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h" -#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h" -#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h" +#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h" +#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h" #include "../../include/Smoother/SmootherGive/smootherGive.h" #include "../../include/Smoother/SmootherTake/smootherTake.h" @@ -16,9 +16,10 @@ // ----------- // // Constructor // template -Level::Level(const int level_depth, std::unique_ptr grid, - std::unique_ptr> level_cache, - const ExtrapolationType extrapolation, const bool FMG, const bool PCG_FMG) +Level::Level( + const int level_depth, std::unique_ptr grid, + std::unique_ptr> level_cache, + const ExtrapolationType extrapolation, const bool FMG, const bool PCG_FMG) : level_depth_(level_depth) , grid_(std::move(grid)) , level_cache_(std::move(level_cache)) @@ -46,7 +47,8 @@ const PolarGrid& Level::grid() const } template -const LevelCache& Level::levelCache() const +const LevelCache& +Level::levelCache() const { return *level_cache_; } @@ -102,8 +104,8 @@ ConstVector Level::error_cor // -------------- // // Apply Residual // template -void Level::initializeResidual(const bool DirBC_Interior, const int num_omp_threads, - const StencilDistributionMethod stencil_distribution_method) +void Level::initializeResidual( + const bool DirBC_Interior, const int num_omp_threads, const StencilDistributionMethod stencil_distribution_method) { if (stencil_distribution_method == StencilDistributionMethod::CPU_TAKE) { op_residual_ = @@ -118,14 +120,16 @@ void Level::initializeResidual(const } template -void Level::computeResidual(Vector result, ConstVector rhs, ConstVector x) const +void Level::computeResidual(Vector result, ConstVector rhs, + ConstVector x) const { if (!op_residual_) throw std::runtime_error("Residual not initialized."); op_residual_->computeResidual(result, rhs, x); } template -void Level::applySystemOperator(Vector result, ConstVector x) const +void Level::applySystemOperator(Vector result, + ConstVector x) const { if (!op_residual_) throw std::runtime_error("Residual not initialized."); @@ -135,8 +139,8 @@ void Level::applySystemOperator(Vect // ------------------- // // Solve coarse System // template -void Level::initializeDirectSolver(const bool DirBC_Interior, const int num_omp_threads, - const StencilDistributionMethod stencil_distribution_method) +void Level::initializeDirectSolver( + const bool DirBC_Interior, const int num_omp_threads, const StencilDistributionMethod stencil_distribution_method) { #ifdef GMGPOLAR_USE_MUMPS if (stencil_distribution_method == StencilDistributionMethod::CPU_TAKE) { @@ -172,8 +176,8 @@ void Level::directSolveInPlace(Vecto // --------------- // // Apply Smoothing // template -void Level::initializeSmoothing(const bool DirBC_Interior, const int num_omp_threads, - const StencilDistributionMethod stencil_distribution_method) +void Level::initializeSmoothing( + const bool DirBC_Interior, const int num_omp_threads, const StencilDistributionMethod stencil_distribution_method) { if (stencil_distribution_method == StencilDistributionMethod::CPU_TAKE) { op_smoother_ = @@ -188,7 +192,8 @@ void Level::initializeSmoothing(cons } template -void Level::smoothing(Vector x, ConstVector rhs, Vector temp) const +void Level::smoothing(Vector x, ConstVector rhs, + Vector temp) const { if (!op_smoother_) throw std::runtime_error("Smoother not initialized."); @@ -198,8 +203,8 @@ void Level::smoothing(Vector // ---------------------------- // // Apply Extrapolated Smoothing // template -void Level::initializeExtrapolatedSmoothing(const bool DirBC_Interior, const int num_omp_threads, - const StencilDistributionMethod stencil_distribution_method) +void Level::initializeExtrapolatedSmoothing( + const bool DirBC_Interior, const int num_omp_threads, const StencilDistributionMethod stencil_distribution_method) { if (stencil_distribution_method == StencilDistributionMethod::CPU_TAKE) { op_extrapolated_smoother_ = std::make_unique>( @@ -214,7 +219,8 @@ void Level::initializeExtrapolatedSm } template -void Level::extrapolatedSmoothing(Vector x, ConstVector rhs, Vector temp) const +void Level::extrapolatedSmoothing(Vector x, ConstVector rhs, + Vector temp) const { if (!op_extrapolated_smoother_) throw std::runtime_error("Extrapolated Smoother not initialized."); diff --git a/include/Residual/ResidualGive/applyAGive.inl b/include/Residual/ResidualGive/applyAGive.inl index 2802e0ef..0ace217e 100644 --- a/include/Residual/ResidualGive/applyAGive.inl +++ b/include/Residual/ResidualGive/applyAGive.inl @@ -296,13 +296,14 @@ void ResidualGive::applyCircleSection(const int i_r, Vector::grid_; + const PolarGrid& grid = Residual::grid_; + const LevelCacheType& level_cache = Residual::level_cache_; + const bool DirBC_Interior = Residual::DirBC_Interior_; const double r = grid.radius(i_r); for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { const double theta = grid.theta(i_theta); - node_apply_a_give(i_r, i_theta, r, theta, grid, Residual::level_cache_, - Residual::DirBC_Interior_, result, x); + node_apply_a_give(i_r, i_theta, r, theta, grid, level_cache, DirBC_Interior, result, x); } } @@ -312,12 +313,13 @@ void ResidualGive::applyRadialSection(const int i_theta, Vector< { using residual_give::node_apply_a_give; - const PolarGrid& grid = Residual::grid_; + const PolarGrid& grid = Residual::grid_; + const LevelCacheType& level_cache = Residual::level_cache_; + const bool DirBC_Interior = Residual::DirBC_Interior_; const double theta = grid.theta(i_theta); for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { const double r = grid.radius(i_r); - node_apply_a_give(i_r, i_theta, r, theta, grid, Residual::level_cache_, - Residual::DirBC_Interior_, result, x); + node_apply_a_give(i_r, i_theta, r, theta, grid, level_cache, DirBC_Interior, result, x); } } diff --git a/include/Residual/ResidualGive/residualGive.inl b/include/Residual/ResidualGive/residualGive.inl index bc50f620..0f9b6eb8 100644 --- a/include/Residual/ResidualGive/residualGive.inl +++ b/include/Residual/ResidualGive/residualGive.inl @@ -9,8 +9,7 @@ ResidualGive::ResidualGive(const PolarGrid& grid, const LevelCac /* ------------ */ /* result = A*x */ - -// clang-format off +/* ------------ */ template void ResidualGive::applySystemOperator(Vector result, ConstVector x) const { @@ -18,107 +17,84 @@ void ResidualGive::applySystemOperator(Vector result, Co assign(result, 0.0); - const PolarGrid& grid = Residual::grid_; - + const PolarGrid& grid = Residual::grid_; const int num_omp_threads = Residual::num_omp_threads_; - /* Single-threaded execution */ - if (num_omp_threads == 1) { - for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { + const int num_smoother_circles = grid.numberSmootherCircles(); + const int additional_radial_tasks = grid.ntheta() % 3; + const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; + + /* ---------------- */ + /* Circular section */ + /* ---------------- */ + // We parallelize the loop with step 3 to avoid data race conditions between adjacent circles. +#pragma omp parallel num_threads(num_omp_threads) + { +#pragma omp for + for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) { applyCircleSection(i_r, result, x); - } - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - applyRadialSection(i_theta, result, x); - } + } /* Implicit barrier */ +#pragma omp for + for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) { + applyCircleSection(i_r, result, x); + } /* Implicit barrier */ +#pragma omp for + for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) { + applyCircleSection(i_r, result, x); + } /* Implicit barrier */ } - /* Multi-threaded execution */ - else { - const int num_circle_tasks = grid.numberSmootherCircles(); - const int additional_radial_tasks = grid.ntheta() % 3; - const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; - #pragma omp parallel num_threads(num_omp_threads) - { - /* Circle Section 0 */ - #pragma omp for - for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid.numberSmootherCircles() - circle_task - 1; - applyCircleSection(i_r, result, x); - } - /* Circle Section 1 */ - #pragma omp for - for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid.numberSmootherCircles() - circle_task - 1; - applyCircleSection(i_r, result, x); - } - /* Circle Section 2 */ - #pragma omp for nowait - for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid.numberSmootherCircles() - circle_task - 1; - applyCircleSection(i_r, result, x); - } + /* -------------- */ + /* Radial section */ + /* -------------- */ + // We parallelize the loop with step 3 to avoid data race conditions between adjacent radial lines. + // Due to the periodicity in the angular direction, we can have at most 2 additional radial tasks + // that are handled serially before the parallel loops. + if (additional_radial_tasks > 0) { + const int i_theta = 0; + applyRadialSection(i_theta, result, x); + } - /* Radial Section 0 */ - #pragma omp for - for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 0) { - int i_theta = radial_task + additional_radial_tasks; - applyRadialSection(i_theta, result, x); - } - else { - if (additional_radial_tasks == 0) { - applyRadialSection(0, result, x); - } - else if (additional_radial_tasks >= 1) { - applyRadialSection(0, result, x); - applyRadialSection(1, result, x); - } - } - } - /* Radial Section 1 */ - #pragma omp for - for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 1) { - int i_theta = radial_task + additional_radial_tasks; - applyRadialSection(i_theta, result, x); - } - else { - if (additional_radial_tasks == 0) { - applyRadialSection(1, result, x); - } - else if (additional_radial_tasks == 1) { - applyRadialSection(2, result, x); - } - else if (additional_radial_tasks == 2) { - applyRadialSection(2, result, x); - applyRadialSection(3, result, x); - } - } - } - /* Radial Section 2 */ - #pragma omp for - for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { - int i_theta = radial_task + additional_radial_tasks; - applyRadialSection(i_theta, result, x); - } - } + if (additional_radial_tasks > 1) { + const int i_theta = 1; + applyRadialSection(i_theta, result, x); + } + +#pragma omp parallel num_threads(num_omp_threads) + { +#pragma omp for + for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { + const int i_theta = radial_task + additional_radial_tasks; + applyRadialSection(i_theta, result, x); + } /* Implicit barrier */ +#pragma omp for + for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { + const int i_theta = radial_task + additional_radial_tasks; + applyRadialSection(i_theta, result, x); + } /* Implicit barrier */ +#pragma omp for + for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { + const int i_theta = radial_task + additional_radial_tasks; + applyRadialSection(i_theta, result, x); + } /* Implicit barrier */ } } -// clang-format on /* ------------------ */ /* result = rhs - A*x */ +/* ------------------ */ template void ResidualGive::computeResidual(Vector result, ConstVector rhs, ConstVector x) const { assert(result.size() == x.size()); + const int num_omp_threads = Residual::num_omp_threads_; + applySystemOperator(result, x); // Subtract A*x from rhs to get the residual. - const int n = result.size(); - const int num_omp_threads = Residual::num_omp_threads_; + const int n = result.size(); #pragma omp parallel for num_threads(num_omp_threads) for (int i = 0; i < n; i++) { result[i] = rhs[i] - result[i]; diff --git a/include/Residual/ResidualTake/applyATake.inl b/include/Residual/ResidualTake/applyATake.inl index 38cbe669..da42f45f 100644 --- a/include/Residual/ResidualTake/applyATake.inl +++ b/include/Residual/ResidualTake/applyATake.inl @@ -123,13 +123,15 @@ void ResidualTake::applyCircleSection(const int i_r, Vector::level_cache_.cacheDensityProfileCoefficients()); assert(Residual::level_cache_.cacheDomainGeometry()); - const PolarGrid& grid = Residual::grid_; - const bool DirBC_Interior = Residual::DirBC_Interior_; - ConstVector arr = Residual::level_cache_.arr(); - ConstVector att = Residual::level_cache_.att(); - ConstVector art = Residual::level_cache_.art(); - ConstVector detDF = Residual::level_cache_.detDF(); - ConstVector coeff_beta = Residual::level_cache_.coeff_beta(); + const PolarGrid& grid = Residual::grid_; + const LevelCacheType& level_cache = Residual::level_cache_; + const bool DirBC_Interior = Residual::DirBC_Interior_; + + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { node_apply_a_take(i_r, i_theta, grid, DirBC_Interior, result, x, arr, att, art, detDF, coeff_beta); @@ -145,13 +147,15 @@ void ResidualTake::applyRadialSection(const int i_theta, Vector< assert(Residual::level_cache_.cacheDensityProfileCoefficients()); assert(Residual::level_cache_.cacheDomainGeometry()); - const PolarGrid& grid = Residual::grid_; - const bool DirBC_Interior = Residual::DirBC_Interior_; - ConstVector arr = Residual::level_cache_.arr(); - ConstVector att = Residual::level_cache_.att(); - ConstVector art = Residual::level_cache_.art(); - ConstVector detDF = Residual::level_cache_.detDF(); - ConstVector coeff_beta = Residual::level_cache_.coeff_beta(); + const PolarGrid& grid = Residual::grid_; + const LevelCacheType& level_cache = Residual::level_cache_; + const bool DirBC_Interior = Residual::DirBC_Interior_; + + ConstVector arr = level_cache.arr(); + ConstVector att = level_cache.att(); + ConstVector art = level_cache.art(); + ConstVector detDF = level_cache.detDF(); + ConstVector coeff_beta = level_cache.coeff_beta(); for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { node_apply_a_take(i_r, i_theta, grid, DirBC_Interior, result, x, arr, att, art, detDF, coeff_beta); diff --git a/include/Residual/ResidualTake/residualTake.inl b/include/Residual/ResidualTake/residualTake.inl index c6916277..e5cf7fd3 100644 --- a/include/Residual/ResidualTake/residualTake.inl +++ b/include/Residual/ResidualTake/residualTake.inl @@ -14,13 +14,9 @@ void ResidualTake::applySystemOperator(Vector result, Co { assert(result.size() == x.size()); + const PolarGrid& grid = Residual::grid_; const int num_omp_threads = Residual::num_omp_threads_; - assert(Residual::level_cache_.cacheDensityProfileCoefficients()); - assert(Residual::level_cache_.cacheDomainGeometry()); - - const PolarGrid& grid = Residual::grid_; - #pragma omp parallel num_threads(num_omp_threads) { /* Circle Section */ diff --git a/include/Smoother/SmootherGive/applyAscOrtho.inl b/include/Smoother/SmootherGive/applyAscOrtho.inl index 63bfef95..f91f5366 100644 --- a/include/Smoother/SmootherGive/applyAscOrtho.inl +++ b/include/Smoother/SmootherGive/applyAscOrtho.inl @@ -434,6 +434,7 @@ void SmootherGive::applyAscOrthoCircleSection(const int i_r, con const PolarGrid& grid = Smoother::grid_; const LevelCacheType& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; assert(i_r >= 0 && i_r < grid.numberSmootherCircles() + 1); @@ -447,8 +448,8 @@ void SmootherGive::applyAscOrthoCircleSection(const int i_r, con level_cache.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); // Apply Asc Ortho at the current node - nodeApplyAscOrthoCircleGive(i_r, i_theta, grid, Smoother::DirBC_Interior_, smoother_color, x, - rhs, temp, arr, att, art, detDF, coeff_beta); + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid, DirBC_Interior, smoother_color, x, rhs, temp, arr, att, art, + detDF, coeff_beta); } } @@ -461,6 +462,7 @@ void SmootherGive::applyAscOrthoRadialSection(const int i_theta, const PolarGrid& grid = Smoother::grid_; const LevelCacheType& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; const double theta = grid.theta(i_theta); @@ -474,7 +476,7 @@ void SmootherGive::applyAscOrthoRadialSection(const int i_theta, level_cache.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); // Apply Asc Ortho at the current node - nodeApplyAscOrthoRadialGive(i_r, i_theta, grid, Smoother::DirBC_Interior_, smoother_color, x, - rhs, temp, arr, att, art, detDF, coeff_beta); + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid, DirBC_Interior, smoother_color, x, rhs, temp, arr, att, art, + detDF, coeff_beta); } -} +} \ No newline at end of file diff --git a/include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl new file mode 100644 index 00000000..9c011718 --- /dev/null +++ b/include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl @@ -0,0 +1,295 @@ +#pragma once + +namespace smoother_give +{ + +#ifdef GMGPOLAR_USE_MUMPS +// When using the MUMPS solver, the matrix is assembled in COO format. +static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, + int column, double value) +{ + matrix.row_index(ptr + offset) = row; + matrix.col_index(ptr + offset) = column; + matrix.value(ptr + offset) += value; +} +#else +// When using the in-house solver, the matrix is stored in CSR format. +static inline void update_CSR_COO_MatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, + int column, double value) +{ + matrix.row_nz_index(row, offset) = column; + matrix.row_nz_entry(row, offset) += value; +} +#endif + +} // namespace smoother_give + +template +void SmootherGive::nodeBuildInteriorBoundarySolverMatrix_i_r_0(int i_theta, const PolarGrid& grid, + bool DirBC_Interior, + InnerBoundaryMatrix& matrix, double arr, + double att, double art, double detDF, + double coeff_beta) +{ + using smoother_give::update_CSR_COO_MatrixElement; + + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + int ptr, offset; + int row, column; + double value; + + const int i_r = 0; + + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + /* Fill result(i,j) */ + const int center_index = i_theta; + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = getCircleAscIndex(i_r, i_theta); + + const Stencil& CenterStencil = getStencil(i_r); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + // h1 gets replaced with 2 * R0. + // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()>>1)). + // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. + const double h1 = 2 * grid.radius(0); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + /* -| x | o | x | */ + /* -| | | | */ + /* -| O | o | o | */ + /* -| | | | */ + /* -| x | o | x | */ + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); + + const int center_index = i_theta; + const int left_index = i_theta_AcrossOrigin; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + const int center_nz_index = getCircleAscIndex(i_r, i_theta); + const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); + const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); + const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); + + int nz_index; /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* beta_{i,j} */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Left]; + column = left_index; + value = -coeff1 * arr; /* Left */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Bottom]; + column = bottom_index; + value = -coeff3 * att; /* Bottom */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Top]; + column = top_index; + value = -coeff4 * att; /* Top */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + /* Fill matrix row of (i-1,j) */ + /* From view the view of the across origin node, */ /* the directions are roatated by 180 degrees in the stencil! */ + row = left_index; + ptr = left_nz_index; + + const Stencil& LeftStencil = CenterStencil; + + offset = LeftStencil[StencilPosition::Left]; + column = center_index; + value = -coeff1 * arr; /* Right -> Left*/ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = LeftStencil[StencilPosition::Center]; + column = left_index; + value = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + /* Top Right -> Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + + /* Bottom Right -> Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + + /* Fill matrix row of (i,j-1) */ + row = bottom_index; + ptr = bottom_nz_index; + + const Stencil& BottomStencil = CenterStencil; + + offset = BottomStencil[StencilPosition::Top]; + column = center_index; + value = -coeff3 * att; /* Top */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = BottomStencil[StencilPosition::Center]; + column = bottom_index; + value = +coeff3 * att; /* Center: (Top) */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + /* TopLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + + /* Fill matrix row of (i,j+1) */ + row = top_index; + ptr = top_nz_index; + + const Stencil& TopStencil = CenterStencil; + + offset = TopStencil[StencilPosition::Bottom]; + column = center_index; + value = -coeff4 * att; /* Bottom */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + offset = TopStencil[StencilPosition::Center]; + column = top_index; + value = +coeff4 * att; /* Center: (Bottom) */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + + /* BottomLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + } +} + +template +void SmootherGive::nodeBuildInteriorBoundarySolverMatrix_i_r_1(int i_theta, const PolarGrid& grid, + bool DirBC_Interior, + InnerBoundaryMatrix& matrix, double arr, + double att, double art, double detDF, + double coeff_beta) +{ + using smoother_give::update_CSR_COO_MatrixElement; + + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + int ptr, offset; + int row, column; + double value; + + const int i_r = 1; + + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left_index = i_theta; + + /* Fill matrix row of (i-1,j) */ + if (!DirBC_Interior) { + row = left_index; + ptr = getCircleAscIndex(i_r - 1, i_theta); + + const Stencil& LeftStencil = getStencil(i_r - 1); + + offset = LeftStencil[StencilPosition::Center]; + column = left_index; + value = coeff1 * arr; /* Center: (Right) */ + update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value); + } +} + +template +typename SmootherGive::InnerBoundaryMatrix +SmootherGive::buildInteriorBoundarySolverMatrix() +{ + const PolarGrid& grid = Smoother::grid_; + const LevelCacheType& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + + const int i_r = 0; + const int ntheta = grid.ntheta(); + + // The interior boundary matrix is symmetric due to the periodicity in the theta direction + // and the assumption that ntheta is even, which is required for the across-origin discretization. + // We store all non-zero entries of the matrix, both in COO format (for MUMPS) + // and in CSR format (for the in-house solver). If the COO matrix is marked as symmetric, + // the COO_Mumps_Solver optimizes the factorization by only using the upper triangular part of the matrix, + // which is extracted by the COO_Mumps_Solver internally. +#ifdef GMGPOLAR_USE_MUMPS + const int nnz = getNonZeroCountCircleAsc(i_r); + SparseMatrixCOO inner_boundary_solver_matrix(ntheta, ntheta, nnz); + inner_boundary_solver_matrix.is_symmetric(true); +#else + // The stencils size for the inner boundary matrix is either 1 (Dirichlet BC) or 4 (across-origin discretization). + std::function nnz_per_row = [&](int i_theta) { + return DirBC_Interior ? 1 : 4; + }; + SparseMatrixCSR inner_boundary_solver_matrix(ntheta, ntheta, nnz_per_row); +#endif + + { + const int i_r = 0; + const double r = grid.radius(i_r); + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + { + const int global_index = grid.index(i_r, i_theta); + const double theta = grid.theta(i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); + + nodeBuildInteriorBoundarySolverMatrix_i_r_0(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, + arr, att, art, detDF, coeff_beta); + } + } + } + + { + const int i_r = 1; + const double r = grid.radius(i_r); + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + { + const int global_index = grid.index(i_r, i_theta); + const double theta = grid.theta(i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); + + nodeBuildInteriorBoundarySolverMatrix_i_r_1(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix, + arr, att, art, detDF, coeff_beta); + } + } + } + + return inner_boundary_solver_matrix; +} \ No newline at end of file diff --git a/include/Smoother/SmootherGive/buildMatrix.inl b/include/Smoother/SmootherGive/buildTridiagonalAsc.inl similarity index 56% rename from include/Smoother/SmootherGive/buildMatrix.inl rename to include/Smoother/SmootherGive/buildTridiagonalAsc.inl index 7a77fc1c..7f49e302 100644 --- a/include/Smoother/SmootherGive/buildMatrix.inl +++ b/include/Smoother/SmootherGive/buildTridiagonalAsc.inl @@ -3,7 +3,6 @@ namespace smoother_give { -/* Tridiagonal matrices */ static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, int batch, int row, int column, double value) { @@ -15,34 +14,15 @@ static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, solver.cyclic_corner(batch) += value; } -/* Inner Boundary COO/CSR matrix */ -#ifdef GMGPOLAR_USE_MUMPS -static inline void updateCOOCSRMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_index(ptr + offset) = row; - matrix.col_index(ptr + offset) = col; - matrix.value(ptr + offset) += val; -} -#else -static inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_nz_index(row, offset) = col; - matrix.row_nz_entry(row, offset) += val; -} -#endif - } // namespace smoother_give template -void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, - double arr, double att, double art, double detDF, double coeff_beta) +void SmootherGive::nodeBuildTridiagonalSolverMatrices( + int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, double att, double art, double detDF, + double coeff_beta) { - using smoother_give::updateCOOCSRMatrixElement; using smoother_give::updateMatrixElement; assert(i_r >= 0 && i_r < grid.nr()); @@ -105,7 +85,7 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const /* Fill matrix row of (i,j) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; @@ -146,17 +126,7 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ - if (!DirBC_Interior && i_r == 1) { - row = left_index; - ptr = getCircleAscIndex(i_r - 1, i_theta); - - const Stencil& LeftStencil = getStencil(i_r - 1); - - offset = LeftStencil[StencilPosition::Center]; - col = left_index; - val = coeff1 * arr; /* Center: (Right) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - } + // The inner boundary circle line are is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix. if (i_r > 1) { row = left_index; column = left_index; @@ -266,180 +236,39 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const /* Circle Section: Node in the inner boundary */ /* ------------------------------------------ */ else if (i_r == 0) { - /* ------------------------------------------------ */ - /* Case 1: Dirichlet boundary on the inner boundary */ - /* ------------------------------------------------ */ - if (DirBC_Interior) { - /* Fill result(i,j) */ - const double h2 = grid.radialSpacing(i_r); - const double k1 = grid.angularSpacing(i_theta - 1); - const double k2 = grid.angularSpacing(i_theta); - - const double coeff2 = 0.5 * (k1 + k2) / h2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - auto& right_solver = circle_tridiagonal_solver; - int right_batch = i_r + 1; - - const int center_index = i_theta; - const int right_index = i_theta; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = getCircleAscIndex(i_r, i_theta); - - const Stencil& CenterStencil = getStencil(i_r); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Fill matrix row of (i+1,j) */ - row = right_index; - column = right_index; - value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_solver, right_batch, row, column, value); - } - else { - /* ------------------------------------------------------------- */ - /* Case 2: Across origin discretization on the interior boundary */ - /* ------------------------------------------------------------- */ - // h1 gets replaced with 2 * R0. - // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()>>1)). - // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. - const double h1 = 2 * grid.radius(0); - const double h2 = grid.radialSpacing(i_r); - const double k1 = grid.angularSpacing(i_theta - 1); - const double k2 = grid.angularSpacing(i_theta); - - const double coeff1 = 0.5 * (k1 + k2) / h1; - const double coeff2 = 0.5 * (k1 + k2) / h2; - const double coeff3 = 0.5 * (h1 + h2) / k1; - const double coeff4 = 0.5 * (h1 + h2) / k2; - - /* left_matrix (across-the origin), center_matrix, right_matrix */ - /* -| x | o | x | */ - /* -| | | | */ - /* -| O | o | o | */ - /* -| | | | */ - /* -| x | o | x | */ - auto& right_solver = circle_tridiagonal_solver; - int right_batch = i_r + 1; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); - - const int center_index = i_theta; - const int left_index = i_theta_AcrossOrigin; - const int right_index = i_theta; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); - const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); - const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); - - int nz_index; /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* beta_{i,j} */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Left]; - col = left_index; - val = -coeff1 * arr; /* Left */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Bottom]; - col = bottom_index; - val = -coeff3 * att; /* Bottom */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Top]; - col = top_index; - val = -coeff4 * att; /* Top */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Fill matrix row of (i-1,j) */ - /* From view the view of the across origin node, */ /* the directions are roatated by 180 degrees in the stencil! */ - row = left_index; - ptr = left_nz_index; - - const Stencil& LeftStencil = CenterStencil; - - offset = LeftStencil[StencilPosition::Left]; - col = center_index; - val = -coeff1 * arr; /* Right -> Left*/ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = LeftStencil[StencilPosition::Center]; - col = left_index; - val = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Top Right -> Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - - /* Bottom Right -> Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - - /* Fill matrix row of (i+1,j) */ - row = right_index; - column = right_index; - value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_solver, right_batch, row, column, value); - - /* Fill matrix row of (i,j-1) */ - row = bottom_index; - ptr = bottom_nz_index; - - const Stencil& BottomStencil = CenterStencil; - - offset = BottomStencil[StencilPosition::Top]; - col = center_index; - val = -coeff3 * att; /* Top */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = BottomStencil[StencilPosition::Center]; - col = bottom_index; - val = +coeff3 * att; /* Center: (Top) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* TopLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - - /* Fill matrix row of (i,j+1) */ - row = top_index; - ptr = top_nz_index; - - const Stencil& TopStencil = CenterStencil; - - offset = TopStencil[StencilPosition::Bottom]; - col = center_index; - val = -coeff4 * att; /* Bottom */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); + // The inner boundary circle line are is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix. - offset = TopStencil[StencilPosition::Center]; - col = top_index; - val = +coeff4 * att; /* Center: (Bottom) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); + /* Fill result(i,j) */ + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - /* BottomLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - } + const double coeff2 = 0.5 * (k1 + k2) / h2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + auto& center_solver = circle_tridiagonal_solver; + int center_batch = i_r; + auto& right_solver = circle_tridiagonal_solver; + int right_batch = i_r + 1; + + const int center_index = i_theta; + const int right_index = i_theta; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + /* Fill matrix row of (i,j) */ + row = center_index; + column = center_index; + value = 1.0; + updateMatrixElement(center_solver, center_batch, row, column, value); + + /* Fill matrix row of (i+1,j) */ + row = right_index; + column = right_index; + value = coeff2 * arr; /* Center: (Left) */ + updateMatrixElement(right_solver, right_batch, row, column, value); } /* --------------------------------------------- */ /* Radial Section: Node next to circular section */ @@ -481,7 +310,7 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const /* Fill matrix row of (i,j) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; @@ -564,7 +393,7 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const /* ---------------------------- */ /* Fill matrix row of (i,j) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; @@ -637,11 +466,13 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const } template -void SmootherGive::buildAscCircleSection(const int i_r) +void SmootherGive::buildTridiagonalCircleSection(int i_r) { const PolarGrid& grid = Smoother::grid_; const LevelCacheType& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + // Access pattern is aligned with the memory layout of the grid data to maximize cache efficiency. const double r = grid.radius(i_r); for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { const int global_index = grid.index(i_r, i_theta); @@ -651,17 +482,19 @@ void SmootherGive::buildAscCircleSection(const int i_r) level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildAscGive(i_r, i_theta, grid, Smoother::DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); } } template -void SmootherGive::buildAscRadialSection(const int i_theta) +void SmootherGive::buildTridiagonalRadialSection(int i_theta) { const PolarGrid& grid = Smoother::grid_; const LevelCacheType& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + // Access pattern is aligned with the memory layout of the grid data to maximize cache efficiency. const double theta = grid.theta(i_theta); for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { const int global_index = grid.index(i_r, i_theta); @@ -671,132 +504,75 @@ void SmootherGive::buildAscRadialSection(const int i_theta) level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildAscGive(i_r, i_theta, grid, Smoother::DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); } } template -void SmootherGive::buildAscMatrices() +void SmootherGive::buildTridiagonalSolverMatrices() { - /* -------------------------------------- */ - /* Part 1: Allocate Asc Smoother matrices */ - /* -------------------------------------- */ - // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor. - // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead. - - const PolarGrid& grid = Smoother::grid_; - const bool DirBC_Interior = Smoother::DirBC_Interior_; - const int num_omp_threads = Smoother::num_omp_threads_; - -#ifdef GMGPOLAR_USE_MUMPS - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. - const int inner_i_r = 0; - const int inner_nnz = getNonZeroCountCircleAsc(inner_i_r); - const int num_circle_nodes = grid.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); - inner_boundary_circle_matrix_.is_symmetric(false); -#else - std::function nnz_per_row = [&](int i_theta) { - return DirBC_Interior ? 1 : 4; - }; - const int num_circle_nodes = grid.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); -#endif - - /* ---------------------------------- */ - /* Part 2: Fill Asc Smoother matrices */ - /* ---------------------------------- */ - - /* Multi-threaded execution: */ + const PolarGrid& grid = Smoother::grid_; + const LevelCacheType& level_cache = Smoother::level_cache_; + const bool DirBC_Interior = Smoother::DirBC_Interior_; + const int num_omp_threads = Smoother::num_omp_threads_; + const int num_smoother_circles = grid.numberSmootherCircles(); const int additional_radial_tasks = grid.ntheta() % 3; const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; + /* ---------------- */ + /* Circular section */ + /* ---------------- */ + // We parallelize the loop with step 3 to avoid data race conditions between adjacent circles. #pragma omp parallel num_threads(num_omp_threads) { #pragma omp for for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); - } + buildTridiagonalCircleSection(i_r); + } /* Implicit barrier */ #pragma omp for for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); - } + buildTridiagonalCircleSection(i_r); + } /* Implicit barrier */ #pragma omp for for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); - } + buildTridiagonalCircleSection(i_r); + } /* Implicit barrier */ + } + + /* ---------------- */ + /* Radial section */ + /* ---------------- */ + // We parallelize the loop with step 3 to avoid data race conditions between adjacent radial lines. + // Due to the periodicity in the angular direction, we can have at most 2 additional radial tasks + // that are handled serially before the parallel loops. + if (additional_radial_tasks > 0) { + const int i_theta = 0; + buildTridiagonalRadialSection(i_theta); + } + if (additional_radial_tasks > 1) { + const int i_theta = 1; + buildTridiagonalRadialSection(i_theta); + } + +#pragma omp parallel num_threads(num_omp_threads) + { #pragma omp for for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 0) { - int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); - } - else { - if (additional_radial_tasks == 0) { - buildAscRadialSection(0); - } - else if (additional_radial_tasks >= 1) { - buildAscRadialSection(0); - buildAscRadialSection(1); - } - } - } + const int i_theta = radial_task + additional_radial_tasks; + buildTridiagonalRadialSection(i_theta); + } /* Implicit barrier */ #pragma omp for for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 1) { - int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); - } - else { - if (additional_radial_tasks == 0) { - buildAscRadialSection(1); - } - else if (additional_radial_tasks == 1) { - buildAscRadialSection(2); - } - else if (additional_radial_tasks == 2) { - buildAscRadialSection(2); - buildAscRadialSection(3); - } - } - } + const int i_theta = radial_task + additional_radial_tasks; + buildTridiagonalRadialSection(i_theta); + } /* Implicit barrier */ #pragma omp for for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { - int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); - } + const int i_theta = radial_task + additional_radial_tasks; + buildTridiagonalRadialSection(i_theta); + } /* Implicit barrier */ } - - circle_tridiagonal_solver_.setup(); - radial_tridiagonal_solver_.setup(); - -#ifdef GMGPOLAR_USE_MUMPS - /* ------------------------------------------------------------------ */ - /* Part 3: Convert inner_boundary_circle_matrix to a symmetric matrix */ - /* ------------------------------------------------------------------ */ - SparseMatrixCOO full_matrix = std::move(inner_boundary_circle_matrix_); - - const int nnz = full_matrix.non_zero_size(); - const int numRows = full_matrix.rows(); - const int numColumns = full_matrix.columns(); - const int symmetric_nnz = nnz - (nnz - numRows) / 2; - - inner_boundary_circle_matrix_ = SparseMatrixCOO(numRows, numColumns, symmetric_nnz); - inner_boundary_circle_matrix_.is_symmetric(true); - - int current_nz = 0; - for (int nz_index = 0; nz_index < full_matrix.non_zero_size(); nz_index++) { - int current_row = full_matrix.row_index(nz_index); - int current_col = full_matrix.col_index(nz_index); - if (current_row <= current_col) { - inner_boundary_circle_matrix_.row_index(current_nz) = current_row; - inner_boundary_circle_matrix_.col_index(current_nz) = current_col; - inner_boundary_circle_matrix_.value(current_nz) = std::move(full_matrix.value(nz_index)); - current_nz++; - } - } -#endif -} +} \ No newline at end of file diff --git a/include/Smoother/SmootherGive/initializeMumps.inl b/include/Smoother/SmootherGive/initializeMumps.inl deleted file mode 100644 index 11b3a967..00000000 --- a/include/Smoother/SmootherGive/initializeMumps.inl +++ /dev/null @@ -1,114 +0,0 @@ -#pragma once - -#ifdef GMGPOLAR_USE_MUMPS - -template -void SmootherGive::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, - SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. */ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. - ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. - ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. - ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - - if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout << "Warning: Smoother inner boundary matrix is not positive definite: Negative pivots in the " - "factorization phase." - << std::endl; - } -} - -template -void SmootherGive::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif diff --git a/include/Smoother/SmootherGive/smootherGive.h b/include/Smoother/SmootherGive/smootherGive.h index 8c77e65d..de87c6fc 100644 --- a/include/Smoother/SmootherGive/smootherGive.h +++ b/include/Smoother/SmootherGive/smootherGive.h @@ -53,9 +53,6 @@ class SmootherGive : public Smoother explicit SmootherGive(const PolarGrid& grid, const LevelCacheType& level_cache, bool DirBC_Interior, int num_omp_threads); - // If MUMPS is enabled, this cleans up the inner boundary solver. - ~SmootherGive() override; - // Performs one full coupled smoothing sweep: // BC -> WC -> BR -> WR // Parallel implementation using OpenMP: @@ -64,40 +61,13 @@ class SmootherGive : public Smoother void smoothing(Vector x, ConstVector rhs, Vector temp) override; private: - /* ------------------- */ - /* Tridiagonal solvers */ - /* ------------------- */ - - // Batched solver for cyclic-tridiagonal circle line A_sc matrices. - BatchedTridiagonalSolver circle_tridiagonal_solver_; - - // Batched solver for tridiagonal radial circle line A_sc matrices. - BatchedTridiagonalSolver radial_tridiagonal_solver_; - - // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because - // it potentially includes across-origin coupling. Therefore, it is assembled - // into a sparse matrix and solved using a general-purpose sparse solver. - // When using the MUMPS solver, the matrix is assembled in COO format. - // When using the in-house solver, the matrix is stored in CSR format. -#ifdef GMGPOLAR_USE_MUMPS - using MatrixType = SparseMatrixCOO; - DMUMPS_STRUC_C inner_boundary_mumps_solver_; -#else - using MatrixType = SparseMatrixCSR; - SparseLUSolver inner_boundary_lu_solver_; -#endif - // Sparse matrix for the non-tridiagonal inner boundary circle block. - MatrixType inner_boundary_circle_matrix_; - - // Note: - // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. - // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. - // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. - /* ------------------- */ /* Stencil definitions */ /* ------------------- */ + // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_, + // since the mumps solver will be built in the member initializer of the Smoother class. + // Stencils encode neighborhood connectivity for A_sc matrix assembly. // It is only used in the construction of COO/CSR matrices. // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. @@ -114,12 +84,53 @@ class SmootherGive : public Smoother }; const Stencil circle_stencil_across_origin_ = { -1, 3, -1, - 1, 0, -1, + 1, 0, -1, -1, 2, -1 }; - // clang-format on + /* ------------------- */ + /* Tridiagonal solvers */ + /* ------------------- */ + + // Batched solver for cyclic-tridiagonal circle line A_sc matrices. + BatchedTridiagonalSolver circle_tridiagonal_solver_; + + // Batched solver for tridiagonal radial line A_sc matrices. + BatchedTridiagonalSolver radial_tridiagonal_solver_; + + // Note: + // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. + // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. + // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. + + /* ------------------------ */ + /* Interior boundary solver */ + /* ------------------------ */ + + // The inner circle matrix (i_r = 0) is NOT tridiagonal due to across-origin coupling. + // It is solved using a general-purpose sparse solver. + // - MUMPS: matrix assembled in COO format; solver owns the matrix internally. + // - In-house: matrix stored in CSR; solver does not own the matrix. + +#ifdef GMGPOLAR_USE_MUMPS + using InnerBoundaryMatrix = SparseMatrixCOO; + using InnerBoundarySolver = CooMumpsSolver; +#else + using InnerBoundaryMatrix = SparseMatrixCSR; + using InnerBoundarySolver = SparseLUSolver; + + // Stored only for the in-house solver (CSR). + InnerBoundaryMatrix inner_boundary_circle_matrix_; +#endif + + // Solver object (owns matrix if MUMPS, references if in-house solver). + InnerBoundarySolver inner_boundary_solver_; + + /* -------------- */ + /* Stencil access */ + /* -------------- */ + // Select correct stencil depending on the grid position. const Stencil& getStencil(int i_r) const; /* Only i_r = 0 implemented */ // Number of nonzero A_sc entries. @@ -131,19 +142,25 @@ class SmootherGive : public Smoother /* --------------- */ /* Matrix assembly */ /* --------------- */ - // Build all A_sc matrices for circle and radial smoothers. - void buildAscMatrices(); - // Build A_sc matrix block for a single circular line. - void buildAscCircleSection(int i_r); - // Build A_sc matrix block for a single radial line. - void buildAscRadialSection(int i_theta); - // Build A_sc for a specific node (i_r, i_theta) - void nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, double att, - double art, double detDF, double coeff_beta); + void buildTridiagonalSolverMatrices(); + void buildTridiagonalCircleSection(int i_r); + void buildTridiagonalRadialSection(int i_theta); + // Build the tridiagonal solver matrices for a specific node (i_r, i_theta) + void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, + double att, double art, double detDF, double coeff_beta); + + // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling. + InnerBoundaryMatrix buildInteriorBoundarySolverMatrix(); + // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. + void nodeBuildInteriorBoundarySolverMatrix_i_r_0(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + InnerBoundaryMatrix& matrix, double arr, double att, double art, + double detDF, double coeff_beta); + void nodeBuildInteriorBoundarySolverMatrix_i_r_1(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + InnerBoundaryMatrix& matrix, double arr, double att, double art, + double detDF, double coeff_beta); /* ---------------------- */ /* Orthogonal application */ @@ -151,9 +168,9 @@ class SmootherGive : public Smoother // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) // where x = u_sc and rhs = f_sc - void applyAscOrthoCircleSection(int i_r, const SmootherColor smoother_color, ConstVector x, + void applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp); - void applyAscOrthoRadialSection(int i_theta, const SmootherColor smoother_color, ConstVector x, + void applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp); /* ----------------- */ @@ -172,21 +189,11 @@ class SmootherGive : public Smoother void solveWhiteCircleSection(Vector x, Vector temp); void solveBlackRadialSection(Vector x, Vector temp); void solveWhiteRadialSection(Vector x, Vector temp); - - /* ----------------------------------- */ - /* Initialize and destroy MUMPS solver */ - /* ----------------------------------- */ -#ifdef GMGPOLAR_USE_MUMPS - // Initialize sparse MUMPS solver with assembled COO matrix. - void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); - // Release MUMPS internal memory and MPI structures. - void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); -#endif }; #include "smootherGive.inl" -#include "buildMatrix.inl" +#include "buildInnerBoundaryAsc.inl" +#include "buildTridiagonalAsc.inl" #include "applyAscOrtho.inl" #include "solveAscSystem.inl" -#include "matrixStencil.inl" -#include "initializeMumps.inl" +#include "matrixStencil.inl" \ No newline at end of file diff --git a/include/Smoother/SmootherGive/smootherGive.inl b/include/Smoother/SmootherGive/smootherGive.inl index 686b8458..decd540e 100644 --- a/include/Smoother/SmootherGive/smootherGive.inl +++ b/include/Smoother/SmootherGive/smootherGive.inl @@ -6,21 +6,16 @@ SmootherGive::SmootherGive(const PolarGrid& grid, const LevelCac : Smoother(grid, level_cache, DirBC_Interior, num_omp_threads) , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) -{ - buildAscMatrices(); #ifdef GMGPOLAR_USE_MUMPS - initializeMumpsSolver(inner_boundary_mumps_solver_, inner_boundary_circle_matrix_); + , inner_boundary_solver_(buildInteriorBoundarySolverMatrix()) #else - inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); + , inner_boundary_circle_matrix_(buildInteriorBoundarySolverMatrix()) + , inner_boundary_solver_(inner_boundary_circle_matrix_) #endif -} - -template -SmootherGive::~SmootherGive() { -#ifdef GMGPOLAR_USE_MUMPS - finalizeMumpsSolver(inner_boundary_mumps_solver_); -#endif + buildTridiagonalSolverMatrices(); + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); } // The smoothing solves linear systems of the form: diff --git a/include/Smoother/SmootherGive/solveAscSystem.inl b/include/Smoother/SmootherGive/solveAscSystem.inl index 37c7a2d3..7c484b41 100644 --- a/include/Smoother/SmootherGive/solveAscSystem.inl +++ b/include/Smoother/SmootherGive/solveAscSystem.inl @@ -17,19 +17,8 @@ void SmootherGive::solveBlackCircleSection(Vector x, Vec circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); if (is_inner_circle_black) { -#ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } -#else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); -#endif + Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta())); + inner_boundary_solver_.solveInPlace(inner_boundary); } // Move updated values to x @@ -59,19 +48,8 @@ void SmootherGive::solveWhiteCircleSection(Vector x, Vec circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); if (is_inner_circle_white) { -#ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } -#else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); -#endif + Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta())); + inner_boundary_solver_.solveInPlace(inner_boundary); } // Move updated values to x @@ -128,4 +106,4 @@ void SmootherGive::solveWhiteRadialSection(Vector x, Vec x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)]; } } -} +} \ No newline at end of file diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl index 978acdc2..9d40522d 100644 --- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl +++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl @@ -142,6 +142,12 @@ SmootherTake::buildInteriorBoundarySolverMatrix() const int i_r = 0; const int ntheta = grid.ntheta(); + // The interior boundary matrix is symmetric due to the periodicity in the theta direction + // and the assumption that ntheta is even, which is required for the across-origin discretization. + // We store all non-zero entries of the matrix, both in COO format (for MUMPS) + // and in CSR format (for the in-house solver). If the COO matrix is marked as symmetric, + // the COO_Mumps_Solver optimizes the factorization by only using the upper triangular part of the matrix, + // which is extracted by the COO_Mumps_Solver internally. #ifdef GMGPOLAR_USE_MUMPS const int nnz = getNonZeroCountCircleAsc(i_r); SparseMatrixCOO inner_boundary_solver_matrix(ntheta, ntheta, nnz); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 038278b1..fdd55be3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -20,8 +20,8 @@ add_executable(gmgpolar_tests Interpolation/extrapolated_prolongation.cpp Interpolation/extrapolated_restriction.cpp Residual/residual.cpp - DirectSolver/directSolver.cpp - DirectSolver/directSolverNoMumps.cpp + DirectSolver/directSolver_coo_mumps.cpp + DirectSolver/directSolver_csr_lu.cpp Smoother/smoother.cpp ExtrapolatedSmoother/extrapolated_smoother.cpp ConfigParser/config_parser.cpp diff --git a/tests/DirectSolver/directSolver.cpp b/tests/DirectSolver/directSolver_coo_mumps.cpp similarity index 95% rename from tests/DirectSolver/directSolver.cpp rename to tests/DirectSolver/directSolver_coo_mumps.cpp index 2d193732..ddfd4e9a 100644 --- a/tests/DirectSolver/directSolver.cpp +++ b/tests/DirectSolver/directSolver_coo_mumps.cpp @@ -10,8 +10,6 @@ #include "../../include/Residual/ResidualGive/residualGive.h" #include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h" #include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h" -#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h" -#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h" #include "../../include/InputFunctions/domainGeometry.h" #include "../../include/InputFunctions/densityProfileCoefficients.h" @@ -47,7 +45,7 @@ /* Test 1/2: */ /* Does the Take and Give Implementation match up? */ -TEST(DirectSolverTest, directSolver_DirBC_Interior) +TEST(DirectSolver_COO_MUMPS_Test, directSolver_DirBC_Interior) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -103,7 +101,7 @@ TEST(DirectSolverTest, directSolver_DirBC_Interior) } } -TEST(DirectSolverTest, directSolver_AcrossOrigin) +TEST(DirectSolver_COO_MUMPS_Test, directSolver_AcrossOrigin) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -166,7 +164,7 @@ TEST(DirectSolverTest, directSolver_AcrossOrigin) /* Circular */ /* -------- */ -TEST(DirectSolverTest_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -209,7 +207,7 @@ TEST(DirectSolverTest_CircularGeometry, SequentialDirectSolverDirBC_Interior_Cir ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTest_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -253,7 +251,7 @@ TEST(DirectSolverTest_CircularGeometry, ParallelDirectSolverDirBC_Interior_Circu ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTest_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -297,7 +295,7 @@ TEST(DirectSolverTest_CircularGeometry, SequentialDirectSolverAcrossOrigin_Circu ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-8); } -TEST(DirectSolverTest_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry) { std::vector radii = {1e-5, 0.1, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -345,7 +343,7 @@ TEST(DirectSolverTest_CircularGeometry, ParallelDirectSolverAcrossOrigin_Circula /* Shafranov */ /* --------- */ -TEST(DirectSolverTest_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -391,7 +389,7 @@ TEST(DirectSolverTest_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeo ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTest_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -441,7 +439,7 @@ TEST(DirectSolverTest_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeome /* Czarny */ /* ------ */ -TEST(DirectSolverTest_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -487,7 +485,7 @@ TEST(DirectSolverTest_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry) ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTest_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -537,7 +535,7 @@ TEST(DirectSolverTest_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry) /* Culham */ /* ------ */ -TEST(DirectSolverTest_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -581,7 +579,7 @@ TEST(DirectSolverTest_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry) ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTest_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -627,7 +625,7 @@ TEST(DirectSolverTest_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry) /* We adjust the PolarGrid to increase the precision */ -TEST(DirectSolverTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry) { std::vector radii = {1e-5, 1.441 * 1e-5, 3.8833 * 1e-5, 8.7666 * 1e-5, @@ -681,7 +679,7 @@ TEST(DirectSolverTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_ ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-10); } -TEST(DirectSolverTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry) { std::vector radii = {0.15, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -727,7 +725,7 @@ TEST(DirectSolverTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2 /* Same test now using Take */ -TEST(DirectSolverTakeTest_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -770,7 +768,7 @@ TEST(DirectSolverTakeTest_CircularGeometry, SequentialDirectSolverDirBC_Interior ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTakeTest_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -814,7 +812,7 @@ TEST(DirectSolverTakeTest_CircularGeometry, ParallelDirectSolverDirBC_Interior_C ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTakeTest_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -858,7 +856,7 @@ TEST(DirectSolverTakeTest_CircularGeometry, SequentialDirectSolverAcrossOrigin_C ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-8); } -TEST(DirectSolverTakeTest_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry) { std::vector radii = {1e-5, 0.1, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -906,7 +904,7 @@ TEST(DirectSolverTakeTest_CircularGeometry, ParallelDirectSolverAcrossOrigin_Cir /* Shafranov */ /* --------- */ -TEST(DirectSolverTakeTest_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -952,7 +950,7 @@ TEST(DirectSolverTakeTest_ShafranovGeometry, DirectSolverDirBC_Interior_Shafrano ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTakeTest_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -1002,7 +1000,7 @@ TEST(DirectSolverTakeTest_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovG /* Czarny */ /* ------ */ -TEST(DirectSolverTakeTest_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -1048,7 +1046,7 @@ TEST(DirectSolverTakeTest_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeome ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTakeTest_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -1098,7 +1096,7 @@ TEST(DirectSolverTakeTest_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometr /* Culham */ /* ------ */ -TEST(DirectSolverTakeTest_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -1142,7 +1140,7 @@ TEST(DirectSolverTakeTest_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeome ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-11); } -TEST(DirectSolverTakeTest_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -1186,7 +1184,7 @@ TEST(DirectSolverTakeTest_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometr ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-8); } -TEST(DirectSolverTakeTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry) { std::vector radii = {1e-5, 1.441 * 1e-5, 3.8833 * 1e-5, 8.7666 * 1e-5, @@ -1240,7 +1238,7 @@ TEST(DirectSolverTakeTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecis ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-10); } -TEST(DirectSolverTakeTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry) +TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry) { std::vector radii = {0.15, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { diff --git a/tests/DirectSolver/directSolverNoMumps.cpp b/tests/DirectSolver/directSolver_csr_lu.cpp similarity index 95% rename from tests/DirectSolver/directSolverNoMumps.cpp rename to tests/DirectSolver/directSolver_csr_lu.cpp index 370a9ef0..f1c366dd 100644 --- a/tests/DirectSolver/directSolverNoMumps.cpp +++ b/tests/DirectSolver/directSolver_csr_lu.cpp @@ -8,10 +8,8 @@ #include "../../include/GMGPolar/gmgpolar.h" #include "../../include/Residual/ResidualGive/residualGive.h" -#include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h" -#include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h" -#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h" -#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h" +#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h" +#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h" #include "../../include/InputFunctions/domainGeometry.h" #include "../../include/InputFunctions/densityProfileCoefficients.h" @@ -45,7 +43,7 @@ /* Test 1/2: */ /* Does the Take and Give Implementation match up? */ -TEST(DirectSolverTestNoMumps, directSolver_DirBC_Interior) +TEST(DirectSolver_CSR_LU_Test, directSolver_DirBC_Interior) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -101,7 +99,7 @@ TEST(DirectSolverTestNoMumps, directSolver_DirBC_Interior) } } -TEST(DirectSolverTestNoMumps, directSolver_AcrossOrigin) +TEST(DirectSolver_CSR_LU_Test, directSolver_AcrossOrigin) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -164,7 +162,7 @@ TEST(DirectSolverTestNoMumps, directSolver_AcrossOrigin) /* Circular */ /* -------- */ -TEST(DirectSolverTestNoMumps_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -207,7 +205,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, SequentialDirectSolverDirBC_Inter ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTestNoMumps_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -251,7 +249,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, ParallelDirectSolverDirBC_Interio ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTestNoMumps_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -295,7 +293,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, SequentialDirectSolverAcrossOrigi ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-8); } -TEST(DirectSolverTestNoMumps_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry) { std::vector radii = {1e-5, 0.1, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -343,7 +341,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, ParallelDirectSolverAcrossOrigin_ /* Shafranov */ /* --------- */ -TEST(DirectSolverTestNoMumps_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -389,7 +387,7 @@ TEST(DirectSolverTestNoMumps_ShafranovGeometry, DirectSolverDirBC_Interior_Shafr ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTestNoMumps_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -438,7 +436,7 @@ TEST(DirectSolverTestNoMumps_ShafranovGeometry, DirectSolverAcrossOrigin_Shafran /* Czarny */ /* ------ */ -TEST(DirectSolverTestNoMumps_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -483,7 +481,7 @@ TEST(DirectSolverTestNoMumps_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGe ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTestNoMumps_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -533,7 +531,7 @@ TEST(DirectSolverTestNoMumps_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeom /* Culham */ /* ------ */ -TEST(DirectSolverTestNoMumps_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -576,7 +574,7 @@ TEST(DirectSolverTestNoMumps_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGe ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-11); } -TEST(DirectSolverTestNoMumps_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -621,7 +619,7 @@ TEST(DirectSolverTestNoMumps_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeom /* We adjust the PolarGrid to increase the precision */ -TEST(DirectSolverTestNoMumps_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry) { std::vector radii = {1e-5, 1.441 * 1e-5, 3.8833 * 1e-5, 8.7666 * 1e-5, @@ -674,7 +672,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, DirectSolverAcrossOriginHigherPre ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-10); } -TEST(DirectSolverTestNoMumps_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry) +TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry) { std::vector radii = {0.15, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -719,7 +717,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, DirectSolverAcrossOriginHigherPre /* Same test now using Take */ -TEST(DirectSolverTakeCustomLUTest_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -761,7 +759,7 @@ TEST(DirectSolverTakeCustomLUTest_CircularGeometry, SequentialDirectSolverDirBC_ ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTakeCustomLUTest_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -804,7 +802,7 @@ TEST(DirectSolverTakeCustomLUTest_CircularGeometry, ParallelDirectSolverDirBC_In ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTakeCustomLUTest_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -847,7 +845,7 @@ TEST(DirectSolverTakeCustomLUTest_CircularGeometry, SequentialDirectSolverAcross ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-8); } -TEST(DirectSolverTakeCustomLUTest_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry) { std::vector radii = {1e-5, 0.1, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -894,7 +892,7 @@ TEST(DirectSolverTakeCustomLUTest_CircularGeometry, ParallelDirectSolverAcrossOr /* Shafranov */ /* --------- */ -TEST(DirectSolverTakeCustomLUTest_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -939,7 +937,7 @@ TEST(DirectSolverTakeCustomLUTest_ShafranovGeometry, DirectSolverDirBC_Interior_ ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTakeCustomLUTest_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -989,7 +987,7 @@ TEST(DirectSolverTakeCustomLUTest_ShafranovGeometry, DirectSolverAcrossOrigin_Sh /* Czarny */ /* ------ */ -TEST(DirectSolverTakeCustomLUTest_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -1035,7 +1033,7 @@ TEST(DirectSolverTakeCustomLUTest_CzarnyGeometry, DirectSolverDirBC_Interior_Cza ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-12); } -TEST(DirectSolverTakeCustomLUTest_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -1085,7 +1083,7 @@ TEST(DirectSolverTakeCustomLUTest_CzarnyGeometry, DirectSolverAcrossOrigin_Czarn /* Culham */ /* ------ */ -TEST(DirectSolverTakeCustomLUTest_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -1127,7 +1125,7 @@ TEST(DirectSolverTakeCustomLUTest_CulhamGeometry, DirectSolverDirBC_Interior_Cul ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-11); } -TEST(DirectSolverTakeCustomLUTest_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry) { std::vector radii = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { @@ -1170,7 +1168,7 @@ TEST(DirectSolverTakeCustomLUTest_CulhamGeometry, DirectSolverAcrossOrigin_Culha ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-8); } -TEST(DirectSolverTakeCustomLUTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry) { std::vector radii = {1e-5, 1.441 * 1e-5, 3.8833 * 1e-5, 8.7666 * 1e-5, @@ -1223,7 +1221,7 @@ TEST(DirectSolverTakeCustomLUTest_CircularGeometry, DirectSolverAcrossOriginHigh ASSERT_NEAR(infinity_norm(ConstVector(residuum)), 0.0, 1e-10); } -TEST(DirectSolverTakeCustomLUTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry) +TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry) { std::vector radii = {0.15, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3}; std::vector angles = { diff --git a/tests/ExtrapolatedSmoother/extrapolated_smoother.cpp b/tests/ExtrapolatedSmoother/extrapolated_smoother.cpp index fd6f950c..34554c33 100644 --- a/tests/ExtrapolatedSmoother/extrapolated_smoother.cpp +++ b/tests/ExtrapolatedSmoother/extrapolated_smoother.cpp @@ -9,8 +9,8 @@ #include "../../include/Residual/ResidualGive/residualGive.h" #include "../../include/Residual/ResidualTake/residualTake.h" -#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h" -#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h" +#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h" +#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h" #include "../../include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h" #include "../../include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h" diff --git a/tests/Smoother/smoother.cpp b/tests/Smoother/smoother.cpp index a69e46a3..6d87c94a 100644 --- a/tests/Smoother/smoother.cpp +++ b/tests/Smoother/smoother.cpp @@ -8,7 +8,7 @@ #include "../../include/GMGPolar/gmgpolar.h" #include "../../include/Residual/ResidualGive/residualGive.h" -#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h" +#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h" #include "../../include/Smoother/SmootherGive/smootherGive.h" #include "../../include/Smoother/SmootherTake/smootherTake.h"