Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions include/Definitions/geometry_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ inline void compute_jacobian_elements(const DomainGeometry& domain_geometry, dou
/* which is represented by: */
/* [arr, 0.5*art] */
/* [0.5*atr, att] */
arr = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_alpha / fabs(detDF);
att = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_alpha / fabs(detDF);
art = (-Jtt * Jtr - Jrt * Jrr) * coeff_alpha / fabs(detDF);
arr = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_alpha / std::fabs(detDF);
att = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_alpha / std::fabs(detDF);
art = (-Jtt * Jtr - Jrt * Jrr) * coeff_alpha / std::fabs(detDF);
/* Note that the inverse Jacobian matrix DF^{-1} is: */
/* 1.0 / det(DF) * */
/* [Jtt, -Jrt] */
Expand Down
44 changes: 0 additions & 44 deletions include/Definitions/global_definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,50 +79,6 @@ enum class BetaCoeff
ALPHA_INVERSE = 1
};

/* ---------------------------- */
/* Mumps - Constant Definitions */
/* ---------------------------- */
#ifdef GMGPOLAR_USE_MUMPS
#include "dmumps_c.h"
/* Mumps inline functions s.t. indices match documentation */
inline int& ICNTL(DMUMPS_STRUC_C& mumps_solver, int I)
{
return mumps_solver.icntl[(I)-1];
}
inline double& CNTL(DMUMPS_STRUC_C& mumps_solver, int I)
{
return mumps_solver.cntl[(I)-1];
}
inline int& INFOG(DMUMPS_STRUC_C& mumps_solver, int I)
{
return mumps_solver.infog[(I)-1];
}

constexpr int USE_COMM_WORLD = -987654;
constexpr int PAR_NOT_PARALLEL = 0;
constexpr int PAR_PARALLEL = 1;

constexpr int JOB_INIT = -1;
constexpr int JOB_END = -2;
constexpr int JOB_REMOVE_SAVED_DATA = -3;
constexpr int JOB_FREE_INTERNAL_DATA = -4;
constexpr int JOB_SUPPRESS_OOC_FILES = -200;

constexpr int JOB_ANALYSIS_PHASE = 1;
constexpr int JOB_FACTORIZATION_PHASE = 2;
constexpr int JOB_COMPUTE_SOLUTION = 3;
constexpr int JOB_ANALYSIS_AND_FACTORIZATION = 4;
constexpr int JOB_FACTORIZATION_AND_SOLUTION = 5;
constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION = 6;
constexpr int JOB_SAVE_INTERNAL_DATA = 7;
constexpr int JOB_RESTORE_INTERNAL_DATA = 8;
constexpr int JOB_DISTRIBUTE_RHS = 9;

constexpr int SYM_UNSYMMETRIC = 0;
constexpr int SYM_POSITIVE_DEFINITE = 1;
constexpr int SYM_GENERAL_SYMMETRIC = 2;
#endif

// --------------------------------------- //
// Function-like macros for LIKWID markers //
// --------------------------------------- //
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */

double center_value =
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
Expand Down Expand Up @@ -184,13 +184,13 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
double bottom_value = -coeff3 * (att(center_index) + att(bottom_index)); /* Bottom */
double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */

double center_value =
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
- top_value /* Center: (Top) */
);
double center_value = (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] *
std::fabs(detDF(center_index)) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
- top_value /* Center: (Top) */
);

double bottom_right_value = +0.25 * (art(right_index) + art(bottom_index)); /* Bottom Right */
double top_right_value = -0.25 * (art(right_index) + art(top_index)); /* Top Right */
Expand Down Expand Up @@ -276,7 +276,7 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */

double center_value =
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
Expand Down Expand Up @@ -383,7 +383,7 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */

double center_value =
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
Expand Down
117 changes: 50 additions & 67 deletions include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl
Original file line number Diff line number Diff line change
Expand Up @@ -798,80 +798,63 @@ SparseMatrixCSR<double> DirectSolver_CSR_LU_Give<LevelCacheType>::buildSolverMat

SparseMatrixCSR<double> solver_matrix(n, n, nnz_per_row);

if (num_omp_threads == 1) {
/* Single-threaded execution */
for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) {
buildSolverMatrixCircleSection(i_r, solver_matrix);
}
for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}
}
else {
/* Multi-threaded execution: For Loops */
const int num_circle_tasks = grid.numberSmootherCircles();
const int additional_radial_tasks = grid.ntheta() % 3;
const int num_radial_tasks = grid.ntheta() - additional_radial_tasks;

const int num_smoother_circles = grid.numberSmootherCircles();
const int additional_radial_tasks = grid.ntheta() % 3;
const int num_radial_tasks = grid.ntheta() - additional_radial_tasks;

/* ---------------- */
/* Circular section */
/* ---------------- */
// We parallelize the loop with step 3 to avoid data race conditions between adjacent circles.
#pragma omp parallel num_threads(num_omp_threads)
{
{
#pragma omp for
for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) {
int i_r = grid.numberSmootherCircles() - circle_task - 1;
buildSolverMatrixCircleSection(i_r, solver_matrix);
}
for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) {
buildSolverMatrixCircleSection(i_r, solver_matrix);
} /* Implicit barrier */
#pragma omp for
for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) {
int i_r = grid.numberSmootherCircles() - circle_task - 1;
buildSolverMatrixCircleSection(i_r, solver_matrix);
}
#pragma omp for nowait
for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) {
int i_r = grid.numberSmootherCircles() - circle_task - 1;
buildSolverMatrixCircleSection(i_r, solver_matrix);
}
for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) {
buildSolverMatrixCircleSection(i_r, solver_matrix);
} /* Implicit barrier */
#pragma omp for
for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) {
buildSolverMatrixCircleSection(i_r, solver_matrix);
} /* Implicit barrier */
}

/* ---------------- */
/* Radial section */
/* ---------------- */
// We parallelize the loop with step 3 to avoid data race conditions between adjacent radial lines.
// Due to the periodicity in the angular direction, we can have at most 2 additional radial tasks
// that are handled serially before the parallel loops.
if (additional_radial_tasks > 0) {
const int i_theta = 0;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}

if (additional_radial_tasks > 1) {
const int i_theta = 1;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}

#pragma omp parallel num_threads(num_omp_threads)
{
#pragma omp for
for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
if (radial_task > 0) {
int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}
else {
if (additional_radial_tasks == 0) {
buildSolverMatrixRadialSection(0, solver_matrix);
}
else if (additional_radial_tasks >= 1) {
buildSolverMatrixRadialSection(0, solver_matrix);
buildSolverMatrixRadialSection(1, solver_matrix);
}
}
}
for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
const int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
} /* Implicit barrier */
#pragma omp for
for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) {
if (radial_task > 1) {
int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}
else {
if (additional_radial_tasks == 0) {
buildSolverMatrixRadialSection(1, solver_matrix);
}
else if (additional_radial_tasks == 1) {
buildSolverMatrixRadialSection(2, solver_matrix);
}
else if (additional_radial_tasks == 2) {
buildSolverMatrixRadialSection(2, solver_matrix);
buildSolverMatrixRadialSection(3, solver_matrix);
}
}
}
for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) {
const int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
} /* Implicit barrier */
#pragma omp for
for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) {
int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
}
}
for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) {
const int i_theta = radial_task + additional_radial_tasks;
buildSolverMatrixRadialSection(i_theta, solver_matrix);
} /* Implicit barrier */
}

return solver_matrix;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,6 @@ class DirectSolver_CSR_LU_Give : public DirectSolver<LevelCacheType>
-1, 0, -1,
-1, -1, -1
};
const Stencil stencil_next_inner_DB_ = {
7, 4, 8,
1, 0, 2,
5, 3, 6
};
const Stencil stencil_next_outer_DB_ = {
7, 4, 8,
1, 0, 2,
5, 3, 6
};
// clang-format on

SparseMatrixCSR<double> buildSolverMatrix();
Expand Down
39 changes: 11 additions & 28 deletions include/DirectSolver/DirectSolver-CSR-LU-Give/matrixStencil.inl
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,11 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getStencilSize(int global_index) c
int i_r, i_theta;
grid.multiIndex(global_index, i_r, i_theta);

const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9;
const int size_stencil_interior = 9;
const int size_stencil_next_outer_boundary = 9;
const int size_stencil_outer_boundary = 1;
const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
const int size_stencil_interior = 9;
const int size_stencil_outer_boundary = 1;

if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) {
if ((i_r > 0 && i_r < grid.nr() - 1)) {
return size_stencil_interior;
}
else if (i_r == 0 && !DirBC_Interior) {
Expand All @@ -24,12 +22,7 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getStencilSize(int global_index) c
else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) {
return size_stencil_outer_boundary;
}
else if (i_r == 1 && DirBC_Interior) {
return size_stencil_next_inner_boundary;
}
else if (i_r == grid.nr() - 2) {
return size_stencil_next_outer_boundary;
}

throw std::out_of_range("Invalid index for stencil");
}

Expand All @@ -42,7 +35,7 @@ const Stencil& DirectSolver_CSR_LU_Give<LevelCacheType>::getStencil(int i_r) con
assert(0 <= i_r && i_r < grid.nr());
assert(grid.nr() >= 4);

if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) {
if ((i_r > 0 && i_r < grid.nr() - 1)) {
return stencil_interior_;
}
else if (i_r == 0 && !DirBC_Interior) {
Expand All @@ -51,12 +44,7 @@ const Stencil& DirectSolver_CSR_LU_Give<LevelCacheType>::getStencil(int i_r) con
else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) {
return stencil_DB_;
}
else if (i_r == 1 && DirBC_Interior) {
return stencil_next_inner_DB_;
}
else if (i_r == grid.nr() - 2) {
return stencil_next_outer_DB_;
}

throw std::out_of_range("Invalid index for stencil");
}

Expand All @@ -66,15 +54,10 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getNonZeroCountSolverMatrix() cons
const PolarGrid& grid = DirectSolver<LevelCacheType>::grid_;
const bool DirBC_Interior = DirectSolver<LevelCacheType>::DirBC_Interior_;

const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9;
const int size_stencil_interior = 9;
const int size_stencil_next_outer_boundary = 9;
const int size_stencil_outer_boundary = 1;

assert(grid.nr() >= 4);
const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
const int size_stencil_interior = 9;
const int size_stencil_outer_boundary = 1;

return grid.ntheta() *
(size_stencil_inner_boundary + size_stencil_next_inner_boundary + (grid.nr() - 4) * size_stencil_interior +
size_stencil_next_outer_boundary + size_stencil_outer_boundary);
(size_stencil_inner_boundary + (grid.nr() - 2) * size_stencil_interior + size_stencil_outer_boundary);
}
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,13 @@ void DirectSolver_CSR_LU_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
double bottom_value = -coeff3 * (att[center_index] + att[bottom_index]); /* Bottom */
double top_value = -coeff4 * (att[center_index] + att[top_index]); /* Top */

double center_value =
(+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF[center_index]) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
- top_value /* Center: (Top) */
);
double center_value = (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] *
std::fabs(detDF[center_index]) /* beta_{i,j} */
- left_value /* Center: (Left) */
- right_value /* Center: (Right) */
- bottom_value /* Center: (Bottom) */
- top_value /* Center: (Top) */
);

double bottom_right_value = +0.25 * (art[right_index] + art[bottom_index]); /* Bottom Right */
double top_right_value = -0.25 * (art[right_index] + art[top_index]); /* Top Right */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,6 @@ class DirectSolver_CSR_LU_Take : public DirectSolver<LevelCacheType>
-1, 0, -1,
-1, -1, -1
};
const Stencil stencil_next_inner_DB_ = {
7, 4, 8,
1, 0, 2,
5, 3, 6
};
const Stencil stencil_next_outer_DB_ = {
7, 4, 8,
1, 0, 2,
5, 3, 6
};
// clang-format on

SparseMatrixCSR<double> buildSolverMatrix();
Expand Down
Loading
Loading