diff --git a/include/Definitions/geometry_helper.h b/include/Definitions/geometry_helper.h
index 3fcb9934..29821c9e 100644
--- a/include/Definitions/geometry_helper.h
+++ b/include/Definitions/geometry_helper.h
@@ -24,9 +24,9 @@ inline void compute_jacobian_elements(const DomainGeometry& domain_geometry, dou
     /* which is represented by: */
     /*  [arr, 0.5*art]  */
     /*  [0.5*atr, att]  */
-    arr = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_alpha / fabs(detDF);
-    att = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_alpha / fabs(detDF);
-    art = (-Jtt * Jtr - Jrt * Jrr) * coeff_alpha / fabs(detDF);
+    arr = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_alpha / std::fabs(detDF);
+    att = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_alpha / std::fabs(detDF);
+    art = (-Jtt * Jtr - Jrt * Jrr) * coeff_alpha / std::fabs(detDF);
     /* Note that the inverse Jacobian matrix DF^{-1} is: */
     /*  1.0 / det(DF) *  */
     /*  [Jtt, -Jrt]      */
diff --git a/include/Definitions/global_definitions.h b/include/Definitions/global_definitions.h
index 2f4f1bef..d90ae9a6 100644
--- a/include/Definitions/global_definitions.h
+++ b/include/Definitions/global_definitions.h
@@ -79,50 +79,6 @@ enum class BetaCoeff
     ALPHA_INVERSE = 1
 };
 
-/* ---------------------------- */
-/* Mumps - Constant Definitions */
-/* ---------------------------- */
-#ifdef GMGPOLAR_USE_MUMPS
-    #include "dmumps_c.h"
-/* Mumps inline functions s.t. indices match documentation */
-inline int& ICNTL(DMUMPS_STRUC_C& mumps_solver, int I)
-{
-    return mumps_solver.icntl[(I)-1];
-}
-inline double& CNTL(DMUMPS_STRUC_C& mumps_solver, int I)
-{
-    return mumps_solver.cntl[(I)-1];
-}
-inline int& INFOG(DMUMPS_STRUC_C& mumps_solver, int I)
-{
-    return mumps_solver.infog[(I)-1];
-}
-
-constexpr int USE_COMM_WORLD   = -987654;
-constexpr int PAR_NOT_PARALLEL = 0;
-constexpr int PAR_PARALLEL     = 1;
-
-constexpr int JOB_INIT               = -1;
-constexpr int JOB_END                = -2;
-constexpr int JOB_REMOVE_SAVED_DATA  = -3;
-constexpr int JOB_FREE_INTERNAL_DATA = -4;
-constexpr int JOB_SUPPRESS_OOC_FILES = -200;
-
-constexpr int JOB_ANALYSIS_PHASE                  = 1;
-constexpr int JOB_FACTORIZATION_PHASE             = 2;
-constexpr int JOB_COMPUTE_SOLUTION                = 3;
-constexpr int JOB_ANALYSIS_AND_FACTORIZATION      = 4;
-constexpr int JOB_FACTORIZATION_AND_SOLUTION      = 5;
-constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION = 6;
-constexpr int JOB_SAVE_INTERNAL_DATA              = 7;
-constexpr int JOB_RESTORE_INTERNAL_DATA           = 8;
-constexpr int JOB_DISTRIBUTE_RHS                  = 9;
-
-constexpr int SYM_UNSYMMETRIC       = 0;
-constexpr int SYM_POSITIVE_DEFINITE = 1;
-constexpr int SYM_GENERAL_SYMMETRIC = 2;
-#endif
-
 // --------------------------------------- //
 // Function-like macros for LIKWID markers //
 // --------------------------------------- //
diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl
index 9f952eb8..e103d68a 100644
--- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl
+++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.inl
@@ -60,7 +60,7 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
         double top_value    = -coeff4 * (att(center_index) + att(top_index)); /* Top */
 
         double center_value =
-            (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
+            (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */
              - left_value /* Center: (Left) */
              - right_value /* Center: (Right) */
              - bottom_value /* Center: (Bottom) */
@@ -184,13 +184,13 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
             double bottom_value = -coeff3 * (att(center_index) + att(bottom_index)); /* Bottom */
             double top_value    = -coeff4 * (att(center_index) + att(top_index)); /* Top */
 
-            double center_value =
-                (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
-                 - left_value /* Center: (Left) */
-                 - right_value /* Center: (Right) */
-                 - bottom_value /* Center: (Bottom) */
-                 - top_value /* Center: (Top) */
-                );
+            double center_value = (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] *
+                                       std::fabs(detDF(center_index)) /* beta_{i,j} */
+                                   - left_value /* Center: (Left) */
+                                   - right_value /* Center: (Right) */
+                                   - bottom_value /* Center: (Bottom) */
+                                   - top_value /* Center: (Top) */
+            );
 
             double bottom_right_value = +0.25 * (art(right_index) + art(bottom_index)); /* Bottom Right */
             double top_right_value    = -0.25 * (art(right_index) + art(top_index)); /* Top Right */
@@ -276,7 +276,7 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
         double top_value    = -coeff4 * (att(center_index) + att(top_index)); /* Top */
 
         double center_value =
-            (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
+            (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */
              - left_value /* Center: (Left) */
              - right_value /* Center: (Right) */
              - bottom_value /* Center: (Bottom) */
@@ -383,7 +383,7 @@ void DirectSolver_COO_MUMPS_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
         double top_value    = -coeff4 * (att(center_index) + att(top_index)); /* Top */
 
         double center_value =
-            (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */
+            (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */
              - left_value /* Center: (Left) */
              - right_value /* Center: (Right) */
              - bottom_value /* Center: (Bottom) */
diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl
index 55cdb111..08893805 100644
--- a/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl
+++ b/include/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.inl
@@ -798,80 +798,63 @@ SparseMatrixCSR<double> DirectSolver_CSR_LU_Give<LevelCacheType>::buildSolverMat
 
     SparseMatrixCSR<double> solver_matrix(n, n, nnz_per_row);
 
-    if (num_omp_threads == 1) {
-        /* Single-threaded execution */
-        for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) {
-            buildSolverMatrixCircleSection(i_r, solver_matrix);
-        }
-        for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
-            buildSolverMatrixRadialSection(i_theta, solver_matrix);
-        }
-    }
-    else {
-        /*  Multi-threaded execution: For Loops */
-        const int num_circle_tasks        = grid.numberSmootherCircles();
-        const int additional_radial_tasks = grid.ntheta() % 3;
-        const int num_radial_tasks        = grid.ntheta() - additional_radial_tasks;
-
+    const int num_smoother_circles    = grid.numberSmootherCircles();
+    const int additional_radial_tasks = grid.ntheta() % 3;
+    const int num_radial_tasks        = grid.ntheta() - additional_radial_tasks;
+
+    /* ---------------- */
+    /* Circular section */
+    /* ---------------- */
+    // We parallelize the loop with step 3 to avoid data race conditions between adjacent circles.
 #pragma omp parallel num_threads(num_omp_threads)
-        {
+    {
 #pragma omp for
-            for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) {
-                int i_r = grid.numberSmootherCircles() - circle_task - 1;
-                buildSolverMatrixCircleSection(i_r, solver_matrix);
-            }
+        for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) {
+            buildSolverMatrixCircleSection(i_r, solver_matrix);
+        } /* Implicit barrier */
 #pragma omp for
-            for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) {
-                int i_r = grid.numberSmootherCircles() - circle_task - 1;
-                buildSolverMatrixCircleSection(i_r, solver_matrix);
-            }
-#pragma omp for nowait
-            for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) {
-                int i_r = grid.numberSmootherCircles() - circle_task - 1;
-                buildSolverMatrixCircleSection(i_r, solver_matrix);
-            }
+        for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) {
+            buildSolverMatrixCircleSection(i_r, solver_matrix);
+        } /* Implicit barrier */
+#pragma omp for
+        for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) {
+            buildSolverMatrixCircleSection(i_r, solver_matrix);
+        } /* Implicit barrier */
+    }
+
+    /* ---------------- */
+    /* Radial section */
+    /* ---------------- */
+    // We parallelize the loop with step 3 to avoid data race conditions between adjacent radial lines.
+    // Due to the periodicity in the angular direction, we can have at most 2 additional radial tasks
+    // that are handled serially before the parallel loops.
+    if (additional_radial_tasks > 0) {
+        const int i_theta = 0;
+        buildSolverMatrixRadialSection(i_theta, solver_matrix);
+    }
+
+    if (additional_radial_tasks > 1) {
+        const int i_theta = 1;
+        buildSolverMatrixRadialSection(i_theta, solver_matrix);
+    }
 
+#pragma omp parallel num_threads(num_omp_threads)
+    {
 #pragma omp for
-            for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
-                if (radial_task > 0) {
-                    int i_theta = radial_task + additional_radial_tasks;
-                    buildSolverMatrixRadialSection(i_theta, solver_matrix);
-                }
-                else {
-                    if (additional_radial_tasks == 0) {
-                        buildSolverMatrixRadialSection(0, solver_matrix);
-                    }
-                    else if (additional_radial_tasks >= 1) {
-                        buildSolverMatrixRadialSection(0, solver_matrix);
-                        buildSolverMatrixRadialSection(1, solver_matrix);
-                    }
-                }
-            }
+        for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
+            const int i_theta = radial_task + additional_radial_tasks;
+            buildSolverMatrixRadialSection(i_theta, solver_matrix);
+        } /* Implicit barrier */
 #pragma omp for
-            for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) {
-                if (radial_task > 1) {
-                    int i_theta = radial_task + additional_radial_tasks;
-                    buildSolverMatrixRadialSection(i_theta, solver_matrix);
-                }
-                else {
-                    if (additional_radial_tasks == 0) {
-                        buildSolverMatrixRadialSection(1, solver_matrix);
-                    }
-                    else if (additional_radial_tasks == 1) {
-                        buildSolverMatrixRadialSection(2, solver_matrix);
-                    }
-                    else if (additional_radial_tasks == 2) {
-                        buildSolverMatrixRadialSection(2, solver_matrix);
-                        buildSolverMatrixRadialSection(3, solver_matrix);
-                    }
-                }
-            }
+        for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) {
+            const int i_theta = radial_task + additional_radial_tasks;
+            buildSolverMatrixRadialSection(i_theta, solver_matrix);
+        } /* Implicit barrier */
 #pragma omp for
-            for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) {
-                int i_theta = radial_task + additional_radial_tasks;
-                buildSolverMatrixRadialSection(i_theta, solver_matrix);
-            }
-        }
+        for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) {
+            const int i_theta = radial_task + additional_radial_tasks;
+            buildSolverMatrixRadialSection(i_theta, solver_matrix);
+        } /* Implicit barrier */
     }
 
     return solver_matrix;
diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h b/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h
similarity index 90%
rename from include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h
rename to include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h
index 2e902808..e1f3c926 100644
--- a/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h
+++ b/include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h
@@ -33,16 +33,6 @@ class DirectSolver_CSR_LU_Give : public DirectSolver<LevelCacheType>
         -1,  0, -1,
         -1, -1, -1
     };
-    const Stencil stencil_next_inner_DB_ = {
-        7, 4, 8,
-        1, 0, 2,
-        5, 3, 6
-    };
-    const Stencil stencil_next_outer_DB_ = {
-        7, 4, 8,
-        1, 0, 2,
-        5, 3, 6
-    };
     // clang-format on
 
     SparseMatrixCSR<double> buildSolverMatrix();
diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Give/matrixStencil.inl b/include/DirectSolver/DirectSolver-CSR-LU-Give/matrixStencil.inl
index 8e65f71f..58418146 100644
--- a/include/DirectSolver/DirectSolver-CSR-LU-Give/matrixStencil.inl
+++ b/include/DirectSolver/DirectSolver-CSR-LU-Give/matrixStencil.inl
@@ -9,13 +9,11 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getStencilSize(int global_index) c
     int i_r, i_theta;
     grid.multiIndex(global_index, i_r, i_theta);
 
-    const int size_stencil_inner_boundary      = DirBC_Interior ? 1 : 7;
-    const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9;
-    const int size_stencil_interior            = 9;
-    const int size_stencil_next_outer_boundary = 9;
-    const int size_stencil_outer_boundary      = 1;
+    const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
+    const int size_stencil_interior       = 9;
+    const int size_stencil_outer_boundary = 1;
 
-    if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) {
+    if ((i_r > 0 && i_r < grid.nr() - 1)) {
         return size_stencil_interior;
     }
     else if (i_r == 0 && !DirBC_Interior) {
@@ -24,12 +22,7 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getStencilSize(int global_index) c
     else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) {
         return size_stencil_outer_boundary;
     }
-    else if (i_r == 1 && DirBC_Interior) {
-        return size_stencil_next_inner_boundary;
-    }
-    else if (i_r == grid.nr() - 2) {
-        return size_stencil_next_outer_boundary;
-    }
+
     throw std::out_of_range("Invalid index for stencil");
 }
 
@@ -42,7 +35,7 @@ const Stencil& DirectSolver_CSR_LU_Give<LevelCacheType>::getStencil(int i_r) con
     assert(0 <= i_r && i_r < grid.nr());
     assert(grid.nr() >= 4);
 
-    if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) {
+    if ((i_r > 0 && i_r < grid.nr() - 1)) {
         return stencil_interior_;
     }
     else if (i_r == 0 && !DirBC_Interior) {
@@ -51,12 +44,7 @@ const Stencil& DirectSolver_CSR_LU_Give<LevelCacheType>::getStencil(int i_r) con
     else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) {
         return stencil_DB_;
     }
-    else if (i_r == 1 && DirBC_Interior) {
-        return stencil_next_inner_DB_;
-    }
-    else if (i_r == grid.nr() - 2) {
-        return stencil_next_outer_DB_;
-    }
+
     throw std::out_of_range("Invalid index for stencil");
 }
 
@@ -66,15 +54,10 @@ int DirectSolver_CSR_LU_Give<LevelCacheType>::getNonZeroCountSolverMatrix() cons
     const PolarGrid& grid     = DirectSolver<LevelCacheType>::grid_;
     const bool DirBC_Interior = DirectSolver<LevelCacheType>::DirBC_Interior_;
 
-    const int size_stencil_inner_boundary      = DirBC_Interior ? 1 : 7;
-    const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9;
-    const int size_stencil_interior            = 9;
-    const int size_stencil_next_outer_boundary = 9;
-    const int size_stencil_outer_boundary      = 1;
-
-    assert(grid.nr() >= 4);
+    const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
+    const int size_stencil_interior       = 9;
+    const int size_stencil_outer_boundary = 1;
 
     return grid.ntheta() *
-           (size_stencil_inner_boundary + size_stencil_next_inner_boundary + (grid.nr() - 4) * size_stencil_interior +
-            size_stencil_next_outer_boundary + size_stencil_outer_boundary);
+           (size_stencil_inner_boundary + (grid.nr() - 2) * size_stencil_interior + size_stencil_outer_boundary);
 }
diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl b/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl
index b9a31e36..3cd34717 100644
--- a/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl
+++ b/include/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.inl
@@ -172,13 +172,13 @@ void DirectSolver_CSR_LU_Take<LevelCacheType>::nodeBuildSolverMatrixTake(
             double bottom_value = -coeff3 * (att[center_index] + att[bottom_index]); /* Bottom */
             double top_value    = -coeff4 * (att[center_index] + att[top_index]); /* Top */
 
-            double center_value =
-                (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF[center_index]) /* beta_{i,j} */
-                 - left_value /* Center: (Left) */
-                 - right_value /* Center: (Right) */
-                 - bottom_value /* Center: (Bottom) */
-                 - top_value /* Center: (Top) */
-                );
+            double center_value = (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] *
+                                       std::fabs(detDF[center_index]) /* beta_{i,j} */
+                                   - left_value /* Center: (Left) */
+                                   - right_value /* Center: (Right) */
+                                   - bottom_value /* Center: (Bottom) */
+                                   - top_value /* Center: (Top) */
+            );
 
             double bottom_right_value = +0.25 * (art[right_index] + art[bottom_index]); /* Bottom Right */
             double top_right_value    = -0.25 * (art[right_index] + art[top_index]); /* Top Right */
diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h b/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h
similarity index 89%
rename from include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h
rename to include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h
index a387371c..248fe70b 100644
--- a/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h
+++ b/include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h
@@ -33,16 +33,6 @@ class DirectSolver_CSR_LU_Take : public DirectSolver<LevelCacheType>
         -1,  0, -1,
         -1, -1, -1
     };
-    const Stencil stencil_next_inner_DB_ = {
-        7, 4, 8,
-        1, 0, 2,
-        5, 3, 6
-    };
-    const Stencil stencil_next_outer_DB_ = {
-        7, 4, 8,
-        1, 0, 2,
-        5, 3, 6
-    };
     // clang-format on
 
     SparseMatrixCSR<double> buildSolverMatrix();
diff --git a/include/DirectSolver/DirectSolver-CSR-LU-Take/matrixStencil.inl b/include/DirectSolver/DirectSolver-CSR-LU-Take/matrixStencil.inl
index cb160569..90887972 100644
--- a/include/DirectSolver/DirectSolver-CSR-LU-Take/matrixStencil.inl
+++ b/include/DirectSolver/DirectSolver-CSR-LU-Take/matrixStencil.inl
@@ -9,13 +9,11 @@ int DirectSolver_CSR_LU_Take<LevelCacheType>::getStencilSize(int global_index) c
     int i_r, i_theta;
     grid.multiIndex(global_index, i_r, i_theta);
 
-    const int size_stencil_inner_boundary      = DirBC_Interior ? 1 : 7;
-    const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9;
-    const int size_stencil_interior            = 9;
-    const int size_stencil_next_outer_boundary = 9;
-    const int size_stencil_outer_boundary      = 1;
+    const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
+    const int size_stencil_interior       = 9;
+    const int size_stencil_outer_boundary = 1;
 
-    if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) {
+    if ((i_r > 0 && i_r < grid.nr() - 1)) {
         return size_stencil_interior;
     }
     else if (i_r == 0 && !DirBC_Interior) {
@@ -24,12 +22,7 @@ int DirectSolver_CSR_LU_Take<LevelCacheType>::getStencilSize(int global_index) c
     else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) {
         return size_stencil_outer_boundary;
     }
-    else if (i_r == 1 && DirBC_Interior) {
-        return size_stencil_next_inner_boundary;
-    }
-    else if (i_r == grid.nr() - 2) {
-        return size_stencil_next_outer_boundary;
-    }
+
     throw std::out_of_range("Invalid index for stencil");
 }
 
@@ -42,7 +35,7 @@ const Stencil& DirectSolver_CSR_LU_Take<LevelCacheType>::getStencil(int i_r) con
     assert(0 <= i_r && i_r < grid.nr());
     assert(grid.nr() >= 4);
 
-    if ((i_r > 1 && i_r < grid.nr() - 2) || (i_r == 1 && !DirBC_Interior)) {
+    if ((i_r > 0 && i_r < grid.nr() - 1)) {
         return stencil_interior_;
     }
     else if (i_r == 0 && !DirBC_Interior) {
@@ -51,12 +44,7 @@ const Stencil& DirectSolver_CSR_LU_Take<LevelCacheType>::getStencil(int i_r) con
     else if ((i_r == 0 && DirBC_Interior) || i_r == grid.nr() - 1) {
         return stencil_DB_;
     }
-    else if (i_r == 1 && DirBC_Interior) {
-        return stencil_next_inner_DB_;
-    }
-    else if (i_r == grid.nr() - 2) {
-        return stencil_next_outer_DB_;
-    }
+
     throw std::out_of_range("Invalid index for stencil");
 }
 
@@ -66,15 +54,10 @@ int DirectSolver_CSR_LU_Take<LevelCacheType>::getNonZeroCountSolverMatrix() cons
     const PolarGrid& grid     = DirectSolver<LevelCacheType>::grid_;
     const bool DirBC_Interior = DirectSolver<LevelCacheType>::DirBC_Interior_;
 
-    const int size_stencil_inner_boundary      = DirBC_Interior ? 1 : 7;
-    const int size_stencil_next_inner_boundary = DirBC_Interior ? 9 : 9;
-    const int size_stencil_interior            = 9;
-    const int size_stencil_next_outer_boundary = 9;
-    const int size_stencil_outer_boundary      = 1;
-
-    assert(grid.nr() >= 4);
+    const int size_stencil_inner_boundary = DirBC_Interior ? 1 : 7;
+    const int size_stencil_interior       = 9;
+    const int size_stencil_outer_boundary = 1;
 
     return grid.ntheta() *
-           (size_stencil_inner_boundary + size_stencil_next_inner_boundary + (grid.nr() - 4) * size_stencil_interior +
-            size_stencil_next_outer_boundary + size_stencil_outer_boundary);
+           (size_stencil_inner_boundary + (grid.nr() - 2) * size_stencil_interior + size_stencil_outer_boundary);
 }
diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl
index 6ef9cbbb..3a845970 100644
--- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl
+++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.inl
@@ -897,8 +897,11 @@ void ExtrapolatedSmootherGive<LevelCacheType>::applyAscOrthoCircleSection(int i_
                                                                           ConstVector<double> x,
                                                                           ConstVector<double> rhs, Vector<double> temp)
 {
+    using extrapolated_smoother_give::nodeApplyAscOrthoCircleGive;
+
     const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
     const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
 
     assert(i_r >= 0 && i_r < grid.numberSmootherCircles() + 1);
 
@@ -912,9 +915,8 @@ void ExtrapolatedSmootherGive<LevelCacheType>::applyAscOrthoCircleSection(int i_
         level_cache.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF);
 
         // Apply Asc Ortho at the current node
-        extrapolated_smoother_give::nodeApplyAscOrthoCircleGive(
-            i_r, i_theta, grid, ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_, smoother_color, x, rhs, temp,
-            arr, att, art, detDF, coeff_beta);
+        nodeApplyAscOrthoCircleGive(i_r, i_theta, grid, DirBC_Interior, smoother_color, x, rhs, temp, arr, att, art,
+                                    detDF, coeff_beta);
     }
 }
 
@@ -923,11 +925,15 @@ void ExtrapolatedSmootherGive<LevelCacheType>::applyAscOrthoRadialSection(int i_
                                                                           ConstVector<double> x,
                                                                           ConstVector<double> rhs, Vector<double> temp)
 {
+    using extrapolated_smoother_give::nodeApplyAscOrthoRadialGive;
+
     const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
     const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
 
     const double theta = grid.theta(i_theta);
 
+    /* We need to obtain left contributions from the circular section for AscOrtho. */
     /* !!! i_r = grid.numberSmootherCircles()-1 !!! */
     for (int i_r = grid.numberSmootherCircles() - 1; i_r < grid.nr(); i_r++) {
         const double r  = grid.radius(i_r);
@@ -937,8 +943,7 @@ void ExtrapolatedSmootherGive<LevelCacheType>::applyAscOrthoRadialSection(int i_
         level_cache.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF);
 
         // Apply Asc Ortho at the current node
-        extrapolated_smoother_give::nodeApplyAscOrthoRadialGive(
-            i_r, i_theta, grid, ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_, smoother_color, x, rhs, temp,
-            arr, att, art, detDF, coeff_beta);
+        nodeApplyAscOrthoRadialGive(i_r, i_theta, grid, DirBC_Interior, smoother_color, x, rhs, temp, arr, att, art,
+                                    detDF, coeff_beta);
     }
 }
\ No newline at end of file
diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.inl
new file mode 100644
index 00000000..8c88946e
--- /dev/null
+++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.inl
@@ -0,0 +1,307 @@
+#include "../../../include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h"
+
+#include "../../../include/Definitions/geometry_helper.h"
+
+namespace extrapolated_smoother_give
+{
+
+#ifdef GMGPOLAR_USE_MUMPS
+// When using the MUMPS solver, the matrix is assembled in COO format.
+static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double>& matrix, int ptr, int offset, int row,
+                                                int column, double value)
+{
+    matrix.row_index(ptr + offset) = row;
+    matrix.col_index(ptr + offset) = column;
+    matrix.value(ptr + offset) += value;
+}
+#else
+// When using the in-house solver, the matrix is stored in CSR format.
+static inline void update_CSR_COO_MatrixElement(SparseMatrixCSR<double>& matrix, int ptr, int offset, int row,
+                                                int column, double value)
+{
+    matrix.row_nz_index(row, offset) = column;
+    matrix.row_nz_entry(row, offset) += value;
+}
+#endif
+
+} // namespace extrapolated_smoother_give
+
+template <class LevelCacheType>
+void ExtrapolatedSmootherGive<LevelCacheType>::nodeBuildInteriorBoundarySolverMatrix_i_r_0(
+    int i_theta, const PolarGrid& grid, bool DirBC_Interior, InnerBoundaryMatrix& matrix, double arr, double att,
+    double art, double detDF, double coeff_beta)
+{
+    using extrapolated_smoother_give::update_CSR_COO_MatrixElement;
+
+    assert(i_theta >= 0 && i_theta < grid.ntheta());
+
+    int ptr, offset;
+    int row, column;
+    double value;
+
+    const int i_r = 0;
+
+    /* ------------------------------------------------ */
+    /* Case 1: Dirichlet boundary on the inner boundary */
+    /* ------------------------------------------------ */
+    if (DirBC_Interior) {
+        /* Fill result(i,j) */
+        int center_index = i_theta;
+
+        /* Fill matrix row of (i,j) */
+        row = center_index;
+        ptr = getCircleAscIndex(i_r, i_theta);
+
+        const Stencil& CenterStencil = getStencil(i_r, i_theta);
+
+        offset = CenterStencil[StencilPosition::Center];
+        column = center_index;
+        value  = 1.0;
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+    }
+    else {
+        /* ------------------------------------------------------------- */
+        /* Case 2: Across origin discretization on the interior boundary */
+        /* ------------------------------------------------------------- */
+        // h1 gets replaced with 2 * R0.
+        // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)).
+        // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil.
+        double h1 = 2.0 * grid.radius(0);
+        double h2 = grid.radialSpacing(i_r);
+        double k1 = grid.angularSpacing(i_theta - 1);
+        double k2 = grid.angularSpacing(i_theta);
+
+        double coeff1 = 0.5 * (k1 + k2) / h1;
+        double coeff2 = 0.5 * (k1 + k2) / h2;
+        double coeff3 = 0.5 * (h1 + h2) / k1;
+        double coeff4 = 0.5 * (h1 + h2) / k2;
+
+        const int i_theta_M1           = grid.wrapThetaIndex(i_theta - 1);
+        const int i_theta_P1           = grid.wrapThetaIndex(i_theta + 1);
+        const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + (grid.ntheta() / 2));
+
+        const int center_index = i_theta;
+        const int left_index   = i_theta_AcrossOrigin;
+        const int right_index  = i_theta;
+        const int bottom_index = i_theta_M1;
+        const int top_index    = i_theta_P1;
+
+        const int center_nz_index = getCircleAscIndex(i_r, i_theta);
+        const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1);
+        const int top_nz_index    = getCircleAscIndex(i_r, i_theta_P1);
+        const int left_nz_index   = getCircleAscIndex(i_r, i_theta_AcrossOrigin);
+
+        int nz_index;
+        const Stencil& CenterStencil = getStencil(i_r, i_theta);
+
+        if (i_theta & 1) {
+            /* i_theta % 2 == 1 */
+            /* -| x | o | x | */
+            /* -|   |   |   | */
+            /* -| O | o | o | */
+            /* -|   |   |   | */
+            /* -| x | o | x | */
+
+            /* Fill matrix row of (i,j) */
+            row = center_index;
+            ptr = center_nz_index;
+
+            offset = CenterStencil[StencilPosition::Center];
+            column = center_index;
+            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* beta_{i,j} */
+            update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+            offset = CenterStencil[StencilPosition::Left];
+            column = left_index;
+            value  = -coeff1 * arr; /* Left */
+            update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+            offset = CenterStencil[StencilPosition::Center];
+            column = center_index;
+            value  = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */
+            update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+            /* Fill matrix row of (i-1,j) */
+            /* From view the view of the across origin node, */
+            /* the directions are roatated by 180 degrees in the stencil! */
+            row = left_index;
+            ptr = left_nz_index;
+
+            const Stencil& LeftStencil = CenterStencil;
+
+            offset = LeftStencil[StencilPosition::Left];
+            column = center_index;
+            value  = -coeff1 * arr; /* Right -> Left*/
+            update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+            offset = LeftStencil[StencilPosition::Center];
+            column = left_index;
+            value  = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */
+            update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+        }
+        else {
+            /* i_theta % 2 == 0 */
+            /* -| o | o | o | */
+            /* -|   |   |   | */
+            /* -| X | o | x | */
+            /* -|   |   |   | */
+            /* -| o | o | o | */
+
+            /* Fill matrix row of (i,j) */
+            row = center_index;
+            ptr = center_nz_index;
+
+            offset = CenterStencil[StencilPosition::Center];
+            column = center_index;
+            value  = 1.0;
+            update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+            /* Fill matrix row of (i,j-1) */
+            row = bottom_index;
+            ptr = bottom_nz_index;
+
+            const Stencil& BottomStencil = CenterStencil;
+
+            offset = BottomStencil[StencilPosition::Center];
+            column = bottom_index;
+            value  = +coeff3 * att; /* Center: (Top) */
+            update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+            /* Fill matrix row of (i,j+1) */
+            row = top_index;
+            ptr = top_nz_index;
+
+            const Stencil& TopStencil = CenterStencil;
+
+            offset = TopStencil[StencilPosition::Center];
+            column = top_index;
+            value  = +coeff4 * att; /* Center: (Bottom) */
+            update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+        }
+    }
+}
+
+template <class LevelCacheType>
+void ExtrapolatedSmootherGive<LevelCacheType>::nodeBuildInteriorBoundarySolverMatrix_i_r_1(
+    int i_theta, const PolarGrid& grid, bool DirBC_Interior, InnerBoundaryMatrix& matrix, double arr, double att,
+    double art, double detDF, double coeff_beta)
+{
+    using extrapolated_smoother_give::update_CSR_COO_MatrixElement;
+
+    assert(i_theta >= 0 && i_theta < grid.ntheta());
+
+    int ptr, offset;
+    int row, column;
+    double value;
+
+    const int i_r = 1;
+
+    const double h1 = grid.radialSpacing(i_r - 1);
+    const double h2 = grid.radialSpacing(i_r);
+    const double k1 = grid.angularSpacing(i_theta - 1);
+    const double k2 = grid.angularSpacing(i_theta);
+
+    const double coeff1 = 0.5 * (k1 + k2) / h1;
+
+    const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1);
+    const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1);
+
+    const int left_index = i_theta;
+
+    /* -------------------------- */
+    /* Cyclic Tridiagonal Section */
+    /* i_r % 2 == 1               */
+    if (i_r & 1) {
+        if (i_theta & 1) {
+            /* i_theta % 2 == 1 */
+            /* | x | o | x | */
+            /* |   |   |   | */
+            /* | o | O | o | */
+            /* |   |   |   | */
+            /* | x | o | x | */
+
+            /* Fill matrix row of (i-1,j) */
+
+            /* Only in the case of AcrossOrigin */
+            if (!DirBC_Interior) {
+                row = left_index;
+                ptr = getCircleAscIndex(i_r - 1, i_theta);
+
+                const Stencil& LeftStencil = getStencil(i_r - 1, i_theta);
+
+                offset = LeftStencil[StencilPosition::Center];
+                column = left_index;
+                value  = +coeff1 * arr; /* Center: (Right) */
+                update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+            }
+        }
+    }
+}
+
+template <class LevelCacheType>
+typename ExtrapolatedSmootherGive<LevelCacheType>::InnerBoundaryMatrix
+ExtrapolatedSmootherGive<LevelCacheType>::buildInteriorBoundarySolverMatrix()
+{
+    const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
+    const int num_omp_threads         = ExtrapolatedSmoother<LevelCacheType>::num_omp_threads_;
+
+    const int ntheta = grid.ntheta();
+
+    // The interior boundary matrix is symmetric due to the periodicity in the theta direction
+    // and the assumption that ntheta is even, which is required for the across-origin discretization.
+    // We store all non-zero entries of the matrix, both in COO format (for MUMPS)
+    // and in CSR format (for the in-house solver). If the COO matrix is marked as symmetric,
+    // the COO_Mumps_Solver optimizes the factorization by only using the upper triangular part of the matrix,
+    // which is extracted by the COO_Mumps_Solver internally.
+#ifdef GMGPOLAR_USE_MUMPS
+    const int nnz = getNonZeroCountCircleAsc(0);
+    SparseMatrixCOO<double> inner_boundary_solver_matrix(ntheta, ntheta, nnz);
+    inner_boundary_solver_matrix.is_symmetric(true);
+#else
+    std::function<int(int)> nnz_per_row = [&](int i_theta) {
+        if (DirBC_Interior)
+            return 1;
+        else
+            return i_theta % 2 == 0 ? 1 : 2;
+    };
+    SparseMatrixCSR<double> inner_boundary_solver_matrix(ntheta, ntheta, nnz_per_row);
+#endif
+
+    {
+        const int i_r  = 0;
+        const double r = grid.radius(i_r);
+        for (int i_theta = 0; i_theta < ntheta; i_theta++) {
+            {
+                const int global_index = grid.index(i_r, i_theta);
+                const double theta     = grid.theta(i_theta);
+
+                double coeff_beta, arr, att, art, detDF;
+                level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF);
+
+                nodeBuildInteriorBoundarySolverMatrix_i_r_0(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix,
+                                                            arr, att, art, detDF, coeff_beta);
+            }
+        }
+    }
+
+    {
+        const int i_r  = 1;
+        const double r = grid.radius(i_r);
+        for (int i_theta = 0; i_theta < ntheta; i_theta++) {
+            {
+                const int global_index = grid.index(i_r, i_theta);
+                const double theta     = grid.theta(i_theta);
+
+                double coeff_beta, arr, att, art, detDF;
+                level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF);
+
+                nodeBuildInteriorBoundarySolverMatrix_i_r_1(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix,
+                                                            arr, att, art, detDF, coeff_beta);
+            }
+        }
+    }
+
+    return inner_boundary_solver_matrix;
+}
diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildTridiagonalAsc.inl
similarity index 75%
rename from include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.inl
rename to include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildTridiagonalAsc.inl
index f5310376..c12e50f4 100644
--- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.inl
+++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildTridiagonalAsc.inl
@@ -1,9 +1,10 @@
-#pragma once
+#include "../../../include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h"
+
+#include "../../../include/Definitions/geometry_helper.h"
 
 namespace extrapolated_smoother_give
 {
 
-/* Tridiagonal matrices */
 static inline void updateMatrixElement(BatchedTridiagonalSolver<double>& solver, int batch, int row, int column,
                                        double value)
 {
@@ -15,34 +16,15 @@ static inline void updateMatrixElement(BatchedTridiagonalSolver<double>& solver,
         solver.cyclic_corner(batch) += value;
 }
 
-/* Inner Boundary COO/CSR matrix */
-#ifdef GMGPOLAR_USE_MUMPS
-static inline void updateCOOCSRMatrixElement(SparseMatrixCOO<double>& matrix, int ptr, int offset, int row, int col,
-                                             double val)
-{
-    matrix.row_index(ptr + offset) = row;
-    matrix.col_index(ptr + offset) = col;
-    matrix.value(ptr + offset) += val;
-}
-#else
-static inline void updateCOOCSRMatrixElement(SparseMatrixCSR<double>& matrix, int ptr, int offset, int row, int col,
-                                             double val)
-{
-    matrix.row_nz_index(row, offset) = col;
-    matrix.row_nz_entry(row, offset) += val;
-}
-#endif
-
 } // namespace extrapolated_smoother_give
 
 template <class LevelCacheType>
-void ExtrapolatedSmootherGive<LevelCacheType>::nodeBuildAscGive(
-    int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, MatrixType& inner_boundary_circle_matrix,
+void ExtrapolatedSmootherGive<LevelCacheType>::nodeBuildTridiagonalSolverMatrices(
+    int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior,
     BatchedTridiagonalSolver<double>& circle_tridiagonal_solver,
     BatchedTridiagonalSolver<double>& radial_tridiagonal_solver, double arr, double att, double art, double detDF,
     double coeff_beta)
 {
-    using extrapolated_smoother_give::updateCOOCSRMatrixElement;
     using extrapolated_smoother_give::updateMatrixElement;
 
     assert(i_r >= 0 && i_r < grid.nr());
@@ -55,8 +37,8 @@ void ExtrapolatedSmootherGive<LevelCacheType>::nodeBuildAscGive(
     assert(lengthSmootherRadial >= 3);
 
     int ptr, offset;
-    int row, column, col;
-    double value, val;
+    int row, column;
+    double value;
     /* ------------------------------------------ */
     /* Node in the interior of the Circle Section */
     /* ------------------------------------------ */
@@ -108,7 +90,7 @@ void ExtrapolatedSmootherGive<LevelCacheType>::nodeBuildAscGive(
             /* Fill matrix row of (i,j) */
             row    = center_index;
             column = center_index;
-            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */
+            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */
             updateMatrixElement(center_solver, center_batch, row, column, value);
 
             row    = center_index;
@@ -157,21 +139,8 @@ void ExtrapolatedSmootherGive<LevelCacheType>::nodeBuildAscGive(
                 /* | x | o | x | */
 
                 /* Fill matrix row of (i-1,j) */
-                if (i_r == 1) {
-                    /* Only in the case of AcrossOrigin */
-                    if (!DirBC_Interior) {
-                        row = left_index;
-                        ptr = getCircleAscIndex(i_r - 1, i_theta);
-
-                        const Stencil& LeftStencil = getStencil(i_r - 1, i_theta);
-
-                        offset = LeftStencil[StencilPosition::Center];
-                        col    = left_index;
-                        val    = +coeff1 * arr; /* Center: (Right) */
-                        updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-                    }
-                }
-                else {
+                // The inner boundary circle line are is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix.
+                if (i_r > 1) {
                     row    = left_index;
                     column = left_index;
                     value  = coeff1 * arr; /* Center: (Right) */
@@ -302,7 +271,7 @@ void ExtrapolatedSmootherGive<LevelCacheType>::nodeBuildAscGive(
             /* Fill matrix row of (i,j) */
             row    = center_index;
             column = center_index;
-            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */
+            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */
             updateMatrixElement(center_solver, center_batch, row, column, value);
 
             row    = center_index;
@@ -433,177 +402,38 @@ void ExtrapolatedSmootherGive<LevelCacheType>::nodeBuildAscGive(
     /* Circle Section: Node in the inner boundary */
     /* ------------------------------------------ */
     else if (i_r == 0) {
-        auto& right_solver = circle_tridiagonal_solver;
-        int right_batch    = i_r + 1;
-
-        /* ------------------------------------------------ */
-        /* Case 1: Dirichlet boundary on the inner boundary */
-        /* ------------------------------------------------ */
-        if (DirBC_Interior) {
-            /* Fill result(i,j) */
-            double h2 = grid.radialSpacing(i_r);
-            double k1 = grid.angularSpacing(i_theta - 1);
-            double k2 = grid.angularSpacing(i_theta);
-
-            double coeff2 = 0.5 * (k1 + k2) / h2;
-
-            int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1);
-            int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1);
-
-            int center_index = i_theta;
-            int right_index  = i_theta;
-            int bottom_index = i_theta_M1;
-            int top_index    = i_theta_P1;
-
-            /* Fill matrix row of (i,j) */
-            row = center_index;
-            ptr = getCircleAscIndex(i_r, i_theta);
-
-            const Stencil& CenterStencil = getStencil(i_r, i_theta);
-
-            offset = CenterStencil[StencilPosition::Center];
-            col    = center_index;
-            val    = 1.0;
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-            /* Fill matrix row of (i+1,j) */
-            row    = right_index;
-            column = right_index;
-            value  = coeff2 * arr; /* Center: (Left) */
-            updateMatrixElement(right_solver, right_batch, row, column, value);
-        }
-        else {
-            /* ------------------------------------------------------------- */
-            /* Case 2: Across origin discretization on the interior boundary */
-            /* ------------------------------------------------------------- */
-            // h1 gets replaced with 2 * R0.
-            // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)).
-            // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil.
-            double h1 = 2.0 * grid.radius(0);
-            double h2 = grid.radialSpacing(i_r);
-            double k1 = grid.angularSpacing(i_theta - 1);
-            double k2 = grid.angularSpacing(i_theta);
-
-            double coeff1 = 0.5 * (k1 + k2) / h1;
-            double coeff2 = 0.5 * (k1 + k2) / h2;
-            double coeff3 = 0.5 * (h1 + h2) / k1;
-            double coeff4 = 0.5 * (h1 + h2) / k2;
-
-            const int i_theta_M1           = grid.wrapThetaIndex(i_theta - 1);
-            const int i_theta_P1           = grid.wrapThetaIndex(i_theta + 1);
-            const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + (grid.ntheta() / 2));
-
-            const int center_index = i_theta;
-            const int left_index   = i_theta_AcrossOrigin;
-            const int right_index  = i_theta;
-            const int bottom_index = i_theta_M1;
-            const int top_index    = i_theta_P1;
-
-            const int center_nz_index = getCircleAscIndex(i_r, i_theta);
-            const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1);
-            const int top_nz_index    = getCircleAscIndex(i_r, i_theta_P1);
-            const int left_nz_index   = getCircleAscIndex(i_r, i_theta_AcrossOrigin);
-
-            int nz_index;
-            const Stencil& CenterStencil = getStencil(i_r, i_theta);
-
-            if (i_theta & 1) {
-                /* i_theta % 2 == 1 */
-                /* -| x | o | x | */
-                /* -|   |   |   | */
-                /* -| O | o | o | */
-                /* -|   |   |   | */
-                /* -| x | o | x | */
-
-                /* Fill matrix row of (i,j) */
-                row = center_index;
-                ptr = center_nz_index;
-
-                offset = CenterStencil[StencilPosition::Center];
-                col    = center_index;
-                val    = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* beta_{i,j} */
-                updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-                offset = CenterStencil[StencilPosition::Left];
-                col    = left_index;
-                val    = -coeff1 * arr; /* Left */
-                updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-                offset = CenterStencil[StencilPosition::Center];
-                col    = center_index;
-                val    = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */
-                updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-                /* Fill matrix row of (i-1,j) */
-                /* From view the view of the across origin node, */
-                /* the directions are roatated by 180 degrees in the stencil! */
-                row = left_index;
-                ptr = left_nz_index;
-
-                const Stencil& LeftStencil = CenterStencil;
-
-                offset = LeftStencil[StencilPosition::Left];
-                col    = center_index;
-                val    = -coeff1 * arr; /* Right -> Left*/
-                updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-                offset = LeftStencil[StencilPosition::Center];
-                col    = left_index;
-                val    = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */
-                updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-                /* Fill matrix row of (i+1,j) */
-                row    = right_index;
-                column = right_index;
-                value  = coeff2 * arr; /* Center: (Left) */
-                updateMatrixElement(right_solver, right_batch, row, column, value);
-            }
-            else {
-                /* i_theta % 2 == 0 */
-                /* -| o | o | o | */
-                /* -|   |   |   | */
-                /* -| X | o | x | */
-                /* -|   |   |   | */
-                /* -| o | o | o | */
-
-                /* Fill matrix row of (i,j) */
-                row = center_index;
-                ptr = center_nz_index;
-
-                offset = CenterStencil[StencilPosition::Center];
-                col    = center_index;
-                val    = 1.0;
-                updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-                /* Fill matrix row of (i,j-1) */
-                row = bottom_index;
-                ptr = bottom_nz_index;
-
-                const Stencil& BottomStencil = CenterStencil;
+        // The inner boundary circle line are is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix.
+        auto& center_solver = circle_tridiagonal_solver;
+        int center_batch    = i_r;
+        auto& right_solver  = circle_tridiagonal_solver;
+        int right_batch     = i_r + 1;
 
-                offset = BottomStencil[StencilPosition::Center];
-                col    = bottom_index;
-                val    = +coeff3 * att; /* Center: (Top) */
-                updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
+        /* Fill result(i,j) */
+        double h2 = grid.radialSpacing(i_r);
+        double k1 = grid.angularSpacing(i_theta - 1);
+        double k2 = grid.angularSpacing(i_theta);
 
-                /* Fill matrix row of (i,j+1) */
-                row = top_index;
-                ptr = top_nz_index;
+        double coeff2 = 0.5 * (k1 + k2) / h2;
 
-                const Stencil& TopStencil = CenterStencil;
+        int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1);
+        int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1);
 
-                offset = TopStencil[StencilPosition::Center];
-                col    = top_index;
-                val    = +coeff4 * att; /* Center: (Bottom) */
-                updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
+        int center_index = i_theta;
+        int right_index  = i_theta;
+        int bottom_index = i_theta_M1;
+        int top_index    = i_theta_P1;
 
-                /* Fill matrix row of (i+1,j) */
-                row    = right_index;
-                column = right_index;
-                value  = coeff2 * arr; /* Center: (Left) */
-                updateMatrixElement(right_solver, right_batch, row, column, value);
-            }
-        }
+        /* Fill matrix row of (i,j) */
+        row    = center_index;
+        column = center_index;
+        value  = 1.0;
+        updateMatrixElement(center_solver, center_batch, row, column, value);
+
+        /* Fill matrix row of (i+1,j) */
+        row    = right_index;
+        column = right_index;
+        value  = coeff2 * arr; /* Center: (Left) */
+        updateMatrixElement(right_solver, right_batch, row, column, value);
     }
     /* ------------------------------------------- */
     /* Circle Section: Node next to radial section */
@@ -1211,11 +1041,13 @@ void ExtrapolatedSmootherGive<LevelCacheType>::nodeBuildAscGive(
 }
 
 template <class LevelCacheType>
-void ExtrapolatedSmootherGive<LevelCacheType>::buildAscCircleSection(const int i_r)
+void ExtrapolatedSmootherGive<LevelCacheType>::buildTridiagonalCircleSection(int i_r)
 {
     const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
     const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
 
+    // Access pattern is aligned with the memory layout of the grid data to maximize cache efficiency.
     const double r = grid.radius(i_r);
     for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
         const int global_index = grid.index(i_r, i_theta);
@@ -1225,18 +1057,19 @@ void ExtrapolatedSmootherGive<LevelCacheType>::buildAscCircleSection(const int i
         level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF);
 
         // Build Asc at the current node
-        nodeBuildAscGive(i_r, i_theta, grid, ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_,
-                         inner_boundary_circle_matrix_, circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr,
-                         att, art, detDF, coeff_beta);
+        nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_,
+                                           radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta);
     }
 }
 
 template <class LevelCacheType>
-void ExtrapolatedSmootherGive<LevelCacheType>::buildAscRadialSection(const int i_theta)
+void ExtrapolatedSmootherGive<LevelCacheType>::buildTridiagonalRadialSection(int i_theta)
 {
     const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
     const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
 
+    // Access pattern is aligned with the memory layout of the grid data to maximize cache efficiency.
     const double theta = grid.theta(i_theta);
     for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
         const int global_index = grid.index(i_r, i_theta);
@@ -1246,143 +1079,75 @@ void ExtrapolatedSmootherGive<LevelCacheType>::buildAscRadialSection(const int i
         level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF);
 
         // Build Asc at the current node
-        nodeBuildAscGive(i_r, i_theta, grid, ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_,
-                         inner_boundary_circle_matrix_, circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr,
-                         att, art, detDF, coeff_beta);
+        nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_,
+                                           radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta);
     }
 }
 
 template <class LevelCacheType>
-void ExtrapolatedSmootherGive<LevelCacheType>::buildAscMatrices()
+void ExtrapolatedSmootherGive<LevelCacheType>::buildTridiagonalSolverMatrices()
 {
-    /* -------------------------------------- */
-    /* Part 1: Allocate Asc Smoother matrices */
-    /* -------------------------------------- */
-    // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor.
-    // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead.
-
-    const PolarGrid& grid     = ExtrapolatedSmoother<LevelCacheType>::grid_;
-    const bool DirBC_Interior = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
-    const int num_omp_threads = ExtrapolatedSmoother<LevelCacheType>::num_omp_threads_;
-
-#ifdef GMGPOLAR_USE_MUMPS
-    // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry.
-    const int inner_i_r           = 0;
-    const int inner_nnz           = getNonZeroCountCircleAsc(inner_i_r);
-    const int num_circle_nodes    = grid.ntheta();
-    inner_boundary_circle_matrix_ = SparseMatrixCOO<double>(num_circle_nodes, num_circle_nodes, inner_nnz);
-    inner_boundary_circle_matrix_.is_symmetric(false);
-#else
-    std::function<int(int)> nnz_per_row = [&](int i_theta) {
-        if (DirBC_Interior)
-            return 1;
-        else
-            return i_theta % 2 == 0 ? 1 : 2;
-    };
-    const int num_circle_nodes    = grid.ntheta();
-    inner_boundary_circle_matrix_ = SparseMatrixCSR<double>(num_circle_nodes, num_circle_nodes, nnz_per_row);
-
-    for (int i = 0; i < inner_boundary_circle_matrix_.non_zero_size(); i++) {
-        inner_boundary_circle_matrix_.values_data()[i] = 0.0;
-    }
-
-#endif
-
-    /* ---------------------------------- */
-    /* Part 2: Fill Asc Smoother matrices */
-    /* ---------------------------------- */
+    const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
+    const int num_omp_threads         = ExtrapolatedSmoother<LevelCacheType>::num_omp_threads_;
 
-    /*  Multi-threaded execution: */
     const int num_smoother_circles    = grid.numberSmootherCircles();
     const int additional_radial_tasks = grid.ntheta() % 3;
     const int num_radial_tasks        = grid.ntheta() - additional_radial_tasks;
 
+    /* ---------------- */
+    /* Circular section */
+    /* ---------------- */
+    // We parallelize the loop with step 3 to avoid data race conditions between adjacent circles.
 #pragma omp parallel num_threads(num_omp_threads)
     {
 #pragma omp for
         for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) {
-            buildAscCircleSection(i_r);
-        }
+            buildTridiagonalCircleSection(i_r);
+        } /* Implicit barrier */
 #pragma omp for
         for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) {
-            buildAscCircleSection(i_r);
-        }
+            buildTridiagonalCircleSection(i_r);
+        } /* Implicit barrier */
 #pragma omp for
         for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) {
-            buildAscCircleSection(i_r);
-        }
+            buildTridiagonalCircleSection(i_r);
+        } /* Implicit barrier */
+    }
+
+    /* ---------------- */
+    /* Radial section */
+    /* ---------------- */
+    // We parallelize the loop with step 3 to avoid data race conditions between adjacent radial lines.
+    // Due to the periodicity in the angular direction, we can have at most 2 additional radial tasks
+    // that are handled serially before the parallel loops.
+    if (additional_radial_tasks > 0) {
+        const int i_theta = 0;
+        buildTridiagonalRadialSection(i_theta);
+    }
+
+    if (additional_radial_tasks > 1) {
+        const int i_theta = 1;
+        buildTridiagonalRadialSection(i_theta);
+    }
 
+#pragma omp parallel num_threads(num_omp_threads)
+    {
 #pragma omp for
         for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
-            if (radial_task > 0) {
-                int i_theta = radial_task + additional_radial_tasks;
-                buildAscRadialSection(i_theta);
-            }
-            else {
-                if (additional_radial_tasks == 0) {
-                    buildAscRadialSection(0);
-                }
-                else if (additional_radial_tasks >= 1) {
-                    buildAscRadialSection(0);
-                    buildAscRadialSection(1);
-                }
-            }
-        }
+            const int i_theta = radial_task + additional_radial_tasks;
+            buildTridiagonalRadialSection(i_theta);
+        } /* Implicit barrier */
 #pragma omp for
         for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) {
-            if (radial_task > 1) {
-                int i_theta = radial_task + additional_radial_tasks;
-                buildAscRadialSection(i_theta);
-            }
-            else {
-                if (additional_radial_tasks == 0) {
-                    buildAscRadialSection(1);
-                }
-                else if (additional_radial_tasks == 1) {
-                    buildAscRadialSection(2);
-                }
-                else if (additional_radial_tasks == 2) {
-                    buildAscRadialSection(2);
-                    buildAscRadialSection(3);
-                }
-            }
-        }
+            const int i_theta = radial_task + additional_radial_tasks;
+            buildTridiagonalRadialSection(i_theta);
+        } /* Implicit barrier */
 #pragma omp for
         for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) {
-            int i_theta = radial_task + additional_radial_tasks;
-            buildAscRadialSection(i_theta);
-        }
-    }
-
-    circle_tridiagonal_solver_.setup();
-    radial_tridiagonal_solver_.setup();
-
-#ifdef GMGPOLAR_USE_MUMPS
-    /* ------------------------------------------------------------------- */
-    /* Part 3: Convert inner_boundary_circle_matrix_ to a symmetric matrix */
-    /* ------------------------------------------------------------------- */
-
-    SparseMatrixCOO<double> full_matrix = std::move(inner_boundary_circle_matrix_);
-
-    const int nnz           = full_matrix.non_zero_size();
-    const int numRows       = full_matrix.rows();
-    const int numColumns    = full_matrix.columns();
-    const int symmetric_nnz = nnz - (nnz - numRows) / 2;
-
-    inner_boundary_circle_matrix_ = SparseMatrixCOO<double>(numRows, numColumns, symmetric_nnz);
-    inner_boundary_circle_matrix_.is_symmetric(true);
-
-    int current_nz = 0;
-    for (int nz_index = 0; nz_index < full_matrix.non_zero_size(); nz_index++) {
-        int current_row = full_matrix.row_index(nz_index);
-        int current_col = full_matrix.col_index(nz_index);
-        if (current_row <= current_col) {
-            inner_boundary_circle_matrix_.row_index(current_nz) = current_row;
-            inner_boundary_circle_matrix_.col_index(current_nz) = current_col;
-            inner_boundary_circle_matrix_.value(current_nz)     = std::move(full_matrix.value(nz_index));
-            current_nz++;
-        }
+            const int i_theta = radial_task + additional_radial_tasks;
+            buildTridiagonalRadialSection(i_theta);
+        } /* Implicit barrier */
     }
-#endif
 }
-// clang-format on
diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h
index df31ca82..54290bfe 100644
--- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h
+++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h
@@ -60,9 +60,6 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother<LevelCacheType>
     explicit ExtrapolatedSmootherGive(const PolarGrid& grid, const LevelCacheType& level_cache, bool DirBC_Interior,
                                       int num_omp_threads);
 
-    // If MUMPS is enabled, this cleans up the inner boundary solver.
-    ~ExtrapolatedSmootherGive() override;
-
     // Performs one full coupled extrapolated smoothing sweep:
     //   BC -> WC -> BR -> WR
     // Parallel implementation using OpenMP:
@@ -71,40 +68,13 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother<LevelCacheType>
     void extrapolatedSmoothing(Vector<double> x, ConstVector<double> rhs, Vector<double> temp) override;
 
 private:
-    /* ------------------- */
-    /* Tridiagonal solvers */
-    /* ------------------- */
-
-    // Batched solver for cyclic-tridiagonal circle line A_sc matrices.
-    BatchedTridiagonalSolver<double> circle_tridiagonal_solver_;
-
-    // Batched solver for tridiagonal radial circle line A_sc matrices.
-    BatchedTridiagonalSolver<double> radial_tridiagonal_solver_;
-
-    // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because
-    // it potentially includes across-origin coupling. Therefore, it is assembled
-    // into a sparse matrix and solved using a general-purpose sparse solver.
-    // When using the MUMPS solver, the matrix is assembled in COO format.
-    // When using the in-house solver, the matrix is stored in CSR format.
-#ifdef GMGPOLAR_USE_MUMPS
-    using MatrixType = SparseMatrixCOO<double>;
-    DMUMPS_STRUC_C inner_boundary_mumps_solver_;
-#else
-    using MatrixType = SparseMatrixCSR<double>;
-    SparseLUSolver<double> inner_boundary_lu_solver_;
-#endif
-    // Sparse matrix for the non-tridiagonal inner boundary circle block.
-    MatrixType inner_boundary_circle_matrix_;
-
-    // Note:
-    //   - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead.
-    //   - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r.
-    //   - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta.
-
     /* ------------------- */
     /* Stencil definitions */
     /* ------------------- */
 
+    // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_,
+    // since the mumps solver will be build in the member initializer of the Smoother class.
+
     // Stencils encode neighborhood connectivity for A_sc matrix assembly.
     // It is only used in the construction of COO/CSR matrices.
     // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices.
@@ -126,6 +96,48 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother<LevelCacheType>
     };
     // clang-format on
 
+    /* ------------------- */
+    /* Tridiagonal solvers */
+    /* ------------------- */
+
+    // Batched solver for cyclic-tridiagonal circle line A_sc matrices.
+    BatchedTridiagonalSolver<double> circle_tridiagonal_solver_;
+
+    // Batched solver for tridiagonal radial line A_sc matrices.
+    BatchedTridiagonalSolver<double> radial_tridiagonal_solver_;
+
+    // Note:
+    //   - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead.
+    //   - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r.
+    //   - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta.
+
+    /* ------------------------ */
+    /* Interior boundary solver */
+    /* ------------------------ */
+
+    // The inner circle matrix (i_r = 0) is NOT tridiagonal due to across-origin coupling.
+    // It is solved using a general-purpose sparse solver.
+    // - MUMPS: matrix assembled in COO format; solver owns the matrix internally.
+    // - In-house: matrix stored in CSR; solver does not own the matrix.
+
+#ifdef GMGPOLAR_USE_MUMPS
+    using InnerBoundaryMatrix = SparseMatrixCOO<double>;
+    using InnerBoundarySolver = CooMumpsSolver;
+#else
+    using InnerBoundaryMatrix = SparseMatrixCSR<double>;
+    using InnerBoundarySolver = SparseLUSolver<double>;
+
+    // Stored only for the in-house solver (CSR).
+    InnerBoundaryMatrix inner_boundary_circle_matrix_;
+#endif
+
+    // Solver object (owns matrix if MUMPS, references if in-house solver).
+    InnerBoundarySolver inner_boundary_solver_;
+
+    /* -------------- */
+    /* Stencil access */
+    /* -------------- */
+
     // Select correct stencil depending on the grid position.
     const Stencil& getStencil(int i_r, int i_theta) const; /* Only i_r = 0 implemented */
     // Number of nonzero A_sc entries.
@@ -137,19 +149,25 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother<LevelCacheType>
     /* --------------- */
     /* Matrix assembly */
     /* --------------- */
-
     // Build all A_sc matrices for circle and radial smoothers.
-    void buildAscMatrices();
-    // Build A_sc matrix block for a single circular line.
-    void buildAscCircleSection(int i_r);
-    // Build A_sc matrix block for a single radial line.
-    void buildAscRadialSection(int i_theta);
-    // Build A_sc for a specific node (i_r, i_theta)
-    void nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior,
-                          MatrixType& inner_boundary_circle_matrix,
-                          BatchedTridiagonalSolver<double>& circle_tridiagonal_solver,
-                          BatchedTridiagonalSolver<double>& radial_tridiagonal_solver, double arr, double att,
-                          double art, double detDF, double coeff_beta);
+    void buildTridiagonalSolverMatrices();
+    void buildTridiagonalCircleSection(int i_r);
+    void buildTridiagonalRadialSection(int i_theta);
+    // Build the tridiagonal solver matrices for a specific node (i_r, i_theta)
+    void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior,
+                                            BatchedTridiagonalSolver<double>& circle_tridiagonal_solver,
+                                            BatchedTridiagonalSolver<double>& radial_tridiagonal_solver, double arr,
+                                            double att, double art, double detDF, double coeff_beta);
+
+    // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling.
+    InnerBoundaryMatrix buildInteriorBoundarySolverMatrix();
+    // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary.
+    void nodeBuildInteriorBoundarySolverMatrix_i_r_0(int i_theta, const PolarGrid& grid, bool DirBC_Interior,
+                                                     InnerBoundaryMatrix& matrix, double arr, double att, double art,
+                                                     double detDF, double coeff_beta);
+    void nodeBuildInteriorBoundarySolverMatrix_i_r_1(int i_theta, const PolarGrid& grid, bool DirBC_Interior,
+                                                     InnerBoundaryMatrix& matrix, double arr, double att, double art,
+                                                     double detDF, double coeff_beta);
 
     /* ---------------------- */
     /* Orthogonal application */
@@ -157,9 +175,9 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother<LevelCacheType>
 
     // Compute temp = f_sc − A_sc^ortho * u_sc^ortho   (precomputed right-hand side)
     // where x = u_sc and rhs = f_sc
-    void applyAscOrthoCircleSection(int i_r, SmootherColor smoother_color, ConstVector<double> x,
+    void applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, ConstVector<double> x,
                                     ConstVector<double> rhs, Vector<double> temp);
-    void applyAscOrthoRadialSection(int i_theta, SmootherColor smoother_color, ConstVector<double> x,
+    void applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, ConstVector<double> x,
                                     ConstVector<double> rhs, Vector<double> temp);
 
     /* ----------------- */
@@ -178,21 +196,11 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother<LevelCacheType>
     void solveWhiteCircleSection(Vector<double> x, Vector<double> temp);
     void solveBlackRadialSection(Vector<double> x, Vector<double> temp);
     void solveWhiteRadialSection(Vector<double> x, Vector<double> temp);
-
-    /* ----------------------------------- */
-    /* Initialize and destroy MUMPS solver */
-    /* ----------------------------------- */
-#ifdef GMGPOLAR_USE_MUMPS
-    // Initialize sparse MUMPS solver with assembled COO matrix.
-    void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO<double>& solver_matrix);
-    // Release MUMPS internal memory and MPI structures.
-    void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver);
-#endif
 };
 
 #include "extrapolatedSmootherGive.inl"
 #include "smootherStencil.inl"
-#include "buildAscMatrices.inl"
+#include "buildInnerBoundaryAsc.inl"
+#include "buildTridiagonalAsc.inl"
 #include "applyAscOrtho.inl"
 #include "solveAscSystem.inl"
-#include "initializeMumps.inl"
diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.inl
index 734f163f..e55b51ce 100644
--- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.inl
+++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.inl
@@ -7,21 +7,16 @@ ExtrapolatedSmootherGive<LevelCacheType>::ExtrapolatedSmootherGive(const PolarGr
     : ExtrapolatedSmoother<LevelCacheType>(grid, level_cache, DirBC_Interior, num_omp_threads)
     , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true)
     , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false)
-{
-    buildAscMatrices();
 #ifdef GMGPOLAR_USE_MUMPS
-    initializeMumpsSolver(inner_boundary_mumps_solver_, inner_boundary_circle_matrix_);
+    , inner_boundary_solver_(buildInteriorBoundarySolverMatrix())
 #else
-    inner_boundary_lu_solver_ = SparseLUSolver<double>(inner_boundary_circle_matrix_);
+    , inner_boundary_circle_matrix_(buildInteriorBoundarySolverMatrix())
+    , inner_boundary_solver_(inner_boundary_circle_matrix_)
 #endif
-}
-
-template <class LevelCacheType>
-ExtrapolatedSmootherGive<LevelCacheType>::~ExtrapolatedSmootherGive()
 {
-#ifdef GMGPOLAR_USE_MUMPS
-    finalizeMumpsSolver(inner_boundary_mumps_solver_);
-#endif
+    buildTridiagonalSolverMatrices();
+    circle_tridiagonal_solver_.setup();
+    radial_tridiagonal_solver_.setup();
 }
 
 // The smoothing solves linear systems of the form:
@@ -65,7 +60,7 @@ void ExtrapolatedSmootherGive<LevelCacheType>::extrapolatedSmoothing(Vector<doub
                 temp[index]     = (i_r & 1 || i_theta & 1) ? rhs[index] : x[index];
             }
         }
-#pragma omp for
+#pragma omp for nowait
         for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
             for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
                 const int index = grid.index(i_r, i_theta);
diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.inl
deleted file mode 100644
index 1f939b07..00000000
--- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.inl
+++ /dev/null
@@ -1,114 +0,0 @@
-#pragma once
-
-#ifdef GMGPOLAR_USE_MUMPS
-
-template <class LevelCacheType>
-void ExtrapolatedSmootherGive<LevelCacheType>::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver,
-                                                                     SparseMatrixCOO<double>& solver_matrix)
-{
-    /* 
-     * MUMPS (a parallel direct solver) uses 1-based indexing, 
-     * whereas the input matrix follows 0-based indexing. 
-     * Adjust row and column indices to match MUMPS' requirements.
-     */
-    for (int i = 0; i < solver_matrix.non_zero_size(); i++) {
-        solver_matrix.row_index(i) += 1;
-        solver_matrix.col_index(i) += 1;
-    }
-
-    mumps_solver.job = JOB_INIT;
-    mumps_solver.par = PAR_PARALLEL;
-    /* The matrix is positive definite for invertible mappings. */
-    /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */
-    mumps_solver.sym          = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC);
-    mumps_solver.comm_fortran = USE_COMM_WORLD;
-    dmumps_c(&mumps_solver);
-
-    ICNTL(mumps_solver, 1) = 0; // Output stream for error messages.
-    ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process.
-    ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host
-    ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages.
-    ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format
-    ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix
-    ICNTL(mumps_solver, 7) =
-        5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis
-    ICNTL(mumps_solver, 8)  = 77; // Describes the scaling strategy
-    ICNTL(mumps_solver, 9)  = 1; // Computes the solution using A or A^T
-    ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution
-    ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved
-    ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used
-    ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node
-    ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space
-        (solver_matrix.is_symmetric() ? 5 : 20);
-    ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format
-    ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads
-    // ICNTL(17) Doesn't exist
-    ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix
-    ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix
-    ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides
-    ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors.
-    ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve.
-    ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can
-    //                             allocate per working process
-    ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”.
-    ICNTL(mumps_solver, 25) =
-        0; // Allows the computation of a solution of a deficient matrix and also of a null space basis
-    ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed
-    ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides.
-    ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed
-    ICNTL(mumps_solver, 29) =
-        0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation.
-    ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix
-    ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization.
-    ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization
-    ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix.
-    ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3
-    ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature
-    ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant
-    ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks
-    ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors
-    ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks
-    // ICNTL(40-47) Don't exist
-    ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism
-    ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase
-    // ICNTL(50-55) Don't exist
-    ICNTL(mumps_solver, 56) =
-        0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method
-    // ICNTL(57) Doesn't exist
-    ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization
-    // ICNTL(59-60) Don't exist
-
-    CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting
-    CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement
-    CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows
-    CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting
-    CNTL(mumps_solver, 5) =
-        0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active
-    // CNTL(6) Doesn't exist
-    CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression
-    // CNTL(8-15) Don't exist
-
-    mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION;
-    assert(solver_matrix.rows() == solver_matrix.columns());
-    mumps_solver.n   = solver_matrix.rows();
-    mumps_solver.nz  = solver_matrix.non_zero_size();
-    mumps_solver.irn = solver_matrix.row_indices_data();
-    mumps_solver.jcn = solver_matrix.column_indices_data();
-    mumps_solver.a   = solver_matrix.values_data();
-    dmumps_c(&mumps_solver);
-
-    if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) {
-        std::cout << "Warning: ExtrapolatedSmoother inner boundary matrix is not positive definite: Negative pivots in "
-                     "the factorization phase."
-                  << std::endl;
-    }
-}
-
-template <class LevelCacheType>
-void ExtrapolatedSmootherGive<LevelCacheType>::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver)
-{
-    mumps_solver.job = JOB_END;
-    dmumps_c(&mumps_solver);
-}
-
-#endif
diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.inl
index e17e6278..11cb6d1a 100644
--- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.inl
+++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.inl
@@ -22,19 +22,8 @@ void ExtrapolatedSmootherGive<LevelCacheType>::solveBlackCircleSection(Vector<do
         int batch_stride = 2;
         circle_tridiagonal_solver_.solve_diagonal(circle_section, batch_offset, batch_stride);
 
-#ifdef GMGPOLAR_USE_MUMPS
-        inner_boundary_mumps_solver_.job    = JOB_COMPUTE_SOLUTION;
-        inner_boundary_mumps_solver_.nrhs   = 1; // single rhs vector
-        inner_boundary_mumps_solver_.nz_rhs = grid.ntheta(); // non-zeros in rhs
-        inner_boundary_mumps_solver_.rhs    = circle_section.data();
-        inner_boundary_mumps_solver_.lrhs   = grid.ntheta(); // leading dimension of rhs
-        dmumps_c(&inner_boundary_mumps_solver_);
-        if (inner_boundary_mumps_solver_.info[0] != 0) {
-            std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl;
-        }
-#else
-        inner_boundary_lu_solver_.solveInPlace(circle_section.data());
-#endif
+        Vector<double> inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta()));
+        inner_boundary_solver_.solveInPlace(inner_boundary);
     }
 
     // Move updated values to x
@@ -69,19 +58,8 @@ void ExtrapolatedSmootherGive<LevelCacheType>::solveWhiteCircleSection(Vector<do
         int batch_stride = 2;
         circle_tridiagonal_solver_.solve_diagonal(circle_section, batch_offset, batch_stride);
 
-#ifdef GMGPOLAR_USE_MUMPS
-        inner_boundary_mumps_solver_.job    = JOB_COMPUTE_SOLUTION;
-        inner_boundary_mumps_solver_.nrhs   = 1; // single rhs vector
-        inner_boundary_mumps_solver_.nz_rhs = grid.ntheta(); // non-zeros in rhs
-        inner_boundary_mumps_solver_.rhs    = circle_section.data();
-        inner_boundary_mumps_solver_.lrhs   = grid.ntheta(); // leading dimension of rhs
-        dmumps_c(&inner_boundary_mumps_solver_);
-        if (inner_boundary_mumps_solver_.info[0] != 0) {
-            std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl;
-        }
-#else
-        inner_boundary_lu_solver_.solveInPlace(circle_section.data());
-#endif
+        Vector<double> inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta()));
+        inner_boundary_solver_.solveInPlace(inner_boundary);
     }
 
     // Move updated values to x
diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl
index 7686504c..0d8ac1da 100644
--- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl
+++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.inl
@@ -467,10 +467,10 @@ void ExtrapolatedSmootherTake<LevelCacheType>::applyAscOrthoBlackCircleSection(C
 {
     using extrapolated_smoother_take::nodeApplyAscOrthoCircleTake;
 
-    const PolarGrid& grid             = ExtrapolatedSmootherTake<LevelCacheType>::grid_;
-    const LevelCacheType& level_cache = ExtrapolatedSmootherTake<LevelCacheType>::level_cache_;
-    const bool DirBC_Interior         = ExtrapolatedSmootherTake<LevelCacheType>::DirBC_Interior_;
-    const int num_omp_threads         = ExtrapolatedSmootherTake<LevelCacheType>::num_omp_threads_;
+    const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
+    const int num_omp_threads         = ExtrapolatedSmoother<LevelCacheType>::num_omp_threads_;
 
     assert(level_cache.cacheDensityProfileCoefficients());
     assert(level_cache.cacheDomainGeometry());
@@ -500,10 +500,10 @@ void ExtrapolatedSmootherTake<LevelCacheType>::applyAscOrthoWhiteCircleSection(C
 {
     using extrapolated_smoother_take::nodeApplyAscOrthoCircleTake;
 
-    const PolarGrid& grid             = ExtrapolatedSmootherTake<LevelCacheType>::grid_;
-    const LevelCacheType& level_cache = ExtrapolatedSmootherTake<LevelCacheType>::level_cache_;
-    const bool DirBC_Interior         = ExtrapolatedSmootherTake<LevelCacheType>::DirBC_Interior_;
-    const int num_omp_threads         = ExtrapolatedSmootherTake<LevelCacheType>::num_omp_threads_;
+    const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
+    const int num_omp_threads         = ExtrapolatedSmoother<LevelCacheType>::num_omp_threads_;
 
     assert(level_cache.cacheDensityProfileCoefficients());
     assert(level_cache.cacheDomainGeometry());
@@ -533,10 +533,10 @@ void ExtrapolatedSmootherTake<LevelCacheType>::applyAscOrthoBlackRadialSection(C
 {
     using extrapolated_smoother_take::nodeApplyAscOrthoRadialTake;
 
-    const PolarGrid& grid             = ExtrapolatedSmootherTake<LevelCacheType>::grid_;
-    const LevelCacheType& level_cache = ExtrapolatedSmootherTake<LevelCacheType>::level_cache_;
-    const bool DirBC_Interior         = ExtrapolatedSmootherTake<LevelCacheType>::DirBC_Interior_;
-    const int num_omp_threads         = ExtrapolatedSmootherTake<LevelCacheType>::num_omp_threads_;
+    const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
+    const int num_omp_threads         = ExtrapolatedSmoother<LevelCacheType>::num_omp_threads_;
 
     assert(level_cache.cacheDensityProfileCoefficients());
     assert(level_cache.cacheDomainGeometry());
@@ -563,10 +563,10 @@ void ExtrapolatedSmootherTake<LevelCacheType>::applyAscOrthoWhiteRadialSection(C
 {
     using extrapolated_smoother_take::nodeApplyAscOrthoRadialTake;
 
-    const PolarGrid& grid             = ExtrapolatedSmootherTake<LevelCacheType>::grid_;
-    const LevelCacheType& level_cache = ExtrapolatedSmootherTake<LevelCacheType>::level_cache_;
-    const bool DirBC_Interior         = ExtrapolatedSmootherTake<LevelCacheType>::DirBC_Interior_;
-    const int num_omp_threads         = ExtrapolatedSmootherTake<LevelCacheType>::num_omp_threads_;
+    const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
+    const int num_omp_threads         = ExtrapolatedSmoother<LevelCacheType>::num_omp_threads_;
 
     assert(level_cache.cacheDensityProfileCoefficients());
     assert(level_cache.cacheDomainGeometry());
diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl
index 373a5c74..548fd9ed 100644
--- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl
+++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.inl
@@ -109,10 +109,10 @@ void ExtrapolatedSmootherTake<LevelCacheType>::nodeBuildInteriorBoundarySolverMa
             const int top    = grid.index(i_r, i_theta_P1);
             const int right  = grid.index(i_r + 1, i_theta);
 
-            const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) +
+            const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) +
                                         coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
                                         coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
-            const double left_value = -coeff1 * (arr[center] + arr[left]);
+            const double left_value   = -coeff1 * (arr[center] + arr[left]);
 
             /* Fill matrix row of (i,j) */
             row = center_index;
@@ -156,14 +156,20 @@ template <class LevelCacheType>
 typename ExtrapolatedSmootherTake<LevelCacheType>::InnerBoundaryMatrix
 ExtrapolatedSmootherTake<LevelCacheType>::buildInteriorBoundarySolverMatrix()
 {
-    const PolarGrid& grid             = ExtrapolatedSmootherTake<LevelCacheType>::grid_;
-    const LevelCacheType& level_cache = ExtrapolatedSmootherTake<LevelCacheType>::level_cache_;
-    const bool DirBC_Interior         = ExtrapolatedSmootherTake<LevelCacheType>::DirBC_Interior_;
-    const int num_omp_threads         = ExtrapolatedSmootherTake<LevelCacheType>::num_omp_threads_;
+    const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
+    const int num_omp_threads         = ExtrapolatedSmoother<LevelCacheType>::num_omp_threads_;
 
     const int i_r    = 0;
     const int ntheta = grid.ntheta();
 
+    // The interior boundary matrix is symmetric due to the periodicity in the theta direction
+    // and the assumption that ntheta is even, which is required for the across-origin discretization.
+    // We store all non-zero entries of the matrix, both in COO format (for MUMPS)
+    // and in CSR format (for the in-house solver). If the COO matrix is marked as symmetric,
+    // the COO_Mumps_Solver optimizes the factorization by only using the upper triangular part of the matrix,
+    // which is extracted by the COO_Mumps_Solver internally.
 #ifdef GMGPOLAR_USE_MUMPS
     const int nnz = getNonZeroCountCircleAsc(i_r);
     SparseMatrixCOO<double> inner_boundary_solver_matrix(ntheta, ntheta, nnz);
diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.inl b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.inl
index d487aa00..b4553470 100644
--- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.inl
+++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.inl
@@ -89,9 +89,9 @@ void ExtrapolatedSmootherTake<LevelCacheType>::nodeBuildTridiagonalSolverMatrice
             /* Center: (Left, Right, Bottom, Top) */
             row    = center_index;
             column = center_index;
-            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) +
-                    coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
-                    coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
+            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) +
+                     coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
+                     coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
             updateMatrixElement(solver, batch, row, column, value);
 
             /* Bottom */
@@ -128,9 +128,9 @@ void ExtrapolatedSmootherTake<LevelCacheType>::nodeBuildTridiagonalSolverMatrice
                 /* Center: (Left, Right, Bottom, Top) */
                 row    = center_index;
                 column = center_index;
-                value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) +
-                        coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
-                        coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
+                value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) +
+                         coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
+                         coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
                 updateMatrixElement(solver, batch, row, column, value);
             }
             else {
@@ -232,9 +232,9 @@ void ExtrapolatedSmootherTake<LevelCacheType>::nodeBuildTridiagonalSolverMatrice
             /* Center: (Left, Right, Bottom, Top) */
             row    = center_index;
             column = center_index;
-            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) +
-                    coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
-                    coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
+            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) +
+                     coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
+                     coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
             updateMatrixElement(solver, batch, row, column, value);
 
             /* Left */
@@ -276,9 +276,9 @@ void ExtrapolatedSmootherTake<LevelCacheType>::nodeBuildTridiagonalSolverMatrice
                 /* Center: (Left, Right, Bottom, Top) */
                 row    = center_index;
                 column = center_index;
-                value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) +
-                        coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
-                        coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
+                value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) +
+                         coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
+                         coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
                 updateMatrixElement(solver, batch, row, column, value);
             }
             else {
@@ -340,9 +340,9 @@ void ExtrapolatedSmootherTake<LevelCacheType>::nodeBuildTridiagonalSolverMatrice
             /* Center: (Left, Right, Bottom, Top) */
             row    = center_index;
             column = center_index;
-            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) +
-                    coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
-                    coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
+            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) +
+                     coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
+                     coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
             updateMatrixElement(solver, batch, row, column, value);
 
             /* Right */
@@ -363,9 +363,9 @@ void ExtrapolatedSmootherTake<LevelCacheType>::nodeBuildTridiagonalSolverMatrice
                 /* Center: (Left, Right, Bottom, Top) */
                 row    = center_index;
                 column = center_index;
-                value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) +
-                        coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
-                        coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
+                value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) +
+                         coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
+                         coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
                 updateMatrixElement(solver, batch, row, column, value);
             }
             else {
@@ -429,9 +429,9 @@ void ExtrapolatedSmootherTake<LevelCacheType>::nodeBuildTridiagonalSolverMatrice
             /* Center: (Left, Right, Bottom, Top) */
             row    = center_index;
             column = center_index;
-            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) +
-                    coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
-                    coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
+            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) +
+                     coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
+                     coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
             updateMatrixElement(solver, batch, row, column, value);
 
             /* Left */
@@ -457,9 +457,9 @@ void ExtrapolatedSmootherTake<LevelCacheType>::nodeBuildTridiagonalSolverMatrice
             /* Center: (Left, Right, Bottom, Top) */
             row    = center_index;
             column = center_index;
-            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) +
-                    coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
-                    coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
+            value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) +
+                     coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) +
+                     coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]);
             updateMatrixElement(solver, batch, row, column, value);
         }
     }
@@ -518,10 +518,10 @@ void ExtrapolatedSmootherTake<LevelCacheType>::nodeBuildTridiagonalSolverMatrice
 template <class LevelCacheType>
 void ExtrapolatedSmootherTake<LevelCacheType>::buildTridiagonalSolverMatrices()
 {
-    const PolarGrid& grid             = ExtrapolatedSmootherTake<LevelCacheType>::grid_;
-    const LevelCacheType& level_cache = ExtrapolatedSmootherTake<LevelCacheType>::level_cache_;
-    const bool DirBC_Interior         = ExtrapolatedSmootherTake<LevelCacheType>::DirBC_Interior_;
-    const int num_omp_threads         = ExtrapolatedSmootherTake<LevelCacheType>::num_omp_threads_;
+    const PolarGrid& grid             = ExtrapolatedSmoother<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = ExtrapolatedSmoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = ExtrapolatedSmoother<LevelCacheType>::DirBC_Interior_;
+    const int num_omp_threads         = ExtrapolatedSmoother<LevelCacheType>::num_omp_threads_;
 
     assert(level_cache.cacheDensityProfileCoefficients());
     assert(level_cache.cacheDomainGeometry());
diff --git a/include/GMGPolar/setup.h b/include/GMGPolar/setup.h
index 3fd729c7..e33d96b9 100644
--- a/include/GMGPolar/setup.h
+++ b/include/GMGPolar/setup.h
@@ -211,7 +211,7 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::discretize_rhs_f(
                         double k1          = grid.angularSpacing(i_theta - 1);
                         double k2          = grid.angularSpacing(i_theta);
                         const double detDF = detDF_cache[grid.index(i_r, i_theta)];
-                        rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF);
+                        rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF);
                     }
                     else if (i_r == 0 && DirBC_Interior_) {
                         rhs_f[grid.index(i_r, i_theta)] *= 1.0;
@@ -236,7 +236,7 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::discretize_rhs_f(
                         double k1          = grid.angularSpacing(i_theta - 1);
                         double k2          = grid.angularSpacing(i_theta);
                         const double detDF = detDF_cache[grid.index(i_r, i_theta)];
-                        rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF);
+                        rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF);
                     }
                     else if (i_r == 0 && DirBC_Interior_) {
                         rhs_f[grid.index(i_r, i_theta)] *= 1.0;
@@ -277,7 +277,7 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::discretize_rhs_f(
                         double Jtt = domain_geometry_.dFy_dt(r, theta);
                         /* Compute the determinant of the Jacobian matrix */
                         double detDF = Jrr * Jtt - Jrt * Jtr;
-                        rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF);
+                        rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF);
                     }
                     else if (i_r == 0 && DirBC_Interior_) {
                         rhs_f[grid.index(i_r, i_theta)] *= 1.0;
@@ -312,7 +312,7 @@ void GMGPolar<DomainGeometry, DensityProfileCoefficients>::discretize_rhs_f(
                         double Jtt = domain_geometry_.dFy_dt(r, theta);
                         /* Compute the determinant of the Jacobian matrix */
                         double detDF = Jrr * Jtt - Jrt * Jtr;
-                        rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF);
+                        rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF);
                     }
                     else if (i_r == 0 && DirBC_Interior_) {
                         rhs_f[grid.index(i_r, i_theta)] *= 1.0;
diff --git a/include/Level/level.inl b/include/Level/level.inl
index b26a0e1f..288d6dbe 100644
--- a/include/Level/level.inl
+++ b/include/Level/level.inl
@@ -4,8 +4,8 @@
 
 #include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h"
 #include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h"
-#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h"
-#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h"
+#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h"
+#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h"
 
 #include "../../include/Smoother/SmootherGive/smootherGive.h"
 #include "../../include/Smoother/SmootherTake/smootherTake.h"
@@ -16,9 +16,10 @@
 // ----------- //
 // Constructor //
 template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
-Level<DomainGeometry, DensityProfileCoefficients>::Level(const int level_depth, std::unique_ptr<const PolarGrid> grid,
-                             std::unique_ptr<const LevelCache<DomainGeometry, DensityProfileCoefficients>> level_cache,
-                             const ExtrapolationType extrapolation, const bool FMG, const bool PCG_FMG)
+Level<DomainGeometry, DensityProfileCoefficients>::Level(
+    const int level_depth, std::unique_ptr<const PolarGrid> grid,
+    std::unique_ptr<const LevelCache<DomainGeometry, DensityProfileCoefficients>> level_cache,
+    const ExtrapolationType extrapolation, const bool FMG, const bool PCG_FMG)
     : level_depth_(level_depth)
     , grid_(std::move(grid))
     , level_cache_(std::move(level_cache))
@@ -46,7 +47,8 @@ const PolarGrid& Level<DomainGeometry, DensityProfileCoefficients>::grid() const
 }
 
 template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
-const LevelCache<DomainGeometry, DensityProfileCoefficients>& Level<DomainGeometry, DensityProfileCoefficients>::levelCache() const
+const LevelCache<DomainGeometry, DensityProfileCoefficients>&
+Level<DomainGeometry, DensityProfileCoefficients>::levelCache() const
 {
     return *level_cache_;
 }
@@ -102,8 +104,8 @@ ConstVector<double> Level<DomainGeometry, DensityProfileCoefficients>::error_cor
 // -------------- //
 // Apply Residual //
 template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
-void Level<DomainGeometry, DensityProfileCoefficients>::initializeResidual(const bool DirBC_Interior, const int num_omp_threads,
-                                               const StencilDistributionMethod stencil_distribution_method)
+void Level<DomainGeometry, DensityProfileCoefficients>::initializeResidual(
+    const bool DirBC_Interior, const int num_omp_threads, const StencilDistributionMethod stencil_distribution_method)
 {
     if (stencil_distribution_method == StencilDistributionMethod::CPU_TAKE) {
         op_residual_ =
@@ -118,14 +120,16 @@ void Level<DomainGeometry, DensityProfileCoefficients>::initializeResidual(const
 }
 
 template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
-void Level<DomainGeometry, DensityProfileCoefficients>::computeResidual(Vector<double> result, ConstVector<double> rhs, ConstVector<double> x) const
+void Level<DomainGeometry, DensityProfileCoefficients>::computeResidual(Vector<double> result, ConstVector<double> rhs,
+                                                                        ConstVector<double> x) const
 {
     if (!op_residual_)
         throw std::runtime_error("Residual not initialized.");
     op_residual_->computeResidual(result, rhs, x);
 }
 template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
-void Level<DomainGeometry, DensityProfileCoefficients>::applySystemOperator(Vector<double> result, ConstVector<double> x) const
+void Level<DomainGeometry, DensityProfileCoefficients>::applySystemOperator(Vector<double> result,
+                                                                            ConstVector<double> x) const
 {
     if (!op_residual_)
         throw std::runtime_error("Residual not initialized.");
@@ -135,8 +139,8 @@ void Level<DomainGeometry, DensityProfileCoefficients>::applySystemOperator(Vect
 // ------------------- //
 // Solve coarse System //
 template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
-void Level<DomainGeometry, DensityProfileCoefficients>::initializeDirectSolver(const bool DirBC_Interior, const int num_omp_threads,
-                                                   const StencilDistributionMethod stencil_distribution_method)
+void Level<DomainGeometry, DensityProfileCoefficients>::initializeDirectSolver(
+    const bool DirBC_Interior, const int num_omp_threads, const StencilDistributionMethod stencil_distribution_method)
 {
 #ifdef GMGPOLAR_USE_MUMPS
     if (stencil_distribution_method == StencilDistributionMethod::CPU_TAKE) {
@@ -172,8 +176,8 @@ void Level<DomainGeometry, DensityProfileCoefficients>::directSolveInPlace(Vecto
 // --------------- //
 // Apply Smoothing //
 template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
-void Level<DomainGeometry, DensityProfileCoefficients>::initializeSmoothing(const bool DirBC_Interior, const int num_omp_threads,
-                                                const StencilDistributionMethod stencil_distribution_method)
+void Level<DomainGeometry, DensityProfileCoefficients>::initializeSmoothing(
+    const bool DirBC_Interior, const int num_omp_threads, const StencilDistributionMethod stencil_distribution_method)
 {
     if (stencil_distribution_method == StencilDistributionMethod::CPU_TAKE) {
         op_smoother_ =
@@ -188,7 +192,8 @@ void Level<DomainGeometry, DensityProfileCoefficients>::initializeSmoothing(cons
 }
 
 template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
-void Level<DomainGeometry, DensityProfileCoefficients>::smoothing(Vector<double> x, ConstVector<double> rhs, Vector<double> temp) const
+void Level<DomainGeometry, DensityProfileCoefficients>::smoothing(Vector<double> x, ConstVector<double> rhs,
+                                                                  Vector<double> temp) const
 {
     if (!op_smoother_)
         throw std::runtime_error("Smoother not initialized.");
@@ -198,8 +203,8 @@ void Level<DomainGeometry, DensityProfileCoefficients>::smoothing(Vector<double>
 // ---------------------------- //
 // Apply Extrapolated Smoothing //
 template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
-void Level<DomainGeometry, DensityProfileCoefficients>::initializeExtrapolatedSmoothing(const bool DirBC_Interior, const int num_omp_threads,
-                                                            const StencilDistributionMethod stencil_distribution_method)
+void Level<DomainGeometry, DensityProfileCoefficients>::initializeExtrapolatedSmoothing(
+    const bool DirBC_Interior, const int num_omp_threads, const StencilDistributionMethod stencil_distribution_method)
 {
     if (stencil_distribution_method == StencilDistributionMethod::CPU_TAKE) {
         op_extrapolated_smoother_ = std::make_unique<ExtrapolatedSmootherTake<LevelCacheType>>(
@@ -214,7 +219,8 @@ void Level<DomainGeometry, DensityProfileCoefficients>::initializeExtrapolatedSm
 }
 
 template <concepts::DomainGeometry DomainGeometry, concepts::DensityProfileCoefficients DensityProfileCoefficients>
-void Level<DomainGeometry, DensityProfileCoefficients>::extrapolatedSmoothing(Vector<double> x, ConstVector<double> rhs, Vector<double> temp) const
+void Level<DomainGeometry, DensityProfileCoefficients>::extrapolatedSmoothing(Vector<double> x, ConstVector<double> rhs,
+                                                                              Vector<double> temp) const
 {
     if (!op_extrapolated_smoother_)
         throw std::runtime_error("Extrapolated Smoother not initialized.");
diff --git a/include/Residual/ResidualGive/applyAGive.inl b/include/Residual/ResidualGive/applyAGive.inl
index 2802e0ef..0ace217e 100644
--- a/include/Residual/ResidualGive/applyAGive.inl
+++ b/include/Residual/ResidualGive/applyAGive.inl
@@ -296,13 +296,14 @@ void ResidualGive<LevelCacheType>::applyCircleSection(const int i_r, Vector<doub
 {
     using residual_give::node_apply_a_give;
 
-    const PolarGrid& grid = Residual<LevelCacheType>::grid_;
+    const PolarGrid& grid             = Residual<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = Residual<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = Residual<LevelCacheType>::DirBC_Interior_;
 
     const double r = grid.radius(i_r);
     for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
         const double theta = grid.theta(i_theta);
-        node_apply_a_give(i_r, i_theta, r, theta, grid, Residual<LevelCacheType>::level_cache_,
-                          Residual<LevelCacheType>::DirBC_Interior_, result, x);
+        node_apply_a_give(i_r, i_theta, r, theta, grid, level_cache, DirBC_Interior, result, x);
     }
 }
 
@@ -312,12 +313,13 @@ void ResidualGive<LevelCacheType>::applyRadialSection(const int i_theta, Vector<
 {
     using residual_give::node_apply_a_give;
 
-    const PolarGrid& grid = Residual<LevelCacheType>::grid_;
+    const PolarGrid& grid             = Residual<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = Residual<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = Residual<LevelCacheType>::DirBC_Interior_;
 
     const double theta = grid.theta(i_theta);
     for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
         const double r = grid.radius(i_r);
-        node_apply_a_give(i_r, i_theta, r, theta, grid, Residual<LevelCacheType>::level_cache_,
-                          Residual<LevelCacheType>::DirBC_Interior_, result, x);
+        node_apply_a_give(i_r, i_theta, r, theta, grid, level_cache, DirBC_Interior, result, x);
     }
 }
diff --git a/include/Residual/ResidualGive/residualGive.inl b/include/Residual/ResidualGive/residualGive.inl
index bc50f620..0f9b6eb8 100644
--- a/include/Residual/ResidualGive/residualGive.inl
+++ b/include/Residual/ResidualGive/residualGive.inl
@@ -9,8 +9,7 @@ ResidualGive<LevelCacheType>::ResidualGive(const PolarGrid& grid, const LevelCac
 
 /* ------------ */
 /* result = A*x */
-
-// clang-format off
+/* ------------ */
 template <class LevelCacheType>
 void ResidualGive<LevelCacheType>::applySystemOperator(Vector<double> result, ConstVector<double> x) const
 {
@@ -18,107 +17,84 @@ void ResidualGive<LevelCacheType>::applySystemOperator(Vector<double> result, Co
 
     assign(result, 0.0);
 
-    const PolarGrid& grid = Residual<LevelCacheType>::grid_;
-
+    const PolarGrid& grid     = Residual<LevelCacheType>::grid_;
     const int num_omp_threads = Residual<LevelCacheType>::num_omp_threads_;
 
-    /* Single-threaded execution */
-    if (num_omp_threads == 1) {
-        for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) {
+    const int num_smoother_circles    = grid.numberSmootherCircles();
+    const int additional_radial_tasks = grid.ntheta() % 3;
+    const int num_radial_tasks        = grid.ntheta() - additional_radial_tasks;
+
+    /* ---------------- */
+    /* Circular section */
+    /* ---------------- */
+    // We parallelize the loop with step 3 to avoid data race conditions between adjacent circles.
+#pragma omp parallel num_threads(num_omp_threads)
+    {
+#pragma omp for
+        for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) {
             applyCircleSection(i_r, result, x);
-        }
-        for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
-            applyRadialSection(i_theta, result, x);
-        }
+        } /* Implicit barrier */
+#pragma omp for
+        for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) {
+            applyCircleSection(i_r, result, x);
+        } /* Implicit barrier */
+#pragma omp for
+        for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) {
+            applyCircleSection(i_r, result, x);
+        } /* Implicit barrier */
     }
-    /* Multi-threaded execution */
-    else {
-        const int num_circle_tasks        = grid.numberSmootherCircles();
-        const int additional_radial_tasks = grid.ntheta() % 3;
-        const int num_radial_tasks        = grid.ntheta() - additional_radial_tasks;
 
-        #pragma omp parallel num_threads(num_omp_threads)
-        {
-            /* Circle Section 0 */
-            #pragma omp for
-            for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) {
-                int i_r = grid.numberSmootherCircles() - circle_task - 1;
-                applyCircleSection(i_r, result, x);
-            }
-            /* Circle Section 1 */
-            #pragma omp for
-            for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) {
-                int i_r = grid.numberSmootherCircles() - circle_task - 1;
-                applyCircleSection(i_r, result, x);
-            }
-            /* Circle Section 2 */
-            #pragma omp for nowait
-            for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) {
-                int i_r = grid.numberSmootherCircles() - circle_task - 1;
-                applyCircleSection(i_r, result, x);
-            }
+    /* -------------- */
+    /* Radial section */
+    /* -------------- */
+    // We parallelize the loop with step 3 to avoid data race conditions between adjacent radial lines.
+    // Due to the periodicity in the angular direction, we can have at most 2 additional radial tasks
+    // that are handled serially before the parallel loops.
+    if (additional_radial_tasks > 0) {
+        const int i_theta = 0;
+        applyRadialSection(i_theta, result, x);
+    }
 
-            /* Radial Section 0 */
-            #pragma omp for
-            for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
-                if (radial_task > 0) {
-                    int i_theta = radial_task + additional_radial_tasks;
-                    applyRadialSection(i_theta, result, x);
-                }
-                else {
-                    if (additional_radial_tasks == 0) {
-                        applyRadialSection(0, result, x);
-                    }
-                    else if (additional_radial_tasks >= 1) {
-                        applyRadialSection(0, result, x);
-                        applyRadialSection(1, result, x);
-                    }
-                }
-            }
-            /* Radial Section 1 */
-            #pragma omp for
-            for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) {
-                if (radial_task > 1) {
-                    int i_theta = radial_task + additional_radial_tasks;
-                    applyRadialSection(i_theta, result, x);
-                }
-                else {
-                    if (additional_radial_tasks == 0) {
-                        applyRadialSection(1, result, x);
-                    }
-                    else if (additional_radial_tasks == 1) {
-                        applyRadialSection(2, result, x);
-                    }
-                    else if (additional_radial_tasks == 2) {
-                        applyRadialSection(2, result, x);
-                        applyRadialSection(3, result, x);
-                    }
-                }
-            }
-            /* Radial Section 2 */
-            #pragma omp for
-            for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) {
-                int i_theta = radial_task + additional_radial_tasks;
-                applyRadialSection(i_theta, result, x);
-            }
-        }
+    if (additional_radial_tasks > 1) {
+        const int i_theta = 1;
+        applyRadialSection(i_theta, result, x);
+    }
+
+#pragma omp parallel num_threads(num_omp_threads)
+    {
+#pragma omp for
+        for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
+            const int i_theta = radial_task + additional_radial_tasks;
+            applyRadialSection(i_theta, result, x);
+        } /* Implicit barrier */
+#pragma omp for
+        for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) {
+            const int i_theta = radial_task + additional_radial_tasks;
+            applyRadialSection(i_theta, result, x);
+        } /* Implicit barrier */
+#pragma omp for
+        for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) {
+            const int i_theta = radial_task + additional_radial_tasks;
+            applyRadialSection(i_theta, result, x);
+        } /* Implicit barrier */
     }
 }
-// clang-format on
 
 /* ------------------ */
 /* result = rhs - A*x */
+/* ------------------ */
 template <class LevelCacheType>
 void ResidualGive<LevelCacheType>::computeResidual(Vector<double> result, ConstVector<double> rhs,
                                                    ConstVector<double> x) const
 {
     assert(result.size() == x.size());
 
+    const int num_omp_threads = Residual<LevelCacheType>::num_omp_threads_;
+
     applySystemOperator(result, x);
 
     // Subtract A*x from rhs to get the residual.
-    const int n               = result.size();
-    const int num_omp_threads = Residual<LevelCacheType>::num_omp_threads_;
+    const int n = result.size();
 #pragma omp parallel for num_threads(num_omp_threads)
     for (int i = 0; i < n; i++) {
         result[i] = rhs[i] - result[i];
diff --git a/include/Residual/ResidualTake/applyATake.inl b/include/Residual/ResidualTake/applyATake.inl
index 38cbe669..da42f45f 100644
--- a/include/Residual/ResidualTake/applyATake.inl
+++ b/include/Residual/ResidualTake/applyATake.inl
@@ -123,13 +123,15 @@ void ResidualTake<LevelCacheType>::applyCircleSection(const int i_r, Vector<doub
     assert(Residual<LevelCacheType>::level_cache_.cacheDensityProfileCoefficients());
     assert(Residual<LevelCacheType>::level_cache_.cacheDomainGeometry());
 
-    const PolarGrid& grid          = Residual<LevelCacheType>::grid_;
-    const bool DirBC_Interior      = Residual<LevelCacheType>::DirBC_Interior_;
-    ConstVector<double> arr        = Residual<LevelCacheType>::level_cache_.arr();
-    ConstVector<double> att        = Residual<LevelCacheType>::level_cache_.att();
-    ConstVector<double> art        = Residual<LevelCacheType>::level_cache_.art();
-    ConstVector<double> detDF      = Residual<LevelCacheType>::level_cache_.detDF();
-    ConstVector<double> coeff_beta = Residual<LevelCacheType>::level_cache_.coeff_beta();
+    const PolarGrid& grid             = Residual<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = Residual<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = Residual<LevelCacheType>::DirBC_Interior_;
+
+    ConstVector<double> arr        = level_cache.arr();
+    ConstVector<double> att        = level_cache.att();
+    ConstVector<double> art        = level_cache.art();
+    ConstVector<double> detDF      = level_cache.detDF();
+    ConstVector<double> coeff_beta = level_cache.coeff_beta();
 
     for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
         node_apply_a_take(i_r, i_theta, grid, DirBC_Interior, result, x, arr, att, art, detDF, coeff_beta);
@@ -145,13 +147,15 @@ void ResidualTake<LevelCacheType>::applyRadialSection(const int i_theta, Vector<
     assert(Residual<LevelCacheType>::level_cache_.cacheDensityProfileCoefficients());
     assert(Residual<LevelCacheType>::level_cache_.cacheDomainGeometry());
 
-    const PolarGrid& grid          = Residual<LevelCacheType>::grid_;
-    const bool DirBC_Interior      = Residual<LevelCacheType>::DirBC_Interior_;
-    ConstVector<double> arr        = Residual<LevelCacheType>::level_cache_.arr();
-    ConstVector<double> att        = Residual<LevelCacheType>::level_cache_.att();
-    ConstVector<double> art        = Residual<LevelCacheType>::level_cache_.art();
-    ConstVector<double> detDF      = Residual<LevelCacheType>::level_cache_.detDF();
-    ConstVector<double> coeff_beta = Residual<LevelCacheType>::level_cache_.coeff_beta();
+    const PolarGrid& grid             = Residual<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = Residual<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = Residual<LevelCacheType>::DirBC_Interior_;
+
+    ConstVector<double> arr        = level_cache.arr();
+    ConstVector<double> att        = level_cache.att();
+    ConstVector<double> art        = level_cache.art();
+    ConstVector<double> detDF      = level_cache.detDF();
+    ConstVector<double> coeff_beta = level_cache.coeff_beta();
 
     for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
         node_apply_a_take(i_r, i_theta, grid, DirBC_Interior, result, x, arr, att, art, detDF, coeff_beta);
diff --git a/include/Residual/ResidualTake/residualTake.inl b/include/Residual/ResidualTake/residualTake.inl
index c6916277..e5cf7fd3 100644
--- a/include/Residual/ResidualTake/residualTake.inl
+++ b/include/Residual/ResidualTake/residualTake.inl
@@ -14,13 +14,9 @@ void ResidualTake<LevelCacheType>::applySystemOperator(Vector<double> result, Co
 {
     assert(result.size() == x.size());
 
+    const PolarGrid& grid     = Residual<LevelCacheType>::grid_;
     const int num_omp_threads = Residual<LevelCacheType>::num_omp_threads_;
 
-    assert(Residual<LevelCacheType>::level_cache_.cacheDensityProfileCoefficients());
-    assert(Residual<LevelCacheType>::level_cache_.cacheDomainGeometry());
-
-    const PolarGrid& grid = Residual<LevelCacheType>::grid_;
-
 #pragma omp parallel num_threads(num_omp_threads)
     {
 /* Circle Section */
diff --git a/include/Smoother/SmootherGive/applyAscOrtho.inl b/include/Smoother/SmootherGive/applyAscOrtho.inl
index 63bfef95..f91f5366 100644
--- a/include/Smoother/SmootherGive/applyAscOrtho.inl
+++ b/include/Smoother/SmootherGive/applyAscOrtho.inl
@@ -434,6 +434,7 @@ void SmootherGive<LevelCacheType>::applyAscOrthoCircleSection(const int i_r, con
 
     const PolarGrid& grid             = Smoother<LevelCacheType>::grid_;
     const LevelCacheType& level_cache = Smoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = Smoother<LevelCacheType>::DirBC_Interior_;
 
     assert(i_r >= 0 && i_r < grid.numberSmootherCircles() + 1);
 
@@ -447,8 +448,8 @@ void SmootherGive<LevelCacheType>::applyAscOrthoCircleSection(const int i_r, con
         level_cache.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF);
 
         // Apply Asc Ortho at the current node
-        nodeApplyAscOrthoCircleGive(i_r, i_theta, grid, Smoother<LevelCacheType>::DirBC_Interior_, smoother_color, x,
-                                    rhs, temp, arr, att, art, detDF, coeff_beta);
+        nodeApplyAscOrthoCircleGive(i_r, i_theta, grid, DirBC_Interior, smoother_color, x, rhs, temp, arr, att, art,
+                                    detDF, coeff_beta);
     }
 }
 
@@ -461,6 +462,7 @@ void SmootherGive<LevelCacheType>::applyAscOrthoRadialSection(const int i_theta,
 
     const PolarGrid& grid             = Smoother<LevelCacheType>::grid_;
     const LevelCacheType& level_cache = Smoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = Smoother<LevelCacheType>::DirBC_Interior_;
 
     const double theta = grid.theta(i_theta);
 
@@ -474,7 +476,7 @@ void SmootherGive<LevelCacheType>::applyAscOrthoRadialSection(const int i_theta,
         level_cache.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF);
 
         // Apply Asc Ortho at the current node
-        nodeApplyAscOrthoRadialGive(i_r, i_theta, grid, Smoother<LevelCacheType>::DirBC_Interior_, smoother_color, x,
-                                    rhs, temp, arr, att, art, detDF, coeff_beta);
+        nodeApplyAscOrthoRadialGive(i_r, i_theta, grid, DirBC_Interior, smoother_color, x, rhs, temp, arr, att, art,
+                                    detDF, coeff_beta);
     }
-}
+}
\ No newline at end of file
diff --git a/include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl
new file mode 100644
index 00000000..9c011718
--- /dev/null
+++ b/include/Smoother/SmootherGive/buildInnerBoundaryAsc.inl
@@ -0,0 +1,295 @@
+#pragma once
+
+namespace smoother_give
+{
+
+#ifdef GMGPOLAR_USE_MUMPS
+// When using the MUMPS solver, the matrix is assembled in COO format.
+static inline void update_CSR_COO_MatrixElement(SparseMatrixCOO<double>& matrix, int ptr, int offset, int row,
+                                                int column, double value)
+{
+    matrix.row_index(ptr + offset) = row;
+    matrix.col_index(ptr + offset) = column;
+    matrix.value(ptr + offset) += value;
+}
+#else
+// When using the in-house solver, the matrix is stored in CSR format.
+static inline void update_CSR_COO_MatrixElement(SparseMatrixCSR<double>& matrix, int ptr, int offset, int row,
+                                                int column, double value)
+{
+    matrix.row_nz_index(row, offset) = column;
+    matrix.row_nz_entry(row, offset) += value;
+}
+#endif
+
+} // namespace smoother_give
+
+template <class LevelCacheType>
+void SmootherGive<LevelCacheType>::nodeBuildInteriorBoundarySolverMatrix_i_r_0(int i_theta, const PolarGrid& grid,
+                                                                               bool DirBC_Interior,
+                                                                               InnerBoundaryMatrix& matrix, double arr,
+                                                                               double att, double art, double detDF,
+                                                                               double coeff_beta)
+{
+    using smoother_give::update_CSR_COO_MatrixElement;
+
+    assert(i_theta >= 0 && i_theta < grid.ntheta());
+
+    int ptr, offset;
+    int row, column;
+    double value;
+
+    const int i_r = 0;
+
+    /* ------------------------------------------------ */
+    /* Case 1: Dirichlet boundary on the inner boundary */
+    /* ------------------------------------------------ */
+    if (DirBC_Interior) {
+        /* Fill result(i,j) */
+        const int center_index = i_theta;
+
+        /* Fill matrix row of (i,j) */
+        row = center_index;
+        ptr = getCircleAscIndex(i_r, i_theta);
+
+        const Stencil& CenterStencil = getStencil(i_r);
+
+        offset = CenterStencil[StencilPosition::Center];
+        column = center_index;
+        value  = 1.0;
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+    }
+    else {
+        /* ------------------------------------------------------------- */
+        /* Case 2: Across origin discretization on the interior boundary */
+        /* ------------------------------------------------------------- */
+        // h1 gets replaced with 2 * R0.
+        // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()>>1)).
+        // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil.
+        const double h1 = 2 * grid.radius(0);
+        const double h2 = grid.radialSpacing(i_r);
+        const double k1 = grid.angularSpacing(i_theta - 1);
+        const double k2 = grid.angularSpacing(i_theta);
+
+        const double coeff1 = 0.5 * (k1 + k2) / h1;
+        const double coeff2 = 0.5 * (k1 + k2) / h2;
+        const double coeff3 = 0.5 * (h1 + h2) / k1;
+        const double coeff4 = 0.5 * (h1 + h2) / k2;
+
+        /* -| x | o | x | */
+        /* -|   |   |   | */
+        /* -| O | o | o | */
+        /* -|   |   |   | */
+        /* -| x | o | x | */
+        const int i_theta_M1           = grid.wrapThetaIndex(i_theta - 1);
+        const int i_theta_P1           = grid.wrapThetaIndex(i_theta + 1);
+        const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2);
+
+        const int center_index = i_theta;
+        const int left_index   = i_theta_AcrossOrigin;
+        const int bottom_index = i_theta_M1;
+        const int top_index    = i_theta_P1;
+
+        const int center_nz_index = getCircleAscIndex(i_r, i_theta);
+        const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1);
+        const int top_nz_index    = getCircleAscIndex(i_r, i_theta_P1);
+        const int left_nz_index   = getCircleAscIndex(i_r, i_theta_AcrossOrigin);
+
+        int nz_index; /* Fill matrix row of (i,j) */
+        row = center_index;
+        ptr = center_nz_index;
+
+        const Stencil& CenterStencil = getStencil(i_r);
+
+        offset = CenterStencil[StencilPosition::Center];
+        column = center_index;
+        value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* beta_{i,j} */
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+        offset = CenterStencil[StencilPosition::Left];
+        column = left_index;
+        value  = -coeff1 * arr; /* Left */
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+        offset = CenterStencil[StencilPosition::Bottom];
+        column = bottom_index;
+        value  = -coeff3 * att; /* Bottom */
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+        offset = CenterStencil[StencilPosition::Top];
+        column = top_index;
+        value  = -coeff4 * att; /* Top */
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+        offset = CenterStencil[StencilPosition::Center];
+        column = center_index;
+        value  = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+        /* Fill matrix row of (i-1,j) */
+        /* From view the view of the across origin node, */ /* the directions are roatated by 180 degrees in the stencil! */
+        row = left_index;
+        ptr = left_nz_index;
+
+        const Stencil& LeftStencil = CenterStencil;
+
+        offset = LeftStencil[StencilPosition::Left];
+        column = center_index;
+        value  = -coeff1 * arr; /* Right -> Left*/
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+        offset = LeftStencil[StencilPosition::Center];
+        column = left_index;
+        value  = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+        /* Top Right -> Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */
+
+        /* Bottom Right -> Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */
+
+        /* Fill matrix row of (i,j-1) */
+        row = bottom_index;
+        ptr = bottom_nz_index;
+
+        const Stencil& BottomStencil = CenterStencil;
+
+        offset = BottomStencil[StencilPosition::Top];
+        column = center_index;
+        value  = -coeff3 * att; /* Top */
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+        offset = BottomStencil[StencilPosition::Center];
+        column = bottom_index;
+        value  = +coeff3 * att; /* Center: (Top) */
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+        /* TopLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */
+
+        /* Fill matrix row of (i,j+1) */
+        row = top_index;
+        ptr = top_nz_index;
+
+        const Stencil& TopStencil = CenterStencil;
+
+        offset = TopStencil[StencilPosition::Bottom];
+        column = center_index;
+        value  = -coeff4 * att; /* Bottom */
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+        offset = TopStencil[StencilPosition::Center];
+        column = top_index;
+        value  = +coeff4 * att; /* Center: (Bottom) */
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+
+        /* BottomLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */
+    }
+}
+
+template <class LevelCacheType>
+void SmootherGive<LevelCacheType>::nodeBuildInteriorBoundarySolverMatrix_i_r_1(int i_theta, const PolarGrid& grid,
+                                                                               bool DirBC_Interior,
+                                                                               InnerBoundaryMatrix& matrix, double arr,
+                                                                               double att, double art, double detDF,
+                                                                               double coeff_beta)
+{
+    using smoother_give::update_CSR_COO_MatrixElement;
+
+    assert(i_theta >= 0 && i_theta < grid.ntheta());
+
+    int ptr, offset;
+    int row, column;
+    double value;
+
+    const int i_r = 1;
+
+    const double h1 = grid.radialSpacing(i_r - 1);
+    const double h2 = grid.radialSpacing(i_r);
+    const double k1 = grid.angularSpacing(i_theta - 1);
+    const double k2 = grid.angularSpacing(i_theta);
+
+    const double coeff1 = 0.5 * (k1 + k2) / h1;
+
+    const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1);
+    const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1);
+
+    const int left_index = i_theta;
+
+    /* Fill matrix row of (i-1,j) */
+    if (!DirBC_Interior) {
+        row = left_index;
+        ptr = getCircleAscIndex(i_r - 1, i_theta);
+
+        const Stencil& LeftStencil = getStencil(i_r - 1);
+
+        offset = LeftStencil[StencilPosition::Center];
+        column = left_index;
+        value  = coeff1 * arr; /* Center: (Right) */
+        update_CSR_COO_MatrixElement(matrix, ptr, offset, row, column, value);
+    }
+}
+
+template <class LevelCacheType>
+typename SmootherGive<LevelCacheType>::InnerBoundaryMatrix
+SmootherGive<LevelCacheType>::buildInteriorBoundarySolverMatrix()
+{
+    const PolarGrid& grid             = Smoother<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = Smoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = Smoother<LevelCacheType>::DirBC_Interior_;
+
+    const int i_r    = 0;
+    const int ntheta = grid.ntheta();
+
+    // The interior boundary matrix is symmetric due to the periodicity in the theta direction
+    // and the assumption that ntheta is even, which is required for the across-origin discretization.
+    // We store all non-zero entries of the matrix, both in COO format (for MUMPS)
+    // and in CSR format (for the in-house solver). If the COO matrix is marked as symmetric,
+    // the COO_Mumps_Solver optimizes the factorization by only using the upper triangular part of the matrix,
+    // which is extracted by the COO_Mumps_Solver internally.
+#ifdef GMGPOLAR_USE_MUMPS
+    const int nnz = getNonZeroCountCircleAsc(i_r);
+    SparseMatrixCOO<double> inner_boundary_solver_matrix(ntheta, ntheta, nnz);
+    inner_boundary_solver_matrix.is_symmetric(true);
+#else
+    // The stencils size for the inner boundary matrix is either 1 (Dirichlet BC) or 4 (across-origin discretization).
+    std::function<int(int)> nnz_per_row = [&](int i_theta) {
+        return DirBC_Interior ? 1 : 4;
+    };
+    SparseMatrixCSR<double> inner_boundary_solver_matrix(ntheta, ntheta, nnz_per_row);
+#endif
+
+    {
+        const int i_r  = 0;
+        const double r = grid.radius(i_r);
+        for (int i_theta = 0; i_theta < ntheta; i_theta++) {
+            {
+                const int global_index = grid.index(i_r, i_theta);
+                const double theta     = grid.theta(i_theta);
+
+                double coeff_beta, arr, att, art, detDF;
+                level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF);
+
+                nodeBuildInteriorBoundarySolverMatrix_i_r_0(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix,
+                                                            arr, att, art, detDF, coeff_beta);
+            }
+        }
+    }
+
+    {
+        const int i_r  = 1;
+        const double r = grid.radius(i_r);
+        for (int i_theta = 0; i_theta < ntheta; i_theta++) {
+            {
+                const int global_index = grid.index(i_r, i_theta);
+                const double theta     = grid.theta(i_theta);
+
+                double coeff_beta, arr, att, art, detDF;
+                level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF);
+
+                nodeBuildInteriorBoundarySolverMatrix_i_r_1(i_theta, grid, DirBC_Interior, inner_boundary_solver_matrix,
+                                                            arr, att, art, detDF, coeff_beta);
+            }
+        }
+    }
+
+    return inner_boundary_solver_matrix;
+}
\ No newline at end of file
diff --git a/include/Smoother/SmootherGive/buildMatrix.inl b/include/Smoother/SmootherGive/buildTridiagonalAsc.inl
similarity index 56%
rename from include/Smoother/SmootherGive/buildMatrix.inl
rename to include/Smoother/SmootherGive/buildTridiagonalAsc.inl
index 7a77fc1c..7f49e302 100644
--- a/include/Smoother/SmootherGive/buildMatrix.inl
+++ b/include/Smoother/SmootherGive/buildTridiagonalAsc.inl
@@ -3,7 +3,6 @@
 namespace smoother_give
 {
 
-/* Tridiagonal matrices */
 static inline void updateMatrixElement(BatchedTridiagonalSolver<double>& solver, int batch, int row, int column,
                                        double value)
 {
@@ -15,34 +14,15 @@ static inline void updateMatrixElement(BatchedTridiagonalSolver<double>& solver,
         solver.cyclic_corner(batch) += value;
 }
 
-/* Inner Boundary COO/CSR matrix */
-#ifdef GMGPOLAR_USE_MUMPS
-static inline void updateCOOCSRMatrixElement(SparseMatrixCOO<double>& matrix, int ptr, int offset, int row, int col,
-                                             double val)
-{
-    matrix.row_index(ptr + offset) = row;
-    matrix.col_index(ptr + offset) = col;
-    matrix.value(ptr + offset) += val;
-}
-#else
-static inline void updateCOOCSRMatrixElement(SparseMatrixCSR<double>& matrix, int ptr, int offset, int row, int col,
-                                             double val)
-{
-    matrix.row_nz_index(row, offset) = col;
-    matrix.row_nz_entry(row, offset) += val;
-}
-#endif
-
 } // namespace smoother_give
 
 template <class LevelCacheType>
-void SmootherGive<LevelCacheType>::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior,
-                                                    MatrixType& inner_boundary_circle_matrix,
-                                                    BatchedTridiagonalSolver<double>& circle_tridiagonal_solver,
-                                                    BatchedTridiagonalSolver<double>& radial_tridiagonal_solver,
-                                                    double arr, double att, double art, double detDF, double coeff_beta)
+void SmootherGive<LevelCacheType>::nodeBuildTridiagonalSolverMatrices(
+    int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior,
+    BatchedTridiagonalSolver<double>& circle_tridiagonal_solver,
+    BatchedTridiagonalSolver<double>& radial_tridiagonal_solver, double arr, double att, double art, double detDF,
+    double coeff_beta)
 {
-    using smoother_give::updateCOOCSRMatrixElement;
     using smoother_give::updateMatrixElement;
 
     assert(i_r >= 0 && i_r < grid.nr());
@@ -105,7 +85,7 @@ void SmootherGive<LevelCacheType>::nodeBuildAscGive(int i_r, int i_theta, const
         /* Fill matrix row of (i,j) */
         row    = center_index;
         column = center_index;
-        value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */
+        value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */
         updateMatrixElement(center_solver, center_batch, row, column, value);
 
         row    = center_index;
@@ -146,17 +126,7 @@ void SmootherGive<LevelCacheType>::nodeBuildAscGive(int i_r, int i_theta, const
         updateMatrixElement(center_solver, center_batch, row, column, value);
 
         /* Fill matrix row of (i-1,j) */
-        if (!DirBC_Interior && i_r == 1) {
-            row = left_index;
-            ptr = getCircleAscIndex(i_r - 1, i_theta);
-
-            const Stencil& LeftStencil = getStencil(i_r - 1);
-
-            offset = LeftStencil[StencilPosition::Center];
-            col    = left_index;
-            val    = coeff1 * arr; /* Center: (Right) */
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-        }
+        // The inner boundary circle line are is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix.
         if (i_r > 1) {
             row    = left_index;
             column = left_index;
@@ -266,180 +236,39 @@ void SmootherGive<LevelCacheType>::nodeBuildAscGive(int i_r, int i_theta, const
     /* Circle Section: Node in the inner boundary */
     /* ------------------------------------------ */
     else if (i_r == 0) {
-        /* ------------------------------------------------ */
-        /* Case 1: Dirichlet boundary on the inner boundary */
-        /* ------------------------------------------------ */
-        if (DirBC_Interior) {
-            /* Fill result(i,j) */
-            const double h2 = grid.radialSpacing(i_r);
-            const double k1 = grid.angularSpacing(i_theta - 1);
-            const double k2 = grid.angularSpacing(i_theta);
-
-            const double coeff2 = 0.5 * (k1 + k2) / h2;
-
-            const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1);
-            const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1);
-
-            auto& right_solver = circle_tridiagonal_solver;
-            int right_batch    = i_r + 1;
-
-            const int center_index = i_theta;
-            const int right_index  = i_theta;
-            const int bottom_index = i_theta_M1;
-            const int top_index    = i_theta_P1;
-
-            /* Fill matrix row of (i,j) */
-            row = center_index;
-            ptr = getCircleAscIndex(i_r, i_theta);
-
-            const Stencil& CenterStencil = getStencil(i_r);
-
-            offset = CenterStencil[StencilPosition::Center];
-            col    = center_index;
-            val    = 1.0;
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-            /* Fill matrix row of (i+1,j) */
-            row    = right_index;
-            column = right_index;
-            value  = coeff2 * arr; /* Center: (Left) */
-            updateMatrixElement(right_solver, right_batch, row, column, value);
-        }
-        else {
-            /* ------------------------------------------------------------- */
-            /* Case 2: Across origin discretization on the interior boundary */
-            /* ------------------------------------------------------------- */
-            // h1 gets replaced with 2 * R0.
-            // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()>>1)).
-            // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil.
-            const double h1 = 2 * grid.radius(0);
-            const double h2 = grid.radialSpacing(i_r);
-            const double k1 = grid.angularSpacing(i_theta - 1);
-            const double k2 = grid.angularSpacing(i_theta);
-
-            const double coeff1 = 0.5 * (k1 + k2) / h1;
-            const double coeff2 = 0.5 * (k1 + k2) / h2;
-            const double coeff3 = 0.5 * (h1 + h2) / k1;
-            const double coeff4 = 0.5 * (h1 + h2) / k2;
-
-            /* left_matrix (across-the origin), center_matrix, right_matrix */
-            /* -| x | o | x | */
-            /* -|   |   |   | */
-            /* -| O | o | o | */
-            /* -|   |   |   | */
-            /* -| x | o | x | */
-            auto& right_solver = circle_tridiagonal_solver;
-            int right_batch    = i_r + 1;
-
-            const int i_theta_M1           = grid.wrapThetaIndex(i_theta - 1);
-            const int i_theta_P1           = grid.wrapThetaIndex(i_theta + 1);
-            const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2);
-
-            const int center_index = i_theta;
-            const int left_index   = i_theta_AcrossOrigin;
-            const int right_index  = i_theta;
-            const int bottom_index = i_theta_M1;
-            const int top_index    = i_theta_P1;
-
-            const int center_nz_index = getCircleAscIndex(i_r, i_theta);
-            const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1);
-            const int top_nz_index    = getCircleAscIndex(i_r, i_theta_P1);
-            const int left_nz_index   = getCircleAscIndex(i_r, i_theta_AcrossOrigin);
-
-            int nz_index; /* Fill matrix row of (i,j) */
-            row = center_index;
-            ptr = center_nz_index;
-
-            const Stencil& CenterStencil = getStencil(i_r);
-
-            offset = CenterStencil[StencilPosition::Center];
-            col    = center_index;
-            val    = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* beta_{i,j} */
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-            offset = CenterStencil[StencilPosition::Left];
-            col    = left_index;
-            val    = -coeff1 * arr; /* Left */
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-            offset = CenterStencil[StencilPosition::Bottom];
-            col    = bottom_index;
-            val    = -coeff3 * att; /* Bottom */
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-            offset = CenterStencil[StencilPosition::Top];
-            col    = top_index;
-            val    = -coeff4 * att; /* Top */
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-            offset = CenterStencil[StencilPosition::Center];
-            col    = center_index;
-            val    = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-            /* Fill matrix row of (i-1,j) */
-            /* From view the view of the across origin node, */ /* the directions are roatated by 180 degrees in the stencil! */
-            row = left_index;
-            ptr = left_nz_index;
-
-            const Stencil& LeftStencil = CenterStencil;
-
-            offset = LeftStencil[StencilPosition::Left];
-            col    = center_index;
-            val    = -coeff1 * arr; /* Right -> Left*/
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-            offset = LeftStencil[StencilPosition::Center];
-            col    = left_index;
-            val    = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-            /* Top Right -> Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */
-
-            /* Bottom Right -> Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */
-
-            /* Fill matrix row of (i+1,j) */
-            row    = right_index;
-            column = right_index;
-            value  = coeff2 * arr; /* Center: (Left) */
-            updateMatrixElement(right_solver, right_batch, row, column, value);
-
-            /* Fill matrix row of (i,j-1) */
-            row = bottom_index;
-            ptr = bottom_nz_index;
-
-            const Stencil& BottomStencil = CenterStencil;
-
-            offset = BottomStencil[StencilPosition::Top];
-            col    = center_index;
-            val    = -coeff3 * att; /* Top */
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-            offset = BottomStencil[StencilPosition::Center];
-            col    = bottom_index;
-            val    = +coeff3 * att; /* Center: (Top) */
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
-
-            /* TopLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */
-
-            /* Fill matrix row of (i,j+1) */
-            row = top_index;
-            ptr = top_nz_index;
-
-            const Stencil& TopStencil = CenterStencil;
-
-            offset = TopStencil[StencilPosition::Bottom];
-            col    = center_index;
-            val    = -coeff4 * att; /* Bottom */
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
+        // The inner boundary circle line are is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix.
 
-            offset = TopStencil[StencilPosition::Center];
-            col    = top_index;
-            val    = +coeff4 * att; /* Center: (Bottom) */
-            updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val);
+        /* Fill result(i,j) */
+        const double h2 = grid.radialSpacing(i_r);
+        const double k1 = grid.angularSpacing(i_theta - 1);
+        const double k2 = grid.angularSpacing(i_theta);
 
-            /* BottomLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */
-        }
+        const double coeff2 = 0.5 * (k1 + k2) / h2;
+
+        const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1);
+        const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1);
+
+        auto& center_solver = circle_tridiagonal_solver;
+        int center_batch    = i_r;
+        auto& right_solver  = circle_tridiagonal_solver;
+        int right_batch     = i_r + 1;
+
+        const int center_index = i_theta;
+        const int right_index  = i_theta;
+        const int bottom_index = i_theta_M1;
+        const int top_index    = i_theta_P1;
+
+        /* Fill matrix row of (i,j) */
+        row    = center_index;
+        column = center_index;
+        value  = 1.0;
+        updateMatrixElement(center_solver, center_batch, row, column, value);
+
+        /* Fill matrix row of (i+1,j) */
+        row    = right_index;
+        column = right_index;
+        value  = coeff2 * arr; /* Center: (Left) */
+        updateMatrixElement(right_solver, right_batch, row, column, value);
     }
     /* --------------------------------------------- */
     /* Radial Section: Node next to circular section */
@@ -481,7 +310,7 @@ void SmootherGive<LevelCacheType>::nodeBuildAscGive(int i_r, int i_theta, const
         /* Fill matrix row of (i,j) */
         row    = center_index;
         column = center_index;
-        value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */
+        value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */
         updateMatrixElement(center_solver, center_batch, row, column, value);
 
         row    = center_index;
@@ -564,7 +393,7 @@ void SmootherGive<LevelCacheType>::nodeBuildAscGive(int i_r, int i_theta, const
         /* ---------------------------- */ /* Fill matrix row of (i,j) */
         row    = center_index;
         column = center_index;
-        value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */
+        value  = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */
         updateMatrixElement(center_solver, center_batch, row, column, value);
 
         row    = center_index;
@@ -637,11 +466,13 @@ void SmootherGive<LevelCacheType>::nodeBuildAscGive(int i_r, int i_theta, const
 }
 
 template <class LevelCacheType>
-void SmootherGive<LevelCacheType>::buildAscCircleSection(const int i_r)
+void SmootherGive<LevelCacheType>::buildTridiagonalCircleSection(int i_r)
 {
     const PolarGrid& grid             = Smoother<LevelCacheType>::grid_;
     const LevelCacheType& level_cache = Smoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = Smoother<LevelCacheType>::DirBC_Interior_;
 
+    // Access pattern is aligned with the memory layout of the grid data to maximize cache efficiency.
     const double r = grid.radius(i_r);
     for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) {
         const int global_index = grid.index(i_r, i_theta);
@@ -651,17 +482,19 @@ void SmootherGive<LevelCacheType>::buildAscCircleSection(const int i_r)
         level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF);
 
         // Build Asc at the current node
-        nodeBuildAscGive(i_r, i_theta, grid, Smoother<LevelCacheType>::DirBC_Interior_, inner_boundary_circle_matrix_,
-                         circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta);
+        nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_,
+                                           radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta);
     }
 }
 
 template <class LevelCacheType>
-void SmootherGive<LevelCacheType>::buildAscRadialSection(const int i_theta)
+void SmootherGive<LevelCacheType>::buildTridiagonalRadialSection(int i_theta)
 {
     const PolarGrid& grid             = Smoother<LevelCacheType>::grid_;
     const LevelCacheType& level_cache = Smoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = Smoother<LevelCacheType>::DirBC_Interior_;
 
+    // Access pattern is aligned with the memory layout of the grid data to maximize cache efficiency.
     const double theta = grid.theta(i_theta);
     for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) {
         const int global_index = grid.index(i_r, i_theta);
@@ -671,132 +504,75 @@ void SmootherGive<LevelCacheType>::buildAscRadialSection(const int i_theta)
         level_cache.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF);
 
         // Build Asc at the current node
-        nodeBuildAscGive(i_r, i_theta, grid, Smoother<LevelCacheType>::DirBC_Interior_, inner_boundary_circle_matrix_,
-                         circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta);
+        nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid, DirBC_Interior, circle_tridiagonal_solver_,
+                                           radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta);
     }
 }
 
 template <class LevelCacheType>
-void SmootherGive<LevelCacheType>::buildAscMatrices()
+void SmootherGive<LevelCacheType>::buildTridiagonalSolverMatrices()
 {
-    /* -------------------------------------- */
-    /* Part 1: Allocate Asc Smoother matrices */
-    /* -------------------------------------- */
-    // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor.
-    // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead.
-
-    const PolarGrid& grid     = Smoother<LevelCacheType>::grid_;
-    const bool DirBC_Interior = Smoother<LevelCacheType>::DirBC_Interior_;
-    const int num_omp_threads = Smoother<LevelCacheType>::num_omp_threads_;
-
-#ifdef GMGPOLAR_USE_MUMPS
-    // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry.
-    const int inner_i_r           = 0;
-    const int inner_nnz           = getNonZeroCountCircleAsc(inner_i_r);
-    const int num_circle_nodes    = grid.ntheta();
-    inner_boundary_circle_matrix_ = SparseMatrixCOO<double>(num_circle_nodes, num_circle_nodes, inner_nnz);
-    inner_boundary_circle_matrix_.is_symmetric(false);
-#else
-    std::function<int(int)> nnz_per_row = [&](int i_theta) {
-        return DirBC_Interior ? 1 : 4;
-    };
-    const int num_circle_nodes    = grid.ntheta();
-    inner_boundary_circle_matrix_ = SparseMatrixCSR<double>(num_circle_nodes, num_circle_nodes, nnz_per_row);
-#endif
-
-    /* ---------------------------------- */
-    /* Part 2: Fill Asc Smoother matrices */
-    /* ---------------------------------- */
-
-    /*  Multi-threaded execution: */
+    const PolarGrid& grid             = Smoother<LevelCacheType>::grid_;
+    const LevelCacheType& level_cache = Smoother<LevelCacheType>::level_cache_;
+    const bool DirBC_Interior         = Smoother<LevelCacheType>::DirBC_Interior_;
+    const int num_omp_threads         = Smoother<LevelCacheType>::num_omp_threads_;
+
     const int num_smoother_circles    = grid.numberSmootherCircles();
     const int additional_radial_tasks = grid.ntheta() % 3;
     const int num_radial_tasks        = grid.ntheta() - additional_radial_tasks;
 
+    /* ---------------- */
+    /* Circular section */
+    /* ---------------- */
+    // We parallelize the loop with step 3 to avoid data race conditions between adjacent circles.
 #pragma omp parallel num_threads(num_omp_threads)
     {
 #pragma omp for
         for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) {
-            buildAscCircleSection(i_r);
-        }
+            buildTridiagonalCircleSection(i_r);
+        } /* Implicit barrier */
 #pragma omp for
         for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) {
-            buildAscCircleSection(i_r);
-        }
+            buildTridiagonalCircleSection(i_r);
+        } /* Implicit barrier */
 #pragma omp for
         for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) {
-            buildAscCircleSection(i_r);
-        }
+            buildTridiagonalCircleSection(i_r);
+        } /* Implicit barrier */
+    }
+
+    /* ---------------- */
+    /* Radial section */
+    /* ---------------- */
+    // We parallelize the loop with step 3 to avoid data race conditions between adjacent radial lines.
+    // Due to the periodicity in the angular direction, we can have at most 2 additional radial tasks
+    // that are handled serially before the parallel loops.
+    if (additional_radial_tasks > 0) {
+        const int i_theta = 0;
+        buildTridiagonalRadialSection(i_theta);
+    }
 
+    if (additional_radial_tasks > 1) {
+        const int i_theta = 1;
+        buildTridiagonalRadialSection(i_theta);
+    }
+
+#pragma omp parallel num_threads(num_omp_threads)
+    {
 #pragma omp for
         for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) {
-            if (radial_task > 0) {
-                int i_theta = radial_task + additional_radial_tasks;
-                buildAscRadialSection(i_theta);
-            }
-            else {
-                if (additional_radial_tasks == 0) {
-                    buildAscRadialSection(0);
-                }
-                else if (additional_radial_tasks >= 1) {
-                    buildAscRadialSection(0);
-                    buildAscRadialSection(1);
-                }
-            }
-        }
+            const int i_theta = radial_task + additional_radial_tasks;
+            buildTridiagonalRadialSection(i_theta);
+        } /* Implicit barrier */
 #pragma omp for
         for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) {
-            if (radial_task > 1) {
-                int i_theta = radial_task + additional_radial_tasks;
-                buildAscRadialSection(i_theta);
-            }
-            else {
-                if (additional_radial_tasks == 0) {
-                    buildAscRadialSection(1);
-                }
-                else if (additional_radial_tasks == 1) {
-                    buildAscRadialSection(2);
-                }
-                else if (additional_radial_tasks == 2) {
-                    buildAscRadialSection(2);
-                    buildAscRadialSection(3);
-                }
-            }
-        }
+            const int i_theta = radial_task + additional_radial_tasks;
+            buildTridiagonalRadialSection(i_theta);
+        } /* Implicit barrier */
 #pragma omp for
         for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) {
-            int i_theta = radial_task + additional_radial_tasks;
-            buildAscRadialSection(i_theta);
-        }
+            const int i_theta = radial_task + additional_radial_tasks;
+            buildTridiagonalRadialSection(i_theta);
+        } /* Implicit barrier */
     }
-
-    circle_tridiagonal_solver_.setup();
-    radial_tridiagonal_solver_.setup();
-
-#ifdef GMGPOLAR_USE_MUMPS
-    /* ------------------------------------------------------------------ */
-    /* Part 3: Convert inner_boundary_circle_matrix to a symmetric matrix */
-    /* ------------------------------------------------------------------ */
-    SparseMatrixCOO<double> full_matrix = std::move(inner_boundary_circle_matrix_);
-
-    const int nnz           = full_matrix.non_zero_size();
-    const int numRows       = full_matrix.rows();
-    const int numColumns    = full_matrix.columns();
-    const int symmetric_nnz = nnz - (nnz - numRows) / 2;
-
-    inner_boundary_circle_matrix_ = SparseMatrixCOO<double>(numRows, numColumns, symmetric_nnz);
-    inner_boundary_circle_matrix_.is_symmetric(true);
-
-    int current_nz = 0;
-    for (int nz_index = 0; nz_index < full_matrix.non_zero_size(); nz_index++) {
-        int current_row = full_matrix.row_index(nz_index);
-        int current_col = full_matrix.col_index(nz_index);
-        if (current_row <= current_col) {
-            inner_boundary_circle_matrix_.row_index(current_nz) = current_row;
-            inner_boundary_circle_matrix_.col_index(current_nz) = current_col;
-            inner_boundary_circle_matrix_.value(current_nz)     = std::move(full_matrix.value(nz_index));
-            current_nz++;
-        }
-    }
-#endif
-}
+}
\ No newline at end of file
diff --git a/include/Smoother/SmootherGive/initializeMumps.inl b/include/Smoother/SmootherGive/initializeMumps.inl
deleted file mode 100644
index 11b3a967..00000000
--- a/include/Smoother/SmootherGive/initializeMumps.inl
+++ /dev/null
@@ -1,114 +0,0 @@
-#pragma once
-
-#ifdef GMGPOLAR_USE_MUMPS
-
-template <class LevelCacheType>
-void SmootherGive<LevelCacheType>::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver,
-                                                         SparseMatrixCOO<double>& solver_matrix)
-{
-    /*
-     * MUMPS (a parallel direct solver) uses 1-based indexing,
-     * whereas the input matrix follows 0-based indexing.
-     * Adjust row and column indices to match MUMPS' requirements.
-     */
-    for (int i = 0; i < solver_matrix.non_zero_size(); i++) {
-        solver_matrix.row_index(i) += 1;
-        solver_matrix.col_index(i) += 1;
-    }
-
-    mumps_solver.job = JOB_INIT;
-    mumps_solver.par = PAR_PARALLEL;
-    /* The matrix is positive definite for invertible mappings. */
-    /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */
-    mumps_solver.sym          = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC);
-    mumps_solver.comm_fortran = USE_COMM_WORLD;
-    dmumps_c(&mumps_solver);
-
-    ICNTL(mumps_solver, 1) = 0; // Output stream for error messages.
-    ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process.
-    ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host
-    ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages.
-    ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format
-    ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix
-    ICNTL(mumps_solver, 7) =
-        5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis
-    ICNTL(mumps_solver, 8)  = 77; // Describes the scaling strategy
-    ICNTL(mumps_solver, 9)  = 1; // Computes the solution using A or A^T
-    ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution
-    ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved
-    ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used
-    ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node
-    ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space
-        (solver_matrix.is_symmetric() ? 5 : 20);
-    ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format
-    ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads
-    // ICNTL(17) Doesn't exist
-    ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix
-    ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix
-    ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides
-    ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors.
-    ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve.
-    ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can
-    //                             allocate per working process
-    ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”.
-    ICNTL(mumps_solver, 25) =
-        0; // Allows the computation of a solution of a deficient matrix and also of a null space basis
-    ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed
-    ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides.
-    ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed
-    ICNTL(mumps_solver, 29) =
-        0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation.
-    ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix
-    ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization.
-    ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization
-    ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix.
-    ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3
-    ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature
-    ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant
-    ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks
-    ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors
-    ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks
-    // ICNTL(40-47) Don't exist
-    ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism
-    ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase
-    // ICNTL(50-55) Don't exist
-    ICNTL(mumps_solver, 56) =
-        0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method
-    // ICNTL(57) Doesn't exist
-    ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization
-    // ICNTL(59-60) Don't exist
-
-    CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting
-    CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement
-    CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows
-    CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting
-    CNTL(mumps_solver, 5) =
-        0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active
-    // CNTL(6) Doesn't exist
-    CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression
-    // CNTL(8-15) Don't exist
-
-    mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION;
-    assert(solver_matrix.rows() == solver_matrix.columns());
-    mumps_solver.n   = solver_matrix.rows();
-    mumps_solver.nz  = solver_matrix.non_zero_size();
-    mumps_solver.irn = solver_matrix.row_indices_data();
-    mumps_solver.jcn = solver_matrix.column_indices_data();
-    mumps_solver.a   = solver_matrix.values_data();
-    dmumps_c(&mumps_solver);
-
-    if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) {
-        std::cout << "Warning: Smoother inner boundary matrix is not positive definite: Negative pivots in the "
-                     "factorization phase."
-                  << std::endl;
-    }
-}
-
-template <class LevelCacheType>
-void SmootherGive<LevelCacheType>::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver)
-{
-    mumps_solver.job = JOB_END;
-    dmumps_c(&mumps_solver);
-}
-
-#endif
diff --git a/include/Smoother/SmootherGive/smootherGive.h b/include/Smoother/SmootherGive/smootherGive.h
index 8c77e65d..de87c6fc 100644
--- a/include/Smoother/SmootherGive/smootherGive.h
+++ b/include/Smoother/SmootherGive/smootherGive.h
@@ -53,9 +53,6 @@ class SmootherGive : public Smoother<LevelCacheType>
     explicit SmootherGive(const PolarGrid& grid, const LevelCacheType& level_cache, bool DirBC_Interior,
                           int num_omp_threads);
 
-    // If MUMPS is enabled, this cleans up the inner boundary solver.
-    ~SmootherGive() override;
-
     // Performs one full coupled smoothing sweep:
     //   BC -> WC -> BR -> WR
     // Parallel implementation using OpenMP:
@@ -64,40 +61,13 @@ class SmootherGive : public Smoother<LevelCacheType>
     void smoothing(Vector<double> x, ConstVector<double> rhs, Vector<double> temp) override;
 
 private:
-    /* ------------------- */
-    /* Tridiagonal solvers */
-    /* ------------------- */
-
-    // Batched solver for cyclic-tridiagonal circle line A_sc matrices.
-    BatchedTridiagonalSolver<double> circle_tridiagonal_solver_;
-
-    // Batched solver for tridiagonal radial circle line A_sc matrices.
-    BatchedTridiagonalSolver<double> radial_tridiagonal_solver_;
-
-    // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because
-    // it potentially includes across-origin coupling. Therefore, it is assembled
-    // into a sparse matrix and solved using a general-purpose sparse solver.
-    // When using the MUMPS solver, the matrix is assembled in COO format.
-    // When using the in-house solver, the matrix is stored in CSR format.
-#ifdef GMGPOLAR_USE_MUMPS
-    using MatrixType = SparseMatrixCOO<double>;
-    DMUMPS_STRUC_C inner_boundary_mumps_solver_;
-#else
-    using MatrixType = SparseMatrixCSR<double>;
-    SparseLUSolver<double> inner_boundary_lu_solver_;
-#endif
-    // Sparse matrix for the non-tridiagonal inner boundary circle block.
-    MatrixType inner_boundary_circle_matrix_;
-
-    // Note:
-    //   - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead.
-    //   - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r.
-    //   - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta.
-
     /* ------------------- */
     /* Stencil definitions */
     /* ------------------- */
 
+    // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_,
+    // since the mumps solver will be built in the member initializer of the Smoother class.
+
     // Stencils encode neighborhood connectivity for A_sc matrix assembly.
     // It is only used in the construction of COO/CSR matrices.
     // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices.
@@ -114,12 +84,53 @@ class SmootherGive : public Smoother<LevelCacheType>
     };
     const Stencil circle_stencil_across_origin_ = {
         -1,  3, -1,
-        1,  0, -1,
+         1,  0, -1,
         -1,  2, -1
     };
-
     // clang-format on
 
+    /* ------------------- */
+    /* Tridiagonal solvers */
+    /* ------------------- */
+
+    // Batched solver for cyclic-tridiagonal circle line A_sc matrices.
+    BatchedTridiagonalSolver<double> circle_tridiagonal_solver_;
+
+    // Batched solver for tridiagonal radial line A_sc matrices.
+    BatchedTridiagonalSolver<double> radial_tridiagonal_solver_;
+
+    // Note:
+    //   - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead.
+    //   - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r.
+    //   - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta.
+
+    /* ------------------------ */
+    /* Interior boundary solver */
+    /* ------------------------ */
+
+    // The inner circle matrix (i_r = 0) is NOT tridiagonal due to across-origin coupling.
+    // It is solved using a general-purpose sparse solver.
+    // - MUMPS: matrix assembled in COO format; solver owns the matrix internally.
+    // - In-house: matrix stored in CSR; solver does not own the matrix.
+
+#ifdef GMGPOLAR_USE_MUMPS
+    using InnerBoundaryMatrix = SparseMatrixCOO<double>;
+    using InnerBoundarySolver = CooMumpsSolver;
+#else
+    using InnerBoundaryMatrix = SparseMatrixCSR<double>;
+    using InnerBoundarySolver = SparseLUSolver<double>;
+
+    // Stored only for the in-house solver (CSR).
+    InnerBoundaryMatrix inner_boundary_circle_matrix_;
+#endif
+
+    // Solver object (owns matrix if MUMPS, references if in-house solver).
+    InnerBoundarySolver inner_boundary_solver_;
+
+    /* -------------- */
+    /* Stencil access */
+    /* -------------- */
+
     // Select correct stencil depending on the grid position.
     const Stencil& getStencil(int i_r) const; /* Only i_r = 0 implemented */
     // Number of nonzero A_sc entries.
@@ -131,19 +142,25 @@ class SmootherGive : public Smoother<LevelCacheType>
     /* --------------- */
     /* Matrix assembly */
     /* --------------- */
-
     // Build all A_sc matrices for circle and radial smoothers.
-    void buildAscMatrices();
-    // Build A_sc matrix block for a single circular line.
-    void buildAscCircleSection(int i_r);
-    // Build A_sc matrix block for a single radial line.
-    void buildAscRadialSection(int i_theta);
-    // Build A_sc for a specific node (i_r, i_theta)
-    void nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior,
-                          MatrixType& inner_boundary_circle_matrix,
-                          BatchedTridiagonalSolver<double>& circle_tridiagonal_solver,
-                          BatchedTridiagonalSolver<double>& radial_tridiagonal_solver, double arr, double att,
-                          double art, double detDF, double coeff_beta);
+    void buildTridiagonalSolverMatrices();
+    void buildTridiagonalCircleSection(int i_r);
+    void buildTridiagonalRadialSection(int i_theta);
+    // Build the tridiagonal solver matrices for a specific node (i_r, i_theta)
+    void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior,
+                                            BatchedTridiagonalSolver<double>& circle_tridiagonal_solver,
+                                            BatchedTridiagonalSolver<double>& radial_tridiagonal_solver, double arr,
+                                            double att, double art, double detDF, double coeff_beta);
+
+    // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling.
+    InnerBoundaryMatrix buildInteriorBoundarySolverMatrix();
+    // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary.
+    void nodeBuildInteriorBoundarySolverMatrix_i_r_0(int i_theta, const PolarGrid& grid, bool DirBC_Interior,
+                                                     InnerBoundaryMatrix& matrix, double arr, double att, double art,
+                                                     double detDF, double coeff_beta);
+    void nodeBuildInteriorBoundarySolverMatrix_i_r_1(int i_theta, const PolarGrid& grid, bool DirBC_Interior,
+                                                     InnerBoundaryMatrix& matrix, double arr, double att, double art,
+                                                     double detDF, double coeff_beta);
 
     /* ---------------------- */
     /* Orthogonal application */
@@ -151,9 +168,9 @@ class SmootherGive : public Smoother<LevelCacheType>
 
     // Compute temp = f_sc − A_sc^ortho * u_sc^ortho   (precomputed right-hand side)
     // where x = u_sc and rhs = f_sc
-    void applyAscOrthoCircleSection(int i_r, const SmootherColor smoother_color, ConstVector<double> x,
+    void applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, ConstVector<double> x,
                                     ConstVector<double> rhs, Vector<double> temp);
-    void applyAscOrthoRadialSection(int i_theta, const SmootherColor smoother_color, ConstVector<double> x,
+    void applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, ConstVector<double> x,
                                     ConstVector<double> rhs, Vector<double> temp);
 
     /* ----------------- */
@@ -172,21 +189,11 @@ class SmootherGive : public Smoother<LevelCacheType>
     void solveWhiteCircleSection(Vector<double> x, Vector<double> temp);
     void solveBlackRadialSection(Vector<double> x, Vector<double> temp);
     void solveWhiteRadialSection(Vector<double> x, Vector<double> temp);
-
-    /* ----------------------------------- */
-    /* Initialize and destroy MUMPS solver */
-    /* ----------------------------------- */
-#ifdef GMGPOLAR_USE_MUMPS
-    // Initialize sparse MUMPS solver with assembled COO matrix.
-    void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO<double>& solver_matrix);
-    // Release MUMPS internal memory and MPI structures.
-    void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver);
-#endif
 };
 
 #include "smootherGive.inl"
-#include "buildMatrix.inl"
+#include "buildInnerBoundaryAsc.inl"
+#include "buildTridiagonalAsc.inl"
 #include "applyAscOrtho.inl"
 #include "solveAscSystem.inl"
-#include "matrixStencil.inl"
-#include "initializeMumps.inl"
+#include "matrixStencil.inl"
\ No newline at end of file
diff --git a/include/Smoother/SmootherGive/smootherGive.inl b/include/Smoother/SmootherGive/smootherGive.inl
index 686b8458..decd540e 100644
--- a/include/Smoother/SmootherGive/smootherGive.inl
+++ b/include/Smoother/SmootherGive/smootherGive.inl
@@ -6,21 +6,16 @@ SmootherGive<LevelCacheType>::SmootherGive(const PolarGrid& grid, const LevelCac
     : Smoother<LevelCacheType>(grid, level_cache, DirBC_Interior, num_omp_threads)
     , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true)
     , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false)
-{
-    buildAscMatrices();
 #ifdef GMGPOLAR_USE_MUMPS
-    initializeMumpsSolver(inner_boundary_mumps_solver_, inner_boundary_circle_matrix_);
+    , inner_boundary_solver_(buildInteriorBoundarySolverMatrix())
 #else
-    inner_boundary_lu_solver_ = SparseLUSolver<double>(inner_boundary_circle_matrix_);
+    , inner_boundary_circle_matrix_(buildInteriorBoundarySolverMatrix())
+    , inner_boundary_solver_(inner_boundary_circle_matrix_)
 #endif
-}
-
-template <class LevelCacheType>
-SmootherGive<LevelCacheType>::~SmootherGive()
 {
-#ifdef GMGPOLAR_USE_MUMPS
-    finalizeMumpsSolver(inner_boundary_mumps_solver_);
-#endif
+    buildTridiagonalSolverMatrices();
+    circle_tridiagonal_solver_.setup();
+    radial_tridiagonal_solver_.setup();
 }
 
 // The smoothing solves linear systems of the form:
diff --git a/include/Smoother/SmootherGive/solveAscSystem.inl b/include/Smoother/SmootherGive/solveAscSystem.inl
index 37c7a2d3..7c484b41 100644
--- a/include/Smoother/SmootherGive/solveAscSystem.inl
+++ b/include/Smoother/SmootherGive/solveAscSystem.inl
@@ -17,19 +17,8 @@ void SmootherGive<LevelCacheType>::solveBlackCircleSection(Vector<double> x, Vec
     circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride);
 
     if (is_inner_circle_black) {
-#ifdef GMGPOLAR_USE_MUMPS
-        inner_boundary_mumps_solver_.job    = JOB_COMPUTE_SOLUTION;
-        inner_boundary_mumps_solver_.nrhs   = 1; // single rhs vector
-        inner_boundary_mumps_solver_.nz_rhs = grid.ntheta(); // non-zeros in rhs
-        inner_boundary_mumps_solver_.rhs    = circle_section.data();
-        inner_boundary_mumps_solver_.lrhs   = grid.ntheta(); // leading dimension of rhs
-        dmumps_c(&inner_boundary_mumps_solver_);
-        if (inner_boundary_mumps_solver_.info[0] != 0) {
-            std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl;
-        }
-#else
-        inner_boundary_lu_solver_.solveInPlace(circle_section.data());
-#endif
+        Vector<double> inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta()));
+        inner_boundary_solver_.solveInPlace(inner_boundary);
     }
 
     // Move updated values to x
@@ -59,19 +48,8 @@ void SmootherGive<LevelCacheType>::solveWhiteCircleSection(Vector<double> x, Vec
     circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride);
 
     if (is_inner_circle_white) {
-#ifdef GMGPOLAR_USE_MUMPS
-        inner_boundary_mumps_solver_.job    = JOB_COMPUTE_SOLUTION;
-        inner_boundary_mumps_solver_.nrhs   = 1; // single rhs vector
-        inner_boundary_mumps_solver_.nz_rhs = grid.ntheta(); // non-zeros in rhs
-        inner_boundary_mumps_solver_.rhs    = circle_section.data();
-        inner_boundary_mumps_solver_.lrhs   = grid.ntheta(); // leading dimension of rhs
-        dmumps_c(&inner_boundary_mumps_solver_);
-        if (inner_boundary_mumps_solver_.info[0] != 0) {
-            std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl;
-        }
-#else
-        inner_boundary_lu_solver_.solveInPlace(circle_section.data());
-#endif
+        Vector<double> inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid.ntheta()));
+        inner_boundary_solver_.solveInPlace(inner_boundary);
     }
 
     // Move updated values to x
@@ -128,4 +106,4 @@ void SmootherGive<LevelCacheType>::solveWhiteRadialSection(Vector<double> x, Vec
             x[grid.index(i_r, i_theta)] = temp[grid.index(i_r, i_theta)];
         }
     }
-}
+}
\ No newline at end of file
diff --git a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl
index 978acdc2..9d40522d 100644
--- a/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl
+++ b/include/Smoother/SmootherTake/buildInnerBoundaryAsc.inl
@@ -142,6 +142,12 @@ SmootherTake<LevelCacheType>::buildInteriorBoundarySolverMatrix()
     const int i_r    = 0;
     const int ntheta = grid.ntheta();
 
+    // The interior boundary matrix is symmetric due to the periodicity in the theta direction
+    // and the assumption that ntheta is even, which is required for the across-origin discretization.
+    // We store all non-zero entries of the matrix, both in COO format (for MUMPS)
+    // and in CSR format (for the in-house solver). If the COO matrix is marked as symmetric,
+    // the COO_Mumps_Solver optimizes the factorization by only using the upper triangular part of the matrix,
+    // which is extracted by the COO_Mumps_Solver internally.
 #ifdef GMGPOLAR_USE_MUMPS
     const int nnz = getNonZeroCountCircleAsc(i_r);
     SparseMatrixCOO<double> inner_boundary_solver_matrix(ntheta, ntheta, nnz);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 038278b1..fdd55be3 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -20,8 +20,8 @@ add_executable(gmgpolar_tests
     Interpolation/extrapolated_prolongation.cpp
     Interpolation/extrapolated_restriction.cpp
     Residual/residual.cpp
-    DirectSolver/directSolver.cpp
-    DirectSolver/directSolverNoMumps.cpp
+    DirectSolver/directSolver_coo_mumps.cpp
+    DirectSolver/directSolver_csr_lu.cpp
     Smoother/smoother.cpp
     ExtrapolatedSmoother/extrapolated_smoother.cpp
     ConfigParser/config_parser.cpp
diff --git a/tests/DirectSolver/directSolver.cpp b/tests/DirectSolver/directSolver_coo_mumps.cpp
similarity index 95%
rename from tests/DirectSolver/directSolver.cpp
rename to tests/DirectSolver/directSolver_coo_mumps.cpp
index 2d193732..ddfd4e9a 100644
--- a/tests/DirectSolver/directSolver.cpp
+++ b/tests/DirectSolver/directSolver_coo_mumps.cpp
@@ -10,8 +10,6 @@
 #include "../../include/Residual/ResidualGive/residualGive.h"
 #include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h"
 #include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h"
-#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h"
-#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h"
 
 #include "../../include/InputFunctions/domainGeometry.h"
 #include "../../include/InputFunctions/densityProfileCoefficients.h"
@@ -47,7 +45,7 @@
 /* Test 1/2: */
 /* Does the Take and Give Implementation match up? */
 
-TEST(DirectSolverTest, directSolver_DirBC_Interior)
+TEST(DirectSolver_COO_MUMPS_Test, directSolver_DirBC_Interior)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -103,7 +101,7 @@ TEST(DirectSolverTest, directSolver_DirBC_Interior)
     }
 }
 
-TEST(DirectSolverTest, directSolver_AcrossOrigin)
+TEST(DirectSolver_COO_MUMPS_Test, directSolver_AcrossOrigin)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -166,7 +164,7 @@ TEST(DirectSolverTest, directSolver_AcrossOrigin)
 /* Circular */
 /* -------- */
 
-TEST(DirectSolverTest_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -209,7 +207,7 @@ TEST(DirectSolverTest_CircularGeometry, SequentialDirectSolverDirBC_Interior_Cir
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTest_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -253,7 +251,7 @@ TEST(DirectSolverTest_CircularGeometry, ParallelDirectSolverDirBC_Interior_Circu
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTest_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -297,7 +295,7 @@ TEST(DirectSolverTest_CircularGeometry, SequentialDirectSolverAcrossOrigin_Circu
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-8);
 }
 
-TEST(DirectSolverTest_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.1, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -345,7 +343,7 @@ TEST(DirectSolverTest_CircularGeometry, ParallelDirectSolverAcrossOrigin_Circula
 /* Shafranov */
 /* --------- */
 
-TEST(DirectSolverTest_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -391,7 +389,7 @@ TEST(DirectSolverTest_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeo
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTest_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -441,7 +439,7 @@ TEST(DirectSolverTest_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeome
 /* Czarny */
 /* ------ */
 
-TEST(DirectSolverTest_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -487,7 +485,7 @@ TEST(DirectSolverTest_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry)
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTest_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -537,7 +535,7 @@ TEST(DirectSolverTest_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry)
 /* Culham */
 /* ------ */
 
-TEST(DirectSolverTest_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -581,7 +579,7 @@ TEST(DirectSolverTest_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry)
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTest_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -627,7 +625,7 @@ TEST(DirectSolverTest_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry)
 
 /* We adjust the PolarGrid to increase the precision */
 
-TEST(DirectSolverTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5,          1.441 * 1e-5,
                                   3.8833 * 1e-5, 8.7666 * 1e-5,
@@ -681,7 +679,7 @@ TEST(DirectSolverTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-10);
 }
 
-TEST(DirectSolverTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Give_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry)
 {
     std::vector<double> radii  = {0.15, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -727,7 +725,7 @@ TEST(DirectSolverTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2
 
 /* Same test now using Take */
 
-TEST(DirectSolverTakeTest_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -770,7 +768,7 @@ TEST(DirectSolverTakeTest_CircularGeometry, SequentialDirectSolverDirBC_Interior
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTakeTest_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -814,7 +812,7 @@ TEST(DirectSolverTakeTest_CircularGeometry, ParallelDirectSolverDirBC_Interior_C
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTakeTest_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -858,7 +856,7 @@ TEST(DirectSolverTakeTest_CircularGeometry, SequentialDirectSolverAcrossOrigin_C
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-8);
 }
 
-TEST(DirectSolverTakeTest_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.1, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -906,7 +904,7 @@ TEST(DirectSolverTakeTest_CircularGeometry, ParallelDirectSolverAcrossOrigin_Cir
 /* Shafranov */
 /* --------- */
 
-TEST(DirectSolverTakeTest_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -952,7 +950,7 @@ TEST(DirectSolverTakeTest_ShafranovGeometry, DirectSolverDirBC_Interior_Shafrano
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTakeTest_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -1002,7 +1000,7 @@ TEST(DirectSolverTakeTest_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovG
 /* Czarny */
 /* ------ */
 
-TEST(DirectSolverTakeTest_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -1048,7 +1046,7 @@ TEST(DirectSolverTakeTest_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeome
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTakeTest_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -1098,7 +1096,7 @@ TEST(DirectSolverTakeTest_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometr
 /* Culham */
 /* ------ */
 
-TEST(DirectSolverTakeTest_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -1142,7 +1140,7 @@ TEST(DirectSolverTakeTest_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeome
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-11);
 }
 
-TEST(DirectSolverTakeTest_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -1186,7 +1184,7 @@ TEST(DirectSolverTakeTest_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometr
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-8);
 }
 
-TEST(DirectSolverTakeTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5,          1.441 * 1e-5,
                                   3.8833 * 1e-5, 8.7666 * 1e-5,
@@ -1240,7 +1238,7 @@ TEST(DirectSolverTakeTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecis
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-10);
 }
 
-TEST(DirectSolverTakeTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry)
+TEST(DirectSolver_COO_MUMPS_Take_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry)
 {
     std::vector<double> radii  = {0.15, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
diff --git a/tests/DirectSolver/directSolverNoMumps.cpp b/tests/DirectSolver/directSolver_csr_lu.cpp
similarity index 95%
rename from tests/DirectSolver/directSolverNoMumps.cpp
rename to tests/DirectSolver/directSolver_csr_lu.cpp
index 370a9ef0..f1c366dd 100644
--- a/tests/DirectSolver/directSolverNoMumps.cpp
+++ b/tests/DirectSolver/directSolver_csr_lu.cpp
@@ -8,10 +8,8 @@
 #include "../../include/GMGPolar/gmgpolar.h"
 
 #include "../../include/Residual/ResidualGive/residualGive.h"
-#include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h"
-#include "../../include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h"
-#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h"
-#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h"
+#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h"
+#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h"
 
 #include "../../include/InputFunctions/domainGeometry.h"
 #include "../../include/InputFunctions/densityProfileCoefficients.h"
@@ -45,7 +43,7 @@
 /* Test 1/2: */
 /* Does the Take and Give Implementation match up? */
 
-TEST(DirectSolverTestNoMumps, directSolver_DirBC_Interior)
+TEST(DirectSolver_CSR_LU_Test, directSolver_DirBC_Interior)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -101,7 +99,7 @@ TEST(DirectSolverTestNoMumps, directSolver_DirBC_Interior)
     }
 }
 
-TEST(DirectSolverTestNoMumps, directSolver_AcrossOrigin)
+TEST(DirectSolver_CSR_LU_Test, directSolver_AcrossOrigin)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -164,7 +162,7 @@ TEST(DirectSolverTestNoMumps, directSolver_AcrossOrigin)
 /* Circular */
 /* -------- */
 
-TEST(DirectSolverTestNoMumps_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -207,7 +205,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, SequentialDirectSolverDirBC_Inter
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTestNoMumps_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -251,7 +249,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, ParallelDirectSolverDirBC_Interio
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTestNoMumps_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -295,7 +293,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, SequentialDirectSolverAcrossOrigi
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-8);
 }
 
-TEST(DirectSolverTestNoMumps_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.1, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -343,7 +341,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, ParallelDirectSolverAcrossOrigin_
 /* Shafranov */
 /* --------- */
 
-TEST(DirectSolverTestNoMumps_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -389,7 +387,7 @@ TEST(DirectSolverTestNoMumps_ShafranovGeometry, DirectSolverDirBC_Interior_Shafr
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTestNoMumps_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -438,7 +436,7 @@ TEST(DirectSolverTestNoMumps_ShafranovGeometry, DirectSolverAcrossOrigin_Shafran
 /* Czarny */
 /* ------ */
 
-TEST(DirectSolverTestNoMumps_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -483,7 +481,7 @@ TEST(DirectSolverTestNoMumps_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGe
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTestNoMumps_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -533,7 +531,7 @@ TEST(DirectSolverTestNoMumps_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeom
 /* Culham */
 /* ------ */
 
-TEST(DirectSolverTestNoMumps_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -576,7 +574,7 @@ TEST(DirectSolverTestNoMumps_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGe
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-11);
 }
 
-TEST(DirectSolverTestNoMumps_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -621,7 +619,7 @@ TEST(DirectSolverTestNoMumps_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeom
 
 /* We adjust the PolarGrid to increase the precision */
 
-TEST(DirectSolverTestNoMumps_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5,          1.441 * 1e-5,
                                   3.8833 * 1e-5, 8.7666 * 1e-5,
@@ -674,7 +672,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, DirectSolverAcrossOriginHigherPre
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-10);
 }
 
-TEST(DirectSolverTestNoMumps_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Give_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry)
 {
     std::vector<double> radii  = {0.15, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -719,7 +717,7 @@ TEST(DirectSolverTestNoMumps_CircularGeometry, DirectSolverAcrossOriginHigherPre
 
 /* Same test now using Take */
 
-TEST(DirectSolverTakeCustomLUTest_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, SequentialDirectSolverDirBC_Interior_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -761,7 +759,7 @@ TEST(DirectSolverTakeCustomLUTest_CircularGeometry, SequentialDirectSolverDirBC_
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTakeCustomLUTest_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, ParallelDirectSolverDirBC_Interior_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -804,7 +802,7 @@ TEST(DirectSolverTakeCustomLUTest_CircularGeometry, ParallelDirectSolverDirBC_In
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTakeCustomLUTest_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, SequentialDirectSolverAcrossOrigin_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -847,7 +845,7 @@ TEST(DirectSolverTakeCustomLUTest_CircularGeometry, SequentialDirectSolverAcross
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-8);
 }
 
-TEST(DirectSolverTakeCustomLUTest_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, ParallelDirectSolverAcrossOrigin_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.1, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -894,7 +892,7 @@ TEST(DirectSolverTakeCustomLUTest_CircularGeometry, ParallelDirectSolverAcrossOr
 /* Shafranov */
 /* --------- */
 
-TEST(DirectSolverTakeCustomLUTest_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_ShafranovGeometry, DirectSolverDirBC_Interior_ShafranovGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -939,7 +937,7 @@ TEST(DirectSolverTakeCustomLUTest_ShafranovGeometry, DirectSolverDirBC_Interior_
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTakeCustomLUTest_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_ShafranovGeometry, DirectSolverAcrossOrigin_ShafranovGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -989,7 +987,7 @@ TEST(DirectSolverTakeCustomLUTest_ShafranovGeometry, DirectSolverAcrossOrigin_Sh
 /* Czarny */
 /* ------ */
 
-TEST(DirectSolverTakeCustomLUTest_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_CzarnyGeometry, DirectSolverDirBC_Interior_CzarnyGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -1035,7 +1033,7 @@ TEST(DirectSolverTakeCustomLUTest_CzarnyGeometry, DirectSolverDirBC_Interior_Cza
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-12);
 }
 
-TEST(DirectSolverTakeCustomLUTest_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_CzarnyGeometry, DirectSolverAcrossOrigin_CzarnyGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -1085,7 +1083,7 @@ TEST(DirectSolverTakeCustomLUTest_CzarnyGeometry, DirectSolverAcrossOrigin_Czarn
 /* Culham */
 /* ------ */
 
-TEST(DirectSolverTakeCustomLUTest_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_CulhamGeometry, DirectSolverDirBC_Interior_CulhamGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -1127,7 +1125,7 @@ TEST(DirectSolverTakeCustomLUTest_CulhamGeometry, DirectSolverDirBC_Interior_Cul
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-11);
 }
 
-TEST(DirectSolverTakeCustomLUTest_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_CulhamGeometry, DirectSolverAcrossOrigin_CulhamGeometry)
 {
     std::vector<double> radii  = {1e-5, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
@@ -1170,7 +1168,7 @@ TEST(DirectSolverTakeCustomLUTest_CulhamGeometry, DirectSolverAcrossOrigin_Culha
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-8);
 }
 
-TEST(DirectSolverTakeCustomLUTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision_CircularGeometry)
 {
     std::vector<double> radii  = {1e-5,          1.441 * 1e-5,
                                   3.8833 * 1e-5, 8.7666 * 1e-5,
@@ -1223,7 +1221,7 @@ TEST(DirectSolverTakeCustomLUTest_CircularGeometry, DirectSolverAcrossOriginHigh
     ASSERT_NEAR(infinity_norm(ConstVector<double>(residuum)), 0.0, 1e-10);
 }
 
-TEST(DirectSolverTakeCustomLUTest_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry)
+TEST(DirectSolver_CSR_LU_Take_Test_CircularGeometry, DirectSolverAcrossOriginHigherPrecision2_CircularGeometry)
 {
     std::vector<double> radii  = {0.15, 0.2, 0.25, 0.5, 0.8, 0.9, 0.95, 1.2, 1.3};
     std::vector<double> angles = {
diff --git a/tests/ExtrapolatedSmoother/extrapolated_smoother.cpp b/tests/ExtrapolatedSmoother/extrapolated_smoother.cpp
index fd6f950c..34554c33 100644
--- a/tests/ExtrapolatedSmoother/extrapolated_smoother.cpp
+++ b/tests/ExtrapolatedSmoother/extrapolated_smoother.cpp
@@ -9,8 +9,8 @@
 
 #include "../../include/Residual/ResidualGive/residualGive.h"
 #include "../../include/Residual/ResidualTake/residualTake.h"
-#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h"
-#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTakeCustomLU.h"
+#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h"
+#include "../../include/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.h"
 #include "../../include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h"
 #include "../../include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h"
 
diff --git a/tests/Smoother/smoother.cpp b/tests/Smoother/smoother.cpp
index a69e46a3..6d87c94a 100644
--- a/tests/Smoother/smoother.cpp
+++ b/tests/Smoother/smoother.cpp
@@ -8,7 +8,7 @@
 #include "../../include/GMGPolar/gmgpolar.h"
 
 #include "../../include/Residual/ResidualGive/residualGive.h"
-#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGiveCustomLU.h"
+#include "../../include/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.h"
 #include "../../include/Smoother/SmootherGive/smootherGive.h"
 #include "../../include/Smoother/SmootherTake/smootherTake.h"