From b5908dcaa9a9d53f879ca04f9539fd3acf206b19 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Wed, 4 Feb 2026 19:23:36 +0100 Subject: [PATCH 1/7] Substitute batched tridiag solver --- .../Solvers/tridiagonal_solver.h | 14 ++ include/Smoother/SmootherTake/smootherTake.h | 23 ++- src/Smoother/SmootherTake/buildMatrix.cpp | 142 ++++++-------- src/Smoother/SmootherTake/smootherSolver.cpp | 181 +++++++++++------- src/Smoother/SmootherTake/smootherTake.cpp | 2 + 5 files changed, 202 insertions(+), 160 deletions(-) diff --git a/include/LinearAlgebra/Solvers/tridiagonal_solver.h b/include/LinearAlgebra/Solvers/tridiagonal_solver.h index 091cf89b..6f799c42 100644 --- a/include/LinearAlgebra/Solvers/tridiagonal_solver.h +++ b/include/LinearAlgebra/Solvers/tridiagonal_solver.h @@ -23,6 +23,20 @@ class BatchedTridiagonalSolver assign(sub_diagonal_, T(0)); } + /* ------------------- */ + /* Accessors for sizes */ + /* ------------------- */ + + int matrixDimension() const + { + return matrix_dimension_; + } + + int batchCount() const + { + return batch_count_; + } + /* ---------------------------- */ /* Accessors for matrix entries */ /* ---------------------------- */ diff --git a/include/Smoother/SmootherTake/smootherTake.h b/include/Smoother/SmootherTake/smootherTake.h index 83b8b349..f6016f1b 100644 --- a/include/Smoother/SmootherTake/smootherTake.h +++ b/include/Smoother/SmootherTake/smootherTake.h @@ -2,6 +2,8 @@ #include "../smoother.h" +#include "../../LinearAlgebra/Solvers/tridiagonal_solver.h" + #ifdef GMGPOLAR_USE_MUMPS #include "dmumps_c.h" #include "mpi.h" @@ -21,7 +23,7 @@ class SmootherTake : public Smoother // The A_sc matrix on i_r = 0 is defined through the COO/CSR matrix // 'inner_boundary_circle_matrix_' due to the across-origin treatment. // It isn't tridiagonal and thus it requires a more advanced solver. - // Note that circle_tridiagonal_solver_[0] is thus unused! + // Note that circle_tridiagonal_solver_[index=0] is thus unused! // Additionally 'circle_tridiagonal_solver_[index]' will refer to the circular line i_r = index and // 'radial_tridiagonal_solver_[index] will refer to the radial line i_theta = index. #ifdef GMGPOLAR_USE_MUMPS @@ -33,8 +35,8 @@ class SmootherTake : public Smoother #endif MatrixType inner_boundary_circle_matrix_; - std::vector> circle_tridiagonal_solver_; - std::vector> radial_tridiagonal_solver_; + BatchedTridiagonalSolver circle_tridiagonal_solver_; + BatchedTridiagonalSolver radial_tridiagonal_solver_; // clang-format off const Stencil stencil_DB_ = { @@ -87,9 +89,10 @@ class SmootherTake : public Smoother void applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp); - void solveCircleSection(const int i_r, Vector x, Vector temp, Vector solver_storage_1, - Vector solver_storage_2); - void solveRadialSection(const int i_theta, Vector x, Vector temp, Vector solver_storage); + void solveEvenCircleSection(Vector x, Vector temp); + void solveOddCircleSection(Vector x, Vector temp); + void solveEvenRadialSection(Vector x, Vector temp); + void solveOddRadialSection(Vector x, Vector temp); #ifdef GMGPOLAR_USE_MUMPS void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); @@ -98,8 +101,8 @@ class SmootherTake : public Smoother void nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, MatrixType& inner_boundary_circle_matrix, - std::vector>& circle_tridiagonal_solver, - std::vector>& radial_tridiagonal_solver, - ConstVector& arr, ConstVector& att, ConstVector& art, - ConstVector& detDF, ConstVector& coeff_beta); + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, + ConstVector& att, ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta); }; diff --git a/src/Smoother/SmootherTake/buildMatrix.cpp b/src/Smoother/SmootherTake/buildMatrix.cpp index d8430837..ded10461 100644 --- a/src/Smoother/SmootherTake/buildMatrix.cpp +++ b/src/Smoother/SmootherTake/buildMatrix.cpp @@ -1,14 +1,14 @@ #include "../../../include/Smoother/SmootherTake/smootherTake.h" /* Tridiagonal matrices */ -inline void updateMatrixElement(SymmetricTridiagonalSolver& matrix, int row, int column, double value) +inline void updateMatrixElement(BatchedTridiagonalSolver& solver, int batch, int row, int column, double value) { if (row == column) - matrix.main_diagonal(row) = value; + solver.main_diagonal(batch, row) = value; else if (row == column - 1) - matrix.sub_diagonal(row) = value; - else if (row == 0 && column == matrix.columns() - 1) - matrix.cyclic_corner_element() = value; + solver.sub_diagonal(batch, row) = value; + else if (row == 0 && column == solver.matrixDimension() - 1) + solver.cyclic_corner(batch) = value; } /* Inner Boundary COO/CSR matrix */ @@ -31,8 +31,8 @@ inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, int ptr, void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, MatrixType& inner_boundary_circle_matrix, - std::vector>& circle_tridiagonal_solver, - std::vector>& radial_tridiagonal_solver, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, ConstVector& att, ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta) { @@ -70,7 +70,9 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& const int top = grid.index(i_r, i_theta_P1); const int right = grid.index(i_r + 1, i_theta); - auto& matrix = circle_tridiagonal_solver[i_r]; + auto& solver = circle_tridiagonal_solver; + int batch = i_r; + const int center_index = i_theta; const int bottom_index = i_theta_M1; const int top_index = i_theta_P1; @@ -81,19 +83,19 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); /* Bottom */ row = center_index; column = bottom_index; value = -coeff3 * (att[center] + att[bottom]); - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); /* Top */ row = center_index; column = top_index; value = -coeff4 * (att[center] + att[top]); - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); } /* ------------------------------------------ */ /* Node in the interior of the Radial Section */ @@ -118,7 +120,9 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& const int top = grid.index(i_r, i_theta_P1); const int right = grid.index(i_r + 1, i_theta); - auto& matrix = radial_tridiagonal_solver[i_theta]; + auto& solver = radial_tridiagonal_solver; + int batch = i_theta; + const int center_index = i_r - numberSmootherCircles; const int left_index = i_r - numberSmootherCircles - 1; const int right_index = i_r - numberSmootherCircles + 1; @@ -129,19 +133,19 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); /* Left */ row = center_index; column = left_index; value = -coeff1 * (arr[center] + arr[left]); - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); /* Right */ row = center_index; column = right_index; value = -coeff2 * (arr[center] + arr[right]); - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); } /* ------------------------------------------ */ /* Circle Section: Node in the inner boundary */ @@ -262,7 +266,9 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& const int top = grid.index(i_r, i_theta_P1); const int right = grid.index(i_r + 1, i_theta); - auto& matrix = radial_tridiagonal_solver[i_theta]; + auto& solver = radial_tridiagonal_solver; + int batch = i_theta; + const int center_index = i_r - numberSmootherCircles; const int right_index = i_r - numberSmootherCircles + 1; @@ -272,13 +278,13 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); /* Right */ row = center_index; column = right_index; value = -coeff2 * (arr[center] + arr[right]); - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); } /* ------------------------------------------- */ /* Radial Section: Node next to outer boundary */ @@ -303,7 +309,9 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& const int top = grid.index(i_r, i_theta_P1); const int right = grid.index(i_r + 1, i_theta); - auto& matrix = radial_tridiagonal_solver[i_theta]; + auto& solver = radial_tridiagonal_solver; + int batch = i_theta; + const int center_index = i_r - numberSmootherCircles; const int left_index = i_r - numberSmootherCircles - 1; const int right_index = i_r - numberSmootherCircles + 1; @@ -314,25 +322,27 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); /* Left */ row = center_index; column = left_index; value = -coeff1 * (arr[center] + arr[left]); - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); /* Right: NOT INCLUDED! */ row = center_index; column = right_index; value = 0.0; - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); } /* ------------------------------------------ */ /* Radial Section: Node on the outer boundary */ /* ------------------------------------------ */ else if (i_r == grid.nr() - 1) { - auto& matrix = radial_tridiagonal_solver[i_theta]; + auto& solver = radial_tridiagonal_solver; + int batch = i_theta; + const int center_index = i_r - numberSmootherCircles; const int left_index = i_r - numberSmootherCircles - 1; @@ -340,13 +350,13 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& row = center_index; column = center_index; value = 1.0; - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); /* Left: NOT INCLUDED */ row = center_index; column = left_index; value = 0.0; - updateMatrixElement(matrix, row, column, value); + updateMatrixElement(solver, batch, row, column, value); } } @@ -386,86 +396,52 @@ void SmootherTake::buildAscRadialSection(const int i_theta) } } -// clang-format off void SmootherTake::buildAscMatrices() { /* -------------------------------------- */ /* Part 1: Allocate Asc Smoother matrices */ /* -------------------------------------- */ + // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor. + // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead. - const int number_smoother_circles = grid_.numberSmootherCircles(); - const int length_smoother_radial = grid_.lengthSmootherRadial(); - - const int num_circle_nodes = grid_.ntheta(); - circle_tridiagonal_solver_.resize(number_smoother_circles); - - const int num_radial_nodes = length_smoother_radial; - radial_tridiagonal_solver_.resize(grid_.ntheta()); - - // Remark: circle_tridiagonal_solver_[0] is unitialized. - // Please use inner_boundary_circle_matrix_ instead! - #pragma omp parallel num_threads(num_omp_threads_) if (grid_.numberOfNodes() > 10'000) - { - // ---------------- // - // Circular Section // - #pragma omp for nowait - for (int circle_Asc_index = 0; circle_Asc_index < number_smoother_circles; circle_Asc_index++) { - - /* Inner boundary circle */ - if (circle_Asc_index == 0) { - #ifdef GMGPOLAR_USE_MUMPS - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. - const int nnz = getNonZeroCountCircleAsc(circle_Asc_index); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, nnz); - inner_boundary_circle_matrix_.is_symmetric(false); - #else - std::function nnz_per_row = [&](int i_theta) { - return DirBC_Interior_? 1 : 4; - }; - inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); - #endif - } - - /* Interior Circle Section */ - else { - auto& solverMatrix = circle_tridiagonal_solver_[circle_Asc_index]; - solverMatrix = SymmetricTridiagonalSolver(num_circle_nodes); - solverMatrix.is_cyclic(true); - } - } - - // -------------- // - // Radial Section // - #pragma omp for nowait - for (int radial_Asc_index = 0; radial_Asc_index < grid_.ntheta(); radial_Asc_index++) { - auto& solverMatrix = radial_tridiagonal_solver_[radial_Asc_index]; - solverMatrix = SymmetricTridiagonalSolver(num_radial_nodes); - solverMatrix.is_cyclic(false); - } - } +#ifdef GMGPOLAR_USE_MUMPS + // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. + const int i_r = 0; + const int nnz = getNonZeroCountCircleAsc(i_r); + const int num_circle_nodes = grid_.ntheta(); + inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, nnz); + inner_boundary_circle_matrix_.is_symmetric(false); +#else + std::function nnz_per_row = [&](int i_theta) { + return DirBC_Interior_ ? 1 : 4; + }; + const int num_circle_nodes = grid_.ntheta(); + inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); +#endif /* ---------------------------------- */ /* Part 2: Fill Asc Smoother matrices */ /* ---------------------------------- */ - - #pragma omp parallel num_threads(num_omp_threads_) +#pragma omp parallel num_threads(num_omp_threads_) { - #pragma omp for nowait +#pragma omp for nowait for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { buildAscCircleSection(i_r); } - #pragma omp for nowait +#pragma omp for nowait for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { buildAscRadialSection(i_theta); } } - #ifdef GMGPOLAR_USE_MUMPS + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); + +#ifdef GMGPOLAR_USE_MUMPS /* ------------------------------------------------------------------ */ /* Part 3: Convert inner_boundary_circle_matrix to a symmetric matrix */ /* ------------------------------------------------------------------ */ - SparseMatrixCOO full_matrix = std::move(inner_boundary_circle_matrix_); const int nnz = full_matrix.non_zero_size(); @@ -487,6 +463,6 @@ void SmootherTake::buildAscMatrices() current_nz++; } } - #endif +#endif } // clang-format on diff --git a/src/Smoother/SmootherTake/smootherSolver.cpp b/src/Smoother/SmootherTake/smootherSolver.cpp index 66a7db06..fa7a18c9 100644 --- a/src/Smoother/SmootherTake/smootherSolver.cpp +++ b/src/Smoother/SmootherTake/smootherSolver.cpp @@ -249,51 +249,91 @@ void SmootherTake::applyAscOrthoRadialSection(const int i_theta, const SmootherC } } -void SmootherTake::solveCircleSection(const int i_r, Vector x, Vector temp, - Vector solver_storage_1, Vector solver_storage_2) +void SmootherTake::solveEvenCircleSection(Vector x, Vector temp) { - const int start = grid_.index(i_r, 0); - const int end = start + grid_.ntheta(); + int start = 0; + int end = grid_.numberCircularSmootherNodes(); + Vector circle_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + int batch_offset = 2; + int batch_stride = 2; + circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); - if (i_r > 0) { - circle_tridiagonal_solver_[i_r].solveInPlace(temp.data() + start, solver_storage_1.data(), - solver_storage_2.data()); - } - else if (i_r == 0) { #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = temp.data() + start; - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } + inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; + inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector + inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs + inner_boundary_mumps_solver_.rhs = circle_section.data(); + inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs + dmumps_c(&inner_boundary_mumps_solver_); + if (inner_boundary_mumps_solver_.info[0] != 0) { + std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; + } #else - inner_boundary_lu_solver_.solveInPlace(temp.data() + start); + inner_boundary_lu_solver_.solveInPlace(circle_section.data()); #endif + + // Move updated values to x + for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } } +} + +void SmootherTake::solveOddCircleSection(Vector x, Vector temp) +{ + int start = 0; + int end = grid_.numberCircularSmootherNodes(); + Vector circle_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + int batch_offset = 1; + int batch_stride = 2; + circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); // Move updated values to x - Kokkos::deep_copy(Kokkos::subview(x, Kokkos::make_pair(start, end)), - Kokkos::subview(temp, Kokkos::make_pair(start, end))); + for (int i_r = 1; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } } -void SmootherTake::solveRadialSection(const int i_theta, Vector x, Vector temp, - Vector solver_storage) +void SmootherTake::solveEvenRadialSection(Vector x, Vector temp) { - const int start = grid_.index(grid_.numberSmootherCircles(), i_theta); - const int end = start + grid_.lengthSmootherRadial(); + int start = grid_.numberCircularSmootherNodes(); + int end = grid_.numberOfNodes(); + Vector radial_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); - radial_tridiagonal_solver_[i_theta].solveInPlace(temp.data() + start, solver_storage.data()); + int batch_offset = 0; + int batch_stride = 2; + radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride); // Move updated values to x - Kokkos::deep_copy(Kokkos::subview(x, Kokkos::make_pair(start, end)), - Kokkos::subview(temp, Kokkos::make_pair(start, end))); + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } } -// clang-format off +void SmootherTake::solveOddRadialSection(Vector x, Vector temp) +{ + int start = grid_.numberCircularSmootherNodes(); + int end = grid_.numberOfNodes(); + Vector radial_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + int batch_offset = 1; + int batch_stride = 2; + radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride); + + // Move updated values to x + for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} // In temp we store the vector 'rhs - A_sc^ortho u_sc^ortho' and then we solve the system // Asc * u_sc = temp in place and move the updated values into 'x'. @@ -305,43 +345,50 @@ void SmootherTake::smoothing(Vector x, ConstVector rhs, Vector circle_solver_storage_1("circle_solver_storage_1",grid_.ntheta()); - Vector circle_solver_storage_2("circle_solver_storage_2",grid_.ntheta()); - Vector radial_solver_storage("circle_solver_storage",grid_.lengthSmootherRadial()); - - /* The outer most circle next to the radial section is defined to be black. */ - /* Priority: Black -> White. */ - const int start_black_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 1 : 0; - const int start_white_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 0 : 1; - - /* Black Circle Section */ - #pragma omp for - for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - solveCircleSection(i_r, x, temp, circle_solver_storage_1, circle_solver_storage_2); - } /* Implicit barrier */ - - /* White Circle Section */ - #pragma omp for nowait - for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - solveCircleSection(i_r, x, temp, circle_solver_storage_1, circle_solver_storage_2); - } - /* Black Radial Section */ - #pragma omp for - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - solveRadialSection(i_theta, x, temp, radial_solver_storage); - } /* Implicit barrier */ - - /* White Radial Section*/ - #pragma omp for - for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - solveRadialSection(i_theta, x, temp, radial_solver_storage); - } /* Implicit barrier */ + /* The outer most circle next to the radial section is defined to be black. */ + /* Priority: Black -> White. */ + const int start_black_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 1 : 0; + const int start_white_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 0 : 1; + +/* Black Circle Section */ +#pragma omp parallel for + for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); + } /* Implicit barrier */ + + if (start_black_circles == 1) { + solveOddCircleSection(x, temp); + } + else if (start_black_circles == 0) { + solveEvenCircleSection(x, temp); } + +/* White Circle Section */ +#pragma omp parallel for + for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); + } /* Implicit barrier */ + + if (start_white_circles == 1) { + solveOddCircleSection(x, temp); + } + else if (start_white_circles == 0) { + solveEvenCircleSection(x, temp); + } + +/* Black Radial Section */ +#pragma omp parallel for + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { + applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); + } /* Implicit barrier */ + + solveEvenRadialSection(x, temp); + +/* White Radial Section*/ +#pragma omp parallel for + for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { + applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); + } /* Implicit barrier */ + + solveOddRadialSection(x, temp); } -// clang-format on diff --git a/src/Smoother/SmootherTake/smootherTake.cpp b/src/Smoother/SmootherTake/smootherTake.cpp index 78f99d03..588da61e 100644 --- a/src/Smoother/SmootherTake/smootherTake.cpp +++ b/src/Smoother/SmootherTake/smootherTake.cpp @@ -4,6 +4,8 @@ SmootherTake::SmootherTake(const PolarGrid& grid, const LevelCache& level_cache, const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads) : Smoother(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) + , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) + , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) { buildAscMatrices(); #ifdef GMGPOLAR_USE_MUMPS From f146f28adaf103d55548641f68f6c70e00bcea20 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Wed, 4 Feb 2026 19:36:50 +0100 Subject: [PATCH 2/7] Name collision --- src/Smoother/SmootherTake/buildMatrix.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Smoother/SmootherTake/buildMatrix.cpp b/src/Smoother/SmootherTake/buildMatrix.cpp index ded10461..71244a1f 100644 --- a/src/Smoother/SmootherTake/buildMatrix.cpp +++ b/src/Smoother/SmootherTake/buildMatrix.cpp @@ -407,9 +407,9 @@ void SmootherTake::buildAscMatrices() #ifdef GMGPOLAR_USE_MUMPS // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. const int i_r = 0; - const int nnz = getNonZeroCountCircleAsc(i_r); + const int inner_nnz = getNonZeroCountCircleAsc(i_r); const int num_circle_nodes = grid_.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, nnz); + inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); inner_boundary_circle_matrix_.is_symmetric(false); #else std::function nnz_per_row = [&](int i_theta) { From c322c59df9366ecb8ce3b13ccc55dcdba4b8a8fe Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Wed, 4 Feb 2026 20:11:14 +0100 Subject: [PATCH 3/7] Fix comment for unused circle_tridiagonal_solver --- include/Smoother/SmootherTake/smootherTake.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/Smoother/SmootherTake/smootherTake.h b/include/Smoother/SmootherTake/smootherTake.h index f6016f1b..26055348 100644 --- a/include/Smoother/SmootherTake/smootherTake.h +++ b/include/Smoother/SmootherTake/smootherTake.h @@ -23,7 +23,7 @@ class SmootherTake : public Smoother // The A_sc matrix on i_r = 0 is defined through the COO/CSR matrix // 'inner_boundary_circle_matrix_' due to the across-origin treatment. // It isn't tridiagonal and thus it requires a more advanced solver. - // Note that circle_tridiagonal_solver_[index=0] is thus unused! + // Note that circle_tridiagonal_solver_[0] is thus unused! // Additionally 'circle_tridiagonal_solver_[index]' will refer to the circular line i_r = index and // 'radial_tridiagonal_solver_[index] will refer to the radial line i_theta = index. #ifdef GMGPOLAR_USE_MUMPS From 347888ceee38f29d34a81dd5e82eee39323b73b6 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Fri, 6 Feb 2026 22:25:11 +0100 Subject: [PATCH 4/7] Better readability and comments --- include/Smoother/SmootherTake/smootherTake.h | 132 ++++++++++++++++--- 1 file changed, 117 insertions(+), 15 deletions(-) diff --git a/include/Smoother/SmootherTake/smootherTake.h b/include/Smoother/SmootherTake/smootherTake.h index 26055348..10ea85fd 100644 --- a/include/Smoother/SmootherTake/smootherTake.h +++ b/include/Smoother/SmootherTake/smootherTake.h @@ -9,23 +9,82 @@ #include "mpi.h" #endif +// SmootherTake implements a coupled circle-radial smoothing procedure. +// It performs iterative updates on different section of the grid based +// on the circle/radial section of the grid and a black/white coloring scheme. +// +// The smoothing solves linear systems of the form: +// A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho +// where: +// - s ∈ {Circle, Radial} denotes the smoother section type, +// - c ∈ {Black, White} denotes the coloring (even/odd sub-system). +// +// The update sequence is as follows: +// 1. Black-Circle update (u_bc): +// A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho +// 2. White-Circle update (u_wc): +// A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho +// 3. Black-Radial update (u_br): +// A_br * u_br = f_br − A_br^ortho * u_br^ortho +// 4. White-Radial update (u_wr): +// A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho +// +// Algorithm details: +// - 'rhs' corresponds to the f vector, 'x' stores the final solution, +// and 'temp' is used for temporary storage during updates. +// - First, temp is updated with f_sc − A_sc^ortho * u_sc^ortho. +// - The system is then solved in-place in temp, and the results +// are copied back to x. +// - Steps 2 (White-Circle) and 3 (Black-Radial) can be started +// simultaneously if the outermost circle is defined as black. +// - The system solves use the A-Take stencil for matrix application. +// +// Solver and matrix structure: +// - The matrix A_sc is block tridiagonal due to the smoother-based +// grid indexing, which allows efficient line-wise factorization. +// - The inner boundary requires special handling because it +// contains an additional across-origin coupling, making it +// non-tridiagonal; therefore, a more general solver is used there. +// When using the MUMPS solver, the matrix is assembled in COO format. +// When using the in-house solver, the matrix is stored in CSR format. +// - Circular line matrices are cyclic tridiagonal due to angular +// periodicity, whereas radial line matrices are strictly tridiagonal. +// - Dirichlet boundary contributions in radial matrices are shifted +// into the right-hand side to preserve symmetry. +// - Enforcing symmetric matrix structure enables reduced memory usage +// and allows in-place factorization with the tridiagonal solver. + class SmootherTake : public Smoother { public: + // Constructs the coupled circle-radial smoother. + // Builds the A_sc smoother matrices and prepares the solvers. explicit SmootherTake(const PolarGrid& grid, const LevelCache& level_cache, const DomainGeometry& domain_geometry, const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); + + // If MUMPS is enabled, this cleans up the inner boundary solver. ~SmootherTake() override; + // Performs one full coupled smoothing sweep: + // BC -> WC -> BR -> WR + // using temp as RHS workspace. void smoothing(Vector x, ConstVector rhs, Vector temp) override; private: - // The A_sc matrix on i_r = 0 is defined through the COO/CSR matrix - // 'inner_boundary_circle_matrix_' due to the across-origin treatment. - // It isn't tridiagonal and thus it requires a more advanced solver. - // Note that circle_tridiagonal_solver_[0] is thus unused! - // Additionally 'circle_tridiagonal_solver_[index]' will refer to the circular line i_r = index and - // 'radial_tridiagonal_solver_[index] will refer to the radial line i_theta = index. + /* ------------------- */ + /* Tridiagonal solvers */ + /* ------------------- */ + + // Batched solvers for cyclic-tridiagonal circle lines. + BatchedTridiagonalSolver circle_tridiagonal_solver_; + + // Batched solvers for tridiagonal radial lines. + BatchedTridiagonalSolver radial_tridiagonal_solver_; + + // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because + // it includes across-origin coupling. Therefore, it is assembled into a + // sparse matrix and solved using a general-purpose sparse solver. #ifdef GMGPOLAR_USE_MUMPS using MatrixType = SparseMatrixCOO; DMUMPS_STRUC_C inner_boundary_mumps_solver_; @@ -33,11 +92,19 @@ class SmootherTake : public Smoother using MatrixType = SparseMatrixCSR; SparseLUSolver inner_boundary_lu_solver_; #endif + // Sparse matrix for the non-tridiagonal inner boundary circle block. MatrixType inner_boundary_circle_matrix_; - BatchedTridiagonalSolver circle_tridiagonal_solver_; - BatchedTridiagonalSolver radial_tridiagonal_solver_; + // Note: + // - circle_tridiagonal_solver_[0] is unused. + // - circle_tridiagonal_solver_[i_r] solves circle line i_r. + // - radial_tridiagonal_solver_[i_theta] solves radial line i_theta. + /* ------------------- */ + /* Stencil definitions */ + /* ------------------- */ + + // Stencils encode neighborhood connectivity for A_sc matrix assembly. // clang-format off const Stencil stencil_DB_ = { -1, -1, -1, @@ -73,36 +140,71 @@ class SmootherTake : public Smoother }; // clang-format on + // Select correct stencil depending on radial index and boundary type. const Stencil& getStencil(int i_r) const; + + /* --------------- */ + /* Matrix assembly */ + /* --------------- */ + + // Unused: Number of nonzero A_sc entries for circle/radial sections. int getNonZeroCountCircleAsc(const int i_r) const; int getNonZeroCountRadialAsc(const int i_theta) const; + // Used only for interior boundary A_sc to obtain a ptr to index into COO matrices. + // It accumulates all stencil sizes within a line up to, but excluding the current node. int getCircleAscIndex(const int i_r, const int i_theta) const; int getRadialAscIndex(const int i_r, const int i_theta) const; + // Build all A_sc matrices for circle and radial smoothers. void buildAscMatrices(); + // Build A_sc matrix block for a single circular line. void buildAscCircleSection(const int i_r); + // Build A_sc matrix block for a single radial line. void buildAscRadialSection(const int i_theta); + // Build A_sc for a specific node (i_r, i_theta) + void nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& inner_boundary_circle_matrix, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, + ConstVector& att, ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta); + /* ---------------------- */ + /* Orthogonal application */ + /* ---------------------- */ + + // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) + // where x = u_sc and rhs = f_sc void applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp); void applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, ConstVector x, ConstVector rhs, Vector temp); + /* ----------------- */ + /* Line-wise solvers */ + /* ----------------- */ + + // Solve the linear system: + // A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho + // Parameter mapping: + // x = u_sc (solution vector for section s and color c) + // temp = f_sc − A_sc^⊥ * u_sc^⊥ (precomputed right-hand side) + // where: + // s in {Circle, Radial} denotes the smoother section type, + // c in {Black, White} denotes the coloring (even/odd sub-system). void solveEvenCircleSection(Vector x, Vector temp); void solveOddCircleSection(Vector x, Vector temp); void solveEvenRadialSection(Vector x, Vector temp); void solveOddRadialSection(Vector x, Vector temp); + /* ----------------------------------- */ + /* Initialize and destroy MUMPS solver */ + /* ----------------------------------- */ #ifdef GMGPOLAR_USE_MUMPS + // Initialize sparse MUMPS solver with assembled COO matrix. void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); + // Release MUMPS internal memory and MPI structures. void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); #endif - - void nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, - ConstVector& att, ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta); }; From f415bb5b102c469bdce2461cfd0f9ad249245015 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 7 Feb 2026 01:09:22 +0100 Subject: [PATCH 5/7] Restructure for clarity --- include/Smoother/SmootherTake/smootherTake.h | 51 +++-- .../{smootherSolver.cpp => applyAscOrtho.cpp} | 177 ++---------------- src/Smoother/SmootherTake/buildMatrix.cpp | 163 ++++++++-------- src/Smoother/SmootherTake/smootherTake.cpp | 63 +++++++ src/Smoother/SmootherTake/solveAscSystem.cpp | 91 +++++++++ 5 files changed, 274 insertions(+), 271 deletions(-) rename src/Smoother/SmootherTake/{smootherSolver.cpp => applyAscOrtho.cpp} (64%) create mode 100644 src/Smoother/SmootherTake/solveAscSystem.cpp diff --git a/include/Smoother/SmootherTake/smootherTake.h b/include/Smoother/SmootherTake/smootherTake.h index 10ea85fd..0048366b 100644 --- a/include/Smoother/SmootherTake/smootherTake.h +++ b/include/Smoother/SmootherTake/smootherTake.h @@ -9,15 +9,15 @@ #include "mpi.h" #endif -// SmootherTake implements a coupled circle-radial smoothing procedure. -// It performs iterative updates on different section of the grid based +// SmootherTake implements the coupled circle-radial smoothing procedure. +// It performs iterative updates on different parts of the grid based // on the circle/radial section of the grid and a black/white coloring scheme. // // The smoothing solves linear systems of the form: // A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho // where: -// - s ∈ {Circle, Radial} denotes the smoother section type, -// - c ∈ {Black, White} denotes the coloring (even/odd sub-system). +// - s in {Circle, Radial} denotes the smoother section type, +// - c in {Black, White} denotes the coloring (even/odd line sub-system). // // The update sequence is as follows: // 1. Black-Circle update (u_bc): @@ -35,9 +35,6 @@ // - First, temp is updated with f_sc − A_sc^ortho * u_sc^ortho. // - The system is then solved in-place in temp, and the results // are copied back to x. -// - Steps 2 (White-Circle) and 3 (Black-Radial) can be started -// simultaneously if the outermost circle is defined as black. -// - The system solves use the A-Take stencil for matrix application. // // Solver and matrix structure: // - The matrix A_sc is block tridiagonal due to the smoother-based @@ -105,6 +102,13 @@ class SmootherTake : public Smoother /* ------------------- */ // Stencils encode neighborhood connectivity for A_sc matrix assembly. + // It is only used in the construction of COO/CSR matrices. + // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. + // The Stencil class stores the offset for each position. + // - Non-zero matrix indicesare obtained via `ptr + offset` + // - A value of `-1` means the position is not included in the stencil pattern. + // - Other values (0, 1, 2, ..., stencil_size - 1) correspond to valid stencil indices. + // clang-format off const Stencil stencil_DB_ = { -1, -1, -1, @@ -119,18 +123,18 @@ class SmootherTake : public Smoother }; const Stencil circle_stencil_across_origin_ = { -1, 3, -1, - 1, 0, -1, + 1, 0, -1, -1, 2, -1 }; /* Radial Stencils */ const Stencil radial_stencil_interior_ = { -1, -1, -1, - 1, 0, 2, + 1, 0, 2, -1, -1, -1 }; const Stencil radial_stencil_next_outer_DB_ = { -1, -1, -1, - 1, 0, -1, + 1, 0, -1, -1, -1, -1 }; const Stencil radial_stencil_next_circular_smoothing_ = { @@ -163,12 +167,8 @@ class SmootherTake : public Smoother // Build A_sc matrix block for a single radial line. void buildAscRadialSection(const int i_theta); // Build A_sc for a specific node (i_r, i_theta) - void nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, - ConstVector& att, ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta); + void nodeBuildSmootherTake(int i_r, int i_theta, ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta); /* ---------------------- */ /* Orthogonal application */ @@ -176,10 +176,9 @@ class SmootherTake : public Smoother // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) // where x = u_sc and rhs = f_sc - void applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, ConstVector x, - ConstVector rhs, Vector temp); - void applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, ConstVector x, - ConstVector rhs, Vector temp); + void applyAscOrthoCircleSection(const int i_r, ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoRadialSection(const int i_theta, ConstVector x, ConstVector rhs, + Vector temp); /* ----------------- */ /* Line-wise solvers */ @@ -189,14 +188,14 @@ class SmootherTake : public Smoother // A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho // Parameter mapping: // x = u_sc (solution vector for section s and color c) - // temp = f_sc − A_sc^⊥ * u_sc^⊥ (precomputed right-hand side) + // temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) // where: // s in {Circle, Radial} denotes the smoother section type, - // c in {Black, White} denotes the coloring (even/odd sub-system). - void solveEvenCircleSection(Vector x, Vector temp); - void solveOddCircleSection(Vector x, Vector temp); - void solveEvenRadialSection(Vector x, Vector temp); - void solveOddRadialSection(Vector x, Vector temp); + // c in {Black, White} denotes the line coloring. + void solveBlackCircleSection(Vector x, Vector temp); + void solveWhiteCircleSection(Vector x, Vector temp); + void solveBlackRadialSection(Vector x, Vector temp); + void solveWhiteRadialSection(Vector x, Vector temp); /* ----------------------------------- */ /* Initialize and destroy MUMPS solver */ diff --git a/src/Smoother/SmootherTake/smootherSolver.cpp b/src/Smoother/SmootherTake/applyAscOrtho.cpp similarity index 64% rename from src/Smoother/SmootherTake/smootherSolver.cpp rename to src/Smoother/SmootherTake/applyAscOrtho.cpp index fa7a18c9..07bced89 100644 --- a/src/Smoother/SmootherTake/smootherSolver.cpp +++ b/src/Smoother/SmootherTake/applyAscOrtho.cpp @@ -1,10 +1,9 @@ #include "../../../include/Smoother/SmootherTake/smootherTake.h" inline void nodeApplyAscOrthoCircleTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - SmootherColor smoother_color, ConstVector& x, ConstVector& rhs, - Vector& result, ConstVector& arr, ConstVector& att, - ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta) + ConstVector& x, ConstVector& rhs, Vector& result, + ConstVector& arr, ConstVector& att, ConstVector& art, + ConstVector& detDF, ConstVector& coeff_beta) { assert(i_r >= 0 && i_r < grid.numberSmootherCircles()); @@ -89,10 +88,10 @@ inline void nodeApplyAscOrthoCircleTake(int i_r, int i_theta, const PolarGrid& g } inline void nodeApplyAscOrthoRadialTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - SmootherColor smoother_color, ConstVector& x, ConstVector& rhs, - Vector& result, ConstVector& arr, - const ConstVector& att, ConstVector& art, - const ConstVector& detDF, ConstVector& coeff_beta) + ConstVector& x, ConstVector& rhs, Vector& result, + ConstVector& arr, const ConstVector& att, + ConstVector& art, const ConstVector& detDF, + ConstVector& coeff_beta) { assert(i_r >= grid.numberSmootherCircles() && i_r < grid.nr()); /* -------------------- */ @@ -209,8 +208,8 @@ inline void nodeApplyAscOrthoRadialTake(int i_r, int i_theta, const PolarGrid& g } } -void SmootherTake::applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, ConstVector x, - ConstVector rhs, Vector temp) +void SmootherTake::applyAscOrthoCircleSection(const int i_r, ConstVector x, ConstVector rhs, + Vector temp) { assert(i_r >= 0 && i_r < grid_.numberSmootherCircles()); @@ -224,13 +223,13 @@ void SmootherTake::applyAscOrthoCircleSection(const int i_r, const SmootherColor ConstVector coeff_beta = level_cache_.coeff_beta(); for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - nodeApplyAscOrthoCircleTake(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, att, art, - detDF, coeff_beta); + nodeApplyAscOrthoCircleTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, + coeff_beta); } } -void SmootherTake::applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, - ConstVector x, ConstVector rhs, Vector temp) +void SmootherTake::applyAscOrthoRadialSection(const int i_theta, ConstVector x, ConstVector rhs, + Vector temp) { assert(i_theta >= 0 && i_theta < grid_.ntheta()); @@ -244,151 +243,7 @@ void SmootherTake::applyAscOrthoRadialSection(const int i_theta, const SmootherC ConstVector coeff_beta = level_cache_.coeff_beta(); for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { - nodeApplyAscOrthoRadialTake(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, att, art, - detDF, coeff_beta); - } -} - -void SmootherTake::solveEvenCircleSection(Vector x, Vector temp) -{ - int start = 0; - int end = grid_.numberCircularSmootherNodes(); - Vector circle_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); - - int batch_offset = 2; - int batch_stride = 2; - circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); - -#ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } -#else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); -#endif - - // Move updated values to x - for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r += 2) { - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; - } - } -} - -void SmootherTake::solveOddCircleSection(Vector x, Vector temp) -{ - int start = 0; - int end = grid_.numberCircularSmootherNodes(); - Vector circle_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); - - int batch_offset = 1; - int batch_stride = 2; - circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); - - // Move updated values to x - for (int i_r = 1; i_r < grid_.numberSmootherCircles(); i_r += 2) { - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; - } + nodeApplyAscOrthoRadialTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, + coeff_beta); } -} - -void SmootherTake::solveEvenRadialSection(Vector x, Vector temp) -{ - int start = grid_.numberCircularSmootherNodes(); - int end = grid_.numberOfNodes(); - Vector radial_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); - - int batch_offset = 0; - int batch_stride = 2; - radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride); - - // Move updated values to x - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { - for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { - x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; - } - } -} - -void SmootherTake::solveOddRadialSection(Vector x, Vector temp) -{ - int start = grid_.numberCircularSmootherNodes(); - int end = grid_.numberOfNodes(); - Vector radial_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); - - int batch_offset = 1; - int batch_stride = 2; - radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride); - - // Move updated values to x - for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { - for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { - x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; - } - } -} - -// In temp we store the vector 'rhs - A_sc^ortho u_sc^ortho' and then we solve the system -// Asc * u_sc = temp in place and move the updated values into 'x'. -void SmootherTake::smoothing(Vector x, ConstVector rhs, Vector temp) -{ - assert(x.size() == rhs.size()); - assert(temp.size() == rhs.size()); - - assert(level_cache_.cacheDensityProfileCoefficients()); - assert(level_cache_.cacheDomainGeometry()); - - /* The outer most circle next to the radial section is defined to be black. */ - /* Priority: Black -> White. */ - const int start_black_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 1 : 0; - const int start_white_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 0 : 1; - -/* Black Circle Section */ -#pragma omp parallel for - for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } /* Implicit barrier */ - - if (start_black_circles == 1) { - solveOddCircleSection(x, temp); - } - else if (start_black_circles == 0) { - solveEvenCircleSection(x, temp); - } - -/* White Circle Section */ -#pragma omp parallel for - for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } /* Implicit barrier */ - - if (start_white_circles == 1) { - solveOddCircleSection(x, temp); - } - else if (start_white_circles == 0) { - solveEvenCircleSection(x, temp); - } - -/* Black Radial Section */ -#pragma omp parallel for - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } /* Implicit barrier */ - - solveEvenRadialSection(x, temp); - -/* White Radial Section*/ -#pragma omp parallel for - for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } /* Implicit barrier */ - - solveOddRadialSection(x, temp); -} +} \ No newline at end of file diff --git a/src/Smoother/SmootherTake/buildMatrix.cpp b/src/Smoother/SmootherTake/buildMatrix.cpp index 71244a1f..2f554cc4 100644 --- a/src/Smoother/SmootherTake/buildMatrix.cpp +++ b/src/Smoother/SmootherTake/buildMatrix.cpp @@ -29,18 +29,15 @@ inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, int ptr, } #endif -void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, - ConstVector& arr, ConstVector& att, ConstVector& art, - ConstVector& detDF, ConstVector& coeff_beta) +void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta) { - assert(i_r >= 0 && i_r < grid.nr()); - assert(i_theta >= 0 && i_theta < grid.ntheta()); + assert(i_r >= 0 && i_r < grid_.nr()); + assert(i_theta >= 0 && i_theta < grid_.ntheta()); - const int numberSmootherCircles = grid.numberSmootherCircles(); - const int lengthSmootherRadial = grid.lengthSmootherRadial(); + const int numberSmootherCircles = grid_.numberSmootherCircles(); + const int lengthSmootherRadial = grid_.lengthSmootherRadial(); assert(numberSmootherCircles >= 2); assert(lengthSmootherRadial >= 3); @@ -51,26 +48,26 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& /* Node in the interior of the Circle Section */ /* ------------------------------------------ */ if (i_r > 0 && i_r < numberSmootherCircles) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + double h1 = grid_.radialSpacing(i_r - 1); + double h2 = grid_.radialSpacing(i_r); + double k1 = grid_.angularSpacing(i_theta - 1); + double k2 = grid_.angularSpacing(i_theta); double coeff1 = 0.5 * (k1 + k2) / h1; double coeff2 = 0.5 * (k1 + k2) / h2; double coeff3 = 0.5 * (h1 + h2) / k1; double coeff4 = 0.5 * (h1 + h2) / k2; - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_M1 = grid_.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid_.wrapThetaIndex(i_theta + 1); - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); + const int left = grid_.index(i_r - 1, i_theta); + const int bottom = grid_.index(i_r, i_theta_M1); + const int center = grid_.index(i_r, i_theta); + const int top = grid_.index(i_r, i_theta_P1); + const int right = grid_.index(i_r + 1, i_theta); - auto& solver = circle_tridiagonal_solver; + auto& solver = circle_tridiagonal_solver_; int batch = i_r; const int center_index = i_theta; @@ -100,27 +97,27 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& /* ------------------------------------------ */ /* Node in the interior of the Radial Section */ /* ------------------------------------------ */ - else if (i_r > numberSmootherCircles && i_r < grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + else if (i_r > numberSmootherCircles && i_r < grid_.nr() - 2) { + double h1 = grid_.radialSpacing(i_r - 1); + double h2 = grid_.radialSpacing(i_r); + double k1 = grid_.angularSpacing(i_theta - 1); + double k2 = grid_.angularSpacing(i_theta); double coeff1 = 0.5 * (k1 + k2) / h1; double coeff2 = 0.5 * (k1 + k2) / h2; double coeff3 = 0.5 * (h1 + h2) / k1; double coeff4 = 0.5 * (h1 + h2) / k2; - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_M1 = grid_.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid_.wrapThetaIndex(i_theta + 1); - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); + const int left = grid_.index(i_r - 1, i_theta); + const int bottom = grid_.index(i_r, i_theta_M1); + const int center = grid_.index(i_r, i_theta); + const int top = grid_.index(i_r, i_theta_P1); + const int right = grid_.index(i_r + 1, i_theta); - auto& solver = radial_tridiagonal_solver; + auto& solver = radial_tridiagonal_solver_; int batch = i_theta; const int center_index = i_r - numberSmootherCircles; @@ -157,8 +154,8 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& int ptr, offset; int row, col; double val; - if (DirBC_Interior) { - auto& matrix = inner_boundary_circle_matrix; + if (DirBC_Interior_) { + auto& matrix = inner_boundary_circle_matrix_; const int center_index = i_theta; const int center_nz_index = getCircleAscIndex(i_r, i_theta); @@ -178,29 +175,29 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& /* Case 2: Across origin discretization on the interior boundary */ /* ------------------------------------------------------------- */ // h1 gets replaced with 2 * R0. - // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). + // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid_.ntheta()/2)). // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. - const double h1 = 2.0 * grid.radius(0); - const double h2 = grid.radialSpacing(i_r); - const double k1 = grid.angularSpacing(i_theta - 1); - const double k2 = grid.angularSpacing(i_theta); + const double h1 = 2.0 * grid_.radius(0); + const double h2 = grid_.radialSpacing(i_r); + const double k1 = grid_.angularSpacing(i_theta - 1); + const double k2 = grid_.angularSpacing(i_theta); const double coeff1 = 0.5 * (k1 + k2) / h1; const double coeff2 = 0.5 * (k1 + k2) / h2; const double coeff3 = 0.5 * (h1 + h2) / k1; const double coeff4 = 0.5 * (h1 + h2) / k2; - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); + const int i_theta_M1 = grid_.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid_.wrapThetaIndex(i_theta + 1); + const int i_theta_AcrossOrigin = grid_.wrapThetaIndex(i_theta + grid_.ntheta() / 2); - const int left = grid.index(i_r, i_theta_AcrossOrigin); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); + const int left = grid_.index(i_r, i_theta_AcrossOrigin); + const int bottom = grid_.index(i_r, i_theta_M1); + const int center = grid_.index(i_r, i_theta); + const int top = grid_.index(i_r, i_theta_P1); + const int right = grid_.index(i_r + 1, i_theta); - auto& matrix = inner_boundary_circle_matrix; + auto& matrix = inner_boundary_circle_matrix_; const int center_index = i_theta; const int left_index = i_theta_AcrossOrigin; @@ -247,26 +244,26 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& /* Radial Section: Node next to circular section */ /* --------------------------------------------- */ else if (i_r == numberSmootherCircles) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + double h1 = grid_.radialSpacing(i_r - 1); + double h2 = grid_.radialSpacing(i_r); + double k1 = grid_.angularSpacing(i_theta - 1); + double k2 = grid_.angularSpacing(i_theta); double coeff1 = 0.5 * (k1 + k2) / h1; double coeff2 = 0.5 * (k1 + k2) / h2; double coeff3 = 0.5 * (h1 + h2) / k1; double coeff4 = 0.5 * (h1 + h2) / k2; - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_M1 = grid_.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid_.wrapThetaIndex(i_theta + 1); - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); + const int left = grid_.index(i_r - 1, i_theta); + const int bottom = grid_.index(i_r, i_theta_M1); + const int center = grid_.index(i_r, i_theta); + const int top = grid_.index(i_r, i_theta_P1); + const int right = grid_.index(i_r + 1, i_theta); - auto& solver = radial_tridiagonal_solver; + auto& solver = radial_tridiagonal_solver_; int batch = i_theta; const int center_index = i_r - numberSmootherCircles; @@ -289,27 +286,27 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& /* ------------------------------------------- */ /* Radial Section: Node next to outer boundary */ /* ------------------------------------------- */ - else if (i_r == grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + else if (i_r == grid_.nr() - 2) { + double h1 = grid_.radialSpacing(i_r - 1); + double h2 = grid_.radialSpacing(i_r); + double k1 = grid_.angularSpacing(i_theta - 1); + double k2 = grid_.angularSpacing(i_theta); double coeff1 = 0.5 * (k1 + k2) / h1; double coeff2 = 0.5 * (k1 + k2) / h2; double coeff3 = 0.5 * (h1 + h2) / k1; double coeff4 = 0.5 * (h1 + h2) / k2; - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_M1 = grid_.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid_.wrapThetaIndex(i_theta + 1); - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); + const int left = grid_.index(i_r - 1, i_theta); + const int bottom = grid_.index(i_r, i_theta_M1); + const int center = grid_.index(i_r, i_theta); + const int top = grid_.index(i_r, i_theta_P1); + const int right = grid_.index(i_r + 1, i_theta); - auto& solver = radial_tridiagonal_solver; + auto& solver = radial_tridiagonal_solver_; int batch = i_theta; const int center_index = i_r - numberSmootherCircles; @@ -339,8 +336,8 @@ void SmootherTake::nodeBuildSmootherTake(int i_r, int i_theta, const PolarGrid& /* ------------------------------------------ */ /* Radial Section: Node on the outer boundary */ /* ------------------------------------------ */ - else if (i_r == grid.nr() - 1) { - auto& solver = radial_tridiagonal_solver; + else if (i_r == grid_.nr() - 1) { + auto& solver = radial_tridiagonal_solver_; int batch = i_theta; const int center_index = i_r - numberSmootherCircles; @@ -373,8 +370,7 @@ void SmootherTake::buildAscCircleSection(const int i_r) for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { // Build Asc at the current node - nodeBuildSmootherTake(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildSmootherTake(i_r, i_theta, arr, att, art, detDF, coeff_beta); } } @@ -391,8 +387,7 @@ void SmootherTake::buildAscRadialSection(const int i_theta) for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { // Build Asc at the current node - nodeBuildSmootherTake(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildSmootherTake(i_r, i_theta, arr, att, art, detDF, coeff_beta); } } @@ -406,8 +401,8 @@ void SmootherTake::buildAscMatrices() #ifdef GMGPOLAR_USE_MUMPS // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. - const int i_r = 0; - const int inner_nnz = getNonZeroCountCircleAsc(i_r); + const int inner_i_r = 0; + const int inner_nnz = getNonZeroCountCircleAsc(inner_i_r); const int num_circle_nodes = grid_.ntheta(); inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); inner_boundary_circle_matrix_.is_symmetric(false); diff --git a/src/Smoother/SmootherTake/smootherTake.cpp b/src/Smoother/SmootherTake/smootherTake.cpp index 588da61e..6255d93c 100644 --- a/src/Smoother/SmootherTake/smootherTake.cpp +++ b/src/Smoother/SmootherTake/smootherTake.cpp @@ -21,3 +21,66 @@ SmootherTake::~SmootherTake() finalizeMumpsSolver(inner_boundary_mumps_solver_); #endif } + +// The smoothing solves linear systems of the form: +// A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho +// where: +// - s in {Circle, Radial} denotes the smoother section type, +// - c in {Black, White} denotes the coloring (even/odd line sub-system). +// +// The update sequence is as follows: +// 1. Black-Circle update (u_bc): +// A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho +// 2. White-Circle update (u_wc): +// A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho +// 3. Black-Radial update (u_br): +// A_br * u_br = f_br − A_br^ortho * u_br^ortho +// 4. White-Radial update (u_wr): +// A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho +// +// Algorithm details: +// - 'rhs' corresponds to the f vector, 'x' stores the final solution, +// and 'temp' is used for temporary storage during updates. +// - First, temp is updated with f_sc − A_sc^ortho * u_sc^ortho. +// - The system is then solved in-place in temp, and the results +// are copied back to x. +void SmootherTake::smoothing(Vector x, ConstVector rhs, Vector temp) +{ + assert(x.size() == rhs.size()); + assert(temp.size() == rhs.size()); + + assert(level_cache_.cacheDensityProfileCoefficients()); + assert(level_cache_.cacheDomainGeometry()); + + /* Black Circle Section */ +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r += 2) { + applyAscOrthoCircleSection(i_r, x, rhs, temp); + } /* Implicit barrier */ + + solveBlackCircleSection(x, temp); + + /* White Circle Section */ +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = 1; i_r < grid_.numberSmootherCircles(); i_r += 2) { + applyAscOrthoCircleSection(i_r, x, rhs, temp); + } /* Implicit barrier */ + + solveWhiteCircleSection(x, temp); + + /* Black Radial Section */ +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { + applyAscOrthoRadialSection(i_theta, x, rhs, temp); + } /* Implicit barrier */ + + solveBlackRadialSection(x, temp); + + /* White Radial Section*/ +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { + applyAscOrthoRadialSection(i_theta, x, rhs, temp); + } /* Implicit barrier */ + + solveWhiteRadialSection(x, temp); +} diff --git a/src/Smoother/SmootherTake/solveAscSystem.cpp b/src/Smoother/SmootherTake/solveAscSystem.cpp new file mode 100644 index 00000000..eef1c3c6 --- /dev/null +++ b/src/Smoother/SmootherTake/solveAscSystem.cpp @@ -0,0 +1,91 @@ +#include "../../../include/Smoother/SmootherTake/smootherTake.h" + +void SmootherTake::solveBlackCircleSection(Vector x, Vector temp) +{ + int start = 0; + int end = grid_.numberCircularSmootherNodes(); + Vector circle_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + int batch_offset = 2; + int batch_stride = 2; + circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); + +#ifdef GMGPOLAR_USE_MUMPS + inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; + inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector + inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs + inner_boundary_mumps_solver_.rhs = circle_section.data(); + inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs + dmumps_c(&inner_boundary_mumps_solver_); + if (inner_boundary_mumps_solver_.info[0] != 0) { + std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; + } +#else + inner_boundary_lu_solver_.solveInPlace(circle_section.data()); +#endif + +// Move updated values to x +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} + +void SmootherTake::solveWhiteCircleSection(Vector x, Vector temp) +{ + int start = 0; + int end = grid_.numberCircularSmootherNodes(); + Vector circle_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + int batch_offset = 1; + int batch_stride = 2; + circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); + +// Move updated values to x +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = 1; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} + +void SmootherTake::solveBlackRadialSection(Vector x, Vector temp) +{ + int start = grid_.numberCircularSmootherNodes(); + int end = grid_.numberOfNodes(); + Vector radial_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + int batch_offset = 0; + int batch_stride = 2; + radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride); + +// Move updated values to x +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} + +void SmootherTake::solveWhiteRadialSection(Vector x, Vector temp) +{ + int start = grid_.numberCircularSmootherNodes(); + int end = grid_.numberOfNodes(); + Vector radial_section = Kokkos::subview(temp, Kokkos::make_pair(start, end)); + + int batch_offset = 1; + int batch_stride = 2; + radial_tridiagonal_solver_.solve(radial_section, batch_offset, batch_stride); + +// Move updated values to x +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + x[grid_.index(i_r, i_theta)] = temp[grid_.index(i_r, i_theta)]; + } + } +} From 5e03cded0e357d4907db0448e7a8c893985fcdf4 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 7 Feb 2026 01:22:15 +0100 Subject: [PATCH 6/7] Remove unused code parts --- include/Smoother/SmootherTake/smootherTake.h | 40 ++----- src/Smoother/SmootherTake/matrixStencil.cpp | 110 +++---------------- 2 files changed, 20 insertions(+), 130 deletions(-) diff --git a/include/Smoother/SmootherTake/smootherTake.h b/include/Smoother/SmootherTake/smootherTake.h index 0048366b..2561657f 100644 --- a/include/Smoother/SmootherTake/smootherTake.h +++ b/include/Smoother/SmootherTake/smootherTake.h @@ -115,51 +115,25 @@ class SmootherTake : public Smoother -1, 0, -1, -1, -1, -1 }; - /* Circle Stencils */ - const Stencil circle_stencil_interior_ = { - -1, 2, -1, - -1, 0, -1, - -1, 1, -1 - }; const Stencil circle_stencil_across_origin_ = { -1, 3, -1, 1, 0, -1, -1, 2, -1 }; - /* Radial Stencils */ - const Stencil radial_stencil_interior_ = { - -1, -1, -1, - 1, 0, 2, - -1, -1, -1 - }; - const Stencil radial_stencil_next_outer_DB_ = { - -1, -1, -1, - 1, 0, -1, - -1, -1, -1 - }; - const Stencil radial_stencil_next_circular_smoothing_ = { - -1, -1, -1, - -1, 0, 1, - -1, -1, -1 - }; // clang-format on - // Select correct stencil depending on radial index and boundary type. - const Stencil& getStencil(int i_r) const; + // Select correct stencil depending on the grid position. + const Stencil& getStencil(const int i_r) const; /* Only i_r = 0 implemented */ + // Number of nonzero A_sc entries. + int getNonZeroCountCircleAsc(const int i_r) const; /* Only i_r = 0 implemented */ + // Obtain a ptr to index into COO matrices. + // It accumulates all stencil sizes within a line up to, but excluding the current node. + int getCircleAscIndex(const int i_r, const int i_theta) const; /* Only i_r = 0 implemented */ /* --------------- */ /* Matrix assembly */ /* --------------- */ - // Unused: Number of nonzero A_sc entries for circle/radial sections. - int getNonZeroCountCircleAsc(const int i_r) const; - int getNonZeroCountRadialAsc(const int i_theta) const; - - // Used only for interior boundary A_sc to obtain a ptr to index into COO matrices. - // It accumulates all stencil sizes within a line up to, but excluding the current node. - int getCircleAscIndex(const int i_r, const int i_theta) const; - int getRadialAscIndex(const int i_r, const int i_theta) const; - // Build all A_sc matrices for circle and radial smoothers. void buildAscMatrices(); // Build A_sc matrix block for a single circular line. diff --git a/src/Smoother/SmootherTake/matrixStencil.cpp b/src/Smoother/SmootherTake/matrixStencil.cpp index d86be48e..cd5a8822 100644 --- a/src/Smoother/SmootherTake/matrixStencil.cpp +++ b/src/Smoother/SmootherTake/matrixStencil.cpp @@ -1,117 +1,33 @@ #include "../../../include/Smoother/SmootherTake/smootherTake.h" -const Stencil& SmootherTake::getStencil(int i_r) const +const Stencil& SmootherTake::getStencil(const int i_r) const { - assert(0 <= i_r && i_r < grid_.nr()); - - assert(grid_.numberSmootherCircles() >= 2); - assert(grid_.lengthSmootherRadial() >= 3); - - const int numberSmootherCircles = grid_.numberSmootherCircles(); - - if (i_r < numberSmootherCircles) { - /* Circle Section */ - if (i_r > 0 && i_r < numberSmootherCircles) { - return circle_stencil_interior_; - } - else if (i_r == 0) { - return DirBC_Interior_ ? stencil_DB_ : circle_stencil_across_origin_; - } + if (i_r == 0) { + return DirBC_Interior_ ? stencil_DB_ : circle_stencil_across_origin_; } else { - /* Radial Section */ - if (i_r > numberSmootherCircles && i_r < grid_.nr() - 2) { - return radial_stencil_interior_; - } - else if (i_r == numberSmootherCircles) { - return radial_stencil_next_circular_smoothing_; - } - else if (i_r == grid_.nr() - 1) { - return stencil_DB_; - } - else if (i_r == grid_.nr() - 2) { - return radial_stencil_next_outer_DB_; - } + throw std::out_of_range("getStencil: Only i_r = 0 implemented."); } - throw std::out_of_range("Invalid index for stencil"); } int SmootherTake::getNonZeroCountCircleAsc(const int i_r) const { - assert(i_r >= 0 && i_r < grid_.numberSmootherCircles()); - - const int numberSmootherCircles = grid_.numberSmootherCircles(); - - const int size_stencil_inner_boundary = DirBC_Interior_ ? 1 : 4; - const int size_stencil_interior = 3; - - if (i_r > 0) { - return size_stencil_interior * grid_.ntheta(); - } - else if (i_r == 0) { + if (i_r == 0) { + const int size_stencil_inner_boundary = DirBC_Interior_ ? 1 : 4; return size_stencil_inner_boundary * grid_.ntheta(); } - throw std::out_of_range("Invalid index for nnz_circle_Asc"); -} - -int SmootherTake::getCircleAscIndex(const int i_r, const int i_theta) const -{ - assert(i_r >= 0 && i_r < grid_.numberSmootherCircles()); - - const int numberSmootherCircles = grid_.numberSmootherCircles(); - - const int size_stencil_inner_boundary = DirBC_Interior_ ? 1 : 4; - const int size_stencil_interior = 3; - - if (i_r > 0) { - return size_stencil_interior * i_theta; - } else { - return size_stencil_inner_boundary * i_theta; + throw std::out_of_range("getNonZeroCountCircleAsc: Only i_r = 0 implemented."); } } -int SmootherTake::getNonZeroCountRadialAsc(const int i_theta) const -{ - assert(i_theta >= 0 && i_theta < grid_.ntheta()); - - const int size_stencil_next_circluar_smoothing = 2; - const int size_stencil_interior = 3; - const int size_stencil_next_outer_boundary = 2; - const int size_stencil_outer_boundary = 1; - - assert(grid_.lengthSmootherRadial() >= 3); - - return size_stencil_next_circluar_smoothing + (grid_.lengthSmootherRadial() - 3) * size_stencil_interior + - size_stencil_next_outer_boundary + size_stencil_outer_boundary; -} - -int SmootherTake::getRadialAscIndex(const int i_r, const int i_theta) const +int SmootherTake::getCircleAscIndex(const int i_r, const int i_theta) const { - assert(i_theta >= 0 && i_theta < grid_.ntheta()); - - const int size_stencil_next_circluar_smoothing = 2; - const int size_stencil_interior = 3; - const int size_stencil_next_outer_boundary = 2; - const int size_stencil_outer_boundary = 1; - - assert(grid_.lengthSmootherRadial() >= 3); - assert(grid_.numberSmootherCircles() >= 2); - - const int numberSmootherCircles = grid_.numberSmootherCircles(); - - if (i_r > numberSmootherCircles && i_r < grid_.nr() - 2) { - return size_stencil_next_circluar_smoothing + (i_r - numberSmootherCircles - 1) * size_stencil_interior; - } - else if (i_r == numberSmootherCircles) { - return 0; - } - else if (i_r == grid_.nr() - 2) { - return size_stencil_next_circluar_smoothing + (grid_.lengthSmootherRadial() - 3) * size_stencil_interior; + if (i_r == 0) { + const int size_stencil_inner_boundary = DirBC_Interior_ ? 1 : 4; + return size_stencil_inner_boundary * i_theta; } - else if (i_r == grid_.nr() - 1) { - return size_stencil_next_circluar_smoothing + (grid_.lengthSmootherRadial() - 3) * size_stencil_interior + - size_stencil_next_outer_boundary; + else { + throw std::out_of_range("getCircleAscIndex: Only i_r = 0 implemented."); } - throw std::out_of_range("Invalid index for stencil"); } From 109c994a789b921c447c801d621b08d8b87f3e46 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sat, 7 Feb 2026 01:23:06 +0100 Subject: [PATCH 7/7] Update CMake --- src/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 60e5fbd3..cf376bb7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -124,11 +124,12 @@ set(SMOOTHER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/smootherSolver.cpp # SmootherTake + ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/applyAscOrtho.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/buildMatrix.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/initializeMumps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/matrixStencil.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/smootherSolver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/smootherTake.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/solveAscSystem.cpp ) # file(GLOB_RECURSE EXTRAPOLATED_SMOOTHER_SOURCES