diff --git a/AMDiS/src/parallel/PetscProblemStat.cc b/AMDiS/src/parallel/PetscProblemStat.cc
index 0bde5bede490604dd104b21af00d637a14e260ff..6e03c86bca056e5812326ef8ba64880b3301f220 100644
--- a/AMDiS/src/parallel/PetscProblemStat.cc
+++ b/AMDiS/src/parallel/PetscProblemStat.cc
@@ -89,7 +89,6 @@ namespace AMDiS {

     petscSolver->fillPetscRhs(rhs);

-
 #if 0
     processMemUsage(vm, rss);
     MSG("STAGE 2\n");
diff --git a/AMDiS/src/parallel/PetscSolverFeti.cc b/AMDiS/src/parallel/PetscSolverFeti.cc
index 7930f7254b2405a84045271b13d9e7e47de1be27..688fb21467c8fcc8cb81d5bc4de6819969a7daa6 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.cc
+++ b/AMDiS/src/parallel/PetscSolverFeti.cc
@@ -18,6 +18,7 @@ namespace AMDiS {

   using namespace std;

+  FetiStatisticsData PetscSolverFeti::fetiStatistics;

 #ifdef HAVE_PETSC_DEV
   // y = mat * x
@@ -25,17 +26,20 @@ namespace AMDiS {
   {
     // S_PiPi = K_PiPi - K_PiB inv(K_BB) K_BPi

+    double first = MPI::Wtime();
     void *ctx;
     MatShellGetContext(mat, &ctx);
     SchurPrimalData* data = static_cast<SchurPrimalData*>(ctx);

     MatMult(*(data->mat_b_primal), x, data->tmp_vec_b);
-    KSPSolve(*(data->ksp_b), data->tmp_vec_b, data->tmp_vec_b);
-
+    data->fetiSolver->solveLocalProblem(data->tmp_vec_b, data->tmp_vec_b);
     MatMult(*(data->mat_primal_b), data->tmp_vec_b, data->tmp_vec_primal);
     MatMult(*(data->mat_primal_primal), x, y);
     VecAXPBY(y, -1.0, 1.0, data->tmp_vec_primal);

+    PetscSolverFeti::fetiStatistics.nSchurPrimalApply++;
+    PetscSolverFeti::fetiStatistics.timeSchurPrimalApply += MPI::Wtime() - first;
+
     return 0;
   }

@@ -52,17 +56,25 @@ namespace AMDiS {

     // tmp_vec_b0 = inv(K_BB) trans(L) x
     MatMultTranspose(*(data->mat_lagrange), x, data->tmp_vec_b0);
-    KSPSolve(*(data->ksp_b), data->tmp_vec_b0, data->tmp_vec_b0);
+    data->fetiSolver->solveLocalProblem(data->tmp_vec_b0, data->tmp_vec_b0);

     // tmp_vec_b1 = inv(K_BB) K_BPi inv(S_PiPi) K_PiB tmp_vec_b0
     MatMult(*(data->mat_primal_b), data->tmp_vec_b0, data->tmp_vec_primal);
+
+    double first = MPI::Wtime();
     KSPSolve(*(data->ksp_schur_primal), data->tmp_vec_primal, data->tmp_vec_primal);
+    PetscSolverFeti::fetiStatistics.nSchurPrimalSolve++;
+    PetscSolverFeti::fetiStatistics.timeSchurPrimalSolve +=
+      MPI::Wtime() - first;
+
     MatMult(*(data->mat_b_primal), data->tmp_vec_primal, data->tmp_vec_b1);

     // y = L (tmp_vec_b0 + tmp_vec_b1)
     VecAXPBY(data->tmp_vec_b0, 1.0, 1.0, data->tmp_vec_b1);
     MatMult(*(data->mat_lagrange), data->tmp_vec_b0, y);

+    PetscSolverFeti::fetiStatistics.nFetiApply++;
+
     return 0;
   }

@@ -183,7 +195,8 @@ namespace AMDiS {

   PetscSolverFeti::PetscSolverFeti()
     : PetscSolver(),
-      nComponents(-1)
+      nComponents(-1),
+      schurPrimalSolver(0)
   {
     FUNCNAME("PetscSolverFeti::PetscSolverFeti()");

@@ -196,8 +209,14 @@ namespace AMDiS {
     } else if (preconditionerName == "lumped") {
       fetiPreconditioner = FETI_LUMPED;
     } else {
-      ERROR_EXIT("Preconditioner \"%s\" not available!\n", preconditionerName.c_str());
+      ERROR_EXIT("Preconditioner \"%s\" not available!\n",
+                 preconditionerName.c_str());
     }
+
+    Parameters::get("parallel->feti->schur primal solver", schurPrimalSolver);
+    TEST_EXIT(schurPrimalSolver == 0 || schurPrimalSolver == 1)
+      ("Wrong solver \"%d\" for the Schur primal complement!\n",
+       schurPrimalSolver);
   }

@@ -492,7 +511,7 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::createIndeB()");

-    globalIndexB.clear();
+    localIndexB.clear();
     DOFAdmin* admin = meshDistributor->getFeSpace()->getAdmin();

     // === To ensure that all interior node on each rank are listen first in ===
@@ -502,15 +521,15 @@ namespace AMDiS {
     for (int i = 0; i < admin->getUsedSize(); i++)
       if (admin->isDofFree(i) == false && primals.count(i) == 0)
         if (duals.count(i) == 0 && primals.count(i) == 0)
-          globalIndexB[i] = -1;
+          localIndexB[i] = -1;


     // === Get correct index for all interior nodes. ===

     nLocalInterior = 0;
-    for (DofMapping::iterator it = globalIndexB.begin();
-         it != globalIndexB.end(); ++it) {
-      it->second = nLocalInterior + rStartB;
+    for (DofMapping::iterator it = localIndexB.begin();
+         it != localIndexB.end(); ++it) {
+      it->second = nLocalInterior;
       nLocalInterior++;
     }
     nLocalDuals = duals.size();
@@ -524,7 +543,7 @@ namespace AMDiS {

     for (DofIndexSet::iterator it = duals.begin();
          it != duals.end(); ++it)
-      globalIndexB[*it] = globalDualIndex[*it];
+      localIndexB[*it] = globalDualIndex[*it] - rStartB;
   }

@@ -590,31 +609,47 @@ namespace AMDiS {
   {
     FUNCNAME("PetscSolverFeti::createSchurPrimal()");

-    schurPrimalData.mat_primal_primal = &mat_primal_primal;
-    schurPrimalData.mat_primal_b = &mat_primal_b;
-    schurPrimalData.mat_b_primal = &mat_b_primal;
-    schurPrimalData.ksp_b = &ksp_b;
+    MSG("MAT SCHUR PRIMAL SOLVER = %d\n", schurPrimalSolver);

-    VecCreateMPI(PETSC_COMM_WORLD,
-                 nRankB * nComponents, nOverallB * nComponents,
-                 &(schurPrimalData.tmp_vec_b));
-    VecCreateMPI(PETSC_COMM_WORLD,
-                 nRankPrimals * nComponents, nOverallPrimals * nComponents,
-                 &(schurPrimalData.tmp_vec_primal));
+    if (schurPrimalSolver == 0) {
+      schurPrimalData.mat_primal_primal = &mat_primal_primal;
+      schurPrimalData.mat_primal_b = &mat_primal_b;
+      schurPrimalData.mat_b_primal = &mat_b_primal;
+      schurPrimalData.fetiSolver = this;
+
+      VecCreateMPI(PETSC_COMM_WORLD,
+                   nRankB * nComponents, nOverallB * nComponents,
+                   &(schurPrimalData.tmp_vec_b));
+      VecCreateMPI(PETSC_COMM_WORLD,
+                   nRankPrimals * nComponents, nOverallPrimals * nComponents,
+                   &(schurPrimalData.tmp_vec_primal));
+
+      MatCreateShell(PETSC_COMM_WORLD,
+                     nRankPrimals * nComponents, nRankPrimals * nComponents,
+                     nOverallPrimals * nComponents, nOverallPrimals * nComponents,
+                     &schurPrimalData,
+                     &mat_schur_primal);
+      MatShellSetOperation(mat_schur_primal, MATOP_MULT,
+                           (void(*)(void))petscMultMatSchurPrimal);
+
+      KSPCreate(PETSC_COMM_WORLD, &ksp_schur_primal);
+      KSPSetOperators(ksp_schur_primal, mat_schur_primal, mat_schur_primal, SAME_NONZERO_PATTERN);
+      KSPSetOptionsPrefix(ksp_schur_primal, "solver_sp_");
+      KSPSetFromOptions(ksp_schur_primal);
+    } else {
+      MatDuplicate(mat_primal_primal, MAT_COPY_VALUES, &mat_schur_primal);
+      Mat tmp0, tmp1;
+      MatMatSolve(mat_b_b, mat_b_primal, tmp0);
+      MatMatMult(mat_primal_b, tmp0, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &tmp1);

-    MatCreateShell(PETSC_COMM_WORLD,
-                   nRankPrimals * nComponents, nRankPrimals * nComponents,
-                   nOverallPrimals * nComponents, nOverallPrimals * nComponents,
-                   &schurPrimalData,
-                   &mat_schur_primal);
-    MatShellSetOperation(mat_schur_primal, MATOP_MULT,
-                         (void(*)(void))petscMultMatSchurPrimal);
-
-    KSPCreate(PETSC_COMM_WORLD, &ksp_schur_primal);
-    KSPSetOperators(ksp_schur_primal, mat_schur_primal, mat_schur_primal, SAME_NONZERO_PATTERN);
-    KSPSetOptionsPrefix(ksp_schur_primal, "solver_sp_");
-    KSPSetFromOptions(ksp_schur_primal);
+      MatDestroy(&tmp0);
+      MatDestroy(&tmp1);
+
+      MSG("EXIT!\n");
+
+      exit(0);
+    }
   }

@@ -625,7 +660,7 @@ namespace AMDiS {
     schurPrimalData.mat_primal_primal = PETSC_NULL;
     schurPrimalData.mat_primal_b = PETSC_NULL;
     schurPrimalData.mat_b_primal = PETSC_NULL;
-    schurPrimalData.ksp_b = PETSC_NULL;
+    schurPrimalData.fetiSolver = NULL;

     VecDestroy(&schurPrimalData.tmp_vec_b);
     VecDestroy(&schurPrimalData.tmp_vec_primal);
@@ -645,7 +680,7 @@ namespace AMDiS {
     fetiData.mat_primal_b = &mat_primal_b;
     fetiData.mat_b_primal = &mat_b_primal;
     fetiData.mat_lagrange = &mat_lagrange;
-    fetiData.ksp_b = &ksp_b;
+    fetiData.fetiSolver = this;
     fetiData.ksp_schur_primal = &ksp_schur_primal;

     VecCreateMPI(PETSC_COMM_WORLD,
@@ -681,7 +716,8 @@ namespace AMDiS {
     switch (fetiPreconditioner) {
     case FETI_DIRICHLET:
       KSPCreate(PETSC_COMM_SELF, &ksp_interior);
-      KSPSetOperators(ksp_interior, mat_interior_interior, mat_interior_interior, SAME_NONZERO_PATTERN);
+      KSPSetOperators(ksp_interior, mat_interior_interior, mat_interior_interior,
+                      SAME_NONZERO_PATTERN);
       KSPSetOptionsPrefix(ksp_interior, "solver_interior_");
       KSPSetFromOptions(ksp_interior);

@@ -736,7 +772,7 @@ namespace AMDiS {
     fetiData.mat_primal_b = PETSC_NULL;
     fetiData.mat_b_primal = PETSC_NULL;
     fetiData.mat_lagrange = PETSC_NULL;
-    fetiData.ksp_b = PETSC_NULL;
+    fetiData.fetiSolver = NULL;
     fetiData.ksp_schur_primal = PETSC_NULL;

     VecDestroy(&fetiData.tmp_vec_b0);
@@ -844,9 +880,9 @@ namespace AMDiS {
     for (int i = 0; i < nComponents; i++) {
       DOFVector<double>& dofVec = *(vec.getDOFVector(i));

-      for (DofMapping::iterator it = globalIndexB.begin();
-           it != globalIndexB.end(); ++it) {
-        int petscIndex = (it->second - rStartB) * nComponents + i;
+      for (DofMapping::iterator it = localIndexB.begin();
+           it != localIndexB.end(); ++it) {
+        int petscIndex = it->second * nComponents + i;
         dofVec[it->first] = localSolB[petscIndex];
       }

@@ -886,9 +922,12 @@ namespace AMDiS {
     int nRowsInterior = nLocalInterior * nComponents;
     int nRowsDual = nLocalDuals * nComponents;

-    MatCreateMPIAIJ(PETSC_COMM_WORLD,
-                    nRowsRankB, nRowsRankB, nRowsOverallB, nRowsOverallB,
-                    30, PETSC_NULL, 0, PETSC_NULL, &mat_b_b);
+//     MatCreateMPIAIJ(PETSC_COMM_WORLD,
+//                     nRowsRankB, nRowsRankB, nRowsOverallB, nRowsOverallB,
+//                     30, PETSC_NULL, 0, PETSC_NULL, &mat_b_b);
+
+    MatCreateSeqAIJ(PETSC_COMM_SELF, nRowsRankB, nRowsRankB, 30, PETSC_NULL,
+                    &mat_b_b);

     MatCreateMPIAIJ(PETSC_COMM_WORLD,
                     nRowsRankPrimal, nRowsRankPrimal,
@@ -1004,10 +1043,11 @@ namespace AMDiS {
           } else {
             // Column is not a primal variable.

-            TEST_EXIT_DBG(globalIndexB.count(col(*icursor)))
+            TEST_EXIT_DBG(localIndexB.count(col(*icursor)))
               ("No global B index for DOF %d!\n", col(*icursor));

-            int colIndex = globalIndexB[col(*icursor)] * nComponents + j;
+            int colIndex =
+              (localIndexB[col(*icursor)] + rStartB) * nComponents + j;

             if (rowPrimal) {
               colsOther.push_back(colIndex);
@@ -1023,33 +1063,31 @@ namespace AMDiS {
           // === For preconditioner ===

           if (!rowPrimal && !colPrimal) {
-            int rowIndex = globalIndexB[*cursor] - rStartB;
-            int colIndex = globalIndexB[col(*icursor)] - rStartB;
+            int rowIndex = localIndexB[*cursor];
+            int colIndex = localIndexB[col(*icursor)];

             if (rowIndex < nLocalInterior) {
               if (colIndex < nLocalInterior) {
-                int colIndex2 =
-                  (globalIndexB[col(*icursor)] - rStartB) * nComponents + j;
+                int colIndex2 = localIndexB[col(*icursor)] * nComponents + j;

                 colsLocal.push_back(colIndex2);
                 valuesLocal.push_back(value(*icursor));
               } else {
-                int colIndex2 =
-                  (globalIndexB[col(*icursor)] - rStartB - nLocalInterior) * nComponents + j;
+                int colIndex2 = (localIndexB[col(*icursor)] - nLocalInterior) *
+                  nComponents + j;

                 colsLocalOther.push_back(colIndex2);
                 valuesLocalOther.push_back(value(*icursor));
               }
             } else {
               if (colIndex < nLocalInterior) {
-                int colIndex2 =
-                  (globalIndexB[col(*icursor)] - rStartB) * nComponents + j;
+                int colIndex2 = localIndexB[col(*icursor)] * nComponents + j;

                 colsLocalOther.push_back(colIndex2);
                 valuesLocalOther.push_back(value(*icursor));
               } else {
-                int colIndex2 =
-                  (globalIndexB[col(*icursor)] - rStartB - nLocalInterior) * nComponents + j;
+                int colIndex2 = (localIndexB[col(*icursor)] - nLocalInterior) *
+                  nComponents + j;

                 colsLocal.push_back(colIndex2);
                 valuesLocal.push_back(value(*icursor));
@@ -1072,10 +1110,13 @@ namespace AMDiS {
           MatSetValues(mat_primal_b, 1, &rowIndex, colsOther.size(),
                        &(colsOther[0]), &(valuesOther[0]), ADD_VALUES);
         } else {
-          TEST_EXIT_DBG(globalIndexB.count(*cursor))
+          TEST_EXIT_DBG(localIndexB.count(*cursor))
             ("Should not happen!\n");

-          int rowIndex = globalIndexB[*cursor] * nComponents + i;
+          // int rowIndex = (localIndexB[*cursor] + rStartB) * nComponents + i;
+          int rowIndex = localIndexB[*cursor] * nComponents + i;
+          for (unsigned int k = 0; k < cols.size(); k++)
+            cols[k] -= rStartB * nComponents;
           MatSetValues(mat_b_b, 1, &rowIndex, cols.size(),
                        &(cols[0]), &(values[0]), ADD_VALUES);

@@ -1090,11 +1131,10 @@ namespace AMDiS {
         switch (fetiPreconditioner) {
         case FETI_DIRICHLET:
           {
-            int rowIndex = globalIndexB[*cursor] - rStartB;
+            int rowIndex = localIndexB[*cursor];

             if (rowIndex < nLocalInterior) {
-              int rowIndex2 =
-                (globalIndexB[*cursor] - rStartB) * nComponents + i;
+              int rowIndex2 = localIndexB[*cursor] * nComponents + i;

               MatSetValues(mat_interior_interior, 1, &rowIndex2, colsLocal.size(),
                            &(colsLocal[0]), &(valuesLocal[0]), INSERT_VALUES);
@@ -1104,7 +1144,7 @@ namespace AMDiS {
                            &(colsLocalOther[0]), &(valuesLocalOther[0]), INSERT_VALUES);
             } else {
               int rowIndex2 =
-                (globalIndexB[*cursor] - rStartB - nLocalInterior) * nComponents + i;
+                (localIndexB[*cursor] - nLocalInterior) * nComponents + i;

               MatSetValues(mat_duals_duals, 1, &rowIndex2, colsLocal.size(),
                            &(colsLocal[0]), &(valuesLocal[0]), INSERT_VALUES);
@@ -1119,11 +1159,11 @@ namespace AMDiS {

         case FETI_LUMPED:
           {
-            int rowIndex = globalIndexB[*cursor] - rStartB;
+            int rowIndex = localIndexB[*cursor];

             if (rowIndex >= nLocalInterior) {
               int rowIndex2 =
-                (globalIndexB[*cursor] - rStartB - nLocalInterior) * nComponents + i;
+                (localIndexB[*cursor] - nLocalInterior) * nComponents + i;

               MatSetValues(mat_duals_duals, 1, &rowIndex2, colsLocal.size(),
                            &(colsLocal[0]), &(valuesLocal[0]), INSERT_VALUES);
@@ -1186,7 +1226,7 @@ namespace AMDiS {

     // === Create solver for the non primal (thus local) variables. ===

-    KSPCreate(PETSC_COMM_WORLD, &ksp_b);
+    KSPCreate(PETSC_COMM_SELF, &ksp_b);
     KSPSetOperators(ksp_b, mat_b_b, mat_b_b, SAME_NONZERO_PATTERN);
     KSPSetOptionsPrefix(ksp_b, "solver_b_");
     KSPSetFromOptions(ksp_b);
@@ -1200,7 +1240,8 @@ namespace AMDiS {
     int nComponents = vec->getSize();

     VecCreateMPI(PETSC_COMM_WORLD,
-                 nRankB * nComponents, nOverallB * nComponents, &f_b);
+                 nRankB * nComponents,
+                 nOverallB * nComponents, &f_b);
     VecCreateMPI(PETSC_COMM_WORLD,
                  nRankPrimals * nComponents, nOverallPrimals * nComponents,
                  &f_primal);
@@ -1217,12 +1258,11 @@ namespace AMDiS {
           double value = *dofIt;
           VecSetValues(f_primal, 1, &index, &value, ADD_VALUES);
         } else {
-          TEST_EXIT_DBG(globalIndexB.count(index))
+          TEST_EXIT_DBG(localIndexB.count(index))
             ("Should not happen!\n");

-          index = globalIndexB[index] * nComponents + i;
-          double value = *dofIt;
-          VecSetValues(f_b, 1, &index, &value, ADD_VALUES);
+          index = (localIndexB[index] + rStartB) * nComponents + i;
+          VecSetValue(f_b, index, *dofIt, INSERT_VALUES);
         }
       }
     }
@@ -1234,134 +1274,38 @@ namespace AMDiS {
     VecAssemblyEnd(f_primal);
   }

-
-  void PetscSolverFeti::solveFetiMatrix(SystemVector &vec)
-  {
-    FUNCNAME("PetscSolverFeti::solveFetiMatrix()");
-
-    Mat mat_lagrange_transpose;
-    MatTranspose(mat_lagrange, MAT_INITIAL_MATRIX, &mat_lagrange_transpose);
-
-    // === Create nested matrix which will contain the overall FETI system. ===
-
-    Mat A;
-    Mat nestedA[3][3];
-    nestedA[0][0] = mat_b_b;
-    nestedA[0][1] = mat_b_primal;
-    nestedA[0][2] = mat_lagrange_transpose;
-    nestedA[1][0] = mat_primal_b;
-    nestedA[1][1] = mat_primal_primal;
-    nestedA[1][2] = PETSC_NULL;
-    nestedA[2][0] = mat_lagrange;
-    nestedA[2][1] = PETSC_NULL;
-    nestedA[2][2] = PETSC_NULL;
-
-    MatCreateNest(PETSC_COMM_WORLD, 3, PETSC_NULL, 3, PETSC_NULL, &(nestedA[0][0]), &A);
-
-    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
-    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
+  void PetscSolverFeti::solveLocalProblem(Vec &rhs, Vec &sol)
+  {
+    FUNCNAME("PetscSolverFeti::solveLocalProblem()");

+    Vec tmp;
+    VecCreateSeq(PETSC_COMM_SELF, nRankB * nComponents, &tmp);
+    PetscScalar *tmpValues;
+    VecGetArray(tmp, &tmpValues);

-    int nRankNest = (nRankB + nRankPrimals + nRankLagrange) * nComponents;
-    int nOverallNest = (nOverallB + nOverallPrimals + nOverallLagrange) * nComponents;
-    int rStartNest = (rStartB + rStartPrimals + rStartLagrange) * nComponents;
-
-    {
-      // === Test some matrix sizes. ===
-
-      int matRow, matCol;
-      MatGetLocalSize(A, &matRow, &matCol);
-      TEST_EXIT_DBG(matRow == nRankNest)("Should not happen!\n");
-      mpi::globalAdd(matRow);
-      TEST_EXIT_DBG(matRow == nOverallNest)("Should not happen!\n");
-
-      MatGetOwnershipRange(A, &matRow, &matCol);
-      TEST_EXIT_DBG(matRow == rStartNest)("Should not happen!\n");
-    }
-
-    // === Create rhs and solution vectors for the overall FETI system. ===
-
-    Vec f, b;
-    VecCreateMPI(PETSC_COMM_WORLD, nRankNest, nOverallNest, &f);
-    VecDuplicate(f, &b);
-
-
-    // === Fill rhs vector by coping the primal and non primal PETSc vectors. ===
-
-    PetscScalar *local_f_b;
-    VecGetArray(f_b, &local_f_b);
-
-    PetscScalar *local_f_primal;
-    VecGetArray(f_primal, &local_f_primal);
-
-    {
-      int size;
-      VecGetLocalSize(f_b, &size);
-      TEST_EXIT_DBG(size == nRankB * nComponents)("Should not happen!\n");
-      VecGetLocalSize(f_primal, &size);
-      TEST_EXIT_DBG(size == nRankPrimals * nComponents)("Should not happen!\n");
-    }
-
-    PetscScalar *local_f;
-    VecGetArray(f, &local_f);
+    PetscScalar *rhsValues;
+    VecGetArray(rhs, &rhsValues);

-    int index = 0;
     for (int i = 0; i < nRankB * nComponents; i++)
-      local_f[index++] = local_f_b[i];
-    for (int i = 0; i < nRankPrimals * nComponents; i++)
-      local_f[index++] = local_f_primal[i];
+      tmpValues[i] = rhsValues[i];

-    VecRestoreArray(f, &local_f);
-    VecRestoreArray(f_b, &local_f_b);
-    VecRestoreArray(f_primal, &local_f_primal);
-
-
-    // === Create solver and solve the overall FETI system. ===
-
-    KSP ksp;
-    KSPCreate(PETSC_COMM_WORLD, &ksp);
-    KSPSetOperators(ksp, A, A, SAME_NONZERO_PATTERN);
-    KSPSetFromOptions(ksp);
+    VecRestoreArray(rhs, &rhsValues);
+    VecRestoreArray(tmp, &tmpValues);

+    KSPSolve(ksp_b, tmp, tmp);

-    KSPSolve(ksp, f, b);
+    VecGetArray(tmp, &tmpValues);
+    PetscScalar *solValues;
+    VecGetArray(sol, &solValues);

+    for (int i = 0; i < nRankB * nComponents; i++)
+      solValues[i] = tmpValues[i];

-    // === Reconstruct FETI solution vectors. ===
-
-    Vec u_b, u_primal;
-    VecDuplicate(f_b, &u_b);
-    VecDuplicate(f_primal, &u_primal);
-
+    VecRestoreArray(sol, &solValues);
+    VecRestoreArray(tmp, &tmpValues);

-    PetscScalar *local_b;
-    VecGetArray(b, &local_b);
-
-    PetscScalar *local_u_b;
-    VecGetArray(u_b, &local_u_b);
-
-    PetscScalar *local_u_primal;
-    VecGetArray(u_primal, &local_u_primal);
-
-    index = 0;
-    for (int i = 0; i < nRankB * nComponents; i++)
-      local_u_b[i] = local_b[index++];
-    for (int i = 0; i < nRankPrimals * nComponents; i++)
-      local_u_primal[i] = local_b[index++];
-
-    VecRestoreArray(f, &local_b);
-    VecRestoreArray(u_b, &local_u_b);
-    VecRestoreArray(u_primal, &local_u_primal);
-
-    recoverSolution(u_b, u_primal, vec);
-
-    VecDestroy(&u_b);
-    VecDestroy(&u_primal);
-    VecDestroy(&b);
-    VecDestroy(&f);
-
-    KSPDestroy(&ksp);
+    VecDestroy(&tmp);
   }

@@ -1376,8 +1320,8 @@ namespace AMDiS {
     Vec tmp_b0, tmp_b1, tmp_lagrange0, tmp_primal0, tmp_primal1;
     MatGetVecs(mat_lagrange, PETSC_NULL, &tmp_lagrange0);
     MatGetVecs(mat_lagrange, PETSC_NULL, &vec_rhs);
-    MatGetVecs(mat_b_b, PETSC_NULL, &tmp_b0);
-    MatGetVecs(mat_b_b, PETSC_NULL, &tmp_b1);
+    VecDuplicate(f_b, &tmp_b0);
+    VecDuplicate(f_b, &tmp_b1);
     MatGetVecs(mat_primal_primal, PETSC_NULL, &tmp_primal0);
     MatGetVecs(mat_primal_primal, PETSC_NULL, &tmp_primal1);

@@ -1387,7 +1331,8 @@ namespace AMDiS {
     //    d = L inv(K_BB) f_B - L inv(K_BB) K_BPi inv(S_PiPi) [f_Pi - K_PiB inv(K_BB) f_B]

     // vec_rhs = L * inv(K_BB) * f_B
-    KSPSolve(ksp_b, f_b, tmp_b0);
+    solveLocalProblem(f_b, tmp_b0);
+
     MatMult(mat_lagrange, tmp_b0, vec_rhs);

     // tmp_primal0 = M_PiB * inv(K_BB) * f_B
@@ -1397,11 +1342,15 @@ namespace AMDiS {
     VecAXPBY(tmp_primal0, 1.0, -1.0, f_primal);

     // tmp_primal0 = inv(S_PiPi) (f_Pi - M_PiB * inv(K_BB) * f_B)
+    double first = MPI::Wtime();
     KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
+    PetscSolverFeti::fetiStatistics.nSchurPrimalSolve++;
+    PetscSolverFeti::fetiStatistics.timeSchurPrimalSolve +=
+      MPI::Wtime() - first;

     //
     MatMult(mat_b_primal, tmp_primal0, tmp_b0);
-    KSPSolve(ksp_b, tmp_b0, tmp_b0);
+    solveLocalProblem(tmp_b0, tmp_b0);
     MatMult(mat_lagrange, tmp_b0, tmp_lagrange0);

     //
@@ -1410,23 +1359,26 @@ namespace AMDiS {

     // === Solve with FETI-DP operator. ===

+    first = MPI::Wtime();
     KSPSolve(ksp_feti, vec_rhs, vec_rhs);
+    fetiStatistics.timeFetiApply = MPI::Wtime() - first;


     // === Solve for u_primals. ===

     VecCopy(f_primal, tmp_primal0);
-
-    KSPSolve(ksp_b, f_b, tmp_b0);
+    solveLocalProblem(f_b, tmp_b0);
     MatMult(mat_primal_b, tmp_b0, tmp_primal1);
-
     VecAXPBY(tmp_primal0, -1.0, 1.0, tmp_primal1);
-
     MatMultTranspose(mat_lagrange, vec_rhs, tmp_b0);
-    KSPSolve(ksp_b, tmp_b0, tmp_b0);
+    solveLocalProblem(tmp_b0, tmp_b0);
     MatMult(mat_primal_b, tmp_b0, tmp_primal1);
     VecAXPBY(tmp_primal0, 1.0, 1.0, tmp_primal1);

+    first = MPI::Wtime();
     KSPSolve(ksp_schur_primal, tmp_primal0, tmp_primal0);
+    PetscSolverFeti::fetiStatistics.nSchurPrimalSolve++;
+    PetscSolverFeti::fetiStatistics.timeSchurPrimalSolve +=
+      MPI::Wtime() - first;

@@ -1437,8 +1389,7 @@ namespace AMDiS {

     MatMult(mat_b_primal, tmp_primal0, tmp_b1);
     VecAXPBY(tmp_b0, -1.0, 1.0, tmp_b1);
-
-    KSPSolve(ksp_b, tmp_b0, tmp_b0);
+    solveLocalProblem(tmp_b0, tmp_b0);

     // === And recover AMDiS solution vectors. ===

@@ -1487,6 +1438,7 @@ namespace AMDiS {
       KSPDestroy(&ksp_b);
   }

+
   void PetscSolverFeti::solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo)
   {
     FUNCNAME("PetscSolverFeti::solvePetscMatrix()");
@@ -1494,18 +1446,42 @@ namespace AMDiS {
     int debug = 0;
     Parameters::get("parallel->debug feti", debug);

-    if (debug) {
-      WARNING("FETI matrix is solved globally, thus without reducing to the lagrange multipliers!\n");
-
-      solveFetiMatrix(vec);
-    } else {
-      solveReducedFetiMatrix(vec);
-    }
+    resetStatistics();
+    solveReducedFetiMatrix(vec);
+    printStatistics();

     MeshDistributor::globalMeshDistributor->synchVector(vec);
   }


+  void PetscSolverFeti::resetStatistics()
+  {
+    fetiStatistics.nFetiApply = 0;
+    fetiStatistics.timeFetiApply = 0.0;
+    fetiStatistics.nSchurPrimalApply = 0;
+    fetiStatistics.timeSchurPrimalApply = 0.0;
+    fetiStatistics.nSchurPrimalSolve = 0;
+    fetiStatistics.timeSchurPrimalSolve = 0.0;
+    fetiStatistics.nLocalSolve = 0;
+    fetiStatistics.timeLocalSolve = 0.0;
+  }
+
+
+  void PetscSolverFeti::printStatistics()
+  {
+    MSG("PetscSolverFeti::printStatistics()\n");
+
+    if (MPI::COMM_WORLD.Get_rank() == 0)
+      MSG("FETI-STATISTICS: %d %f %d %f %d %f %d %f\n",
+          fetiStatistics.nFetiApply,
+          fetiStatistics.timeFetiApply,
+          fetiStatistics.nSchurPrimalApply,
+          fetiStatistics.timeSchurPrimalApply,
+          fetiStatistics.nSchurPrimalSolve,
+          fetiStatistics.timeSchurPrimalSolve,
+          fetiStatistics.nLocalSolve,
+          fetiStatistics.timeLocalSolve);
+  }
 #endif

 }
diff --git a/AMDiS/src/parallel/PetscSolverFeti.h b/AMDiS/src/parallel/PetscSolverFeti.h
index 6607b79cef06560eb5bf00e23da9a550096432b3..e9feae304362908a6f6dc8f8b2c4186ab82110c7 100644
--- a/AMDiS/src/parallel/PetscSolverFeti.h
+++ b/AMDiS/src/parallel/PetscSolverFeti.h
@@ -33,6 +33,8 @@ namespace AMDiS {

 #ifdef HAVE_PETSC_DEV

+  class PetscSolverFeti;
+
   /** \brief
    * This structure is used when defining the MatShell operation for solving
    * primal schur complement. \ref petscMultMatSchurPrimal
@@ -50,8 +52,7 @@ namespace AMDiS {
     /// Temporal vecor in the primal variables.
     Vec tmp_vec_primal;

-    /// Pointer to the solver for \ref PetscSolverFeti::mat_bb.
-    KSP *ksp_b;
+    PetscSolverFeti *fetiSolver;
   };

@@ -76,8 +77,7 @@ namespace AMDiS {
     /// Temporal vector on the primal variables.
     Vec tmp_vec_primal;

-    /// Pointer to the solver for \ref PetscSolverFeti::mat_bb.
-    KSP *ksp_b;
+    PetscSolverFeti *fetiSolver;

     /// Pointer to the solver of the schur complement on the primal variables.
     KSP *ksp_schur_primal;
@@ -125,6 +125,34 @@ namespace AMDiS {
   } FetiPreconditioner;


+  struct FetiStatisticsData
+  {
+    /// Number of applications of the FETI-DP operator.
+    int nFetiApply;
+
+    /// Time for solving the reduced FETI system.
+    double timeFetiApply;
+
+    /// Number of applications of the Schur primal operator.
+    int nSchurPrimalApply;
+
+    /// Time for applying the Schur primal operator.
+    double timeSchurPrimalApply;
+
+    /// Number of solves of the Schur primal system.
+    int nSchurPrimalSolve;
+
+    /// Time for solving the Schur primal system.
+    double timeSchurPrimalSolve;
+
+    /// Number of solves of the local subdomain problems.
+    int nLocalSolve;
+
+    /// Time for solving the local subdomain problems.
+    double timeLocalSolve;
+  };
+
+
   /** \brief
    * FETI-DP implementation based on PETSc.
    */
@@ -156,6 +184,8 @@ namespace AMDiS {
         MeshDistributor::BOUNDARY_FILL_INFO_RECV_DOFS;
     }

+    void solveLocalProblem(Vec &rhs, Vec &sol);
+
   protected:
     /// After mesh changes, or if the solver is called the first time, this
     /// function creates all matrix and vector objects with the approriated
@@ -212,15 +242,6 @@ namespace AMDiS {
                           Vec &vec_sol_primal,
                           SystemVector &vec);

-    /** \brief
-     * Solves the FETI-DP system globally, thus without reducing it to the
-     * Lagrange multipliers. This should be used for debugging only to test
-     * if the FETI-DP system is setup correctly.
-     *
-     * \param[out] vec   Solution DOF vectors.
-     */
-    void solveFetiMatrix(SystemVector &vec);
-
     /** \brief
      * Solves the FETI-DP system with reducing it first to the Lagrange
      * multipliers. This is what one expects when using the FETI-DP methid :)
@@ -229,6 +250,10 @@ namespace AMDiS {
      */
     void solveReducedFetiMatrix(SystemVector &vec);

+    void resetStatistics();
+
+    void printStatistics();
+
   protected:
     /// Number of components in the PDE to be solved.
     int nComponents;
@@ -262,7 +287,7 @@ namespace AMDiS {

     /// Index for each non primal variables to the global index of
     /// B variables.
-    DofMapping globalIndexB;
+    DofMapping globalIndexB, localIndexB;

     /// Number of non primal, thus B, variables on rank and globally.
     int nRankB, nOverallB, rStartB;
@@ -287,6 +312,11 @@ namespace AMDiS {
     /// variables, \ref mat_b_b
     KSP ksp_b;

+    /// 0: Solve the Schur complement on primal variables with an iterative solver.
+    /// 1: Create the Schur complement matrix explicitly and solve it with a
+    ///    direct solver.
+    int schurPrimalSolver;
+
     /// PETSc solver object to solve the Schur complement on the
     /// primal variables.
     KSP ksp_schur_primal;
@@ -329,6 +359,9 @@ namespace AMDiS {

     // Number of local nodes that are duals.
     int nLocalDuals;
+
+  public:
+    static FetiStatisticsData fetiStatistics;
   };

 #endif
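
Note on the shell-matrix pattern used in createSchurPrimal() for schurPrimalSolver == 0: the Schur complement S_PiPi = K_PiPi - K_PiB inv(K_BB) K_BPi is never assembled. A MatShell carries a context structure and a user-defined MatMult callback, so the outer KSP only ever applies S_PiPi to vectors; each application costs one solve with K_BB, which is why the patch moves ksp_b from PETSC_COMM_WORLD to PETSC_COMM_SELF and routes every K_BB solve through solveLocalProblem(). The sketch below is a minimal, generic version of this pattern, not AMDiS code: it assumes the same older PETSc API used throughout the patch (four-argument KSPSetOperators with SAME_NONZERO_PATTERN), and all names (SchurShellCtx, applySchur, App/Apb/Abp, kspBB, setupSchurSolver) are illustrative placeholders.

#include <petscksp.h>

// Context handed to the shell matrix; it plays the role of SchurPrimalData.
struct SchurShellCtx
{
  Mat App, Apb, Abp;   // blocks of the 2x2 operator [[Abb, Abp], [Apb, App]]
  KSP kspBB;           // inner solver for the Abb block
  Vec tmpB, tmpP;      // work vectors in the "b" and "p" spaces
};

// y = S x = App x - Apb inv(Abb) Abp x
PetscErrorCode applySchur(Mat S, Vec x, Vec y)
{
  void *ptr;
  MatShellGetContext(S, &ptr);
  SchurShellCtx *ctx = static_cast<SchurShellCtx*>(ptr);

  MatMult(ctx->Abp, x, ctx->tmpB);             // tmpB = Abp x
  KSPSolve(ctx->kspBB, ctx->tmpB, ctx->tmpB);  // tmpB = inv(Abb) tmpB
  MatMult(ctx->Apb, ctx->tmpB, ctx->tmpP);     // tmpP = Apb tmpB
  MatMult(ctx->App, x, y);                     // y    = App x
  VecAXPBY(y, -1.0, 1.0, ctx->tmpP);           // y    = y - tmpP
  return 0;
}

// Wires the shell matrix into an outer Krylov solver; nLocal and nGlobal are
// the local and global sizes of the "p" (primal) space.
void setupSchurSolver(SchurShellCtx *ctx, PetscInt nLocal, PetscInt nGlobal,
                      Mat *S, KSP *ksp)
{
  MatCreateShell(PETSC_COMM_WORLD, nLocal, nLocal, nGlobal, nGlobal, ctx, S);
  MatShellSetOperation(*S, MATOP_MULT, (void(*)(void))applySchur);

  KSPCreate(PETSC_COMM_WORLD, ksp);
  KSPSetOperators(*ksp, *S, *S, SAME_NONZERO_PATTERN);
  KSPSetFromOptions(*ksp);
}

Every outer iteration triggers one inner KSPSolve with Abb inside the callback; in the patch this cost shows up in nSchurPrimalApply and timeSchurPrimalApply. The explicit alternative (schurPrimalSolver == 1) would trade these repeated inner solves for one up-front MatMatSolve/MatMatMult, but that branch is still work in progress in this patch: createSchurPrimal() prints "EXIT!" and calls exit(0) after forming the product.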