From b640da9eadeaa8f51e7fc553acaa9d0083ce1f3d Mon Sep 17 00:00:00 2001 From: Thomas Witkowski <thomas.witkowski@gmx.de> Date: Thu, 31 Mar 2011 15:34:49 +0000 Subject: [PATCH] Work on PETSc solver structure. Removed all OpenMP stuff. --- AMDiS/CMakeLists.txt | 2 - AMDiS/libtool | 6 +- AMDiS/src/Assembler.cc | 1 - AMDiS/src/Assembler.h | 3 +- AMDiS/src/BasisFunction.cc | 1 - AMDiS/src/DOFAdmin.cc | 32 +- AMDiS/src/DOFAdmin.h | 1 - AMDiS/src/DOFVector.cc | 8 - AMDiS/src/DOFVector.hh | 46 +- AMDiS/src/FirstOrderAssembler.cc | 61 +- AMDiS/src/Lagrange.cc | 3 - AMDiS/src/Makefile.am | 5 - AMDiS/src/Makefile.in | 70 +- AMDiS/src/OpenMP.h | 69 -- AMDiS/src/Operator.h | 1 - AMDiS/src/ProblemScal.cc | 9 - AMDiS/src/ProblemVec.cc | 288 +------- AMDiS/src/ProblemVecDbg.cc | 40 -- AMDiS/src/Quadrature.cc | 54 +- AMDiS/src/ResidualParallelEstimator.cc | 119 ---- AMDiS/src/ResidualParallelEstimator.h | 90 --- AMDiS/src/SecondOrderAssembler.cc | 1 - AMDiS/src/StandardProblemIteration.cc | 1 - AMDiS/src/SubAssembler.cc | 1 - AMDiS/src/SystemVector.h | 59 -- AMDiS/src/Traverse.cc | 1 - AMDiS/src/Traverse.h | 1 - AMDiS/src/TraverseParallel.cc | 91 --- AMDiS/src/TraverseParallel.h | 62 -- AMDiS/src/ZeroOrderAssembler.cc | 61 +- .../src/compositeFEM/CompositeFEMOperator.cc | 20 +- AMDiS/src/io/FileWriter.cc | 1 - AMDiS/src/parallel/PetscProblemStat.cc | 628 +---------------- AMDiS/src/parallel/PetscProblemStat.h | 82 +-- AMDiS/src/parallel/PetscSolver.cc | 657 +++++++++++++++++- AMDiS/src/parallel/PetscSolver.h | 106 ++- 36 files changed, 881 insertions(+), 1800 deletions(-) delete mode 100644 AMDiS/src/OpenMP.h delete mode 100644 AMDiS/src/ResidualParallelEstimator.cc delete mode 100644 AMDiS/src/ResidualParallelEstimator.h delete mode 100644 AMDiS/src/TraverseParallel.cc delete mode 100644 AMDiS/src/TraverseParallel.h diff --git a/AMDiS/CMakeLists.txt b/AMDiS/CMakeLists.txt index ee38f54e..73782538 100644 --- a/AMDiS/CMakeLists.txt +++ b/AMDiS/CMakeLists.txt @@ -121,7 +121,6 @@ SET(AMDIS_SRC ${SOURCE_DIR}/AdaptBase.cc ${SOURCE_DIR}/RefinementManager2d.cc ${SOURCE_DIR}/RefinementManager3d.cc ${SOURCE_DIR}/ResidualEstimator.cc - ${SOURCE_DIR}/ResidualParallelEstimator.cc ${SOURCE_DIR}/RobinBC.cc ${SOURCE_DIR}/ScalableQuadrature.cc ${SOURCE_DIR}/SecondOrderAssembler.cc @@ -134,7 +133,6 @@ SET(AMDIS_SRC ${SOURCE_DIR}/AdaptBase.cc ${SOURCE_DIR}/SurfaceQuadrature.cc ${SOURCE_DIR}/Tetrahedron.cc ${SOURCE_DIR}/Traverse.cc - ${SOURCE_DIR}/TraverseParallel.cc ${SOURCE_DIR}/Triangle.cc ${SOURCE_DIR}/VertexVector.cc ${SOURCE_DIR}/ZeroOrderAssembler.cc diff --git a/AMDiS/libtool b/AMDiS/libtool index a502627c..21443861 100755 --- a/AMDiS/libtool +++ b/AMDiS/libtool @@ -44,7 +44,7 @@ available_tags=" CXX F77" # ### BEGIN LIBTOOL CONFIG -# Libtool was configured on host p1q024: +# Libtool was configured on host deimos101: # Shell to use when invoking shell scripts. SHELL="/bin/sh" @@ -6760,7 +6760,7 @@ build_old_libs=`case $build_libtool_libs in yes) $echo no;; *) $echo yes;; esac` # End: # ### BEGIN LIBTOOL TAG CONFIG: CXX -# Libtool was configured on host p1q024: +# Libtool was configured on host deimos101: # Shell to use when invoking shell scripts. SHELL="/bin/sh" @@ -7065,7 +7065,7 @@ include_expsyms="" # ### BEGIN LIBTOOL TAG CONFIG: F77 -# Libtool was configured on host p1q024: +# Libtool was configured on host deimos101: # Shell to use when invoking shell scripts. 
SHELL="/bin/sh" diff --git a/AMDiS/src/Assembler.cc b/AMDiS/src/Assembler.cc index ce5e77ce..119cfeb4 100644 --- a/AMDiS/src/Assembler.cc +++ b/AMDiS/src/Assembler.cc @@ -18,7 +18,6 @@ #include "Element.h" #include "QPsiPhi.h" #include "DOFVector.h" -#include "OpenMP.h" namespace AMDiS { diff --git a/AMDiS/src/Assembler.h b/AMDiS/src/Assembler.h index 3ded21f9..fabbb2e0 100644 --- a/AMDiS/src/Assembler.h +++ b/AMDiS/src/Assembler.h @@ -32,13 +32,12 @@ #define AMDIS_ASSEMBLER_H #include <vector> +#include "AMDiS_fwd.h" #include "FixVec.h" #include "ZeroOrderAssembler.h" #include "FirstOrderAssembler.h" #include "SecondOrderAssembler.h" #include "ElInfo.h" -#include "OpenMP.h" -#include "AMDiS_fwd.h" namespace AMDiS { diff --git a/AMDiS/src/BasisFunction.cc b/AMDiS/src/BasisFunction.cc index 2c5c6ab8..efa11ded 100644 --- a/AMDiS/src/BasisFunction.cc +++ b/AMDiS/src/BasisFunction.cc @@ -16,7 +16,6 @@ #include "DOFVector.h" #include "BasisFunction.h" #include "Lagrange.h" -#include "OpenMP.h" namespace AMDiS { diff --git a/AMDiS/src/DOFAdmin.cc b/AMDiS/src/DOFAdmin.cc index 67c604db..a007b8b7 100644 --- a/AMDiS/src/DOFAdmin.cc +++ b/AMDiS/src/DOFAdmin.cc @@ -207,15 +207,10 @@ namespace AMDiS { TEST_EXIT(dofIndexed)("no dofIndexed\n"); -#ifdef _OPENMP -#pragma omp critical (dofIndexAccess) -#endif - { - if (dofIndexed->getSize() < size) - dofIndexed->resize(size); - - dofIndexedList.push_back(dofIndexed); - } + if (dofIndexed->getSize() < size) + dofIndexed->resize(size); + + dofIndexedList.push_back(dofIndexed); } @@ -224,18 +219,13 @@ namespace AMDiS { FUNCNAME("DOFAdmin::removeDOFIndexed()"); bool removed = false; -#ifdef _OPENMP -#pragma omp critical (dofIndexAccess) -#endif - { - std::list<DOFIndexedBase*>::iterator it; - std::list<DOFIndexedBase*>::iterator end = dofIndexedList.end(); - for (it = dofIndexedList.begin(); it != end; ++it) { - if (*it == dofIndexed) { - dofIndexedList.erase(it); - removed = true; - break; - } + std::list<DOFIndexedBase*>::iterator it; + std::list<DOFIndexedBase*>::iterator end = dofIndexedList.end(); + for (it = dofIndexedList.begin(); it != end; ++it) { + if (*it == dofIndexed) { + dofIndexedList.erase(it); + removed = true; + break; } } diff --git a/AMDiS/src/DOFAdmin.h b/AMDiS/src/DOFAdmin.h index 6feb59f2..56ba2d14 100644 --- a/AMDiS/src/DOFAdmin.h +++ b/AMDiS/src/DOFAdmin.h @@ -35,7 +35,6 @@ #include "Global.h" #include "FixVec.h" #include "Serializable.h" -#include "OpenMP.h" #include "AMDiS_fwd.h" namespace AMDiS { diff --git a/AMDiS/src/DOFVector.cc b/AMDiS/src/DOFVector.cc index 96ab5a49..4e68d3a7 100644 --- a/AMDiS/src/DOFVector.cc +++ b/AMDiS/src/DOFVector.cc @@ -157,10 +157,6 @@ namespace AMDiS { { FUNCNAME("DOFVector<double>::getRecoveryGradient()"); -#ifdef _OPENMP - ERROR_EXIT("Using static variable while using OpenMP parallelization!\n"); -#endif - // define result vector static DOFVector<WorldVector<double> > *vec = NULL; @@ -614,10 +610,6 @@ namespace AMDiS { { FUNCNAME("DOFVector<double>::getGradient()"); -#ifdef _OPENMP - ERROR_EXIT("Using static variable while using OpenMP parallelization!\n"); -#endif - Mesh *mesh = feSpace->getMesh(); int dim = mesh->getDim(); int dow = Global::getGeo(WORLD); diff --git a/AMDiS/src/DOFVector.hh b/AMDiS/src/DOFVector.hh index 9cc5b51c..474b34b0 100644 --- a/AMDiS/src/DOFVector.hh +++ b/AMDiS/src/DOFVector.hh @@ -826,26 +826,9 @@ namespace AMDiS { ("y.size = %d too small: admin->size = %d\n", y.getSize(), admin->getUsedSize()); - // This is the old implementation of the mv-multiplication. 
It have been changed - // because of the OpenMP-parallelization: -// typename DOFVector<T>::Iterator xIterator(dynamic_cast<DOFIndexed<T>*>(const_cast<DOFVector<T>*>(&x)), USED_DOFS); -// typename DOFVector<T>::Iterator yIterator(dynamic_cast<DOFIndexed<T>*>(&y), USED_DOFS); -// for(xIterator.reset(), yIterator.reset(); -// !xIterator.end(); -// ++xIterator, ++yIterator) -// { -// *yIterator += alpha * (*xIterator); -// }; - - int i; - int maxI = y.getSize(); - -#ifdef _OPENMP -#pragma omp parallel for schedule(dynamic, 25000) default(shared) private(i) -#endif - for (i = 0; i < maxI; i++) - if (!admin->isDofFree(i)) - y[i] += alpha * x[i]; + for (int i = 0; i < y.getSize(); i++) + if (!admin->isDofFree(i)) + y[i] += alpha * x[i]; } template<typename T> @@ -892,26 +875,9 @@ namespace AMDiS { ("y.size = %d too small: admin->size = %d\n", y.getSize(), admin->getUsedSize()); - // This is the old implementation of the mv-multiplication. It have been changed - // because of the OpenMP-parallelization: - // typename DOFVector<T>::Iterator xIterator(dynamic_cast<DOFIndexed<T>*>(const_cast<DOFVector<T>*>(&x)), USED_DOFS); - // typename DOFVector<T>::Iterator yIterator(dynamic_cast<DOFIndexed<T>*>(&y), USED_DOFS); - // for(xIterator.reset(), yIterator.reset(); - // !xIterator.end(); - // ++xIterator, ++yIterator) - // { - // *yIterator = alpha *(*yIterator)+ (*xIterator); - // }; - - int i; - int maxI = y.getSize(); - -#ifdef _OPENMP -#pragma omp parallel for schedule(dynamic, 25000) default(shared) private(i) -#endif - for (i = 0; i < maxI; i++) - if (!admin->isDofFree(i)) - y[i] = alpha * y[i] + x[i]; + for (int i = 0; i < y.getSize(); i++) + if (!admin->isDofFree(i)) + y[i] = alpha * y[i] + x[i]; } template<typename T> diff --git a/AMDiS/src/FirstOrderAssembler.cc b/AMDiS/src/FirstOrderAssembler.cc index f811efe1..4359539f 100644 --- a/AMDiS/src/FirstOrderAssembler.cc +++ b/AMDiS/src/FirstOrderAssembler.cc @@ -19,7 +19,6 @@ #include "FiniteElemSpace.h" #include "Quadrature.h" #include "DOFVector.h" -#include "OpenMP.h" namespace AMDiS { @@ -266,16 +265,11 @@ namespace AMDiS { const double *values; if (firstCall) { -#ifdef _OPENMP -#pragma omp critical -#endif - { - q10 = Q10PsiPhi::provideQ10PsiPhi(rowFeSpace->getBasisFcts(), - colFeSpace->getBasisFcts(), - quadrature); - q1 = Q1Psi::provideQ1Psi(rowFeSpace->getBasisFcts(), quadrature); - firstCall = false; - } + q10 = Q10PsiPhi::provideQ10PsiPhi(rowFeSpace->getBasisFcts(), + colFeSpace->getBasisFcts(), + quadrature); + q1 = Q1Psi::provideQ1Psi(rowFeSpace->getBasisFcts(), quadrature); + firstCall = false; } const int **nEntries = q10->getNumberEntries(); @@ -353,16 +347,11 @@ namespace AMDiS { void Quad01::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix& mat) { if (firstCall) { -#ifdef _OPENMP -#pragma omp critical -#endif - { - const BasisFunction *basFcts = rowFeSpace->getBasisFcts(); - psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI); - basFcts = colFeSpace->getBasisFcts(); - phiFast = updateFastQuadrature(phiFast, basFcts, INIT_GRD_PHI); - firstCall = false; - } + const BasisFunction *basFcts = rowFeSpace->getBasisFcts(); + psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI); + basFcts = colFeSpace->getBasisFcts(); + phiFast = updateFastQuadrature(phiFast, basFcts, INIT_GRD_PHI); + firstCall = false; } int nPoints = quadrature->getNumPoints(); @@ -406,16 +395,11 @@ namespace AMDiS { const double *values; if (firstCall) { -#ifdef _OPENMP -#pragma omp critical -#endif - { - q01 = 
Q01PsiPhi::provideQ01PsiPhi(rowFeSpace->getBasisFcts(), - colFeSpace->getBasisFcts(), - quadrature); - q1 = Q1Psi::provideQ1Psi(rowFeSpace->getBasisFcts(), quadrature); - firstCall = false; - } + q01 = Q01PsiPhi::provideQ01PsiPhi(rowFeSpace->getBasisFcts(), + colFeSpace->getBasisFcts(), + quadrature); + q1 = Q1Psi::provideQ1Psi(rowFeSpace->getBasisFcts(), quadrature); + firstCall = false; } const int **nEntries = q01->getNumberEntries(); @@ -447,16 +431,11 @@ namespace AMDiS { const double *values; if (firstCall) { -#ifdef _OPENMP -#pragma omp critical -#endif - { - q10 = Q10PsiPhi::provideQ10PsiPhi(rowFeSpace->getBasisFcts(), - colFeSpace->getBasisFcts(), - quadrature); - q1 = Q1Psi::provideQ1Psi(rowFeSpace->getBasisFcts(), quadrature); - firstCall = false; - } + q10 = Q10PsiPhi::provideQ10PsiPhi(rowFeSpace->getBasisFcts(), + colFeSpace->getBasisFcts(), + quadrature); + q1 = Q1Psi::provideQ1Psi(rowFeSpace->getBasisFcts(), quadrature); + firstCall = false; } const int *nEntries = q1->getNumberEntries(); diff --git a/AMDiS/src/Lagrange.cc b/AMDiS/src/Lagrange.cc index 7fa578c9..c51f00ba 100644 --- a/AMDiS/src/Lagrange.cc +++ b/AMDiS/src/Lagrange.cc @@ -932,9 +932,6 @@ namespace AMDiS { if (indices) { result = indices; } else { -#ifdef _OPENMP - ERROR_EXIT("Using static variable while using OpenMP parallelization!\n"); -#endif if (localVec && nBasFcts > localVecSize) { delete [] localVec; localVec = new DegreeOfFreedom[nBasFcts]; diff --git a/AMDiS/src/Makefile.am b/AMDiS/src/Makefile.am index fc437c35..71ede176 100644 --- a/AMDiS/src/Makefile.am +++ b/AMDiS/src/Makefile.am @@ -152,7 +152,6 @@ NonLinSolver.h \ NonLinSolver.hh \ NonLinUpdater.h \ OEMSolver.h \ -OpenMP.h \ Operator.h \ Operator.hh \ OperatorTerm.h \ @@ -187,7 +186,6 @@ RefinementManager1d.h \ RefinementManager2d.h \ RefinementManager3d.h \ ResidualEstimator.h \ -ResidualParallelEstimator.h \ RobinBC.h \ RobinBC.hh \ ScalableQuadrature.h \ @@ -211,7 +209,6 @@ SystemVector.h \ Tetrahedron.h \ TimedObject.h \ Traverse.h \ -TraverseParallel.h \ Triangle.h \ UmfPackSolver.h \ VertexInfo.h \ @@ -354,7 +351,6 @@ RefinementManager1d.cc \ RefinementManager2d.cc \ RefinementManager3d.cc \ ResidualEstimator.cc \ -ResidualParallelEstimator.cc \ RobinBC.cc \ ScalableQuadrature.cc \ SecondOrderAssembler.cc \ @@ -367,7 +363,6 @@ SubQuadrature.cc \ SurfaceQuadrature.cc \ Tetrahedron.cc \ Traverse.cc \ -TraverseParallel.cc \ Triangle.cc \ VertexVector.cc \ ZeroOrderAssembler.cc \ diff --git a/AMDiS/src/Makefile.in b/AMDiS/src/Makefile.in index 3f5a03d0..e72541b6 100644 --- a/AMDiS/src/Makefile.in +++ b/AMDiS/src/Makefile.in @@ -118,18 +118,17 @@ am__libamdis_la_SOURCES_DIST = parallel/CheckerPartitioner.cc \ RCNeighbourList.cc Recovery.cc RecoveryEstimator.cc \ RefinementManager.cc RefinementManager1d.cc \ RefinementManager2d.cc RefinementManager3d.cc \ - ResidualEstimator.cc ResidualParallelEstimator.cc RobinBC.cc \ - ScalableQuadrature.cc SecondOrderAssembler.cc \ - SecondOrderTerm.cc Serializer.cc StandardProblemIteration.cc \ - SubAssembler.cc SubElInfo.cc SubQuadrature.cc \ - SurfaceQuadrature.cc Tetrahedron.cc Traverse.cc \ - TraverseParallel.cc Triangle.cc VertexVector.cc \ - ZeroOrderAssembler.cc ZeroOrderTerm.cc io/ArhReader.cc \ - io/ArhWriter.cc io/DataCollector.cc io/DofWriter.cc \ - io/ElementFileWriter.cc io/FileWriter.cc io/GNUPlotWriter.cc \ - io/MacroInfo.cc io/MacroReader.cc io/MacroWriter.cc \ - io/PngWriter.cc io/PovrayWriter.cc io/ValueReader.cc \ - io/ValueWriter.cc io/VtkWriter.cc parallel/InteriorBoundary.cc \ + 
ResidualEstimator.cc RobinBC.cc ScalableQuadrature.cc \ + SecondOrderAssembler.cc SecondOrderTerm.cc Serializer.cc \ + StandardProblemIteration.cc SubAssembler.cc SubElInfo.cc \ + SubQuadrature.cc SurfaceQuadrature.cc Tetrahedron.cc \ + Traverse.cc Triangle.cc VertexVector.cc ZeroOrderAssembler.cc \ + ZeroOrderTerm.cc io/ArhReader.cc io/ArhWriter.cc \ + io/DataCollector.cc io/DofWriter.cc io/ElementFileWriter.cc \ + io/FileWriter.cc io/GNUPlotWriter.cc io/MacroInfo.cc \ + io/MacroReader.cc io/MacroWriter.cc io/PngWriter.cc \ + io/PovrayWriter.cc io/ValueReader.cc io/ValueWriter.cc \ + io/VtkWriter.cc parallel/InteriorBoundary.cc \ time/RosenbrockAdaptInstationary.cc time/RosenbrockMethod.cc \ time/RosenbrockStationary.cc @USE_PARALLEL_DOMAIN_AMDIS_TRUE@am__objects_1 = libamdis_la-CheckerPartitioner.lo \ @@ -187,26 +186,24 @@ am_libamdis_la_OBJECTS = $(am__objects_3) libamdis_la-AdaptBase.lo \ libamdis_la-RefinementManager1d.lo \ libamdis_la-RefinementManager2d.lo \ libamdis_la-RefinementManager3d.lo \ - libamdis_la-ResidualEstimator.lo \ - libamdis_la-ResidualParallelEstimator.lo \ - libamdis_la-RobinBC.lo libamdis_la-ScalableQuadrature.lo \ + libamdis_la-ResidualEstimator.lo libamdis_la-RobinBC.lo \ + libamdis_la-ScalableQuadrature.lo \ libamdis_la-SecondOrderAssembler.lo \ libamdis_la-SecondOrderTerm.lo libamdis_la-Serializer.lo \ libamdis_la-StandardProblemIteration.lo \ libamdis_la-SubAssembler.lo libamdis_la-SubElInfo.lo \ libamdis_la-SubQuadrature.lo libamdis_la-SurfaceQuadrature.lo \ libamdis_la-Tetrahedron.lo libamdis_la-Traverse.lo \ - libamdis_la-TraverseParallel.lo libamdis_la-Triangle.lo \ - libamdis_la-VertexVector.lo libamdis_la-ZeroOrderAssembler.lo \ - libamdis_la-ZeroOrderTerm.lo libamdis_la-ArhReader.lo \ - libamdis_la-ArhWriter.lo libamdis_la-DataCollector.lo \ - libamdis_la-DofWriter.lo libamdis_la-ElementFileWriter.lo \ - libamdis_la-FileWriter.lo libamdis_la-GNUPlotWriter.lo \ - libamdis_la-MacroInfo.lo libamdis_la-MacroReader.lo \ - libamdis_la-MacroWriter.lo libamdis_la-PngWriter.lo \ - libamdis_la-PovrayWriter.lo libamdis_la-ValueReader.lo \ - libamdis_la-ValueWriter.lo libamdis_la-VtkWriter.lo \ - libamdis_la-InteriorBoundary.lo \ + libamdis_la-Triangle.lo libamdis_la-VertexVector.lo \ + libamdis_la-ZeroOrderAssembler.lo libamdis_la-ZeroOrderTerm.lo \ + libamdis_la-ArhReader.lo libamdis_la-ArhWriter.lo \ + libamdis_la-DataCollector.lo libamdis_la-DofWriter.lo \ + libamdis_la-ElementFileWriter.lo libamdis_la-FileWriter.lo \ + libamdis_la-GNUPlotWriter.lo libamdis_la-MacroInfo.lo \ + libamdis_la-MacroReader.lo libamdis_la-MacroWriter.lo \ + libamdis_la-PngWriter.lo libamdis_la-PovrayWriter.lo \ + libamdis_la-ValueReader.lo libamdis_la-ValueWriter.lo \ + libamdis_la-VtkWriter.lo libamdis_la-InteriorBoundary.lo \ libamdis_la-RosenbrockAdaptInstationary.lo \ libamdis_la-RosenbrockMethod.lo \ libamdis_la-RosenbrockStationary.lo @@ -475,7 +472,6 @@ NonLinSolver.h \ NonLinSolver.hh \ NonLinUpdater.h \ OEMSolver.h \ -OpenMP.h \ Operator.h \ Operator.hh \ OperatorTerm.h \ @@ -510,7 +506,6 @@ RefinementManager1d.h \ RefinementManager2d.h \ RefinementManager3d.h \ ResidualEstimator.h \ -ResidualParallelEstimator.h \ RobinBC.h \ RobinBC.hh \ ScalableQuadrature.h \ @@ -534,7 +529,6 @@ SystemVector.h \ Tetrahedron.h \ TimedObject.h \ Traverse.h \ -TraverseParallel.h \ Triangle.h \ UmfPackSolver.h \ VertexInfo.h \ @@ -677,7 +671,6 @@ RefinementManager1d.cc \ RefinementManager2d.cc \ RefinementManager3d.cc \ ResidualEstimator.cc \ -ResidualParallelEstimator.cc \ RobinBC.cc \ 
ScalableQuadrature.cc \ SecondOrderAssembler.cc \ @@ -690,7 +683,6 @@ SubQuadrature.cc \ SurfaceQuadrature.cc \ Tetrahedron.cc \ Traverse.cc \ -TraverseParallel.cc \ Triangle.cc \ VertexVector.cc \ ZeroOrderAssembler.cc \ @@ -911,7 +903,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-RefinementManager2d.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-RefinementManager3d.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-ResidualEstimator.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-ResidualParallelEstimator.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-RobinBC.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-RosenbrockAdaptInstationary.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-RosenbrockMethod.Plo@am__quote@ @@ -928,7 +919,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-SurfaceQuadrature.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-Tetrahedron.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-Traverse.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-TraverseParallel.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-Triangle.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-ValueReader.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libamdis_la-ValueWriter.Plo@am__quote@ @@ -1569,13 +1559,6 @@ libamdis_la-ResidualEstimator.lo: ResidualEstimator.cc @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-ResidualEstimator.lo `test -f 'ResidualEstimator.cc' || echo '$(srcdir)/'`ResidualEstimator.cc -libamdis_la-ResidualParallelEstimator.lo: ResidualParallelEstimator.cc -@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-ResidualParallelEstimator.lo -MD -MP -MF "$(DEPDIR)/libamdis_la-ResidualParallelEstimator.Tpo" -c -o libamdis_la-ResidualParallelEstimator.lo `test -f 'ResidualParallelEstimator.cc' || echo '$(srcdir)/'`ResidualParallelEstimator.cc; \ -@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libamdis_la-ResidualParallelEstimator.Tpo" "$(DEPDIR)/libamdis_la-ResidualParallelEstimator.Plo"; else rm -f "$(DEPDIR)/libamdis_la-ResidualParallelEstimator.Tpo"; exit 1; fi -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='ResidualParallelEstimator.cc' object='libamdis_la-ResidualParallelEstimator.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-ResidualParallelEstimator.lo `test -f 'ResidualParallelEstimator.cc' || echo '$(srcdir)/'`ResidualParallelEstimator.cc - libamdis_la-RobinBC.lo: RobinBC.cc @am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-RobinBC.lo -MD 
-MP -MF "$(DEPDIR)/libamdis_la-RobinBC.Tpo" -c -o libamdis_la-RobinBC.lo `test -f 'RobinBC.cc' || echo '$(srcdir)/'`RobinBC.cc; \ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libamdis_la-RobinBC.Tpo" "$(DEPDIR)/libamdis_la-RobinBC.Plo"; else rm -f "$(DEPDIR)/libamdis_la-RobinBC.Tpo"; exit 1; fi @@ -1660,13 +1643,6 @@ libamdis_la-Traverse.lo: Traverse.cc @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-Traverse.lo `test -f 'Traverse.cc' || echo '$(srcdir)/'`Traverse.cc -libamdis_la-TraverseParallel.lo: TraverseParallel.cc -@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-TraverseParallel.lo -MD -MP -MF "$(DEPDIR)/libamdis_la-TraverseParallel.Tpo" -c -o libamdis_la-TraverseParallel.lo `test -f 'TraverseParallel.cc' || echo '$(srcdir)/'`TraverseParallel.cc; \ -@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libamdis_la-TraverseParallel.Tpo" "$(DEPDIR)/libamdis_la-TraverseParallel.Plo"; else rm -f "$(DEPDIR)/libamdis_la-TraverseParallel.Tpo"; exit 1; fi -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='TraverseParallel.cc' object='libamdis_la-TraverseParallel.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -c -o libamdis_la-TraverseParallel.lo `test -f 'TraverseParallel.cc' || echo '$(srcdir)/'`TraverseParallel.cc - libamdis_la-Triangle.lo: Triangle.cc @am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libamdis_la_CXXFLAGS) $(CXXFLAGS) -MT libamdis_la-Triangle.lo -MD -MP -MF "$(DEPDIR)/libamdis_la-Triangle.Tpo" -c -o libamdis_la-Triangle.lo `test -f 'Triangle.cc' || echo '$(srcdir)/'`Triangle.cc; \ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/libamdis_la-Triangle.Tpo" "$(DEPDIR)/libamdis_la-Triangle.Plo"; else rm -f "$(DEPDIR)/libamdis_la-Triangle.Tpo"; exit 1; fi diff --git a/AMDiS/src/OpenMP.h b/AMDiS/src/OpenMP.h deleted file mode 100644 index a3da3846..00000000 --- a/AMDiS/src/OpenMP.h +++ /dev/null @@ -1,69 +0,0 @@ -// ============================================================================ -// == == -// == AMDiS - Adaptive multidimensional simulations == -// == == -// == http://www.amdis-fem.org == -// == == -// ============================================================================ -// -// Software License for AMDiS -// -// Copyright (c) 2010 Dresden University of Technology -// All rights reserved. -// Authors: Simon Vey, Thomas Witkowski et al. -// -// This file is part of AMDiS -// -// See also license.opensource.txt in the distribution. 
- - - -/** \file OpenMP.h */ - -#ifndef AMDIS_OPENMP_H -#define AMDIS_OPENMP_H - -#ifdef _OPENMP - -#include <algorithm> -#include <omp.h> - -const bool amdisHaveOpenMP = true; - -inline int omp_get_overall_max_threads() -{ - return std::max(omp_get_max_threads(), omp_get_num_threads()); -} - -#else - -const bool amdisHaveOpenMP = false; - -inline int omp_get_max_threads() -{ - return 1; -} - -inline int omp_get_num_procs() -{ - return 1; -} - -inline int omp_get_num_threads() -{ - return 1; -} - -inline int omp_get_thread_num() -{ - return 0; -} - -inline int omp_get_overall_max_threads() -{ - return 1; -} - -#endif - -#endif diff --git a/AMDiS/src/Operator.h b/AMDiS/src/Operator.h index cff634d5..ec2d1ba3 100644 --- a/AMDiS/src/Operator.h +++ b/AMDiS/src/Operator.h @@ -30,7 +30,6 @@ #include "MatrixVector.h" #include "ElInfo.h" #include "AbstractFunction.h" -#include "OpenMP.h" #include "SubAssembler.h" #include "OperatorTerm.h" #include "ZeroOrderTerm.h" diff --git a/AMDiS/src/ProblemScal.cc b/AMDiS/src/ProblemScal.cc index 43a2e174..8917c320 100644 --- a/AMDiS/src/ProblemScal.cc +++ b/AMDiS/src/ProblemScal.cc @@ -243,23 +243,14 @@ namespace AMDiS { return; } -#ifdef _OPENMP - double wtime = omp_get_wtime(); -#endif clock_t first = clock(); SolverMatrix<DOFMatrix> solverMatrix; solverMatrix.setMatrix(*systemMatrix); solver->solveSystem(solverMatrix, *solution, *rhs); -#ifdef _OPENMP - INFO(info, 8)("solution of discrete system needed %.5f seconds system time / %.5f seconds wallclock time\n", - TIME_USED(first, clock()), - omp_get_wtime() - wtime); -#else INFO(info, 8)("solution of discrete system needed %.5f seconds\n", TIME_USED(first, clock())); -#endif adaptInfo->setSolverIterations(solver->getIterations()); adaptInfo->setMaxSolverIterations(solver->getMaxIterations()); diff --git a/AMDiS/src/ProblemVec.cc b/AMDiS/src/ProblemVec.cc index 55301c56..78a69273 100644 --- a/AMDiS/src/ProblemVec.cc +++ b/AMDiS/src/ProblemVec.cc @@ -34,7 +34,6 @@ #include "PeriodicBC.h" #include "Lagrange.h" #include "Flag.h" -#include "TraverseParallel.h" #include "io/VtkWriter.h" #include "io/ValueReader.h" #include "ProblemVecDbg.h" @@ -510,21 +509,12 @@ namespace AMDiS { return; } -#ifdef _OPENMP - double wtime = omp_get_wtime(); -#endif - clock_t first = clock(); solver->solveSystem(solverMatrix, *solution, *rhs); -#ifdef _OPENMP - INFO(info, 8)("solution of discrete system needed %.5f seconds system time / %.5f seconds wallclock time\n", - TIME_USED(first, clock()), omp_get_wtime() - wtime); -#else INFO(info, 8)("solution of discrete system needed %.5f seconds\n", TIME_USED(first, clock())); -#endif adaptInfo->setSolverIterations(solver->getIterations()); adaptInfo->setMaxSolverIterations(solver->getMaxIterations()); @@ -539,12 +529,8 @@ namespace AMDiS { #ifdef HAVE_PARALLEL_DOMAIN_AMDIS double first = MPI::Wtime(); -#else -#ifdef _OPENMP - double first = omp_get_wtime(); #else clock_t first = clock(); -#endif #endif if (computeExactError) { @@ -573,14 +559,9 @@ namespace AMDiS { MPI::COMM_WORLD.Barrier(); INFO(info, 8)("estimation of the error needed %.5f seconds\n", MPI::Wtime() - first); -#else -#ifdef _OPENMP - INFO(info, 8)("estimation of the error needed %.5f seconds\n", - omp_get_wtime() - first); #else INFO(info, 8)("estimation of the error needed %.5f seconds\n", TIME_USED(first, clock())); -#endif #endif } @@ -678,13 +659,9 @@ namespace AMDiS { MPI::Wtime() - first); first = MPI::Wtime(); -#else -#ifdef _OPENMP - double first = omp_get_wtime(); #else clock_t first = clock(); #endif -#endif 
Flag assembleFlag = @@ -808,14 +785,9 @@ namespace AMDiS { MPI::COMM_WORLD.Barrier(); INFO(info, 8)("buildAfterCoarsen needed %.5f seconds\n", MPI::Wtime() - first); -#else -#ifdef _OPENMP - INFO(info, 8)("buildAfterCoarsen needed %.5f seconds\n", - omp_get_wtime() - first); #else INFO(info, 8)("buildAfterCoarsen needed %.5f seconds\n", TIME_USED(first, clock())); -#endif #endif } @@ -824,9 +796,6 @@ namespace AMDiS { FUNCNAME("ProblemVec::buildAfterCoarsen()"); clock_t first = clock(); -#ifdef _OPENMP - double wtime = omp_get_wtime(); -#endif for (int i = 0; i < static_cast<int>(meshes.size()); i++) meshes[i]->dofCompress(); @@ -1001,13 +970,8 @@ namespace AMDiS { INFO(info, 8)("fillin of assembled matrix: %d\n", nnz); -#ifdef _OPENMP - INFO(info, 8)("buildAfterCoarsen needed %.5f seconds system time / %.5f seconds wallclock time\n", - TIME_USED(first, clock()), omp_get_wtime() - wtime); -#else INFO(info, 8)("buildAfterCoarsen needed %.5f seconds\n", TIME_USED(first, clock())); -#endif } bool ProblemVec::dualMeshTraverseRequired() @@ -1032,10 +996,6 @@ namespace AMDiS { clock_t first = clock(); -#ifdef _OPENMP - double wtime = omp_get_wtime(); -#endif - Flag assembleFlag = flag | @@ -1272,13 +1232,8 @@ namespace AMDiS { INFO(info, 8)("fillin of assembled matrix: %d\n", nnz); -#ifdef _OPENMP - INFO(info, 8)("buildAfterCoarsen needed %.5f seconds system time / %.5f seconds wallclock time\n", - TIME_USED(first, clock()), omp_get_wtime() - wtime); -#else INFO(info, 8)("buildAfterCoarsen needed %.5f seconds\n", TIME_USED(first, clock())); -#endif } @@ -1306,34 +1261,21 @@ namespace AMDiS { #ifdef HAVE_PARALLEL_DOMAIN_AMDIS double first = MPI::Wtime(); -#else -#ifdef _OPENMP - double first = omp_get_wtime(); #else clock_t first = clock(); -#endif #endif int size = static_cast<int>(fileWriters.size()); -#ifdef _OPENMP -#pragma omp parallel for schedule(static, 1) -#endif - for (int i = 0; i < size; i++) { - fileWriters[i]->writeFiles(adaptInfo, force); - } + for (int i = 0; i < size; i++) + fileWriters[i]->writeFiles(adaptInfo, force); #ifdef HAVE_PARALLEL_DOMAIN_AMDIS MPI::COMM_WORLD.Barrier(); INFO(info, 8)("writeFiles needed %.5f seconds\n", MPI::Wtime() - first); -#else -#ifdef _OPENMP - INFO(info, 8)("writeFiles needed %.5f seconds\n", - omp_get_wtime() - first); #else INFO(info, 8)("writeFiles needed %.5f seconds\n", TIME_USED(first, clock())); -#endif #endif } @@ -1596,156 +1538,49 @@ namespace AMDiS { Mesh *mesh = feSpace->getMesh(); const BasisFunction *basisFcts = feSpace->getBasisFcts(); -#ifdef _OPENMP - TraverseParallelStack stack(0, 1); -#else TraverseStack stack; -#endif // == Initialize matrix and vector. If we have to assemble in parallel, === // == temporary thread owned matrix and vector must be created. === -#ifdef _OPENMP -#pragma omp parallel -#endif - { - BoundaryType *bound = - useGetBound ? new BoundaryType[basisFcts->getNumber()] : NULL; - - // Create for every thread its private matrix and vector, on that - // the thread will assemble its part of the mesh. - DOFMatrix *tmpMatrix = NULL; - DOFVector<double> *tmpVector = NULL; - -#ifdef _OPENMP - if (matrix) { - tmpMatrix = new DOFMatrix(matrix->getRowFeSpace(), matrix->getColFeSpace(), "tmp"); - - // Copy the global matrix to the private matrix, because we need the - // operators defined on the global matrix in the private one. Only the - // values have to be set to zero. 
- *tmpMatrix = *matrix; - tmpMatrix->clear(); - - tmpMatrix->getBaseMatrix().change_dim(matrix->getRowFeSpace()->getAdmin()->getUsedSize(), - matrix->getColFeSpace()->getAdmin()->getUsedSize()); - tmpMatrix->startInsertion(10); - } - - if (vector) { - tmpVector = new DOFVector<double>(vector->getFeSpace(), "tmp"); - - // Copy the global vector to the private vector, because we need the - // operatirs defined on the global vector in the private one. But set - // the values to zero of the private vector after copying. - *tmpVector = *vector; - tmpVector->set(0.0); - } -#else + BoundaryType *bound = + useGetBound ? new BoundaryType[basisFcts->getNumber()] : NULL; + + // == Traverse mesh and assemble. == + + ElInfo *elInfo = stack.traverseFirst(mesh, -1, assembleFlag); + + // After creating privat copies of the DOFMatrix and the DOFVector, all threads + // have to wait at this barrier. Especially for small problems this is required, + // because otherwise one thread may be finished with assembling, before another + // has made his private copy. + while (elInfo) { + if (useGetBound) + basisFcts->getBound(elInfo, bound); + if (matrix) { - tmpMatrix = matrix; - tmpMatrix->startInsertion(matrix->getNnz()); - } - if (vector) { - tmpVector = vector; - tmpVector->set(0.0); - } -#endif - - // == Traverse mesh (either sequentially or in parallel) and assemble. == - - - // Because we are using the parallel traverse stack, each thread will - // traverse only a part of the mesh. - ElInfo *elInfo = stack.traverseFirst(mesh, -1, assembleFlag); - - // After creating privat copies of the DOFMatrix and the DOFVector, all threads - // have to wait at this barrier. Especially for small problems this is required, - // because otherwise one thread may be finished with assembling, before another - // has made his private copy. -#ifdef _OPENMP -#pragma omp barrier -#endif - while (elInfo) { - if (useGetBound) - basisFcts->getBound(elInfo, bound); - - if (matrix) { - tmpMatrix->assemble(1.0, elInfo, bound); + matrix->assemble(1.0, elInfo, bound); - // Take the matrix boundary manager from the public matrix, - // but assemble the boundary conditions on the thread private matrix. - if (matrix->getBoundaryManager()) - matrix->getBoundaryManager()->fillBoundaryConditions(elInfo, tmpMatrix); - } - - if (vector) - tmpVector->assemble(1.0, elInfo, bound, NULL); - - elInfo = stack.traverseNext(elInfo); - } - - // == Finally, if we have assembled in parallel, we have to add the thread == - // == private matrix and vector to the global one. == - -#ifdef _OPENMP - if (tmpMatrix) - tmpMatrix->finishInsertion(); - - // After mesh traverse, all thread have to added their private matrices and - // vectors to the global public matrix and public vector. Therefore, this is - // a critical section, which is allowed to be executed by on thread only at - // the same time. - - if (matrix) { -#pragma omp critical - { - matrix->getBaseMatrix() += tmpMatrix->getBaseMatrix(); - } - } - -#pragma omp barrier - -#pragma omp master - { - if (matrix) - matrix->startInsertion(); + // Take the matrix boundary manager from the public matrix, + // but assemble the boundary conditions on the thread private matrix. + if (matrix->getBoundaryManager()) + matrix->getBoundaryManager()->fillBoundaryConditions(elInfo, matrix); } - -#pragma omp barrier - - if (matrix) { - // Remove rows corresponding to DOFs on a Dirichlet boundary. 
-#pragma omp critical - matrix->removeRowsWithDBC(tmpMatrix->getApplyDBCs()); - - delete tmpMatrix; - } - - if (vector) { -#pragma omp critical - *vector += *tmpVector; - - delete tmpVector; - } - -#else - if (matrix) - matrix->removeRowsWithDBC(matrix->getApplyDBCs()); -#endif - - if (useGetBound) - delete [] bound; - - } // pragma omp parallel - -// INFO(info, 8)("Assemble matrix with %d mat ops and %d vec ops needed %.5f seconds\n", -// matrix ? matrix->getOperators().size() : 0, -// vector ? vector->getOperators().size() : 0, -// TIME_USED(first, clock())); + + if (vector) + vector->assemble(1.0, elInfo, bound, NULL); + + elInfo = stack.traverseNext(elInfo); + } + + if (matrix) + matrix->removeRowsWithDBC(matrix->getApplyDBCs()); + + if (useGetBound) + delete [] bound; } - - + + void ProblemVec::assembleBoundaryConditions(DOFVector<double> *rhs, DOFVector<double> *solution, Mesh *mesh, @@ -1760,55 +1595,6 @@ namespace AMDiS { if (solution->getBoundaryManager()) solution->getBoundaryManager()->initVector(solution); -#ifdef _OPENMP - // === Parallel boundary assemblage === - - TraverseParallelStack stack; - -#pragma omp parallel - { - // Each thread assembles on its own dof-vectors. - DOFVector<double> *tmpRhsVec = new DOFVector<double>(rhs->getFeSpace(), "tmpRhs"); - DOFVector<double> *tmpSolVec = new DOFVector<double>(solution->getFeSpace(), "tmpSol"); - tmpRhsVec->set(0.0); - tmpSolVec->set(0.0); - - // (Parallel) traverse of mesh. - ElInfo *elInfo = stack.traverseFirst(mesh, -1, assembleFlag); - while (elInfo) { - if (rhs->getBoundaryManager()) - rhs->getBoundaryManager()-> fillBoundaryConditions(elInfo, tmpRhsVec); - - if (solution->getBoundaryManager()) - solution->getBoundaryManager()->fillBoundaryConditions(elInfo, tmpSolVec); - - elInfo = stack.traverseNext(elInfo); - } - - // After (parallel) mesh traverse, the result is applied to the final - // vectors. This section is not allowed to be executed by more than one - // thread at the same time. 
-#pragma omp critical - { - DOFVector<double>::Iterator rhsIt(rhs, USED_DOFS); - DOFVector<double>::Iterator solIt(solution, USED_DOFS); - DOFVector<double>::Iterator tmpRhsIt(tmpRhsVec, USED_DOFS); - DOFVector<double>::Iterator tmpSolIt(tmpSolVec, USED_DOFS); - for (rhsIt.reset(), solIt.reset(), tmpRhsIt.reset(), tmpSolIt.reset(); - !rhsIt.end(); - ++rhsIt, ++solIt, ++tmpRhsIt, ++tmpSolIt) { - *rhsIt += *tmpRhsIt; - *solIt += *tmpSolIt; - } - } // pragma omp critical - - delete tmpRhsVec; - delete tmpSolVec; - } // pragma omp parallel - -#else - // === Sequentiell boundary assemblage === - TraverseStack stack; ElInfo *elInfo = stack.traverseFirst(mesh, -1, assembleFlag); while (elInfo) { @@ -1820,8 +1606,6 @@ namespace AMDiS { elInfo = stack.traverseNext(elInfo); } -#endif - // === Finalize vectors === diff --git a/AMDiS/src/ProblemVecDbg.cc b/AMDiS/src/ProblemVecDbg.cc index 1ddd7af8..5b524ee5 100644 --- a/AMDiS/src/ProblemVecDbg.cc +++ b/AMDiS/src/ProblemVecDbg.cc @@ -414,44 +414,4 @@ namespace AMDiS { } } - void printOpenmpTraverseInfo(ProblemVec *prob, bool wait) - { - int nEl = 0; - std::vector<std::vector<int> > nElRank(9); - for (int i = 2; i <= 8; i++) { - nElRank[i].resize(i); - for (int j = 0; j < i; j++) - nElRank[i][j] = 0; - } - - Mesh *mesh = prob->getMesh(0); - TraverseStack stack; - ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL); - while (elInfo) { - nEl++; - int elIndex = elInfo->getElement()->getIndex(); - for (int i = 2; i <= 8; i++) - for (int j = 0; j < i; j++) - if (elIndex % i == j) - nElRank[i][j]++; - - elInfo = stack.traverseNext(elInfo); - } - - std::cout << "==========================\n"; - std::cout << "= OpenMP traverse info =\n"; - std::cout << "==========================\n\n"; - std::cout << "nElements = " << nEl << "\n"; - - for (int i = 2; i <= 8; i++) { - std::cout << i << " threads: "; - for (int j = 0; j < i; j++) { - std::cout << "T" << j << " = " << nElRank[i][j] << " "; - } - std::cout << "\n"; - } - - if (wait) - WAIT_REALLY; - } } diff --git a/AMDiS/src/Quadrature.cc b/AMDiS/src/Quadrature.cc index 73f35c21..c8f72b56 100644 --- a/AMDiS/src/Quadrature.cc +++ b/AMDiS/src/Quadrature.cc @@ -59,10 +59,6 @@ namespace AMDiS { if (vec) { val = vec; } else { -#ifdef _OPENMP - ERROR_EXIT("Using static variable while using OpenMP parallelization!\n"); -#endif - if (static_cast<int>(size) < n_points) { size_t new_size = std::max(maxNQuadPoints[dim], n_points); delete [] quad_vec_d; @@ -95,10 +91,6 @@ namespace AMDiS { if (vec) { val = vec; } else { -#ifdef _OPENMP - ERROR_EXIT("Using static variable while using OpenMP parallelization!\n"); -#endif - if (static_cast<int>(size) < n_points) { size_t new_size = std::max(maxNQuadPoints[dim], n_points); if (quad_vec) @@ -1486,35 +1478,29 @@ namespace AMDiS { FastQuadrature *quad_fast; -#ifdef _OPENMP -#pragma omp critical (provideFastQuad) -#endif - { - list<FastQuadrature*>::iterator fast = fastQuadList.begin(); - - - for (fast = fastQuadList.begin(); fast != fastQuadList.end(); fast++) - if ((*fast)->basisFunctions == bas_fcts && (*fast)->quadrature == &quad) - break; - - if (fast != fastQuadList.end() && ((*fast)->init_flag & init_flag) == init_flag) { - quad_fast = *fast; + list<FastQuadrature*>::iterator fast = fastQuadList.begin(); + + for (fast = fastQuadList.begin(); fast != fastQuadList.end(); fast++) + if ((*fast)->basisFunctions == bas_fcts && (*fast)->quadrature == &quad) + break; + + if (fast != fastQuadList.end() && ((*fast)->init_flag & init_flag) == init_flag) { + quad_fast = 
*fast; + } else { + if (fast == fastQuadList.end()) { + quad_fast = + new FastQuadrature(const_cast<BasisFunction*>(bas_fcts), + const_cast<Quadrature*>(&quad), 0); + + fastQuadList.push_front(quad_fast); + + max_points = std::max(max_points, quad.getNumPoints()); } else { - if (fast == fastQuadList.end()) { - quad_fast = - new FastQuadrature(const_cast<BasisFunction*>(bas_fcts), - const_cast<Quadrature*>(&quad), 0); - - fastQuadList.push_front(quad_fast); - - max_points = std::max(max_points, quad.getNumPoints()); - } else { - quad_fast = (*fast); - } + quad_fast = (*fast); } - - quad_fast->init(init_flag); } + + quad_fast->init(init_flag); return quad_fast; } diff --git a/AMDiS/src/ResidualParallelEstimator.cc b/AMDiS/src/ResidualParallelEstimator.cc deleted file mode 100644 index 8c69696e..00000000 --- a/AMDiS/src/ResidualParallelEstimator.cc +++ /dev/null @@ -1,119 +0,0 @@ -// -// Software License for AMDiS -// -// Copyright (c) 2010 Dresden University of Technology -// All rights reserved. -// Authors: Simon Vey, Thomas Witkowski et al. -// -// This file is part of AMDiS -// -// See also license.opensource.txt in the distribution. - - -#include "ResidualParallelEstimator.h" -#include "ResidualEstimator.h" -#include "TraverseParallel.h" -#include "Parameters.h" -#include "OpenMP.h" - -#ifdef _OPENMP - -namespace AMDiS { - ResidualParallelEstimator::ResidualParallelEstimator(std::string name, int r) - : Estimator(name, r), - C3(1.0) - { - GET_PARAMETER(0, name + "->C3", "%f", &C3); - C3 = C3 > 1.e-25 ? sqr(C3) : 0.0; - - seqEstimators_.resize(omp_get_overall_max_threads()); - - for (int i = 0; i < omp_get_overall_max_threads(); i++) - seqEstimators_[i] = new ResidualEstimator(name, r); - } - - ResidualParallelEstimator::~ResidualParallelEstimator() - { - for (int i = 0; i < static_cast<int>(seqEstimators_.size()); i++) - delete seqEstimators_[i]; - } - - void ResidualParallelEstimator::addSystem(DOFMatrix *matrix_, - DOFVector<double> *uh_, - DOFVector<double> *fh_, - DOFVector<double> *uhOld_) - { - Estimator::addSystem(matrix_, uh_, fh_, uhOld_); - - for (int i = 0; i < static_cast<int>(seqEstimators_.size()); i++) - seqEstimators_[i]->addSystem(matrix_, uh_, fh_, uhOld_); - } - - void ResidualParallelEstimator::addUhOldToSystem(int system, - DOFVector<double> *uhOld_) - { - Estimator::addUhOldToSystem(system, uhOld_); - - for (int i = 0; i < static_cast<int>(seqEstimators_.size()); i++) - seqEstimators_[i]->addUhOldToSystem(system, uhOld_); - } - - double ResidualParallelEstimator::estimate(double ts) - { - FUNCNAME("ResidualParallelEstimator::estimate()"); - - mesh = uh[row == -1 ? 
0 : row]->getFeSpace()->getMesh(); - - for (int i = 0; i < static_cast<int>(seqEstimators_.size()); i++) - seqEstimators_[i]->init(ts); - - TraverseParallelStack stack; - -#pragma omp parallel - { - ResidualEstimator *estimator = seqEstimators_[omp_get_thread_num()]; - - traverseFlag = - Mesh::FILL_NEIGH | - Mesh::FILL_COORDS | - Mesh::FILL_OPP_COORDS | - Mesh::FILL_BOUND | - Mesh::FILL_GRD_LAMBDA | - Mesh::FILL_DET | - Mesh::CALL_LEAF_EL; - - ElInfo *elInfo = stack.traverseFirst(mesh, -1, traverseFlag); - - while (elInfo) { - estimator->estimateElement(elInfo); - elInfo = stack.traverseNext(elInfo); - } - } - - est_sum = 0.0; - est_max = 0.0; - est_t_sum = 0.0; - est_t_max = 0.0; - - for (int i = 0; i < static_cast<int>(seqEstimators_.size()); i++) { - seqEstimators_[i]->exit(false); - est_sum += pow(seqEstimators_[i]->getErrorSum(), 2); - est_max = max(est_max, seqEstimators_[i]->getErrorMax()); - est_t_sum += pow(seqEstimators_[i]->getTimeEst(), 2); - est_t_max = max(est_t_max, seqEstimators_[i]->getTimeEstMax()); - } - - est_sum = sqrt(est_sum); - est_t_sum = sqrt(est_t_sum); - - MSG("estimate = %.8e\n", est_sum); - if (C3) - MSG("time estimate = %.8e\n", est_t_sum); - - return est_sum; - } - - -} - -#endif // _OPENMP diff --git a/AMDiS/src/ResidualParallelEstimator.h b/AMDiS/src/ResidualParallelEstimator.h deleted file mode 100644 index b2bb9384..00000000 --- a/AMDiS/src/ResidualParallelEstimator.h +++ /dev/null @@ -1,90 +0,0 @@ -// ============================================================================ -// == == -// == AMDiS - Adaptive multidimensional simulations == -// == == -// == http://www.amdis-fem.org == -// == == -// ============================================================================ -// -// Software License for AMDiS -// -// Copyright (c) 2010 Dresden University of Technology -// All rights reserved. -// Authors: Simon Vey, Thomas Witkowski et al. -// -// This file is part of AMDiS -// -// See also license.opensource.txt in the distribution. - - - -/** \file ResidualParallelEstimator.h */ - -/** \defgroup Estimator Estimator module - * @{ <img src="estimator.png"> @} - */ - -#ifndef AMDIS_RESIDUALPARALLELESTIMATOR_H -#define AMDIS_RESIDUALPARALLELESTIMATOR_H - -#ifdef _OPENMP - -#include <vector> -#include "ResidualEstimator.h" - -namespace AMDiS { - - /** - * \ingroup Estimator - * - * \brief - * Parallel Residual Estimator for scalar problems. - */ - class ResidualParallelEstimator : public Estimator - { - public: - /// Creator class used in the OEMSolverMap. - class Creator : public EstimatorCreator - { - public: - Creator() : EstimatorCreator() {} - - virtual ~Creator() {} - - /// Returns a new ODirSolver object. - Estimator* create() - { - return new ResidualParallelEstimator(name, row); - } - }; - - /// Constructor. - ResidualParallelEstimator(std::string name, int r); - - /// Destructor. 
- ~ResidualParallelEstimator(); - - /// - virtual void addSystem(DOFMatrix *matrix_, - DOFVector<double> *uh_, - DOFVector<double> *fh_, - DOFVector<double> *uhOld_ = NULL); - - /// - virtual void addUhOldToSystem(int system, DOFVector<double> *uhOld_); - - /// - virtual double estimate(double timestep); - - private: - std::vector<ResidualEstimator*> seqEstimators_; - - /// Constant in fromt of the time - double C3; - }; - -} - -#endif // _OPENMP - -#endif // AMDIS_RESIDUALPARALLELESTIMATOR_H diff --git a/AMDiS/src/SecondOrderAssembler.cc b/AMDiS/src/SecondOrderAssembler.cc index 6f0c2d4c..618d5e3f 100644 --- a/AMDiS/src/SecondOrderAssembler.cc +++ b/AMDiS/src/SecondOrderAssembler.cc @@ -19,7 +19,6 @@ #include "FiniteElemSpace.h" #include "Quadrature.h" #include "DOFVector.h" -#include "OpenMP.h" namespace AMDiS { diff --git a/AMDiS/src/StandardProblemIteration.cc b/AMDiS/src/StandardProblemIteration.cc index ca259224..135cc613 100644 --- a/AMDiS/src/StandardProblemIteration.cc +++ b/AMDiS/src/StandardProblemIteration.cc @@ -14,7 +14,6 @@ #include "AdaptInfo.h" #include "ProblemStatBase.h" #include "Global.h" -#include "OpenMP.h" namespace AMDiS { diff --git a/AMDiS/src/SubAssembler.cc b/AMDiS/src/SubAssembler.cc index b6c06b3e..c61e2c9e 100644 --- a/AMDiS/src/SubAssembler.cc +++ b/AMDiS/src/SubAssembler.cc @@ -15,7 +15,6 @@ #include "FiniteElemSpace.h" #include "Operator.h" #include "BasisFunction.h" -#include "OpenMP.h" #include "Mesh.h" #include "Quadrature.h" #include "DOFVector.h" diff --git a/AMDiS/src/SystemVector.h b/AMDiS/src/SystemVector.h index 142ba3d9..25820156 100644 --- a/AMDiS/src/SystemVector.h +++ b/AMDiS/src/SystemVector.h @@ -27,7 +27,6 @@ #include "DOFVector.h" #include "CreatorInterface.h" #include "Serializable.h" -#include "OpenMP.h" #include "Serializer.h" namespace AMDiS { @@ -35,58 +34,6 @@ namespace AMDiS { /// A system vector is a vector of dof vectors used for vector valued problems. class SystemVector : public Serializable { - public: - /// Creator. - class Creator - : public CreatorInterface<SystemVector> { - public: - /// Creators constructor. - Creator(std::string name_, - std::vector<FiniteElemSpace*> feSpace_, - int size_) - : name(name_), - feSpace(feSpace_), - size(size_) - {} - - /// Destructor - virtual ~Creator() {} - - /// Implementation of CreatorInterface::create(). - SystemVector *create() - { - char number[10]; - std::string numberedName; - SystemVector *result = new SystemVector(name, feSpace, size); - for (int i = 0; i < size; i++) { - sprintf(number, "[%d]", i); - numberedName = name + std::string(number); - result->setDOFVector(i, new DOFVector<double>(feSpace[i], numberedName)); - } - return result; - } - - /// Implementation of CreatorInterface::free(). - void free(SystemVector *vec) - { - for (int i = 0; i < size; i++) { - delete vec->getDOFVector(i); - vec->setDOFVector(i, NULL); - } - delete vec; - } - - private: - /// Name of the system vector - std::string name; - - /// Finite element space used for creation. - std::vector<FiniteElemSpace*> feSpace; - - /// Number of component vectors to be created - int size; - }; - public: /// Constructor. 
SystemVector(std::string name_, @@ -437,9 +384,6 @@ namespace AMDiS { TEST_EXIT_DBG(size == matrix.getNumRows())("incompatible sizes\n"); TEST_EXIT_DBG(size == matrix.getNumCols())("incompatible sizes\n"); -#ifdef _OPENMP -#pragma omp parallel for schedule(static, 1) num_threads(std::min(size, omp_get_max_threads())) -#endif for (i = 0; i < size; i++) { if (!add) result.getDOFVector(i)->set(0.0); @@ -463,9 +407,6 @@ namespace AMDiS { int size = x.getNumVectors(); int i; -#ifdef _OPENMP -#pragma omp parallel for schedule(static, 1) num_threads(std::min(size, omp_get_max_threads())) -#endif for (i = 0; i < size; i++) axpy(a, *(x.getDOFVector(i)), *(y.getDOFVector(i))); } diff --git a/AMDiS/src/Traverse.cc b/AMDiS/src/Traverse.cc index 66074185..b8149f8d 100644 --- a/AMDiS/src/Traverse.cc +++ b/AMDiS/src/Traverse.cc @@ -21,7 +21,6 @@ #include "MacroElement.h" #include "FixVec.h" #include "Parametric.h" -#include "OpenMP.h" #include "Debug.h" namespace AMDiS { diff --git a/AMDiS/src/Traverse.h b/AMDiS/src/Traverse.h index 42c0d86b..11bf82af 100644 --- a/AMDiS/src/Traverse.h +++ b/AMDiS/src/Traverse.h @@ -38,7 +38,6 @@ #include "Global.h" #include "ElInfo.h" #include "ElInfoStack.h" -#include "OpenMP.h" #include "AMDiS_fwd.h" namespace AMDiS { diff --git a/AMDiS/src/TraverseParallel.cc b/AMDiS/src/TraverseParallel.cc deleted file mode 100644 index f5fb591f..00000000 --- a/AMDiS/src/TraverseParallel.cc +++ /dev/null @@ -1,91 +0,0 @@ -// -// Software License for AMDiS -// -// Copyright (c) 2010 Dresden University of Technology -// All rights reserved. -// Authors: Simon Vey, Thomas Witkowski et al. -// -// This file is part of AMDiS -// -// See also license.opensource.txt in the distribution. - - -#include "TraverseParallel.h" -#include "OpenMP.h" -#include "Mesh.h" - -#ifdef _OPENMP - -namespace AMDiS { - - TraverseParallelStack::TraverseParallelStack(int n, int mode) - : nThreads(0), - parallelMode(mode) - { - if (n == 0) - nThreads = omp_get_overall_max_threads(); - else - nThreads = n; - - stacks.resize(nThreads); - - if (parallelMode == 0) { - int i = 0; - for (std::vector<TraverseStack*>::iterator it = stacks.begin(); - it != stacks.end(); ++it) { - - (*it) = new TraverseStack(); - (*it)->setMyThreadId(i++); - (*it)->setMaxThreads(nThreads); - } - } else { - for (std::vector<TraverseStack*>::iterator it = stacks.begin(); - it != stacks.end(); ++it) - (*it) = new TraverseStack(); - } - } - - TraverseParallelStack::~TraverseParallelStack() - { - for (std::vector<TraverseStack*>::iterator it = stacks.begin(); - it != stacks.end(); ++it) - delete (*it); - } - - ElInfo* TraverseParallelStack::traverseFirst(Mesh *mesh, int level, Flag fill_flag) - { - FUNCNAME("TraverseParallelStack::traverseFirst()"); - - TEST_EXIT(fill_flag.isSet(Mesh::CALL_LEAF_EL))("not yet implemented"); - TEST_EXIT(stacks[omp_get_thread_num()]) - ("something wrong with parallel stack traverse"); - - if (parallelMode == 0) { - return stacks[omp_get_thread_num()]->traverseFirst(mesh, level, fill_flag); - } else { - ElInfo *elInfo = stacks[omp_get_thread_num()]->traverseFirst(mesh, level, fill_flag); - while (elInfo && - elInfo->getElement()->getIndex() % nThreads != omp_get_thread_num()) { - elInfo = stacks[omp_get_thread_num()]->traverseNext(elInfo); - } - return elInfo; - } - } - - ElInfo* TraverseParallelStack::traverseNext(ElInfo* elInfoOld) - { - if (parallelMode == 0) { - return stacks[omp_get_thread_num()]->traverseNext(elInfoOld); - } else { - ElInfo *elInfo = stacks[omp_get_thread_num()]->traverseNext(elInfoOld); - 
while (elInfo && - elInfo->getElement()->getIndex() % nThreads != omp_get_thread_num()) { - elInfo = stacks[omp_get_thread_num()]->traverseNext(elInfo); - } - return elInfo; - } - } - -} - -#endif // _OPENMP diff --git a/AMDiS/src/TraverseParallel.h b/AMDiS/src/TraverseParallel.h deleted file mode 100644 index f3c7f778..00000000 --- a/AMDiS/src/TraverseParallel.h +++ /dev/null @@ -1,62 +0,0 @@ -// ============================================================================ -// == == -// == AMDiS - Adaptive multidimensional simulations == -// == == -// == http://www.amdis-fem.org == -// == == -// ============================================================================ -// -// Software License for AMDiS -// -// Copyright (c) 2010 Dresden University of Technology -// All rights reserved. -// Authors: Simon Vey, Thomas Witkowski et al. -// -// This file is part of AMDiS -// -// See also license.opensource.txt in the distribution. - - - -/** \file TraverseParallel.h */ - -#ifndef AMDIS_TRAVERSEPARALLEL_H -#define AMDIS_TRAVERSEPARALLEL_H - -#ifdef _OPENMP - -#include "OpenMP.h" -#include "Traverse.h" -#include <vector> - -namespace AMDiS { - - /** \ingroup Traverse - * \brief - */ - class TraverseParallelStack - { - public: - TraverseParallelStack(int nThreads = 0, int mode = 0); - - ~TraverseParallelStack(); - - ElInfo* traverseFirst(Mesh *mesh, int level, Flag fillFlag); - - ElInfo* traverseNext(ElInfo* elInfoOld); - - private: - /// Number of threads using the stack in parallel. - int nThreads; - - int parallelMode; - - std::vector<TraverseStack*> stacks; - }; - -} - -#endif // _OPENMP - -#endif // AMDIS_TRAVERSEPARALLEL_H - diff --git a/AMDiS/src/ZeroOrderAssembler.cc b/AMDiS/src/ZeroOrderAssembler.cc index 51e68be0..28429c09 100644 --- a/AMDiS/src/ZeroOrderAssembler.cc +++ b/AMDiS/src/ZeroOrderAssembler.cc @@ -19,7 +19,6 @@ #include "FiniteElemSpace.h" #include "Quadrature.h" #include "DOFVector.h" -#include "OpenMP.h" namespace AMDiS { @@ -179,16 +178,11 @@ namespace AMDiS { int nPoints = quadrature->getNumPoints(); if (firstCall) { -#ifdef _OPENMP -#pragma omp critical -#endif - { - const BasisFunction *basFcts = rowFeSpace->getBasisFcts(); - psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI); - basFcts = colFeSpace->getBasisFcts(); - phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI); - firstCall = false; - } + const BasisFunction *basFcts = rowFeSpace->getBasisFcts(); + psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI); + basFcts = colFeSpace->getBasisFcts(); + phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI); + firstCall = false; } if (num_rows(c) < static_cast<unsigned int>(nPoints)) @@ -237,16 +231,11 @@ namespace AMDiS { int nPoints = quadrature->getNumPoints(); if (firstCall) { -#ifdef _OPENMP -#pragma omp critical -#endif - { - const BasisFunction *basFcts = rowFeSpace->getBasisFcts(); - psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI); - basFcts = colFeSpace->getBasisFcts(); - phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI); - firstCall = false; - } + const BasisFunction *basFcts = rowFeSpace->getBasisFcts(); + psiFast = updateFastQuadrature(psiFast, basFcts, INIT_PHI); + basFcts = colFeSpace->getBasisFcts(); + phiFast = updateFastQuadrature(phiFast, basFcts, INIT_PHI); + firstCall = false; } ElementVector c(nPoints, 0.0); @@ -275,16 +264,11 @@ namespace AMDiS { void PrecalcZOA::calculateElementMatrix(const ElInfo *elInfo, ElementMatrix& mat) { if (firstCall) { -#ifdef _OPENMP -#pragma omp critical -#endif - { - q00 = 
Q00PsiPhi::provideQ00PsiPhi(rowFeSpace->getBasisFcts(), - colFeSpace->getBasisFcts(), - quadrature); - q0 = Q0Psi::provideQ0Psi(rowFeSpace->getBasisFcts(), quadrature); - firstCall = false; - } + q00 = Q00PsiPhi::provideQ00PsiPhi(rowFeSpace->getBasisFcts(), + colFeSpace->getBasisFcts(), + quadrature); + q0 = Q0Psi::provideQ0Psi(rowFeSpace->getBasisFcts(), quadrature); + firstCall = false; } ElementVector c(1, 0.0); @@ -317,16 +301,11 @@ namespace AMDiS { void PrecalcZOA::calculateElementVector(const ElInfo *elInfo, ElementVector& vec) { if (firstCall) { -#ifdef _OPENMP -#pragma omp critical -#endif - { - q00 = Q00PsiPhi::provideQ00PsiPhi(rowFeSpace->getBasisFcts(), - colFeSpace->getBasisFcts(), - quadrature); - q0 = Q0Psi::provideQ0Psi(rowFeSpace->getBasisFcts(), quadrature); - firstCall = false; - } + q00 = Q00PsiPhi::provideQ00PsiPhi(rowFeSpace->getBasisFcts(), + colFeSpace->getBasisFcts(), + quadrature); + q0 = Q0Psi::provideQ0Psi(rowFeSpace->getBasisFcts(), quadrature); + firstCall = false; } std::vector<OperatorTerm*>::iterator termIt; diff --git a/AMDiS/src/compositeFEM/CompositeFEMOperator.cc b/AMDiS/src/compositeFEM/CompositeFEMOperator.cc index 114d321c..cb572ba3 100644 --- a/AMDiS/src/compositeFEM/CompositeFEMOperator.cc +++ b/AMDiS/src/compositeFEM/CompositeFEMOperator.cc @@ -12,15 +12,13 @@ #include <boost/numeric/mtl/mtl.hpp> #include "CompositeFEMOperator.h" -#include "OpenMP.h" #include "SubElementAssembler.h" #include "SubElInfo.h" #include "SubPolytope.h" -void -CompositeFEMOperator::getElementMatrix(const ElInfo *elInfo, - ElementMatrix& userMat, - double factor) +void CompositeFEMOperator::getElementMatrix(const ElInfo *elInfo, + ElementMatrix& userMat, + double factor) { FUNCNAME("CompositeFEMOperator::getElementMatrix"); @@ -69,9 +67,7 @@ CompositeFEMOperator::getElementMatrix(const ElInfo *elInfo, colFeSpace); } - /** - * Get intersection points. - */ + // Get intersection points. 
intersecPoints = elLS->getElIntersecPoints(); subPolytope = new SubPolytope(elInfo, intersecPoints, @@ -154,10 +150,10 @@ CompositeFEMOperator::getElementMatrix(const ElInfo *elInfo, delete subPolytope; } -void -CompositeFEMOperator::getElementVector(const ElInfo *elInfo, - ElementVector& userVec, - double factor) + +void CompositeFEMOperator::getElementVector(const ElInfo *elInfo, + ElementVector& userVec, + double factor) { FUNCNAME("CompositeFEMOperator::getElementVector"); diff --git a/AMDiS/src/io/FileWriter.cc b/AMDiS/src/io/FileWriter.cc index c37155ea..b2a35dbc 100644 --- a/AMDiS/src/io/FileWriter.cc +++ b/AMDiS/src/io/FileWriter.cc @@ -26,7 +26,6 @@ #include "Flag.h" #include "ElInfo.h" #include "Mesh.h" -#include "OpenMP.h" #if HAVE_PARALLEL_DOMAIN_AMDIS #include "mpi.h" diff --git a/AMDiS/src/parallel/PetscProblemStat.cc b/AMDiS/src/parallel/PetscProblemStat.cc index a5566466..78c6b0eb 100644 --- a/AMDiS/src/parallel/PetscProblemStat.cc +++ b/AMDiS/src/parallel/PetscProblemStat.cc @@ -14,14 +14,7 @@ #include <set> #include "parallel/PetscProblemStat.h" -#include "parallel/StdMpi.h" -#include "parallel/ParallelDebug.h" #include "parallel/PetscSolver.h" -#include "DOFVector.h" -#include "Debug.h" -#include "SystemVector.h" - -#include "petscksp.h" namespace AMDiS { @@ -36,628 +29,11 @@ namespace AMDiS { double wtime = MPI::Wtime(); - fillPetscMatrix(systemMatrix, rhs); - solvePetscMatrix(*solution, adaptInfo); + petscSolver->fillPetscMatrix(systemMatrix, rhs); + petscSolver->solvePetscMatrix(*solution, adaptInfo); INFO(info, 8)("solution of discrete system needed %.5f seconds\n", MPI::Wtime() - wtime); } - - void PetscProblemStat::setDofMatrix(DOFMatrix* mat, int dispMult, - int dispAddRow, int dispAddCol) - { - FUNCNAME("PetscProblemStat::setDofMatrix()"); - - TEST_EXIT(mat)("No DOFMatrix!\n"); - - using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end; - namespace traits= mtl::traits; - typedef DOFMatrix::base_matrix_type Matrix; - - traits::col<Matrix>::type col(mat->getBaseMatrix()); - traits::const_value<Matrix>::type value(mat->getBaseMatrix()); - - typedef traits::range_generator<row, Matrix>::type cursor_type; - typedef traits::range_generator<nz, cursor_type>::type icursor_type; - - vector<int> cols; - vector<double> values; - cols.reserve(300); - values.reserve(300); - - vector<int> globalCols; - - - // === Traverse all rows of the dof matrix and insert row wise the values === - // === to the PETSc matrix. === - - for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), - cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) { - - // Global index of the current row dof. - int globalRowDof = meshDistributor->mapLocalToGlobal(*cursor); - // Test if the current row dof is a periodic dof. - bool periodicRow = meshDistributor->isPeriodicDof(globalRowDof); - - if (!periodicRow) { - // === Row DOF index is not periodic. === - - // Calculate PETSc row index. - - int rowIndex = globalRowDof * dispMult + dispAddRow; - - cols.clear(); - values.clear(); - - for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); - icursor != icend; ++icursor) { - - // Global index of the current column index. - int globalColDof = meshDistributor->mapLocalToGlobal(col(*icursor)); - // Test if the current col dof is a periodic dof. - bool periodicCol = meshDistributor->isPeriodicDof(globalColDof); - // Calculate PETSc col index. - int colIndex = globalColDof * dispMult + dispAddCol; - - // Ignore all zero entries, expect it is a diagonal entry. 
- if (value(*icursor) == 0.0 && rowIndex != colIndex) - continue; - - if (!periodicCol) { - // Calculate the exact position of the column index in the PETSc matrix. - cols.push_back(globalColDof * dispMult + dispAddCol); - values.push_back(value(*icursor)); - } else { - // === Row index is not periodic, but column index is. === - - // Create set of all periodic associations of the column index. - std::set<int> perAsc; - std::set<int>& perColAsc = - meshDistributor->getPerDofAssociations(globalColDof); - for (std::set<int>::iterator it = perColAsc.begin(); - it != perColAsc.end(); ++it) - if (*it >= -3) - perAsc.insert(*it); - - // Scale value to the number of periodic associations of the column index. - double scaledValue = - value(*icursor) * pow(0.5, static_cast<double>(perAsc.size())); - - - // === Create set of all matrix column indices due to the periodic === - // === associations of the column DOF index. === - - vector<int> newCols; - - // First, add the original matrix index. - newCols.push_back(globalColDof); - - // And add all periodic matrix indices. - for (std::set<int>::iterator it = perAsc.begin(); - it != perAsc.end(); ++it) { - int nCols = static_cast<int>(newCols.size()); - - for (int i = 0; i < nCols; i++) { - TEST_EXIT_DBG(meshDistributor->isPeriodicDof(newCols[i], *it)) - ("Wrong periodic DOF associations at boundary %d with DOF %d!\n", - *it, newCols[i]); - - newCols.push_back(meshDistributor->getPeriodicMapping(newCols[i], *it)); - } - } - - for (unsigned int i = 0; i < newCols.size(); i++) { - cols.push_back(newCols[i] * dispMult + dispAddCol); - values.push_back(scaledValue); - } - } - } - - MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), - &(cols[0]), &(values[0]), ADD_VALUES); - } else { - // === Row DOF index is periodic. === - - // Because this row is periodic, we will have to add the entries of this - // matrix row to multiple rows. The following maps store to each row an - // array of column indices and values of the entries that must be added to - // the PETSc matrix. - map<int, vector<int> > colsMap; - map<int, vector<double> > valsMap; - - // Traverse all column entries. - for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); - icursor != icend; ++icursor) { - - // Global index of the current column index. - int globalColDof = meshDistributor->mapLocalToGlobal(col(*icursor)); - - // Ignore all zero entries, expect it is a diagonal entry. - if (value(*icursor) == 0.0 && globalRowDof != globalColDof) - continue; - - - // === Add all periodic associations of both, the row and the column === - // === indices to the set perAsc. === - - std::set<int> perAsc; - - if (meshDistributor->isPeriodicDof(globalColDof)) { - std::set<int>& perColAsc = - meshDistributor->getPerDofAssociations(globalColDof); - for (std::set<int>::iterator it = perColAsc.begin(); - it != perColAsc.end(); ++it) - if (*it >= -3) - perAsc.insert(*it); - } - - std::set<int>& perRowAsc = - meshDistributor->getPerDofAssociations(globalRowDof); - for (std::set<int>::iterator it = perRowAsc.begin(); - it != perRowAsc.end(); ++it) - if (*it >= -3) - perAsc.insert(*it); - - // Scale the value with respect to the number of periodic associations. - double scaledValue = - value(*icursor) * pow(0.5, static_cast<double>(perAsc.size())); - - - // === Create all matrix entries with respect to the periodic === - // === associations of the row and column indices. === - - vector<pair<int, int> > entry; - - // First, add the original entry. 
- entry.push_back(make_pair(globalRowDof, globalColDof)); - - // Then, traverse the periodic associations of the row and column indices - // and create the corresponding entries. - for (std::set<int>::iterator it = perAsc.begin(); it != perAsc.end(); ++it) { - int nEntry = static_cast<int>(entry.size()); - for (int i = 0; i < nEntry; i++) { - int perRowDof = 0; - if (meshDistributor->getPeriodicMapping()[*it].count(entry[i].first)) - perRowDof = meshDistributor->getPeriodicMapping(entry[i].first, *it); - else - perRowDof = entry[i].first; - - int perColDof; - if (meshDistributor->getPeriodicMapping()[*it].count(entry[i].second)) - perColDof = meshDistributor->getPeriodicMapping(entry[i].second, *it); - else - perColDof = entry[i].second; - - - entry.push_back(make_pair(perRowDof, perColDof)); - } - } - - - // === Translate the matrix entries to PETSc's matrix. - - for (vector<pair<int, int> >::iterator eIt = entry.begin(); - eIt != entry.end(); ++eIt) { - // Calculate row and column indices of the PETSc matrix. - int rowIndex = eIt->first * dispMult + dispAddRow; - int colIndex = eIt->second * dispMult + dispAddCol; - - colsMap[rowIndex].push_back(colIndex); - valsMap[rowIndex].push_back(scaledValue); - } - } - - - // === Finally, add all periodic rows to the PETSc matrix. === - - for (map<int, vector<int> >::iterator rowIt = colsMap.begin(); - rowIt != colsMap.end(); ++rowIt) { - TEST_EXIT_DBG(rowIt->second.size() == valsMap[rowIt->first].size()) - ("Should not happen!\n"); - - int rowIndex = rowIt->first; - MatSetValues(petscMatrix, 1, &rowIndex, rowIt->second.size(), - &(rowIt->second[0]), &(valsMap[rowIt->first][0]), ADD_VALUES); - } - } - } - } - - - void PetscProblemStat::setDofVector(Vec& petscVec, DOFVector<double>* vec, - int dispMult, int dispAdd, bool rankOnly) - { - FUNCNAME("PetscProblemStat::setDofVector()"); - - // Traverse all used DOFs in the dof vector. - DOFVector<double>::Iterator dofIt(vec, USED_DOFS); - for (dofIt.reset(); !dofIt.end(); ++dofIt) { - if (rankOnly && !meshDistributor->getIsRankDof(dofIt.getDOFIndex())) - continue; - - // Calculate global row index of the dof. - DegreeOfFreedom globalRowDof = - meshDistributor->mapLocalToGlobal(dofIt.getDOFIndex()); - // Calculate PETSc index of the row dof. - int index = globalRowDof * dispMult + dispAdd; - - if (meshDistributor->isPeriodicDof(globalRowDof)) { - std::set<int>& perAsc = meshDistributor->getPerDofAssociations(globalRowDof); - double value = *dofIt / (perAsc.size() + 1.0); - VecSetValues(petscVec, 1, &index, &value, ADD_VALUES); - - for (std::set<int>::iterator perIt = perAsc.begin(); perIt != perAsc.end(); ++perIt) { - int mappedDof = meshDistributor->getPeriodicMapping(globalRowDof, *perIt); - int mappedIndex = mappedDof * dispMult + dispAdd; - VecSetValues(petscVec, 1, &mappedIndex, &value, ADD_VALUES); - } - } else { - // The dof index is not periodic. 
- double value = *dofIt; - VecSetValues(petscVec, 1, &index, &value, ADD_VALUES); - } - } - } - - - void PetscProblemStat::createPetscNnzStructure(Matrix<DOFMatrix*> *mat) - { - FUNCNAME("PetscProblemStat::createPetscNnzStructure()"); - - TEST_EXIT_DBG(!d_nnz)("There is something wrong!\n"); - TEST_EXIT_DBG(!o_nnz)("There is something wrong!\n"); - - int nRankRows = meshDistributor->getNumberRankDofs() * nComponents; - d_nnz = new int[nRankRows]; - o_nnz = new int[nRankRows]; - for (int i = 0; i < nRankRows; i++) { - d_nnz[i] = 0; - o_nnz[i] = 0; - } - - using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end; - namespace traits = mtl::traits; - typedef DOFMatrix::base_matrix_type Matrix; - typedef vector<pair<int, int> > MatrixNnzEntry; - typedef map<int, DofContainer> RankToDofContainer; - - // Stores to each rank a list of nnz entries (i.e. pairs of row and column index) - // that this rank will send to. These nnz entries will be assembled on this rank, - // but because the row DOFs are not DOFs of this rank they will be send to the - // owner of the row DOFs. - map<int, MatrixNnzEntry> sendMatrixEntry; - - // Maps to each DOF that must be send to another rank the rank number of the - // receiving rank. - map<DegreeOfFreedom, int> sendDofToRank; - - - // First, create for all ranks we send data to MatrixNnzEntry object with 0 entries. - for (RankToDofContainer::iterator it = meshDistributor->getRecvDofs().begin(); - it != meshDistributor->getRecvDofs().end(); ++it) { - sendMatrixEntry[it->first].resize(0); - - for (DofContainer::iterator dofIt = it->second.begin(); - dofIt != it->second.end(); ++dofIt) - sendDofToRank[**dofIt] = it->first; - } - - - std::set<int> recvFromRank; - for (RankToDofContainer::iterator it = meshDistributor->getSendDofs().begin(); - it != meshDistributor->getSendDofs().end(); ++it) - recvFromRank.insert(it->first); - - - for (int i = 0; i < nComponents; i++) { - for (int j = 0; j < nComponents; j++) { - if (!(*mat)[i][j]) - continue; - - Matrix bmat = (*mat)[i][j]->getBaseMatrix(); - - traits::col<Matrix>::type col(bmat); - traits::const_value<Matrix>::type value(bmat); - - typedef traits::range_generator<row, Matrix>::type cursor_type; - typedef traits::range_generator<nz, cursor_type>::type icursor_type; - - for (cursor_type cursor = begin<row>(bmat), - cend = end<row>(bmat); cursor != cend; ++cursor) { - - int globalRowDof = meshDistributor->mapLocalToGlobal(*cursor); - - vector<int> rows; - rows.push_back(globalRowDof); - vector<int> rowRank; - if (meshDistributor->getIsRankDof(*cursor)) { - rowRank.push_back(meshDistributor->getMpiRank()); - } else { - // Find out who is the member of this DOF. - TEST_EXIT_DBG(sendDofToRank.count(*cursor))("DOF %d has no receiver!\n", *cursor); - - rowRank.push_back(sendDofToRank[*cursor]); - } - - // Map the local row number to the global DOF index and create from it - // the global PETSc row index of this DOF. - - int petscRowIdx = globalRowDof * nComponents + i; - - if (meshDistributor->getIsRankDof(*cursor)) { - - // === The current row DOF is a rank dof, so create the corresponding === - // === nnz values directly on rank's nnz data. === - - // This is the local row index of the local PETSc matrix. - int localPetscRowIdx = - petscRowIdx - meshDistributor->getRstart() * nComponents; - - TEST_EXIT_DBG(localPetscRowIdx >= 0 && localPetscRowIdx < nRankRows) - ("Should not happen! 
\n Debug info: localRowIdx = %d globalRowIndx = %d petscRowIdx = %d localPetscRowIdx = %d rStart = %d nCompontens = %d nRankRows = %d\n", - *cursor, - meshDistributor->mapLocalToGlobal(*cursor), - petscRowIdx, - localPetscRowIdx, - meshDistributor->getRstart(), - nComponents, - nRankRows); - - - // Traverse all non zero entries in this row. - for (icursor_type icursor = begin<nz>(cursor), - icend = end<nz>(cursor); icursor != icend; ++icursor) { - int petscColIdx = - meshDistributor->mapLocalToGlobal(col(*icursor)) * nComponents + j; - - if (value(*icursor) != 0.0 || petscRowIdx == petscColIdx) { - // The row DOF is a rank DOF, if also the column is a rank DOF, - // increment the d_nnz values for this row, otherwise the o_nnz value. - if (petscColIdx >= meshDistributor->getRstart() * nComponents && - petscColIdx < meshDistributor->getRstart() * nComponents + nRankRows) - d_nnz[localPetscRowIdx]++; - else - o_nnz[localPetscRowIdx]++; - } - } - } else { - // === The current row DOF is not a rank dof, i.e., it will be created === - // === on this rank, but after this it will be send to another rank === - // === matrix. So we need to send also the corresponding nnz structure === - // === of this row to the corresponding rank. === - - // Send all non zero entries to the member of the row DOF. - int sendToRank = sendDofToRank[*cursor]; - - for (icursor_type icursor = begin<nz>(cursor), - icend = end<nz>(cursor); icursor != icend; ++icursor) { - if (value(*icursor) != 0.0) { - int petscColIdx = - meshDistributor->mapLocalToGlobal(col(*icursor)) * nComponents + j; - - sendMatrixEntry[sendToRank]. - push_back(make_pair(petscRowIdx, petscColIdx)); - } - } - - } // if (isRankDof[*cursor]) ... else ... - } // for each row in mat[i][j] - } - } - - // === Send and recv the nnz row structure to/from other ranks. === - - StdMpi<MatrixNnzEntry> stdMpi(meshDistributor->getMpiComm(), true); - stdMpi.send(sendMatrixEntry); - for (std::set<int>::iterator it = recvFromRank.begin(); - it != recvFromRank.end(); ++it) - stdMpi.recv(*it); - stdMpi.startCommunication(); - - - // === Evaluate the nnz structure this rank got from other ranks and add it to === - // === the PETSc nnz data structure. === - - for (map<int, MatrixNnzEntry>::iterator it = stdMpi.getRecvData().begin(); - it != stdMpi.getRecvData().end(); ++it) { - if (it->second.size() > 0) { - for (unsigned int i = 0; i < it->second.size(); i++) { - int r = it->second[i].first; - int c = it->second[i].second; - - int localRowIdx = r - meshDistributor->getRstart() * nComponents; - - TEST_EXIT_DBG(localRowIdx >= 0 && localRowIdx < nRankRows) - ("Got row index %d/%d (nRankRows = %d) from rank %d. Should not happen!\n", - r, localRowIdx, nRankRows, it->first); - - if (c < meshDistributor->getRstart() * nComponents || - c >= meshDistributor->getRstart() * nComponents + nRankRows) - o_nnz[localRowIdx]++; - else - d_nnz[localRowIdx]++; - } - } - } - - // The above algorithm for calculating the number of nnz per row over- - // approximates the value, i.e., the number is always equal or larger to - // the real number of nnz values in the global parallel matrix. For small - // matrices, the problem may arise, that the result is larger than the - // number of elements in a row. This is fixed in the following. 
- - if (nRankRows < 100) - for (int i = 0; i < nRankRows; i++) - d_nnz[i] = std::min(d_nnz[i], nRankRows); - } - - - void PetscProblemStat::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec) - { - FUNCNAME("PetscProblemStat::fillPetscMatrix()"); - - double wtime = MPI::Wtime(); - int nRankRows = meshDistributor->getNumberRankDofs() * nComponents; - int nOverallRows = meshDistributor->getNumberOverallDofs() * nComponents; - - // === Create PETSc vector (rhs, solution and a temporary vector). === - - VecCreate(PETSC_COMM_WORLD, &petscRhsVec); - VecSetSizes(petscRhsVec, nRankRows, nOverallRows); - VecSetType(petscRhsVec, VECMPI); - - VecCreate(PETSC_COMM_WORLD, &petscSolVec); - VecSetSizes(petscSolVec, nRankRows, nOverallRows); - VecSetType(petscSolVec, VECMPI); - - VecCreate(PETSC_COMM_WORLD, &petscTmpVec); - VecSetSizes(petscTmpVec, nRankRows, nOverallRows); - VecSetType(petscTmpVec, VECMPI); - - int recvAllValues = 0; - int sendValue = static_cast<int>(meshDistributor->getLastMeshChangeIndex() != lastMeshNnz); - meshDistributor->getMpiComm().Allreduce(&sendValue, &recvAllValues, 1, MPI_INT, MPI_SUM); - - if (!d_nnz || recvAllValues != 0) { - if (d_nnz) { - delete [] d_nnz; - d_nnz = NULL; - delete [] o_nnz; - o_nnz = NULL; - } - - createPetscNnzStructure(mat); - lastMeshNnz = meshDistributor->getLastMeshChangeIndex(); - } - - - // === Create PETSc matrix with the computed nnz data structure. === - - MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows, - 0, d_nnz, 0, o_nnz, &petscMatrix); - -#if (DEBUG != 0) - INFO(info, 8)("Fill petsc matrix 1 needed %.5f seconds\n", MPI::Wtime() - wtime); -#endif - -#if (DEBUG != 0) - int a, b; - MatGetOwnershipRange(petscMatrix, &a, &b); - TEST_EXIT(a == meshDistributor->getRstart() * nComponents) - ("Wrong matrix ownership range!\n"); - TEST_EXIT(b == meshDistributor->getRstart() * nComponents + nRankRows) - ("Wrong matrix ownership range!\n"); -#endif - - - // === Transfer values from DOF matrices to the PETSc matrix. === - - for (int i = 0; i < nComponents; i++) - for (int j = 0; j < nComponents; j++) - if ((*mat)[i][j]) - setDofMatrix((*mat)[i][j], nComponents, i, j); - -#if (DEBUG != 0) - INFO(info, 8)("Fill petsc matrix 2 needed %.5f seconds\n", MPI::Wtime() - wtime); -#endif - - MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY); - -#if (DEBUG != 0) - INFO(info, 8)("Fill petsc matrix 3 needed %.5f seconds\n", - TIME_USED(MPI::Wtime(), wtime)); -#endif - - // === Transfer values from DOF vector to the PETSc vector. === - - for (int i = 0; i < nComponents; i++) - setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i); - - VecAssemblyBegin(petscRhsVec); - VecAssemblyEnd(petscRhsVec); - - INFO(info, 8)("Fill petsc matrix needed %.5f seconds\n", MPI::Wtime() - wtime); - } - - - void PetscProblemStat::solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo) - { - FUNCNAME("PetscProblemStat::solvePetscMatrix()"); - - // === Set old solution to be initiual guess for PETSc solver. === - if (!zeroStartVector) { - VecSet(petscSolVec, 0.0); - - for (int i = 0; i < nComponents; i++) - setDofVector(petscSolVec, vec.getDOFVector(i), nComponents, i, true); - - VecAssemblyBegin(petscSolVec); - VecAssemblyEnd(petscSolVec); - } - - // === Init PETSc solver. 
=== - - KSP solver; - PC pc; - PetscSolver* petscSolver = - PetscSolver::getPetscSolver(petscMatrix, meshDistributor, nComponents); - petscSolver->providePetscSolver(solver, pc); - delete petscSolver; - - // Do not delete the solution vector, use it for the initial guess. - if (!zeroStartVector) - KSPSetInitialGuessNonzero(solver, PETSC_TRUE); - - - // === Run PETSc. === - - KSPSolve(solver, petscRhsVec, petscSolVec); - - - // === Transfere values from PETSc's solution vectors to the dof vectors. - - PetscScalar *vecPointer; - VecGetArray(petscSolVec, &vecPointer); - - int nRankDofs = meshDistributor->getNumberRankDofs(); - for (int i = 0; i < nComponents; i++) { - DOFVector<double> &dofvec = *(vec.getDOFVector(i)); - for (int j = 0; j < nRankDofs; j++) - dofvec[meshDistributor->mapLocalToDofIndex(j)] = - vecPointer[j * nComponents + i]; - } - - VecRestoreArray(petscSolVec, &vecPointer); - - - // === Synchronize DOFs at common DOFs, i.e., DOFs that correspond to more === - // === than one partition. === - meshDistributor->synchVector(vec); - - - // === Print information about solution process. === - - int iterations = 0; - KSPGetIterationNumber(solver, &iterations); - MSG(" Number of iterations: %d\n", iterations); - adaptInfo->setSolverIterations(iterations); - - double norm = 0.0; - MatMult(petscMatrix, petscSolVec, petscTmpVec); - VecAXPY(petscTmpVec, -1.0, petscRhsVec); - VecNorm(petscTmpVec, NORM_2, &norm); - MSG(" Residual norm: %e\n", norm); - - - // === Destroy Petsc's variables. === - - MatDestroy(petscMatrix); - VecDestroy(petscRhsVec); - VecDestroy(petscSolVec); - VecDestroy(petscTmpVec); - KSPDestroy(solver); - } - } diff --git a/AMDiS/src/parallel/PetscProblemStat.h b/AMDiS/src/parallel/PetscProblemStat.h index 77278bec..73eacb29 100644 --- a/AMDiS/src/parallel/PetscProblemStat.h +++ b/AMDiS/src/parallel/PetscProblemStat.h @@ -25,14 +25,8 @@ #include "AMDiS_fwd.h" #include "Global.h" -#include "MeshDistributor.h" -#include "ProblemVec.h" -#include "ProblemInstat.h" -#include "ParallelProblemStatBase.h" - -#include "petsc.h" -#include "petscsys.h" -#include "petscao.h" +#include "parallel/ParallelProblemStatBase.h" +#include "parallel/PetscSolver.h" namespace AMDiS { @@ -41,67 +35,33 @@ namespace AMDiS { public: PetscProblemStat(std::string nameStr, ProblemIterationInterface *problemIteration = NULL) - : ParallelProblemStatBase(nameStr, problemIteration), - d_nnz(NULL), - o_nnz(NULL), - lastMeshNnz(0), - zeroStartVector(false) + : ParallelProblemStatBase(nameStr, problemIteration) { - GET_PARAMETER(0, "parallel->use zero start vector", "%d", &zeroStartVector); + string name(""); + GET_PARAMETER(0, "parallel->solver", &name); + + if (name == "petsc-schur") { +#ifdef HAVE_PETSC_DEV + petscSolver = new PetscSolverSchur(meshDistributor); +#else + ERROR_EXIT("Petsc schur complement solver is only supported when petsc-dev is used!\n"); +#endif + } else if (name == "petsc" || name == "") { + petscSolver = new PetscSolverGlobalMatrix(meshDistributor); + } else { + ERROR_EXIT("No parallel solver %s available!\n", name.c_str()); + } } ~PetscProblemStat() - {} + { + delete petscSolver; + } void solve(AdaptInfo *adaptInfo, bool fixedMatrix = false); protected: - /// Creates a new non zero pattern structure for the Petsc matrix. - void createPetscNnzStructure(Matrix<DOFMatrix*> *mat); - - /** \brief - * Create a PETSc matrix and PETSc vectors. The given DOF matrices are used to - * create the nnz structure of the PETSc matrix and the values are transfered to it. 
- * The given DOF vectors are used to the the values of the PETSc rhs vector. - * - * \param[in] mat - * \param[in] vec - */ - void fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec); - - /// Takes a dof matrix and sends the values to the global petsc matrix. - void setDofMatrix(DOFMatrix* mat, int dispMult = 1, - int dispAddRow = 0, int dispAddCol = 0); - - /// Takes a dof vector and sends its values to a given petsc vector. - void setDofVector(Vec& petscVec, DOFVector<double>* vec, - int disMult = 1, int dispAdd = 0, bool rankOnly = false); - - /// Use PETSc to solve the linear system of equations - void solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo); - - protected: - /// Petsc's matrix structure. - Mat petscMatrix; - - /** \brief - * Petsc's vector structures for the rhs vector, the solution vector and a - * temporary vector for calculating the final residuum. - */ - Vec petscRhsVec, petscSolVec, petscTmpVec; - - /// Arrays definig the non zero pattern of Petsc's matrix. - int *d_nnz, *o_nnz; - - /** \brief - * Stores the mesh change index of the mesh the nnz structure was created for. - * Therefore, if the mesh change index is higher than this value, we have to create - * a new nnz structure for PETSc matrices, because the mesh has been changed and - * therefore also the assembled matrix structure. - */ - int lastMeshNnz; - - bool zeroStartVector; + PetscSolver *petscSolver; }; typedef PetscProblemStat ParallelProblemStat; diff --git a/AMDiS/src/parallel/PetscSolver.cc b/AMDiS/src/parallel/PetscSolver.cc index 680cab4f..b3cc59fa 100644 --- a/AMDiS/src/parallel/PetscSolver.cc +++ b/AMDiS/src/parallel/PetscSolver.cc @@ -11,7 +11,7 @@ #include "parallel/PetscSolver.h" - +#include "parallel/StdMpi.h" namespace AMDiS { @@ -26,48 +26,639 @@ namespace AMDiS { } - PetscSolver* PetscSolver::getPetscSolver(Mat& petscMatrix, MeshDistributor *dist, int n) + void PetscSolverGlobalMatrix::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec) { - FUNCNAME("PetscSolver::getPetscSolver()"); + FUNCNAME("PetscSolverGlobalMatrix::fillPetscMatrix()"); + + double wtime = MPI::Wtime(); + int nComponents = mat->getNumRows(); + int nRankRows = meshDistributor->getNumberRankDofs() * nComponents; + int nOverallRows = meshDistributor->getNumberOverallDofs() * nComponents; + + // === Create PETSc vector (rhs, solution and a temporary vector). === + + VecCreate(PETSC_COMM_WORLD, &petscRhsVec); + VecSetSizes(petscRhsVec, nRankRows, nOverallRows); + VecSetType(petscRhsVec, VECMPI); + + VecCreate(PETSC_COMM_WORLD, &petscSolVec); + VecSetSizes(petscSolVec, nRankRows, nOverallRows); + VecSetType(petscSolVec, VECMPI); + + VecCreate(PETSC_COMM_WORLD, &petscTmpVec); + VecSetSizes(petscTmpVec, nRankRows, nOverallRows); + VecSetType(petscTmpVec, VECMPI); + + int recvAllValues = 0; + int sendValue = static_cast<int>(meshDistributor->getLastMeshChangeIndex() != lastMeshNnz); + meshDistributor->getMpiComm().Allreduce(&sendValue, &recvAllValues, 1, MPI_INT, MPI_SUM); + + if (!d_nnz || recvAllValues != 0) { + if (d_nnz) { + delete [] d_nnz; + d_nnz = NULL; + delete [] o_nnz; + o_nnz = NULL; + } + + createPetscNnzStructure(mat); + lastMeshNnz = meshDistributor->getLastMeshChangeIndex(); + } - string name(""); - GET_PARAMETER(0, "parallel->solver", &name); + + // === Create PETSc matrix with the computed nnz data structure. 
=== + + MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows, + 0, d_nnz, 0, o_nnz, &petscMatrix); - if (name == "petsc-schur") { -#ifdef HAVE_PETSC_DEV - return new PetscSolverSchur(petscMatrix, dist, n); -#else - ERROR_EXIT("Petsc schur complement solver is only supported when petsc-dev is used!\n"); +#if (DEBUG != 0) + MSG("Fill petsc matrix 1 needed %.5f seconds\n", MPI::Wtime() - wtime); #endif - } else if (name == "petsc" || name == "") { - return new PetscSolverGlobalMatrix(petscMatrix, dist, n); - } else { - ERROR_EXIT("No parallel solver %s available!\n", name.c_str()); - } + +#if (DEBUG != 0) + int a, b; + MatGetOwnershipRange(petscMatrix, &a, &b); + TEST_EXIT(a == meshDistributor->getRstart() * nComponents) + ("Wrong matrix ownership range!\n"); + TEST_EXIT(b == meshDistributor->getRstart() * nComponents + nRankRows) + ("Wrong matrix ownership range!\n"); +#endif + + + // === Transfer values from DOF matrices to the PETSc matrix. === + + for (int i = 0; i < nComponents; i++) + for (int j = 0; j < nComponents; j++) + if ((*mat)[i][j]) + setDofMatrix((*mat)[i][j], nComponents, i, j); + +#if (DEBUG != 0) + MSG("Fill petsc matrix 2 needed %.5f seconds\n", MPI::Wtime() - wtime); +#endif + + MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY); + MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY); + +#if (DEBUG != 0) + MSG("Fill petsc matrix 3 needed %.5f seconds\n", + TIME_USED(MPI::Wtime(), wtime)); +#endif + + // === Transfer values from DOF vector to the PETSc vector. === + + for (int i = 0; i < nComponents; i++) + setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i); + + VecAssemblyBegin(petscRhsVec); + VecAssemblyEnd(petscRhsVec); + + MSG("Fill petsc matrix needed %.5f seconds\n", MPI::Wtime() - wtime); } - void PetscSolverGlobalMatrix::providePetscSolver(KSP &solver, PC &pc) + void PetscSolverGlobalMatrix::solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo) { - FUNCNAME("PetscSolverGlobalMatrix::providePetscProblemStat()"); - + FUNCNAME("PetscSolverGlobalMatrix::solvePetscMatrix()"); + + int nComponents = vec.getSize(); + + // === Set old solution to be initiual guess for PETSc solver. === + if (!zeroStartVector) { + VecSet(petscSolVec, 0.0); + + for (int i = 0; i < nComponents; i++) + setDofVector(petscSolVec, vec.getDOFVector(i), nComponents, i, true); + + VecAssemblyBegin(petscSolVec); + VecAssemblyEnd(petscSolVec); + } + + // === Init PETSc solver. === + + KSP solver; + PC pc; + KSPCreate(PETSC_COMM_WORLD, &solver); KSPGetPC(solver, &pc); - - KSPSetOperators(solver, matrix, matrix, SAME_NONZERO_PATTERN); + KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT); KSPSetType(solver, KSPBCGS); KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0); KSPSetFromOptions(solver); PCSetFromOptions(pc); + + // Do not delete the solution vector, use it for the initial guess. + if (!zeroStartVector) + KSPSetInitialGuessNonzero(solver, PETSC_TRUE); + + + // === Run PETSc. === + + KSPSolve(solver, petscRhsVec, petscSolVec); + + + // === Transfere values from PETSc's solution vectors to the dof vectors. 
+ + PetscScalar *vecPointer; + VecGetArray(petscSolVec, &vecPointer); + + int nRankDofs = meshDistributor->getNumberRankDofs(); + for (int i = 0; i < nComponents; i++) { + DOFVector<double> &dofvec = *(vec.getDOFVector(i)); + for (int j = 0; j < nRankDofs; j++) + dofvec[meshDistributor->mapLocalToDofIndex(j)] = + vecPointer[j * nComponents + i]; + } + + VecRestoreArray(petscSolVec, &vecPointer); + + + // === Synchronize DOFs at common DOFs, i.e., DOFs that correspond to more === + // === than one partition. === + meshDistributor->synchVector(vec); + + + // === Print information about solution process. === + + int iterations = 0; + KSPGetIterationNumber(solver, &iterations); + MSG(" Number of iterations: %d\n", iterations); + adaptInfo->setSolverIterations(iterations); + + double norm = 0.0; + MatMult(petscMatrix, petscSolVec, petscTmpVec); + VecAXPY(petscTmpVec, -1.0, petscRhsVec); + VecNorm(petscTmpVec, NORM_2, &norm); + MSG(" Residual norm: %e\n", norm); + + + // === Destroy Petsc's variables. === + + MatDestroy(petscMatrix); + VecDestroy(petscRhsVec); + VecDestroy(petscSolVec); + VecDestroy(petscTmpVec); + KSPDestroy(solver); + } + + + void PetscSolverGlobalMatrix::setDofMatrix(DOFMatrix* mat, int dispMult, + int dispAddRow, int dispAddCol) + { + FUNCNAME("PetscSolverGlobalMatrix::setDofMatrix()"); + + TEST_EXIT(mat)("No DOFMatrix!\n"); + + using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end; + namespace traits= mtl::traits; + typedef DOFMatrix::base_matrix_type Matrix; + + traits::col<Matrix>::type col(mat->getBaseMatrix()); + traits::const_value<Matrix>::type value(mat->getBaseMatrix()); + + typedef traits::range_generator<row, Matrix>::type cursor_type; + typedef traits::range_generator<nz, cursor_type>::type icursor_type; + + vector<int> cols; + vector<double> values; + cols.reserve(300); + values.reserve(300); + + vector<int> globalCols; + + + // === Traverse all rows of the dof matrix and insert row wise the values === + // === to the PETSc matrix. === + + for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), + cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) { + + // Global index of the current row dof. + int globalRowDof = meshDistributor->mapLocalToGlobal(*cursor); + // Test if the current row dof is a periodic dof. + bool periodicRow = meshDistributor->isPeriodicDof(globalRowDof); + + if (!periodicRow) { + // === Row DOF index is not periodic. === + + // Calculate PETSc row index. + + int rowIndex = globalRowDof * dispMult + dispAddRow; + + cols.clear(); + values.clear(); + + for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); + icursor != icend; ++icursor) { + + // Global index of the current column index. + int globalColDof = meshDistributor->mapLocalToGlobal(col(*icursor)); + // Test if the current col dof is a periodic dof. + bool periodicCol = meshDistributor->isPeriodicDof(globalColDof); + // Calculate PETSc col index. + int colIndex = globalColDof * dispMult + dispAddCol; + + // Ignore all zero entries, expect it is a diagonal entry. + if (value(*icursor) == 0.0 && rowIndex != colIndex) + continue; + + if (!periodicCol) { + // Calculate the exact position of the column index in the PETSc matrix. + cols.push_back(globalColDof * dispMult + dispAddCol); + values.push_back(value(*icursor)); + } else { + // === Row index is not periodic, but column index is. === + + // Create set of all periodic associations of the column index. 
+ std::set<int> perAsc; + std::set<int>& perColAsc = + meshDistributor->getPerDofAssociations(globalColDof); + for (std::set<int>::iterator it = perColAsc.begin(); + it != perColAsc.end(); ++it) + if (*it >= -3) + perAsc.insert(*it); + + // Scale value to the number of periodic associations of the column index. + double scaledValue = + value(*icursor) * pow(0.5, static_cast<double>(perAsc.size())); + + + // === Create set of all matrix column indices due to the periodic === + // === associations of the column DOF index. === + + vector<int> newCols; + + // First, add the original matrix index. + newCols.push_back(globalColDof); + + // And add all periodic matrix indices. + for (std::set<int>::iterator it = perAsc.begin(); + it != perAsc.end(); ++it) { + int nCols = static_cast<int>(newCols.size()); + + for (int i = 0; i < nCols; i++) { + TEST_EXIT_DBG(meshDistributor->isPeriodicDof(newCols[i], *it)) + ("Wrong periodic DOF associations at boundary %d with DOF %d!\n", + *it, newCols[i]); + + newCols.push_back(meshDistributor->getPeriodicMapping(newCols[i], *it)); + } + } + + for (unsigned int i = 0; i < newCols.size(); i++) { + cols.push_back(newCols[i] * dispMult + dispAddCol); + values.push_back(scaledValue); + } + } + } + + MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), + &(cols[0]), &(values[0]), ADD_VALUES); + } else { + // === Row DOF index is periodic. === + + // Because this row is periodic, we will have to add the entries of this + // matrix row to multiple rows. The following maps store to each row an + // array of column indices and values of the entries that must be added to + // the PETSc matrix. + map<int, vector<int> > colsMap; + map<int, vector<double> > valsMap; + + // Traverse all column entries. + for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); + icursor != icend; ++icursor) { + + // Global index of the current column index. + int globalColDof = meshDistributor->mapLocalToGlobal(col(*icursor)); + + // Ignore all zero entries, expect it is a diagonal entry. + if (value(*icursor) == 0.0 && globalRowDof != globalColDof) + continue; + + + // === Add all periodic associations of both, the row and the column === + // === indices to the set perAsc. === + + std::set<int> perAsc; + + if (meshDistributor->isPeriodicDof(globalColDof)) { + std::set<int>& perColAsc = + meshDistributor->getPerDofAssociations(globalColDof); + for (std::set<int>::iterator it = perColAsc.begin(); + it != perColAsc.end(); ++it) + if (*it >= -3) + perAsc.insert(*it); + } + + std::set<int>& perRowAsc = + meshDistributor->getPerDofAssociations(globalRowDof); + for (std::set<int>::iterator it = perRowAsc.begin(); + it != perRowAsc.end(); ++it) + if (*it >= -3) + perAsc.insert(*it); + + // Scale the value with respect to the number of periodic associations. + double scaledValue = + value(*icursor) * pow(0.5, static_cast<double>(perAsc.size())); + + + // === Create all matrix entries with respect to the periodic === + // === associations of the row and column indices. === + + vector<pair<int, int> > entry; + + // First, add the original entry. + entry.push_back(make_pair(globalRowDof, globalColDof)); + + // Then, traverse the periodic associations of the row and column indices + // and create the corresponding entries. 
+ for (std::set<int>::iterator it = perAsc.begin(); it != perAsc.end(); ++it) { + int nEntry = static_cast<int>(entry.size()); + for (int i = 0; i < nEntry; i++) { + int perRowDof = 0; + if (meshDistributor->getPeriodicMapping()[*it].count(entry[i].first)) + perRowDof = meshDistributor->getPeriodicMapping(entry[i].first, *it); + else + perRowDof = entry[i].first; + + int perColDof; + if (meshDistributor->getPeriodicMapping()[*it].count(entry[i].second)) + perColDof = meshDistributor->getPeriodicMapping(entry[i].second, *it); + else + perColDof = entry[i].second; + + + entry.push_back(make_pair(perRowDof, perColDof)); + } + } + + + // === Translate the matrix entries to PETSc's matrix. + + for (vector<pair<int, int> >::iterator eIt = entry.begin(); + eIt != entry.end(); ++eIt) { + // Calculate row and column indices of the PETSc matrix. + int rowIndex = eIt->first * dispMult + dispAddRow; + int colIndex = eIt->second * dispMult + dispAddCol; + + colsMap[rowIndex].push_back(colIndex); + valsMap[rowIndex].push_back(scaledValue); + } + } + + + // === Finally, add all periodic rows to the PETSc matrix. === + + for (map<int, vector<int> >::iterator rowIt = colsMap.begin(); + rowIt != colsMap.end(); ++rowIt) { + TEST_EXIT_DBG(rowIt->second.size() == valsMap[rowIt->first].size()) + ("Should not happen!\n"); + + int rowIndex = rowIt->first; + MatSetValues(petscMatrix, 1, &rowIndex, rowIt->second.size(), + &(rowIt->second[0]), &(valsMap[rowIt->first][0]), ADD_VALUES); + } + } + } + } + + + void PetscSolverGlobalMatrix::setDofVector(Vec& petscVec, DOFVector<double>* vec, + int dispMult, int dispAdd, bool rankOnly) + { + FUNCNAME("PetscSolverGlobalMatrix::setDofVector()"); + + // Traverse all used DOFs in the dof vector. + DOFVector<double>::Iterator dofIt(vec, USED_DOFS); + for (dofIt.reset(); !dofIt.end(); ++dofIt) { + if (rankOnly && !meshDistributor->getIsRankDof(dofIt.getDOFIndex())) + continue; + + // Calculate global row index of the dof. + DegreeOfFreedom globalRowDof = + meshDistributor->mapLocalToGlobal(dofIt.getDOFIndex()); + // Calculate PETSc index of the row dof. + int index = globalRowDof * dispMult + dispAdd; + + if (meshDistributor->isPeriodicDof(globalRowDof)) { + std::set<int>& perAsc = meshDistributor->getPerDofAssociations(globalRowDof); + double value = *dofIt / (perAsc.size() + 1.0); + VecSetValues(petscVec, 1, &index, &value, ADD_VALUES); + + for (std::set<int>::iterator perIt = perAsc.begin(); perIt != perAsc.end(); ++perIt) { + int mappedDof = meshDistributor->getPeriodicMapping(globalRowDof, *perIt); + int mappedIndex = mappedDof * dispMult + dispAdd; + VecSetValues(petscVec, 1, &mappedIndex, &value, ADD_VALUES); + } + } else { + // The dof index is not periodic. 
+ double value = *dofIt; + VecSetValues(petscVec, 1, &index, &value, ADD_VALUES); + } + } + } + + + void PetscSolverGlobalMatrix::createPetscNnzStructure(Matrix<DOFMatrix*> *mat) + { + FUNCNAME("PetscSolverGlobalMatrix::createPetscNnzStructure()"); + + TEST_EXIT_DBG(!d_nnz)("There is something wrong!\n"); + TEST_EXIT_DBG(!o_nnz)("There is something wrong!\n"); + + int nComponents = mat->getNumRows(); + int nRankRows = meshDistributor->getNumberRankDofs() * nComponents; + d_nnz = new int[nRankRows]; + o_nnz = new int[nRankRows]; + for (int i = 0; i < nRankRows; i++) { + d_nnz[i] = 0; + o_nnz[i] = 0; + } + + using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end; + namespace traits = mtl::traits; + typedef DOFMatrix::base_matrix_type Matrix; + typedef vector<pair<int, int> > MatrixNnzEntry; + typedef map<int, DofContainer> RankToDofContainer; + + // Stores to each rank a list of nnz entries (i.e. pairs of row and column index) + // that this rank will send to. These nnz entries will be assembled on this rank, + // but because the row DOFs are not DOFs of this rank they will be send to the + // owner of the row DOFs. + map<int, MatrixNnzEntry> sendMatrixEntry; + + // Maps to each DOF that must be send to another rank the rank number of the + // receiving rank. + map<DegreeOfFreedom, int> sendDofToRank; + + + // First, create for all ranks we send data to MatrixNnzEntry object with 0 entries. + for (RankToDofContainer::iterator it = meshDistributor->getRecvDofs().begin(); + it != meshDistributor->getRecvDofs().end(); ++it) { + sendMatrixEntry[it->first].resize(0); + + for (DofContainer::iterator dofIt = it->second.begin(); + dofIt != it->second.end(); ++dofIt) + sendDofToRank[**dofIt] = it->first; + } + + + std::set<int> recvFromRank; + for (RankToDofContainer::iterator it = meshDistributor->getSendDofs().begin(); + it != meshDistributor->getSendDofs().end(); ++it) + recvFromRank.insert(it->first); + + + for (int i = 0; i < nComponents; i++) { + for (int j = 0; j < nComponents; j++) { + if (!(*mat)[i][j]) + continue; + + Matrix bmat = (*mat)[i][j]->getBaseMatrix(); + + traits::col<Matrix>::type col(bmat); + traits::const_value<Matrix>::type value(bmat); + + typedef traits::range_generator<row, Matrix>::type cursor_type; + typedef traits::range_generator<nz, cursor_type>::type icursor_type; + + for (cursor_type cursor = begin<row>(bmat), + cend = end<row>(bmat); cursor != cend; ++cursor) { + + int globalRowDof = meshDistributor->mapLocalToGlobal(*cursor); + + vector<int> rows; + rows.push_back(globalRowDof); + vector<int> rowRank; + if (meshDistributor->getIsRankDof(*cursor)) { + rowRank.push_back(meshDistributor->getMpiRank()); + } else { + // Find out who is the member of this DOF. + TEST_EXIT_DBG(sendDofToRank.count(*cursor))("DOF %d has no receiver!\n", *cursor); + + rowRank.push_back(sendDofToRank[*cursor]); + } + + // Map the local row number to the global DOF index and create from it + // the global PETSc row index of this DOF. + + int petscRowIdx = globalRowDof * nComponents + i; + + if (meshDistributor->getIsRankDof(*cursor)) { + + // === The current row DOF is a rank dof, so create the corresponding === + // === nnz values directly on rank's nnz data. === + + // This is the local row index of the local PETSc matrix. + int localPetscRowIdx = + petscRowIdx - meshDistributor->getRstart() * nComponents; + + TEST_EXIT_DBG(localPetscRowIdx >= 0 && localPetscRowIdx < nRankRows) + ("Should not happen! 
\n Debug info: localRowIdx = %d globalRowIndx = %d petscRowIdx = %d localPetscRowIdx = %d rStart = %d nCompontens = %d nRankRows = %d\n", + *cursor, + meshDistributor->mapLocalToGlobal(*cursor), + petscRowIdx, + localPetscRowIdx, + meshDistributor->getRstart(), + nComponents, + nRankRows); + + + // Traverse all non zero entries in this row. + for (icursor_type icursor = begin<nz>(cursor), + icend = end<nz>(cursor); icursor != icend; ++icursor) { + int petscColIdx = + meshDistributor->mapLocalToGlobal(col(*icursor)) * nComponents + j; + + if (value(*icursor) != 0.0 || petscRowIdx == petscColIdx) { + // The row DOF is a rank DOF, if also the column is a rank DOF, + // increment the d_nnz values for this row, otherwise the o_nnz value. + if (petscColIdx >= meshDistributor->getRstart() * nComponents && + petscColIdx < meshDistributor->getRstart() * nComponents + nRankRows) + d_nnz[localPetscRowIdx]++; + else + o_nnz[localPetscRowIdx]++; + } + } + } else { + // === The current row DOF is not a rank dof, i.e., it will be created === + // === on this rank, but after this it will be send to another rank === + // === matrix. So we need to send also the corresponding nnz structure === + // === of this row to the corresponding rank. === + + // Send all non zero entries to the member of the row DOF. + int sendToRank = sendDofToRank[*cursor]; + + for (icursor_type icursor = begin<nz>(cursor), + icend = end<nz>(cursor); icursor != icend; ++icursor) { + if (value(*icursor) != 0.0) { + int petscColIdx = + meshDistributor->mapLocalToGlobal(col(*icursor)) * nComponents + j; + + sendMatrixEntry[sendToRank]. + push_back(make_pair(petscRowIdx, petscColIdx)); + } + } + + } // if (isRankDof[*cursor]) ... else ... + } // for each row in mat[i][j] + } + } + + // === Send and recv the nnz row structure to/from other ranks. === + + StdMpi<MatrixNnzEntry> stdMpi(meshDistributor->getMpiComm(), true); + stdMpi.send(sendMatrixEntry); + for (std::set<int>::iterator it = recvFromRank.begin(); + it != recvFromRank.end(); ++it) + stdMpi.recv(*it); + stdMpi.startCommunication(); + + + // === Evaluate the nnz structure this rank got from other ranks and add it to === + // === the PETSc nnz data structure. === + + for (map<int, MatrixNnzEntry>::iterator it = stdMpi.getRecvData().begin(); + it != stdMpi.getRecvData().end(); ++it) { + if (it->second.size() > 0) { + for (unsigned int i = 0; i < it->second.size(); i++) { + int r = it->second[i].first; + int c = it->second[i].second; + + int localRowIdx = r - meshDistributor->getRstart() * nComponents; + + TEST_EXIT_DBG(localRowIdx >= 0 && localRowIdx < nRankRows) + ("Got row index %d/%d (nRankRows = %d) from rank %d. Should not happen!\n", + r, localRowIdx, nRankRows, it->first); + + if (c < meshDistributor->getRstart() * nComponents || + c >= meshDistributor->getRstart() * nComponents + nRankRows) + o_nnz[localRowIdx]++; + else + d_nnz[localRowIdx]++; + } + } + } + + // The above algorithm for calculating the number of nnz per row over- + // approximates the value, i.e., the number is always equal or larger to + // the real number of nnz values in the global parallel matrix. For small + // matrices, the problem may arise, that the result is larger than the + // number of elements in a row. This is fixed in the following. 
+ + if (nRankRows < 100) + for (int i = 0; i < nRankRows; i++) + d_nnz[i] = std::min(d_nnz[i], nRankRows); } #ifdef HAVE_PETSC_DEV - void PetscSolverSchur::providePetscSolver(KSP &solver, PC &pc) + void PetscSolverSchur::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec) { - FUNCNAME("PetscSolverSchur::providePetscProblemStat()"); - + FUNCNAME("PetscSolverSchur::fillPetscMatrix()"); + + int nComponents = mat->getNumRows(); + typedef map<int, DofContainer> RankToDofContainer; typedef map<DegreeOfFreedom, bool> DofIndexToBool; @@ -84,8 +675,9 @@ namespace AMDiS { boundaryDofsSet.insert(meshDistributor->mapLocalToGlobal(**dofIt) * nComponents + i); } - vector<DegreeOfFreedom> boundaryDofs(boundaryDofsSet.begin(), - boundaryDofsSet.end()); + boundaryDofs.clear(); + boundaryDofs.insert(boundaryDofs.begin(), + boundaryDofsSet.begin(), boundaryDofsSet.end()); std::set<DegreeOfFreedom> otherBoundaryLocalDofs; RankToDofContainer& recvDofs = meshDistributor->getRecvDofs(); @@ -105,9 +697,18 @@ namespace AMDiS { for (int i = 0; i < nComponents; i++) interiorDofsSet.insert(meshDistributor->mapLocalToGlobal(dofIt->first) * nComponents + i); - vector<DegreeOfFreedom> interiorDofs(interiorDofsSet.begin(), - interiorDofsSet.end()); + interiorDofs.clear(); + interiorDofs.insert(interiorDofs.begin(), + interiorDofsSet.begin(), interiorDofsSet.end()); + } + + + void PetscSolverSchur::solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo) + { + FUNCNAME("PetscSolverSchur::solvePetscMatrix()"); + KSP solver; + PC pc; KSPCreate(PETSC_COMM_WORLD, &solver); KSPGetPC(solver, &pc); PCSetType(pc, PCFIELDSPLIT); @@ -123,7 +724,7 @@ namespace AMDiS { PCFieldSplitSetIS(pc, "boundary", boundaryIs); - KSPSetOperators(solver, matrix, matrix, SAME_NONZERO_PATTERN); + // KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT); KSPSetFromOptions(solver); PCSetFromOptions(pc); diff --git a/AMDiS/src/parallel/PetscSolver.h b/AMDiS/src/parallel/PetscSolver.h index f241ded9..e5492b4e 100644 --- a/AMDiS/src/parallel/PetscSolver.h +++ b/AMDiS/src/parallel/PetscSolver.h @@ -23,8 +23,15 @@ #ifndef AMDIS_PETSC_SOLVER_H #define AMDIS_PETSC_SOLVER_H -#include "parallel/PetscProblemStat.h" -#include "CreatorInterface.h" +#include "AMDiS_fwd.h" +#include "Global.h" +#include "Parameters.h" +#include "DOFMatrix.h" +#include "parallel/MeshDistributor.h" + +#include "petsc.h" +#include "petscsys.h" +#include "petscao.h" namespace AMDiS { @@ -32,52 +39,101 @@ namespace AMDiS { class PetscSolver { - protected: - PetscSolver(Mat& petscMatrix, MeshDistributor *dist, int n) - : matrix(petscMatrix), - meshDistributor(dist), - nComponents(n) + public: + PetscSolver(MeshDistributor *m) + : meshDistributor(m) {} - public: virtual ~PetscSolver() {} - /// Is used to create a PETSc solver and preconditioner object. This function may - /// be overridden to create some special PETSc solver. - virtual void providePetscSolver(KSP &solver, PC &pc) = 0; + /** \brief + * Create a PETSc matrix and PETSc vectors. The given DOF matrices are used to + * create the nnz structure of the PETSc matrix and the values are transfered to it. + * The given DOF vectors are used to the the values of the PETSc rhs vector. 
+ * + * \param[in] mat + * \param[in] vec + */ + virtual void fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec) = 0; - static PetscSolver* getPetscSolver(Mat& petscMatrix, - MeshDistributor *dist, - int n); + /// Use PETSc to solve the linear system of equations + virtual void solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo) = 0; protected: - Mat& matrix; - MeshDistributor *meshDistributor; - - int nComponents; }; class PetscSolverGlobalMatrix : public PetscSolver { public: - PetscSolverGlobalMatrix(Mat& petscMatrix, MeshDistributor *dist, int n) - : PetscSolver(petscMatrix, dist, n) - {} + PetscSolverGlobalMatrix(MeshDistributor *m) + : PetscSolver(m), + d_nnz(NULL), + o_nnz(NULL), + lastMeshNnz(0), + zeroStartVector(false) + { + GET_PARAMETER(0, "parallel->use zero start vector", "%d", &zeroStartVector); + } + + void fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec); + + void solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo); - void providePetscSolver(KSP &solver, PC &pc); + protected: + /// Creates a new non zero pattern structure for the Petsc matrix. + void createPetscNnzStructure(Matrix<DOFMatrix*> *mat); + + /// Takes a dof matrix and sends the values to the global petsc matrix. + void setDofMatrix(DOFMatrix* mat, int dispMult = 1, + int dispAddRow = 0, int dispAddCol = 0); + + /// Takes a dof vector and sends its values to a given petsc vector. + void setDofVector(Vec& petscVec, DOFVector<double>* vec, + int disMult = 1, int dispAdd = 0, bool rankOnly = false); + + protected: + /// Petsc's matrix structure. + Mat petscMatrix; + + /** \brief + * Petsc's vector structures for the rhs vector, the solution vector and a + * temporary vector for calculating the final residuum. + */ + Vec petscRhsVec, petscSolVec, petscTmpVec; + + /// Arrays definig the non zero pattern of Petsc's matrix. + int *d_nnz, *o_nnz; + + /** \brief + * Stores the mesh change index of the mesh the nnz structure was created for. + * Therefore, if the mesh change index is higher than this value, we have to create + * a new nnz structure for PETSc matrices, because the mesh has been changed and + * therefore also the assembled matrix structure. + */ + int lastMeshNnz; + + bool zeroStartVector; }; + #ifdef HAVE_PETSC_DEV class PetscSolverSchur : public PetscSolver { public: - PetscSolverSchur(Mat& petscMatrix, MeshDistributor *dist, int n) - : PetscSolver(petscMatrix, dist, n) + PetscSolverSchur(MeshDistributor *m) + : PetscSolver(m) {} - void providePetscSolver(KSP &solver, PC &pc); + void fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec); + + void solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo); + + protected: + vector<DegreeOfFreedom> boundaryDofs; + + vector<DegreeOfFreedom> interiorDofs; }; #endif -- GitLab
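
Note on the refactored solver structure: with this patch, PetscProblemStat no longer assembles and solves the PETSc system itself. It only reads the parameter parallel->solver, instantiates either PetscSolverGlobalMatrix or (with petsc-dev) PetscSolverSchur, and delegates to the abstract PetscSolver interface, i.e. fillPetscMatrix() followed by solvePetscMatrix(). A further backend can therefore be added by deriving from PetscSolver and adding one more branch in the PetscProblemStat constructor. The following is only a minimal sketch of that extension point, not part of the patch; the class name MyPetscSolver and the empty method bodies are placeholders.

    #include "parallel/PetscSolver.h"

    namespace AMDiS {

      // Sketch of an additional PETSc backend written against the new
      // abstract interface introduced by this patch.
      class MyPetscSolver : public PetscSolver
      {
      public:
        MyPetscSolver(MeshDistributor *m)
          : PetscSolver(m)
        {}

        // Assemble the distributed PETSc matrix and rhs vector from the
        // DOF matrices/vectors of this rank's mesh partition.
        void fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
        {
          // create the Mat/Vec objects and transfer the values here
        }

        // Solve the assembled system and write the solution back into vec.
        void solvePetscMatrix(SystemVector &vec, AdaptInfo *adaptInfo)
        {
          // set up KSP/PC, call KSPSolve(), copy the result into vec and
          // synchronize DOFs shared between partitions
        }
      };

    }

Selecting such a backend would then only require comparing one more string against the value of parallel->solver in the PetscProblemStat constructor shown above.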
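
The non-zero preallocation that moved into PetscSolverGlobalMatrix::createPetscNnzStructure() follows the standard PETSc rule for MPIAIJ matrices: for every locally owned row, a column index inside this rank's ownership range is counted in d_nnz, every other column in o_nnz, and the two arrays are passed to MatCreateMPIAIJ(). A condensed sketch of that classification, independent of the AMDiS data structures (the function and variable names here are chosen for illustration only; in the patch rStart corresponds to getRstart() * nComponents):

    // Classify one global matrix entry (r, c) into the diagonal or
    // off-diagonal preallocation array of an MPIAIJ matrix. rStart and
    // nRankRows describe this rank's contiguous row ownership range.
    inline void countNnzEntry(int r, int c, int rStart, int nRankRows,
                              int *d_nnz, int *o_nnz)
    {
      int localRow = r - rStart;
      if (localRow < 0 || localRow >= nRankRows)
        return;  // row owned by another rank; the patch instead forwards
                 // such entries to the owning rank via StdMpi

      if (c >= rStart && c < rStart + nRankRows)
        d_nnz[localRow]++;   // column lies in the diagonal block
      else
        o_nnz[localRow]++;   // column lies in the off-diagonal block
    }

As noted in the patch itself, these counts may over-approximate the true number of non-zeros per row, which is acceptable for preallocation; the additional clamp to nRankRows for very small systems keeps the requested row length within the matrix dimension.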