From 8817dab18088f5ef6a0d731e64edf9fd454a2800 Mon Sep 17 00:00:00 2001 From: Thomas Witkowski Date: Thu, 3 Jul 2008 14:11:05 +0000 Subject: [PATCH] * Several optimizations --- AMDiS/src/AdaptInstationary.cc | 12 ++++-- AMDiS/src/Assembler.cc | 21 ++++++---- AMDiS/src/Assembler.h | 5 +++ AMDiS/src/ElInfo.h | 4 +- AMDiS/src/ElInfo1d.cc | 4 +- AMDiS/src/ElInfo1d.h | 4 +- AMDiS/src/ElInfo2d.cc | 4 +- AMDiS/src/ElInfo2d.h | 4 +- AMDiS/src/ElInfo3d.cc | 71 ++++++++++++++++++-------------- AMDiS/src/ElInfo3d.h | 21 ++++++++-- AMDiS/src/ElementFileWriter.h | 4 ++ AMDiS/src/FileWriter.h | 12 ++++++ AMDiS/src/Lagrange.cc | 11 ++--- AMDiS/src/OEMSolver.hh | 2 +- AMDiS/src/ProblemInstat.cc | 9 ++++ AMDiS/src/ProblemInstat.h | 4 ++ AMDiS/src/ProblemScal.cc | 10 +++++ AMDiS/src/ProblemScal.h | 5 +++ AMDiS/src/ProblemTimeInterface.h | 5 +++ AMDiS/src/ProblemVec.cc | 10 +++++ AMDiS/src/ProblemVec.h | 5 +++ AMDiS/src/Serializer.h | 6 ++- 22 files changed, 171 insertions(+), 62 deletions(-) diff --git a/AMDiS/src/AdaptInstationary.cc b/AMDiS/src/AdaptInstationary.cc index b52fca3d..e6f67c81 100644 --- a/AMDiS/src/AdaptInstationary.cc +++ b/AMDiS/src/AdaptInstationary.cc @@ -205,13 +205,17 @@ namespace AMDiS { problemTime_->initTimestep(adaptInfo_); - #ifdef _OPENMP -#pragma omp parallel sections - { +#ifdef _OPENMP + if (problemTime_->existsDelayedCalculation()) { +#pragma omp parallel sections num_threads(2) + { #pragma omp section - problemTime_->startDelayedTimestepCalculation(); + problemTime_->startDelayedTimestepCalculation(); #pragma omp section + oneTimestep(); + } + } else { oneTimestep(); } #else diff --git a/AMDiS/src/Assembler.cc b/AMDiS/src/Assembler.cc index b01d9f3c..b9486fb2 100644 --- a/AMDiS/src/Assembler.cc +++ b/AMDiS/src/Assembler.cc @@ -30,6 +30,7 @@ namespace AMDiS { : nRow(0), nCol(0), coordsAtQPs(NULL), + coordsNumAllocated(0), quadrature(quadrat), psiFast(NULL), phiFast(NULL), @@ -128,16 +129,20 @@ namespace AMDiS { return coordsAtQPs; } - // not yet calcualted ! - if (coordsAtQPs) - DELETE [] coordsAtQPs; - // allocate memory - coordsAtQPs = NEW WorldVector[numPoints]; + if (coordsAtQPs) { + if (coordsNumAllocated != numPoints) { + DELETE [] coordsAtQPs; + coordsAtQPs = NEW WorldVector[numPoints]; + coordsNumAllocated = numPoints; + } + } else { + coordsAtQPs = NEW WorldVector[numPoints]; + coordsNumAllocated = numPoints; + } // set new values - WorldVector* k = NULL; - int l; - for (k = &(coordsAtQPs[0]), l = 0; k < &(coordsAtQPs[numPoints]); ++k, ++l) { + WorldVector* k = &(coordsAtQPs[0]); + for (int l = 0; k < &(coordsAtQPs[numPoints]); ++k, ++l) { elInfo->coordToWorld(localQuad->getLambda(l), k); } diff --git a/AMDiS/src/Assembler.h b/AMDiS/src/Assembler.h index e27e092b..fa01e0e5 100644 --- a/AMDiS/src/Assembler.h +++ b/AMDiS/src/Assembler.h @@ -245,6 +245,11 @@ namespace AMDiS { */ bool coordsValid; + /** \brief + * Used for \ref getCoordsAtQP(). Stores the number of allocated WorldVectors. + */ + int coordsNumAllocated; + /** \brief * Needed Quadrature. Constructed in the constructor of SubAssembler */ diff --git a/AMDiS/src/ElInfo.h b/AMDiS/src/ElInfo.h index d4636cb4..4b6a21e0 100644 --- a/AMDiS/src/ElInfo.h +++ b/AMDiS/src/ElInfo.h @@ -414,7 +414,7 @@ namespace AMDiS { * value of the determinant of the affine linear paraetrization's Jacobian. * pure virtual => must be overriden in sub-class. */ - virtual double calcGrdLambda(DimVec >& grd_lam) const = 0; + virtual double calcGrdLambda(DimVec >& grd_lam) = 0; /** \brief * calculates a normal of the given side (1d,2d: edge, 3d: face) of \ref element. @@ -422,7 +422,7 @@ namespace AMDiS { * transformation to the reference element. * pure virtual => must be overriden in sub-class. */ - virtual double getNormal(int side, WorldVector &normal) const = 0; + virtual double getNormal(int side, WorldVector &normal) = 0; /** \brief diff --git a/AMDiS/src/ElInfo1d.cc b/AMDiS/src/ElInfo1d.cc index 7eceb7ec..b4737d41 100644 --- a/AMDiS/src/ElInfo1d.cc +++ b/AMDiS/src/ElInfo1d.cc @@ -83,7 +83,7 @@ namespace AMDiS { /* value of the determinante from the transformation to the reference */ /* element */ /****************************************************************************/ - double ElInfo1d::calcGrdLambda(DimVec >& grd_lam) const + double ElInfo1d::calcGrdLambda(DimVec >& grd_lam) { FUNCNAME("ElInfo1d::calcGrdLambda()"); @@ -149,7 +149,7 @@ namespace AMDiS { /* coord; return the absulute value of the determinant from the */ /* transformation to the reference element */ /****************************************************************************/ - double ElInfo1d::getNormal(int side, WorldVector &normal) const + double ElInfo1d::getNormal(int side, WorldVector &normal) { normal = coord_[side] - coord_[(side + 1) % 2]; double det = norm(&normal); diff --git a/AMDiS/src/ElInfo1d.h b/AMDiS/src/ElInfo1d.h index f2eff810..9cb395c0 100644 --- a/AMDiS/src/ElInfo1d.h +++ b/AMDiS/src/ElInfo1d.h @@ -63,12 +63,12 @@ namespace AMDiS { /** \brief * 1-dimensional realisation of ElInfo's calcGrdLambda method. */ - double calcGrdLambda(DimVec >& grd_lam) const; + double calcGrdLambda(DimVec >& grd_lam); /** \brief * 1-dimensional realisation of ElInfo's getNormal method. */ - double getNormal(int side, WorldVector &normal) const; + double getNormal(int side, WorldVector &normal); /** \brief * 1-dimensional realisation of ElInfo's getElementNormal method. diff --git a/AMDiS/src/ElInfo2d.cc b/AMDiS/src/ElInfo2d.cc index 72e708dd..485cd385 100644 --- a/AMDiS/src/ElInfo2d.cc +++ b/AMDiS/src/ElInfo2d.cc @@ -391,7 +391,7 @@ namespace AMDiS { } } - double ElInfo2d::calcGrdLambda(DimVec >& grd_lam) const + double ElInfo2d::calcGrdLambda(DimVec >& grd_lam) { FUNCNAME("ElInfo2d::calcGrdLambda()"); @@ -512,7 +512,7 @@ namespace AMDiS { } - double ElInfo2d::getNormal(int side, WorldVector &normal) const + double ElInfo2d::getNormal(int side, WorldVector &normal) { FUNCNAME("ElInfo::getNormal()"); diff --git a/AMDiS/src/ElInfo2d.h b/AMDiS/src/ElInfo2d.h index f09787c1..217196af 100644 --- a/AMDiS/src/ElInfo2d.h +++ b/AMDiS/src/ElInfo2d.h @@ -63,12 +63,12 @@ namespace AMDiS { /** \brief * 2-dimensional realisation of ElInfo's calcGrdLambda method. */ - double calcGrdLambda(DimVec >& grd_lam) const; + double calcGrdLambda(DimVec >& grd_lam); /** \brief * 2-dimensional realisation of ElInfo's getNormal method. */ - double getNormal(int side, WorldVector &normal) const; + double getNormal(int side, WorldVector &normal); /** \brief * 2-dimensional realisation of ElInfo's getElementNormal method. diff --git a/AMDiS/src/ElInfo3d.cc b/AMDiS/src/ElInfo3d.cc index 5d71ba52..1d2b4d56 100644 --- a/AMDiS/src/ElInfo3d.cc +++ b/AMDiS/src/ElInfo3d.cc @@ -107,30 +107,35 @@ namespace AMDiS { } } - double ElInfo3d::calcGrdLambda(DimVec >& grd_lam) const + double ElInfo3d::calcGrdLambda(DimVec >& grd_lam) { FUNCNAME("ElInfo3d::calcGrdLambda()"); TEST_EXIT_DBG(dimOfWorld == 3) ("dim != dim_of_world ! use parametric elements!\n"); - WorldVector e1, e2, e3; - WorldVector v0; + int myRank = omp_get_thread_num(); + + WorldVector *e1 = &tmpWorldVecs[myRank][0]; + WorldVector *e2 = &tmpWorldVecs[myRank][1]; + WorldVector *e3 = &tmpWorldVecs[myRank][2]; + WorldVector *v0 = &tmpWorldVecs[myRank][3]; + double det, adet; double a11, a12, a13, a21, a22, a23, a31, a32, a33; testFlag(Mesh::FILL_COORDS); - v0 = coord_[0]; + *v0 = coord_[0]; for (int i = 0; i < 3; i++) { - e1[i] = coord_[1][i] - v0[i]; - e2[i] = coord_[2][i] - v0[i]; - e3[i] = coord_[3][i] - v0[i]; + (*e1)[i] = coord_[1][i] - (*v0)[i]; + (*e2)[i] = coord_[2][i] - (*v0)[i]; + (*e3)[i] = coord_[3][i] - (*v0)[i]; } - det = e1[0] * (e2[1] * e3[2] - e2[2] * e3[1]) - - e1[1] * (e2[0] * e3[2] - e2[2] * e3[0]) - + e1[2] * (e2[0] * e3[1] - e2[1] * e3[0]); + det = (*e1)[0] * ((*e2)[1] * (*e3)[2] - (*e2)[2] * (*e3)[1]) + - (*e1)[1] * ((*e2)[0] * (*e3)[2] - (*e2)[2] * (*e3)[0]) + + (*e1)[2] * ((*e2)[0] * (*e3)[1] - (*e2)[1] * (*e3)[0]); adet = abs(det); @@ -141,15 +146,15 @@ namespace AMDiS { grd_lam[i][j] = 0.0; } else { det = 1.0 / det; - a11 = (e2[1] * e3[2] - e2[2] * e3[1]) * det; /* (a_ij) = A^{-T} */ - a12 = (e2[2] * e3[0] - e2[0] * e3[2]) * det; - a13 = (e2[0] * e3[1] - e2[1] * e3[0]) * det; - a21 = (e1[2] * e3[1] - e1[1] * e3[2]) * det; - a22 = (e1[0] * e3[2] - e1[2] * e3[0]) * det; - a23 = (e1[1] * e3[0] - e1[0] * e3[1]) * det; - a31 = (e1[1] * e2[2] - e1[2] * e2[1]) * det; - a32 = (e1[2] * e2[0] - e1[0] * e2[2]) * det; - a33 = (e1[0] * e2[1] - e1[1] * e2[0]) * det; + a11 = ((*e2)[1] * (*e3)[2] - (*e2)[2] * (*e3)[1]) * det; /* (a_ij) = A^{-T} */ + a12 = ((*e2)[2] * (*e3)[0] - (*e2)[0] * (*e3)[2]) * det; + a13 = ((*e2)[0] * (*e3)[1] - (*e2)[1] * (*e3)[0]) * det; + a21 = ((*e1)[2] * (*e3)[1] - (*e1)[1] * (*e3)[2]) * det; + a22 = ((*e1)[0] * (*e3)[2] - (*e1)[2] * (*e3)[0]) * det; + a23 = ((*e1)[1] * (*e3)[0] - (*e1)[0] * (*e3)[1]) * det; + a31 = ((*e1)[1] * (*e2)[2] - (*e1)[2] * (*e2)[1]) * det; + a32 = ((*e1)[2] * (*e2)[0] - (*e1)[0] * (*e2)[2]) * det; + a33 = ((*e1)[0] * (*e2)[1] - (*e1)[1] * (*e2)[0]) * det; grd_lam[1][0] = a11; grd_lam[1][1] = a12; @@ -161,9 +166,9 @@ namespace AMDiS { grd_lam[3][1] = a32; grd_lam[3][2] = a33; - grd_lam[0][0] = -grd_lam[1][0] -grd_lam[2][0] -grd_lam[3][0]; - grd_lam[0][1] = -grd_lam[1][1] -grd_lam[2][1] -grd_lam[3][1]; - grd_lam[0][2] = -grd_lam[1][2] -grd_lam[2][2] -grd_lam[3][2]; + grd_lam[0][0] = -grd_lam[1][0] - grd_lam[2][0] - grd_lam[3][0]; + grd_lam[0][1] = -grd_lam[1][1] - grd_lam[2][1] - grd_lam[3][1]; + grd_lam[0][2] = -grd_lam[1][2] - grd_lam[2][2] - grd_lam[3][2]; } return adet; @@ -305,11 +310,17 @@ namespace AMDiS { } - double ElInfo3d::getNormal(int face, WorldVector &normal) const + double ElInfo3d::getNormal(int face, WorldVector &normal) { - FUNCNAME("ElInfo3d::getNormal"); + FUNCNAME("ElInfo3d::getNormal()"); + double det = 0.0; - WorldVector e0, e1, e2; + + int myRank = omp_get_thread_num(); + + WorldVector *e0 = &tmpWorldVecs[myRank][0]; + WorldVector *e1 = &tmpWorldVecs[myRank][1]; + WorldVector *e2 = &tmpWorldVecs[myRank][2]; if (dimOfWorld == 3) { int i0 = (face + 1) % 4; @@ -317,14 +328,14 @@ namespace AMDiS { int i2 = (face + 3) % 4; for (int i = 0; i < dimOfWorld; i++) { - e0[i] = coord_[i1][i] - coord_[i0][i]; - e1[i] = coord_[i2][i] - coord_[i0][i]; - e2[i] = coord_[face][i] - coord_[i0][i]; + (*e0)[i] = coord_[i1][i] - coord_[i0][i]; + (*e1)[i] = coord_[i2][i] - coord_[i0][i]; + (*e2)[i] = coord_[face][i] - coord_[i0][i]; } - vectorProduct(e0, e1, normal); + vectorProduct(*e0, *e1, normal); - if ((e2*normal) < 0.0) + if ((*e2 * normal) < 0.0) for (int i = 0; i < dimOfWorld; i++) normal[i] = -normal[i]; diff --git a/AMDiS/src/ElInfo3d.h b/AMDiS/src/ElInfo3d.h index eee554a7..6d5f82a4 100644 --- a/AMDiS/src/ElInfo3d.h +++ b/AMDiS/src/ElInfo3d.h @@ -24,6 +24,7 @@ #include "ElInfo.h" #include "MemoryManager.h" +#include "OpenMP.h" namespace AMDiS { @@ -44,7 +45,15 @@ namespace AMDiS { /** \brief * Constructor. Calls ElInfo's protected Constructor. */ - ElInfo3d(Mesh* aMesh) : ElInfo(aMesh) {}; + ElInfo3d(Mesh* aMesh) + : ElInfo(aMesh) + { + tmpWorldVecs.resize(omp_get_max_threads()); + + for (int i = 0; i < static_cast(tmpWorldVecs.size()); i++) { + tmpWorldVecs[i].resize(4); + } + }; /** \brief * Assignment operator @@ -74,12 +83,12 @@ namespace AMDiS { /** \brief * 3-dimensional realisation of ElInfo's calcGrdLambda method. */ - double calcGrdLambda(DimVec >& grd_lam) const; + double calcGrdLambda(DimVec >& grd_lam); /** \brief * 3-dimensional realisation of ElInfo's getNormal method. */ - double getNormal(int side, WorldVector &normal) const; + double getNormal(int side, WorldVector &normal); /** \brief * update ElInfo after refinement (of some neighbours). Only in 3d! @@ -126,6 +135,12 @@ namespace AMDiS { * element with vertices (0,0,0), (1,1,1), (1,1,0), (1,0,0). */ signed char orientation; + + /** \brief + * Tmp vectors used for calculations in calcGrdLambda and getNormal(). + * Thread safe! + */ + ::std::vector< ::std::vector< WorldVector > > tmpWorldVecs; }; } diff --git a/AMDiS/src/ElementFileWriter.h b/AMDiS/src/ElementFileWriter.h index 72224701..44d5b171 100644 --- a/AMDiS/src/ElementFileWriter.h +++ b/AMDiS/src/ElementFileWriter.h @@ -32,6 +32,10 @@ namespace AMDiS { void writeDelayedFiles() {}; + bool isWritingDelayed() { + return false; + }; + protected: /** * Writes element data in tecplot format. diff --git a/AMDiS/src/FileWriter.h b/AMDiS/src/FileWriter.h index 2b67ec2d..440de6cf 100644 --- a/AMDiS/src/FileWriter.h +++ b/AMDiS/src/FileWriter.h @@ -72,6 +72,8 @@ namespace AMDiS { virtual void writeDelayedFiles() = 0; + virtual bool isWritingDelayed() = 0; + void setTraverseProperties(int level, Flag flag, bool (*writeElem)(ElInfo*)) @@ -153,8 +155,18 @@ namespace AMDiS { Flag traverseFlag = Mesh::CALL_LEAF_EL, bool (*writeElem)(ElInfo*) = NULL); + /** \brief + * Starts the delayed writing. + */ virtual void writeDelayedFiles(); + /** \brief + * Returns true, if the file writer is waiting to start writing. + */ + bool isWritingDelayed() { + return writingIsDelayed_; + } + protected: /** \brief * Initialization of the filewriter. diff --git a/AMDiS/src/Lagrange.cc b/AMDiS/src/Lagrange.cc index 9fbacbd5..473a0751 100644 --- a/AMDiS/src/Lagrange.cc +++ b/AMDiS/src/Lagrange.cc @@ -947,13 +947,14 @@ namespace AMDiS { for (int pos = 0, j = 0; pos <= dim; pos++) { posIndex = INDEX_OF_DIM(pos, dim); - n0 = admin->getNumberOfPreDOFs(posIndex); - node0 = admin->getMesh()->getNode(posIndex); - num = Global::getGeo(posIndex, dim); nrDOFs = admin->getNumberOfDOFs(posIndex); - for (int i = 0; i < num; node0++, i++) { - if (nrDOFs) { + if (nrDOFs) { + n0 = admin->getNumberOfPreDOFs(posIndex); + node0 = admin->getMesh()->getNode(posIndex); + num = Global::getGeo(posIndex, dim); + + for (int i = 0; i < num; node0++, i++) { indi = orderOfPositionIndices(el, posIndex, i); for (int k = 0; k < nrDOFs; k++) diff --git a/AMDiS/src/OEMSolver.hh b/AMDiS/src/OEMSolver.hh index 0bb86e02..b10d5d81 100644 --- a/AMDiS/src/OEMSolver.hh +++ b/AMDiS/src/OEMSolver.hh @@ -96,7 +96,7 @@ namespace AMDiS { } if (res <= tolerance) { - INFO(info,6)("finished successfully with %d iterations\n"); + INFO(info,6)("finished successfully with %d iterations\n", iter); return(1); } diff --git a/AMDiS/src/ProblemInstat.cc b/AMDiS/src/ProblemInstat.cc index 4778cdf0..376a8f92 100644 --- a/AMDiS/src/ProblemInstat.cc +++ b/AMDiS/src/ProblemInstat.cc @@ -194,4 +194,13 @@ namespace AMDiS { problemStat->writeDelayedFiles(); } + bool ProblemInstatScal::existsDelayedCalculation() + { + return problemStat->existsDelayedCalculation(); + } + + bool ProblemInstatVec::existsDelayedCalculation() + { + return problemStat->existsDelayedCalculation(); + } } diff --git a/AMDiS/src/ProblemInstat.h b/AMDiS/src/ProblemInstat.h index db398bc6..5661fbf9 100644 --- a/AMDiS/src/ProblemInstat.h +++ b/AMDiS/src/ProblemInstat.h @@ -206,6 +206,8 @@ namespace AMDiS { virtual void startDelayedTimestepCalculation(); + virtual bool existsDelayedCalculation(); + virtual void serialize(::std::ostream &out) {}; @@ -295,6 +297,8 @@ namespace AMDiS { virtual void startDelayedTimestepCalculation(); + virtual bool existsDelayedCalculation(); + virtual void serialize(::std::ostream &out) {}; virtual void deserialize(::std::istream &in) {}; diff --git a/AMDiS/src/ProblemScal.cc b/AMDiS/src/ProblemScal.cc index a96f3e2f..a040ef5d 100644 --- a/AMDiS/src/ProblemScal.cc +++ b/AMDiS/src/ProblemScal.cc @@ -42,6 +42,16 @@ namespace AMDiS { } } + bool ProblemScal::existsDelayedCalculation() + { + for (int i = 0; i < static_cast(fileWriters_.size()); i++) { + if (fileWriters_[i]->isWritingDelayed()) + return true; + } + + return false; + } + void ProblemScal::interpolInitialSolution(AbstractFunction > *fct) { solution_->interpol(fct); diff --git a/AMDiS/src/ProblemScal.h b/AMDiS/src/ProblemScal.h index ad4d8002..3d42a2bd 100644 --- a/AMDiS/src/ProblemScal.h +++ b/AMDiS/src/ProblemScal.h @@ -200,6 +200,11 @@ namespace AMDiS { */ void writeDelayedFiles(); + /** \brief + * Returns true, if there is calculation waiting to be started. + */ + bool existsDelayedCalculation(); + /** \brief * Interpolates fct to \ref solution. */ diff --git a/AMDiS/src/ProblemTimeInterface.h b/AMDiS/src/ProblemTimeInterface.h index 2f4ce23d..70739439 100644 --- a/AMDiS/src/ProblemTimeInterface.h +++ b/AMDiS/src/ProblemTimeInterface.h @@ -75,6 +75,11 @@ namespace AMDiS { */ virtual void startDelayedTimestepCalculation() = 0; + /** \brief + * Returns true, if there is some delayed calculation waiting to be started. + */ + virtual bool existsDelayedCalculation() = 0; + /** \brief * Function that serializes the problem plus information about the iteration. */ diff --git a/AMDiS/src/ProblemVec.cc b/AMDiS/src/ProblemVec.cc index eb7ae905..7c6a4942 100644 --- a/AMDiS/src/ProblemVec.cc +++ b/AMDiS/src/ProblemVec.cc @@ -845,6 +845,16 @@ namespace AMDiS { } } + bool ProblemVec::existsDelayedCalculation() + { + for (int i = 0; i < static_cast(fileWriters_.size()); i++) { + if (fileWriters_[i]->isWritingDelayed()) + return true; + } + + return false; + } + void ProblemVec::interpolInitialSolution(::std::vector >*> *fct) { FUNCNAME("ProblemVec::interpolInitialSolution()"); diff --git a/AMDiS/src/ProblemVec.h b/AMDiS/src/ProblemVec.h index 4f4e77d7..a3a8ae84 100644 --- a/AMDiS/src/ProblemVec.h +++ b/AMDiS/src/ProblemVec.h @@ -231,6 +231,11 @@ namespace AMDiS { */ void writeDelayedFiles(); + /** \brief + * Returns true, if there is calculation waiting to be started. + */ + bool existsDelayedCalculation(); + /** \brief * Interpolates fct to \ref solution. */ diff --git a/AMDiS/src/Serializer.h b/AMDiS/src/Serializer.h index 23ab41a7..ba4f6079 100644 --- a/AMDiS/src/Serializer.h +++ b/AMDiS/src/Serializer.h @@ -75,7 +75,11 @@ namespace AMDiS { MSG("problem serialized to %s \n", name_.c_str()); }; - virtual void writeDelayedFiles() {}; + void writeDelayedFiles() {}; + + bool isWritingDelayed() { + return false; + }; protected: /** \brief -- GitLab