Commit ef52255e authored by Thomas Witkowski
Browse files

* Bugfixes for multi-threaded runs. Valgrind and Intel thread checker now run with 0 errors!

parent 3ee08357
...@@ -25,10 +25,10 @@ namespace AMDiS { ...@@ -25,10 +25,10 @@ namespace AMDiS {
nDOF = NEW DimVec<int>(dim, DEFAULT_VALUE, -1); nDOF = NEW DimVec<int>(dim, DEFAULT_VALUE, -1);
dow = Global::getGeo(WORLD); dow = Global::getGeo(WORLD);
grdTmpVec1.resize(omp_get_max_threads()); grdTmpVec1.resize(omp_get_num_procs());
grdTmpVec2.resize(omp_get_max_threads()); grdTmpVec2.resize(omp_get_num_procs());
for (int i = 0; i < omp_get_max_threads(); i++) { for (int i = 0; i < omp_get_num_procs(); i++) {
grdTmpVec1[i] = NEW DimVec<double>(dim, DEFAULT_VALUE, 0.0); grdTmpVec1[i] = NEW DimVec<double>(dim, DEFAULT_VALUE, 0.0);
grdTmpVec2[i] = NEW DimVec<double>(dim, DEFAULT_VALUE, 0.0); grdTmpVec2[i] = NEW DimVec<double>(dim, DEFAULT_VALUE, 0.0);
} }
......
...@@ -11,7 +11,7 @@ namespace AMDiS { ...@@ -11,7 +11,7 @@ namespace AMDiS {
BoundaryManager::BoundaryManager(const FiniteElemSpace *feSpace) BoundaryManager::BoundaryManager(const FiniteElemSpace *feSpace)
{ {
localBounds.resize(omp_get_max_threads()); localBounds.resize(omp_get_num_procs());
allocatedMemoryLocalBounds = feSpace->getBasisFcts()->getNumber(); allocatedMemoryLocalBounds = feSpace->getBasisFcts()->getNumber();
for (int i = 0; i < static_cast<int>(localBounds.size()); i++) { for (int i = 0; i < static_cast<int>(localBounds.size()); i++) {
localBounds[i] = GET_MEMORY(BoundaryType, allocatedMemoryLocalBounds); localBounds[i] = GET_MEMORY(BoundaryType, allocatedMemoryLocalBounds);
......
...@@ -31,11 +31,11 @@ namespace AMDiS { ...@@ -31,11 +31,11 @@ namespace AMDiS {
nBasFcts = feSpace->getBasisFcts()->getNumber(); nBasFcts = feSpace->getBasisFcts()->getNumber();
int dim = feSpace->getMesh()->getDim(); int dim = feSpace->getMesh()->getDim();
localIndices.resize(omp_get_max_threads()); localIndices.resize(omp_get_num_procs());
grdPhis.resize(omp_get_max_threads()); grdPhis.resize(omp_get_num_procs());
D2Phis.resize(omp_get_max_threads()); D2Phis.resize(omp_get_num_procs());
for (int i = 0; i < omp_get_max_threads(); i++) { for (int i = 0; i < omp_get_num_procs(); i++) {
localIndices[i] = GET_MEMORY(DegreeOfFreedom, this->nBasFcts); localIndices[i] = GET_MEMORY(DegreeOfFreedom, this->nBasFcts);
grdPhis[i] = NEW DimVec<double>(dim, DEFAULT_VALUE, 0.0); grdPhis[i] = NEW DimVec<double>(dim, DEFAULT_VALUE, 0.0);
D2Phis[i] = NEW DimMat<double>(dim, NO_INIT); D2Phis[i] = NEW DimMat<double>(dim, NO_INIT);
......
...@@ -14,7 +14,7 @@ namespace AMDiS { ...@@ -14,7 +14,7 @@ namespace AMDiS {
f(fct), f(fct),
dofVec(NULL) dofVec(NULL)
{ {
worldCoords.resize(omp_get_max_threads()); worldCoords.resize(omp_get_num_procs());
}; };
DirichletBC::DirichletBC(BoundaryType type, DirichletBC::DirichletBC(BoundaryType type,
...@@ -23,7 +23,7 @@ namespace AMDiS { ...@@ -23,7 +23,7 @@ namespace AMDiS {
f(NULL), f(NULL),
dofVec(vec) dofVec(vec)
{ {
worldCoords.resize(omp_get_max_threads()); worldCoords.resize(omp_get_num_procs());
} }
void DirichletBC::fillBoundaryCondition(DOFMatrix* matrix, void DirichletBC::fillBoundaryCondition(DOFMatrix* matrix,
......
...@@ -15,6 +15,10 @@ inline int omp_get_max_threads() { ...@@ -15,6 +15,10 @@ inline int omp_get_max_threads() {
return 1; return 1;
} }
// Stand-in for omp_get_num_procs() that always reports exactly one processor.
// NOTE(review): presumably only compiled when the real OpenMP runtime is not
// available (the neighbouring omp_get_* stand-ins also return 1) -- confirm
// against the enclosing preprocessor guard, which is not visible here.
inline int omp_get_num_procs() {
return 1;
}
inline int omp_get_num_threads() { inline int omp_get_num_threads() {
return 1; return 1;
} }
......
...@@ -180,7 +180,7 @@ namespace AMDiS { ...@@ -180,7 +180,7 @@ namespace AMDiS {
uhOld(NULL), uhOld(NULL),
optimized(true) optimized(true)
{ {
int maxThreads = omp_get_max_threads(); int maxThreads = omp_get_num_procs();
assembler.resize(maxThreads); assembler.resize(maxThreads);
secondOrder.resize(maxThreads); secondOrder.resize(maxThreads);
......
...@@ -6,7 +6,7 @@ namespace AMDiS { ...@@ -6,7 +6,7 @@ namespace AMDiS {
secondOrder[0].push_back(term); secondOrder[0].push_back(term);
term->operat = this; term->operat = this;
for (int i = 1; i < omp_get_max_threads(); i++) { for (int i = 1; i < omp_get_num_procs(); i++) {
T *newTerm = NEW T(static_cast<const T>(*term)); T *newTerm = NEW T(static_cast<const T>(*term));
secondOrder[i].push_back(newTerm); secondOrder[i].push_back(newTerm);
} }
...@@ -23,7 +23,7 @@ namespace AMDiS { ...@@ -23,7 +23,7 @@ namespace AMDiS {
} }
term->operat = this; term->operat = this;
for (int i = 1; i < omp_get_max_threads(); i++) { for (int i = 1; i < omp_get_num_procs(); i++) {
T *newTerm = NEW T(static_cast<const T>(*term)); T *newTerm = NEW T(static_cast<const T>(*term));
if (type == GRD_PSI) { if (type == GRD_PSI) {
firstOrderGrdPsi[i].push_back(newTerm); firstOrderGrdPsi[i].push_back(newTerm);
...@@ -39,7 +39,7 @@ namespace AMDiS { ...@@ -39,7 +39,7 @@ namespace AMDiS {
zeroOrder[0].push_back(term); zeroOrder[0].push_back(term);
term->operat = this; term->operat = this;
for (int i = 1; i < omp_get_max_threads(); i++) { for (int i = 1; i < omp_get_num_procs(); i++) {
T *newTerm = NEW T(static_cast<const T>(*term)); T *newTerm = NEW T(static_cast<const T>(*term));
zeroOrder[i].push_back(newTerm); zeroOrder[i].push_back(newTerm);
} }
......
...@@ -943,6 +943,14 @@ namespace AMDiS { ...@@ -943,6 +943,14 @@ namespace AMDiS {
tmpVector->set(0.0); tmpVector->set(0.0);
} }
// After creating private copies of the DOFMatrix and the DOFVector, all threads
// have to wait at this barrier. This is required especially for small problems,
// because otherwise one thread may finish assembling before another
// has made its private copy.
#ifdef _OPENMP
#pragma omp barrier
#endif
// Because we are using the parallel traverse stack, each thread will // Because we are using the parallel traverse stack, each thread will
// traverse only a part of the mesh. // traverse only a part of the mesh.
ElInfo *elInfo = stack.traverseFirst(mesh, -1, assembleFlag); ElInfo *elInfo = stack.traverseFirst(mesh, -1, assembleFlag);
......
...@@ -14,9 +14,9 @@ namespace AMDiS { ...@@ -14,9 +14,9 @@ namespace AMDiS {
GET_PARAMETER(0, name + "->C3", "%f", &C3); GET_PARAMETER(0, name + "->C3", "%f", &C3);
C3 = C3 > 1.e-25 ? sqr(C3) : 0.0; C3 = C3 > 1.e-25 ? sqr(C3) : 0.0;
seqEstimators_.resize(omp_get_max_threads()); seqEstimators_.resize(omp_get_num_procs());
for (int i = 0; i < omp_get_max_threads(); i++) { for (int i = 0; i < omp_get_num_procs(); i++) {
seqEstimators_[i] = NEW ResidualEstimator(name, r); seqEstimators_[i] = NEW ResidualEstimator(name, r);
} }
} }
......
...@@ -88,8 +88,8 @@ namespace AMDiS { ...@@ -88,8 +88,8 @@ namespace AMDiS {
q11 = Q11PsiPhi::provideQ11PsiPhi(owner->getRowFESpace()->getBasisFcts(), q11 = Q11PsiPhi::provideQ11PsiPhi(owner->getRowFESpace()->getBasisFcts(),
owner->getColFESpace()->getBasisFcts(), owner->getColFESpace()->getBasisFcts(),
quadrature); quadrature);
tmpLALt.resize(omp_get_max_threads()); tmpLALt.resize(omp_get_num_procs());
for (int i = 0; i < omp_get_max_threads(); i++) { for (int i = 0; i < omp_get_num_procs(); i++) {
tmpLALt[i] = NEW DimMat<double>*; tmpLALt[i] = NEW DimMat<double>*;
*(tmpLALt[i]) = NEW DimMat<double>(dim, NO_INIT); *(tmpLALt[i]) = NEW DimMat<double>(dim, NO_INIT);
} }
...@@ -163,7 +163,7 @@ namespace AMDiS { ...@@ -163,7 +163,7 @@ namespace AMDiS {
Quad2::Quad2(Operator *op, Assembler *assembler, Quadrature *quad) Quad2::Quad2(Operator *op, Assembler *assembler, Quadrature *quad)
: SecondOrderAssembler(op, assembler, quad, true) : SecondOrderAssembler(op, assembler, quad, true)
{ {
tmpLALt.resize(omp_get_max_threads()); tmpLALt.resize(omp_get_num_procs());
} }
Quad2::~Quad2() Quad2::~Quad2()
......
...@@ -34,7 +34,7 @@ namespace AMDiS { ...@@ -34,7 +34,7 @@ namespace AMDiS {
nRow = psi->getNumber(); nRow = psi->getNumber();
nCol = phi->getNumber(); nCol = phi->getNumber();
int maxThreads = omp_get_max_threads(); int maxThreads = omp_get_num_procs();
terms.resize(maxThreads); terms.resize(maxThreads);
switch (order) { switch (order) {
......
...@@ -9,7 +9,7 @@ namespace AMDiS { ...@@ -9,7 +9,7 @@ namespace AMDiS {
TraverseParallelStack::TraverseParallelStack(int nThreads) TraverseParallelStack::TraverseParallelStack(int nThreads)
{ {
if (nThreads == 0) { if (nThreads == 0) {
nThreads_ = omp_get_max_threads(); nThreads_ = omp_get_num_procs();
} else { } else {
nThreads_ = nThreads; nThreads_ = nThreads;
} }
......
...@@ -39,7 +39,7 @@ namespace AMDiS { ...@@ -39,7 +39,7 @@ namespace AMDiS {
optimized ? &optimizedSubAssemblers : &standardSubAssemblers; optimized ? &optimizedSubAssemblers : &standardSubAssemblers;
int myRank = omp_get_thread_num(); int myRank = omp_get_thread_num();
std::vector<OperatorTerm*> opTerms = op->zeroOrder[myRank]; std::vector<OperatorTerm*> opTerms = op->zeroOrder[myRank];
sort(opTerms.begin(), opTerms.end()); sort(opTerms.begin(), opTerms.end());
...@@ -227,12 +227,12 @@ namespace AMDiS { ...@@ -227,12 +227,12 @@ namespace AMDiS {
FastQuadZOA::FastQuadZOA(Operator *op, Assembler *assembler, Quadrature *quad) FastQuadZOA::FastQuadZOA(Operator *op, Assembler *assembler, Quadrature *quad)
: ZeroOrderAssembler(op, assembler, quad, true) : ZeroOrderAssembler(op, assembler, quad, true)
{ {
cPtrs.resize(omp_get_max_threads()); cPtrs.resize(omp_get_num_procs());
} }
FastQuadZOA::~FastQuadZOA() FastQuadZOA::~FastQuadZOA()
{ {
for (int i = 0; i < omp_get_max_threads(); i++) { for (int i = 0; i < omp_get_num_procs(); i++) {
FREE_MEMORY(cPtrs[i], double, quadrature->getNumPoints()); FREE_MEMORY(cPtrs[i], double, quadrature->getNumPoints());
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment