#include "ParallelProblem.h"
#include "ProblemScal.h"
#include "ProblemVec.h"
#include "ProblemInstat.h"
#include "AdaptInfo.h"
#include "AdaptStationary.h"
#include "ConditionalEstimator.h"
#include "ConditionalMarker.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"
#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "DualTraverse.h"
#include "MeshStructure.h"
#include "DOFVector.h"
#include "FiniteElemSpace.h"
#include "RefinementManager.h"
#include "CoarseningManager.h"
#include "Lagrange.h"
#include "ElementFileWriter.h"
#include "MacroWriter.h"
#include "ValueWriter.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "mpi.h"
#include <queue>
#include <time.h>

namespace AMDiS {

  bool elementInPartition(ElInfo *elInfo)
  {
    PartitionElementData *elementData = dynamic_cast<PartitionElementData*>
      (elInfo->getElement()->getElementData(PARTITION_ED));
    if (elementData && elementData->getPartitionStatus() == IN) {
      return true;
    } else {
      return false;
    }
  }

  class MyDualTraverse : public DualTraverse
  {
  public:
    MyDualTraverse(int coarseLevel)
      : coarseLevel_(coarseLevel)
    {};

    bool skipEl1(ElInfo *elInfo)
    {
      PartitionElementData *elementData = dynamic_cast<PartitionElementData*>
	(elInfo->getElement()->getElementData(PARTITION_ED));
      if (elementData) {
	if (elInfo->getElement()->isLeaf() && 
	   elementData->getLevel() < coarseLevel_)
	  return false;
	if(elementData->getLevel() == coarseLevel_)
	  return false;
      }
      return true;
    };
  private:
    int coarseLevel_;
  };

  // =========================================================================
  // ===== class ParallelProblemBase =========================================
  // =========================================================================

  ParallelProblemBase::ParallelProblemBase(const std::string& name,
					   ProblemIterationInterface *iterationIF,
					   ProblemTimeInterface *timeIF)
    : iterationIF_(iterationIF),
      timeIF_(timeIF),
      debugMode(0),
      debugServerProcess(false)
  {
    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();

    GET_PARAMETER(0, name + "->debug mode", "%d", &debugMode);

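    // In debug mode, world rank 0 acts as a dedicated debug server. All other
    // ranks are collected in amdisGroup and form the communicator that is
    // used for the actual computation.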
    if (debugMode) {
      MPI::Group group = MPI::COMM_WORLD.Get_group();
      
      int rankSize = mpiSize - 1;
      int *ranks = GET_MEMORY(int, rankSize);
      for (int i = 0; i < rankSize; i++) {
	ranks[i] = i + 1;
      }
      
      amdisGroup = group.Incl(rankSize, ranks);
      
      mpiComm = MPI::COMM_WORLD.Create(amdisGroup);
      
      if (mpiComm != MPI::COMM_NULL) {
	mpiRank = mpiComm.Get_rank();
	mpiSize = mpiComm.Get_size();
	debugServerProcess = false;
      } else {
	debugServerProcess = true;
      }
      
      FREE_MEMORY(ranks, int, rankSize);
    } else {
      mpiComm = MPI::COMM_WORLD;
    }
  }

  void ParallelProblemBase::exitParallelization(AdaptInfo *adaptInfo)
  {
    if (!timeIF_) 
      closeTimestep(adaptInfo);

    amdisGroup.Free();
  }

  void ParallelProblemBase::closeTimestep(AdaptInfo *adaptInfo)
  {
    if (mpiSize > 1 && doBuildGlobalSolution(adaptInfo)) {
      if (debugMode && mpiRank == 0) {
	// Send adaptInfo information to debug server
	double sendTime = adaptInfo->getTime();
	double sendTimestep = adaptInfo->getTimestep();
	MPI::COMM_WORLD.Send(&sendTime, 1, MPI_DOUBLE, 0, 100);
	MPI::COMM_WORLD.Send(&sendTimestep, 1, MPI_DOUBLE, 0, 100);
      }
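      // finish the parallel timestep: adapt all rank meshes to a common
      // structure, exchange the rank solutions and build the global solution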
      synchronizeMeshes(adaptInfo);	
      exchangeRankSolutions(adaptInfo);
      buildGlobalSolution(adaptInfo);
    }
    
    if (timeIF_) 
      timeIF_->closeTimestep(adaptInfo);
  }

  Flag ParallelProblemBase::oneIteration(AdaptInfo *adaptInfo, Flag toDo)
  {
    Flag flag;
    
    if (mpiSize > 1 && toDo.isSet(MARK | ADAPT)) {
      flag = iterationIF_->oneIteration(adaptInfo, MARK | ADAPT);
      
      double localWeightSum = setElemWeights(adaptInfo);
      if (doPartitioning(adaptInfo, localWeightSum)) {
	clock_t partitioningStart = clock();

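	// repartitioning sequence: bring all rank meshes to a common state,
	// compute the new partition, enlarge it by an overlap, synchronize
	// again, move DOF vector values to their new owners and coarsen
	// elements that no longer belong to this rank's partition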
	synchronizeMeshes(adaptInfo);
	partitionMesh(adaptInfo);
	refineOverlap(adaptInfo);
	createOverlap(adaptInfo);
	synchronizeMeshes(adaptInfo);
	exchangeDOFVectors(adaptInfo);
	coarsenOutOfPartition(adaptInfo);
	
	clock_t partitioningEnd = clock();
	partitioningTime = TIME_USED(partitioningStart, 
				     partitioningEnd);
	computationStart = partitioningEnd;
      }
      
      flag |= iterationIF_->oneIteration(adaptInfo, toDo & ~(MARK | ADAPT));
    } else {
      flag = iterationIF_->oneIteration(adaptInfo, toDo);
    }
    
    // synchronize adaption flag
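    // (combined via Allgather so that all ranks return the same flag and
    // leave or continue the adaptation loop together)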
    unsigned long *flagBuffer = GET_MEMORY(unsigned long, mpiSize);
    
    unsigned long localFlag = flag.getFlags();
    
    mpiComm.Allgather(&localFlag, 1, MPI_UNSIGNED_LONG,
		      flagBuffer, 1, MPI_UNSIGNED_LONG);
    for (int i = 0; i < mpiSize; i++) {
      flag.setFlag(flagBuffer[i]);
    }
    FREE_MEMORY(flagBuffer, unsigned long, mpiSize);
    
    return flag;
  };

  // =========================================================================
  // ===== class ParallelProblem =============================================
  // =========================================================================

  ParallelProblem::ParallelProblem(const std::string& name,
				   ProblemIterationInterface *iterationIF,
				   ProblemTimeInterface *timeIF,
				   std::vector<DOFVector<double>*> vectors,
				   Mesh *mesh,
				   RefinementManager *rm,
				   CoarseningManager *cm)
    : ParallelProblemBase(name, iterationIF, timeIF),
      name_(name),
      mesh(mesh),
      refinementManager(rm),
      coarseningManager(cm),
      repartitionSteps_(1),
      puEveryTimestep_(false),
      dofVectors_(vectors),
      upperPartThreshold_(1.5),
      lowerPartThreshold_(2.0/3.0),
      globalCoarseGridLevel_(0),
      localCoarseGridLevel_(0),
      globalRefinements_(0),
      adaptiveThresholds_(0),
      thresholdIncFactor_(2.0),
      thresholdDecFactor_(0.5),
      repartTimeFactor_(10.0)
  {
    GET_PARAMETER(0, name_ + "->upper part threshold", "%f", 
		  &upperPartThreshold_);
    GET_PARAMETER(0, name_ + "->lower part threshold", "%f", 
		  &lowerPartThreshold_);
    GET_PARAMETER(0, name_ + "->global coarse grid level", "%d", 
		  &globalCoarseGridLevel_);
    GET_PARAMETER(0, name_ + "->local coarse grid level", "%d", 
		  &localCoarseGridLevel_);
    GET_PARAMETER(0, name_ + "->global refinements", "%d", 
		  &globalRefinements_);


    TEST_EXIT(localCoarseGridLevel_ >= globalCoarseGridLevel_)
      ("local coarse grid level < global coarse grid level\n");

    partitioner_ = NEW ParMetisPartitioner(mesh, &mpiComm);

    GET_PARAMETER(0, name_ + "->adaptive thresholds", "%d", 
		  &adaptiveThresholds_);
    GET_PARAMETER(0, name_ + "->threshold inc factor", "%f", 
		  &thresholdIncFactor_);
    GET_PARAMETER(0, name_ + "->threshold dec factor", "%f", 
		  &thresholdDecFactor_);
    GET_PARAMETER(0, name_ + "->repart time factor", "%f", 
		  &repartTimeFactor_);


    TEST_EXIT(lowerPartThreshold_ <= 1.0)("invalid lower part threshold\n");
    TEST_EXIT(upperPartThreshold_ >= 1.0)("invalid upper part threshold\n");

    if (adaptiveThresholds_) {
      TEST_EXIT(thresholdDecFactor_ <= 1.0)("invalid threshold dec factor\n");
      TEST_EXIT(thresholdIncFactor_ >= 1.0)("invalid threshold inc factor\n");
    }
    minUpperTH_ = upperPartThreshold_;
    maxLowerTH_ = lowerPartThreshold_;
  }

  ParallelProblem::~ParallelProblem() 
  {
    DELETE partitioner_;
  }

  bool ParallelProblem::doPartitioning(AdaptInfo *adaptInfo, double localWeightSum) 
  {
    FUNCNAME("ParallelProblem::doPartitioning()");

    double *weightSum = GET_MEMORY(double, mpiSize);
    int *partArray = GET_MEMORY(int, mpiSize);
    int part = 0;

    mpiComm.Gather(&localWeightSum, 1, MPI_DOUBLE,
		     weightSum, 1, MPI_DOUBLE, 0);

    if (mpiRank == 0) {

      double average = 0.0;
      for (int i = 0; i < mpiSize; i++) {
	average += weightSum[i];
      }

      average /= mpiSize;

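      // rank 0 decides whether to repartition: request a new partitioning as
      // soon as one rank's weight deviates too much from the average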
      for (int i = 0; i < mpiSize; i++) {
	if ((weightSum[i] / average) > upperPartThreshold_) {
	  part = 1;
	  break;
	}
	if ((weightSum[i] / average) < lowerPartThreshold_) {
	  part = 1;
	  break;
	}
      }

      double computationTime = TIME_USED(computationStart, clock());
      if (adaptiveThresholds_) {

	bool timeOver = ((computationTime / partitioningTime) >= repartTimeFactor_);

	if (part == 1 && !timeOver) {
	  // inc thresholds
	  upperPartThreshold_ *= thresholdIncFactor_;
	  lowerPartThreshold_ /= thresholdIncFactor_;

	  // avoid repartitioning
	  part = 0;
	}
      
	if (part == 0 && timeOver) {
	  // dec thresholds
	  upperPartThreshold_ *= thresholdDecFactor_;
	  lowerPartThreshold_ /= thresholdDecFactor_;

	  upperPartThreshold_ = max(minUpperTH_, upperPartThreshold_);
	  lowerPartThreshold_ = min(maxLowerTH_, lowerPartThreshold_);
	}
      }

      for (int i = 0; i < mpiSize; i++) {
	partArray[i] = part;
      }      
    }

    mpiComm.Scatter(partArray, 1, MPI_INT,
		    &part, 1, MPI_INT, 0);
    
    FREE_MEMORY(weightSum, double, mpiSize);
    FREE_MEMORY(partArray, int, mpiSize);

    return (part == 1);
  }

  bool ParallelProblem::doBuildGlobalSolution(AdaptInfo *adaptInfo) {
    return true;
  }

  void ParallelProblem::partitionMesh(AdaptInfo *adaptInfo)
  {
    static bool initial = true;
    if (initial) {
      initial = false;
      partitioner_->fillCoarsePartitionVec(&oldPartitionVec);
      partitioner_->partition(&elemWeights_, INITIAL);
    } else {
      oldPartitionVec = partitionVec;
      partitioner_->partition(&elemWeights_, ADAPTIVE_REPART, 100.0 /*0.000001*/);
    }    

    partitioner_->fillCoarsePartitionVec(&partitionVec);
  }

  void ParallelProblem::refineOverlap(AdaptInfo *adaptInfo)
  {
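    // Refine elements along the partition boundary until the local coarse
    // grid level is reached. DOFs are classified as inner (1), outer (2) or
    // border (3) to detect elements touching the partition boundary.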
    int dim = mesh->getDim();

    bool finished = (localCoarseGridLevel_ == 0);

    //for(j = globalCoarseGridLevel_; j < localCoarseGridLevel_; j++) {
    while (!finished) {
      std::map<DegreeOfFreedom, int> inOut; // 1: in, 2: out, 3: border dof

      // mark in/out/border dofs
      TraverseStack stack;
      ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
      while (elInfo) {
	Element *element = elInfo->getElement();
	PartitionElementData *partitionData = 
	  dynamic_cast<PartitionElementData*>(elInfo->getElement()->getElementData(PARTITION_ED));

	const DegreeOfFreedom **dofs = element->getDOF();

	if (partitionData->getPartitionStatus() == IN) {
	  for (int i = 0; i < dim + 1; i++) {
	    DegreeOfFreedom dof = dofs[i][0];
	    if (inOut[dof] == 2) 
	      inOut[dof] = 3;
	    if (inOut[dof] == 0) 
	      inOut[dof] = 1;
	  }
	} else {
	  for (int i = 0; i < dim + 1; i++) {
	    DegreeOfFreedom dof = dofs[i][0];
	    if (inOut[dof] == 1) 
	      inOut[dof] = 3;
	    if (inOut[dof] == 0) 
	      inOut[dof] = 2;
	  }
	}

	elInfo = stack.traverseNext(elInfo);
      }

      // refine overlap-border and inner elements
      finished = true;
      bool marked = false;
      elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
      while (elInfo) {
	Element *element = elInfo->getElement();
	PartitionElementData *partitionData = 
	  dynamic_cast<PartitionElementData*>(elInfo->getElement()->getElementData(PARTITION_ED));

	int level = partitionData->getLevel();

	if (level < localCoarseGridLevel_) {
	  if (partitionData->getPartitionStatus() != IN) {
	    const DegreeOfFreedom **dofs = element->getDOF();
	    for (int i = 0; i < dim + 1; i++) {
	      DegreeOfFreedom dof = dofs[i][0];
	      if (inOut[dof] == 3) {
		element->setMark(1);
		marked = true;
		if ((level + 1) < localCoarseGridLevel_) 
		  finished = false;
		break;
	      }
	    }
	  } else {
	    element->setMark(1);
	    marked = true;
	    if ((level + 1) < localCoarseGridLevel_) 
	      finished = false;
	  }
	}

	elInfo = stack.traverseNext(elInfo);
      }
      if (marked) 
	refinementManager->refineMesh(mesh);
    }
  }

  void ParallelProblem::globalRefineOutOfPartition(AdaptInfo *adaptInfo)
  {
    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(elInfo->getElement()->getElementData(PARTITION_ED));
      int refinements = globalCoarseGridLevel_ - partitionData->getLevel();
      elInfo->getElement()->setMark(max(0, refinements));
      elInfo = stack.traverseNext(elInfo);
    }

    refinementManager->refineMesh(mesh);
  }

  void ParallelProblem::coarsenOutOfPartition(AdaptInfo *adaptInfo)
  {
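    // Coarsen elements outside the own partition back to the global coarse
    // grid level; repeat until the mesh does not change anymore.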
    Flag meshCoarsened = 1;    
    while(meshCoarsened.getFlags() != 0) {
      TraverseStack stack;
      ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
      while(elInfo) {
	Element *element = elInfo->getElement();
	PartitionElementData *partitionData = 
	  dynamic_cast<PartitionElementData*>
	  (element->getElementData(PARTITION_ED));
	if(partitionData->getPartitionStatus() == OUT) {
	  int mark = min(0, -partitionData->getLevel() + globalCoarseGridLevel_);
	  //int mark = -partitionData->getLevel();
	  element->setMark(mark);
	}
	elInfo = stack.traverseNext(elInfo);
      }
      meshCoarsened = coarseningManager->coarsenMesh(mesh);
    }
    mpiComm.Barrier();
  }

  void ParallelProblem::exchangeMeshStructureCodes(MeshStructure *structures)
  {
    // every process creates a mesh structure code from its mesh.
    structures[mpiRank].init(mesh);
    const std::vector<unsigned long int>& myCode = structures[mpiRank].getCode();

    // broadcast code sizes
    int *codeSize = GET_MEMORY(int, mpiSize);
    int tmp = static_cast<int>(myCode.size());
    mpiComm.Allgather(&tmp, 1, MPI_INT, codeSize, 1, MPI_INT);
    if (debugMode) {
      // send code sizes also to debug server
      MPI::COMM_WORLD.Gather(&tmp, 1, MPI_INT, NULL, 1, MPI_INT, 0);
    }

    // broadcast number of elements
    int *elements = GET_MEMORY(int, mpiSize);
    tmp = structures[mpiRank].getNumElements();
    mpiComm.Allgather(&tmp, 1, MPI_INT, elements, 1, MPI_INT);
    if (debugMode) {
      // send number of elements also to debug server
      MPI::COMM_WORLD.Gather(&tmp, 1, MPI_INT, NULL, 1, MPI_INT, 0);
    }

    // broadcast codes
    int *codeOffset = GET_MEMORY(int, mpiSize);
    int codeSizeSum = 0;
    for (int rank = 0; rank < mpiSize; rank++) {
      codeOffset[rank] = codeSizeSum;
      codeSizeSum += codeSize[rank];
    }

    unsigned long int *code = GET_MEMORY(unsigned long int, codeSizeSum);
    unsigned long int *localCode = GET_MEMORY(unsigned long int, codeSize[mpiRank]);  
    unsigned long int *ptr;
    std::vector<unsigned long int>::const_iterator it, end = myCode.end();
  
    for (ptr = localCode, it = myCode.begin();
	 it != end; 
	 ++it, ++ptr) {
      *ptr = *it;
    }

    mpiComm.Allgatherv(localCode, codeSize[mpiRank], 
		       MPI_UNSIGNED_LONG, 
		       code, codeSize, codeOffset,
		       MPI_UNSIGNED_LONG);
    if (debugMode) {
      // send codes also to debug server
      MPI::COMM_WORLD.Send(localCode, codeSize[mpiRank],
			   MPI_UNSIGNED_LONG, 0, 100);
    }
    
    for (int rank = 0; rank < mpiSize; rank++) {
      if (rank != mpiRank) {
	std::vector<unsigned long int> remoteCode;
	unsigned long int *ptr;
	unsigned long int *begin = code + codeOffset[rank]; 
	unsigned long int *end = begin + codeSize[rank];
	for (ptr = begin; ptr != end; ++ptr) {
	  remoteCode.push_back(*ptr);
	}
	structures[rank].init(remoteCode, elements[rank]);
      }
    }

    // free memory
    FREE_MEMORY(elements, int, mpiSize);
    FREE_MEMORY(code, unsigned long int, codeSizeSum);
    FREE_MEMORY(localCode, unsigned long int, codeSize[mpiRank]);
    FREE_MEMORY(codeOffset, int, mpiSize);
    FREE_MEMORY(codeSize, int, mpiSize);
    
  }

  void ParallelProblem::synchronizeMeshes(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelProblem::synchronizeMeshes()");

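    // Exchange and merge the mesh structure codes of all ranks, and refine
    // the local mesh until it matches the merged (finest) structure.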
    MeshStructure *structures = NEW MeshStructure[mpiSize];

    // build composite mesh structure
    exchangeMeshStructureCodes(structures);

    // merge codes
    for (int rank = 0; rank < mpiSize; rank++) {
      if (rank != mpiRank) {
	structures[mpiRank].merge(&structures[rank]);
      }
    }
  
    // build finest mesh on the rank partition
    structures[mpiRank].fitMeshToStructure(mesh,
					   refinementManager,
					   true);

    DELETE [] structures;
  }


  bool ParallelProblem::writeElement(ElInfo *elInfo)
  {
    Element *element = elInfo->getElement();
    PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
      (element->getElementData(PARTITION_ED));
    TEST_EXIT_DBG(partitionData)("no partition data\n");
    PartitionStatus status = partitionData->getPartitionStatus();
    if (status == IN) 
      return true;
    else
      return false;
  }

  void ParallelProblem::exchangeRankSolutions(AdaptInfo *adaptInfo,
					      Mesh *workMesh,
					      std::vector<DOFVector<double>*> rankSolutions)
  {
    FUNCNAME("ParallelProblem::exchangeRankSolutions()");

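    // Determine for every coarse grid vertex which partitions share it; this
    // is needed to decide which fine DOFs have to be sent to or received
    // from other ranks.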
    ParallelProblem::fillVertexPartitions(localCoarseGridLevel_, 1, true, overlapDistance_);
    overlapDistance_.clear();

    const FiniteElemSpace *feSpace = rankSolutions[0]->getFESpace();
    int dim = workMesh->getDim();
    const BasisFunction *basFcts = feSpace->getBasisFcts();
    int numFcts = basFcts->getNumber();
    DegreeOfFreedom *coarseDOFs = GET_MEMORY(DegreeOfFreedom, numFcts);
    DegreeOfFreedom *fineDOFs = GET_MEMORY(DegreeOfFreedom, numFcts);
    DOFAdmin *admin = feSpace->getAdmin();

    std::vector<std::vector<DegreeOfFreedom> > sendOrder(mpiSize);
    std::vector<std::vector<DegreeOfFreedom> > recvOrder(mpiSize);

    elementPartitions_.clear();

    int elementPartition = -1;
    Element *coarseElement = NULL;
    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(workMesh, -1, Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData) {
	if (partitionData->getLevel() == 0) {
	  elementPartition = partitionVec[element->getIndex()];
	}

	PartitionStatus status = partitionData->getPartitionStatus();

 	if (status != OUT) {
	  if (partitionData->getLevel() == localCoarseGridLevel_) {
	    basFcts->getLocalIndices(element, admin, coarseDOFs);

	    // collect other partitions the element belongs to
	    for (int i = 0; i < dim + 1; i++) {
	      std::set<int>::iterator setBegin = vertexPartitions[coarseDOFs[i]].begin();
	      std::set<int>::iterator setEnd = vertexPartitions[coarseDOFs[i]].end();
	      for (std::set<int>::iterator setIt = setBegin; setIt != setEnd; ++setIt) {
		elementPartitions_[element].insert(*setIt);
	      }	
	    }

	    coarseElement = element;
	  }


	  if (element->isLeaf()) {
	    basFcts->getLocalIndices(element, admin, fineDOFs);

	    for (int i = 0; i < numFcts; i++) {
	      if (status == OVERLAP) {
		// send dofs
		sendOrder[elementPartition].push_back(fineDOFs[i]);
	      } 
	      if (status == IN) {
		// recv dofs
		TEST_EXIT(elementPartition == mpiRank)("IN element does not belong to this rank\n");
		std::set<int>::iterator setBegin = elementPartitions_[coarseElement].begin();
		std::set<int>::iterator setEnd = elementPartitions_[coarseElement].end();
		for (std::set<int>::iterator setIt = setBegin; setIt != setEnd; ++setIt) {
		  if (*setIt != mpiRank) {
 		    recvOrder[*setIt].push_back(fineDOFs[i]);
		  }
		}
	      }
	    }
	  }
	}
      }
      
      elInfo = stack.traverseNext(elInfo);
    }

    // create send and recv buffers and fill send buffers
    DOFVector<double> *solution = rankSolutions[mpiRank];

    std::map<int, double*> sendBuffer;
    std::map<int, double*> recvBuffer;
    std::map<int, int> sendBufferSize;
    std::map<int, int> recvBufferSize;

    for (int partition = 0; partition < mpiSize; partition++) {
      if (partition != mpiRank) {
	int sendSize = static_cast<int>(sendOrder[partition].size());
	int recvSize = static_cast<int>(recvOrder[partition].size());

	sendBufferSize[partition] = sendSize;	
	recvBufferSize[partition] = recvSize;
	if (sendSize > 0) {
	  sendBuffer[partition] = GET_MEMORY(double, sendSize);
	  std::vector<DegreeOfFreedom>::iterator dofIt;
	  dofIt = sendOrder[partition].begin();
	  double *bufferIt, *bufferBegin, *bufferEnd;
	  bufferBegin = sendBuffer[partition];
	  bufferEnd = bufferBegin + sendSize;
	  for (bufferIt = bufferBegin; 
	       bufferIt < bufferEnd; 
	       ++bufferIt, ++dofIt) {
	    *bufferIt = (*solution)[*dofIt];
	  }
	}
	if (recvSize > 0) {
	  recvBuffer[partition] = GET_MEMORY(double, recvSize);
	}
      }
    }

    // non-blocking sends
    for (int partition = 0; partition < mpiSize; partition++) {
      if (partition != mpiRank) {
	if (sendBufferSize[partition] > 0) {
	  mpiComm.Isend(sendBuffer[partition],
			sendBufferSize[partition],
			MPI_DOUBLE,
			partition,
			0);
	}
      }
    }    
   
    // blocking receives
    for (int partition = 0; partition < mpiSize; partition++) {
      if (partition != mpiRank) {
	if (recvBufferSize[partition] > 0) {
	  mpiComm.Recv(recvBuffer[partition],
		       recvBufferSize[partition],
		       MPI_DOUBLE,
		       partition,
		       0);
	}
      }
    }    

    // wait for end of communication
    mpiComm.Barrier();

    // copy values into rank solutions
    for (int partition = 0; partition < mpiSize; partition++) {
      if (partition != mpiRank) {
	std::vector<DegreeOfFreedom>::iterator dofIt = recvOrder[partition].begin();
	for (int i = 0; i < recvBufferSize[partition]; i++) {
	  (*(rankSolutions[partition]))[*dofIt] = recvBuffer[partition][i];
	  ++dofIt;
	}
      }
    }    

    // free send and recv buffers
    for (int partition = 0; partition < mpiSize; partition++) {
      if (partition != mpiRank) {
	if (sendBufferSize[partition] > 0) {
	  FREE_MEMORY(sendBuffer[partition], double, sendBufferSize[partition]);
	}
	if (recvBufferSize[partition] > 0) {
	  FREE_MEMORY(recvBuffer[partition], double, recvBufferSize[partition]);
	}
      }
    }    

    FREE_MEMORY(coarseDOFs, DegreeOfFreedom, numFcts);
    FREE_MEMORY(fineDOFs, DegreeOfFreedom, numFcts);
  }

  void ParallelProblem::exchangeDOFVector(AdaptInfo *adaptInfo,
					  DOFVector<double> *values)
  {
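    // Elements whose partition changed during repartitioning hand their DOF
    // values over from the old owner rank to the new one.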
    partitioner_->fillLeafPartitionVec(&oldPartitionVec, &oldPartitionVec);
    partitioner_->fillLeafPartitionVec(&partitionVec, &partitionVec);

    // === get send and receive orders ===
    std::vector<std::vector<DegreeOfFreedom> > sendOrder;
    std::vector<std::vector<DegreeOfFreedom> > recvOrder;
    sendOrder.resize(mpiSize);
    recvOrder.resize(mpiSize);

    int i;
    const FiniteElemSpace *feSpace = values->getFESpace();
    const BasisFunction *basFcts = feSpace->getBasisFcts();
    int numFcts = basFcts->getNumber();
    DegreeOfFreedom *dofs = GET_MEMORY(DegreeOfFreedom, numFcts);
    DOFAdmin *admin =  feSpace->getAdmin();

    Mesh *mesh = feSpace->getMesh();
    TraverseStack stack;
    ElInfo *elInfo;

    elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while(elInfo) {
      Element *element = elInfo->getElement();
      int index = element->getIndex();
      int oldPartition = oldPartitionVec[index];
      int newPartition = partitionVec[index];

      if(oldPartition != newPartition) {
	// get dof indices
	basFcts->getLocalIndices(element, admin, dofs);

	if(oldPartition == mpiRank) {
	  for(i = 0; i < numFcts; i++) {
	    // send element values to new partition
	    sendOrder[newPartition].push_back(dofs[i]);
	  }
	}
	if(newPartition == mpiRank) {
	  for(i = 0; i < numFcts; i++) {
	    // recv element values from old partition
	    recvOrder[oldPartition].push_back(dofs[i]);
	  }
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    FREE_MEMORY(dofs, DegreeOfFreedom, numFcts);

    // === create send and recv buffers and fill send buffers ===
    std::map<int, double*> sendBuffer;
    std::map<int, double*> recvBuffer;
    std::map<int, int> sendBufferSize;
    std::map<int, int> recvBufferSize;

    int partition;
    for (partition = 0; partition < mpiSize; partition++) {
      if (partition != mpiRank) {
	int sendSize = static_cast<int>(sendOrder[partition].size());
	int recvSize = static_cast<int>(recvOrder[partition].size());
      
	sendBufferSize[partition] = sendSize;	
	recvBufferSize[partition] = recvSize;
	if (sendSize > 0) {
	  sendBuffer[partition] = GET_MEMORY(double, sendSize);
	  std::vector<DegreeOfFreedom>::iterator dofIt;
	  dofIt = sendOrder[partition].begin();
	  double *bufferIt, *bufferBegin, *bufferEnd;
	  bufferBegin = sendBuffer[partition];
	  bufferEnd = bufferBegin + sendSize;
	  for (bufferIt = bufferBegin; 
	       bufferIt < bufferEnd; 
	       ++bufferIt, ++dofIt) {
	    *bufferIt = (*values)[*dofIt];
	  }
	}
	if (recvSize > 0)
	  recvBuffer[partition] = GET_MEMORY(double, recvSize);
      }
    }

    // === non-blocking sends ===
    for (partition = 0; partition < mpiSize; partition++) {
      if (partition != mpiRank) {
	if (sendBufferSize[partition] > 0) {
	  mpiComm.Isend(sendBuffer[partition],
			  sendBufferSize[partition],
			  MPI_DOUBLE,
			  partition,
			  0);
	}
      }
    }    
    
    // === blocking receives ===
    for (partition = 0; partition < mpiSize; partition++) {
      if (partition != mpiRank) {
	if (recvBufferSize[partition] > 0) {
	  mpiComm.Recv(recvBuffer[partition],
			 recvBufferSize[partition],
			 MPI_DOUBLE,
			 partition,
			 0);
	}
      }
    }    

    // === wait for end of MPI communication ===
    mpiComm.Barrier();

    // === copy received values into DOFVector ===
    for (partition = 0; partition < mpiSize; partition++) {
      if (partition != mpiRank) {
	std::vector<DegreeOfFreedom>::iterator dofIt = recvOrder[partition].begin();
	for (i = 0; i < recvBufferSize[partition]; i++) {
	  (*values)[*dofIt] = recvBuffer[partition][i];
	  ++dofIt;
	}
      }
    }    

    // === free send and receive buffers ===
    for (partition = 0; partition < mpiSize; partition++) {
      if (partition != mpiRank) {
	if (sendBufferSize[partition] > 0)
	  FREE_MEMORY(sendBuffer[partition], 
		      double,
		      sendBufferSize[partition]);
	if (recvBufferSize[partition] > 0)
	  FREE_MEMORY(recvBuffer[partition], 
		      double,
		      recvBufferSize[partition]);
      }
    }
  }

  void ParallelProblem::buildGlobalSolution(AdaptInfo *adaptInfo,
					    std::vector<DOFVector<double>*> rankSolutions,
					    DOFVector<double> *globalSolution)
  {
    FUNCNAME("ParallelProblem::buildGlobalSolution()");

    const FiniteElemSpace *feSpace = globalSolution->getFESpace();
    int dim = mesh->getDim();
    const BasisFunction *basFcts = feSpace->getBasisFcts();
    int numFcts = basFcts->getNumber();
    DegreeOfFreedom *coarseDOFs = GET_MEMORY(DegreeOfFreedom, numFcts);
    DegreeOfFreedom *fineDOFs = GET_MEMORY(DegreeOfFreedom, numFcts);
    DOFAdmin *admin =  feSpace->getAdmin();

    Lagrange *linearFunctions = Lagrange::getLagrange(dim, 1);

    MSG("Building global solution\n");

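    // The global solution is assembled as a partition of unity: every fine
    // DOF gets a weight per contributing partition, built from the linear
    // Lagrange basis functions of the coarse element, and the rank solutions
    // are blended with these normalized weights.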
    // compute w[DOF][partition]->value
    std::map<DegreeOfFreedom, std::map<int, double> > w;
    std::map<DegreeOfFreedom, std::map<int, double> >::iterator wIt, wBegin, wEnd;

    std::map<DegreeOfFreedom, double> sumW;

    Element *lastCoarseElement = NULL;

    WorldVector<double> worldCoord;
    DimVec<double> baryCoord(dim, NO_INIT);

    std::set<int>::iterator partIt, partBegin, partEnd;

    std::map<DegreeOfFreedom, bool> visited;


    MyDualTraverse dualTraverse(localCoarseGridLevel_);
    ElInfo *elInfo1, *elInfo2;
    ElInfo *large, *small;

    bool cont = dualTraverse.traverseFirst(mesh, mesh,
					   -1, -1,
					   Mesh::CALL_EVERY_EL_PREORDER |
					   Mesh::FILL_COORDS | 
					   Mesh::FILL_DET,
					   Mesh::CALL_LEAF_EL | 
					   Mesh::FILL_COORDS,
					   &elInfo1, &elInfo2,
					   &small, &large);

    while (cont) {
      Element *element1 = elInfo1->getElement();
      Element *element2 = elInfo2->getElement();
      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>
	(element1->getElementData(PARTITION_ED));
      if (partitionData->getPartitionStatus() == IN) {

	// get coarse dofs
	if (element1 != lastCoarseElement) {
	  basFcts->getLocalIndices(element1, admin, coarseDOFs);
	  lastCoarseElement = element1;
	}
      
	if (elementPartitions_[element1].size() > 1) {
	  // get fine dofs
	  basFcts->getLocalIndices(element2, admin, fineDOFs);
      
	  // for all fine DOFs
	  for (int i = 0; i < numFcts; i++) {
	    if (!visited[fineDOFs[i]]) {
	      visited[fineDOFs[i]] = true;

	      elInfo2->coordToWorld(*(basFcts->getCoords(i)), worldCoord);
	      elInfo1->worldToCoord(worldCoord, &baryCoord);

	      // for all coarse vertex DOFs
	      for (int j = 0; j < dim + 1; j++) {
		partBegin = vertexPartitions[coarseDOFs[j]].begin();
		partEnd = vertexPartitions[coarseDOFs[j]].end();
		for (partIt = partBegin; partIt != partEnd; ++partIt) {
		  int partition = *partIt/* - 1*/;
		  double val = (*(linearFunctions->getPhi(j)))(baryCoord);
		  w[fineDOFs[i]][partition] += val;

		  sumW[fineDOFs[i]] += val;
		}
	      }
	    }
	  }
	}
      }
      cont = dualTraverse.traverseNext(&elInfo1, &elInfo2,
				       &small, &large);
    }

    FREE_MEMORY(coarseDOFs, DegreeOfFreedom, numFcts);
    FREE_MEMORY(fineDOFs, DegreeOfFreedom, numFcts);

    MSG("PU ...\n");

    wBegin = w.begin();
    wEnd = w.end();

    for (wIt = wBegin; wIt != wEnd; ++wIt) {
      DegreeOfFreedom dof = wIt->first;
      (*globalSolution)[dof] = 0.0;
    }
    
    for (wIt = wBegin; wIt != wEnd; ++wIt) {
      DegreeOfFreedom dof = wIt->first;
      std::map<int, double>::iterator partIt, partBegin, partEnd;
      partBegin = wIt->second.begin();
      partEnd = wIt->second.end();
    
      for (partIt = partBegin; partIt != partEnd; ++partIt) {
	int partition = partIt->first;
	double wDOF = partIt->second;
	(*globalSolution)[dof] += wDOF / sumW[dof] * (*(rankSolutions[partition]))[dof];
      }
    }
  }

  double ParallelProblem::errors2map(std::map<int, double> &errVec, 
				     int comp,
				     bool add)
  {
    int elNum = -1;
    double totalErr, error;

    if (!add) 
      errVec.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, 
					 -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    totalErr = 0.0;
    while(elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if(partitionData && partitionData->getPartitionStatus() == IN) {
	if(partitionData->getLevel() == 0) {
	  elNum = element->getIndex();
	}
	TEST_EXIT(elNum != -1)("invalid element number\n");
	if(element->isLeaf()) {
	  error = element->getEstimation(comp);
	  errVec[elNum] += error;
	  totalErr += error;
	}
      }
      elInfo = stack.traverseNext(elInfo);
    }

    return totalErr;
  }

  double ParallelProblem::setElemWeights(AdaptInfo *adaptInfo) 
  {
    double localWeightSum = 0.0;
    int elNum = -1;

    elemWeights_.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, 
					 -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    while(elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if(partitionData && partitionData->getPartitionStatus() == IN) {
	if(partitionData->getLevel() == 0) {
	  elNum = element->getIndex();
	}
	TEST_EXIT(elNum != -1)("invalid element number\n");
	if(element->isLeaf()) {
	  elemWeights_[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }


  void ParallelProblem::globalRefinements() 
  {
    if (globalRefinements_ <= 0) 
      return;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      Element *element = elInfo->getElement();
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	element->setMark(globalRefinements_);
      }
      
      elInfo = stack.traverseNext(elInfo);
    }

    refinementManager->refineMesh(mesh);
  }


  void ParallelProblem::createOverlap(int level, int overlap, bool openOverlap,
				      std::map<Element*, int> &overlapDistance)
  {
    int dim = mesh->getDim();

    // === create dual graph (one common node) and prepare breadth-first search ===
    std::map<DegreeOfFreedom, std::vector<Element*> > vertexElements;
    std::queue<Element*> overlapQueue;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();
      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));
      
      if (partitionData) {
	if (partitionData->getLevel() == level) {
	  for (int i = 0; i < dim + 1; i++) {	    
	    vertexElements[element->getDOF(i, 0)].push_back(element);
	  }

	  if(partitionData->getPartitionStatus() == IN) {
	    overlapDistance[element] = 0;
	    overlapQueue.push(element);
	  } else {
	    overlapDistance[element] = -1; // still unknown
	  }
	}
      }
      elInfo = stack.traverseNext(elInfo);
    }

    // === breadth-first search on dual graph ===
    std::vector<Element*>::iterator it, itBegin, itEnd;

    while (!overlapQueue.empty()) {
      // get first element in queue
      Element *element = overlapQueue.front();
      overlapQueue.pop();

      // get distance
      int distance = overlapDistance[element];
      TEST_EXIT(distance >= 0)("invalid distance\n");

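      // stop expanding the overlap once the requested width is reached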
      if (distance >= overlap) 
	continue;
      
      // get all adjacent elements
      for (int i = 0; i < dim + 1; i++) {
	itBegin = vertexElements[element->getDOF(i, 0)].begin();
	itEnd = vertexElements[element->getDOF(i, 0)].end();
	for (it = itBegin; it != itEnd; ++it) {
	  if (overlapDistance[*it] == -1) {
	    // set distance for new member
	    overlapDistance[*it] = distance + 1;
	    // push neighbour to queue
	    overlapQueue.push(*it);
	    // mark as overlap on AMDiS mesh	    
	    PartitionElementData *partitionData = 
	      dynamic_cast<PartitionElementData*>((*it)->getElementData(PARTITION_ED));
	    TEST_EXIT(partitionData)("no partition data\n");
	    partitionData->setPartitionStatus(OVERLAP);
	    partitionData->descend(*it);
	  }
	}
      }
    }
  }

  void ParallelProblem::fillVertexPartitions(int level, int overlap, 
					     bool openOverlap,
					     std::map<Element*, int> &overlapDistance)
  {
    int dim = mesh->getDim();

    // clear partition dof vector
    vertexPartitions.clear();

    // first: partition elements ...
    int partition;
    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();
      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));
      if (partitionData) {
	if (partitionData->getLevel() == 0) {
	  partition = partitionVec[element->getIndex()];
	}
	if (partitionData->getLevel() == level) {
	  for (int i = 0; i < dim + 1; i++) {
	    vertexPartitions[element->getDOF(i, 0)].insert(partition);
	  }
	}
      }
      elInfo = stack.traverseNext(elInfo);
    }

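    // For wider or closed overlaps, the ranks also have to know which remote
    // partitions reach into their overlap elements; this information is
    // exchanged via the composite mesh structure codes below.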
    if (overlap > 1 || openOverlap == false) {
      // exchange mesh structure codes
      MeshStructure *structures = NEW MeshStructure[mpiSize];
      exchangeMeshStructureCodes(structures);

      // merge codes
      for (int rank = 0; rank < mpiSize; rank++) {
	if (rank != mpiRank) {
	  structures[mpiRank].merge(&structures[rank]);
	}
      }
    
      MeshStructure &compositeStructure = structures[mpiRank];
      compositeStructure.reset();

      // get composite indices of local overlap elements
      std::map<int, Element*> indexElement;
      std::vector<int> innerOverlapElements; // not at open overlap boundary

      elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_EVERY_EL_PREORDER);
      while (elInfo) {
	Element *element = elInfo->getElement();
	PartitionElementData *partitionData = 
	  dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));
	
	if (partitionData && partitionData->getLevel() == level) {
	  int compositeIndex = compositeStructure.getCurrentElement();
	  indexElement[compositeIndex] = element;
	  if (partitionData->getPartitionStatus() == OVERLAP) {
	    int distance = overlapDistance[element];
	    if (distance < overlap || !openOverlap) {
	      innerOverlapElements.push_back(compositeIndex);
	    } 
	  }
	}
	
	if (element->isLeaf()) {
	  compositeStructure.skipBranch();
	} else {
	  compositeStructure.nextElement();
	}
	elInfo = stack.traverseNext(elInfo);
      }
    
      // === exchange 'inner' overlap elements ===

      // exchange number of overlap elements
      int *numOverlapElements = GET_MEMORY(int, mpiSize);
      int tmp = static_cast<int>(innerOverlapElements.size());
      mpiComm.Allgather(&tmp, 1, MPI_INT, numOverlapElements, 1, MPI_INT);
      
      // exchange overlap elements
      int *offset = GET_MEMORY(int, mpiSize);
      int sum = 0;
      for (int rank = 0; rank < mpiSize; rank++) {
	offset[rank] = sum;
	sum += numOverlapElements[rank];
      }

      int *recvBuffer = GET_MEMORY(int, sum);
      int *sendBuffer = GET_MEMORY(int, numOverlapElements[mpiRank]);
   
      int *ptr;
      std::vector<int>::iterator elemIt, elemEnd = innerOverlapElements.end();
  
      for (ptr = sendBuffer, elemIt = innerOverlapElements.begin();
	   elemIt != elemEnd; 
	   ++elemIt, ++ptr) {
	*ptr = *elemIt;
      }
  
      mpiComm.Allgatherv(sendBuffer, numOverlapElements[mpiRank], MPI_INT, 
			 recvBuffer, numOverlapElements, offset, MPI_INT);


      // fill vertexPartitions for 'inner' overlap elements
      for (int rank = 0; rank < mpiSize; rank++) {
	int numElements = numOverlapElements[rank];
	int *elements = recvBuffer + offset[rank];
	for (int el = 0; el < numElements; el++) {
	  Element *element = indexElement[elements[el]];
	  for (int i = 0; i < dim + 1; i++) {
	    vertexPartitions[element->getDOF(i, 0)].insert(rank/* + 1*/);
	  }
	}
      }

      // free memory
      DELETE [] structures;
      FREE_MEMORY(recvBuffer, int, sum);
      FREE_MEMORY(sendBuffer, int, numOverlapElements[mpiRank]);
      FREE_MEMORY(numOverlapElements, int, mpiSize);
    }
  }

  void ParallelProblem::createOverlap(AdaptInfo *adaptInfo)
  {
    int level = localCoarseGridLevel_, overlap = 1;
    bool openOverlap = true;

    ParallelProblem::createOverlap(level, overlap, openOverlap, overlapDistance_);
  }

  // =========================================================================
  // ===== class ParallelProblemScal =========================================
  // =========================================================================
  
  ParallelProblemScal::ParallelProblemScal(const std::string& name,
					   ProblemScal *prob,
					   ProblemInstatScal *problemInstat,
					   std::vector<DOFVector<double>*> vectors)
    : ParallelProblem(name,
		      prob,
		      problemInstat,
		      vectors,
		      prob->getMesh(),
		      prob->getRefinementManager(),
		      prob->getCoarseningManager()),
      problem(prob),
      problemInstat_(problemInstat),
      oldEstimator(NULL),
      oldMarker(NULL),
      rankSolution(mpiSize),
      usersEstimator(NULL),
      usersMarker(NULL)
  {
    for (int i = 0; i < mpiSize; i++) {
      rankSolution[i] = NEW DOFVector<double>(problem->getFESpace(), "rank solution");
    }
   
    if (problemInstat_) {
      dofVectors_.push_back(problemInstat_->getOldSolution());
    }
  }

  ParallelProblemScal::~ParallelProblemScal()
  {
    for (int i = 0; i < mpiSize; i++) {
      DELETE rankSolution[i];
    }
  }


  void ParallelProblemScal::initParallelization(AdaptInfo *adaptInfo)
  {