#include <algorithm>

#include "ParallelDomainBase.h"
#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"
#include "DOFMatrix.h"
#include "DOFVector.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "ElementDofIterator.h"
#include "ProblemStatBase.h"
#include "StandardProblemIteration.h"
#include "ElementFileWriter.h"
#include "VertexVector.h"
#include "StdMpi.h"

#include "petscksp.h"

namespace AMDiS {

  PetscErrorCode myKSPMonitor(KSP ksp, PetscInt iter, PetscReal rnorm, void *)
  {    
    if (iter % 100 == 0 && MPI::COMM_WORLD.Get_rank() == 0)
      std::cout << "  Petsc-Iteration " << iter << ": " << rnorm << std::endl;

    return 0;
  }

  inline bool cmpDofsByValue(const DegreeOfFreedom* dof1, const DegreeOfFreedom* dof2)
  {
    return (*dof1 < *dof2);
  }

  ParallelDomainBase::ParallelDomainBase(ProblemIterationInterface *iIF,
					 ProblemTimeInterface *tIF,
					 FiniteElemSpace *fe,
					 RefinementManager *refineManager)
    : iterationIF(iIF),
      timeIF(tIF),
      name(iIF->getName()),
      feSpace(fe),
      mesh(fe->getMesh()),
      refinementManager(refineManager),
      initialPartitionMesh(true),
      nRankDofs(0),
      rstart(0),
      nComponents(1),
      deserialized(false),
      lastMeshChangeIndex(0)
  {
    FUNCNAME("ParallelDomainBase::ParallelDomainBase()");

    TEST_EXIT(mesh->getNumberOfDOFAdmin() == 1)
      ("Only meshes with one DOFAdmin are supported!\n");
    TEST_EXIT(mesh->getDOFAdmin(0).getNumberOfPreDOFs(0) == 0)
      ("Wrong pre dof number for DOFAdmin!\n");

    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();
    mpiComm = MPI::COMM_WORLD;
    partitioner = new ParMetisPartitioner(mesh, &mpiComm);
  }

  void ParallelDomainBase::initParallelization(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::initParallelization()");

    TEST_EXIT(mpiSize > 1)
      ("Parallelization does not work with only one process!\n");

    // If the problem has already been read from a file, there is nothing to do here.
    if (deserialized)
      return;

    // Test if the mesh is a pure macro mesh! Partitioning of the mesh is supported
    // only for macro meshes, so it will not work yet if the mesh is already refined
    // in some way.
    testForMacroMesh();

    // create an initial partitioning of the mesh
    partitioner->createPartitionData();
    // set the element weights, which are all 1 at the very beginning
    setElemWeights(adaptInfo);
    // and now partition the mesh
    partitionMesh(adaptInfo);   

#if (DEBUG != 0)
    ElementIdxToDofs elMap;
    dbgCreateElementMap(elMap);

    if (mpiRank == 0)
      writePartitioningMesh("part.vtu");
#endif

    // === Create new global and local DOF numbering. ===

    // Set of all DOFs of the rank.
    std::vector<const DegreeOfFreedom*> rankDofs;
    // Number of DOFs in the rank's partition that are owned by the rank.
    nRankDofs = 0;
    // Number of all DOFs in the macro mesh.
    int nOverallDOFs = 0;

    createLocalGlobalNumbering(rankDofs, nRankDofs, nOverallDOFs);

    // === Create interior boundary information ===

    createInteriorBoundaryInfo();

    // === Remove all macro elements that are not part of the rank partition. ===

    removeMacroElements();

#if (DEBUG != 0)
    dbgTestElementMap(elMap);
    dbgTestInteriorBoundary();
#endif

    // === Reset all DOFAdmins of the mesh. ===

    updateDofAdmins();

    // === Create periodic dof mapping, if there are periodic boundaries. ===

    createPeriodicMap();

    // === Global refinements. ===

    int globalRefinement = 0;
    GET_PARAMETER(0, mesh->getName() + "->global refinements", "%d", &globalRefinement);

    if (globalRefinement > 0) {
      refinementManager->globalRefine(mesh, globalRefinement);

#if (DEBUG != 0)
      elMap.clear();
      dbgCreateElementMap(elMap);
#endif

      updateLocalGlobalNumbering(nRankDofs, nOverallDOFs);

      updateDofAdmins();

#if (DEBUG != 0)
      dbgTestElementMap(elMap);
#endif

      // === Update periodic mapping, if there are periodic boundaries. ===
      createPeriodicMap();
    }

    lastMeshChangeIndex = mesh->getChangeIndex();

#if (DEBUG != 0)
    dbgTestCommonDofs(true);
#endif

    nRankRows = nRankDofs * nComponents;
    nOverallRows = nOverallDOFs * nComponents;
  }


  void ParallelDomainBase::exitParallelization(AdaptInfo *adaptInfo)
  {}

  
  void ParallelDomainBase::updateDofAdmins()
  {
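    // After the (re)numbering, the rank's dofs occupy the contiguous local index
    // range [0, mapLocalGlobalDOFs.size()). Each DOFAdmin is reset accordingly:
    // first all slots are marked free, then the local range is marked used and the
    // admin's size/count/hole bookkeeping is updated.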
    int nAdmins = mesh->getNumberOfDOFAdmin();
    for (int i = 0; i < nAdmins; i++) {
      DOFAdmin& admin = const_cast<DOFAdmin&>(mesh->getDOFAdmin(i));
      
      for (int j = 0; j < admin.getSize(); j++)
	admin.setDOFFree(j, true);
      for (int j = 0; j < static_cast<int>(mapLocalGlobalDOFs.size()); j++)
 	admin.setDOFFree(j, false);

      admin.setUsedSize(mapLocalGlobalDOFs.size());
      admin.setUsedCount(mapLocalGlobalDOFs.size());
      admin.setFirstHole(mapLocalGlobalDOFs.size());
    }
  }

  void ParallelDomainBase::testForMacroMesh()
  {
    FUNCNAME("ParallelDomainBase::testForMacroMesh()");

    int nMacroElements = 0;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      TEST_EXIT(elInfo->getLevel() == 0)
	("Mesh is already refined! This does not work with parallelization!\n");
      
      nMacroElements++;

      elInfo = stack.traverseNext(elInfo);
    }

    TEST_EXIT(nMacroElements >= mpiSize)
      ("The mesh has less macro elements than number of mpi processes!\n");
  }

  void ParallelDomainBase::setDofMatrix(DOFMatrix* mat, int dispMult, 
					int dispAddRow, int dispAddCol)
  {
    FUNCNAME("ParallelDomainBase::setDofMatrix()");

    TEST_EXIT(mat)("No DOFMatrix!\n");

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    traits::col<Matrix>::type col(mat->getBaseMatrix());
    traits::const_value<Matrix>::type value(mat->getBaseMatrix());

    typedef traits::range_generator<row, Matrix>::type cursor_type;
    typedef traits::range_generator<nz, cursor_type>::type icursor_type;

    std::vector<int> cols;
    std::vector<double> values;
    cols.reserve(300);
    values.reserve(300);

    // === Traverse all rows of the dof matrix and insert the values row by row ===
    // === into the petsc matrix.                                                ===

    for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), 
	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {

      cols.clear();
      values.clear();

      // Global index of the current row dof.
      int globalRowDof = mapLocalGlobalDOFs[*cursor];
      // Test if the current row dof is a periodic dof.
      bool periodicRow = (periodicDof.count(globalRowDof) > 0);
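      // As used here, periodicDof maps the global index of a periodic dof to the
      // set of global dof indices it is identified with across the periodic
      // boundary; a row or column is treated as periodic if such an entry exists.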

      // === Traverse all nonzero entries of the row and produce vector cols  ===
      // === with the column indices of all row entries and vector values     ===
      // === with the corresponding values.                                   ===

      for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); 
	   icursor != icend; ++icursor) {

	// Insert only nonzero values.
	if (value(*icursor) != 0.0) {
	  // Global index of the current column dof.
	  int globalColDof = mapLocalGlobalDOFs[col(*icursor)];
	  // Calculate the exact position of the column index in the petsc matrix.
	  int colIndex = globalColDof * dispMult + dispAddCol;

	  // If the current row is not periodic, but the current dof index is periodic,
	  // we have to duplicate the value to the other corresponding periodic columns.
 	  if (periodicRow == false && periodicDof.count(globalColDof) > 0) {
	    // The value is assigned to n matrix entries; therefore, every entry
	    // gets only 1/n of the original value.
	    double scalFactor = 1.0 / (periodicDof[globalColDof].size() + 1.0);
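	    // Example: a dof that is identified with two other periodic dofs gets
	    // scalFactor = 1/3, so the contribution is split evenly over the three
	    // coupled matrix entries.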

	    // Insert original entry.
 	    cols.push_back(colIndex);
 	    values.push_back(value(*icursor) * scalFactor);

	    // Insert the periodic entries.
 	    for (std::set<DegreeOfFreedom>::iterator it = 
		   periodicDof[globalColDof].begin();
 		 it != periodicDof[globalColDof].end(); ++it) {
 	      cols.push_back(*it * dispMult + dispAddCol);
 	      values.push_back(value(*icursor) * scalFactor);
	    }
 	  } else {
	    // Neither row nor column dof index is periodic; simply add the entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor));
	  }
	}
      }


      // === Up to now we have assembled one row. Now, the row must be sent to the ===
      // === corresponding rows of the petsc matrix.                               ===

      // Calculate petsc row index.
      int rowIndex = globalRowDof * dispMult + dispAddRow;

      if (periodicRow) {
	// The row dof is periodic, so send the row to all corresponding periodic rows.

	double scalFactor = 1.0 / (periodicDof[globalRowDof].size() + 1.0);
	int diagIndex = -1;
	for (int i = 0; i < static_cast<int>(values.size()); i++) {
	  // Change only the non-diagonal values in the column. For the diagonal test
	  // we compare the global dof indices of the dof matrix (not of the petsc
	  // matrix!).
	  if ((cols[i] - dispAddCol) / dispMult != globalRowDof)
	    values[i] *= scalFactor;
	  else
	    diagIndex = i;
	}
	// Send the main row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);	
 
	// Set the diagonal element to zero, i.e., the diagonal element of the current
	// row is not sent to the periodic row indices.
	if (diagIndex != -1)
	  values[diagIndex] = 0.0;

	// Send the row to all periodic row indices.
	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRowDof].begin();
	     it != periodicDof[globalRowDof].end(); ++it) {
	  int perRowIndex = *it * dispMult + dispAddRow;
	  MatSetValues(petscMatrix, 1, &perRowIndex, cols.size(), 
		       &(cols[0]), &(values[0]), ADD_VALUES);
	}

      } else {
	// The row dof is not periodic, simply send the row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);
      }    
    }
  }

  void ParallelDomainBase::setDofVector(Vec& petscVec, DOFVector<double>* vec, 
					int dispMult, int dispAdd)
  {
    // Traverse all used dofs in the dof vector.
    DOFVector<double>::Iterator dofIt(vec, USED_DOFS);
    for (dofIt.reset(); !dofIt.end(); ++dofIt) {
      // Calculate global row index of the dof.
      int globalRow = mapLocalGlobalDOFs[dofIt.getDOFIndex()];
      // Calculate petsc index of the row dof.
      int index = globalRow * dispMult + dispAdd;

      if (periodicDof.count(globalRow) > 0) {
	// The dof index is periodic, so distribute the value over all associated dof entries.

	double value = *dofIt / (periodicDof[globalRow].size() + 1.0);
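	// Example: a dof identified with one other periodic dof contributes half of
	// its value to each of the two vector entries; ADD_VALUES then accumulates
	// the contributions from all ranks.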
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);

	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRow].begin();
	     it != periodicDof[globalRow].end(); ++it) {
	  index = *it * dispMult + dispAdd;
	  VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
	}

      } else {
	// The dof index is not periodic.
	double value = *dofIt;
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
      }
    }    
  }

  void ParallelDomainBase::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    clock_t first = clock();

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    int d_nnz[nRankRows];
    int o_nnz[nRankRows];
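    // d_nnz[i] / o_nnz[i] count the nonzeros of local row i that lie in the diagonal
    // block (columns owned by this rank) and in the off-diagonal block (columns owned
    // by other ranks); both arrays are passed to MatCreateMPIAIJ for preallocation.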

    std::map<int, std::vector< std::pair<int, int> > > sendMatrixEntry;

    for (int i = 0; i < nRankRows; i++) {
      d_nnz[i] = 0;
      o_nnz[i] = 0;
    }

    for (int i = 0; i < nComponents; i++) {
      for (int j = 0; j < nComponents; j++) {
 	if ((*mat)[i][j]) {
	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();

	  traits::col<Matrix>::type col(bmat);
	  traits::const_value<Matrix>::type value(bmat);
	  
	  typedef traits::range_generator<row, Matrix>::type cursor_type;
	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;
	  
	  for (cursor_type cursor = begin<row>(bmat), 
		 cend = end<row>(bmat); cursor != cend; ++cursor) {

	    int r = mapLocalGlobalDOFs[*cursor] * nComponents + i;

	    if (isRankDof[*cursor]) {
	      r -= rstart * nComponents;

#if (DEBUG != 0)    
	      if (r < 0 || r >= nRankRows) {
		std::cout << "ERROR in rank: " << mpiRank << std::endl;
		std::cout << "  Wrong r = " << r << " " << *cursor << " " 
			  << mapLocalGlobalDOFs[*cursor] << " " 
			  << nRankRows << std::endl;
		ERROR_EXIT("Should not happen!\n");
	      }
#endif
	      
	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  if (c >= rstart * nComponents && 
		      c < rstart * nComponents + nRankRows)
		    d_nnz[r]++;
		  else
		    o_nnz[r]++;		  
		}    
	      }
	    } else {
	      int sendToRank = -1;

	      for (RankToDofContainer::iterator it = recvDofs.begin();
		   it != recvDofs.end(); ++it) {
		for (DofContainer::iterator dofIt = it->second.begin();
		     dofIt != it->second.end(); ++dofIt) {
		  if (**dofIt == *cursor) {
		    sendToRank = it->first;
		    break;
		  }
		}

		if (sendToRank != -1)
		  break;
	      }

	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;
		  
		  sendMatrixEntry[sendToRank].push_back(std::make_pair(r, c));
		}
	      }
	    }
	  }
	}
      }
    }
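    // Entries assembled here for rows that are owned by another rank cannot be
    // counted locally. They are exchanged now in two steps: first the number of
    // (row, column) pairs per neighbouring rank, then the pairs themselves, so that
    // the owning rank can add them to its d_nnz/o_nnz counters.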

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    std::vector<int*> sendBuffers;
    sendBuffers.reserve(recvDofs.size());

    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      request[requestCounter++] = mpiComm.Isend(&nSend, 1, MPI_INT, it->first, 0);
      
      if (nSend > 0) {
	int *sendbuf = new int[nSend * 2];
	for (int i = 0; i < nSend; i++) {
	  sendbuf[i * 2] = sendMatrixEntry[it->first][i].first;
	  sendbuf[i * 2 + 1] = sendMatrixEntry[it->first][i].second;
	}
	sendBuffers.push_back(sendbuf);
      } else {
	sendBuffers.push_back(NULL);
      }
    }

    std::vector<int> recvSize;
    // Use resize, not reserve: the entries are written directly via Irecv below.
    recvSize.resize(sendDofs.size());
    
    int i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it)
      request[requestCounter++] = 
	mpiComm.Irecv(&(recvSize[i++]), 1, MPI_INT, it->first, 0);

    MPI::Request::Waitall(requestCounter, request);

    requestCounter = 0;

    i = 0;
    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      if (nSend > 0)
	request[requestCounter++] = 
	  mpiComm.Isend(sendBuffers[i], nSend * 2, MPI_INT, it->first, 0);

      i++;
    }

    std::vector<int*> recvBuffers;
    recvBuffers.reserve(sendDofs.size());
    
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	int *recvbuf = new int[recvSize[i] * 2];
	recvBuffers.push_back(recvbuf);

	request[requestCounter++] =
	  mpiComm.Irecv(recvbuf, recvSize[i] * 2, MPI_INT, it->first, 0);
      } else {
	recvBuffers.push_back(NULL);
      }

      i++;
    }

    MPI::Request::Waitall(requestCounter, request);

    for (int j = 0; j < static_cast<int>(sendBuffers.size()); j++)
      if (sendBuffers[j])
 	delete [] sendBuffers[j];

    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	for (int j = 0; j < recvSize[i]; j++) {
	  int r = recvBuffers[i][j * 2];
	  int c = recvBuffers[i][j * 2 + 1];

	  r -= rstart * nComponents;

	  TEST_EXIT_DBG(r >= 0 && r < nRankRows)("Should not happen!\n");
	  
	  if (c < rstart * nComponents || c >= rstart * nComponents + nRankRows)
	    o_nnz[r]++;
	  else
	    d_nnz[r]++;
	}

	delete [] recvBuffers[i];
      }

      i++;
    }

    MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows,
		    0, d_nnz, 0, o_nnz, &petscMatrix);

#if (DEBUG != 0)
    int a, b;
    MatGetOwnershipRange(petscMatrix, &a, &b);
    TEST_EXIT(a == rstart * nComponents)("Wrong matrix ownership range!\n");
    TEST_EXIT(b == rstart * nComponents + nRankRows)("Wrong matrix ownership range!\n");
#endif

    using mtl::tag::major; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    for (int i = 0; i < nComponents; i++)
      for (int j = 0; j < nComponents; j++)
	if ((*mat)[i][j])
	  setDofMatrix((*mat)[i][j], nComponents, i, j);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    for (int i = 0; i < nComponents; i++)
      setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);

    INFO(info, 8)("Filling the petsc matrix needed %.5f seconds\n", TIME_USED(first, clock()));
  }


  void ParallelDomainBase::solvePetscMatrix(SystemVector &vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

#if 0
    // Set the old solution to be the initial guess for the petsc solver.
    for (int i = 0; i < nComponents; i++)
      setDofVector(petscSolVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscSolVec);
    VecAssemblyEnd(petscSolVec);
#endif

    // === Init Petsc solver. ===

    KSP solver;
    KSPCreate(PETSC_COMM_WORLD, &solver);
    KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); 
    KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT);
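    // The arguments of KSPSetTolerances are (rtol, abstol, dtol, maxits): the relative
    // tolerance is disabled here and the iteration stops at an absolute residual norm
    // of 1e-8; KSPSetFromOptions below still allows overriding this on the command line.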
    KSPSetType(solver, KSPBCGS);
    KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0);
    KSPSetFromOptions(solver);
    // Do not delete the solution vector, use it for the initial guess.
    //    KSPSetInitialGuessNonzero(solver, PETSC_TRUE);


    // === Run Petsc. ===

    KSPSolve(solver, petscRhsVec, petscSolVec);

    // === Transfer the values from petsc's solution vector into the dof vectors. ===
    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);
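    // The local block of the petsc solution vector is stored component-interleaved:
    // component i of the rank's j-th owned dof is found at position j * nComponents + i,
    // and mapLocalToDofIndex[j] gives the corresponding dof index in the AMDiS vectors.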

    for (int i = 0; i < nComponents; i++) {
      DOFVector<double> *dofvec = vec.getDOFVector(i);
      for (int j = 0; j < nRankDofs; j++)
	(*dofvec)[mapLocalToDofIndex[j]] = vecPointer[j * nComponents + i];      
    }

    VecRestoreArray(petscSolVec, &vecPointer);


    // === Synchronize the values at common dofs, i.e., dofs that correspond to ===
    // === more than one partition.                                             ===
    synchVectors(vec);


    // === Print information about solution process. ===

    int iterations = 0;
    KSPGetIterationNumber(solver, &iterations);
    MSG("  Number of iterations: %d\n", iterations);
    
    double norm = 0.0;
    MatMult(petscMatrix, petscSolVec, petscTmpVec);
    VecAXPY(petscTmpVec, -1.0, petscRhsVec);
    VecNorm(petscTmpVec, NORM_2, &norm);
    MSG("  Residual norm: %e\n", norm);


    // === Destroy Petsc's variables. ===

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(solver);
  }
  
  void ParallelDomainBase::synchVectors(SystemVector &vec)
  {
#if 0
    StdMpi<std::vector<double>, std::vector<double> > stdMpi(mpiComm);
    stdMpi.prepareCommunication(false);

    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      std::vector<double> dofs;
      int nSendDOFs = sendIt->second.size();
      dofs.reserve(nComponents * sendIt->second.size());
      
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  dofs.push_back((*dofvec)[*((sendIt->second)[k])]);
      }

      stdMpi.send(sendIt->first, dofs);
    }

    stdMpi.startCommunication();
#endif

#if 1

    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());
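    // For every neighbouring rank the shared dof values are packed component by
    // component (all dofs of component 0 first, then component 1, and so on); the
    // receiving rank unpacks the buffer in exactly the same order.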

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs * nComponents];

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  sendBuffers[i][counter++] = (*dofvec)[*((sendIt->second)[k])];
      }

      request[requestCounter++] = mpiComm.Isend(sendBuffers[i], nSendDOFs * nComponents,
						MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size() * nComponents;
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
 	for (int k = 0; k < nRecvDOFs; k++)
	  (*dofvec)[*(recvIt->second)[k]] = recvBuffers[i][counter++];
      }

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];
#endif
  }


  void ParallelDomainBase::checkMeshChange()
  {
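    // Mesh adaptivity after the initial setup is not supported yet: if the mesh
    // change index differs from the stored one, the program simply aborts, and the
    // assignment below is currently never reached.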
    if (mesh->getChangeIndex() == lastMeshChangeIndex)
      return;

    std::cout << "MESH HAS BEEN CHANGED!" << std::endl;
    exit(0);    

    lastMeshChangeIndex = mesh->getChangeIndex();
  }

  
  void ParallelDomainBase::serialize(std::ostream &out, DofContainer &data)
  {
    int vecSize = data.size();
    SerUtil::serialize(out, vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = (*data[i]);
      SerUtil::serialize(out, dofIndex);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, DofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    FUNCNAME("ParallelDomainBase::deserialize()");

    int vecSize = 0;
    SerUtil::deserialize(in, vecSize);
    data.resize(vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = 0;
      SerUtil::deserialize(in, dofIndex);

      TEST_EXIT_DBG(dofMap.count(dofIndex) != 0)
	("Dof index could not be deserialized correctly!\n");

      data[i] = dofMap[dofIndex];
    }
  }


  void ParallelDomainBase::serialize(std::ostream &out, RankToDofContainer &data)
  {
    int mapSize = data.size();
    SerUtil::serialize(out, mapSize);
    for (RankToDofContainer::iterator it = data.begin(); it != data.end(); ++it) {
      int rank = it->first;
      SerUtil::serialize(out, rank);
      serialize(out, it->second);
    }
  }

  
  void ParallelDomainBase::deserialize(std::istream &in, RankToDofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    int mapSize = 0;
    SerUtil::deserialize(in, mapSize);
    for (int i = 0; i < mapSize; i++) {
      int rank = 0;
      SerUtil::deserialize(in, rank);
      deserialize(in, data[rank], dofMap);      
    }
  }


  double ParallelDomainBase::setElemWeights(AdaptInfo *adaptInfo) 
  {
    double localWeightSum = 0.0;
    int elNum = -1;
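    // Every leaf element inside the rank's partition adds a weight of 1 to its macro
    // element (partition level 0), so elemWeights finally holds the number of leaf
    // elements per macro element; the rank's local weight sum is returned.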

    elemWeights.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	if (partitionData->getLevel() == 0)
	  elNum = element->getIndex();
	TEST_EXIT_DBG(elNum != -1)("invalid element number\n");
	if (element->isLeaf()) {
	  elemWeights[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }


  void ParallelDomainBase::partitionMesh(AdaptInfo *adaptInfo)
  {
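    // The very first call apparently creates an initial ParMETIS partitioning; all
    // later calls repartition adaptively based on the current element weights, keeping
    // the previous distribution in oldPartitionVec. fillCoarsePartitionVec stores, for
    // every macro element index, the rank the element is assigned to.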
    if (initialPartitionMesh) {
      initialPartitionMesh = false;
      partitioner->fillCoarsePartitionVec(&oldPartitionVec);
      partitioner->partition(&elemWeights, INITIAL);
    } else {
      oldPartitionVec = partitionVec;
      partitioner->partition(&elemWeights, ADAPTIVE_REPART, 100.0 /*0.000001*/);
    }    

    partitioner->fillCoarsePartitionVec(&partitionVec);
  }


  void ParallelDomainBase::createInteriorBoundaryInfo()
  {
    FUNCNAME("ParallelDomainBase::createInteriorBoundaryInfo()");

    int nNeighbours = mesh->getGeo(NEIGH);
    int dim = mesh->getDim();
    GeoIndex subObj = CENTER;
    switch (dim) {
    case 2:
      subObj = EDGE;
      break;
    case 3:
      subObj = FACE;
      break;
    default:
      ERROR_EXIT("What is this?\n");
    }     

    // === First, traverse the mesh and search for all elements that have a    ===
    // === boundary with an element within another partition.                  ===

    TraverseStack stack;
    ElInfo *elInfo = 
      stack.traverseFirst(mesh, -1, 
			  Mesh::CALL_LEAF_EL | Mesh::FILL_NEIGH | Mesh::FILL_BOUND);
    while (elInfo) {
      Element *element = elInfo->getElement();

      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));   

      // Check if the element is within the rank's partition.
      if (partitionData->getPartitionStatus() == IN) {
	for (int i = 0; i < nNeighbours; i++) {
	  if (!elInfo->getNeighbour(i))
	    continue;

	  PartitionElementData *neighbourPartitionData =
	    dynamic_cast<PartitionElementData*>(elInfo->getNeighbour(i)->
						getElementData(PARTITION_ED));

 	  if (neighbourPartitionData->getPartitionStatus() == OUT) {

	    // We have found an element that is in the rank's partition but has a
	    // neighbour outside of the rank's partition.

	    // === Create information about the boundary between the two elements. ===

	    AtomicBoundary bound;	    	    
	    bound.rankObj.el = element;
	    bound.rankObj.elIndex = element->getIndex();
	    bound.rankObj.elType = elInfo->getType();
	    bound.rankObj.subObj = subObj;
	    bound.rankObj.ithObj = i;
	    // Do not set a pointer to the element, because it will be deleted from
	    // the mesh after partitioning and the pointer would become invalid.
	    bound.neighObj.el = NULL;
	    bound.neighObj.elIndex = elInfo->getNeighbour(i)->getIndex();
	    bound.neighObj.elType = -1;
	    bound.neighObj.subObj = subObj;
	    bound.neighObj.ithObj = elInfo->getSideOfNeighbour(i);
	    if (dim == 2) {
	      // i == 2  =>  getSideOfNeighbour(i) == 2
	      TEST_EXIT_DBG(i != 2 || elInfo->getSideOfNeighbour(i) == 2)
		("Should not happen!\n");
	    }

	    // Get rank number of the neighbouring element.
	    int otherElementRank = partitionVec[elInfo->getNeighbour(i)->getIndex()];


	    // === Add the boundary information object to the corresponding overall ===
	    // === boundary. There are three possibilities: if the boundary is a    ===
	    // === periodic boundary, just add it to \ref periodicBoundary. Here it ===
	    // === does not matter which rank is responsible for this boundary.     ===
	    // === Otherwise, the boundary is added either to \ref myIntBoundary or ===
	    // === to \ref otherIntBoundary, depending on which rank is responsible ===
	    // === for this boundary.                                               ===

	    if (BoundaryManager::isBoundaryPeriodic(elInfo->getBoundary(subObj, i))) {	      
	      // We have found an element that is at an interior, periodic boundary.
	      AtomicBoundary& b = periodicBoundary.getNewAtomic(otherElementRank);
	      b = bound;
	    } else {
	      // We have found an element that is at an interior, non-periodic boundary.
	      AtomicBoundary& b = ((mpiRank > otherElementRank) ?
				   myIntBoundary.getNewAtomic(otherElementRank) :
				   otherIntBoundary.getNewAtomic(otherElementRank));
	      b = bound;	      
	    }
 	  }
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }


    // === Once we have this information, we must take care of the order of the      ===
    // === atomic boundaries in the three boundary handling objects. In the end, all ===
    // === boundaries have to be in the same order on both ranks that share them.    ===
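    // The protocol used below: the owner side of each interior boundary sends, for
    // every atomic boundary, the pair (element index, ith object) of its own element;
    // the other side matches these pairs against the neighbour information it stored
    // and swaps its list entries into the received order. The same exchange is then
    // repeated for the periodic boundaries; in both cases the rank with the higher
    // number acts as the owner and sends its order.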

    std::vector<int*> sendBuffers, recvBuffers;

    // First we communicate myInt/otherIntBoundary, and then the periodic boundaries.
    MPI::Request request[max(myIntBoundary.boundary.size() + 
			     otherIntBoundary.boundary.size(),
			     periodicBoundary.boundary.size())];
    int requestCounter = 0;


    // === The owners of the boundaries send their atomic boundary order to the ===
    // === neighbouring ranks.                                                  ===

    for (RankToBoundMap::iterator rankIt = myIntBoundary.boundary.begin();
	 rankIt != myIntBoundary.boundary.end(); ++rankIt) {    
      int nSendInt = rankIt->second.size();
      int* buffer = new int[nSendInt * 2];
      for (int i = 0; i < nSendInt; i++) {
	buffer[i * 2] = (rankIt->second)[i].rankObj.elIndex;
	buffer[i * 2 + 1] = (rankIt->second)[i].rankObj.ithObj;
      }
      sendBuffers.push_back(buffer);
      
      request[requestCounter++] =
	mpiComm.Isend(buffer, nSendInt * 2, MPI_INT, rankIt->first, 0);
    }

    for (RankToBoundMap::iterator rankIt = otherIntBoundary.boundary.begin();
	 rankIt != otherIntBoundary.boundary.end(); ++rankIt) {
      int nRecvInt = rankIt->second.size();
      int *buffer = new int[nRecvInt * 2];
      recvBuffers.push_back(buffer);

      request[requestCounter++] = 
	mpiComm.Irecv(buffer, nRecvInt * 2, MPI_INT, rankIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    
    // === The information about all neighbouring boundaries has been received. The ===
    // === rank now tests if its own atomic boundaries are in the same order. If    ===
    // === not, the atomic boundaries are swapped into the correct order.           ===

    int i = 0;
    for (RankToBoundMap::iterator rankIt = otherIntBoundary.boundary.begin();
	 rankIt != otherIntBoundary.boundary.end(); ++rankIt) {

      // === We have received from rank "rankIt->first" the ordered list of element ===
      // === indices. We now have to sort the corresponding list in this rank to    ===
      // === get the same order.                                                    ===
      
      for (int j = 0; j < static_cast<int>(rankIt->second.size()); j++) {
	// If the expected object is not in place, search for it.
	if ((rankIt->second)[j].neighObj.elIndex != recvBuffers[i][j * 2] ||
	    (rankIt->second)[j].neighObj.ithObj != recvBuffers[i][j * 2 + 1]) {
	  int k = j + 1;
	  for (; k < static_cast<int>(rankIt->second.size()); k++)
	    if ((rankIt->second)[k].neighObj.elIndex == recvBuffers[i][j * 2] && 
		(rankIt->second)[k].neighObj.ithObj == recvBuffers[i][j * 2 + 1])
	      break;

	  // The element must always be found, because the list is just in another order.
	  TEST_EXIT_DBG(k < static_cast<int>(rankIt->second.size()))
	    ("Should never happen!\n");

	  // Swap the current with the found element.
	  AtomicBoundary tmpBound = (rankIt->second)[k];
	  (rankIt->second)[k] = (rankIt->second)[j];
	  (rankIt->second)[j] = tmpBound;	
	}
      }

      delete [] recvBuffers[i];
      i++;
    }

    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];

    recvBuffers.clear();
    sendBuffers.clear();
 

    // === Do the same for the periodic boundaries. ===

    requestCounter = 0;

    for (RankToBoundMap::iterator rankIt = periodicBoundary.boundary.begin();
	 rankIt != periodicBoundary.boundary.end(); ++rankIt) {

      TEST_EXIT_DBG(rankIt->first != mpiRank)
	("It is not possible to have an interior boundary within a rank partition!\n");

      if (rankIt->first < mpiRank) {
	int nSendInt = rankIt->second.size();
	int* buffer = new int[nSendInt * 2];
	for (int i = 0; i < nSendInt; i++) {
	  buffer[i * 2] = (rankIt->second)[i].rankObj.elIndex;
	  buffer[i * 2 + 1] = (rankIt->second)[i].rankObj.ithObj;
	}
	sendBuffers.push_back(buffer);

	request[requestCounter++] =
	  mpiComm.Isend(buffer, nSendInt * 2, MPI_INT, rankIt->first, 0);
      } else {
	int nRecvInt = rankIt->second.size();
	int *buffer = new int[nRecvInt * 2];
	recvBuffers.push_back(buffer);
	
	request[requestCounter++] = 
	  mpiComm.Irecv(buffer, nRecvInt * 2, MPI_INT, rankIt->first, 0);	
      }
    }


    MPI::Request::Waitall(requestCounter, request);


    i = 0;
    for (RankToBoundMap::iterator rankIt = periodicBoundary.boundary.begin();
	 rankIt != periodicBoundary.boundary.end(); ++rankIt) {
      if (rankIt->first > mpiRank) {

	for (int j = 0; j < static_cast<int>(rankIt->second.size()); j++) 
	  if (periodicBoundary.boundary[rankIt->first][j].neighObj.elIndex != recvBuffers[i][j * 2] ||
	      periodicBoundary.boundary[rankIt->first][j].neighObj.ithObj != recvBuffers[i][j * 2 + 1]) {
	    int k = j + 1;

	    for (; k < static_cast<int>(rankIt->second.size()); k++)
	      if (periodicBoundary.boundary[rankIt->first][k].neighObj.elIndex == recvBuffers[i][j * 2] && 
		  periodicBoundary.boundary[rankIt->first][k].neighObj.ithObj == recvBuffers[i][j * 2 + 1])