#include <algorithm>

#include "ParallelDomainBase.h"
#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"
#include "DOFMatrix.h"
#include "DOFVector.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "ElementDofIterator.h"
#include "ProblemStatBase.h"
#include "StandardProblemIteration.h"
#include "ElementFileWriter.h"
#include "VertexVector.h"
#include "StdMpi.h"

#include "petscksp.h"

namespace AMDiS {

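  // PETSc KSP monitor: rank 0 prints the residual norm of every 100th solver
  // iteration.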
  PetscErrorCode myKSPMonitor(KSP ksp, PetscInt iter, PetscReal rnorm, void *)
  {    
    if (iter % 100 == 0 && MPI::COMM_WORLD.Get_rank() == 0)
      std::cout << "  Petsc-Iteration " << iter << ": " << rnorm << std::endl;

    return 0;
  }

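  // Strict weak ordering for dof pointers: compares the dof indices the
  // pointers refer to, not the pointer addresses themselves.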
  inline bool cmpDofsByValue(const DegreeOfFreedom* dof1, const DegreeOfFreedom* dof2)
  {
    return (*dof1 < *dof2);
  }

  ParallelDomainBase::ParallelDomainBase(ProblemIterationInterface *iIF,
					 ProblemTimeInterface *tIF,
					 FiniteElemSpace *fe,
					 RefinementManager *refineManager)
    : iterationIF(iIF),
      timeIF(tIF),
      name(iIF->getName()),
      feSpace(fe),
      mesh(fe->getMesh()),
      refinementManager(refineManager),
      initialPartitionMesh(true),
      nRankDofs(0),
      rstart(0),
      nComponents(1),
      deserialized(false)
  {
    FUNCNAME("ParallelDomainBase::ParallelDomainBase()");

    TEST_EXIT(mesh->getNumberOfDOFAdmin() == 1)
      ("Only meshes with one DOFAdmin are supported!\n");
    TEST_EXIT(mesh->getDOFAdmin(0).getNumberOfPreDOFs(0) == 0)
      ("Wrong pre dof number for DOFAdmin!\n");

    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();
    mpiComm = MPI::COMM_WORLD;
    partitioner = new ParMetisPartitioner(mesh, &mpiComm);
  }

  void ParallelDomainBase::initParallelization(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::initParallelization()");

    TEST_EXIT(mpiSize > 1)
      ("Parallelization does not work with only one process!\n");

    // If the problem has already been read from a file, we do not need to do anything.
    if (deserialized)
      return;

    // Test whether the mesh consists of macro elements only. Partitioning of the
    // mesh is supported only for macro meshes, so it will not work yet if the
    // mesh has already been refined in some way.
    testForMacroMesh();

    // Create an initial partitioning of the mesh,
    partitioner->createPartitionData();
    // set the element weights, which are all 1 at the very beginning,
    setElemWeights(adaptInfo);
    // and now partition the mesh.
    partitionMesh(adaptInfo);

#if (DEBUG != 0)
    ElementIdxToDofs elMap;
    dbgCreateElementMap(elMap);

    if (mpiRank == 0)
      writePartitioningMesh("part.vtu");
#endif

    // === Create new global and local DOF numbering. ===

    // Set of all DOFs of the rank.
    std::vector<const DegreeOfFreedom*> rankDofs;
    // Number of DOFs in the rank's partition that are owned by the rank.
    nRankDofs = 0;
    // Number of all DOFs in the macro mesh.
    int nOverallDOFs = 0;

    createLocalGlobalNumbering(rankDofs, nRankDofs, nOverallDOFs);

    // === Create interior boundary information ===

    createInteriorBoundaryInfo();

    // === Remove all macro elements that are not part of the rank partition. ===

    removeMacroElements();

#if (DEBUG != 0)
    dbgTestElementMap(elMap);
    dbgTestInteriorBoundary();
#endif

    // === Reset all DOFAdmins of the mesh. ===

    updateDofAdmins();

    // === Create periodic dof mapping, if there are periodic boundaries. ===

    createPeriodicMap();

    //    exit(0);

    // === Global refinements. ===

    int globalRefinement = 0;
    GET_PARAMETER(0, mesh->getName() + "->global refinements", "%d", &globalRefinement);

    if (globalRefinement > 0) {
      refinementManager->globalRefine(mesh, globalRefinement);

#if (DEBUG != 0)
      elMap.clear();
      dbgCreateElementMap(elMap);
#endif

      updateLocalGlobalNumbering(nRankDofs, nOverallDOFs);

      updateDofAdmins();

#if (DEBUG != 0)
      dbgTestElementMap(elMap);
#endif

      // === Update periodic mapping, if there are periodic boundaries. ===
      createPeriodicMap();
    }

    //  exit(0);

#if (DEBUG != 0)
    dbgTestCommonDofs(true);
#endif

    nRankRows = nRankDofs * nComponents;
    nOverallRows = nOverallDOFs * nComponents;
  }


  void ParallelDomainBase::exitParallelization(AdaptInfo *adaptInfo)
  {}

  
  void ParallelDomainBase::updateDofAdmins()
  {
    int nAdmins = mesh->getNumberOfDOFAdmin();
    for (int i = 0; i < nAdmins; i++) {
      DOFAdmin& admin = const_cast<DOFAdmin&>(mesh->getDOFAdmin(i));
      
      for (int j = 0; j < admin.getSize(); j++)
	admin.setDOFFree(j, true);
      for (int j = 0; j < static_cast<int>(mapLocalGlobalDOFs.size()); j++)
 	admin.setDOFFree(j, false);

      admin.setUsedSize(mapLocalGlobalDOFs.size());
      admin.setUsedCount(mapLocalGlobalDOFs.size());
      admin.setFirstHole(mapLocalGlobalDOFs.size());
    }
  }


  void ParallelDomainBase::testForMacroMesh()
  {
    FUNCNAME("ParallelDomainBase::testForMacroMesh()");

    int nMacroElements = 0;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      TEST_EXIT(elInfo->getLevel() == 0)
	("Mesh is already refined! This does not work with parallelization!\n");
      
      nMacroElements++;

      elInfo = stack.traverseNext(elInfo);
    }

    TEST_EXIT(nMacroElements >= mpiSize)
      ("The mesh has less macro elements than number of mpi processes!\n");
  }


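  // Inserts the entries of a DOFMatrix into the global petsc matrix. A local
  // dof index is mapped to the petsc index
  // mapLocalGlobalDOFs[dof] * dispMult + dispAddRow/dispAddCol. Entries that
  // involve periodic dofs are distributed, with appropriate scaling, to all
  // associated rows and columns.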
  void ParallelDomainBase::setDofMatrix(DOFMatrix* mat, int dispMult, 
					int dispAddRow, int dispAddCol)
  {
    FUNCNAME("ParallelDomainBase::setDofMatrix()");

    TEST_EXIT(mat)("No DOFMatrix!\n");

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    traits::col<Matrix>::type col(mat->getBaseMatrix());
    traits::const_value<Matrix>::type value(mat->getBaseMatrix());

    typedef traits::range_generator<row, Matrix>::type cursor_type;
    typedef traits::range_generator<nz, cursor_type>::type icursor_type;

    std::vector<int> cols;
    std::vector<double> values;
    cols.reserve(300);
    values.reserve(300);

    // === Traverse all rows of the dof matrix and insert the values row-wise ===
    // === into the petsc matrix.                                             ===

    for (cursor_type cursor = begin<row>(mat->getBaseMatrix()), 
	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {

      cols.clear();
      values.clear();

      // Global index of the current row dof.
      int globalRowDof = mapLocalGlobalDOFs[*cursor];
      // Test if the current row dof is a periodic dof.
      bool periodicRow = (periodicDof.count(globalRowDof) > 0);


      // === Traverse all non-zero entries of the row and fill the vector cols ===
      // === with the column indices of all row entries and the vector values  ===
      // === with the corresponding values.                                    ===

      for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor); 
	   icursor != icend; ++icursor) {

	// Set only non-zero values.
	if (value(*icursor) != 0.0) {
	  // Global index of the current column dof.
	  int globalColDof = mapLocalGlobalDOFs[col(*icursor)];
	  // Calculate the exact position of the column index in the petsc matrix.
	  int colIndex = globalColDof * dispMult + dispAddCol;

	  // If the current row is not periodic, but the current dof index is periodic,
	  // we have to duplicate the value to the other corresponding periodic columns.
 	  if (periodicRow == false && periodicDof.count(globalColDof) > 0) {
	    // The value is assigned to n matrix entries; therefore, every entry
	    // gets only 1/n of the original value.
	    double scalFactor = 1.0 / (periodicDof[globalColDof].size() + 1.0);
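	    // For example, a dof with exactly one periodic partner leads to two
	    // column entries, each carrying half of the value (scalFactor = 0.5).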

	    // Insert original entry.
 	    cols.push_back(colIndex);
 	    values.push_back(value(*icursor) * scalFactor);

	    // Insert the periodic entries.
 	    for (std::set<DegreeOfFreedom>::iterator it = 
		   periodicDof[globalColDof].begin();
 		 it != periodicDof[globalColDof].end(); ++it) {
 	      cols.push_back(*it * dispMult + dispAddCol);
 	      values.push_back(value(*icursor) * scalFactor);
	    }
 	  } else {
	    // Neither row nor column dof index is periodic; simply add the entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor));
	  }
	}
      }


      // === Up to now we have assembled one row. Now the row must be sent to ===
      // === the corresponding rows of the petsc matrix.                      ===

      // Calculate petsc row index.
      int rowIndex = globalRowDof * dispMult + dispAddRow;
      
      if (periodicRow) {
	// The row dof is periodic, so send the row to all corresponding periodic rows.

	double scalFactor = 1.0 / (periodicDof[globalRowDof].size() + 1.0);
	
	int diagIndex = -1;
	for (int i = 0; i < static_cast<int>(values.size()); i++) {
	  // Change only the non-diagonal values in the column. For the diagonal test
	  // we compare the global dof indices of the dof matrix (not of the petsc
	  // matrix!).
	  if ((cols[i] - dispAddCol) / dispMult != globalRowDof)
	    values[i] *= scalFactor;
	  else
	    diagIndex = i;
	}
	
	// Send the main row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);	
 
	// Set the diagonal element to zero, i.e., the diagonal element of the
	// current row is not sent to the periodic row indices.
	if (diagIndex != -1)
	  values[diagIndex] = 0.0;

	// Send the row to all periodic row indices.
	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRowDof].begin();
	     it != periodicDof[globalRowDof].end(); ++it) {
	  int perRowIndex = *it * dispMult + dispAddRow;
	  MatSetValues(petscMatrix, 1, &perRowIndex, cols.size(), 
		       &(cols[0]), &(values[0]), ADD_VALUES);
	}

      } else {
	// The row dof is not periodic, simply send the row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(), 
		     &(cols[0]), &(values[0]), ADD_VALUES);
      }    
    }
  }


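  // Inserts the values of a DOFVector into a global petsc vector using the
  // same index mapping as setDofMatrix(). The value of a periodic dof is
  // split evenly among all associated entries.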
  void ParallelDomainBase::setDofVector(Vec& petscVec, DOFVector<double>* vec, 
					int dispMult, int dispAdd)
  {
    // Traverse all used dofs in the dof vector.
    DOFVector<double>::Iterator dofIt(vec, USED_DOFS);
    for (dofIt.reset(); !dofIt.end(); ++dofIt) {
      // Calculate global row index of the dof.
      int globalRow = mapLocalGlobalDOFs[dofIt.getDOFIndex()];
      // Calculate petsc index of the row dof.
      int index = globalRow * dispMult + dispAdd;

      if (periodicDof.count(globalRow) > 0) {
	// The dof index is periodic, so divide the value among all corresponding dof entries.

	double value = *dofIt / (periodicDof[globalRow].size() + 1.0);
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);

	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRow].begin();
	     it != periodicDof[globalRow].end(); ++it) {
	  index = *it * dispMult + dispAdd;
	  VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
	}

      } else {
	// The dof index is not periodic.
	double value = *dofIt;
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
      }
    }    
  }


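  // Creates the petsc matrix and the rhs/solution/temporary vectors for a
  // scalar problem and fills matrix and rhs from the given DOFMatrix and
  // DOFVector.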
  void ParallelDomainBase::fillPetscMatrix(DOFMatrix *mat, DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    MatCreate(PETSC_COMM_WORLD, &petscMatrix);
    MatSetSizes(petscMatrix, nRankRows, nRankRows, nOverallRows, nOverallRows);
    MatSetType(petscMatrix, MATAIJ);

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    setDofMatrix(mat);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    setDofVector(petscRhsVec, vec);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);
  }

  
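  // Assembles the petsc system matrix for a vector valued problem. The local
  // matrices of all components are traversed twice: a first pass counts the
  // non-zeros per row (entries of rows owned by other ranks are sent to their
  // owners) so that the petsc matrix can be preallocated exactly; a second
  // pass inserts the actual values via setDofMatrix().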
  void ParallelDomainBase::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    clock_t first = clock();

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

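    // Non-zero counters for the petsc preallocation: d_nnz[i] counts the
    // entries of local row i inside the rank's diagonal block, o_nnz[i] those
    // outside of it.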
    int d_nnz[nRankRows];
    int o_nnz[nRankRows];

    std::map<int, std::vector< std::pair<int, int> > > sendMatrixEntry;

    for (int i = 0; i < nRankRows; i++) {
      d_nnz[i] = 0;
      o_nnz[i] = 0;
    }

    for (int i = 0; i < nComponents; i++) {
      for (int j = 0; j < nComponents; j++) {
 	if ((*mat)[i][j]) {
	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();

	  traits::col<Matrix>::type col(bmat);
	  traits::const_value<Matrix>::type value(bmat);
	  
	  typedef traits::range_generator<row, Matrix>::type cursor_type;
	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;
	  
	  for (cursor_type cursor = begin<row>(bmat), 
		 cend = end<row>(bmat); cursor != cend; ++cursor) {

	    int r = mapLocalGlobalDOFs[*cursor] * nComponents + i;

	    if (isRankDof[*cursor]) {
	      r -= rstart * nComponents;

#if (DEBUG != 0)    
	      if (r < 0 || r >= nRankRows) {
		std::cout << "ERROR in rank: " << mpiRank << std::endl;
		std::cout << "  Wrong r = " << r << " " << *cursor << " " 
			  << mapLocalGlobalDOFs[*cursor] << " " 
			  << nRankRows << std::endl;
		ERROR_EXIT("Should not happen!\n");
	      }
#endif
	      
	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  if (c >= rstart * nComponents && 
		      c < rstart * nComponents + nRankRows)
		    d_nnz[r]++;
		  else
		    o_nnz[r]++;		  
		}    
	      }
	    } else {
	      int sendToRank = -1;

	      for (RankToDofContainer::iterator it = recvDofs.begin();
		   it != recvDofs.end(); ++it) {
		for (DofContainer::iterator dofIt = it->second.begin();
		     dofIt != it->second.end(); ++dofIt) {
		  if (**dofIt == *cursor) {
		    sendToRank = it->first;
		    break;
		  }
		}

		if (sendToRank != -1)
		  break;
	      }

	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");

	      for (icursor_type icursor = begin<nz>(cursor), 
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;
		  
		  sendMatrixEntry[sendToRank].push_back(std::make_pair(r, c));
		}
	      }
	    }
	  }
	}
      }
    }

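    // === The (row, column) pairs collected for rows owned by other ranks  ===
    // === are now communicated: each rank first announces how many entries ===
    // === it will send, then the entry pairs themselves are exchanged.     ===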
    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    std::vector<int*> sendBuffers;
    sendBuffers.reserve(recvDofs.size());

    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      request[requestCounter++] = mpiComm.Isend(&nSend, 1, MPI_INT, it->first, 0);
      
      if (nSend > 0) {
	int *sendbuf = new int[nSend * 2];
	for (int i = 0; i < nSend; i++) {
	  sendbuf[i * 2] = sendMatrixEntry[it->first][i].first;
	  sendbuf[i * 2 + 1] = sendMatrixEntry[it->first][i].second;
	}
	sendBuffers.push_back(sendbuf);
      } else {
	sendBuffers.push_back(NULL);
      }
    }

    std::vector<int> recvSize;
    recvSize.reserve(sendDofs.size());
    
    int i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it)
      request[requestCounter++] = 
	mpiComm.Irecv(&(recvSize[i++]), 1, MPI_INT, it->first, 0);

    MPI::Request::Waitall(requestCounter, request);

    requestCounter = 0;

    i = 0;
    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      if (nSend > 0)
	request[requestCounter++] = 
	  mpiComm.Isend(sendBuffers[i], nSend * 2, MPI_INT, it->first, 0);

      i++;
    }

    std::vector<int*> recvBuffers;
    recvBuffers.reserve(sendDofs.size());
    
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	int *recvbuf = new int[recvSize[i] * 2];
	recvBuffers.push_back(recvbuf);

	request[requestCounter++] =
	  mpiComm.Irecv(recvbuf, recvSize[i] * 2, MPI_INT, it->first, 0);
      } else {
	recvBuffers.push_back(NULL);
      }

      i++;
    }

    MPI::Request::Waitall(requestCounter, request);

    for (int j = 0; j < static_cast<int>(sendBuffers.size()); j++)
      if (sendBuffers[j])
 	delete [] sendBuffers[j];

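    // === Add the received (row, column) pairs of remotely assembled rows ===
    // === to the local non-zero counters.                                 ===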
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	for (int j = 0; j < recvSize[i]; j++) {
	  int r = recvBuffers[i][j * 2];
	  int c = recvBuffers[i][j * 2 + 1];

	  r -= rstart * nComponents;

	  TEST_EXIT_DBG(r >= 0 && r < nRankRows)("Should not happen!\n");
	  
	  if (c < rstart * nComponents || c >= rstart * nComponents + nRankRows)
	    o_nnz[r]++;
	  else
	    d_nnz[r]++;
	}

	delete [] recvBuffers[i];
      }

      i++;
    }

    MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows,
		    0, d_nnz, 0, o_nnz, &petscMatrix);

#if (DEBUG != 0)
    int a, b;
    MatGetOwnershipRange(petscMatrix, &a, &b);
    TEST_EXIT(a == rstart * nComponents)("Wrong matrix ownership range!\n");
    TEST_EXIT(b == rstart * nComponents + nRankRows)("Wrong matrix ownership range!\n");
#endif

    using mtl::tag::major; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    for (int i = 0; i < nComponents; i++)
      for (int j = 0; j < nComponents; j++)
	if ((*mat)[i][j])
	  setDofMatrix((*mat)[i][j], nComponents, i, j);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    for (int i = 0; i < nComponents; i++)
      setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);

    INFO(info, 8)("Filling the petsc matrix needed %.5f seconds\n", TIME_USED(first, clock()));
  }


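  // Solves the assembled petsc system for a scalar problem, writes the
  // solution back into the given DOFVector, and updates the dof values at the
  // interior boundaries with the values from the owning ranks.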
  void ParallelDomainBase::solvePetscMatrix(DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    KSP ksp;
    KSPCreate(PETSC_COMM_WORLD, &ksp);
    KSPSetOperators(ksp, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN);
    KSPSetTolerances(ksp, 1.e-7, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(ksp, KSPBCGS);
    KSPMonitorSet(ksp, myKSPMonitor, PETSC_NULL, 0);
    KSPSolve(ksp, petscRhsVec, petscSolVec);

    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nRankDofs; i++)
      (*vec)[mapLocalToDofIndex[i]] = vecPointer[i];

    VecRestoreArray(petscSolVec, &vecPointer);

    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs];

      for (int j = 0; j < nSendDOFs; j++)
	sendBuffers[i][j] = (*vec)[*((sendIt->second)[j])];

      request[requestCounter++] =
	mpiComm.Isend(sendBuffers[i], nSendDOFs, MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      for (int j = 0; j < static_cast<int>(recvIt->second.size()); j++)
	(*vec)[*(recvIt->second)[j]] = recvBuffers[i][j];

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(ksp);
  }


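  // Solves the assembled petsc system for a vector valued problem. The petsc
  // solution vector stores the values of all components consecutively for
  // every local dof; they are copied back into the individual DOFVectors of
  // the SystemVector.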
  void ParallelDomainBase::solvePetscMatrix(SystemVector &vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

#if 0
    // Set the old solution to be the initial guess for the petsc solver.
    for (int i = 0; i < nComponents; i++)
      setDofVector(petscSolVec, vec.getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscSolVec);
    VecAssemblyEnd(petscSolVec);
#endif

    // === Init Petsc solver. ===

    KSP solver;
    KSPCreate(PETSC_COMM_WORLD, &solver);
    KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN); 
    KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(solver, KSPBCGS);
    KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0);
    KSPSetFromOptions(solver);
    // Do not delete the solution vector, use it for the initial guess.
    //    KSPSetInitialGuessNonzero(solver, PETSC_TRUE);


    // === Run Petsc. ===

    KSPSolve(solver, petscRhsVec, petscSolVec);

    // === Transfer the values from petsc's solution vector to the dof vectors. ===
    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nComponents; i++) {
      DOFVector<double> *dofvec = vec.getDOFVector(i);
      for (int j = 0; j < nRankDofs; j++)
	(*dofvec)[mapLocalToDofIndex[j]] = vecPointer[j * nComponents + i];      
    }

    VecRestoreArray(petscSolVec, &vecPointer);


    // === Synchronize the values of common dofs, i.e., dofs that correspond to ===
    // === more than one partition.                                             ===
    synchVectors(vec);


    // === Print information about solution process. ===

    int iterations = 0;
    KSPGetIterationNumber(solver, &iterations);
    MSG("  Number of iterations: %d\n", iterations);
    
    double norm = 0.0;
    MatMult(petscMatrix, petscSolVec, petscTmpVec);
    VecAXPY(petscTmpVec, -1.0, petscRhsVec);
    VecNorm(petscTmpVec, NORM_2, &norm);
    MSG("  Residual norm: %e\n", norm);


    // === Destroy Petsc's variables. ===

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(solver);
  }
  
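  // Synchronizes the values of dofs at interior boundaries: every rank sends
  // the values of the boundary dofs it owns (sendDofs) and overwrites its
  // local copies of dofs owned by other ranks (recvDofs), for all components
  // of the system vector at once.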
  void ParallelDomainBase::synchVectors(SystemVector &vec)
  {
#if 0
    StdMpi<std::vector<double>, std::vector<double> > stdMpi(mpiComm);
    stdMpi.prepareCommunication(false);

    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      std::vector<double> dofs;
      int nSendDOFs = sendIt->second.size();
      dofs.reserve(nComponents * sendIt->second.size());
      
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  dofs.push_back((*dofvec)[*((sendIt->second)[k])]);
      }

      stdMpi.send(sendIt->first, dofs);
    }

    stdMpi.startCommunication();
#endif

#if 1

    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs * nComponents];

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  sendBuffers[i][counter++] = (*dofvec)[*((sendIt->second)[k])];
      }

      request[requestCounter++] = mpiComm.Isend(sendBuffers[i], nSendDOFs * nComponents,
						MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size() * nComponents;
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
 	for (int k = 0; k < nRecvDOFs; k++)
	  (*dofvec)[*(recvIt->second)[k]] = recvBuffers[i][counter++];
      }

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];
#endif
  }

  
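  // The following methods (de)serialize the dof communication structures.
  // Dofs are written as their indices; when reading, the indices are mapped
  // back to dof pointers via the given dofMap.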
  void ParallelDomainBase::serialize(std::ostream &out, DofContainer &data)
  {
    int vecSize = data.size();
    SerUtil::serialize(out, vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = (*data[i]);
      SerUtil::serialize(out, dofIndex);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, DofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    FUNCNAME("ParallelDomainBase::deserialize()");

    int vecSize = 0;
    SerUtil::deserialize(in, vecSize);
    data.resize(vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = 0;
      SerUtil::deserialize(in, dofIndex);

      TEST_EXIT_DBG(dofMap.count(dofIndex) != 0)
	("Dof index could not be deserialized correctly!\n");

      data[i] = dofMap[dofIndex];
    }
  }


  void ParallelDomainBase::serialize(std::ostream &out, RankToDofContainer &data)
  {
    int mapSize = data.size();
    SerUtil::serialize(out, mapSize);
    for (RankToDofContainer::iterator it = data.begin(); it != data.end(); ++it) {
      int rank = it->first;
      SerUtil::serialize(out, rank);
      serialize(out, it->second);
    }
  }

  
  void ParallelDomainBase::deserialize(std::istream &in, RankToDofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    int mapSize = 0;
    SerUtil::deserialize(in, mapSize);
    for (int i = 0; i < mapSize; i++) {
      int rank = 0;
      SerUtil::deserialize(in, rank);
      deserialize(in, data[rank], dofMap);      
    }
  }


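  // Computes the element weights used for partitioning: every leaf element of
  // the rank's partition adds a weight of 1 to its macro element. Returns the
  // sum of all local weights.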
  double ParallelDomainBase::setElemWeights(AdaptInfo *adaptInfo) 
  {
    double localWeightSum = 0.0;
    int elNum = -1;

    elemWeights.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	if (partitionData->getLevel() == 0)
	  elNum = element->getIndex();
	
	TEST_EXIT_DBG(elNum != -1)("invalid element number\n");
	if (element->isLeaf()) {
	  elemWeights[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }


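  // Partitions the mesh with ParMETIS: an initial partitioning on the first
  // call, an adaptive repartitioning on every further call. The previous and
  // the new element-to-rank mappings are stored in oldPartitionVec and
  // partitionVec.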
  void ParallelDomainBase::partitionMesh(AdaptInfo *adaptInfo)
  {
    if (initialPartitionMesh) {
      initialPartitionMesh = false;
      partitioner->fillCoarsePartitionVec(&oldPartitionVec);
      partitioner->partition(&elemWeights, INITIAL);
    } else {
      oldPartitionVec = partitionVec;
      partitioner->partition(&elemWeights, ADAPTIVE_REPART, 100.0 /*0.000001*/);
    }    

    partitioner->fillCoarsePartitionVec(&partitionVec);
  }


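  // Determines the interior boundaries of the rank's partition, i.e., all
  // sub-objects (edges in 2d, faces in 3d) that are shared with elements of
  // another partition, and stores them as AtomicBoundary objects.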
  void ParallelDomainBase::createInteriorBoundaryInfo()
  {
    FUNCNAME("ParallelDomainBase::createInteriorBoundaryInfo()");

    int nNeighbours = mesh->getGeo(NEIGH);
    int dim = mesh->getDim();
    GeoIndex subObj = CENTER;
    switch (dim) {
    case 2:
      subObj = EDGE;
      break;
    case 3:
      subObj = FACE;
      break;
    default:
      ERROR_EXIT("Invalid mesh dimension!\n");
    }     

    // === First, traverse the mesh and search for all elements that have a ===
    // === boundary with an element within another partition.               ===

    TraverseStack stack;
    ElInfo *elInfo = 
      stack.traverseFirst(mesh, -1, 
			  Mesh::CALL_LEAF_EL | Mesh::FILL_NEIGH | Mesh::FILL_BOUND);
    while (elInfo) {
      Element *element = elInfo->getElement();

      PartitionElementData *partitionData = 
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));   

      // Check whether the element is within the rank's partition.
      if (partitionData->getPartitionStatus() == IN) {
	for (int i = 0; i < nNeighbours; i++) {
	  if (!elInfo->getNeighbour(i))
	    continue;

	  PartitionElementData *neighbourPartitionData =
	    dynamic_cast<PartitionElementData*>(elInfo->getNeighbour(i)->
						getElementData(PARTITION_ED));

 	  if (neighbourPartitionData->getPartitionStatus() == OUT) {

	    // We have found an element that is in the rank's partition, but has a
	    // neighbour outside of the rank's partition.

	    // === Create information about the boundary between the two elements. ===

	    AtomicBoundary bound;	    	    
	    bound.rankObj.el = element;
	    bound.rankObj.elIndex = element->getIndex();
	    bound.rankObj.subObj = subObj;
	    bound.rankObj.ithObj = i;
	    // Do not set a pointer to the element, because it will be deleted from
	    // the mesh after partitioning and the pointer would become invalid.
	    bound.neighObj.el = NULL;
	    bound.neighObj.elIndex = elInfo->getNeighbour(i)->getIndex();
	    bound.neighObj.subObj = subObj;
	    bound.neighObj.ithObj = elInfo->getSideOfNeighbour(i);
	    
	    if (dim == 2) {
	      // i == 2  =>  getSideOfNeighbour(i) == 2
	      TEST_EXIT_DBG(i != 2 || elInfo->getSideOfNeighbour(i) == 2)
		("Should not happen!\n");
	    }

	    // Get rank number of the neighbouring element.
	    int otherElementRank = partitionVec[elInfo->getNeighbour(i)->getIndex()];


	    // === Add the boundary information object to the corresponding overall ===
	    // === boundary. There are three possibilities: if the boundary is a    ===
	    // === periodic boundary, just add it to \ref periodicBoundary. Here it ===
	    // === does not matter which rank is responsible for this boundary.     ===
	    // === Otherwise, the boundary is added either to \ref myIntBoundary or ===
	    // === to \ref otherIntBoundary, depending on which rank is responsible ===
	    // === for this boundary.                                               ===

	    if (BoundaryManager::isBoundaryPeriodic(elInfo->getBoundary(subObj, i))) {	      
	      // We have found an element that is at an interior, periodic boundary.
	      AtomicBoundary& b = periodicBoundary.getNewAtomic(otherElementRank);
	      b = bound;
	    } else {
	      // We have found an element that is at an interior, non-periodic boundary.
	      AtomicBoundary& b = ((mpiRank > otherElementRank) ?
				   myIntBoundary.getNewAtomic(otherElementRank) :
				   otherIntBoundary.getNewAtomic(otherElementRank));
	      b = bound;	      
	    }
 	  }
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }


    // === Once we have this information, we must take care of the order of the ===
    // === atomic bounds in the three boundary handling objects. In the end, all ===
    // === the boundaries have to be in the same order on both ranks that share  ===
    // === the boundary.                                                         ===

    std::vector<int*> sendBuffers, recvBuffers;

    // First we communicate myInt/otherIntBoundary, and then the periodic boundaries.
    MPI::Request request[max(myIntBoundary.boundary.size() + 
			     otherIntBoundary.boundary.size(),
			     periodicBoundary.boundary.size())];
    int requestCounter = 0;


    // === The owner of the boundaries send their atomic boundary order to the ===
    // === neighbouring ranks.                                                 ===