#include <algorithm>

#include "ParallelDomainBase.h"

#include "ParMetisPartitioner.h"
#include "Mesh.h"
#include "Traverse.h"
#include "ElInfo.h"
#include "Element.h"
#include "MacroElement.h"
#include "PartitionElementData.h"

#include "DOFMatrix.h"
#include "DOFVector.h"
#include "SystemVector.h"
#include "VtkWriter.h"
#include "ElementDofIterator.h"

#include "ProblemStatBase.h"
#include "StandardProblemIteration.h"
#include "ElementFileWriter.h"
#include "VertexVector.h"
#include "StdMpi.h"

#include "petscksp.h"

namespace AMDiS {

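  // KSP monitor callback used by the PETSc solvers below: on rank 0, print the
  // residual norm every 100th iteration.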
  PetscErrorCode myKSPMonitor(KSP ksp, PetscInt iter, PetscReal rnorm, void *)
  {
    if (iter % 100 == 0 && MPI::COMM_WORLD.Get_rank() == 0)
      std::cout << "  Petsc-Iteration " << iter << ": " << rnorm << std::endl;

    return 0;
  }

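  // Compare two DOF pointers by the DOF index they point to (used when sorting
  // DOF containers).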
  inline bool cmpDofsByValue(const DegreeOfFreedom* dof1, const DegreeOfFreedom* dof2)
  {
    return (*dof1 < *dof2);
  }

  ParallelDomainBase::ParallelDomainBase(ProblemIterationInterface *iIF,
					 ProblemTimeInterface *tIF,
					 FiniteElemSpace *fe,
					 RefinementManager *refineManager)
    : iterationIF(iIF),
      timeIF(tIF),
      name(iIF->getName()),
      feSpace(fe),
      mesh(fe->getMesh()),
      refinementManager(refineManager),
      initialPartitionMesh(true),
      nRankDofs(0),
      rstart(0),
      nComponents(1),
      deserialized(false)
  {
    FUNCNAME("ParallelDomainBase::ParallelDomainBase()");

    TEST_EXIT(mesh->getNumberOfDOFAdmin() == 1)
      ("Only meshes with one DOFAdmin are supported!\n");
    TEST_EXIT(mesh->getDOFAdmin(0).getNumberOfPreDOFs(0) == 0)
      ("Wrong pre dof number for DOFAdmin!\n");

    mpiRank = MPI::COMM_WORLD.Get_rank();
    mpiSize = MPI::COMM_WORLD.Get_size();
    mpiComm = MPI::COMM_WORLD;
    partitioner = new ParMetisPartitioner(mesh, &mpiComm);
  }

  void ParallelDomainBase::initParallelization(AdaptInfo *adaptInfo)
  {
    FUNCNAME("ParallelDomainBase::initParallelization()");

    TEST_EXIT(mpiSize > 1)
      ("Parallelization does not work with only one process!\n");

    // If the problem has already been read from a file, we do not need to do anything.
    if (deserialized)
      return;

    // Test if the mesh is the macro mesh only! Partitioning of the mesh is supported
    // only for macro meshes, so it will not work yet if the mesh is already refined
    // in some way.
    testForMacroMesh();

    // Create an initial partitioning of the mesh,
    partitioner->createPartitionData();
    // set the element weights, which are all 1 at the very beginning,
    setElemWeights(adaptInfo);
    // and now partition the mesh.
    partitionMesh(adaptInfo);

#if (DEBUG != 0)
    ElementIdxToDofs elMap;
    dbgCreateElementMap(elMap);

    if (mpiRank == 0)
      writePartitioningMesh("part.vtu");
#endif

    // === Create new global and local DOF numbering. ===

    // Set of all DOFs of the rank.
    std::vector<const DegreeOfFreedom*> rankDofs;
    // Number of DOFs in the rank's partition that are owned by the rank.
    nRankDofs = 0;
    // Number of all DOFs in the macro mesh.
    int nOverallDOFs = 0;

    createLocalGlobalNumbering(rankDofs, nRankDofs, nOverallDOFs);

    // === Create interior boundary information ===

    createInteriorBoundaryInfo();

    // === Remove all macro elements that are not part of the rank partition. ===

    removeMacroElements();

#if (DEBUG != 0)
    dbgTestElementMap(elMap);
    dbgTestInteriorBoundary();
#endif

    // === Reset all DOFAdmins of the mesh. ===

    updateDofAdmins();

    // === Create periodic dof mapping, if there are periodic boundaries. ===

    createPeriodicMap();

    // === Global refinements. ===

    int globalRefinement = 0;
    GET_PARAMETER(0, mesh->getName() + "->global refinements", "%d", &globalRefinement);

    if (globalRefinement > 0) {
      refinementManager->globalRefine(mesh, globalRefinement);

#if (DEBUG != 0)
      elMap.clear();
      dbgCreateElementMap(elMap);
#endif

      updateLocalGlobalNumbering(nRankDofs, nOverallDOFs);

      updateDofAdmins();

#if (DEBUG != 0)
      dbgTestElementMap(elMap);
#endif

      // === Update periodic mapping, if there are periodic boundaries. ===
      createPeriodicMap();
    }

#if (DEBUG != 0)
    dbgTestCommonDofs(true);
#endif

    nRankRows = nRankDofs * nComponents;
    nOverallRows = nOverallDOFs * nComponents;
  }


  void ParallelDomainBase::exitParallelization(AdaptInfo *adaptInfo)
  {}
  
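  // Resets all DOFAdmins of the mesh after the parallel DOF (re-)numbering:
  // the first mapLocalGlobalDOFs.size() local DOFs are marked as used, all
  // remaining DOFs are freed, and the used-size counters are updated.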
  void ParallelDomainBase::updateDofAdmins()
  {
    int nAdmins = mesh->getNumberOfDOFAdmin();
    for (int i = 0; i < nAdmins; i++) {
      DOFAdmin& admin = const_cast<DOFAdmin&>(mesh->getDOFAdmin(i));
      
      for (int j = 0; j < admin.getSize(); j++)
	admin.setDOFFree(j, true);
      for (int j = 0; j < static_cast<int>(mapLocalGlobalDOFs.size()); j++)
 	admin.setDOFFree(j, false);

      admin.setUsedSize(mapLocalGlobalDOFs.size());
      admin.setUsedCount(mapLocalGlobalDOFs.size());
      admin.setFirstHole(mapLocalGlobalDOFs.size());
    }
  }

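  // Checks that the mesh is a pure macro mesh (not yet refined) and that it has
  // at least as many macro elements as there are MPI processes.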
  void ParallelDomainBase::testForMacroMesh()
  {
    FUNCNAME("ParallelDomainBase::testForMacroMesh()");

    int nMacroElements = 0;

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1, Mesh::CALL_LEAF_EL);
    while (elInfo) {
      TEST_EXIT(elInfo->getLevel() == 0)
	("Mesh is already refined! This does not work with parallelization!\n");
      
      nMacroElements++;

      elInfo = stack.traverseNext(elInfo);
    }

    TEST_EXIT(nMacroElements >= mpiSize)
      ("The mesh has fewer macro elements than the number of MPI processes!\n");
  }

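  // Copies one DOFMatrix into the global PETSc matrix. dispMult (the number of
  // solution components) and dispAddRow/dispAddCol (the row and column component
  // offsets) map the scalar dof indices to global petsc indices. Rows and
  // columns of periodic dofs are distributed to all associated periodic indices
  // with appropriately scaled values.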
  void ParallelDomainBase::setDofMatrix(DOFMatrix* mat, int dispMult, 
					int dispAddRow, int dispAddCol)
  {
    FUNCNAME("ParallelDomainBase::setDofMatrix()");

    TEST_EXIT(mat)("No DOFMatrix!\n");

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits = mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    traits::col<Matrix>::type col(mat->getBaseMatrix());
    traits::const_value<Matrix>::type value(mat->getBaseMatrix());

    typedef traits::range_generator<row, Matrix>::type cursor_type;
    typedef traits::range_generator<nz, cursor_type>::type icursor_type;

    std::vector<int> cols;
    std::vector<double> values;
    cols.reserve(300);
    values.reserve(300);

    // === Traverse all rows of the dof matrix and insert the values row-wise ===
    // === into the petsc matrix.                                             ===

    for (cursor_type cursor = begin<row>(mat->getBaseMatrix()),
	   cend = end<row>(mat->getBaseMatrix()); cursor != cend; ++cursor) {

      cols.clear();
      values.clear();

      // Global index of the current row dof.
      int globalRowDof = mapLocalGlobalDOFs[*cursor];
      // Test if the current row dof is a periodic dof.
      bool periodicRow = (periodicDof.count(globalRowDof) > 0);

      // === Traverse all non-zero entries of the row and produce vector cols ===
      // === with the column indices of all row entries and vector values     ===
      // === with the corresponding values.                                   ===

      for (icursor_type icursor = begin<nz>(cursor), icend = end<nz>(cursor);
	   icursor != icend; ++icursor) {

	// Set only non-zero values.
	if (value(*icursor) != 0.0) {
	  // Global index of the current column dof.
	  int globalColDof = mapLocalGlobalDOFs[col(*icursor)];
	  // Calculate the exact position of the column index in the petsc matrix.
	  int colIndex = globalColDof * dispMult + dispAddCol;

	  // If the current row is not periodic, but the current dof index is periodic,
	  // we have to duplicate the value to the other corresponding periodic columns.
	  if (periodicRow == false && periodicDof.count(globalColDof) > 0) {
	    // The value is assigned to n matrix entries, therefore every entry
	    // gets only 1/n of the original value.
	    double scalFactor = 1.0 / (periodicDof[globalColDof].size() + 1.0);

	    // Insert the original entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor) * scalFactor);

	    // Insert the periodic entries.
	    for (std::set<DegreeOfFreedom>::iterator it =
		   periodicDof[globalColDof].begin();
		 it != periodicDof[globalColDof].end(); ++it) {
	      cols.push_back(*it * dispMult + dispAddCol);
	      values.push_back(value(*icursor) * scalFactor);
	    }
	  } else {
	    // Neither row nor column dof index is periodic, simply add the entry.
	    cols.push_back(colIndex);
	    values.push_back(value(*icursor));
	  }
	}
      }

      // === Up to now we have assembled one row. Now the row must be sent to the ===
      // === corresponding rows of the petsc matrix.                              ===

      // Calculate the petsc row index.
      int rowIndex = globalRowDof * dispMult + dispAddRow;

      if (periodicRow) {
	// The row dof is periodic, so the row must be sent to all corresponding
	// periodic rows.
	double scalFactor = 1.0 / (periodicDof[globalRowDof].size() + 1.0);

	int diagIndex = -1;
	for (int i = 0; i < static_cast<int>(values.size()); i++) {
	  // Change only the non-diagonal values in the column. For the diagonal test
	  // we compare the global dof indices of the dof matrix (not of the petsc
	  // matrix!).
	  if ((cols[i] - dispAddCol) / dispMult != globalRowDof)
	    values[i] *= scalFactor;
	  else
	    diagIndex = i;
	}

	// Send the main row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(),
		     &(cols[0]), &(values[0]), ADD_VALUES);

	// Set the diagonal element to zero, i.e., the diagonal element of the current
	// row is not sent to the periodic row indices.
	if (diagIndex != -1)
	  values[diagIndex] = 0.0;

	// Send the row to all periodic row indices.
	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRowDof].begin();
	     it != periodicDof[globalRowDof].end(); ++it) {
	  int perRowIndex = *it * dispMult + dispAddRow;
	  MatSetValues(petscMatrix, 1, &perRowIndex, cols.size(),
		       &(cols[0]), &(values[0]), ADD_VALUES);
	}

      } else {
	// The row dof is not periodic, simply send the row to the petsc matrix.
	MatSetValues(petscMatrix, 1, &rowIndex, cols.size(),
		     &(cols[0]), &(values[0]), ADD_VALUES);
      }
    }
  }
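
  // Copies one DOFVector into the global PETSc vector petscVec, using the same
  // dispMult/dispAdd displacement scheme as setDofMatrix(). The value of a
  // periodic dof is divided equally among all associated periodic indices.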
  void ParallelDomainBase::setDofVector(Vec& petscVec, DOFVector<double>* vec, 
					int dispMult, int dispAdd)
  {
    // Traverse all used dofs in the dof vector.
    DOFVector<double>::Iterator dofIt(vec, USED_DOFS);
    for (dofIt.reset(); !dofIt.end(); ++dofIt) {
      // Calculate the global row index of the dof.
      int globalRow = mapLocalGlobalDOFs[dofIt.getDOFIndex()];
      // Calculate the petsc index of the row dof.
      int index = globalRow * dispMult + dispAdd;

      if (periodicDof.count(globalRow) > 0) {
	// The dof index is periodic, so divide the value among all corresponding
	// dof entries.
	double value = *dofIt / (periodicDof[globalRow].size() + 1.0);
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);

	for (std::set<DegreeOfFreedom>::iterator it = periodicDof[globalRow].begin();
	     it != periodicDof[globalRow].end(); ++it) {
	  index = *it * dispMult + dispAdd;
	  VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
	}
      } else {
	// The dof index is not periodic.
	double value = *dofIt;
	VecSetValues(petscVec, 1, &index, &value, ADD_VALUES);
      }
    }
  }

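  // Creates the PETSc matrix and vectors for a scalar problem and assembles
  // them from a single DOFMatrix and DOFVector.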
  void ParallelDomainBase::fillPetscMatrix(DOFMatrix *mat, DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    MatCreate(PETSC_COMM_WORLD, &petscMatrix);
    MatSetSizes(petscMatrix, nRankRows, nRankRows, nOverallRows, nOverallRows);
    MatSetType(petscMatrix, MATAIJ);

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    setDofMatrix(mat);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    setDofVector(petscRhsVec, vec);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);
  }

  
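  // Assembles the PETSc matrix and rhs vector for a vector-valued problem.
  // Before the matrix is created, the number of non-zero entries per row is
  // counted (d_nnz for the diagonal block, o_nnz for the off-diagonal block)
  // so that MatCreateMPIAIJ can preallocate the required memory.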
  void ParallelDomainBase::fillPetscMatrix(Matrix<DOFMatrix*> *mat, SystemVector *vec)
  {
    FUNCNAME("ParallelDomainBase::fillPetscMatrix()");

    clock_t first = clock();

    VecCreate(PETSC_COMM_WORLD, &petscRhsVec);
    VecSetSizes(petscRhsVec, nRankRows, nOverallRows);
    VecSetType(petscRhsVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscSolVec);
    VecSetSizes(petscSolVec, nRankRows, nOverallRows);
    VecSetType(petscSolVec, VECMPI);

    VecCreate(PETSC_COMM_WORLD, &petscTmpVec);
    VecSetSizes(petscTmpVec, nRankRows, nOverallRows);
    VecSetType(petscTmpVec, VECMPI);

    using mtl::tag::row; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits = mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    int d_nnz[nRankRows];
    int o_nnz[nRankRows];

    std::map<int, std::vector< std::pair<int, int> > > sendMatrixEntry;

    for (int i = 0; i < nRankRows; i++) {
      d_nnz[i] = 0;
      o_nnz[i] = 0;
    }

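    // === Count the number of non-zeros per row. Rows owned by this rank are ===
    // === counted directly into d_nnz/o_nnz; entries belonging to rows owned ===
    // === by another rank are collected in sendMatrixEntry and sent to that  ===
    // === rank afterwards.                                                   ===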
    for (int i = 0; i < nComponents; i++) {
      for (int j = 0; j < nComponents; j++) {
	if ((*mat)[i][j]) {
	  Matrix bmat = (*mat)[i][j]->getBaseMatrix();

	  traits::col<Matrix>::type col(bmat);
	  traits::const_value<Matrix>::type value(bmat);

	  typedef traits::range_generator<row, Matrix>::type cursor_type;
	  typedef traits::range_generator<nz, cursor_type>::type icursor_type;

	  for (cursor_type cursor = begin<row>(bmat),
		 cend = end<row>(bmat); cursor != cend; ++cursor) {

	    int r = mapLocalGlobalDOFs[*cursor] * nComponents + i;

	    if (isRankDof[*cursor]) {
	      r -= rstart * nComponents;

#if (DEBUG != 0)
	      if (r < 0 || r >= nRankRows) {
		std::cout << "ERROR in rank: " << mpiRank << std::endl;
		std::cout << "  Wrong r = " << r << " " << *cursor << " "
			  << mapLocalGlobalDOFs[*cursor] << " "
			  << nRankRows << std::endl;
		ERROR_EXIT("Should not happen!\n");
	      }
#endif

	      for (icursor_type icursor = begin<nz>(cursor),
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  if (c >= rstart * nComponents &&
		      c < rstart * nComponents + nRankRows)
		    d_nnz[r]++;
		  else
		    o_nnz[r]++;
		}
	      }
	    } else {
	      int sendToRank = -1;

	      for (RankToDofContainer::iterator it = recvDofs.begin();
		   it != recvDofs.end(); ++it) {
		for (DofContainer::iterator dofIt = it->second.begin();
		     dofIt != it->second.end(); ++dofIt) {
		  if (**dofIt == *cursor) {
		    sendToRank = it->first;
		    break;
		  }
		}

		if (sendToRank != -1)
		  break;
	      }

	      TEST_EXIT_DBG(sendToRank != -1)("Should not happen!\n");

	      for (icursor_type icursor = begin<nz>(cursor),
		     icend = end<nz>(cursor); icursor != icend; ++icursor) {
		if (value(*icursor) != 0.0) {
		  int c = mapLocalGlobalDOFs[col(*icursor)] * nComponents + j;

		  sendMatrixEntry[sendToRank].push_back(std::make_pair(r, c));
		}
	      }
	    }
	  }
	}
      }
    }

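    // === Exchange the collected entries of non-owned rows with the           ===
    // === neighbouring ranks, so that every rank can complete its own         ===
    // === d_nnz/o_nnz counts.                                                 ===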
    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    std::vector<int*> sendBuffers;
    sendBuffers.reserve(recvDofs.size());

    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      request[requestCounter++] = mpiComm.Isend(&nSend, 1, MPI_INT, it->first, 0);
      
      if (nSend > 0) {
	int *sendbuf = new int[nSend * 2];
	for (int i = 0; i < nSend; i++) {
	  sendbuf[i * 2] = sendMatrixEntry[it->first][i].first;
	  sendbuf[i * 2 + 1] = sendMatrixEntry[it->first][i].second;
	}
	sendBuffers.push_back(sendbuf);
      } else {
	sendBuffers.push_back(NULL);
      }
    }

    std::vector<int> recvSize;
    recvSize.reserve(sendDofs.size());
    
    int i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it)
      request[requestCounter++] = 
	mpiComm.Irecv(&(recvSize[i++]), 1, MPI_INT, it->first, 0);

    MPI::Request::Waitall(requestCounter, request);

    requestCounter = 0;

    i = 0;
    for (RankToDofContainer::iterator it = recvDofs.begin(); 
	 it != recvDofs.end(); ++it) {
      int nSend = sendMatrixEntry[it->first].size();

      if (nSend > 0)
	request[requestCounter++] = 
	  mpiComm.Isend(sendBuffers[i], nSend * 2, MPI_INT, it->first, 0);

      i++;
    }

    std::vector<int*> recvBuffers;
    recvBuffers.reserve(sendDofs.size());
    
    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	int *recvbuf = new int[recvSize[i] * 2];
	recvBuffers.push_back(recvbuf);

	request[requestCounter++] =
	  mpiComm.Irecv(recvbuf, recvSize[i] * 2, MPI_INT, it->first, 0);
      } else {
	recvBuffers.push_back(NULL);
      }

      i++;
    }

    MPI::Request::Waitall(requestCounter, request);

    for (int j = 0; j < static_cast<int>(sendBuffers.size()); j++)
      if (sendBuffers[j])
 	delete [] sendBuffers[j];

    i = 0;
    for (RankToDofContainer::iterator it = sendDofs.begin();
	 it != sendDofs.end(); ++it) {
      if (recvSize[i] > 0) {
	for (int j = 0; j < recvSize[i]; j++) {
	  int r = recvBuffers[i][j * 2];
	  int c = recvBuffers[i][j * 2 + 1];

	  r -= rstart * nComponents;

	  TEST_EXIT_DBG(r >= 0 && r < nRankRows)("Should not happen!\n");
	  
	  if (c < rstart * nComponents || c >= rstart * nComponents + nRankRows)
	    o_nnz[r]++;
	  else
	    d_nnz[r]++;
	}

	delete [] recvBuffers[i];
      }

      i++;
    }

    MatCreateMPIAIJ(PETSC_COMM_WORLD, nRankRows, nRankRows, nOverallRows, nOverallRows,
		    0, d_nnz, 0, o_nnz, &petscMatrix);

#if (DEBUG != 0)
    int a, b;
    MatGetOwnershipRange(petscMatrix, &a, &b);
    TEST_EXIT(a == rstart * nComponents)("Wrong matrix ownership range!\n");
    TEST_EXIT(b == rstart * nComponents + nRankRows)("Wrong matrix ownership range!\n");
#endif

    using mtl::tag::major; using mtl::tag::nz; using mtl::begin; using mtl::end;
    namespace traits= mtl::traits;
    typedef DOFMatrix::base_matrix_type Matrix;

    for (int i = 0; i < nComponents; i++)
      for (int j = 0; j < nComponents; j++)
	if ((*mat)[i][j])
	  setDofMatrix((*mat)[i][j], nComponents, i, j);

    MatAssemblyBegin(petscMatrix, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(petscMatrix, MAT_FINAL_ASSEMBLY);

    for (int i = 0; i < nComponents; i++)
      setDofVector(petscRhsVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscRhsVec);
    VecAssemblyEnd(petscRhsVec);

    INFO(info, 8)("Fill petsc matrix needed %.5f seconds\n", TIME_USED(first, clock()));
  }


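  // Solves the assembled PETSc system for a scalar problem and writes the local
  // part of the solution back into the DOFVector; values at interior boundary
  // dofs are then exchanged with the neighbouring ranks.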
  void ParallelDomainBase::solvePetscMatrix(DOFVector<double> *vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

    ERROR_EXIT("Not yet tested for scalar problem definition!\n");

    KSP ksp;
    KSPCreate(PETSC_COMM_WORLD, &ksp);
    KSPSetOperators(ksp, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN);
    KSPSetTolerances(ksp, 1.e-7, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(ksp, KSPBCGS);
    KSPMonitorSet(ksp, myKSPMonitor, PETSC_NULL, 0);
    KSPSolve(ksp, petscRhsVec, petscSolVec);

    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nRankDofs; i++)
      (*vec)[mapLocalToDofIndex[i]] = vecPointer[i];

    VecRestoreArray(petscSolVec, &vecPointer);

    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;

    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs];

      for (int j = 0; j < nSendDOFs; j++)
	sendBuffers[i][j] = (*vec)[*((sendIt->second)[j])];

      request[requestCounter++] =
	mpiComm.Isend(sendBuffers[i], nSendDOFs, MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }

    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      for (int j = 0; j < static_cast<int>(recvIt->second.size()); j++)
	(*vec)[*(recvIt->second)[j]] = recvBuffers[i][j];

      delete [] recvBuffers[i];
    }

    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
  }


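  // Solves the assembled PETSc system for a vector-valued problem, copies the
  // solution back into the SystemVector, synchronizes the values at interior
  // boundaries, and prints the iteration count and residual norm.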
  void ParallelDomainBase::solvePetscMatrix(SystemVector &vec)
  {
    FUNCNAME("ParallelDomainBase::solvePetscMatrix()");

#if 0
    // Set the old solution to be the initial guess for the petsc solver.
    for (int i = 0; i < nComponents; i++)
      setDofVector(petscSolVec, vec->getDOFVector(i), nComponents, i);

    VecAssemblyBegin(petscSolVec);
    VecAssemblyEnd(petscSolVec);
#endif

    // === Init Petsc solver. ===

    KSP solver;
    KSPCreate(PETSC_COMM_WORLD, &solver);
    KSPSetOperators(solver, petscMatrix, petscMatrix, SAME_NONZERO_PATTERN);
    KSPSetTolerances(solver, 0.0, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT);
    KSPSetType(solver, KSPBCGS);
    KSPMonitorSet(solver, myKSPMonitor, PETSC_NULL, 0);
    KSPSetFromOptions(solver);
    // Do not delete the solution vector, use it for the initial guess.
    //    KSPSetInitialGuessNonzero(solver, PETSC_TRUE);

    // === Run Petsc. ===

    KSPSolve(solver, petscRhsVec, petscSolVec);

    // === Transfer values from Petsc's solution vector to the dof vectors. ===

    PetscScalar *vecPointer;
    VecGetArray(petscSolVec, &vecPointer);

    for (int i = 0; i < nComponents; i++) {
      DOFVector<double> *dofvec = vec.getDOFVector(i);
      for (int j = 0; j < nRankDofs; j++)
	(*dofvec)[mapLocalToDofIndex[j]] = vecPointer[j * nComponents + i];
    }

    VecRestoreArray(petscSolVec, &vecPointer);

    // === Synchronize dofs at common dofs, i.e., dofs that correspond to more ===
    // === than one partition.                                                 ===
    synchVectors(vec);

    // === Print information about the solution process. ===

    int iterations = 0;
    KSPGetIterationNumber(solver, &iterations);
    MSG("  Number of iterations: %d\n", iterations);

    double norm = 0.0;
    MatMult(petscMatrix, petscSolVec, petscTmpVec);
    VecAXPY(petscTmpVec, -1.0, petscRhsVec);
    VecNorm(petscTmpVec, NORM_2, &norm);
    MSG("  Residual norm: %e\n", norm);

    // === Destroy Petsc's variables. ===

    MatDestroy(petscMatrix);
    VecDestroy(petscRhsVec);
    VecDestroy(petscSolVec);
    VecDestroy(petscTmpVec);
    KSPDestroy(solver);
  }
  
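  // Synchronizes a SystemVector at the interior boundaries: every rank sends
  // the values of the dofs it owns to the ranks that share them and receives
  // the values of the dofs it does not own.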
  void ParallelDomainBase::synchVectors(SystemVector &vec)
  {
#if 0
    StdMpi<std::vector<double>, std::vector<double> > stdMpi(mpiComm);
    stdMpi.prepareCommunication(false);

    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      std::vector<double> dofs;
      int nSendDOFs = sendIt->second.size();
      dofs.reserve(nComponents * sendIt->second.size());
      
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  dofs.push_back((*dofvec)[*((sendIt->second)[k])]);
      }

      stdMpi.send(sendIt->first, dofs);
    }

    stdMpi.startCommunication();
#endif

#if 1

806
807
808
809
810
811
812
813
814
    std::vector<double*> sendBuffers(sendDofs.size());
    std::vector<double*> recvBuffers(recvDofs.size());

    MPI::Request request[sendDofs.size() + recvDofs.size()];
    int requestCounter = 0;
    
    int i = 0;
    for (RankToDofContainer::iterator sendIt = sendDofs.begin();
	 sendIt != sendDofs.end(); ++sendIt, i++) {
      int nSendDOFs = sendIt->second.size();
      sendBuffers[i] = new double[nSendDOFs * nComponents];

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
	for (int k = 0; k < nSendDOFs; k++)
	  sendBuffers[i][counter++] = (*dofvec)[*((sendIt->second)[k])];
      }

      request[requestCounter++] = mpiComm.Isend(sendBuffers[i], nSendDOFs * nComponents,
						MPI_DOUBLE, sendIt->first, 0);
    }

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size() * nComponents;
      recvBuffers[i] = new double[nRecvDOFs];

      request[requestCounter++] =
	mpiComm.Irecv(recvBuffers[i], nRecvDOFs, MPI_DOUBLE, recvIt->first, 0);
    }


    MPI::Request::Waitall(requestCounter, request);

    i = 0;
    for (RankToDofContainer::iterator recvIt = recvDofs.begin();
	 recvIt != recvDofs.end(); ++recvIt, i++) {
      int nRecvDOFs = recvIt->second.size();

      int counter = 0;
      for (int j = 0; j < nComponents; j++) {
	DOFVector<double> *dofvec = vec.getDOFVector(j);
 	for (int k = 0; k < nRecvDOFs; k++)
	  (*dofvec)[*(recvIt->second)[k]] = recvBuffers[i][counter++];
      }

      delete [] recvBuffers[i];
    }
    
    for (int i = 0; i < static_cast<int>(sendBuffers.size()); i++)
      delete [] sendBuffers[i];
#endif
  }

  void ParallelDomainBase::serialize(std::ostream &out, DofContainer &data)
  {
    int vecSize = data.size();
    SerUtil::serialize(out, vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = (*data[i]);
      SerUtil::serialize(out, dofIndex);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, DofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    FUNCNAME("ParallelDomainBase::deserialize()");

    int vecSize = 0;
    SerUtil::deserialize(in, vecSize);
    data.resize(vecSize);
    for (int i = 0; i < vecSize; i++) {
      int dofIndex = 0;
      SerUtil::deserialize(in, dofIndex);

      TEST_EXIT_DBG(dofMap.count(dofIndex) != 0)
	("Dof index could not be deserialized correctly!\n");

      data[i] = dofMap[dofIndex];
    }
  }


  void ParallelDomainBase::serialize(std::ostream &out, RankToDofContainer &data)
  {
    int mapSize = data.size();
    SerUtil::serialize(out, mapSize);
    for (RankToDofContainer::iterator it = data.begin(); it != data.end(); ++it) {
      int rank = it->first;
      SerUtil::serialize(out, rank);
      serialize(out, it->second);
    }
  }


  void ParallelDomainBase::deserialize(std::istream &in, RankToDofContainer &data,
				       std::map<int, const DegreeOfFreedom*> &dofMap)
  {
    int mapSize = 0;
    SerUtil::deserialize(in, mapSize);
    for (int i = 0; i < mapSize; i++) {
      int rank = 0;
      SerUtil::deserialize(in, rank);
      deserialize(in, data[rank], dofMap);
    }
  }

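  // Computes the element weights used for the (re-)partitioning: every leaf
  // element of the rank's partition contributes a weight of 1 to its macro
  // element. Returns the sum of the local weights.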
  double ParallelDomainBase::setElemWeights(AdaptInfo *adaptInfo) 
  {
    double localWeightSum = 0.0;
    int elNum = -1;

    elemWeights.clear();

    TraverseStack stack;
    ElInfo *elInfo = stack.traverseFirst(mesh, -1,
					 Mesh::CALL_EVERY_EL_PREORDER);
    while (elInfo) {
      Element *element = elInfo->getElement();

      // get partition data
      PartitionElementData *partitionData = dynamic_cast<PartitionElementData*>
	(element->getElementData(PARTITION_ED));

      if (partitionData && partitionData->getPartitionStatus() == IN) {
	if (partitionData->getLevel() == 0)
	  elNum = element->getIndex();
	TEST_EXIT_DBG(elNum != -1)("invalid element number\n");
	if (element->isLeaf()) {
	  elemWeights[elNum] += 1.0;
	  localWeightSum += 1.0;
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    return localWeightSum;
  }


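  // Runs ParMETIS: an initial partitioning on the first call, an adaptive
  // repartitioning on all following calls. The old and new element-to-rank
  // mappings are stored in oldPartitionVec and partitionVec.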
  void ParallelDomainBase::partitionMesh(AdaptInfo *adaptInfo)
  {
    if (initialPartitionMesh) {
      initialPartitionMesh = false;
      partitioner->fillCoarsePartitionVec(&oldPartitionVec);
      partitioner->partition(&elemWeights, INITIAL);
    } else {
      oldPartitionVec = partitionVec;
      partitioner->partition(&elemWeights, ADAPTIVE_REPART, 100.0 /*0.000001*/);
    }    

    partitioner->fillCoarsePartitionVec(&partitionVec);
  }


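  // Traverses the leaf elements of the rank's partition and records, for every
  // face (3d) or edge (2d) shared with an element of another partition, an
  // AtomicBoundary in myIntBoundary, otherIntBoundary, or periodicBoundary.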
  void ParallelDomainBase::createInteriorBoundaryInfo()
  {
    FUNCNAME("ParallelDomainBase::createInteriorBoundaryInfo()");

    int nNeighbours = mesh->getGeo(NEIGH);
    int dim = mesh->getDim();
    GeoIndex subObj = CENTER;
    switch (dim) {
    case 2:
      subObj = EDGE;
      break;
    case 3:
      subObj = FACE;
      break;
    default:
      ERROR_EXIT("What is this?\n");
    }

    // === First, traverse the mesh and search for all elements that have a   ===
    // === boundary with an element within another partition.                 ===

    TraverseStack stack;
    ElInfo *elInfo =
      stack.traverseFirst(mesh, -1,
			  Mesh::CALL_LEAF_EL | Mesh::FILL_NEIGH | Mesh::FILL_BOUND);
    while (elInfo) {
      Element *element = elInfo->getElement();

      PartitionElementData *partitionData =
	dynamic_cast<PartitionElementData*>(element->getElementData(PARTITION_ED));

      // Check if the element is within the rank's partition.
      if (partitionData->getPartitionStatus() == IN) {
	for (int i = 0; i < nNeighbours; i++) {
	  if (!elInfo->getNeighbour(i))
	    continue;

	  PartitionElementData *neighbourPartitionData =
	    dynamic_cast<PartitionElementData*>(elInfo->getNeighbour(i)->
						getElementData(PARTITION_ED));

	  if (neighbourPartitionData->getPartitionStatus() == OUT) {

	    // We have found an element that is in rank's partition, but has a
	    // neighbour outside of the rank's partition.

	    // === Create information about the boundary between the two elements. ===

	    AtomicBoundary bound;
	    bound.rankObj.el = element;
	    bound.rankObj.elIndex = element->getIndex();
	    bound.rankObj.elType = elInfo->getType();
	    bound.rankObj.subObj = subObj;
	    bound.rankObj.ithObj = i;
	    // Do not set a pointer to the element, because it will be deleted from
	    // the mesh after partitioning and the pointer would become invalid.
	    bound.neighObj.el = NULL;
	    bound.neighObj.elIndex = elInfo->getNeighbour(i)->getIndex();
	    bound.neighObj.elType = -1;
	    bound.neighObj.subObj = subObj;
	    bound.neighObj.ithObj = elInfo->getSideOfNeighbour(i);

	    if (dim == 2) {
	      // i == 2  =>  getSideOfNeighbour(i) == 2
	      TEST_EXIT_DBG(i != 2 || elInfo->getSideOfNeighbour(i) == 2)
		("Should not happen!\n");
	    }

	    // Get the rank number of the neighbouring element.
	    int otherElementRank = partitionVec[elInfo->getNeighbour(i)->getIndex()];

	    // === Add the boundary information object to the corresponding overall ===
	    // === boundary. There are three possibilities: if the boundary is a    ===
	    // === periodic boundary, just add it to \ref periodicBoundary. Here it ===
	    // === does not matter which rank is responsible for this boundary.     ===
	    // === Otherwise, the boundary is added either to \ref myIntBoundary or ===
	    // === to \ref otherIntBoundary, depending on which rank is responsible ===
	    // === for this boundary.                                               ===

	    if (BoundaryManager::isBoundaryPeriodic(elInfo->getBoundary(subObj, i))) {
	      // We have found an element that is at an interior, periodic boundary.
	      AtomicBoundary& b = periodicBoundary.getNewAtomic(otherElementRank);
	      b = bound;
	    } else {
	      // We have found an element that is at an interior, non-periodic boundary.
	      AtomicBoundary& b = ((mpiRank > otherElementRank) ?
				   myIntBoundary.getNewAtomic(otherElementRank) :
				   otherIntBoundary.getNewAtomic(otherElementRank));
	      b = bound;
	    }
	  }
	}
      }

      elInfo = stack.traverseNext(elInfo);
    }

    // === Once we have this information, we must care about the order of the atomic ===
    // === bounds in the three boundary handling objects. In the end all the bound-  ===
    // === aries have to be in the same order on both ranks that share the boundary. ===

    std::vector<int*> sendBuffers, recvBuffers;

    // First we communicate myInt/otherIntBoundary, and then the periodic boundaries.
    MPI::Request request[max(myIntBoundary.boundary.size() +
			     otherIntBoundary.boundary.size(),
			     periodicBoundary.boundary.size())];
    int requestCounter = 0;

    // === The owners of the boundaries send their atomic boundary order to the ===
    // === neighbouring ranks.                                                  ===

    for (RankToBoundMap::iterator rankIt = myIntBoundary.boundary.begin();
	 rankIt != myIntBoundary.boundary.end(); ++rankIt) {